diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 3acc4f1a6f8420552614eed779d414dd9a926520..4a5a887181c30c29535aba5728f353c948ade9da 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -436,6 +436,8 @@ sysrq.txt
 	- info on the magic SysRq key.
 target/
 	- directory with info on generating TCM v4 fabric .ko modules
+tee.txt
+	- info on the TEE subsystem and drivers
 this_cpu_ops.txt
 	- List rationale behind and the way to use this_cpu operations.
 thermal/
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 49874173705507f72fac2f5f55da46fc34c3cc52..dfd56ec7a850c1c3b722fe09403e74ea8ca849d7 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -350,3 +350,19 @@ Contact:	Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
 Description:	AArch64 CPU registers
 		'identification' directory exposes the CPU ID registers for
 		identifying model and revision of the CPU.
+
+What:		/sys/devices/system/cpu/vulnerabilities
+		/sys/devices/system/cpu/vulnerabilities/meltdown
+		/sys/devices/system/cpu/vulnerabilities/spectre_v1
+		/sys/devices/system/cpu/vulnerabilities/spectre_v2
+Date:		January 2018
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Information about CPU vulnerabilities
+
+		The files are named after the code names of CPU
+		vulnerabilities. The output of those files reflects the
+		state of the CPUs in the system. Possible output values:
+
+		"Not affected"	  CPU is not affected by the vulnerability
+		"Vulnerable"	  CPU is affected and no mitigation in effect
+		"Mitigation: $M"  CPU is affected and mitigation $M is in effect
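The ABI above is plain text and trivially consumable from user space. A
minimal reader, in C for concreteness (illustrative only, not part of the
patch; the file names are exactly those documented in the entry above, and
an absent file simply means the running kernel predates this ABI):

	#include <stdio.h>

	int main(void)
	{
		static const char *const vulns[] = {
			"meltdown", "spectre_v1", "spectre_v2",
		};
		char path[128], line[256];
		unsigned int i;

		for (i = 0; i < sizeof(vulns) / sizeof(vulns[0]); i++) {
			FILE *f;

			snprintf(path, sizeof(path),
				 "/sys/devices/system/cpu/vulnerabilities/%s",
				 vulns[i]);
			f = fopen(path, "r");
			if (!f)
				continue;	/* kernel without this ABI */
			if (fgets(line, sizeof(line), f))
				/* sysfs output already ends in '\n' */
				printf("%-10s %s", vulns[i], line);
			fclose(f);
		}
		return 0;
	}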
diff --git a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d38834c67dffe910af59fa15531b9fcd2303a33b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt
@@ -0,0 +1,31 @@
+OP-TEE Device Tree Bindings
+
+OP-TEE is a piece of software using hardware features to provide a Trusted
+Execution Environment. The security can be provided with ARM TrustZone, but
+also by virtualization or a separate chip.
+
+We're using "linaro" as the first part of the compatible property for
+the reference implementation maintained by Linaro.
+
+* OP-TEE based on ARM TrustZone required properties:
+
+- compatible : should contain "linaro,optee-tz"
+
+- method : The method of calling the OP-TEE Trusted OS. Permitted
+           values are:
+
+           "smc" : SMC #0, with the register assignments specified
+                   in drivers/tee/optee/optee_smc.h
+
+           "hvc" : HVC #0, with the register assignments specified
+                   in drivers/tee/optee/optee_smc.h
+
+
+
+Example:
+	firmware {
+		optee {
+			compatible = "linaro,optee-tz";
+			method = "smc";
+		};
+	};
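For orientation, this is roughly how a kernel consumer could locate the node
and its invocation method with the standard OF helpers. A hedged sketch only,
not the actual probe code (the real handling lives in the OP-TEE driver added
under drivers/tee/optee/, and optee_method_example is a name invented here):

	#include <linux/errno.h>
	#include <linux/of.h>
	#include <linux/printk.h>

	/* Sketch: find the OP-TEE firmware node and read its "method"
	 * property ("smc" or "hvc" per the binding above). */
	static int optee_method_example(void)
	{
		struct device_node *np;
		const char *method;
		int ret;

		np = of_find_compatible_node(NULL, NULL, "linaro,optee-tz");
		if (!np)
			return -ENODEV;

		ret = of_property_read_string(np, "method", &method);
		if (!ret)
			pr_info("optee: calling convention is \"%s\"\n", method);

		of_node_put(np);
		return ret;
	}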
diff --git a/Documentation/devicetree/bindings/hwmon/jc42.txt b/Documentation/devicetree/bindings/hwmon/jc42.txt
index 07a250498fbb4cccb3210a418c572907d8310c43..f569db58f64a100b6c72e1053ce443b77ae102c2 100644
--- a/Documentation/devicetree/bindings/hwmon/jc42.txt
+++ b/Documentation/devicetree/bindings/hwmon/jc42.txt
@@ -34,6 +34,10 @@ Required properties:
 
 - reg: I2C address
 
+Optional properties:
+- smbus-timeout-disable: When set, the SMBus timeout function will be disabled.
+  This is not supported on all chips.
+
 Example:
 
 temp-sensor@1a {
diff --git a/Documentation/devicetree/bindings/usb/usb-device.txt b/Documentation/devicetree/bindings/usb/usb-device.txt
index 1c35e7b665e1f8e9eb64dc6495f029a3d7c3ff64..03ab8f5eab40a6e8f712bd8dcb0f3c6af1847864 100644
--- a/Documentation/devicetree/bindings/usb/usb-device.txt
+++ b/Documentation/devicetree/bindings/usb/usb-device.txt
@@ -11,7 +11,7 @@ Required properties:
   be used, but a device adhering to this binding may leave out all except
   for usbVID,PID.
 - reg: the port number which this device is connecting to, the range
-  is 1-31.
+  is 1-255.
 
 Example:
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index bceffffb7502d80b71cbf3ddff2670cfcc7353d2..d0526e05aea33a9a4beb6d1dd59e36ee448741ab 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -156,6 +156,7 @@ lacie	LaCie
 lantiq	Lantiq Semiconductor
 lenovo	Lenovo Group Ltd.
 lg	LG Corporation
+linaro	Linaro Limited
 linux	Linux-specific binding
 lltc	Linear Technology Corporation
 lsi	LSI Corp. (LSI Logic)
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 81c7f2bb7daf09c9c414bf5f90afc7c259b24e38..efb38da700c8d9b1681c18fcab6c4e578fd70258 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -308,6 +308,7 @@ Code  Seq#(hex)	Include File		Comments
 0xA3	80-8F	Port ACL		in development:
 0xA3	90-9F	linux/dtlk.h
+0xA4	00-1F	uapi/linux/tee.h	Generic TEE subsystem
 0xAA	00-3F	linux/uapi/linux/userfaultfd.h
 0xAB	00-1F	linux/nbd.h
 0xAC	00-1F	linux/raw.h
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 92e629781279afa98293166ed6f851a9c2f05726..29a998d64404ead8497d0d7c6aea564a952ed984 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1909,6 +1909,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			kernel and module base offset ASLR (Address Space
 			Layout Randomization).
 
+	kasan_multi_shot
+			[KNL] Force KASAN (Kernel Address Sanitizer) to print
+			a report on every invalid memory access. Without this
+			parameter, KASAN prints a report only for the first
+			invalid access.
+
 	keepinitrd	[HW,ARM]
 
 	kernelcore=	[KNL,X86,IA-64,PPC]
@@ -2697,6 +2703,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
 
+	nospectre_v2	[X86] Disable all mitigations for the Spectre variant 2
+			(indirect branch prediction) vulnerability. The system
+			may allow data leaks with this option; it is equivalent
+			to spectre_v2=off.
+
 	noxsave		[BUGS=X86] Disables x86 extended register state save
 			and restore using xsave. The kernel will fallback to
 			enabling legacy floating-point and sse state.
@@ -2801,11 +2812,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	nopat		[X86] Disable PAT (page attribute table extension of
 			pagetables) support.
 
+	nopcid		[X86-64] Disable the PCID cpu feature.
+
 	norandmaps	Don't use address space randomization. Equivalent to
 			echo 0 > /proc/sys/kernel/randomize_va_space
 
-	noreplace-paravirt	[X86,IA-64,PV_OPS] Don't patch paravirt_ops
-
 	noreplace-smp	[X86-32,SMP] Don't replace SMP instructions
 			with UP alternatives
 
@@ -3329,6 +3340,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	pt.		[PARIDE]
 			See Documentation/blockdev/paride.txt.
 
+	pti=		[X86_64] Control Page Table Isolation of user and
+			kernel address spaces. Disabling this feature
+			removes hardening, but improves performance of
+			system calls and interrupts.
+
+			on   - unconditionally enable
+			off  - unconditionally disable
+			auto - kernel detects whether your CPU model is
+			       vulnerable to issues that PTI mitigates
+
+			Not specifying this option is equivalent to pti=auto.
+
+	nopti		[X86_64]
+			Equivalent to pti=off
+
 	pty.legacy_count=
 			[KNL] Number of legacy pty's. Overwrites compiled-in
 			default number.
@@ -3933,6 +3959,29 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
 			See Documentation/laptops/sonypi.txt
 
+	spectre_v2=	[X86] Control mitigation of Spectre variant 2
+			(indirect branch speculation) vulnerability.
+
+			on   - unconditionally enable
+			off  - unconditionally disable
+			auto - kernel detects whether your CPU model is
+			       vulnerable
+
+			Selecting 'on' will, and 'auto' may, choose a
+			mitigation method at run time according to the
+			CPU, the available microcode, the setting of the
+			CONFIG_RETPOLINE configuration option, and the
+			compiler with which the kernel was built.
+
+			Specific mitigations can also be selected manually:
+
+			retpoline	  - replace indirect branches
+			retpoline,generic - Google's original retpoline
+			retpoline,amd	  - AMD-specific minimal thunk
+
+			Not specifying this option is equivalent to
+			spectre_v2=auto.
+
 	spia_io_base=	[HW,MTD]
 	spia_fio_base=
 	spia_pedr=
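As background for the retpoline selections above: a retpoline replaces an
indirect branch with a construct whose speculative target is a harmless trap,
while the architectural target is forced through the return stack. A minimal
x86-64 sketch in GNU C (illustrative only; the kernel's real thunks are
emitted by CONFIG_RETPOLINE-capable compilers and live in the retpoline
assembly added to arch/x86, and retpoline_jmp_rax_example is a name invented
here):

	/* Jump to the address in %rax without a predictable indirect
	 * branch. The return-stack predictor speculates the RET into the
	 * pause/lfence loop; architecturally, RET jumps to the target we
	 * wrote over the return address. */
	asm(".text\n"
	    ".globl retpoline_jmp_rax_example\n"
	    "retpoline_jmp_rax_example:\n"
	    "	call	1f\n"		/* pushes the address of 2: below */
	    "2:	pause\n"		/* speculation is parked here... */
	    "	lfence\n"		/* ...and serialized */
	    "	jmp	2b\n"
	    "1:	mov	%rax, (%rsp)\n"	/* overwrite the return address */
	    "	ret\n");		/* 'return' == jump to %rax */

The retpoline,amd variant can be simpler because AMD documents that an LFENCE
ahead of an indirect jump is sufficient to serialize it, hence "AMD-specific
minimal thunk".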
diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e9e6cbae284164fd38963e3eb57ad7da3f772fb9
--- /dev/null
+++ b/Documentation/speculation.txt
@@ -0,0 +1,90 @@
+This document explains potential effects of speculation, and how undesirable
+effects can be mitigated portably using common APIs.
+
+===========
+Speculation
+===========
+
+To improve performance and minimize average latencies, many contemporary CPUs
+employ speculative execution techniques such as branch prediction, performing
+work which may be discarded at a later stage.
+
+Typically speculative execution cannot be observed from architectural state,
+such as the contents of registers. However, in some cases it is possible to
+observe its impact on microarchitectural state, such as the presence or
+absence of data in caches. Such state may form side-channels which can be
+observed to extract secret information.
+
+For example, in the presence of branch prediction, it is possible for bounds
+checks to be ignored by code which is speculatively executed. Consider the
+following code:
+
+	int load_array(int *array, unsigned int index)
+	{
+		if (index >= MAX_ARRAY_ELEMS)
+			return 0;
+		else
+			return array[index];
+	}
+
+Which, on arm64, may be compiled to an assembly sequence such as:
+
+	CMP	<idx>, #MAX_ARRAY_ELEMS
+	B.LT	less
+	MOV	<returnval>, #0
+	RET
+  less:
+	LDR	<returnval>, [<array>, <idx>]
+	RET
+
+It is possible that a CPU mis-predicts the conditional branch, and
+speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This
+value will subsequently be discarded, but the speculated load may affect
+microarchitectural state which can be subsequently measured.
+
+More complex sequences involving multiple dependent memory accesses may
+result in sensitive information being leaked. Consider the following
+code, building on the prior example:
+
+	int load_dependent_arrays(int *arr1, int *arr2, int index)
+	{
+		int val1, val2;
+
+		val1 = load_array(arr1, index);
+		val2 = load_array(arr2, val1);
+
+		return val2;
+	}
+
+Under speculation, the first call to load_array() may return the value
+of an out-of-bounds address, while the second call will influence
+microarchitectural state dependent on this value. This may provide an
+arbitrary read primitive.
+
+====================================
+Mitigating speculation side-channels
+====================================
+
+The kernel provides a generic API to ensure that bounds checks are
+respected even under speculation. Architectures which are affected by
+speculation-based side-channels are expected to implement these
+primitives.
+
+The array_index_nospec() helper in <linux/nospec.h> can be used to
+prevent information from being leaked via side-channels.
+
+A call to array_index_nospec(index, size) returns a sanitized index
+value that is bounded to [0, size) even under CPU speculation
+conditions.
+
+This can be used to protect the earlier load_array() example:
+
+	int load_array(int *array, unsigned int index)
+	{
+		if (index >= MAX_ARRAY_ELEMS)
+			return 0;
+		else {
+			index = array_index_nospec(index, MAX_ARRAY_ELEMS);
+			return array[index];
+		}
+	}
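The same helper composes naturally with the dependent-access example from
earlier in this file. A sketch (assuming, for illustration only, that both
arrays share the MAX_ARRAY_ELEMS bound and that the index is unsigned; note
array_index_nospec() clamps an out-of-range value to 0):

	#include <linux/nospec.h>

	int load_dependent_arrays(int *arr1, int *arr2, unsigned int index)
	{
		unsigned int val1;

		if (index >= MAX_ARRAY_ELEMS)
			return 0;

		/* Clamp the caller-controlled index before the first load... */
		index = array_index_nospec(index, MAX_ARRAY_ELEMS);
		val1 = arr1[index];

		/* ...and the data-dependent value before the second load. */
		val1 = array_index_nospec(val1, MAX_ARRAY_ELEMS);
		return arr2[val1];
	}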
diff --git a/Documentation/tee.txt b/Documentation/tee.txt
new file mode 100644
index 0000000000000000000000000000000000000000..56ea85ffebf24545dccb0d4c467c0400d3b7ff90
--- /dev/null
+++ b/Documentation/tee.txt
@@ -0,0 +1,127 @@
+=============
+TEE subsystem
+=============
+
+This document describes the TEE subsystem in Linux.
+
+A TEE (Trusted Execution Environment) is a trusted OS running in some
+secure environment, for example, TrustZone on ARM CPUs, or a separate
+secure co-processor etc. A TEE driver handles the details needed to
+communicate with the TEE.
+
+This subsystem deals with:
+
+- Registration of TEE drivers
+
+- Managing shared memory between Linux and the TEE
+
+- Providing a generic API to the TEE
+
+The TEE interface
+=================
+
+include/uapi/linux/tee.h defines the generic interface to a TEE.
+
+User space (the client) connects to the driver by opening /dev/tee[0-9]* or
+/dev/teepriv[0-9]*.
+
+- TEE_IOC_SHM_ALLOC allocates shared memory and returns a file descriptor
+  which user space can mmap. When user space doesn't need the file
+  descriptor any more, it should be closed. When shared memory isn't needed
+  any longer it should be unmapped with munmap() to allow the reuse of
+  memory.
+
+- TEE_IOC_VERSION lets user space know which TEE this driver handles and
+  its capabilities.
+
+- TEE_IOC_OPEN_SESSION opens a new session to a Trusted Application.
+
+- TEE_IOC_INVOKE invokes a function in a Trusted Application.
+
+- TEE_IOC_CANCEL may cancel an ongoing TEE_IOC_OPEN_SESSION or TEE_IOC_INVOKE.
+
+- TEE_IOC_CLOSE_SESSION closes a session to a Trusted Application.
+
+There are two classes of clients: normal clients and supplicants. The latter
+is a helper process for the TEE to access resources in Linux, for example
+file system access. A normal client opens /dev/tee[0-9]* and a supplicant
+opens /dev/teepriv[0-9]*.
+
+Much of the communication between clients and the TEE is opaque to the
+driver. The main job for the driver is to receive requests from the
+clients, forward them to the TEE and send back the results. In the case of
+supplicants the communication goes in the other direction: the TEE sends
+requests to the supplicant, which then sends back the result.
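To make the interface above concrete, here is a minimal normal-client sketch
against the new uapi header (not part of the patch; error handling trimmed,
and /dev/tee0 is assumed to exist):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/tee.h>

	int main(void)
	{
		struct tee_ioctl_version_data vers;
		int fd;

		fd = open("/dev/tee0", O_RDWR);
		if (fd < 0)
			return 1;

		/* First step of any client: ask which TEE sits behind
		 * this device and what it can do. */
		if (ioctl(fd, TEE_IOC_VERSION, &vers) == 0)
			printf("impl_id=%u impl_caps=%#x gen_caps=%#x\n",
			       vers.impl_id, vers.impl_caps, vers.gen_caps);

		close(fd);
		return 0;
	}

A real client would continue with TEE_IOC_OPEN_SESSION, passing the UUID of
the Trusted Application it wants to talk to, and then TEE_IOC_INVOKE.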
+
+OP-TEE driver
+=============
+
+The OP-TEE driver handles OP-TEE [1] based TEEs. Currently it is only the ARM
+TrustZone based OP-TEE solution that is supported.
+
+The lowest level of communication with OP-TEE builds on the ARM SMC Calling
+Convention (SMCCC) [2], which is the foundation for OP-TEE's SMC interface
+[3] used internally by the driver. Stacked on top of that is the OP-TEE
+Message Protocol [4].
+
+The OP-TEE SMC interface provides the basic functions required by SMCCC and
+some additional functions specific to OP-TEE. The most interesting functions
+are:
+
+- OPTEE_SMC_FUNCID_CALLS_UID (part of SMCCC) returns the version information
+  which is then returned by TEE_IOC_VERSION
+
+- OPTEE_SMC_CALL_GET_OS_UUID returns the particular OP-TEE implementation, used
+  to tell, for instance, a TrustZone OP-TEE apart from an OP-TEE running on a
+  separate secure co-processor.
+
+- OPTEE_SMC_CALL_WITH_ARG drives the OP-TEE message protocol
+
+- OPTEE_SMC_GET_SHM_CONFIG lets the driver and OP-TEE agree on which memory
+  range to use for shared memory between Linux and OP-TEE.
+
+The GlobalPlatform TEE Client API [5] is implemented on top of the generic
+TEE API.
+
+Picture of the relationship between the different components in the
+OP-TEE architecture:
+
+   User space                  Kernel                   Secure world
+   ~~~~~~~~~~                  ~~~~~~                   ~~~~~~~~~~~~
+   +--------+                                          +-------------+
+   | Client |                                          | Trusted     |
+   +--------+                                          | Application |
+      /\                                               +-------------+
+      ||          +----------+                               /\
+      ||          |tee-      |                               ||
+      ||          |supplicant|                               \/
+      ||          +----------+                         +-------------+
+      \/               /\                              | TEE Internal|
+   +-------+           ||                              | API         |
+   | TEE   |           ||        +--------+--------+   +-------------+
+   | Client|           ||        | TEE    | OP-TEE |   | OP-TEE      |
+   | API   |           \/        | subsys | driver |   | Trusted OS  |
+   +-------+---------------------+--------+--------+---+-------------+
+   |  Generic TEE API            |        |  OP-TEE MSG              |
+   |  IOCTL (TEE_IOC_*)          |        |  SMCCC (OPTEE_SMC_CALL_*)|
+   +-----------------------------+        +--------------------------+
+
+RPCs (Remote Procedure Calls) are requests from secure world to the kernel
+driver or tee-supplicant. An RPC is identified by a special range of SMCCC
+return values from OPTEE_SMC_CALL_WITH_ARG. RPC messages which are intended
+for the kernel are handled by the kernel driver. Other RPC messages will be
+forwarded to tee-supplicant without further involvement of the driver,
+except for switching the shared memory buffer representation.
+
+References
+==========
+
+[1] https://github.com/OP-TEE/optee_os
+
+[2] http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html
+
+[3] drivers/tee/optee/optee_smc.h
+
+[4] drivers/tee/optee/optee_msg.h
+
+[5] http://www.globalplatform.org/specificationsdevice.asp look for
+    "TEE Client API Specification v1.0" and click download.
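Tracing the right-hand leg of the diagram: before anything else, the driver's
probe has to establish that an OP-TEE actually lives behind SMC #0. A
stripped-down sketch of that check using the generic SMCCC helper (the
constants come from optee_smc.h/optee_msg.h referenced above; the function
name is illustrative, not the driver's real probe):

	#include <linux/arm-smccc.h>
	#include <linux/types.h>

	#include "optee_smc.h"	/* OPTEE_SMC_CALLS_UID, OPTEE_MSG_UID_* */

	/* Sketch: issue CALLS_UID and check that the secure world
	 * answers with the OP-TEE API UID. */
	static bool optee_present_example(void)
	{
		struct arm_smccc_res res;

		arm_smccc_smc(OPTEE_SMC_CALLS_UID, 0, 0, 0, 0, 0, 0, 0, &res);

		return res.a0 == OPTEE_MSG_UID_0 && res.a1 == OPTEE_MSG_UID_1 &&
		       res.a2 == OPTEE_MSG_UID_2 && res.a3 == OPTEE_MSG_UID_3;
	}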
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5cd58439ad2d6f7bf8a3d781f89ccd9bbb8e1597
--- /dev/null
+++ b/Documentation/x86/pti.txt
@@ -0,0 +1,186 @@
+Overview
+========
+
+Page Table Isolation (pti, previously known as KAISER[1]) is a
+countermeasure against attacks on the shared user/kernel address
+space such as the "Meltdown" approach[2].
+
+To mitigate this class of attacks, we create an independent set of
+page tables for use only when running userspace applications. When
+the kernel is entered via syscalls, interrupts or exceptions, the
+page tables are switched to the full "kernel" copy. When the system
+switches back to user mode, the user copy is used again.
+
+The userspace page tables contain only a minimal amount of kernel
+data: only what is needed to enter/exit the kernel such as the
+entry/exit functions themselves and the interrupt descriptor table
+(IDT). There are a few strictly unnecessary things that get mapped
+such as the first C function when entering an interrupt (see
+comments in pti.c).
+
+This approach helps to ensure that side-channel attacks leveraging
+the paging structures do not function when PTI is enabled. It can be
+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
+Once enabled at compile-time, it can be disabled at boot with the
+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+
+Page Table Management
+=====================
+
+When PTI is enabled, the kernel manages two sets of page tables.
+The first set is very similar to the single set which is present in
+kernels without PTI. This includes a complete mapping of userspace
+that the kernel can use for things like copy_to_user().
+
+Although _complete_, the user portion of the kernel page tables is
+crippled by setting the NX bit in the top level. This ensures
+that any missed kernel->user CR3 switch will immediately crash
+userspace upon executing its first instruction.
+
+The userspace page tables map only the kernel data needed to enter
+and exit the kernel. This data is entirely contained in the 'struct
+cpu_entry_area' structure, which is placed in the fixmap so that
+each CPU's copy of the area has a compile-time-fixed virtual
+address.
+
+For new userspace mappings, the kernel makes the entries in its
+page tables like normal. The only difference is when the kernel
+makes entries in the top (PGD) level. In addition to setting the
+entry in the main kernel PGD, a copy of the entry is made in the
+userspace page tables' PGD.
+
+This sharing at the PGD level also inherently shares all the lower
+layers of the page tables. This leaves a single, shared set of
+userspace page tables to manage. One PTE to lock, one set of
+accessed bits, dirty bits, etc...
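The PGD mirroring described above can be summarized in a few lines of
pseudo-kernel C. This is a hedged sketch of the idea only, not the actual
x86 implementation (which juggles extra flags); user_pgd() and
example_set_user_pgd() are names invented here:

	/* With PTI, the PGD is an order-1 allocation: the kernel copy
	 * occupies the first 4k page and the user copy the second. */
	static pgd_t *user_pgd(pgd_t *kernel_pgdp)
	{
		return (pgd_t *)((unsigned long)kernel_pgdp + PAGE_SIZE);
	}

	/* Sketch of what a set_pgd() on a userspace address must do:
	 * install the entry in the kernel PGD as usual, and mirror it
	 * into the user copy so that both sets of page tables always
	 * map the same userspace memory. */
	static void example_set_user_pgd(pgd_t *pgdp, pgd_t pgd,
					 unsigned long address)
	{
		if (address < TASK_SIZE_MAX)
			*user_pgd(pgdp) = pgd;
		*pgdp = pgd;
	}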
+
+Overhead
+========
+
+Protection against side-channel attacks is important. But,
+this protection comes at a cost:
+
+1. Increased Memory Use
+  a. Each process now needs an order-1 PGD instead of order-0.
+     (Consumes an additional 4k per process).
+  b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
+     aligned so that it can be mapped by setting a single PMD
+     entry. This consumes nearly 2MB of RAM once the kernel
+     is decompressed, but no space in the kernel image itself.
+
+2. Runtime Cost
+  a. CR3 manipulation to switch between the page table copies
+     must be done at interrupt, syscall, and exception entry
+     and exit (it can be skipped when the kernel is interrupted,
+     though.) Moves to CR3 are on the order of a hundred
+     cycles, and are required at every entry and exit.
+  b. A "trampoline" must be used for SYSCALL entry. This
+     trampoline depends on a smaller set of resources than the
+     non-PTI SYSCALL entry code, and so requires mapping fewer
+     things into the userspace page tables. The downside is
+     that stacks must be switched at entry time.
+  c. Global pages are disabled for all kernel structures not
+     mapped into both kernel and userspace page tables. This
+     feature of the MMU allows different processes to share TLB
+     entries mapping the kernel. Losing the feature means more
+     TLB misses after a context switch. The actual loss of
+     performance is very small, however, never exceeding 1%.
+  d. Process Context IDentifiers (PCID) is a CPU feature that
+     allows us to skip flushing the entire TLB when switching page
+     tables by setting a special bit in CR3 when the page tables
+     are changed. This makes switching the page tables (at context
+     switch, or kernel entry/exit) cheaper. But, on systems with
+     PCID support, the context switch code must flush both the user
+     and kernel entries out of the TLB. The user PCID TLB flush is
+     deferred until the exit to userspace, minimizing the cost.
+     See intel.com/sdm for the gory PCID/INVPCID details.
+  e. The userspace page tables must be populated for each new
+     process. Even without PTI, the shared kernel mappings
+     are created by copying top-level (PGD) entries into each
+     new process. But, with PTI, there are now *two* kernel
+     mappings: one in the kernel page tables that maps everything
+     and one for the entry/exit structures. At fork(), we need to
+     copy both.
+  f. In addition to the fork()-time copying, there must also
+     be an update to the userspace PGD any time a set_pgd() is done
+     on a PGD used to map userspace. This ensures that the kernel
+     and userspace copies always map the same userspace
+     memory.
+  g. On systems without PCID support, each CR3 write flushes
+     the entire TLB. That means that each syscall, interrupt
+     or exception flushes the TLB.
+  h. INVPCID is a TLB-flushing instruction which allows flushing
+     of TLB entries for non-current PCIDs. Some systems support
+     PCIDs, but do not support INVPCID. On these systems, addresses
+     can only be flushed from the TLB for the current PCID. When
+     flushing a kernel address, we need to flush all PCIDs, so a
+     single kernel address flush will require a TLB-flushing CR3
+     write upon the next use of every PCID.
+
+Possible Future Work
+====================
+1. We can be more careful about not actually writing to CR3
+   unless its value is actually changed.
+2. Allow PTI to be enabled/disabled at runtime in addition to the
+   boot-time switching.
+
+Testing
+========
+
+To test stability of PTI, the following test procedure is recommended,
+ideally doing all of these in parallel:
+
+1. Set CONFIG_DEBUG_ENTRY=y
+2. Run several copies of all of the tools/testing/selftests/x86/ tests
+   (excluding MPX and protection_keys) in a loop on multiple CPUs for
+   several minutes. These tests frequently uncover corner cases in the
+   kernel entry code. In general, old kernels might cause these tests
+   themselves to crash, but they should never crash the kernel.
+3. Run the 'perf' tool in a mode (top or record) that generates many
+   frequent performance monitoring non-maskable interrupts (see "NMI"
+   in /proc/interrupts). This exercises the NMI entry/exit code which
+   is known to trigger bugs in code paths that did not expect to be
+   interrupted, including nested NMIs. Using "-c" boosts the rate of
+   NMIs, and using two -c with separate counters encourages nested NMIs
+   and less deterministic behavior.
+
+	while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
+
+4. Launch a KVM virtual machine.
+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
+   This has been a lightly-tested code path and needs extra scrutiny.
+
+Debugging
+=========
+
+Bugs in PTI cause a few different signatures of crashes
+that are worth noting here.
+
+ * Failures of the selftests/x86 code. Usually a bug in one of the
+   more obscure corners of entry_64.S
+ * Crashes in early boot, especially around CPU bringup. Bugs
+   in the trampoline code or mappings cause these.
+ * Crashes at the first interrupt. Caused by bugs in entry_64.S,
+   like screwing up a page table switch. Also caused by
+   incorrectly mapping the IRQ handler entry code.
+ * Crashes at the first NMI. The NMI code is separate from main
+   interrupt handlers and can have bugs that do not affect
+   normal interrupts. Also caused by incorrectly mapping NMI
+   code. NMIs that interrupt the entry code must be very
+   careful and can be the cause of crashes that show up when
+   running perf.
+ * Kernel crashes at the first exit to userspace. entry_64.S
+   bugs, or failing to map some of the exit code.
+ * Crashes at first interrupt that interrupts userspace. The paths
+   in entry_64.S that return to userspace are sometimes separate
+   from the ones that return to the kernel.
+ * Double faults: overflowing the kernel stack because of page
+   faults upon page faults. Caused by touching non-pti-mapped
+   data in the entry code, or forgetting to switch to kernel
+   CR3 before calling into C functions which are not pti-mapped.
+ * Userspace segfaults early in boot, sometimes manifesting
+   as mount(8) failing to mount the rootfs. These have
+   tended to be TLB invalidation issues. Usually invalidating
+   the wrong PCID, or otherwise missing an invalidation.
+
+1. https://gruss.cc/files/kaiser.pdf
+2. https://meltdownattack.com/meltdown.pdf
diff --git a/MAINTAINERS b/MAINTAINERS
index 63cefa62324cd1e0cc788efbeffcff9b4ab875f3..a419303662bfa68475a902e1b1487b48c7363616 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9010,6 +9010,11 @@ F:	arch/*/oprofile/
 F:	drivers/oprofile/
 F:	include/linux/oprofile.h
 
+OP-TEE DRIVER
+M:	Jens Wiklander <jens.wiklander@linaro.org>
+S:	Maintained
+F:	drivers/tee/optee/
+
 ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
 M:	Mark Fasheh
 M:	Joel Becker
@@ -10655,6 +10660,14 @@ F:	drivers/hwtracing/stm/
 F:	include/linux/stm.h
 F:	include/uapi/linux/stm.h
 
+TEE SUBSYSTEM
+M:	Jens Wiklander <jens.wiklander@linaro.org>
+S:	Maintained
+F:	include/linux/tee_drv.h
+F:	include/uapi/linux/tee.h
+F:	drivers/tee/
+F:	Documentation/tee.txt
+
 THUNDERBOLT DRIVER
 M:	Andreas Noever
 S:	Maintained
diff --git a/Makefile b/Makefile
index 7c9313cf92db8af9827355bf691a7579bad16f48..aeb2708d8fbc539826ec6795901445c0b2d5ea9c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 66
+SUBLEVEL = 82
 EXTRAVERSION =
 NAME = Roaring Lionus
@@ -370,9 +370,6 @@ LDFLAGS_MODULE  =
 CFLAGS_KERNEL	=
 AFLAGS_KERNEL	=
 LDFLAGS_vmlinux =
-CFLAGS_GCOV	:= -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,)
-CFLAGS_KCOV	:= $(call cc-option,-fsanitize-coverage=trace-pc,)
-
 
 # Use USERINCLUDE when you must reference the UAPI directories only.
USERINCLUDE := \ @@ -393,21 +390,19 @@ LINUXINCLUDE := \ LINUXINCLUDE += $(filter-out $(LINUXINCLUDE),$(USERINCLUDE)) -KBUILD_CPPFLAGS := -D__KERNEL__ - +KBUILD_AFLAGS := -D__ASSEMBLY__ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -std=gnu89 $(call cc-option,-fno-PIE) - - + -std=gnu89 +KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := -KBUILD_AFLAGS := -D__ASSEMBLY__ $(call cc-option,-fno-PIE) KBUILD_AFLAGS_MODULE := -DMODULE KBUILD_CFLAGS_MODULE := -DMODULE KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds +GCC_PLUGINS_CFLAGS := # Read KERNELRELEASE from include/config/kernel.release (if it exists) KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) @@ -420,7 +415,7 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS -export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KCOV CFLAGS_KASAN CFLAGS_UBSAN +export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_KASAN CFLAGS_UBSAN export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL @@ -620,6 +615,12 @@ endif # Defaults to vmlinux, but the arch makefile usually adds further targets all: vmlinux +KBUILD_CFLAGS += $(call cc-option,-fno-PIE) +KBUILD_AFLAGS += $(call cc-option,-fno-PIE) +CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,) +CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) +export CFLAGS_GCOV CFLAGS_KCOV + # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default # values of the respective KBUILD_* variables ARCH_CPPFLAGS := @@ -801,6 +802,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) +# Make sure -fstack-check isn't enabled (like gentoo apparently did) +KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,) + # conserve stack if available KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h index 2b0ac429f5ebc4912e34b41029810fa914124479..412bb3c24f366f7e01e35a36c28f7fd0df4c3ec6 100644 --- a/arch/alpha/kernel/pci_impl.h +++ b/arch/alpha/kernel/pci_impl.h @@ -143,7 +143,8 @@ struct pci_iommu_arena }; #if defined(CONFIG_ALPHA_SRM) && \ - (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA)) + (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA) || \ + defined(CONFIG_ALPHA_AVANTI)) # define NEED_SRM_SAVE_RESTORE #else # undef NEED_SRM_SAVE_RESTORE diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index b483156698d5043358ecc9a681fc77f197282d25..60c17b9bf04db7bd2ec101c51eca5b2e6a0796be 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -265,12 +265,13 @@ copy_thread(unsigned long clone_flags, unsigned long usp, application calling fork. */ if (clone_flags & CLONE_SETTLS) childti->pcb.unique = regs->r20; + else + regs->r20 = 0; /* OSF/1 has some strange fork() semantics. */ childti->pcb.usp = usp ?: rdusp(); *childregs = *regs; childregs->r0 = 0; childregs->r19 = 0; childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. 
*/ - regs->r20 = 0; stack = ((struct switch_stack *) regs) - 1; *childstack = *stack; childstack->r26 = (unsigned long) ret_from_fork; diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 74aceead06e98a391a1f0fc49f5486ef2562844c..32ba92cdf5f69492cbeeac0d7b5f3386df4f1918 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -158,11 +158,16 @@ void show_stack(struct task_struct *task, unsigned long *sp) for(i=0; i < kstack_depth_to_print; i++) { if (((long) stack & (THREAD_SIZE-1)) == 0) break; - if (i && ((i % 4) == 0)) - printk("\n "); - printk("%016lx ", *stack++); + if ((i % 4) == 0) { + if (i) + pr_cont("\n"); + printk(" "); + } else { + pr_cont(" "); + } + pr_cont("%016lx", *stack++); } - printk("\n"); + pr_cont("\n"); dik_show_trace(sp); } diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index 41faf17cd28d2ced7461f4e6860d2456fb49f8ae..0684fd2f42e8754df81eb5953084f8d45c5d65a1 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -673,6 +673,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) return 0; __asm__ __volatile__( + " mov lp_count, %5 \n" " lp 3f \n" "1: ldb.ab %3, [%2, 1] \n" " breq.d %3, 0, 3f \n" @@ -689,8 +690,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) " .word 1b, 4b \n" " .previous \n" : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) - : "g"(-EFAULT), "l"(count) - : "memory"); + : "g"(-EFAULT), "r"(count) + : "lp_count", "lp_start", "lp_end", "memory"); return res; } diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index 975c36e332a2583e778d3dc41390ed67577693c8..8e6b3938bef9f6c135f63003f34ed9f26dbb2e5b 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -668,6 +668,7 @@ ti,non-removable; bus-width = <4>; cap-power-off-card; + keep-power-in-suspend; pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index 7c9e0fae9bb9b13b2972bd3fa5538c8232a7c948..65e0db1d3bd7bb76566af0eff1db8a71f4a2f6cb 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -85,7 +85,7 @@ timer@20200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x20200 0x100>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; }; @@ -93,7 +93,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x20600 0x20>; interrupts = ; + IRQ_TYPE_EDGE_RISING)>; clocks = <&periph_clk>; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 064d84f87e45f76828dc571a6c4119fbe4cd6cd1..ce54a70b7695d16aa6bd0e8f12c83c81cea98e79 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -282,6 +282,7 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x20013000 0x13000 0 0xffed000>; + bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; linux,pci-domain = <0>; @@ -318,6 +319,7 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x30013000 0x13000 0 0xffed000>; + bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; linux,pci-domain = <1>; diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts index cf2f5240e176d0b1d60f578db6028f143bfea97c..27cc913ca0f569e840762fc600034247819a1937 100644 --- a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts +++ b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts @@ -53,7 +53,8 @@ }; pinctrl: 
pin-controller@10000 { - pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>; + pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header + &pmx_gpio_header_gpo>; pinctrl-names = "default"; pmx_uart0: pmx-uart0 { @@ -85,11 +86,16 @@ * ground. */ pmx_gpio_header: pmx-gpio-header { - marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28", + marvell,pins = "mpp17", "mpp29", "mpp28", "mpp35", "mpp34", "mpp40"; marvell,function = "gpio"; }; + pmx_gpio_header_gpo: pxm-gpio-header-gpo { + marvell,pins = "mpp7"; + marvell,function = "gpo"; + }; + pmx_gpio_init: pmx-init { marvell,pins = "mpp38"; marvell,function = "gpio"; diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts index 08cce17a25a01f460fd199f5f380bb3c907e20ed..b4575bbaf0852a4cfff98f5bae93f8eb93c4d6ef 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts @@ -192,7 +192,7 @@ interrupts-extended = <&intc 83 &omap3_pmx_core 0x11a>; pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins &mmc1_cd>; - cd-gpios = <&gpio4 31 IRQ_TYPE_LEVEL_LOW>; /* gpio127 */ + cd-gpios = <&gpio4 31 GPIO_ACTIVE_LOW>; /* gpio127 */ vmmc-supply = <&vmmc1>; bus-width = <4>; cap-power-off-card; @@ -249,9 +249,9 @@ OMAP3_CORE1_IOPAD(0x2110, PIN_INPUT | MUX_MODE0) /* cam_xclka.cam_xclka */ OMAP3_CORE1_IOPAD(0x2112, PIN_INPUT | MUX_MODE0) /* cam_pclk.cam_pclk */ - OMAP3_CORE1_IOPAD(0x2114, PIN_INPUT | MUX_MODE0) /* cam_d0.cam_d0 */ - OMAP3_CORE1_IOPAD(0x2116, PIN_INPUT | MUX_MODE0) /* cam_d1.cam_d1 */ - OMAP3_CORE1_IOPAD(0x2118, PIN_INPUT | MUX_MODE0) /* cam_d2.cam_d2 */ + OMAP3_CORE1_IOPAD(0x2116, PIN_INPUT | MUX_MODE0) /* cam_d0.cam_d0 */ + OMAP3_CORE1_IOPAD(0x2118, PIN_INPUT | MUX_MODE0) /* cam_d1.cam_d1 */ + OMAP3_CORE1_IOPAD(0x211a, PIN_INPUT | MUX_MODE0) /* cam_d2.cam_d2 */ OMAP3_CORE1_IOPAD(0x211c, PIN_INPUT | MUX_MODE0) /* cam_d3.cam_d3 */ OMAP3_CORE1_IOPAD(0x211e, PIN_INPUT | MUX_MODE0) /* cam_d4.cam_d4 */ OMAP3_CORE1_IOPAD(0x2120, PIN_INPUT | MUX_MODE0) /* cam_d5.cam_d5 */ diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 714da336ec86ffc94bc9c348cedd4e1ba1d536d3..5d49b60841e308ce85fd6143e51ad703c41b61a2 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -11,6 +11,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 CONFIG_AEABI=y CONFIG_HIGHMEM=y +CONFIG_CMA=y CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_CPU_FREQ=y @@ -35,6 +36,7 @@ CONFIG_CAN_SUN4I=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DMA_CMA=y CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_AHCI_SUNXI=y diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 68b06f9c65ded6c810421a79b345e16f77e3d527..12f99fd2e3b2edff01a72194ec28987f219c99e8 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -516,4 +516,22 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm + .macro bug, msg, line +#ifdef CONFIG_THUMB2_KERNEL +1: .inst 0xde02 +#else +1: .inst 0xe7f001f2 +#endif +#ifdef CONFIG_DEBUG_BUGVERBOSE + .pushsection .rodata.str, "aMS", %progbits, 1 +2: .asciz "\msg" + .popsection + .pushsection __bug_table, "aw" + .align 2 + .word 1b, 2b + .hword \line + .popsection +#endif + .endm + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index e22089fb44dc86b7ed2fdb175bc6ec7b47ee4001..98d6de177b7a31b71994e75fc925fca609c439a8 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ 
b/arch/arm/include/asm/kvm_arm.h @@ -161,8 +161,7 @@ #else #define VTTBR_X (5 - KVM_T0SZ) #endif -#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT _AC(48, ULL) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) @@ -209,6 +208,7 @@ #define HSR_EC_IABT_HYP (0x21) #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) +#define HSR_EC_MAX (0x3f) #define HSR_WFI_IS_WFE (_AC(1, UL) << 0) diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h index f555bb3664dcaa2dbee6ab4e1f486d24e360ba8f..683d9230984a51c33a89dac39ae8e7904c7017f9 100644 --- a/arch/arm/include/asm/traps.h +++ b/arch/arm/include/asm/traps.h @@ -18,7 +18,6 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { extern char __irqentry_text_start[]; @@ -27,12 +26,6 @@ static inline int __in_irqentry_text(unsigned long ptr) return ptr >= (unsigned long)&__irqentry_text_start && ptr < (unsigned long)&__irqentry_text_end; } -#else -static inline int __in_irqentry_text(unsigned long ptr) -{ - return 0; -} -#endif static inline int in_exception_text(unsigned long ptr) { diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 1f59ea051bab814132074b09f55d3a57c800a471..b7e0125c0bbf2014a447800a383426b62d5147b8 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -478,11 +478,10 @@ extern unsigned long __must_check arm_copy_from_user(void *to, const void __user *from, unsigned long n); static inline unsigned long __must_check -__copy_from_user(void *to, const void __user *from, unsigned long n) +__arch_copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned int __ua_flags; - check_object_size(to, n, false); __ua_flags = uaccess_save_and_enable(); n = arm_copy_from_user(to, from, n); uaccess_restore(__ua_flags); @@ -495,18 +494,15 @@ extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n); static inline unsigned long __must_check -__copy_to_user(void __user *to, const void *from, unsigned long n) +__arch_copy_to_user(void __user *to, const void *from, unsigned long n) { #ifndef CONFIG_UACCESS_WITH_MEMCPY unsigned int __ua_flags; - - check_object_size(from, n, true); __ua_flags = uaccess_save_and_enable(); n = arm_copy_to_user(to, from, n); uaccess_restore(__ua_flags); return n; #else - check_object_size(from, n, true); return arm_copy_to_user(to, from, n); #endif } @@ -526,25 +522,49 @@ __clear_user(void __user *addr, unsigned long n) } #else -#define __copy_from_user(to, from, n) (memcpy(to, (void __force *)from, n), 0) -#define __copy_to_user(to, from, n) (memcpy((void __force *)to, from, n), 0) +#define __arch_copy_from_user(to, from, n) \ + (memcpy(to, (void __force *)from, n), 0) +#define __arch_copy_to_user(to, from, n) \ + (memcpy((void __force *)to, from, n), 0) #define __clear_user(addr, n) (memset((void __force *)addr, 0, n), 0) #endif -static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) +static inline unsigned long __must_check +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + check_object_size(to, n, false); + return 
__arch_copy_from_user(to, from, n); +} + +static inline unsigned long __must_check +copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; + + check_object_size(to, n, false); + if (likely(access_ok(VERIFY_READ, from, n))) - res = __copy_from_user(to, from, n); + res = __arch_copy_from_user(to, from, n); if (unlikely(res)) memset(to + (n - res), 0, res); return res; } -static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) +static inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) { + check_object_size(from, n, true); + + return __arch_copy_to_user(to, from, n); +} + +static inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + check_object_size(from, n, true); + if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); + n = __arch_copy_to_user(to, from, n); return n; } diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 6391728c8f0377019404c6a6744df1e24697b3a8..e056c9a9aa9db178ba47ec421f2e8d1f53d3cab4 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -299,6 +299,8 @@ mov r2, sp ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr ldr lr, [r2, #\offset + S_PC]! @ get pc + tst r1, #PSR_I_BIT | 0x0f + bne 1f msr spsr_cxsf, r1 @ save in spsr_svc #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) @ We must avoid clrex due to Cortex-A15 erratum #830321 @@ -313,6 +315,7 @@ @ after ldm {}^ add sp, sp, #\offset + PT_REGS_SIZE movs pc, lr @ return & move spsr_svc into cpsr +1: bug "Returning to usermode but unexpected PSR bits set?", \@ #elif defined(CONFIG_CPU_V7M) @ V7M restore. @ Note that we don't need to do clrex here as clearing the local @@ -328,6 +331,8 @@ ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC] @ get pc add sp, sp, #\offset + S_SP + tst r1, #PSR_I_BIT | 0x0f + bne 1f msr spsr_cxsf, r1 @ save in spsr_svc @ We must avoid clrex due to Cortex-A15 erratum #830321 @@ -340,6 +345,7 @@ .endif add sp, sp, #PT_REGS_SIZE - S_SP movs pc, lr @ return & move spsr_svc into cpsr +1: bug "Returning to usermode but unexpected PSR bits set?", \@ #endif /* !CONFIG_THUMB2_KERNEL */ .endm diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 19b5f5c1c0ff3ef8fa68300f5ec87fe5564c916e..c38bfbeec306e5019b0498733c726bffaf1763a7 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1165,6 +1165,7 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self, cpu_hyp_reset(); return NOTIFY_OK; + case CPU_PM_ENTER_FAILED: case CPU_PM_EXIT: if (__this_cpu_read(kvm_arm_hardware_enabled)) /* The hardware was enabled before suspend. 
*/ diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 066b6d4508ceb17debb6d31798129b1c7713969c..4e57ebca6e6919eb496178f8e39bfbaa954b1526 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -38,7 +38,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_psci_call(vcpu); if (ret < 0) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } @@ -47,7 +47,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_inject_undefined(vcpu); + /* + * "If an SMC instruction executed at Non-secure EL1 is + * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a + * Trap exception, not a Secure Monitor Call exception [...]" + * + * We need to advance the PC after the trap, as it would + * otherwise return to the same address... + */ + vcpu_set_reg(vcpu, 0, ~0UL); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 1; } @@ -79,7 +88,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n", + hsr); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... HSR_EC_MAX] = kvm_handle_unknown_ec, [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, @@ -98,13 +119,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x\n", - (unsigned int)kvm_vcpu_get_hsr(vcpu)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index b6e715fd3c90af8c74408b72652f9974a3fb894d..dac7ceb1a677746cadb086a2cf8a07d8e560373c 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) } trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, - data); + &data); data = vcpu_data_host_to_guest(vcpu, data, len); vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); } @@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), len); - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data); kvm_mmio_write_buf(data_buf, len, data); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, data_buf); } else { trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, - fault_ipa, 0); + fault_ipa, NULL); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, data_buf); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 2206e0e00934cb2c4c5819f0c2d35f0b6944b5da..2a35c1963f6d8d22571a973c11a94aba1e8ccb92 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1284,7 +1284,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - if (is_vm_hugetlb_page(vma) && !logging_active) { + if (vma_kernel_pagesize(vma) && !logging_active) { hugetlb = true; gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; } else { diff --git a/arch/arm/mach-omap1/dma.c b/arch/arm/mach-omap1/dma.c index 
f6ba589cd312ecb257829d81f7f04c967b60e216..c821c1d5610ef25c56e1c0873452a019a602380d 100644 --- a/arch/arm/mach-omap1/dma.c +++ b/arch/arm/mach-omap1/dma.c @@ -32,7 +32,6 @@ #include "soc.h" #define OMAP1_DMA_BASE (0xfffed800) -#define OMAP1_LOGICAL_DMA_CH_COUNT 17 static u32 enable_1510_mode; @@ -348,8 +347,6 @@ static int __init omap1_system_dma_init(void) goto exit_iounmap; } - d->lch_count = OMAP1_LOGICAL_DMA_CH_COUNT; - /* Valid attributes for omap1 plus processors */ if (cpu_is_omap15xx()) d->dev_caps = ENABLE_1510_MODE; @@ -366,13 +363,14 @@ static int __init omap1_system_dma_init(void) d->dev_caps |= CLEAR_CSR_ON_READ; d->dev_caps |= IS_WORD_16; - if (cpu_is_omap15xx()) - d->chan_count = 9; - else if (cpu_is_omap16xx() || cpu_is_omap7xx()) { - if (!(d->dev_caps & ENABLE_1510_MODE)) - d->chan_count = 16; + /* available logical channels */ + if (cpu_is_omap15xx()) { + d->lch_count = 9; + } else { + if (d->dev_caps & ENABLE_1510_MODE) + d->lch_count = 9; else - d->chan_count = 9; + d->lch_count = 16; } p = dma_plat_info; diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 8633c703546a65c2e5b0071ffca0d5a12b664884..2944af82055847935462da4035a73b513c5795a6 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -367,7 +367,7 @@ static int gpmc_onenand_setup(void __iomem *onenand_base, int *freq_ptr) return ret; } -void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) +int gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) { int err; struct device *dev = &gpmc_onenand_device.dev; @@ -393,15 +393,17 @@ void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) if (err < 0) { dev_err(dev, "Cannot request GPMC CS %d, error %d\n", gpmc_onenand_data->cs, err); - return; + return err; } gpmc_onenand_resource.end = gpmc_onenand_resource.start + ONENAND_IO_SIZE - 1; - if (platform_device_register(&gpmc_onenand_device) < 0) { + err = platform_device_register(&gpmc_onenand_device); + if (err) { dev_err(dev, "Unable to register OneNAND device\n"); gpmc_cs_free(gpmc_onenand_data->cs); - return; } + + return err; } diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 1cc4a6f3954e1936c30005a9b61bcda65bbccd69..bca54154e14f7c57f931b9068646853c7d1aa45e 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -3828,16 +3828,20 @@ static struct omap_hwmod_ocp_if *omap3xxx_dss_hwmod_ocp_ifs[] __initdata = { * Return: 0 if device named @dev_name is not likely to be accessible, * or 1 if it is likely to be accessible. */ -static int __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus, - const char *dev_name) +static bool __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus, + const char *dev_name) { + struct device_node *node; + bool available; + if (!bus) - return (omap_type() == OMAP2_DEVICE_TYPE_GP) ? 
1 : 0; + return omap_type() == OMAP2_DEVICE_TYPE_GP; - if (of_device_is_available(of_find_node_by_name(bus, dev_name))) - return 1; + node = of_get_child_by_name(bus, dev_name); + available = of_device_is_available(node); + of_node_put(node); - return 0; + return available; } int __init omap3xxx_hwmod_init(void) @@ -3906,15 +3910,20 @@ int __init omap3xxx_hwmod_init(void) if (h_sham && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "sham")) { r = omap_hwmod_register_links(h_sham); - if (r < 0) + if (r < 0) { + of_node_put(bus); return r; + } } if (h_aes && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "aes")) { r = omap_hwmod_register_links(h_aes); - if (r < 0) + if (r < 0) { + of_node_put(bus); return r; + } } + of_node_put(bus); /* * Register hwmod links specific to certain ES levels of a diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 770216baa73783d806399c13b4eb64ae1a8cfd8e..88676fe9b11941f29b5415063851b6457b501b89 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -147,7 +147,7 @@ static struct ti_st_plat_data wilink_pdata = { .nshutdown_gpio = 137, .dev_name = "/dev/ttyO1", .flow_cntrl = 1, - .baud_rate = 300000, + .baud_rate = 3000000, }; static struct platform_device wl18xx_device = { @@ -162,7 +162,7 @@ static struct ti_st_plat_data wilink7_pdata = { .nshutdown_gpio = 162, .dev_name = "/dev/ttyO1", .flow_cntrl = 1, - .baud_rate = 300000, + .baud_rate = 3000000, }; static struct platform_device wl128x_device = { diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ab7710002ba60e99287beb41e689e3ae4d148d6e..00e9e79b6cb89836f7bb7041847d0e24bf8300a7 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -930,13 +930,31 @@ static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_add __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); } +/* + * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems + * that the intention is to allow exporting memory allocated via the + * coherent DMA APIs through the dma_buf API, which only accepts a + * scattertable. This presents a couple of problems: + * 1. Not all memory allocated via the coherent DMA APIs is backed by + * a struct page + * 2. Passing coherent DMA memory into the streaming APIs is not allowed + * as we will try to flush the memory through a different alias to that + * actually being used (and the flushes are redundant.) 
+ */ int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t handle, size_t size, unsigned long attrs) { - struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); + unsigned long pfn = dma_to_pfn(dev, handle); + struct page *page; int ret; + /* If the PFN is not valid, we do not have a struct page */ + if (!pfn_valid(pfn)) + return -ENXIO; + + page = pfn_to_page(pfn); + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); if (unlikely(ret)) return ret; diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index a4ec240ee7ba38647e7a473b5eb88ef64029e39a..3eb018fa1a1f5c343817b79d3de2244f53b4f7ec 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -433,6 +433,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -455,14 +456,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) /* another task is sharing our hash bucket */ continue; + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); __this_cpu_write(current_kprobe, NULL); } - orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri, &empty_rp); if (orig_ret_address != trampoline_address) @@ -474,7 +495,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) break; } - kretprobe_assert(ri, orig_ret_address, trampoline_address); kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 9775de22e2ffa3359ff228116adc5a0874613bc8..a48354de1aa1e5f60beea896d0fa6d33ddef7c17 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -976,7 +976,10 @@ static void coverage_end(void) void __naked __kprobes_test_case_start(void) { __asm__ __volatile__ ( - "stmdb sp!, {r4-r11} \n\t" + "mov r2, sp \n\t" + "bic r3, r2, #7 \n\t" + "mov sp, r3 \n\t" + "stmdb sp!, {r2-r11} \n\t" "sub sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" "bic r0, lr, #1 @ r0 = inline data \n\t" "mov r1, sp \n\t" @@ -996,7 +999,8 @@ void __naked __kprobes_test_case_end_32(void) "movne pc, r0 \n\t" "mov r0, r4 \n\t" "add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" - "ldmia sp!, {r4-r11} \n\t" + "ldmia sp!, {r2-r11} \n\t" + "mov sp, r2 \n\t" "mov pc, r0 \n\t" ); } @@ -1012,7 +1016,8 @@ void __naked __kprobes_test_case_end_16(void) "bxne r0 \n\t" "mov r0, r4 \n\t" "add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" - "ldmia sp!, {r4-r11} \n\t" + "ldmia sp!, {r2-r11} \n\t" + "mov sp, r2 \n\t" "bx r0 \n\t" ); } diff --git 
a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fbf07a6bd8a4baebf5a058cd990299ddc03dca4e..5702e7d0f5e06e144df3fbdd47991a281d2ac8a3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -733,6 +733,18 @@ config FORCE_MAX_ZONEORDER However for 4K, we choose a higher default value, 11 as opposed to 10, giving us 4M allocations matching the default size used by generic code. +config UNMAP_KERNEL_AT_EL0 + bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT + default y + help + Speculation attacks against some high-performance processors can + be used to bypass MMU permission checks and leak kernel data to + userspace. This can be defended against by unmapping the kernel + when running in userspace, mapping it back in on exception entry + via a trampoline page in the vector table. + + If unsure, say Y. + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index f736cda53f6ff10a48dbb163fa2f8a50cf59ade4..c76311c2f5d68550f82c81401ecdb40aded883dd 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -14,8 +14,12 @@ LDFLAGS_vmlinux :=-p --no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 -ifneq ($(CONFIG_RELOCATABLE),) -LDFLAGS_vmlinux += -pie -shared -Bsymbolic +ifeq ($(CONFIG_RELOCATABLE), y) +# Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour +# for relative relocs, since this leads to better Image compression +# with the relocation offsets always being zero. +LDFLAGS_vmlinux += -pie -shared -Bsymbolic \ + $(call ld-option, --no-apply-dynamic-relocs) endif ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 7c4218ea6ff519aef8e04ef22f9acfa2bf18569d..e008b76cff0bb857c932ce577f44b9c18628dc67 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -429,17 +429,4 @@ alternative_endif mrs \rd, sp_el0 .endm -/* - * Errata workaround post TTBR0_EL1 update. - */ - .macro post_ttbr0_update_workaround -#ifdef CONFIG_CAVIUM_ERRATUM_27456 -alternative_if ARM64_WORKAROUND_CAVIUM_27456 - ic iallu - dsb nsh - isb -alternative_else_nop_endif -#endif - .endm - #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 87b44653518551d39797a67c1cd367e44ea27b55..4a4a98a4b1bd452f8371d261e1e4eeacb6f7972e 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -35,6 +35,8 @@ #define ARM64_HYP_OFFSET_LOW 14 #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 -#define ARM64_NCAPS 16 +#define ARM64_UNMAP_KERNEL_AT_EL0 23 + +#define ARM64_NCAPS 24 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 7278ce729cd6c30e84e84e5b8e9dc29ee69bddef..6c4f9e8febf3235e0063bfffe40e620ce768f409 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -85,12 +85,14 @@ static inline void efi_set_pgd(struct mm_struct *mm) if (mm != current->active_mm) { /* * Update the current thread's saved ttbr0 since it is - * restored as part of a return from exception. Set - * the hardware TTBR0_EL1 using cpu_switch_mm() - * directly to enable potential errata workarounds. + * restored as part of a return from exception. Enable + * access to the valid TTBR0_EL1 and invoke the errata + * workaround directly since there is no return from + * exception when invoking the EFI run-time services. 
*/ update_saved_ttbr0(current, mm); - cpu_switch_mm(mm->pgd, mm); + uaccess_ttbr0_enable(); + post_ttbr_update_workaround(); } else { /* * Defer the switch to the current thread's TTBR0_EL1 @@ -98,7 +100,7 @@ static inline void efi_set_pgd(struct mm_struct *mm) * thread's saved ttbr0 corresponding to its active_mm * (if different from init_mm). */ - cpu_set_reserved_ttbr0(); + uaccess_ttbr0_disable(); if (current->active_mm != &init_mm) update_saved_ttbr0(current, current->active_mm); } diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d14c478976d0abeb435163da5c16599a0f2c3b53..85997c0e544312a3d4e60157c4ba30d4d93239bd 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -175,6 +175,12 @@ #define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ ESR_ELx_SYS64_ISS_DIR_READ) +#define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index caf86be815ba2cfa26d6cbcab8ee1cd7f6bdf158..d8e58051f32d47ab5e93ec389672c1fbc4bf71c8 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -51,6 +51,12 @@ enum fixed_addresses { FIX_EARLYCON_MEM_BASE, FIX_TEXT_POKE0, + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + FIX_ENTRY_TRAMP_DATA, + FIX_ENTRY_TRAMP_TEXT, +#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, /* diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 7803343e5881fbd7b2f635b25082d3e91d2583f8..77a27af013710de1d4bed56655ee587c5e6ef349 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -78,8 +78,16 @@ /* * Initial memory map attributes. 
*/ -#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define _SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define _SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define SWAPPER_PTE_FLAGS (_SWAPPER_PTE_FLAGS | PTE_NG) +#define SWAPPER_PMD_FLAGS (_SWAPPER_PMD_FLAGS | PMD_SECT_NG) +#else +#define SWAPPER_PTE_FLAGS _SWAPPER_PTE_FLAGS +#define SWAPPER_PMD_FLAGS _SWAPPER_PMD_FLAGS +#endif #if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 2a2752b5b6aa7a424c956d4dd0ec2642644762de..0dbc1c6ab7dc3ef1f1a761a70e75c340133b6ab2 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -170,8 +170,7 @@ #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) -#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT (UL(48)) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f43680fb229291a441c16a8546ba97d526286663..30cd69729aab6ccc405d2d5522db958d6eb46cf5 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -165,6 +165,11 @@ extern u64 kimage_vaddr; /* the offset between the kernel virtual and physical mappings */ extern u64 kimage_voffset; +static inline unsigned long kaslr_offset(void) +{ + return kimage_vaddr - KIMAGE_VADDR; +} + /* * Allow all memory at the discovery stage. We will clip it later. 
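The kvm_arm.h hunk above fixes an off-by-one: shifting the mask by VTTBR_X - 1 both covered bit VTTBR_X - 1 (which must be zero in an aligned base address) and missed bit 47. A runnable C illustration, using PHYS_MASK_SHIFT = 48 and an arbitrary VTTBR_X of 5 purely for demonstration:

#include <stdio.h>

#define UL(x)		x##UL
#define PHYS_MASK_SHIFT	48
#define VTTBR_X		5	/* example value, not taken from the patch */

int main(void)
{
	unsigned long old_mask = (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1)
				  << (VTTBR_X - 1));		/* pre-fix  */
	unsigned long new_mask = (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1)
				  << VTTBR_X);			/* post-fix */

	/* Old mask spans bits [VTTBR_X-1, 46]; the new one spans [VTTBR_X, 47]. */
	printf("old: %#018lx\nnew: %#018lx\n", old_mask, new_mask);
	return 0;
}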
*/ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 8d9fce037b2fccd53e4a4340b1d9b4b3c99dff90..d7750fca1259978357669b31a26975945e0a6131 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -16,6 +16,12 @@ #ifndef __ASM_MMU_H #define __ASM_MMU_H + +#define USER_ASID_FLAG (UL(1) << 48) +#define TTBR_ASID_MASK (UL(0xffff) << 48) + +#ifndef __ASSEMBLY__ + typedef struct { atomic64_t id; void *vdso; @@ -28,6 +34,12 @@ typedef struct { */ #define ASID(mm) ((mm)->context.id.counter & 0xffff) +static inline bool arm64_kernel_unmapped_at_el0(void) +{ + return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && + cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0); +} + extern void paging_init(void); extern void bootmem_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); @@ -37,4 +49,5 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, pgprot_t prot, bool allow_block_mappings); extern void *fixmap_remap_fdt(phys_addr_t dt_phys); +#endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 63e9982daca19447b90c61655870b6c368ad2819..6ad61e7d9725f802bc0621bdb28a9b52e883b5f0 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -51,6 +51,13 @@ static inline void cpu_set_reserved_ttbr0(void) isb(); } +static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) +{ + BUG_ON(pgd == swapper_pg_dir); + cpu_set_reserved_ttbr0(); + cpu_do_switch_mm(virt_to_phys(pgd),mm); +} + /* * TCR.T0SZ value to use when the ID map is active. Usually equals * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in @@ -169,9 +176,10 @@ static inline void update_saved_ttbr0(struct task_struct *tsk, struct mm_struct *mm) { if (system_uses_ttbr0_pan()) { + u64 ttbr; BUG_ON(mm->pgd == swapper_pg_dir); - task_thread_info(tsk)->ttbr0 = - virt_to_phys(mm->pgd) | ASID(mm) << 48; + ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48; + WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr); } } #else @@ -219,5 +227,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, #define activate_mm(prev,next) switch_mm(prev, next, current) void verify_cpu_asid_bits(void); +void post_ttbr_update_workaround(void); #endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index eb0c2bd90de903469790a66c6d7f528af64c9d76..8df4cb6ac6f71e963031268e3c30246f04bf0914 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -272,6 +272,7 @@ #define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT) #define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT) +#define TCR_A1 (UL(1) << 22) #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) #define TCR_HA (UL(1) << 39) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 2142c7726e76d048dd61f4322977832a69da4733..84b5283d2e7feb0392af2a4f168396616e31623b 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -34,8 +34,16 @@ #include -#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define PROT_DEFAULT (_PROT_DEFAULT | PTE_NG) +#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_SECT_NG) +#else +#define 
PROT_DEFAULT _PROT_DEFAULT +#define PROT_SECT_DEFAULT _PROT_SECT_DEFAULT +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ #define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) @@ -48,6 +56,7 @@ #define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) #define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) +#define _HYP_PAGE_DEFAULT (_PAGE_DEFAULT & ~PTE_NG) #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) @@ -55,15 +64,15 @@ #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) -#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) -#define PAGE_HYP_EXEC __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) -#define PAGE_HYP_RO __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) +#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) +#define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) +#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1af9f3c5cdf6de9768b97300e48e6355f6557fd9..7bcd8b96a4b387bd444173d0d3824454ff91f4e2 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -692,6 +692,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 14ad6e4e87d11477ca350b7dbf6ff7e6017a7cc4..16cef2e8449ec1c178bcf863d32168680122e779 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -35,12 +35,6 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); #include -#define cpu_switch_mm(pgd,mm) \ -do { \ - BUG_ON(pgd == swapper_pg_dir); \ - cpu_do_switch_mm(virt_to_phys(pgd),mm); \ -} while (0) - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* __ASM_PROCFNS_H */ diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index deab523741197684dcaac73ae57a8add2d4217b6..ad6bd8b26ada0e42956a7c6496da536f804fc694 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -23,6 +23,7 @@ #include #include +#include /* * Raw TLBI operations. 
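One way to read the pgtable-prot.h change just above: with kpti enabled the default attributes gain nG (not-global), and only the hyp mappings strip it back off, since EL2 has a single address space and no split ASIDs. A toy C model of that flag selection (PTE_NG really is bit 11 on arm64; the other encodings are elided):

#include <stdio.h>

#define PTE_TYPE_PAGE	(1ul << 0)	/* stands in for _PROT_DEFAULT */
#define PTE_NG		(1ul << 11)

static unsigned long prot_default(int kpti)
{
	unsigned long prot = PTE_TYPE_PAGE;

	if (kpti)		/* CONFIG_UNMAP_KERNEL_AT_EL0 */
		prot |= PTE_NG;
	return prot;
}

int main(void)
{
	/* _HYP_PAGE_DEFAULT clears nG again for the EL2 mappings. */
	unsigned long hyp = prot_default(1) & ~PTE_NG;

	printf("kernel: %#lx  hyp: %#lx\n", prot_default(1), hyp);
	return 0;
}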
@@ -42,6 +43,11 @@ #define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0) +#define __tlbi_user(op, arg) do { \ + if (arm64_kernel_unmapped_at_el0()) \ + __tlbi(op, (arg) | USER_ASID_FLAG); \ +} while (0) + /* * TLB Management * ============== @@ -103,6 +109,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm) dsb(ishst); __tlbi(aside1is, asid); + __tlbi_user(aside1is, asid); dsb(ish); } @@ -113,6 +120,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, dsb(ishst); __tlbi(vale1is, addr); + __tlbi_user(vale1is, addr); dsb(ish); } @@ -139,10 +147,13 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { - if (last_level) + if (last_level) { __tlbi(vale1is, addr); - else + __tlbi_user(vale1is, addr); + } else { __tlbi(vae1is, addr); + __tlbi_user(vae1is, addr); + } } dsb(ish); } @@ -182,6 +193,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm, unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); __tlbi(vae1is, addr); + __tlbi_user(vae1is, addr); dsb(ish); } diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 02e9035b0685823faca22e5b6943ba39b7d27ce2..47a9066f7c86f970c5c006d624af590af744d126 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -37,18 +37,11 @@ void unregister_undef_hook(struct undef_hook *hook); void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { return ptr >= (unsigned long)&__irqentry_text_start && ptr < (unsigned long)&__irqentry_text_end; } -#else -static inline int __in_irqentry_text(unsigned long ptr) -{ - return 0; -} -#endif static inline int in_exception_text(unsigned long ptr) { diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 31b6940ec2f27096f50cc34d0fbb93f7f3a21d32..95cbadeee3d58ec003a41e407cf609e9dbbbda53 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -20,6 +20,7 @@ #include #include +#include #include #ifndef __ASSEMBLY__ @@ -130,17 +131,23 @@ static inline void set_fs(mm_segment_t fs) #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void __uaccess_ttbr0_disable(void) { - unsigned long ttbr; + unsigned long flags, ttbr; + local_irq_save(flags); + ttbr = read_sysreg(ttbr1_el1); + ttbr &= ~TTBR_ASID_MASK; /* reserved_ttbr0 placed at the end of swapper_pg_dir */ - ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE; - write_sysreg(ttbr, ttbr0_el1); + write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1); + isb(); + /* Set reserved ASID */ + write_sysreg(ttbr, ttbr1_el1); isb(); + local_irq_restore(flags); } static inline void __uaccess_ttbr0_enable(void) { - unsigned long flags; + unsigned long flags, ttbr0, ttbr1; /* * Disable interrupts to avoid preemption between reading the 'ttbr0' @@ -148,7 +155,17 @@ static inline void __uaccess_ttbr0_enable(void) * roll-over and an update of 'ttbr0'. 
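The __tlbi_user() helper introduced above mirrors every user-address invalidation with the odd sibling ASID. A runnable sketch of the operand encoding, assuming the uaddr >> 12 | ASID << 48 layout these hunks use:

#include <stdint.h>
#include <stdio.h>

#define USER_ASID_FLAG	(1ull << 48)	/* low bit of the ASID field */

/* flush_tlb_page()'s operand: VA page number in the low bits, ASID on top. */
static uint64_t tlbi_addr(uint64_t va, uint64_t asid)
{
	return (va >> 12) | (asid << 48);
}

int main(void)
{
	uint64_t addr = tlbi_addr(0x400000, 42);

	printf("kernel-ASID op: %#llx\n", (unsigned long long)addr);
	/* Under kpti the EL0 twin (odd ASID) must be flushed as well. */
	printf("user-ASID op:   %#llx\n",
	       (unsigned long long)(addr | USER_ASID_FLAG));
	return 0;
}

Setting bit 48 turns ASID 42 into ASID 43, which is exactly the even/odd pairing the context.c hunks further down allocate for.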
*/ local_irq_save(flags); - write_sysreg(current_thread_info()->ttbr0, ttbr0_el1); + ttbr0 = READ_ONCE(current_thread_info()->ttbr0); + + /* Restore active ASID */ + ttbr1 = read_sysreg(ttbr1_el1); + ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */ + ttbr1 |= ttbr0 & TTBR_ASID_MASK; + write_sysreg(ttbr1, ttbr1_el1); + isb(); + + /* Restore user page table */ + write_sysreg(ttbr0, ttbr0_el1); isb(); local_irq_restore(flags); } @@ -435,51 +452,62 @@ extern __must_check long strnlen_user(const char __user *str, long n); #ifdef CONFIG_ARM64_SW_TTBR0_PAN .macro __uaccess_ttbr0_disable, tmp1 mrs \tmp1, ttbr1_el1 // swapper_pg_dir + bic \tmp1, \tmp1, #TTBR_ASID_MASK add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 isb + sub \tmp1, \tmp1, #SWAPPER_DIR_SIZE + msr ttbr1_el1, \tmp1 // set reserved ASID + isb .endm - .macro __uaccess_ttbr0_enable, tmp1 + .macro __uaccess_ttbr0_enable, tmp1, tmp2 get_thread_info \tmp1 ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 + mrs \tmp2, ttbr1_el1 + extr \tmp2, \tmp2, \tmp1, #48 + ror \tmp2, \tmp2, #16 + msr ttbr1_el1, \tmp2 // set the active ASID + isb msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 isb .endm - .macro uaccess_ttbr0_disable, tmp1 + .macro uaccess_ttbr0_disable, tmp1, tmp2 alternative_if_not ARM64_HAS_PAN + save_and_disable_irq \tmp2 // avoid preemption __uaccess_ttbr0_disable \tmp1 + restore_irq \tmp2 alternative_else_nop_endif .endm - .macro uaccess_ttbr0_enable, tmp1, tmp2 + .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3 alternative_if_not ARM64_HAS_PAN - save_and_disable_irq \tmp2 // avoid preemption - __uaccess_ttbr0_enable \tmp1 - restore_irq \tmp2 + save_and_disable_irq \tmp3 // avoid preemption + __uaccess_ttbr0_enable \tmp1, \tmp2 + restore_irq \tmp3 alternative_else_nop_endif .endm #else - .macro uaccess_ttbr0_disable, tmp1 + .macro uaccess_ttbr0_disable, tmp1, tmp2 .endm - .macro uaccess_ttbr0_enable, tmp1, tmp2 + .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3 .endm #endif /* * These macros are no-ops when UAO is present. 
*/ - .macro uaccess_disable_not_uao, tmp1 - uaccess_ttbr0_disable \tmp1 + .macro uaccess_disable_not_uao, tmp1, tmp2 + uaccess_ttbr0_disable \tmp1, \tmp2 alternative_if ARM64_ALT_PAN_NOT_UAO SET_PSTATE_PAN(1) alternative_else_nop_endif .endm - .macro uaccess_enable_not_uao, tmp1, tmp2 - uaccess_ttbr0_enable \tmp1, \tmp2 + .macro uaccess_enable_not_uao, tmp1, tmp2, tmp3 + uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3 alternative_if ARM64_ALT_PAN_NOT_UAO SET_PSTATE_PAN(0) alternative_else_nop_endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 433f03ec8e9b153bc7407aafb409d62eb62c494a..b6c9389c160a4606bf07fff5607c0feeba6ba9ed 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -147,11 +148,14 @@ int main(void) DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id)); DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state)); - BLANK(); DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val)); + BLANK(); +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + DEFINE(TRAMP_VALIAS, TRAMP_VALIAS); +#endif return 0; } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0127e1bb5a7c40bf3b0dccc1734e6077a9533099..bfbd9e90fa042d366f420008c30980743497fe28 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -748,6 +748,40 @@ static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode(); } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ + +static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, + int __unused) +{ + /* Forced on command line? */ + if (__kpti_forced) { + pr_info_once("kernel page table isolation forced %s by command line option\n", + __kpti_forced > 0 ? "ON" : "OFF"); + return __kpti_forced > 0; + } + + /* Useful for KASLR robustness */ + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) + return true; + + return false; +} + +static int __init parse_kpti(char *str) +{ + bool enabled; + int ret = strtobool(str, &enabled); + + if (ret) + return ret; + + __kpti_forced = enabled ? 
1 : -1; + return 0; +} +__setup("kpti=", parse_kpti); +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -831,6 +865,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .def_scope = SCOPE_SYSTEM, .matches = hyp_offset_low, }, +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + { + .capability = ARM64_UNMAP_KERNEL_AT_EL0, + .def_scope = SCOPE_SYSTEM, + .matches = unmap_kernel_at_el0, + }, +#endif {}, }; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c44a93387908661ef46a14ce9b627fd26bcc029d..32de48466ed00d3ce1c2adfdbeca8cc17951119a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -70,8 +71,31 @@ #define BAD_FIQ 2 #define BAD_ERROR 3 - .macro kernel_entry, el, regsize = 64 + .macro kernel_ventry, el, label, regsize = 64 + .align 7 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +alternative_if ARM64_UNMAP_KERNEL_AT_EL0 + .if \el == 0 + .if \regsize == 64 + mrs x30, tpidrro_el0 + msr tpidrro_el0, xzr + .else + mov x30, xzr + .endif + .endif +alternative_else_nop_endif +#endif + sub sp, sp, #S_FRAME_SIZE + b el\()\el\()_\label + .endm + + .macro tramp_alias, dst, sym + mov_q \dst, TRAMP_VALIAS + add \dst, \dst, #(\sym - .entry.tramp.text) + .endm + + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 .endif @@ -128,7 +152,7 @@ alternative_else_nop_endif .if \el != 0 mrs x21, ttbr0_el1 - tst x21, #0xffff << 48 // Check for the reserved ASID + tst x21, #TTBR_ASID_MASK // Check for the reserved ASID orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR b.eq 1f // TTBR0 access already disabled and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR @@ -191,7 +215,7 @@ alternative_else_nop_endif tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set .endif - __uaccess_ttbr0_enable x0 + __uaccess_ttbr0_enable x0, x1 .if \el == 0 /* @@ -200,7 +224,7 @@ alternative_else_nop_endif * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache * corruption). */ - post_ttbr0_update_workaround + bl post_ttbr_update_workaround .endif 1: .if \el != 0 @@ -212,18 +236,20 @@ alternative_else_nop_endif .if \el == 0 ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 + tst x22, #PSR_MODE32_BIT // native task? 
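The kpti= handling in cpufeature.c above is a three-way switch: unset, forced on, forced off. A self-contained C sketch of the same parse (the strtobool() stand-in here is a simplification, not the kernel's implementation):

#include <stdio.h>
#include <string.h>

static int kpti_forced;	/* 0: not forced, >0: forced on, <0: forced off */

static int parse_kpti(const char *str)
{
	/* Minimal stand-in for the kernel's strtobool(). */
	if (!strcmp(str, "1") || !strcmp(str, "y"))
		kpti_forced = 1;
	else if (!strcmp(str, "0") || !strcmp(str, "n"))
		kpti_forced = -1;
	else
		return -1;
	return 0;
}

int main(void)
{
	parse_kpti("0");
	if (kpti_forced)
		printf("kernel page table isolation forced %s\n",
		       kpti_forced > 0 ? "ON" : "OFF");
	return 0;
}

Absent a forced setting, the capability falls back to the CONFIG_RANDOMIZE_BASE default shown in unmap_kernel_at_el0().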
+ b.eq 3f + #ifdef CONFIG_ARM64_ERRATUM_845719 alternative_if ARM64_WORKAROUND_845719 - tbz x22, #4, 1f #ifdef CONFIG_PID_IN_CONTEXTIDR mrs x29, contextidr_el1 msr contextidr_el1, x29 #else msr contextidr_el1, xzr #endif -1: alternative_else_nop_endif #endif +3: .endif msr elr_el1, x21 // set up the return data @@ -245,7 +271,21 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] ldr lr, [sp, #S_LR] add sp, sp, #S_FRAME_SIZE // restore sp - eret // return to kernel + + .if \el == 0 +alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + bne 4f + msr far_el1, x30 + tramp_alias x30, tramp_exit_native + br x30 +4: + tramp_alias x30, tramp_exit_compat + br x30 +#endif + .else + eret + .endif .endm .macro irq_stack_entry @@ -316,31 +356,31 @@ tsk .req x28 // current thread_info .align 11 ENTRY(vectors) - ventry el1_sync_invalid // Synchronous EL1t - ventry el1_irq_invalid // IRQ EL1t - ventry el1_fiq_invalid // FIQ EL1t - ventry el1_error_invalid // Error EL1t + kernel_ventry 1, sync_invalid // Synchronous EL1t + kernel_ventry 1, irq_invalid // IRQ EL1t + kernel_ventry 1, fiq_invalid // FIQ EL1t + kernel_ventry 1, error_invalid // Error EL1t - ventry el1_sync // Synchronous EL1h - ventry el1_irq // IRQ EL1h - ventry el1_fiq_invalid // FIQ EL1h - ventry el1_error_invalid // Error EL1h + kernel_ventry 1, sync // Synchronous EL1h + kernel_ventry 1, irq // IRQ EL1h + kernel_ventry 1, fiq_invalid // FIQ EL1h + kernel_ventry 1, error_invalid // Error EL1h - ventry el0_sync // Synchronous 64-bit EL0 - ventry el0_irq // IRQ 64-bit EL0 - ventry el0_fiq_invalid // FIQ 64-bit EL0 - ventry el0_error_invalid // Error 64-bit EL0 + kernel_ventry 0, sync // Synchronous 64-bit EL0 + kernel_ventry 0, irq // IRQ 64-bit EL0 + kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0 + kernel_ventry 0, error_invalid // Error 64-bit EL0 #ifdef CONFIG_COMPAT - ventry el0_sync_compat // Synchronous 32-bit EL0 - ventry el0_irq_compat // IRQ 32-bit EL0 - ventry el0_fiq_invalid_compat // FIQ 32-bit EL0 - ventry el0_error_invalid_compat // Error 32-bit EL0 + kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0 + kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0 + kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0 + kernel_ventry 0, error_invalid_compat, 32 // Error 32-bit EL0 #else - ventry el0_sync_invalid // Synchronous 32-bit EL0 - ventry el0_irq_invalid // IRQ 32-bit EL0 - ventry el0_fiq_invalid // FIQ 32-bit EL0 - ventry el0_error_invalid // Error 32-bit EL0 + kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0 + kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0 + kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0 + kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0 #endif END(vectors) @@ -860,6 +900,119 @@ __ni_sys_trace: .popsection // .entry.text +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +/* + * Exception vectors trampoline. 
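The tramp_alias macro defined above simply rebases a symbol from its link address inside .entry.tramp.text to the always-mapped fixmap alias. In C terms (both addresses are invented for the example; only the arithmetic is the point):

#include <stdio.h>

#define TRAMP_VALIAS		0xffffff8000001000ull	/* fixmap alias    */
#define TRAMP_TEXT_START	0xffffff9000200000ull	/* link-time start */

/* tramp_alias: dst = TRAMP_VALIAS + (sym - .entry.tramp.text). */
static unsigned long long tramp_alias(unsigned long long sym)
{
	return TRAMP_VALIAS + (sym - TRAMP_TEXT_START);
}

int main(void)
{
	/* A symbol 0x480 bytes into the trampoline section. */
	unsigned long long tramp_exit_native = TRAMP_TEXT_START + 0x480;

	printf("branch target: %#llx\n", tramp_alias(tramp_exit_native));
	return 0;
}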
+ */ + .pushsection ".entry.tramp.text", "ax" + + .macro tramp_map_kernel, tmp + mrs \tmp, ttbr1_el1 + sub \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + bic \tmp, \tmp, #USER_ASID_FLAG + msr ttbr1_el1, \tmp +#ifdef CONFIG_ARCH_MSM8996 + /* ASID already in \tmp[63:48] */ + movk \tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12) + movk \tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12) + /* 2MB boundary containing the vectors, so we nobble the walk cache */ + movk \tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12) + isb + tlbi vae1, \tmp + dsb nsh +#endif /* CONFIG_ARCH_MSM8996 */ + .endm + + .macro tramp_unmap_kernel, tmp + mrs \tmp, ttbr1_el1 + add \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + orr \tmp, \tmp, #USER_ASID_FLAG + msr ttbr1_el1, \tmp + /* + * We avoid running the post_ttbr_update_workaround here because the + * user and kernel ASIDs don't have conflicting mappings, so any + * "blessing" as described in: + * + * http://lkml.kernel.org/r/56BB848A.6060603@caviumnetworks.com + * + * will not hurt correctness. Whilst this may partially defeat the + * point of using split ASIDs in the first place, it avoids + * the hit of invalidating the entire I-cache on every return to + * userspace. + */ + .endm + + .macro tramp_ventry, regsize = 64 + .align 7 +1: + .if \regsize == 64 + msr tpidrro_el0, x30 // Restored in kernel_ventry + .endif + bl 2f + b . +2: + tramp_map_kernel x30 +#ifdef CONFIG_RANDOMIZE_BASE + adr x30, tramp_vectors + PAGE_SIZE +#ifndef CONFIG_ARCH_MSM8996 + isb +#endif + ldr x30, [x30] +#else + ldr x30, =vectors +#endif + prfm plil1strm, [x30, #(1b - tramp_vectors)] + msr vbar_el1, x30 + add x30, x30, #(1b - tramp_vectors) + isb + ret + .endm + + .macro tramp_exit, regsize = 64 + adr x30, tramp_vectors + msr vbar_el1, x30 + tramp_unmap_kernel x30 + .if \regsize == 64 + mrs x30, far_el1 + .endif + eret + .endm + + .align 11 +ENTRY(tramp_vectors) + .space 0x400 + + tramp_ventry + tramp_ventry + tramp_ventry + tramp_ventry + + tramp_ventry 32 + tramp_ventry 32 + tramp_ventry 32 + tramp_ventry 32 +END(tramp_vectors) + +ENTRY(tramp_exit_native) + tramp_exit +END(tramp_exit_native) + +ENTRY(tramp_exit_compat) + tramp_exit 32 +END(tramp_exit_compat) + + .ltorg + .popsection // .entry.tramp.text +#ifdef CONFIG_RANDOMIZE_BASE + .pushsection ".rodata", "a" + .align PAGE_SHIFT + .globl __entry_tramp_data_start +__entry_tramp_data_start: + .quad vectors + .popsection // .rodata +#endif /* CONFIG_RANDOMIZE_BASE */ +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ + /* * Special system call wrappers. */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 2a474166d38456026e87d176f4539dd909d56561..7e365ffe13700f23e09918795cae76c37ed547aa 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -321,6 +321,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); + /* + * In case p was allocated the same task_struct pointer as some + * other recently-exited task, make sure p is disassociated from + * any cpu that may have run that now-exited task recently. + * Otherwise we could erroneously skip reloading the FPSIMD + * registers for p. 
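tramp_map_kernel and tramp_unmap_kernel above are exact inverses: step TTBR1 between tramp_pg_dir and swapper_pg_dir, and toggle the ASID between the even (kernel) and odd (user) sibling. A runnable C model, with the directory sizes picked only for illustration:

#include <stdio.h>

#define USER_ASID_FLAG		(1ull << 48)
#define SWAPPER_DIR_SIZE	0x3000ull	/* example: three 4K levels */
#define RESERVED_TTBR0_SIZE	0x1000ull	/* example */

/* tramp_map_kernel: back to swapper_pg_dir, even (kernel) ASID. */
static unsigned long long map_kernel(unsigned long long ttbr1)
{
	return (ttbr1 - (SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE))
		& ~USER_ASID_FLAG;
}

/* tramp_unmap_kernel: the inverse, odd (user) ASID. */
static unsigned long long unmap_kernel(unsigned long long ttbr1)
{
	return (ttbr1 + (SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE))
		| USER_ASID_FLAG;
}

int main(void)
{
	unsigned long long user_ttbr1 = 0x40004000ull | (43ull << 48);

	printf("mapped:    %#llx\n", map_kernel(user_ttbr1));
	printf("roundtrip: %#llx\n", unmap_kernel(map_kernel(user_ttbr1)));
	return 0;
}

The offset works because the vmlinux.lds.S hunk further down places tramp_pg_dir immediately after swapper_pg_dir and the reserved TTBR0 page.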
+ */ + fpsimd_flush_task_state(p); + if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); childregs->regs[0] = 0; @@ -363,17 +372,17 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, static void tls_thread_switch(struct task_struct *next) { - unsigned long tpidr, tpidrro; + unsigned long tpidr; tpidr = read_sysreg(tpidr_el0); *task_user_tls(current) = tpidr; - tpidr = *task_user_tls(next); - tpidrro = is_compat_thread(task_thread_info(next)) ? - next->thread.tp_value : 0; + if (is_compat_thread(task_thread_info(next))) + write_sysreg(next->thread.tp_value, tpidrro_el0); + else if (!arm64_kernel_unmapped_at_el0()) + write_sysreg(0, tpidrro_el0); - write_sysreg(tpidr, tpidr_el0); - write_sysreg(tpidrro, tpidrro_el0); + write_sysreg(*task_user_tls(next), tpidr_el0); } /* Restore the UAO state depending on next's addr_limit */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index fe014d62e2c6bc3810b3a035f80ef2fa9f10fd8c..6a9dc51da64cb465a2b79b9c98454b619f9de157 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -339,11 +339,11 @@ subsys_initcall(topology_init); static int dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) { - u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR; + const unsigned long offset = kaslr_offset(); - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) { - pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n", - kaslr_offset, KIMAGE_VADDR); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && offset > 0) { + pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n", + offset, KIMAGE_VADDR); } else { pr_emerg("Kernel Offset: disabled\n"); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index dc1c9fc2d9a2868d946e678bf73b8a13baefb09d..5fd787ad377712c10272bd8aa115292e30035671 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -495,6 +496,25 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) regs->pc += 4; } +static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + + isb(); + if (rt != 31) + regs->regs[rt] = arch_counter_get_cntvct(); + regs->pc += 4; +} + +static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + + if (rt != 31) + regs->regs[rt] = read_sysreg(cntfrq_el0); + regs->pc += 4; +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -513,6 +533,18 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CTR_READ, .handler = ctr_read_handler, }, + { + /* Trap read access to CNTVCT_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCT, + .handler = cntvct_read_handler, + }, + { + /* Trap read access to CNTFRQ_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, + .handler = cntfrq_read_handler, + }, {}, }; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index b8deffa9e1bf3ec06d9411afb7849695df27d6d2..34d3ed64fe8ed91a5e2955c7d404e9e170089bac 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -56,6 +56,17 @@ jiffies = jiffies_64; #define HIBERNATE_TEXT #endif +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define TRAMP_TEXT \ + . 
= ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \ + *(.entry.tramp.text) \ + . = ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__entry_tramp_text_end) = .; +#else +#define TRAMP_TEXT +#endif + /* * The size of the PE/COFF section that covers the kernel image, which * runs from stext to _edata, must be a round multiple of the PE/COFF @@ -128,6 +139,7 @@ SECTIONS HYPERVISOR_TEXT IDMAP_TEXT HIBERNATE_TEXT + TRAMP_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); @@ -221,6 +233,11 @@ SECTIONS . += RESERVED_TTBR0_SIZE; #endif +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + tramp_pg_dir = .; + . += PAGE_SIZE; +#endif + _end = .; STABS_DEBUG @@ -240,7 +257,10 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) <= SZ_4K, "Hibernate exit text too big or misaligned") #endif - +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, + "Entry trampoline text too big") +#endif /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index a204adf29f0a6dedd1c47485564086ef9682f82b..2e6e9e99977b87ff578f6309e5e04dc40954d0ef 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -44,7 +44,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_psci_call(vcpu); if (ret < 0) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } @@ -53,7 +53,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } @@ -125,7 +125,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) return ret; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n", + hsr, esr_get_class_string(hsr)); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec, [ESR_ELx_EC_WFx] = kvm_handle_wfx, [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64, @@ -151,13 +163,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) u32 hsr = kvm_vcpu_get_hsr(vcpu); u8 hsr_ec = ESR_ELx_EC(hsr); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x -- %s\n", - hsr, esr_get_class_string(hsr)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index d7150e30438aef3bb3f29a8a41a73fbd4c5d4157..07c7ad97ee281b1edb2a320d031a174bc65627a7 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -30,7 +30,7 @@ * Alignment fixed up by hardware. 
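The handle_exit.c change above swaps a runtime bounds-check-and-BUG for a table that is fully populated up front; the trick is GCC's range designator, which fills every slot with the fallback before the named entries override their own. A compilable miniature (handler bodies invented):

#include <stdio.h>

#define ESR_ELx_EC_MAX	0x3f

typedef int (*exit_handle_fn)(void);

static int handle_unknown(void) { return 1; }
static int handle_wfx(void)     { return 2; }

static exit_handle_fn handlers[] = {
	/* Every exception class defaults to the "unknown" handler... */
	[0 ... ESR_ELx_EC_MAX] = handle_unknown,
	/* ...then known classes override their entries. */
	[0x01]                 = handle_wfx,	/* ESR_ELx_EC_WFx */
};

int main(void)
{
	printf("EC 0x01 -> %d, EC 0x20 -> %d\n",
	       handlers[0x01](), handlers[0x20]());
	return 0;
}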
*/ ENTRY(__clear_user) - uaccess_enable_not_uao x2, x3 + uaccess_enable_not_uao x2, x3, x4 mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f @@ -50,7 +50,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 b.mi 5f uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 - uaccess_disable_not_uao x2 + uaccess_disable_not_uao x2, x3 ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index cfe13396085bef6e5dabbf6c0837735228ed1446..c7a7d9689e8f231a0ed672b3b32c0b6c85bacdc8 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -64,10 +64,10 @@ end .req x5 ENTRY(__arch_copy_from_user) - uaccess_enable_not_uao x3, x4 + uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3 + uaccess_disable_not_uao x3, x4 mov x0, #0 // Nothing to copy ret ENDPROC(__arch_copy_from_user) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 718b1c4e2f85a7aa44be720815ba1c9dc9909c4a..e8bfaf19f778f85644df8a3b39bd59aca0e3648d 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -65,10 +65,10 @@ end .req x5 ENTRY(__copy_in_user) - uaccess_enable_not_uao x3, x4 + uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3 + uaccess_disable_not_uao x3, x4 mov x0, #0 ret ENDPROC(__copy_in_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index e99e31c9acac81a26a762524471bec471735b541..f6cfcc0441de64e2e159557e1f3e3594bd011f3f 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -63,10 +63,10 @@ end .req x5 ENTRY(__arch_copy_to_user) - uaccess_enable_not_uao x3, x4 + uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3 + uaccess_disable_not_uao x3, x4 mov x0, #0 ret ENDPROC(__arch_copy_to_user) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index da957693232203089181e3b3537552db26c8c3b4..82ecb6c5f01510700934de6850bf1ffe45f6b89f 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -49,7 +49,7 @@ ENTRY(flush_icache_range) * - end - virtual end address of region */ ENTRY(__flush_cache_user_range) - uaccess_ttbr0_enable x2, x3 + uaccess_ttbr0_enable x2, x3, x4 dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x0, x3 @@ -72,7 +72,7 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU isb mov x0, #0 1: - uaccess_ttbr0_disable x1 + uaccess_ttbr0_disable x1, x2 ret 9: mov x0, #-EFAULT diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 4c63cb1548598283085811f44bea3bd7d65af0a5..d45dea9f14fc7dff01e262399970b8c439b9a622 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -39,7 +39,16 @@ static cpumask_t tlb_flush_pending; #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) #define ASID_FIRST_VERSION (1UL << asid_bits) -#define NUM_USER_ASIDS ASID_FIRST_VERSION + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1) +#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1) +#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK) +#else +#define NUM_USER_ASIDS (ASID_FIRST_VERSION) +#define asid2idx(asid) ((asid) & ~ASID_MASK) +#define idx2asid(idx) asid2idx(idx) +#endif /* Get the ASIDBits supported by the current CPU */ static u32 get_cpu_asid_bits(void) @@ -104,7 +113,7 @@ static void flush_context(unsigned int cpu) */ if (asid == 0) asid = per_cpu(reserved_asids, i); - __set_bit(asid & 
~ASID_MASK, asid_map); + __set_bit(asid2idx(asid), asid_map); per_cpu(reserved_asids, i) = asid; } @@ -159,16 +168,16 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) * We had a valid ASID in a previous life, so try to re-use * it if possible. */ - asid &= ~ASID_MASK; - if (!__test_and_set_bit(asid, asid_map)) + if (!__test_and_set_bit(asid2idx(asid), asid_map)) return newasid; } /* * Allocate a free ASID. If we can't find one, take a note of the - * currently active ASIDs and mark the TLBs as requiring flushes. - * We always count from ASID #1, as we use ASID #0 when setting a - * reserved TTBR0 for the init_mm. + * currently active ASIDs and mark the TLBs as requiring flushes. We + * always count from ASID #2 (index 1), as we use ASID #0 when setting + * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd + * pairs. */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); if (asid != NUM_USER_ASIDS) @@ -185,7 +194,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) set_asid: __set_bit(asid, asid_map); cur_idx = asid; - return asid | generation; + return idx2asid(asid) | generation; } void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) @@ -229,6 +238,15 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) cpu_switch_mm(mm->pgd, mm); } +/* Errata workaround post TTBRx_EL1 update. */ +asmlinkage void post_ttbr_update_workaround(void) +{ + asm(ALTERNATIVE("nop; nop; nop", + "ic iallu; dsb nsh; isb", + ARM64_WORKAROUND_CAVIUM_27456, + CONFIG_CAVIUM_ERRATUM_27456)); +} + static int asids_init(void) { asid_bits = get_cpu_asid_bits(); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f8ef49665102b08c7e99021e92faf6115e3d9cc8..2b35b67540b9c8c03ea97a893f3d8fdbaa691afe 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -299,6 +299,7 @@ void __init arm64_memblock_init(void) arm64_dma_phys_limit = max_zone_dma_phys(); else arm64_dma_phys_limit = PHYS_MASK + 1; + high_memory = __va(memblock_end_of_DRAM() - 1) + 1; dma_contiguous_reserve(arm64_dma_phys_limit); memblock_allow_resize(); @@ -325,7 +326,6 @@ void __init bootmem_init(void) sparse_init(); zone_sizes_init(min, max); - high_memory = __va((max << PAGE_SHIFT) - 1) + 1; memblock_dump_all(); } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 33ecaffbfc232c60741bac2811cde721b12628aa..31d16d8257e2d7a7e713b4d2352a7f8a68cddbcc 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -420,6 +420,37 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, vm_area_add_early(vma); } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static int __init map_entry_trampoline(void) +{ + extern char __entry_tramp_text_start[]; + + pgprot_t prot = PAGE_KERNEL_EXEC; + phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); + + /* The trampoline is always mapped and can therefore be global */ + pgprot_val(prot) &= ~PTE_NG; + + /* Map only the text into the trampoline page table */ + memset(tramp_pg_dir, 0, PGD_SIZE); + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, + prot, pgd_pgtable_alloc, 0); + + /* Map both the text and data into the kernel page table */ + __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + extern char __entry_tramp_data_start[]; + + __set_fixmap(FIX_ENTRY_TRAMP_DATA, + __pa_symbol(__entry_tramp_data_start), + PAGE_KERNEL_RO); + } + + return 0; +} +core_initcall(map_entry_trampoline); +#endif + /* * Create fine-grained 
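The asid2idx()/idx2asid() pair above allocates ASIDs in even/odd pairs: each mm gets an even kernel ASID, and its user twin is the odd sibling selected by setting bit 48 of the TTBR (the low bit of the ASID field). A runnable round-trip check, assuming 16-bit ASIDs:

#include <stdio.h>

#define ASID_BITS	16
#define ASID_MASK	(~0ul << ASID_BITS)
#define NUM_USER_ASIDS	(1ul << (ASID_BITS - 1))	/* halved under kpti */

#define asid2idx(asid)	(((asid) & ~ASID_MASK) >> 1)
#define idx2asid(idx)	(((idx) << 1) & ~ASID_MASK)

int main(void)
{
	unsigned long idx = 21;
	unsigned long kernel_asid = idx2asid(idx);	/* always even */

	printf("idx %lu -> kernel ASID %lu, user ASID %lu, back to idx %lu\n",
	       idx, kernel_asid, kernel_asid + 1, asid2idx(kernel_asid));
	return 0;
}

This is why the comment in new_context() now says counting starts from ASID #2 (index 1): half the raw ASID space is spent on the user twins.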
mappings for the kernel. */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c2adb0cb952a75ad9ef03db67acde19318b5ba1b..89e3d95e13bda3219aeec78d185327944669d93a 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -132,12 +132,17 @@ ENDPROC(cpu_do_resume) * - pgd_phys - physical address of new TTB */ ENTRY(cpu_do_switch_mm) + mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id - bfi x0, x1, #48, #16 // set the ASID - msr ttbr0_el1, x0 // set TTBR0 +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + bfi x0, x1, #48, #16 // set the ASID field in TTBR0 +#endif + bfi x2, x1, #48, #16 // set the ASID + msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set) isb - post_ttbr0_update_workaround - ret + msr ttbr0_el1, x0 // now update TTBR0 + isb + b post_ttbr_update_workaround // Back to C code... ENDPROC(cpu_do_switch_mm) .pushsection ".idmap.text", "ax" @@ -218,7 +223,7 @@ ENTRY(__cpu_setup) * both user and kernel. */ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ - TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 + TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1 tcr_set_idmap_t0sz x10, x9 /* diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index b41aff25426d6b360b24cf69300a51419a58eada..69711f24b74313c9c87b3c88cf724bb73d6d859d 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -100,12 +100,12 @@ ENTRY(privcmd_call) * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation * is enabled (it implies that hardware UAO and PAN disabled). */ - uaccess_ttbr0_enable x6, x7 + uaccess_ttbr0_enable x6, x7, x8 hvc XEN_IMM /* * Disable userspace access from kernel once the hyp call completed. */ - uaccess_ttbr0_disable x6 + uaccess_ttbr0_disable x6, x7 ret ENDPROC(privcmd_call); diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index 3c1bd640042a0dc9f0c0866c8c93c7346cf1171a..88c4b77ec8d2d231c11a3f0cb06bef87706db101 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -319,11 +319,14 @@ config BF53x config GPIO_ADI def_bool y + depends on !PINCTRL depends on (BF51x || BF52x || BF53x || BF538 || BF539 || BF561) -config PINCTRL +config PINCTRL_BLACKFIN_ADI2 def_bool y - depends on BF54x || BF60x + depends on (BF54x || BF60x) + select PINCTRL + select PINCTRL_ADI2 config MEM_MT48LC64M4A2FB_7E bool diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug index f3337ee03621d5f79a31af217bbd4bdf99dee79b..a93cf06a4d6fde552a284c6d48377e9636d41160 100644 --- a/arch/blackfin/Kconfig.debug +++ b/arch/blackfin/Kconfig.debug @@ -17,6 +17,7 @@ config DEBUG_VERBOSE config DEBUG_MMRS tristate "Generate Blackfin MMR tree" + depends on !PINCTRL select DEBUG_FS help Create a tree of Blackfin MMRs via the debugfs tree. 
If diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 87131cd3bc8f8686ed65ad98e345b3e4648f5705..6d3a50446b2155adbaa66f29236a43c2e8542ace 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -169,7 +169,7 @@ void __init cf_bootmem_alloc(void) max_pfn = max_low_pfn = PFN_DOWN(_ramend); high_memory = (void *)_ramend; - m68k_virt_to_node_shift = fls(_ramend - _rambase - 1) - 6; + m68k_virt_to_node_shift = fls(_ramend - 1) - 6; module_fixup(NULL, __start_fixup, __stop_fixup); /* setup bootmem data */ diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index 3446b6fb3acb196140b4611f5d2f2bb0d7d11cd6..9da4e2292fc7c9e28ddb93685e0f73e45160d344 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -576,7 +576,7 @@ static int __init ar7_register_uarts(void) uart_port.type = PORT_AR7; uart_port.uartclk = clk_get_rate(bus_clk) / 2; uart_port.iotype = UPIO_MEM32; - uart_port.flags = UPF_FIXED_TYPE; + uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF; uart_port.regshift = 2; uart_port.line = 0; diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index c558bce989cd386c940d3f1338f5ae2ec2516760..6e716a5f117374fff602b6cad95f823c1d56d375 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -683,6 +683,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) struct task_struct *t; int max_users; + /* If nothing to change, return right away, successfully. */ + if (value == mips_get_process_fp_mode(task)) + return 0; + + /* Only accept a mode change if 64-bit FP enabled for o32. */ + if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT)) + return -EOPNOTSUPP; + + /* And only for o32 tasks. */ + if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS)) + return -EOPNOTSUPP; + /* Check the value is valid */ if (value & ~known_bits) return -EOPNOTSUPP; diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 11890e6e409393605a2ca94748962c46d4ca3bcc..0c8ae2cc6380724e58ed4c566c7c82925da3178c 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -439,63 +439,160 @@ static int gpr64_set(struct task_struct *target, #endif /* CONFIG_64BIT */ +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer, + * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots + * correspond 1:1 to buffer slots. Only general registers are copied. + */ +static int fpr_get_fpa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + void **kbuf, void __user **ubuf) +{ + return user_regset_copyout(pos, count, kbuf, ubuf, + &target->thread.fpu, + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); +} + +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer, + * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's + * general register slots are copied to buffer slots. Only general + * registers are copied. + */ +static int fpr_get_msa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + void **kbuf, void __user **ubuf) +{ + unsigned int i; + u64 fpr_val; + int err; + + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS; i++) { + fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); + err = user_regset_copyout(pos, count, kbuf, ubuf, + &fpr_val, i * sizeof(elf_fpreg_t), + (i + 1) * sizeof(elf_fpreg_t)); + if (err) + return err; + } + + return 0; +} + +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer. 
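The fpr_get helpers above imply a fixed NT_PRFPREG layout: 32 doubleword general-register slots followed by FCSR. A small C sketch of the offset arithmetic they share (types simplified; on MIPS elf_fpreg_t is a 64-bit slot):

#include <stdint.h>
#include <stdio.h>

#define NUM_FPU_REGS	32
typedef uint64_t elf_fpreg_t;

int main(void)
{
	/* fcr31_pos is where the general registers end in the buffer. */
	size_t fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);

	printf("fpr[0..31]: bytes 0..%zu\n", fcr31_pos - 1);
	printf("fcr31:      bytes %zu..%zu\n", fcr31_pos,
	       fcr31_pos + sizeof(uint32_t) - 1);
	return 0;
}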
+ * Choose the appropriate helper for general registers, and then copy + * the FCSR register separately. + */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - unsigned i; + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); int err; - u64 fpr_val; - /* XXX fcr31 */ + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf); + else + err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf); + if (err) + return err; - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fcr31, + fcr31_pos, fcr31_pos + sizeof(u32)); - for (i = 0; i < NUM_FPU_REGS; i++) { - fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); - err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fpr_val, i * sizeof(elf_fpreg_t), - (i + 1) * sizeof(elf_fpreg_t)); + return err; +} + +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context, + * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP + * context's general register slots. Only general registers are copied. + */ +static int fpr_set_fpa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + return user_regset_copyin(pos, count, kbuf, ubuf, + &target->thread.fpu, + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); +} + +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context, + * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64 + * bits only of FP context's general register slots. Only general + * registers are copied. + */ +static int fpr_set_msa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + unsigned int i; + u64 fpr_val; + int err; + + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) { + err = user_regset_copyin(pos, count, kbuf, ubuf, + &fpr_val, i * sizeof(elf_fpreg_t), + (i + 1) * sizeof(elf_fpreg_t)); if (err) return err; + set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val); } return 0; } +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context. + * Choose the appropriate helper for general registers, and then copy + * the FCSR register separately. + * + * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0', + * which is supposed to have been guaranteed by the kernel before + * calling us, e.g. in `ptrace_regset'. We enforce that requirement, + * so that we can safely avoid preinitializing temporaries for + * partial register writes. 
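fpr_set(), shown next, enforces two preconditions before touching the FP context: the write length must be a whole number of register slots, and the window must not run past the regset. A freestanding model of those checks (the 33-slot total assumes the usual MIPS elf_fpregset_t of 32 registers plus one control-word slot):

#include <stdio.h>

#define FPREG		8
#define FPREGSET	(33 * FPREG)	/* 32 FP regs + control word slot */

/* Mirror of fpr_set()'s sanity checks on a user-supplied window. */
static int check(unsigned pos, unsigned count)
{
	if (count % FPREG)		/* guaranteed by ptrace_regset; BUG_ON */
		return -1;
	if (pos + count > FPREGSET)	/* -EIO in the patch */
		return -1;
	return 0;
}

int main(void)
{
	printf("full: %d  too long: %d  one reg: %d\n",
	       check(0, 264), check(0, 272), check(0, 8));
	return 0;
}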
+ */ static int fpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - unsigned i; + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + u32 fcr31; int err; - u64 fpr_val; - /* XXX fcr31 */ + BUG_ON(count % sizeof(elf_fpreg_t)); + + if (pos + count > sizeof(elf_fpregset_t)) + return -EIO; init_fp_ctx(target); - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf); + else + err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf); + if (err) + return err; - BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); - for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) { + if (count > 0) { err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &fpr_val, i * sizeof(elf_fpreg_t), - (i + 1) * sizeof(elf_fpreg_t)); + &fcr31, + fcr31_pos, fcr31_pos + sizeof(u32)); if (err) return err; - set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val); + + ptrace_setfcr31(target, fcr31); } - return 0; + return err; } enum mips_regset { diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 9ade60ca08e06b278064c73d6e38b45caa94c3ae..7f2519cfb5d2e0f8bd5e88a3c5927efa8e47c37f 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -1781,7 +1781,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(fs, MIPSInst_FS(ir)); SPFROMREG(fd, MIPSInst_FD(ir)); rv.s = ieee754sp_maddf(fd, fs, ft); - break; + goto copcsr; } case fmsubf_op: { @@ -1794,7 +1794,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(fs, MIPSInst_FS(ir)); SPFROMREG(fd, MIPSInst_FD(ir)); rv.s = ieee754sp_msubf(fd, fs, ft); - break; + goto copcsr; } case frint_op: { @@ -1818,7 +1818,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(fs, MIPSInst_FS(ir)); rv.w = ieee754sp_2008class(fs); rfmt = w_fmt; - break; + goto copcsr; } case fmin_op: { @@ -1830,7 +1830,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmin(fs, ft); - break; + goto copcsr; } case fmina_op: { @@ -1842,7 +1842,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmina(fs, ft); - break; + goto copcsr; } case fmax_op: { @@ -1854,7 +1854,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmax(fs, ft); - break; + goto copcsr; } case fmaxa_op: { @@ -1866,7 +1866,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmaxa(fs, ft); - break; + goto copcsr; } case fabs_op: @@ -2110,7 +2110,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(fs, MIPSInst_FS(ir)); DPFROMREG(fd, MIPSInst_FD(ir)); rv.d = ieee754dp_maddf(fd, fs, ft); - break; + goto copcsr; } case fmsubf_op: { @@ -2123,7 +2123,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(fs, MIPSInst_FS(ir)); DPFROMREG(fd, MIPSInst_FD(ir)); rv.d = ieee754dp_msubf(fd, fs, ft); - break; + goto 
copcsr; } case frint_op: { @@ -2147,7 +2147,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(fs, MIPSInst_FS(ir)); rv.w = ieee754dp_2008class(fs); rfmt = w_fmt; - break; + goto copcsr; } case fmin_op: { @@ -2159,7 +2159,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmin(fs, ft); - break; + goto copcsr; } case fmina_op: { @@ -2171,7 +2171,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmina(fs, ft); - break; + goto copcsr; } case fmax_op: { @@ -2183,7 +2183,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmax(fs, ft); - break; + goto copcsr; } case fmaxa_op: { @@ -2195,7 +2195,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmaxa(fs, ft); - break; + goto copcsr; } case fabs_op: diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c index b9920b1edd5a7848180876783d39ae20f261011f..70cef54dc40f823d9842bccebc6abaa2498300f9 100644 --- a/arch/mn10300/mm/misalignment.c +++ b/arch/mn10300/mm/misalignment.c @@ -437,7 +437,7 @@ asmlinkage void misalignment(struct pt_regs *regs, enum exception_code code) info.si_signo = SIGSEGV; info.si_errno = 0; - info.si_code = 0; + info.si_code = SEGV_MAPERR; info.si_addr = (void *) regs->pc; force_sig_info(SIGSEGV, &info, current); return; diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 140faa16685a2325f3a1b6cbf9cbb9c8e68fc913..1311e6b139916692bb5f81fbfd188a48b844d977 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -211,7 +211,7 @@ do { \ case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break; \ case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \ case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \ - case 8: __get_user_asm2(x, ptr, retval); \ + case 8: __get_user_asm2(x, ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ } while (0) diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 3d3f6062f49ccee5ae47ec0505101fad7f5d147d..605a284922fb7f1eab5203fbf732f3d96f371320 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -302,12 +302,12 @@ asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address) siginfo_t info; if (user_mode(regs)) { - /* Send a SIGSEGV */ - info.si_signo = SIGSEGV; + /* Send a SIGBUS */ + info.si_signo = SIGBUS; info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void *)address; - force_sig_info(SIGSEGV, &info, current); + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); } else { printk("KERNEL: Unaligned Access 0x%.8lx\n", address); show_registers(regs); diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index 8be707e1b6c77f291a722a0f2b4b1cdb62a1f273..82dea145574eb646fd8f298e00e8ed7d0249876f 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -11,6 +11,7 @@ for the semaphore. 
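The load_pa_tlb_lock macro above implements round-up-to-16 with an add followed by depi (deposit immediate, clearing the low __PA_LDCW_ALIGN_ORDER bits), avoiding a second scratch register. Both it and the C-side __ldcw_align() reduce to the same arithmetic:

#include <stdio.h>

#define __PA_LDCW_ALIGNMENT	16
#define __PA_LDCW_ALIGN_ORDER	4

int main(void)
{
	unsigned long lock = 0x10008;	/* example address, not 16-byte aligned */

	/* __ldcw_align(): round up to the next 16-byte boundary. */
	unsigned long c_way = (lock + __PA_LDCW_ALIGNMENT - 1)
			      & ~(__PA_LDCW_ALIGNMENT - 1);

	/* add ALIGNMENT-1, then clear the low ALIGN_ORDER bits (the depi). */
	unsigned long asm_way = (lock + __PA_LDCW_ALIGNMENT - 1)
				& ~((1ul << __PA_LDCW_ALIGN_ORDER) - 1);

	printf("%#lx -> %#lx (asm form: %#lx)\n", lock, c_way, asm_way);
	return 0;
}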
*/ #define __PA_LDCW_ALIGNMENT 16 +#define __PA_LDCW_ALIGN_ORDER 4 #define __ldcw_align(a) ({ \ unsigned long __ret = (unsigned long) &(a)->lock[0]; \ __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ @@ -28,6 +29,7 @@ ldcd). */ #define __PA_LDCW_ALIGNMENT 4 +#define __PA_LDCW_ALIGN_ORDER 2 #define __ldcw_align(a) (&(a)->slock) #define __LDCW "ldcw,co" diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 4fcff2dcc9c304decddf7b7643e90f7803968531..e3d3e8e1d708216dc2a5337b5799c41c27457265 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,14 @@ #endif .import pa_tlb_lock,data + .macro load_pa_tlb_lock reg +#if __PA_LDCW_ALIGNMENT > 4 + load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg + depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg +#else + load32 PA(pa_tlb_lock), \reg +#endif + .endm /* space_to_prot macro creates a prot id from a space id */ @@ -457,7 +466,7 @@ .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f - load32 PA(pa_tlb_lock),\tmp + load_pa_tlb_lock \tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b nop @@ -480,7 +489,7 @@ /* Release pa_tlb_lock lock. */ .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load32 PA(pa_tlb_lock),\tmp + load_pa_tlb_lock \tmp tlb_unlock0 \spc,\tmp #endif .endm diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index adf7187f89515ec69b19461f60ce379e552397dc..2d40c4ff3f6918ae9b2e2c6af71e20658a9850e1 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -36,6 +36,7 @@ #include #include #include +#include #include .text @@ -333,8 +334,12 @@ ENDPROC_CFI(flush_data_cache_local) .macro tlb_lock la,flags,tmp #ifdef CONFIG_SMP - ldil L%pa_tlb_lock,%r1 - ldo R%pa_tlb_lock(%r1),\la +#if __PA_LDCW_ALIGNMENT > 4 + load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la + depi 0,31,__PA_LDCW_ALIGN_ORDER, \la +#else + load32 pa_tlb_lock, \la +#endif rsm PSW_SM_I,\flags 1: LDCW 0(\la),\tmp cmpib,<>,n 0,\tmp,3f diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 7593787ed4c34b37fdc2aa507d5463a304a4d7ad..c3a532abac03951cf028017a58a0dc4dab5e1a4b 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -180,6 +181,44 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r) return 1; } +/* + * Idle thread support + * + * Detect when running on QEMU with SeaBIOS PDC Firmware and let + * QEMU idle the host too. + */ + +int running_on_qemu __read_mostly; + +void __cpuidle arch_cpu_idle_dead(void) +{ + /* nop on real hardware, qemu will offline CPU. */ + asm volatile("or %%r31,%%r31,%%r31\n":::); +} + +void __cpuidle arch_cpu_idle(void) +{ + local_irq_enable(); + + /* nop on real hardware, qemu will idle sleep. 
*/ + asm volatile("or %%r10,%%r10,%%r10\n":::); +} + +static int __init parisc_idle_init(void) +{ + const char *marker; + + /* check QEMU/SeaBIOS marker in PAGE0 */ + marker = (char *) &PAGE0->pad0; + running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0); + + if (!running_on_qemu) + cpu_idle_poll_ctrl(1); + + return 0; +} +arch_initcall(parisc_idle_init); + /* * Copy architecture-specific thread state */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6eda5abbd719e8763d45d51137f44b706914cc35..0a6bb48854e3ead3060f4ba52087fe64a5487458 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -128,6 +128,7 @@ config PPC select ARCH_HAS_GCOV_PROFILE_ALL select GENERIC_SMP_IDLE_THREAD select GENERIC_CMOS_UPDATE + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 select GENERIC_TIME_VSYSCALL_OLD select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST if SMP diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 617dece6792427847458203c229505a054b5331e..a60c9c6e5cc17297e7b2e0e620c48b00838c4a09 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -72,8 +72,15 @@ GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +ifdef CONFIG_PPC64 +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 +endif + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) ifneq ($(cc-name),clang) cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) +AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index f61cad3de4e69ec093674355332f7d3ee093cf2b..4c935f7504f783532d3521d04142dfdb5831f943 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -201,6 +201,10 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start, unsigned long phys); extern void hash__vmemmap_remove_mapping(unsigned long start, unsigned long page_size); + +int hash__create_section_mapping(unsigned long start, unsigned long end); +int hash__remove_section_mapping(unsigned long start, unsigned long end); + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */ diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 1e8fceb308a518950918e1ea7d9102eb313f66ee..430d038eb2a4e06b719abb8e79b9862c802dd4e2 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -53,17 +53,25 @@ static inline __sum16 csum_fold(__wsum sum) return (__force __sum16)(~((__force u32)sum + tmp) >> 16); } +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add 
up carry.. */ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { #ifdef __powerpc64__ - unsigned long s = (__force u32)sum; + u64 s = (__force u32)sum; s += (__force u32)saddr; s += (__force u32)daddr; s += proto + len; - s += (s >> 32); - return (__force __wsum) s; + return (__force __wsum) from64to32(s); #else __asm__("\n\ addc %0,%0,%1 \n\ @@ -100,7 +108,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #ifdef __powerpc64__ res += (__force u64)addend; - return (__force __wsum)((u32)res + (res >> 32)); + return (__force __wsum) from64to32(res); #else asm("addc %0,%0,%1;" "addze %0,%0;" @@ -123,8 +131,7 @@ static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl) for (i = 0; i < ihl - 1; i++, ptr++) s += *ptr; - s += (s >> 32); - return (__force __wsum)s; + return (__force __wsum)from64to32(s); #else __wsum sum, tmp; diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index a703452d67b62f63b455098e88988fcc0b3776c6..555e22d5e07f9e21c322af718df72bdf2da0dfa4 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -209,5 +209,11 @@ exc_##label##_book3e: ori r3,r3,vector_offset@l; \ mtspr SPRN_IVOR##vector_number,r3; +#define RFI_TO_KERNEL \ + rfi + +#define RFI_TO_USER \ + rfi + #endif /* _ASM_POWERPC_EXCEPTION_64E_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 9a3eee66129766d84d8bd5063fd8ecd142e8fc21..cab6d2a46c415ca311c7fa4e06e2925a2e6d4e47 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -51,6 +51,59 @@ #define EX_PPR 88 /* SMT thread status register (priority) */ #define EX_CTR 96 +/* + * Macros for annotating the expected destination of (h)rfid + * + * The nop instructions allow us to insert one or more instructions to flush the + * L1-D cache when returning to userspace or a guest. 
+ */ +#define RFI_FLUSH_SLOT \ + RFI_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop + +#define RFI_TO_KERNEL \ + rfid + +#define RFI_TO_USER \ + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback + +#define RFI_TO_USER_OR_KERNEL \ + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback + +#define RFI_TO_GUEST \ + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback + +#define HRFI_TO_KERNEL \ + hrfid + +#define HRFI_TO_USER \ + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback + +#define HRFI_TO_USER_OR_KERNEL \ + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback + +#define HRFI_TO_GUEST \ + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback + +#define HRFI_TO_UNKNOWN \ + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback + #ifdef CONFIG_RELOCATABLE #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index ddf54f5bbdd1c05efbd286ec305beac3c459d8d6..7b332342071c2d2a7a051a3679426a758d470176 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -189,4 +189,19 @@ void apply_feature_fixups(void); void setup_feature_keys(void); #endif +#define RFI_FLUSH_FIXUP_SECTION \ +951: \ + .pushsection __rfi_flush_fixup,"a"; \ + .align 2; \ +952: \ + FTR_ENTRY_OFFSET 951b-952b; \ + .popsection; + + +#ifndef __ASSEMBLY__ + +extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; + +#endif + #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 708edebcf14746d86bac457b110ab5710c63d32c..dc0996b9d75db0e7e64e0192ba8d9d015dc6542e 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -240,6 +240,7 @@ #define H_GET_HCA_INFO 0x1B8 #define H_GET_PERF_COUNT 0x1BC #define H_MANAGE_TRACE 0x1C0 +#define H_GET_CPU_CHARACTERISTICS 0x1C8 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 #define H_QUERY_INT_STATE 0x1E4 #define H_POLL_PENDING 0x1D8 @@ -306,7 +307,19 @@ #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 #define H_SET_MODE_RESOURCE_LE 4 +/* H_GET_CPU_CHARACTERISTICS return values */ +#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0 +#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1 +#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 +#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 +#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 + +#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 +#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 + #ifndef __ASSEMBLY__ +#include /** * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments @@ -433,6 +446,11 @@ static inline unsigned long cmo_get_page_size(void) } #endif /* CONFIG_PPC_PSERIES */ +struct h_cpu_char_result { + u64 character; + u64 behaviour; +}; + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 6a6792bb39fbc8616034732322bd497ed30c5961..ea43897183fda6057f5f6270eb8465bc174b5219 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -205,6 +205,16 @@ struct paca_struct { struct sibling_subcore_state *sibling_subcore_state; #endif #endif +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * rfi fallback flush must be in its own cacheline to prevent + * other 
paca data leaking into the L1d + */ + u64 exrfi[13] __aligned(0x80); + void *rfi_flush_fallback_area; + u64 l1d_flush_congruence; + u64 l1d_flush_sets; +#endif }; #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 1b394247afc2dc86c76a26c9b5ed0274ae0de536..4e53b8570d1f1a207287f54dd8e537431dac97ad 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -340,4 +340,18 @@ static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawr return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); } +static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf); + if (rc == H_SUCCESS) { + p->character = retbuf[0]; + p->behaviour = retbuf[1]; + } + + return rc; +} + #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 654d64c9f3acd8366fdcf109111f3b5c16cdde21..6825a67cc3db33936416b8fc1053efc21f847f7c 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -38,6 +38,19 @@ static inline void pseries_big_endian_exceptions(void) {} static inline void pseries_little_endian_exceptions(void) {} #endif /* CONFIG_PPC_PSERIES */ +void rfi_flush_enable(bool enable); + +/* These are bit flags */ +enum l1d_flush_type { + L1D_FLUSH_NONE = 0x1, + L1D_FLUSH_FALLBACK = 0x2, + L1D_FLUSH_ORI = 0x4, + L1D_FLUSH_MTTRIG = 0x8, +}; + +void __init setup_rfi_flush(enum l1d_flush_type, bool enable); +void do_rfi_flush_fixups(enum l1d_flush_type types); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c833d88c423d8be96859a5bb5f2cd37b87ece0a0..64bcbd5804950397b96b6ab1b64ff369747b56f2 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -240,6 +240,10 @@ int main(void) #ifdef CONFIG_PPC_BOOK3S_64 DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp)); DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce)); + DEFINE(PACA_RFI_FLUSH_FALLBACK_AREA, offsetof(struct paca_struct, rfi_flush_fallback_area)); + DEFINE(PACA_EXRFI, offsetof(struct paca_struct, exrfi)); + DEFINE(PACA_L1D_FLUSH_CONGRUENCE, offsetof(struct paca_struct, l1d_flush_congruence)); + DEFINE(PACA_L1D_FLUSH_SETS, offsetof(struct paca_struct, l1d_flush_sets)); #endif DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 7803756998e214f14eae5b07bd45cd84287fa068..9e05c8828ee218705f6fe508d5396e44a983a24f 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -97,6 +97,7 @@ _GLOBAL(__setup_cpu_power9) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PID,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) or r3, r3, r4 @@ -119,6 +120,7 @@ _GLOBAL(__restore_cpu_power9) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PID,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) or r3, r3, r4 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index caa6596715990e3e9ad0cb577859c36e503f948c..c33b69d10919812df1872011d8d96fdcd6f815b1 100644 
--- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -251,13 +251,23 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ + ld r2,GPR2(r1) + ld r1,GPR1(r1) + mtlr r4 + mtcr r5 + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r8 + RFI_TO_USER + b . /* prevent speculative execution */ + + /* exit to kernel */ 1: ld r2,GPR2(r1) ld r1,GPR1(r1) mtlr r4 mtcr r5 mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - RFI + RFI_TO_KERNEL b . /* prevent speculative execution */ syscall_error: @@ -859,7 +869,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) -1: + mtspr SPRN_SRR1,r3 ld r2,_CCR(r1) @@ -872,8 +882,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r3,GPR3(r1) ld r4,GPR4(r1) ld r1,GPR1(r1) + RFI_TO_USER + b . /* prevent speculative execution */ - rfid +1: mtspr SPRN_SRR1,r3 + + ld r2,_CCR(r1) + mtcrf 0xFF,r2 + ld r2,_NIP(r1) + mtspr SPRN_SRR0,r2 + + ld r0,GPR0(r1) + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r1,GPR1(r1) + RFI_TO_KERNEL b . /* prevent speculative execution */ #endif /* CONFIG_PPC_BOOK3E */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fd68e19b9ef7a7219d239149f884c01698b4082b..96db6c3adebe0e314c129a37093e4dc39fcc7fd3 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -655,6 +655,8 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) andi. r10,r12,MSR_RI /* check for unrecoverable exception */ beq- 2f + andi. r10,r12,MSR_PR /* check for user mode (PR != 0) */ + bne 1f /* All done -- return from exception. */ @@ -671,7 +673,23 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) ld r11,PACA_EXSLB+EX_R11(r13) ld r12,PACA_EXSLB+EX_R12(r13) ld r13,PACA_EXSLB+EX_R13(r13) - rfid + RFI_TO_KERNEL + b . /* prevent speculative execution */ + +1: +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + + RESTORE_PPR_PACA(PACA_EXSLB, r9) + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + RFI_TO_USER b . /* prevent speculative execution */ 2: mfspr r11,SPRN_SRR0 @@ -679,7 +697,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . 8: mfspr r11,SPRN_SRR0 @@ -1576,6 +1594,92 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) bl kernel_bad_stack b 1b + .globl rfi_flush_fallback +rfi_flush_fallback: + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load addresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). 
*/ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + rfid + + .globl hrfi_flush_fallback +hrfi_flush_fallback: + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load addresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + hrfid + /* * Called from arch_local_irq_enable when an interrupt needs * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a12be60181bf8def5dd68f165a3bcd4d7df53632..7c30a91c1f868ec73eafdae2c5f31b7a0f7dc8e2 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -678,4 +679,142 @@ static int __init disable_hardlockup_detector(void) return 0; } early_initcall(disable_hardlockup_detector); + +#ifdef CONFIG_PPC_BOOK3S_64 +static enum l1d_flush_type enabled_flush_types; +static void *l1d_flush_fallback_area; +static bool no_rfi_flush; +bool rfi_flush; + +static int __init handle_no_rfi_flush(char *p) +{ + pr_info("rfi-flush: disabled on command line.\n"); + no_rfi_flush = true; + return 0; +} +early_param("no_rfi_flush", handle_no_rfi_flush); + +/* + * The RFI flush is not KPTI, but because users will see documentation that + * says to use nopti, we hijack that option here to also disable the RFI flush. + */ +static int __init handle_no_pti(char *p) +{ + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); + handle_no_rfi_flush(NULL); + return 0; +} +early_param("nopti", handle_no_pti); + +static void do_nothing(void *unused) +{ + /* + * We don't need to do the flush explicitly, just enter+exit kernel is + * sufficient, the RFI exit handlers will do the right thing. 
+ */ +} + +void rfi_flush_enable(bool enable) +{ + if (rfi_flush == enable) + return; + + if (enable) { + do_rfi_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else + do_rfi_flush_fixups(L1D_FLUSH_NONE); + + rfi_flush = enable; +} + +static void init_fallback_flush(void) +{ + u64 l1d_size, limit; + int cpu; + + l1d_size = ppc64_caches.dsize; + limit = min(safe_stack_limit(), ppc64_rma_size); + + /* + * Align to L1d size, and size it at 2x L1d size, to catch possible + * hardware prefetch runoff. We don't have a recipe for load patterns to + * reliably avoid the prefetcher. + */ + l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); + memset(l1d_flush_fallback_area, 0, l1d_size * 2); + + for_each_possible_cpu(cpu) { + /* + * The fallback flush is currently coded for 8-way + * associativity. Different associativity is possible, but it + * will be treated as 8-way and may not evict the lines as + * effectively. + * + * 128 byte lines are mandatory. + */ + u64 c = l1d_size / 8; + + paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; + paca[cpu].l1d_flush_congruence = c; + paca[cpu].l1d_flush_sets = c / 128; + } +} + +void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) +{ + if (types & L1D_FLUSH_FALLBACK) { + pr_info("rfi-flush: Using fallback displacement flush\n"); + init_fallback_flush(); + } + + if (types & L1D_FLUSH_ORI) + pr_info("rfi-flush: Using ori type flush\n"); + + if (types & L1D_FLUSH_MTTRIG) + pr_info("rfi-flush: Using mttrig type flush\n"); + + enabled_flush_types = types; + + if (!no_rfi_flush) + rfi_flush_enable(enable); +} + +#ifdef CONFIG_DEBUG_FS +static int rfi_flush_set(void *data, u64 val) +{ + if (val == 1) + rfi_flush_enable(true); + else if (val == 0) + rfi_flush_enable(false); + else + return -EINVAL; + + return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ + *val = rfi_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + return 0; +} +device_initcall(rfi_flush_debugfs_init); +#endif + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (rfi_flush) + return sprintf(buf, "Mitigation: RFI Flush\n"); + + return sprintf(buf, "Vulnerable\n"); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ #endif diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7394b770ae1f6b2402ef2b04fe23e92eae2c7ee2..b61fb7902018e82e68a2b2f5042de63136adb276 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -132,6 +132,15 @@ SECTIONS /* Read-only data */ RODATA +#ifdef CONFIG_PPC64 + . 
= ALIGN(8); + __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { + __start___rfi_flush_fixup = .; + *(__rfi_flush_fixup) + __stop___rfi_flush_fixup = .; + } +#endif + EXCEPTION_TABLE(0) NOTES :kernel :notes diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 043415f0bdb1646fa85f7bb26d04f0241c68ff63..e86bfa111f3c9774350bbdce4c08a3d530efa1d9 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -23,6 +23,7 @@ #include #include #include +#include struct fixup_entry { unsigned long mask; @@ -115,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +#ifdef CONFIG_PPC_BOOK3S_64 +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___rfi_flush_fixup); + end = PTRRELOC(&__stop___rfi_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + if (types & L1D_FLUSH_FALLBACK) + /* b .+16 to fallback flush */ + instrs[0] = 0x48000010; + + i = 0; + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush */ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + patch_instruction(dest + 1, instrs[1]); + patch_instruction(dest + 2, instrs[2]); + } + + printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ + void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 78dabf065ba96eb755267b9099e50dd6f024653d..bd666287c5eda1f9ef6c28607a33cb99fa1ee27b 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -747,7 +747,7 @@ static unsigned long __init htab_get_table_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int create_section_mapping(unsigned long start, unsigned long end) +int hash__create_section_mapping(unsigned long start, unsigned long end) { int rc = htab_bolt_mapping(start, end, __pa(start), pgprot_val(PAGE_KERNEL), mmu_linear_psize, @@ -761,7 +761,7 @@ int create_section_mapping(unsigned long start, unsigned long end) return rc; } -int remove_section_mapping(unsigned long start, unsigned long end) +int hash__remove_section_mapping(unsigned long start, unsigned long end) { int rc = htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index f4f437cbabf1d44447094e40da48fec9e14eaadc..0fad7f6742ff5acfbb2e0508687a2c1b290958d8 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -125,3 +125,21 @@ void mmu_cleanup_all(void) else if (mmu_hash_ops.hpte_clear_all) mmu_hash_ops.hpte_clear_all(); } + +#ifdef CONFIG_MEMORY_HOTPLUG +int create_section_mapping(unsigned long start, unsigned long end) +{ + if (radix_enabled()) + return -ENODEV; + + return hash__create_section_mapping(start, end); +} + +int remove_section_mapping(unsigned long start, unsigned long end) +{ + if (radix_enabled()) + return -ENODEV; + + return hash__remove_section_mapping(start, end); +} +#endif /* 
CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 9a25dce878757517a5a79fcd9b04d280d02f72d6..44c33ee397a02b22d3d8c6f414072512aa821054 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -173,6 +173,10 @@ static void __init radix_init_pgtable(void) */ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } static void __init radix_init_partition_table(void) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 72c27b8d2cf3240f201eabeb723afcdb0ff9b0de..083f927469513c9252bdd1bc6b4056c499f63240 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -401,8 +401,12 @@ static __u64 power_pmu_bhrb_to(u64 addr) int ret; __u64 target; - if (is_kernel_addr(addr)) - return branch_target((unsigned int *)addr); + if (is_kernel_addr(addr)) { + if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + return 0; + + return branch_target(&instr); + } /* Userspace: need copy instruction here then translate it */ pagefault_disable(); diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 7b2ca16b1eb4faf3a4488bd0ad8b4fb74318275a..991c6a517ddc17d78126229e6b86b353342432f3 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -516,7 +516,7 @@ static int memord(const void *d1, size_t s1, const void *d2, size_t s2) { if (s1 < s2) return 1; - if (s2 > s1) + if (s1 > s2) return -1; return memcmp(d1, d2, s1); diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index 83bebeec0fea58de5babedeceb8714e66fcec635..0f7b16e293478f3540d55a542b26bf83e1f17290 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -39,18 +39,18 @@ int __opal_async_get_token(void) int token; spin_lock_irqsave(&opal_async_comp_lock, flags); - token = find_first_bit(opal_async_complete_map, opal_max_async_tokens); + token = find_first_zero_bit(opal_async_token_map, opal_max_async_tokens); if (token >= opal_max_async_tokens) { token = -EBUSY; goto out; } - if (__test_and_set_bit(token, opal_async_token_map)) { + if (!__test_and_clear_bit(token, opal_async_complete_map)) { token = -EBUSY; goto out; } - __clear_bit(token, opal_async_complete_map); + __set_bit(token, opal_async_token_map); out: spin_unlock_irqrestore(&opal_async_comp_lock, flags); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index dcdfee0cd4f2f134f6fddeabe8130232bed4d9c6..f602307a438669236fcdb557b05078d431eecec9 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2623,6 +2623,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); + if ((level_shift - 3) * levels + page_shift >= 60) + return -EINVAL; + /* Allocate TCE table */ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, levels, tce_table_size, &offset, &total_allocated); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 
efe8b6bb168b97961f3943f7e2038a9faae2f702..6f8b4c19373af7e6d63fb1401ecca67d220bd1f7 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -35,13 +35,63 @@ #include #include #include +#include +#include #include "powernv.h" +static void pnv_setup_rfi_flush(void) +{ + struct device_node *np, *fw_features; + enum l1d_flush_type type; + int enable; + + /* Default to fallback in case fw-features are not available */ + type = L1D_FLUSH_FALLBACK; + enable = 1; + + np = of_find_node_by_name(NULL, "ibm,opal"); + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + + if (fw_features) { + np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_MTTRIG; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_ORI; + + of_node_put(np); + + /* Enable unless firmware says NOT to */ + enable = 2; + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + of_node_put(fw_features); + } + + setup_rfi_flush(type, enable > 0); +} + static void __init pnv_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + pnv_setup_rfi_flush(); + /* Initialize SMP */ pnv_smp_init(); @@ -289,7 +339,7 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu) { unsigned long ret_freq; - ret_freq = cpufreq_quick_get(cpu) * 1000ul; + ret_freq = cpufreq_get(cpu) * 1000ul; /* * If the backend cpufreq driver does not exist, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 97aa3f332f240cb056c1d4bca4f59d4125d6e52b..1845fc61191208acc71be9fdc9ba800c94b67578 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -450,6 +450,39 @@ static void __init find_and_init_phbs(void) of_pci_check_probe_only(); } +static void pseries_setup_rfi_flush(void) +{ + struct h_cpu_char_result result; + enum l1d_flush_type types; + bool enable; + long rc; + + /* Enable by default */ + enable = true; + + rc = plpar_get_cpu_characteristics(&result); + if (rc == H_SUCCESS) { + types = L1D_FLUSH_NONE; + + if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) + types |= L1D_FLUSH_MTTRIG; + if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) + types |= L1D_FLUSH_ORI; + + /* Use fallback if nothing set in hcall */ + if (types == L1D_FLUSH_NONE) + types = L1D_FLUSH_FALLBACK; + + if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) + enable = false; + } else { + /* Default to fallback in case the hcall is not available */ + types = L1D_FLUSH_FALLBACK; + } + + setup_rfi_flush(types, enable); +} + static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -467,6 +500,8 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); + pseries_setup_rfi_flush(); + /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ada29eaed6e280c08f6d3ee5671c58da9eb06e38..f523ac88315070873eede1c978312569d48953a7 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -274,7 +274,9 @@ static int 
axon_ram_probe(struct platform_device *device) if (bank->disk->major > 0) unregister_blkdev(bank->disk->major, bank->disk->disk_name); - del_gendisk(bank->disk); + if (bank->disk->flags & GENHD_FL_UP) + del_gendisk(bank->disk); + put_disk(bank->disk); } device->dev.platform_data = NULL; if (bank->io_addr != 0) @@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device) device_remove_file(&device->dev, &dev_attr_ecc); free_irq(bank->irq_id, device); del_gendisk(bank->disk); + put_disk(bank->disk); iounmap((void __iomem *) bank->io_addr); kfree(bank); diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index f267ee0afc082068266d3032ff41d194523ccb5a..716353b247ded2be54856a309ff57a9de71f0bcc 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -845,12 +845,12 @@ void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq) u32 ipic_get_mcp_status(void) { - return ipic_read(primary_ipic->regs, IPIC_SERMR); + return ipic_read(primary_ipic->regs, IPIC_SERSR); } void ipic_clear_mcp_status(u32 mask) { - ipic_write(primary_ipic->regs, IPIC_SERMR, mask); + ipic_write(primary_ipic->regs, IPIC_SERSR, mask); } /* Return an interrupt vector or 0 if no interrupt is pending. */ diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index 649eb62c52b3784ec0214e47482b87442d3081dd..9e02cb7955c1b0812583948b55ad9ae13a646492 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -81,6 +81,6 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range); int zpci_load(u64 *data, u64 req, u64 offset); int zpci_store(u64 data, u64 req, u64 offset); int zpci_store_block(const u64 *data, u64 req, u64 offset); -void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); +int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); #endif diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h index 402ad6df4897123e0e0b1a0eb0ba81157cb9380b..c54a9310d81423a39d1c82451e381d909b540f33 100644 --- a/arch/s390/include/asm/runtime_instr.h +++ b/arch/s390/include/asm/runtime_instr.h @@ -85,6 +85,8 @@ static inline void restore_ri_cb(struct runtime_instr_cb *cb_next, load_runtime_instr_cb(&runtime_instr_empty_cb); } -void exit_thread_runtime_instr(void); +struct task_struct; + +void runtime_instr_release(struct task_struct *tsk); #endif /* _RUNTIME_INSTR_H */ diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index dde6b52359c50d8fc4c996bcc7c7ea01813abfb1..ff2fbdafe6899fdee9f433f26dc7a4ce2ed407f6 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -29,17 +29,16 @@ static inline void restore_access_regs(unsigned int *acrs) } #define switch_to(prev,next,last) do { \ - if (prev->mm) { \ - save_fpu_regs(); \ - save_access_regs(&prev->thread.acrs[0]); \ - save_ri_cb(prev->thread.ri_cb); \ - } \ + /* save_fpu_regs() sets the CIF_FPU flag, which enforces \ + * a restore of the floating point / vector registers as \ + * soon as the next task returns to user space \ + */ \ + save_fpu_regs(); \ + save_access_regs(&prev->thread.acrs[0]); \ + save_ri_cb(prev->thread.ri_cb); \ update_cr_regs(next); \ - if (next->mm) { \ - set_cpu_flag(CIF_FPU); \ - restore_access_regs(&next->thread.acrs[0]); \ - restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ - } \ + restore_access_regs(&next->thread.acrs[0]); \ + restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ prev = __switch_to(prev,next); \ } while (0) diff --git a/arch/s390/kernel/compat_linux.c 
b/arch/s390/kernel/compat_linux.c index 0f9cd90c11af6173d78d4ace43465cc70332b18f..f06a9a0063f1b11fecfe3abd77b0ca3989a4a0ce 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis return retval; } + groups_sort(group_info); retval = set_current_groups(group_info); put_group_info(group_info); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 172fe1121d99f88b146973517762b360664b2d60..8382fc62cde687ae034c4232f17f77d3960a2ad6 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -70,8 +70,6 @@ extern void kernel_thread_starter(void); */ void exit_thread(struct task_struct *tsk) { - if (tsk == current) - exit_thread_runtime_instr(); } void flush_thread(void) @@ -84,6 +82,7 @@ void release_thread(struct task_struct *dead_task) void arch_release_task_struct(struct task_struct *tsk) { + runtime_instr_release(tsk); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index 70cdb03d4acdf7bd865958900833ee0bf1864a88..fd03a7569e101495d8690a65d2de34c9d25765d3 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -18,11 +18,24 @@ /* empty control block to disable RI by loading it */ struct runtime_instr_cb runtime_instr_empty_cb; +void runtime_instr_release(struct task_struct *tsk) +{ + kfree(tsk->thread.ri_cb); +} + static void disable_runtime_instr(void) { - struct pt_regs *regs = task_pt_regs(current); + struct task_struct *task = current; + struct pt_regs *regs; + if (!task->thread.ri_cb) + return; + regs = task_pt_regs(task); + preempt_disable(); load_runtime_instr_cb(&runtime_instr_empty_cb); + kfree(task->thread.ri_cb); + task->thread.ri_cb = NULL; + preempt_enable(); /* * Make sure the RI bit is deleted from the PSW. 
If the user did not @@ -43,19 +56,6 @@ static void init_runtime_instr_cb(struct runtime_instr_cb *cb) cb->valid = 1; } -void exit_thread_runtime_instr(void) -{ - struct task_struct *task = current; - - preempt_disable(); - if (!task->thread.ri_cb) - return; - disable_runtime_instr(); - kfree(task->thread.ri_cb); - task->thread.ri_cb = NULL; - preempt_enable(); -} - SYSCALL_DEFINE1(s390_runtime_instr, int, command) { struct runtime_instr_cb *cb; @@ -64,7 +64,7 @@ SYSCALL_DEFINE1(s390_runtime_instr, int, command) return -EOPNOTSUPP; if (command == S390_RUNTIME_INSTR_STOP) { - exit_thread_runtime_instr(); + disable_runtime_instr(); return 0; } diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9b59e6212d8fd22cadbc35f9e3546f7aa47e540c..709da452413d43683b8d6e24bc8a51e606fa773f 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -369,10 +369,10 @@ SYSCALL(sys_recvmmsg,compat_sys_recvmmsg) SYSCALL(sys_sendmmsg,compat_sys_sendmmsg) SYSCALL(sys_socket,sys_socket) SYSCALL(sys_socketpair,compat_sys_socketpair) /* 360 */ -SYSCALL(sys_bind,sys_bind) -SYSCALL(sys_connect,sys_connect) +SYSCALL(sys_bind,compat_sys_bind) +SYSCALL(sys_connect,compat_sys_connect) SYSCALL(sys_listen,sys_listen) -SYSCALL(sys_accept4,sys_accept4) +SYSCALL(sys_accept4,compat_sys_accept4) SYSCALL(sys_getsockopt,compat_sys_getsockopt) /* 365 */ SYSCALL(sys_setsockopt,compat_sys_setsockopt) SYSCALL(sys_getsockname,compat_sys_getsockname) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index e18435355c16f454fdee19e155af08493abe1d44..c2905a10cb37f494e7026ccb82bc06a9eef9a4a1 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -197,8 +197,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return -EAGAIN; } - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); return 0; } @@ -209,6 +207,9 @@ static int handle_iske(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + rc = try_handle_skey(vcpu); if (rc) return rc != -EAGAIN ? rc : 0; @@ -238,6 +239,9 @@ static int handle_rrbe(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + rc = try_handle_skey(vcpu); if (rc) return rc != -EAGAIN ? rc : 0; @@ -273,6 +277,9 @@ static int handle_sske(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + rc = try_handle_skey(vcpu); if (rc) return rc != -EAGAIN ? rc : 0; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 15ffc19c8c0c9e121525ff18c8b27dde1ebfd3eb..03a1d5976ff5016b25841dd1c87a425276cb1609 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -354,7 +354,8 @@ static void zpci_irq_handler(struct airq_struct *airq) /* End of second scan with interrupts on. */ break; /* First scan complete, reenable interrupts. 
*/ - zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC)) + break; si = 0; continue; } @@ -928,7 +929,7 @@ static int __init pci_base_init(void) if (!s390_pci_probe) return 0; - if (!test_facility(69) || !test_facility(71) || !test_facility(72)) + if (!test_facility(69) || !test_facility(71)) return 0; rc = zpci_debug_init(); diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index fa8d7d4b97515836e0b2d25a1c858fc8b28eef37..248146dcfce32ca466e992b516dda3ec8a481209 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -91,11 +92,14 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range) } /* Set Interruption Controls */ -void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) +int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) { + if (!test_facility(72)) + return -EIO; asm volatile ( " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused)); + return 0; } /* PCI Load */ diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index ff639342a8bef97fa81b89120deae05740974b28..c5b997757988c097d536cacb000b760941deb29b 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -607,7 +607,8 @@ asmlinkage void do_divide_error(unsigned long r4) break; } - force_sig_info(SIGFPE, &info, current); + info.si_signo = SIGFPE; + force_sig_info(info.si_signo, &info, current); } #endif diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 57154c638e71dc316bc8f2ad3da23859631a45eb..0f183ffe34164c9ec030c570e8d4aa1273aaedc4 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2391,9 +2391,16 @@ void __init mem_init(void) { high_memory = __va(last_valid_pfn << PAGE_SHIFT); - register_page_bootmem_info(); free_all_bootmem(); + /* + * Must be done after boot memory is put on freelist, because here we + * might set fields in deferred struct pages that have not yet been + * initialized, and free_all_bootmem() initializes all the reserved + * deferred pages for us. + */ + register_page_bootmem_info(); + /* * Set up the zero page, mark it reserved, so that page count * is not manipulated when freeing the page from user ptes. 
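
The s390 zpci_set_irq_ctrl() change above turns a void helper into one that returns -EIO when facility 72 is absent, and zpci_irq_handler() now breaks out of its rescan loop on failure instead of re-arming an instruction the machine does not implement. A minimal userspace sketch of that error-propagation shape, using stand-in names (have_facility_72, set_irq_ctrl) rather than the kernel's API:

	#include <errno.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-in for the kernel's test_facility(72); assume it is absent. */
	static bool have_facility_72(void) { return false; }

	/* Formerly "void": now reports failure so callers can react to it. */
	static int set_irq_ctrl(void)
	{
		if (!have_facility_72())
			return -EIO;
		/* on real hardware the SIC instruction would be issued here */
		return 0;
	}

	int main(void)
	{
		for (int scan = 0; scan < 2; scan++) {
			if (set_irq_ctrl()) {
				/* mirrors the new "if (zpci_set_irq_ctrl(...)) break;" */
				puts("facility 72 absent, stopping IRQ rescan");
				break;
			}
		}
		return 0;
	}
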
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index c7f2a5295b3a54599b68e4932c1c68cf2bdab99f..83a73cf5116aa15d071f83c754fbf99aa614af07 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -54,6 +54,7 @@ enum mbus_module srmmu_modtype; static unsigned int hwbug_bitmask; int vac_cache_size; +EXPORT_SYMBOL(vac_cache_size); int vac_line_size; extern struct resource sparc_iomap; diff --git a/arch/um/Makefile b/arch/um/Makefile index 0ca46ededfc73ce742c4075e2cc08bdb1e93cd0e..9c150ccb35d223d3a1e834d00ca6e2e3681ed9eb 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -117,7 +117,7 @@ archheaders: archprepare: include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib +LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie) CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ $(call cc-option, -fno-stack-protector,) \ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b9c546a305a45a836b525ab7171c1ae04f96baef..0ca4d12ce95c67dce88386c76255f13b32bd6636 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -45,7 +45,7 @@ config X86 select ARCH_USE_CMPXCHG_LOCKREF if X86_64 select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS - select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION if X86_32 @@ -64,6 +64,7 @@ config X86 select GENERIC_CLOCKEVENTS_MIN_ADJUST select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP @@ -407,6 +408,19 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + default y + ---help--- + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + + Without compiler support, at least indirect branches in assembler + code are eliminated. Since this includes the syscall entry path, + it is not entirely pointless. 
+ if X86_32 config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index ad1abc1813da7ced9e852e1c14c641311b6a9dbf..ea24114a92ae83814c64d953705148972537a9ed 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -203,6 +203,14 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) +# Avoid indirect branches in kernel to deal with Spectre +ifdef CONFIG_RETPOLINE + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) + ifneq ($(RETPOLINE_CFLAGS),) + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE + endif +endif + archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 766a5211f82768303087818c8da4ef280e688dd0..2728e1b7e4a61bb76671e11ab358f775c6db09f7 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -9,6 +9,7 @@ */ #undef CONFIG_PARAVIRT #undef CONFIG_PARAVIRT_SPINLOCKS +#undef CONFIG_PAGE_TABLE_ISOLATION #undef CONFIG_KASAN #include diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 383a6f84a060f103c7c43923ad84029e0ce2a9f1..fa8801b35e51a5f9b592cbd5214d9babc0c0ba0c 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -32,6 +32,7 @@ #include #include #include +#include /* * The following macros are used to move an (un)aligned 16 byte value to/from @@ -2734,7 +2735,7 @@ ENTRY(aesni_xts_crypt8) pxor INC, STATE4 movdqu IV, 0x30(OUTP) - call *%r11 + CALL_NOSPEC %r11 movdqu 0x00(OUTP), INC pxor INC, STATE1 @@ -2779,7 +2780,7 @@ ENTRY(aesni_xts_crypt8) _aesni_gf128mul_x_ble() movups IV, (IVP) - call *%r11 + CALL_NOSPEC %r11 movdqu 0x40(OUTP), INC pxor INC, STATE1 diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index aa8b0672f87a451865283f5f3e9f3d2c03992870..d9ae404f08c94c4dd94b5663a0d0edca34ebbc1b 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -906,7 +906,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req) if (sg_is_last(req->src) && req->src->offset + req->src->length <= PAGE_SIZE && - sg_is_last(req->dst) && + sg_is_last(req->dst) && req->dst->length && req->dst->offset + req->dst->length <= PAGE_SIZE) { one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index aa9e8bd163f61b0440372bfbc26e956f71bf95d3..77ff4de2224d92c17efbb5ab56e14fac4a70d13d 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -17,6 +17,7 @@ #include #include +#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -1224,7 +1225,7 @@ camellia_xts_crypt_16way: vpxor 14 * 16(%rax), %xmm15, %xmm14; vpxor 15 * 16(%rax), %xmm15, %xmm15; - call *%r9; + CALL_NOSPEC %r9; addq $(16 * 16), %rsp; diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 16186c18656dfdc67fc7433a48dcc51ee62d9bea..7384342fbb4113dbaaff124991d06962fa425a2e 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -12,6 +12,7 @@ #include #include +#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -1337,7 +1338,7 @@ camellia_xts_crypt_32way: vpxor 14 * 32(%rax), %ymm15, %ymm14; vpxor 15 * 32(%rax), %ymm15, %ymm15; - call *%r9; 
+ CALL_NOSPEC %r9; addq $(16 * 32), %rsp; diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index dc05f010ca9b6fd598c444c60a488953e7725655..174fd4146043f7be019d5efa20d800e6f06ba283 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -45,6 +45,7 @@ #include #include +#include ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction @@ -172,7 +173,7 @@ continue_block: movzxw (bufp, %rax, 2), len lea crc_array(%rip), bufp lea (bufp, len, 1), bufp - jmp *bufp + JMP_NOSPEC bufp ################################################################ ## 2a) PROCESS FULL BLOCKS: diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index e32142bc071d9344533f39d2a6ee5326845b62f2..28c372003e44c1e4f18f0c8270bf0be45e301d4d 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -164,7 +164,6 @@ static struct shash_alg alg = { .init = poly1305_simd_init, .update = poly1305_simd_update, .final = crypto_poly1305_final, - .setkey = crypto_poly1305_setkey, .descsize = sizeof(struct poly1305_simd_desc_ctx), .base = { .cra_name = "poly1305", diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index 399a29d067d6367603714633fb8c4de6ab77275a..cb91a64a99e7cdbc0422227383611378fb6b076a 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc, salsa20_ivsetup(ctx, walk.iv); - if (likely(walk.nbytes == nbytes)) - { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, nbytes); - return blkcipher_walk_done(desc, &walk, 0); - } - while (walk.nbytes >= 64) { salsa20_encrypt_bytes(ctx, walk.src.virt.addr, walk.dst.virt.addr, diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c index 36870b26067a73655b96d137c04977c8f392237d..d08805032f0193ab96dde00890a0502a9f2b24e6 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c @@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) { unsigned int j; - state->lens[0] = 0; - state->lens[1] = 1; - state->lens[2] = 2; - state->lens[3] = 3; + /* initially all lanes are unused */ + state->lens[0] = 0xFFFFFFFF00000000; + state->lens[1] = 0xFFFFFFFF00000001; + state->lens[2] = 0xFFFFFFFF00000002; + state->lens[3] = 0xFFFFFFFF00000003; + state->unused_lanes = 0xFF03020100; for (j = 0; j < 4; j++) state->ldata[j].job_in_lane = NULL; diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index bdd9cc59d20f7b514cf9ecb1f53ca49a5deb4d5b..b0cd306dc527e77e939ddcce374135e49d350b95 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -201,7 +202,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) * special case only applies after poking regs and before the * very next return to user mode. */ - current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); #endif user_enter_irqoff(); @@ -277,7 +278,8 @@ __visible void do_syscall_64(struct pt_regs *regs) * regs->orig_ax, which changes the behavior of some syscalls. 
*/ if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { - regs->ax = sys_call_table[nr & __SYSCALL_MASK]( + nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls); + regs->ax = sys_call_table[nr]( regs->di, regs->si, regs->dx, regs->r10, regs->r8, regs->r9); } @@ -299,7 +301,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) unsigned int nr = (unsigned int)regs->orig_ax; #ifdef CONFIG_IA32_EMULATION - current->thread.status |= TS_COMPAT; + ti->status |= TS_COMPAT; #endif if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { @@ -313,6 +315,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) } if (likely(nr < IA32_NR_syscalls)) { + nr = array_index_nospec(nr, IA32_NR_syscalls); /* * It's possible that a 32-bit syscall implementation * takes a 64-bit parameter but nonetheless assumes that diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index edba8606b99a0daa86853e6869553c6854b0efe3..f5434b4670c1182b9c877f1adab60e42bcee933d 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -45,6 +45,7 @@ #include #include #include +#include .section .entry.text, "ax" @@ -228,6 +229,18 @@ ENTRY(__switch_to_asm) movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + /* Clobbers %ebx */ + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popl %esi popl %edi @@ -260,7 +273,7 @@ ENTRY(ret_from_fork) /* kernel thread */ 1: movl %edi, %eax - call *%ebx + CALL_NOSPEC %ebx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() @@ -984,7 +997,8 @@ trace: movl 0x4(%ebp), %edx subl $MCOUNT_INSN_SIZE, %eax - call *ftrace_trace_function + movl ftrace_trace_function, %ecx + CALL_NOSPEC %ecx popl %edx popl %ecx @@ -1020,7 +1034,7 @@ return_to_handler: movl %eax, %ecx popl %edx popl %eax - jmp *%ecx + JMP_NOSPEC %ecx #endif #ifdef CONFIG_TRACING @@ -1062,7 +1076,7 @@ error_code: movl %ecx, %es TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer - call *%edi + CALL_NOSPEC %edi jmp ret_from_exception END(page_fault) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index be9df513141ee3079b992869d831b681ff28c61c..8be2aaa5b8ec609ad683b62afe041e68962d66b4 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include /* Avoid __ASSEMBLER__'ifying just for this. */ @@ -54,19 +56,15 @@ ENTRY(native_usergs_sysret64) ENDPROC(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ -.macro TRACE_IRQS_FLAGS flags:req +.macro TRACE_IRQS_IRETQ #ifdef CONFIG_TRACE_IRQFLAGS - bt $9, \flags /* interrupts off? */ + bt $9, EFLAGS(%rsp) /* interrupts off? */ jnc 1f TRACE_IRQS_ON 1: #endif .endm -.macro TRACE_IRQS_IRETQ - TRACE_IRQS_FLAGS EFLAGS(%rsp) -.endm - /* * When dynamic function tracer is enabled it will add a breakpoint * to all locations that it is about to modify, sync CPUs, update @@ -150,6 +148,7 @@ ENTRY(entry_SYSCALL_64) * it is too small to ever cause noticeable irq latency. 
*/ SWAPGS_UNSAFE_STACK + SWITCH_KERNEL_CR3_NO_STACK /* * A hypervisor implementation might want to use a label * after the swapgs, so that it can do the swapgs @@ -178,83 +177,17 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) pushq %r9 /* pt_regs->r9 */ pushq %r10 /* pt_regs->r10 */ pushq %r11 /* pt_regs->r11 */ - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ - - /* - * If we need to do entry work or if we guess we'll need to do - * exit work, go straight to the slow path. - */ - movq PER_CPU_VAR(current_task), %r11 - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) - jnz entry_SYSCALL64_slow_path - -entry_SYSCALL_64_fastpath: - /* - * Easy case: enable interrupts and issue the syscall. If the syscall - * needs pt_regs, we'll call a stub that disables interrupts again - * and jumps to the slow path. - */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) -#if __SYSCALL_MASK == ~0 - cmpq $__NR_syscall_max, %rax -#else - andl $__SYSCALL_MASK, %eax - cmpl $__NR_syscall_max, %eax -#endif - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ - movq %r10, %rcx - - /* - * This call instruction is handled specially in stub_ptregs_64. - * It might end up jumping to the slow path. If it jumps, RAX - * and all argument registers are clobbered. - */ - call *sys_call_table(, %rax, 8) -.Lentry_SYSCALL_64_after_fastpath_call: - - movq %rax, RAX(%rsp) -1: - - /* - * If we get here, then we know that pt_regs is clean for SYSRET64. - * If we see that no exit work is required (which we are required - * to check with IRQs off), then we can go straight to SYSRET64. - */ - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - movq PER_CPU_VAR(current_task), %r11 - testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) - jnz 1f - - LOCKDEP_SYS_EXIT - TRACE_IRQS_ON /* user mode is traced as IRQs on */ - movq RIP(%rsp), %rcx - movq EFLAGS(%rsp), %r11 - RESTORE_C_REGS_EXCEPT_RCX_R11 - movq RSP(%rsp), %rsp - USERGS_SYSRET64 - -1: - /* - * The fast path looked good when we started, but something changed - * along the way and we need to switch to the slow path. Calling - * raise(3) will trigger this, for example. IRQs are off. - */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_EXTRA_REGS - movq %rsp, %rdi - call syscall_return_slowpath /* returns with IRQs disabled */ - jmp return_from_SYSCALL_64 + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ -entry_SYSCALL64_slow_path: /* IRQs are off. */ - SAVE_EXTRA_REGS movq %rsp, %rdi call do_syscall_64 /* returns with IRQs disabled */ -return_from_SYSCALL_64: RESTORE_EXTRA_REGS TRACE_IRQS_IRETQ /* we're about to change IF */ @@ -327,53 +260,31 @@ return_from_SYSCALL_64: syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ RESTORE_C_REGS_EXCEPT_RCX_R11 + + /* + * This opens a window where we have a user CR3, but are + * running in the kernel. This makes using the CS + * register useless for telling whether or not we need to + * switch CR3 in NMIs. Normal interrupts are OK because + * they are off here. + */ + SWITCH_USER_CR3 movq RSP(%rsp), %rsp USERGS_SYSRET64 opportunistic_sysret_failed: + /* + * This opens a window where we have a user CR3, but are + * running in the kernel. This makes using the CS + * register useless for telling whether or not we need to + * switch CR3 in NMIs. Normal interrupts are OK because + * they are off here. 
+ */ + SWITCH_USER_CR3 SWAPGS jmp restore_c_regs_and_iret END(entry_SYSCALL_64) -ENTRY(stub_ptregs_64) - /* - * Syscalls marked as needing ptregs land here. - * If we are on the fast path, we need to save the extra regs, - * which we achieve by trying again on the slow path. If we are on - * the slow path, the extra regs are already saved. - * - * RAX stores a pointer to the C function implementing the syscall. - * IRQs are on. - */ - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) - jne 1f - - /* - * Called from fast path -- disable IRQs again, pop return address - * and jump to slow path - */ - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - popq %rax - jmp entry_SYSCALL64_slow_path - -1: - jmp *%rax /* Called from C */ -END(stub_ptregs_64) - -.macro ptregs_stub func -ENTRY(ptregs_\func) - leaq \func(%rip), %rax - jmp stub_ptregs_64 -END(ptregs_\func) -.endm - -/* Instantiate ptregs_stub for each ptregs-using syscall */ -#define __SYSCALL_64_QUAL_(sym) -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) -#include <asm/syscalls_64.h> - /* * %rdi: prev task * %rsi: next task */ @@ -399,6 +310,18 @@ ENTRY(__switch_to_asm) movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + /* Clobbers %rbx */ + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popq %r15 popq %r14 @@ -428,13 +351,14 @@ ENTRY(ret_from_fork) movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ + SWITCH_USER_CR3 SWAPGS jmp restore_regs_and_iret 1: /* kernel thread */ movq %r12, %rdi - call *%rbx + CALL_NOSPEC %rbx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() @@ -482,6 +406,7 @@ END(irq_entries_start) * tracking that we're in kernel mode. */ SWAPGS + SWITCH_KERNEL_CR3 /* * We need to tell lockdep that IRQs are off. We can't do this until @@ -539,6 +464,7 @@ GLOBAL(retint_user) mov %rsp,%rdi call prepare_exit_to_usermode TRACE_IRQS_IRETQ + SWITCH_USER_CR3 SWAPGS jmp restore_regs_and_iret @@ -616,6 +542,7 @@ native_irq_return_ldt: pushq %rdi /* Stash user RDI */ SWAPGS + SWITCH_KERNEL_CR3 movq PER_CPU_VAR(espfix_waddr), %rdi movq %rax, (0*8)(%rdi) /* user RAX */ movq (1*8)(%rsp), %rax /* user RIP */ @@ -642,6 +569,7 @@ native_irq_return_ldt: * still points to an RO alias of the ESPFIX stack.
*/ orq PER_CPU_VAR(espfix_stack), %rax + SWITCH_USER_CR3 SWAPGS movq %rax, %rsp @@ -686,13 +614,8 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym) #endif /* Make sure APIC interrupt handlers end up in the irqentry section: */ -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) -# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" -# define POP_SECTION_IRQENTRY .popsection -#else -# define PUSH_SECTION_IRQENTRY -# define POP_SECTION_IRQENTRY -#endif +#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" +#define POP_SECTION_IRQENTRY .popsection .macro apicinterrupt num sym do_sym PUSH_SECTION_IRQENTRY @@ -872,13 +795,11 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 ENTRY(native_load_gs_index) pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) - TRACE_IRQS_OFF SWAPGS .Lgs_change: movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE SWAPGS - TRACE_IRQS_FLAGS (%rsp) popfq ret END(native_load_gs_index) @@ -1022,13 +943,17 @@ idtentry async_page_fault do_async_page_fault has_error_code=1 #endif #ifdef CONFIG_X86_MCE -idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) +idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif /* * Save all registers in pt_regs, and switch gs if needed. * Use slow, but surefire "are we in kernel?" check. - * Return: ebx=0: need swapgs on exit, ebx=1: otherwise + * + * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit + * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit + * ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit + * ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit */ ENTRY(paranoid_entry) cld @@ -1041,7 +966,26 @@ ENTRY(paranoid_entry) js 1f /* negative -> in kernel */ SWAPGS xorl %ebx, %ebx -1: ret +1: +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * We might have come in between a swapgs and a SWITCH_KERNEL_CR3 + * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit. + * Do a conditional SWITCH_KERNEL_CR3: this could safely be done + * unconditionally, but we need to find out whether the reverse + * should be done on return (conveyed to paranoid_exit in %ebx). + */ + ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER + testl $KAISER_SHADOW_PGD_OFFSET, %eax + jz 2f + orl $2, %ebx + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax + /* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */ + ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID + movq %rax, %cr3 +2: +#endif + ret END(paranoid_entry) /* @@ -1054,19 +998,26 @@ END(paranoid_entry) * be complicated. Fortunately, there's no good reason * to try to handle preemption here. * - * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) + * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3 + * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 + * ebx=2: needs both swapgs and SWITCH_USER_CR3 + * ebx=3: needs SWITCH_USER_CR3 but not swapgs */ ENTRY(paranoid_exit) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF_DEBUG - testl %ebx, %ebx /* swapgs needed? */ + TRACE_IRQS_IRETQ_DEBUG +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */ + testl $2, %ebx /* SWITCH_USER_CR3 needed? */ + jz paranoid_exit_no_switch + SWITCH_USER_CR3 +paranoid_exit_no_switch: +#endif + testl $1, %ebx /* swapgs needed?
*/ jnz paranoid_exit_no_swapgs - TRACE_IRQS_IRETQ SWAPGS_UNSAFE_STACK - jmp paranoid_exit_restore paranoid_exit_no_swapgs: - TRACE_IRQS_IRETQ_DEBUG -paranoid_exit_restore: RESTORE_EXTRA_REGS RESTORE_C_REGS REMOVE_PT_GPREGS_FROM_STACK 8 @@ -1081,6 +1032,13 @@ ENTRY(error_entry) cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 + /* + * error_entry() always returns with a kernel gsbase and + * CR3. We must also have a kernel CR3/gsbase before + * calling TRACE_IRQS_*. Just unconditionally switch to + * the kernel CR3 here. + */ + SWITCH_KERNEL_CR3 xorl %ebx, %ebx testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1241,6 +1199,10 @@ ENTRY(nmi) */ SWAPGS_UNSAFE_STACK + /* + * percpu variables are mapped with user CR3, so no need + * to switch CR3 here. + */ cld movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp @@ -1274,12 +1236,34 @@ ENTRY(nmi) movq %rsp, %rdi movq $-1, %rsi +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* Unconditionally use kernel CR3 for do_nmi() */ + /* %rax is saved above, so OK to clobber here */ + ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER + /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ + ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID + pushq %rax + /* mask off "user" bit of pgd address and 12 PCID bits: */ + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax + movq %rax, %cr3 +2: +#endif call do_nmi +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * Unconditionally restore CR3. I know we return to + * kernel code that needs user CR3, but do we ever return + * to "user mode" where we need the kernel CR3? + */ + ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER +#endif + /* * Return back to user mode. We must *not* do the normal exit - * work, because we don't want to enable interrupts. Fortunately, - * do_nmi doesn't modify pt_regs. + * work, because we don't want to enable interrupts. Do not + * switch to user CR3: we might be going back to kernel code + * that had a user CR3 set. */ SWAPGS jmp restore_c_regs_and_iret @@ -1476,22 +1460,55 @@ end_repeat_nmi: ALLOC_PT_GPREGS_ON_STACK /* - * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit - * as we should not be calling schedule in NMI context. - * Even with normal interrupts enabled. An NMI should not be - * setting NEED_RESCHED or anything that normal interrupts and - * exceptions might do. + * Use the same approach as paranoid_entry to handle SWAPGS, but + * without CR3 handling since we do that differently in NMIs. No + * need to use paranoid_exit as we should not be calling schedule + * in NMI context. Even with normal interrupts enabled. An NMI + * should not be setting NEED_RESCHED or anything that normal + * interrupts and exceptions might do. 
*/ - call paranoid_entry - - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ + cld + SAVE_C_REGS + SAVE_EXTRA_REGS + movl $1, %ebx + movl $MSR_GS_BASE, %ecx + rdmsr + testl %edx, %edx + js 1f /* negative -> in kernel */ + SWAPGS + xorl %ebx, %ebx +1: movq %rsp, %rdi movq $-1, %rsi +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* Unconditionally use kernel CR3 for do_nmi() */ + /* %rax is saved above, so OK to clobber here */ + ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER + /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ + ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID + pushq %rax + /* mask off "user" bit of pgd address and 12 PCID bits: */ + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax + movq %rax, %cr3 +2: +#endif + + /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ call do_nmi +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * Unconditionally restore CR3. We might be returning to + * kernel code that needs user CR3, like just before + * a sysret. + */ + ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER +#endif + testl %ebx, %ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: + /* We fixed up CR3 above, so no need to switch it here */ SWAPGS_UNSAFE_STACK nmi_restore: RESTORE_EXTRA_REGS diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index e1721dafbcb13fab9230cc20d598b18ebef8306b..d76a97653980145dc982c6abbd3ac541dc1050bb 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -13,6 +13,8 @@ #include <asm/irqflags.h> #include <asm/asm.h> #include <asm/smap.h> +#include <asm/pgtable_types.h> +#include <asm/kaiser.h> #include <linux/linkage.h> #include <linux/err.h> @@ -48,6 +50,7 @@ ENTRY(entry_SYSENTER_compat) /* Interrupts are off on entry. */ SWAPGS_UNSAFE_STACK + SWITCH_KERNEL_CR3_NO_STACK movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* @@ -184,6 +187,7 @@ ENDPROC(entry_SYSENTER_compat) ENTRY(entry_SYSCALL_compat) /* Interrupts are off on entry. */ SWAPGS_UNSAFE_STACK + SWITCH_KERNEL_CR3_NO_STACK /* Stash user ESP and switch to the kernel stack. */ movl %esp, %r8d @@ -259,6 +263,7 @@ sysret32_from_system_call: xorq %r8, %r8 xorq %r9, %r9 xorq %r10, %r10 + SWITCH_USER_CR3 movq RSP-ORIG_RAX(%rsp), %rsp swapgs sysretl @@ -297,7 +302,7 @@ ENTRY(entry_INT80_compat) PARAVIRT_ADJUST_EXCEPTION_FRAME ASM_CLAC /* Do this early to minimize exposure */ SWAPGS - + SWITCH_KERNEL_CR3_NO_STACK /* * User tracing code (ptrace or signal handlers) might assume that * the saved RAX contains a 32-bit number when we're invoking a 32-bit @@ -338,6 +343,7 @@ ENTRY(entry_INT80_compat) /* Go back to user mode.
*/ TRACE_IRQS_ON + SWITCH_USER_CR3 SWAPGS jmp restore_regs_and_iret END(entry_INT80_compat) diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index 9dbc5abb6162fa20581069499667a8c49b254868..6705edda4ac3e55dbc614e0f637cbd761c6c5a6d 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -6,14 +6,11 @@ #include <asm/asm-offsets.h> #include <asm/syscall.h> -#define __SYSCALL_64_QUAL_(sym) sym -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym - -#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #include <asm/syscalls_64.h> #undef __SYSCALL_64 -#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), +#define __SYSCALL_64(nr, sym, qual) [nr] = sym, extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 636c4b341f36a93975c1ab732f8dfc5fb66bf4ba..0174290b285731446a0bcc349f46b3cfaf7f3809 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -46,6 +46,7 @@ static enum { EMULATE, NATIVE, NONE } vsyscall_mode = #else EMULATE; #endif +unsigned long vsyscall_pgprot = __PAGE_KERNEL_VSYSCALL; static int __init vsyscall_setup(char *str) { @@ -66,6 +67,11 @@ static int __init vsyscall_setup(char *str) } early_param("vsyscall", vsyscall_setup); +bool vsyscall_enabled(void) +{ + return vsyscall_mode != NONE; +} + static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, const char *message) { @@ -331,11 +337,11 @@ void __init map_vsyscall(void) extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); + if (vsyscall_mode != NATIVE) + vsyscall_pgprot = __PAGE_KERNEL_VVAR; if (vsyscall_mode != NONE) __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, - vsyscall_mode == NATIVE - ?
PAGE_KERNEL_VSYSCALL - : PAGE_KERNEL_VVAR); + __pgprot(vsyscall_pgprot)); BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index 9842270ed2f20b27f587ef36fefb86a6ff17a951..21a4e4127f4346def3c34d7260f05c90b2c75b82 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -277,7 +277,7 @@ static int __init amd_power_pmu_init(void) int ret; if (!x86_match_cpu(cpu_match)) - return 0; + return -ENODEV; if (!boot_cpu_has(X86_FEATURE_ACC_POWER)) return -ENODEV; diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 982c9e31daca441cc66b0b7fc652f3b4251be7b0..21298c173b0e02c13df84290dda9f8deeaaeb6c3 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -22,6 +22,7 @@ #include <linux/debugfs.h> #include <linux/device.h> #include <linux/coredump.h> +#include <asm/kaiser.h> #include <asm-generic/sizes.h> #include <asm/perf_event.h> @@ -77,6 +78,23 @@ static size_t buf_size(struct page *page) return 1 << (PAGE_SHIFT + page_private(page)); } +static void bts_buffer_free_aux(void *data) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + struct bts_buffer *buf = data; + int nbuf; + + for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) { + struct page *page = buf->buf[nbuf].page; + void *kaddr = page_address(page); + size_t page_size = buf_size(page); + + kaiser_remove_mapping((unsigned long)kaddr, page_size); + } +#endif + kfree(data); +} + static void * bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) { @@ -113,29 +131,33 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) buf->real_size = size - size % BTS_RECORD_SIZE; for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { - unsigned int __nr_pages; + void *kaddr = pages[pg]; + size_t page_size; + + page = virt_to_page(kaddr); + page_size = buf_size(page); + + if (kaiser_add_mapping((unsigned long)kaddr, + page_size, __PAGE_KERNEL) < 0) { + buf->nr_bufs = nbuf; + bts_buffer_free_aux(buf); + return NULL; + } - page = virt_to_page(pages[pg]); - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; buf->buf[nbuf].page = page; buf->buf[nbuf].offset = offset; buf->buf[nbuf].displacement = (pad ?
BTS_RECORD_SIZE - pad : 0); - buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; + buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement; pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; buf->buf[nbuf].size -= pad; - pg += __nr_pages; - offset += __nr_pages << PAGE_SHIFT; + pg += page_size >> PAGE_SHIFT; + offset += page_size; } return buf; } -static void bts_buffer_free_aux(void *data) -{ - kfree(data); -} - static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) { return buf->buf[idx].offset + buf->buf[idx].displacement; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index be202390bbd37b00106864123a647786497ce2cd..8e7a3f1df3a5dc298d45de5543e5ca11dc30c9a9 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2,11 +2,15 @@ #include <linux/types.h> #include <linux/slab.h> +#include <asm/kaiser.h> #include <asm/perf_event.h> #include "../perf_event.h" +static +DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store); + /* The size of a BTS record in bytes: */ #define BTS_RECORD_SIZE 24 @@ -268,6 +272,39 @@ void fini_debug_store_on_cpu(int cpu) static DEFINE_PER_CPU(void *, insn_buffer); +static void *dsalloc(size_t size, gfp_t flags, int node) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + unsigned int order = get_order(size); + struct page *page; + unsigned long addr; + + page = __alloc_pages_node(node, flags | __GFP_ZERO, order); + if (!page) + return NULL; + addr = (unsigned long)page_address(page); + if (kaiser_add_mapping(addr, size, __PAGE_KERNEL) < 0) { + __free_pages(page, order); + addr = 0; + } + return (void *)addr; +#else + return kmalloc_node(size, flags | __GFP_ZERO, node); +#endif +} + +static void dsfree(const void *buffer, size_t size) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (!buffer) + return; + kaiser_remove_mapping((unsigned long)buffer, size); + free_pages((unsigned long)buffer, get_order(size)); +#else + kfree(buffer); +#endif +} + static int alloc_pebs_buffer(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; @@ -278,7 +315,7 @@ static int alloc_pebs_buffer(int cpu) if (!x86_pmu.pebs) return 0; - buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); + buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); if (unlikely(!buffer)) return -ENOMEM; @@ -289,7 +326,7 @@ static int alloc_pebs_buffer(int cpu) if (x86_pmu.intel_cap.pebs_format < 2) { ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); if (!ibuffer) { - kfree(buffer); + dsfree(buffer, x86_pmu.pebs_buffer_size); return -ENOMEM; } per_cpu(insn_buffer, cpu) = ibuffer; @@ -315,7 +352,8 @@ static void release_pebs_buffer(int cpu) kfree(per_cpu(insn_buffer, cpu)); per_cpu(insn_buffer, cpu) = NULL; - kfree((void *)(unsigned long)ds->pebs_buffer_base); + dsfree((void *)(unsigned long)ds->pebs_buffer_base, + x86_pmu.pebs_buffer_size); ds->pebs_buffer_base = 0; } @@ -329,7 +367,7 @@ static int alloc_bts_buffer(int cpu) if (!x86_pmu.bts) return 0; - buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); + buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); if (unlikely(!buffer)) { WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); return -ENOMEM; @@ -355,19 +393,15 @@ static void release_bts_buffer(int cpu) if (!ds || !x86_pmu.bts) return; - kfree((void *)(unsigned long)ds->bts_buffer_base); + dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE); ds->bts_buffer_base = 0; } static int alloc_ds_buffer(int cpu) { - int node = cpu_to_node(cpu); - struct debug_store *ds;
- - ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node); - if (unlikely(!ds)) - return -ENOMEM; + struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu); + memset(ds, 0, sizeof(*ds)); per_cpu(cpu_hw_events, cpu).ds = ds; return 0; @@ -381,7 +415,6 @@ static void release_ds_buffer(int cpu) return; per_cpu(cpu_hw_events, cpu).ds = NULL; - kfree(ds); } void release_ds_buffers(void) @@ -1389,9 +1422,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) continue; /* log dropped samples number */ - if (error[bit]) + if (error[bit]) { perf_log_lost_samples(event, error[bit]); + if (perf_event_account_interrupt(event)) + x86_pmu_stop(event, 0); + } + if (counts[bit]) { __intel_pmu_pebs_event(event, iregs, base, top, bit, counts[bit]); diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index ccbe24e697c46cee2890d00fda413b9f31b41af6..6665000a03dff7a07d73eeaad05171835ec12998 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -139,7 +139,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ - ".popsection" + ".popsection\n" #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ OLDINSTR_2(oldinstr, 1, 2) \ @@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ - ".popsection" + ".popsection\n" /* * Alternative instructions for different CPU types or capabilities. diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 44b8762fa0c784b44a44d22e390af48ce2dcb5d0..166654218329e0f69ba0ffd6df83adc5de3f72c9 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -10,7 +10,34 @@ #include <asm/special_insns.h> #include <asm/preempt.h> #include <asm/asm.h> +#include <asm/nospec-branch.h> #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); #endif + +#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_X86_32 +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); +#else +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); +INDIRECT_THUNK(8) +INDIRECT_THUNK(9) +INDIRECT_THUNK(10) +INDIRECT_THUNK(11) +INDIRECT_THUNK(12) +INDIRECT_THUNK(13) +INDIRECT_THUNK(14) +INDIRECT_THUNK(15) +#endif +INDIRECT_THUNK(ax) +INDIRECT_THUNK(bx) +INDIRECT_THUNK(cx) +INDIRECT_THUNK(dx) +INDIRECT_THUNK(si) +INDIRECT_THUNK(di) +INDIRECT_THUNK(bp) +asmlinkage void __fill_rsb(void); +asmlinkage void __clear_rsb(void); + +#endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index be70795a4c1ef94ed2da0f0261768250c6ed7bb6..08684b3595d7c544c2565981d0b80289cbc3f85b 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -11,10 +11,12 @@ # define __ASM_FORM_COMMA(x) " " #x "," #endif -#ifdef CONFIG_X86_32 +#ifndef __x86_64__ +/* 32 bit */ # define __ASM_SEL(a,b) __ASM_FORM(a) # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) #else +/* 64 bit */ # define __ASM_SEL(a,b) __ASM_FORM(b) # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) #endif @@ -133,8 +135,8 @@ * gets set up by the containing function. If you forget to do this, objtool * may print a "call without frame pointer save/setup" warning.
*/ -register unsigned int __asm_call_sp asm("esp"); -#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) +register unsigned long current_stack_pointer asm(_ASM_SP); +#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index bfb28caf97b1be1f2d6aa8893bd905a39030e310..857590390397a45114eb4da27c55d60a06b2471c 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -23,6 +23,34 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif +/** + * array_index_mask_nospec() - generate a mask that is ~0UL when the + * bounds check succeeds and 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * Returns: + * 0 - (index < size) + */ +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + unsigned long mask; + + asm ("cmp %1,%2; sbb %0,%0;" + :"=r" (mask) + :"r"(size),"r" (index) + :"cc"); + return mask; +} + +/* Override the default implementation from linux/nospec.h. */ +#define array_index_mask_nospec array_index_mask_nospec + +/* Prevent speculative execution past this barrier. */ +#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ + "lfence", X86_FEATURE_LFENCE_RDTSC) + #ifdef CONFIG_X86_PPRO_FENCE #define dma_rmb() rmb() #else diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h index e01f7f7ccb0c5711db4f1fe130938cee36169965..84ae170bc3d0cd73cfa7d08fa6f730bd3106e4ea 100644 --- a/arch/x86/include/asm/cmdline.h +++ b/arch/x86/include/asm/cmdline.h @@ -2,5 +2,7 @@ #define _ASM_X86_CMDLINE_H int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); +int cmdline_find_option(const char *cmdline_ptr, const char *option, + char *buffer, int bufsize); #endif /* _ASM_X86_CMDLINE_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 1d2b69fc0ceb15baa25f6022b35ace5b55c11de2..8c101579f535c60ad65c2fe2067587366e49ea41 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -28,6 +28,7 @@ enum cpuid_leafs CPUID_8000_000A_EDX, CPUID_7_ECX, CPUID_8000_0007_EBX, + CPUID_7_EDX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 
1 : \ @@ -135,6 +138,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ed10b5bf9b93d73b3102263c7b025af6fa9b47e9..8eb23f5cf7f4efb1dcdf991ac30dd666e866f8ca 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -12,7 +12,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 18 /* N 32-bit words worth of info */ +#define NCAPINTS 19 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -189,13 +189,20 @@ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ + +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ + +/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... 
*/ +#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ + +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -228,6 +235,7 @@ #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ @@ -252,6 +260,9 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ +#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -287,6 +298,13 @@ #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ + /* * BUG word(s) */ @@ -312,5 +330,8 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 12080d87da3b189f421d421c5d6a3cca5dc83014..2ed5a2b3f8f7e86976118ac217199f20d9f17d7d 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -43,7 +43,7 @@ struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); -DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); +DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page); static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 85599ad4d0247863cef655d02b9a4b3f83c77fb7..1f8cca459c6c8c08465aac887a9028d9e4bd252b 100644 --- 
a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -21,11 +21,13 @@ # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) # define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) # define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) +# define DISABLE_PCID 0 #else # define DISABLE_VME 0 # define DISABLE_K6_MTRR 0 # define DISABLE_CYRIX_ARR 0 # define DISABLE_CENTAUR_MCR 0 +# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) #endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -43,7 +45,7 @@ #define DISABLED_MASK1 0 #define DISABLED_MASK2 0 #define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) -#define DISABLED_MASK4 0 +#define DISABLED_MASK4 (DISABLE_PCID) #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 #define DISABLED_MASK7 0 @@ -57,6 +59,7 @@ #define DISABLED_MASK15 0 #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) #define DISABLED_MASK17 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define DISABLED_MASK18 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 59405a248fc2488c66259e5669c1f857b13954aa..9b76cd331990159dcdd042e10f90946f7440adba 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -22,8 +22,8 @@ typedef struct { #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; - unsigned int irq_tlb_count; #endif + unsigned int irq_tlb_count; #ifdef CONFIG_X86_THERMAL_VECTOR unsigned int irq_thermal_count; #endif diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b90e1053049bdd17ad36989315db8ac1b3ccc973..0817d63bce41e378743d4044662b0a6aa547379c 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -178,7 +178,7 @@ extern char irq_entries_start[]; #define VECTOR_RETRIGGERED ((void *)~0UL) typedef struct irq_desc* vector_irq_t[NR_VECTORS]; -DECLARE_PER_CPU(vector_irq_t, vector_irq); +DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq); #endif /* !ASSEMBLY_ */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 34a46dc076d3610212e6f5c9f0abfb2ab9bc3629..75b748a1deb894ab617352c9c03a58ed187cf343 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -12,6 +12,7 @@ */ #define INTEL_FAM6_CORE_YONAH 0x0E + #define INTEL_FAM6_CORE2_MEROM 0x0F #define INTEL_FAM6_CORE2_MEROM_L 0x16 #define INTEL_FAM6_CORE2_PENRYN 0x17 @@ -21,6 +22,7 @@ #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ #define INTEL_FAM6_NEHALEM_EP 0x1A #define INTEL_FAM6_NEHALEM_EX 0x2E + #define INTEL_FAM6_WESTMERE 0x25 #define INTEL_FAM6_WESTMERE_EP 0x2C #define INTEL_FAM6_WESTMERE_EX 0x2F @@ -36,9 +38,9 @@ #define INTEL_FAM6_HASWELL_GT3E 0x46 #define INTEL_FAM6_BROADWELL_CORE 0x3D -#define INTEL_FAM6_BROADWELL_XEON_D 0x56 #define INTEL_FAM6_BROADWELL_GT3E 0x47 #define INTEL_FAM6_BROADWELL_X 0x4F +#define INTEL_FAM6_BROADWELL_XEON_D 0x56 #define INTEL_FAM6_SKYLAKE_MOBILE 0x4E #define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E @@ -57,9 +59,10 @@ #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */ +#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C 
#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ +#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A /* Xeon Phi */ diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h new file mode 100644 index 0000000000000000000000000000000000000000..802bbbdfe14313eed1c3bdb1235475aed472de00 --- /dev/null +++ b/arch/x86/include/asm/kaiser.h @@ -0,0 +1,141 @@ +#ifndef _ASM_X86_KAISER_H +#define _ASM_X86_KAISER_H + +#include <asm/processor-flags.h> /* For PCID constants */ + +/* + * This file includes the definitions for the KAISER feature. + * KAISER is a countermeasure against x86_64 side channel attacks on + * the kernel virtual memory. It has a shadow pgd for every process: the + * shadow pgd has a minimalistic set of kernel mappings, but includes the whole + * user memory. Whenever the kernel is entered, on a context switch or when an + * interrupt is handled, the pgd is switched to the normal one. When the system + * switches back to user mode, the shadow pgd is enabled. This leaves the + * translation caches without usable kernel mappings, so the user may not + * attack the whole kernel memory. + * + * A minimalistic kernel mapping holds the parts needed to be mapped in user + * mode, such as the entry/exit functions of the user space, or the stacks. + */ + +#define KAISER_SHADOW_PGD_OFFSET 0x1000 + +#ifdef __ASSEMBLY__ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + +.macro _SWITCH_TO_KERNEL_CR3 reg +movq %cr3, \reg +andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg +/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */ +ALTERNATIVE "", "bts $63, \reg", X86_FEATURE_PCID +movq \reg, %cr3 +.endm + +.macro _SWITCH_TO_USER_CR3 reg regb +/* + * regb must be the low byte portion of reg: because we have arranged + * for the low byte of the user PCID to serve as the high byte of NOFLUSH + * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are + * not enabled): so that the one register can update both memory and cr3. + */ +movq %cr3, \reg +orq PER_CPU_VAR(x86_cr3_pcid_user), \reg +js 9f +/* If PCID enabled, FLUSH this time, reset to NOFLUSH for next time */ +movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7) +9: +movq \reg, %cr3 +.endm + +.macro SWITCH_KERNEL_CR3 +ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER +_SWITCH_TO_KERNEL_CR3 %rax +popq %rax +8: +.endm + +.macro SWITCH_USER_CR3 +ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER +_SWITCH_TO_USER_CR3 %rax %al +popq %rax +8: +.endm + +.macro SWITCH_KERNEL_CR3_NO_STACK +ALTERNATIVE "jmp 8f", \ + __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \ + X86_FEATURE_KAISER +_SWITCH_TO_KERNEL_CR3 %rax +movq PER_CPU_VAR(unsafe_stack_register_backup), %rax +8: +.endm + +#else /* CONFIG_PAGE_TABLE_ISOLATION */ + +.macro SWITCH_KERNEL_CR3 +.endm +.macro SWITCH_USER_CR3 +.endm +.macro SWITCH_KERNEL_CR3_NO_STACK +.endm + +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Upon kernel/user mode switch, it may happen that the address + * space has to be switched before the registers have been + * stored. To change the address space, another register is + * needed. A register therefore has to be stored/restored.
+*/ +DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + +DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); + +extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; + +extern int kaiser_enabled; +extern void __init kaiser_check_boottime_disable(void); +#else +#define kaiser_enabled 0 +static inline void __init kaiser_check_boottime_disable(void) {} +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + +/* + * Kaiser function prototypes are needed even when CONFIG_PAGE_TABLE_ISOLATION is not set, + * so that code can test kaiser_enabled instead of using #ifdefs. + */ + +/** + * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping + * @addr: the start address of the range + * @size: the size of the range + * @flags: The mapping flags of the pages + * + * The mapping is done on a global scope, so no further + * synchronization has to be done. The pages have to be + * manually unmapped again when they are no longer needed. + */ +extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); + +/** + * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping + * @addr: the start address of the range + * @size: the size of the range + */ +extern void kaiser_remove_mapping(unsigned long start, unsigned long size); + +/** + * kaiser_init - Initialize the shadow mapping + * + * Most parts of the shadow mapping can be mapped at boot + * time. Only per-process things like the thread stacks + * or a new LDT have to be mapped at runtime. These boot- + * time mappings are permanent and never unmapped. + */ +extern void kaiser_init(void); + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_KAISER_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bdde80731f490ecc05af1234d99c6ae2820975d4..20cfeeb681c604591c64cebab072690dce74fa26 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1113,7 +1113,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, static inline int emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { - return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); + return x86_emulate_instruction(vcpu, 0, + emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); } void kvm_enable_efer_bits(u64); @@ -1397,4 +1398,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #endif } +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 72198c64e646d3cbb0d9831d950e4e815003cdd3..8b272a08d1a845a48796b24dd82c8e93ce9bc549 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -33,12 +33,6 @@ typedef struct { #endif } mm_context_t; -#ifdef CONFIG_SMP void leave_mm(int cpu); -#else -static inline void leave_mm(int cpu) -{ -} -#endif #endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index f9dd2246938884c1f7d9b4557f8435ad8beb79c2..d23e35584f157f3fc748fbe3e01e33f8ae6650a1 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -99,10 +99,8 @@ static inline void load_mm_ldt(struct mm_struct *mm) static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { -#ifdef CONFIG_SMP if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); -#endif } static inline int
init_new_context(struct task_struct *tsk, diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b601ddac57193ad9f486a8d06497ad51152e94ec..c768bc1550a15056d45729343e2d10073e21bc46 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -37,6 +37,13 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) /* Intel MSRs. Some also available on other CPUs */ +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ + +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ + #define MSR_IA32_PERFCTR0 0x000000c1 #define MSR_IA32_PERFCTR1 0x000000c2 #define MSR_FSB_FREQ 0x000000cd @@ -50,6 +57,11 @@ #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) #define MSR_MTRRcap 0x000000fe + +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ + #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e @@ -330,6 +342,9 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c +#define MSR_F10H_DECFG 0xc0011029 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index b5fee97813cdf8d1408aa7ac874b21593eef8121..ed35b915b5c9760415c9ef97c879ef249205e84d 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -188,8 +188,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) * that some other imaginary CPU is updating continuously with a * time stamp. */ - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, - "lfence", X86_FEATURE_LFENCE_RDTSC); + barrier_nospec(); return rdtsc(); } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h new file mode 100644 index 0000000000000000000000000000000000000000..300cc159b4a0a47acdf1cab08822dfcde706d41d --- /dev/null +++ b/arch/x86/include/asm/nospec-branch.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_X86_NOSPEC_BRANCH_H_ +#define _ASM_X86_NOSPEC_BRANCH_H_ + +#include <asm/alternative.h> +#include <asm/alternative-asm.h> +#include <asm/cpufeatures.h> + +#ifdef __ASSEMBLY__ + +/* + * This should be used immediately before a retpoline alternative. It tells + * objtool where the retpolines are so that it can make sense of the control + * flow by just reading the original instruction(s) and ignoring the + * alternatives. + */ +.macro ANNOTATE_NOSPEC_ALTERNATIVE + .Lannotate_\@: + .pushsection .discard.nospec + .long .Lannotate_\@ - . + .popsection +.endm + +/* + * These are the bare retpoline primitives for indirect jmp and call. + * Do not use these directly; they only exist to make the ALTERNATIVE + * invocation below less ugly. + */ +.macro RETPOLINE_JMP reg:req + call .Ldo_rop_\@ +.Lspec_trap_\@: + pause + lfence + jmp .Lspec_trap_\@ +.Ldo_rop_\@: + mov \reg, (%_ASM_SP) + ret +.endm + +/* + * This is a wrapper around RETPOLINE_JMP so the called function in reg + * returns to the instruction after the macro.
+ */ +.macro RETPOLINE_CALL reg:req + jmp .Ldo_call_\@ +.Ldo_retpoline_jmp_\@: + RETPOLINE_JMP \reg +.Ldo_call_\@: + call .Ldo_retpoline_jmp_\@ +.endm + +/* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 + * attack. + */ +.macro JMP_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE_2 __stringify(jmp *\reg), \ + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD +#else + jmp *\reg +#endif +.endm + +.macro CALL_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE_2 __stringify(call *\reg), \ + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ + __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD +#else + call *\reg +#endif +.endm + +/* This clobbers the BX register */ +.macro FILL_RETURN_BUFFER nr:req ftr:req +#ifdef CONFIG_RETPOLINE + ALTERNATIVE "", "call __clear_rsb", \ftr +#endif +.endm + +#else /* __ASSEMBLY__ */ + +#define ANNOTATE_NOSPEC_ALTERNATIVE \ + "999:\n\t" \ + ".pushsection .discard.nospec\n\t" \ + ".long 999b - .\n\t" \ + ".popsection\n\t" + +#if defined(CONFIG_X86_64) && defined(RETPOLINE) + +/* + * Since the inline asm uses the %V modifier which is only in newer GCC, + * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. + */ +# define CALL_NOSPEC \ + ANNOTATE_NOSPEC_ALTERNATIVE \ + ALTERNATIVE( \ + "call *%[thunk_target]\n", \ + "call __x86_indirect_thunk_%V[thunk_target]\n", \ + X86_FEATURE_RETPOLINE) +# define THUNK_TARGET(addr) [thunk_target] "r" (addr) + +#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) +/* + * For i386 we use the original ret-equivalent retpoline, because + * otherwise we'll run out of registers. We don't care about CET + * here, anyway. + */ +# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ + " jmp 904f;\n" \ + " .align 16\n" \ + "901: call 903f;\n" \ + "902: pause;\n" \ + " lfence;\n" \ + " jmp 902b;\n" \ + " .align 16\n" \ + "903: addl $4, %%esp;\n" \ + " pushl %[thunk_target];\n" \ + " ret;\n" \ + " .align 16\n" \ + "904: call 901b;\n", \ + X86_FEATURE_RETPOLINE) + +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#else /* No retpoline for C / inline asm */ +# define CALL_NOSPEC "call *%[thunk_target]\n" +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif + +/* The Spectre V2 mitigation variants */ +enum spectre_v2_mitigation { + SPECTRE_V2_NONE, + SPECTRE_V2_RETPOLINE_MINIMAL, + SPECTRE_V2_RETPOLINE_MINIMAL_AMD, + SPECTRE_V2_RETPOLINE_GENERIC, + SPECTRE_V2_RETPOLINE_AMD, + SPECTRE_V2_IBRS, +}; + +extern char __indirect_thunk_start[]; +extern char __indirect_thunk_end[]; + +/* + * On VMEXIT we must ensure that no RSB predictions learned in the guest + * can be followed in the host, by overwriting the RSB completely. Both + * retpoline and IBRS mitigations for Spectre v2 need this; only on future + * CPUs with IBRS_ALL *might* it be avoided. 
+ */ +static inline void vmexit_fill_RSB(void) +{ +#ifdef CONFIG_RETPOLINE + alternative_input("", + "call __fill_rsb", + X86_FEATURE_RETPOLINE, + ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); +#endif +} + +static inline void indirect_branch_prediction_barrier(void) +{ + asm volatile(ALTERNATIVE("", + "movl %[msr], %%ecx\n\t" + "movl %[val], %%eax\n\t" + "movl $0, %%edx\n\t" + "wrmsr", + X86_FEATURE_USE_IBPB) + : : [msr] "i" (MSR_IA32_PRED_CMD), + [val] "i" (PRED_CMD_IBPB) + : "eax", "ecx", "edx", "memory"); +} + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 437feb436efa666dbe13732c7d4269160fed49e3..5af0401ccff2cdd81b4b01012ae77f9de4edff19 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -18,6 +18,18 @@ #ifndef __ASSEMBLY__ #include <asm/x86_init.h> +#ifdef CONFIG_PAGE_TABLE_ISOLATION +extern int kaiser_enabled; +/* + * Instead of one PGD, we acquire two PGDs. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ +#else +#define kaiser_enabled 0 +#endif +#define PGD_ALLOCATION_ORDER kaiser_enabled + void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); void ptdump_walk_pgd_level_checkwx(void); @@ -690,7 +702,17 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) static inline int pgd_bad(pgd_t pgd) { - return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; + pgdval_t ignore_flags = _PAGE_USER; + /* + * We set NX on KAISER pgds that map userspace memory so + * that userspace can not meaningfully use the kernel + * page table by accident; it will fault on the first + * instruction it tries to run. See native_set_pgd(). + */ + if (kaiser_enabled) + ignore_flags |= _PAGE_NX; + + return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) @@ -903,7 +925,15 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { - memcpy(dst, src, count * sizeof(pgd_t)); + memcpy(dst, src, count * sizeof(pgd_t)); +#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (kaiser_enabled) { + /* Clone the shadow pgd part as well */ + memcpy(native_get_shadow_pgd(dst), + native_get_shadow_pgd(src), + count * sizeof(pgd_t)); + } +#endif } #define PTE_SHIFT ilog2(PTRS_PER_PTE) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 1cc82ece9ac1819b92ec82aca72805c0e966af97..ce97c8c6a310eb84ce121bfa068307225fc5b81a 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -106,9 +106,32 @@ static inline void native_pud_clear(pud_t *pud) native_set_pud(pud, native_make_pud(0)); } +#ifdef CONFIG_PAGE_TABLE_ISOLATION +extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); + +static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) +{ +#ifdef CONFIG_DEBUG_VM + /* linux/mmdebug.h may not have been included at this point */ + BUG_ON(!kaiser_enabled); +#endif + return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE); +} +#else +static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) +{ + return pgd; +} +static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) +{ + BUILD_BUG_ON(1); + return NULL; +} +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { - *pgdp = pgd; + *pgdp = kaiser_set_shadow_pgd(pgdp, pgd); } static inline void native_pgd_clear(pgd_t *pgd)
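Worth spelling out why the plain OR in native_get_shadow_pgd() above is enough: the PGD pair is an order-1 allocation, i.e. 8k in size and 8k-aligned, so bit 12 of the kernel pgd pointer is guaranteed clear and setting it lands exactly on the second 4k half. A minimal user-space sketch of the same pointer trick follows; it is an illustration only, not part of the patch, and aligned_alloc() merely stands in for the kernel's order-1 page allocation.

/*
 * Illustration only: the order-1 "two PGDs, flip bit 12" trick,
 * demonstrated in user space.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	/* 8k-sized, 8k-aligned, like an order-1 page allocation */
	void *pair = aligned_alloc(2 * PAGE_SIZE, 2 * PAGE_SIZE);
	uintptr_t kernel_pgd, shadow_pgd;

	if (!pair)
		return 1;

	kernel_pgd = (uintptr_t)pair;
	assert((kernel_pgd & PAGE_SIZE) == 0);	/* bit 12 starts out clear */

	/* Setting bit 12 selects the second 4k half: the shadow pgd. */
	shadow_pgd = kernel_pgd | PAGE_SIZE;

	printf("kernel pgd %#lx, shadow pgd %#lx\n",
	       (unsigned long)kernel_pgd, (unsigned long)shadow_pgd);
	free(pair);
	return 0;
}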
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8b4de22d64299e8997e8b12270e5c23112f85597..f1c8ac46829289415a0fdc9325c0fd926b1f1920 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -119,7 +119,7 @@ #define _PAGE_DEVMAP (_AT(pteval_t, 0)) #endif -#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) +#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ _PAGE_ACCESSED | _PAGE_DIRTY) @@ -137,6 +137,33 @@ _PAGE_SOFT_DIRTY) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) +/* The ASID is the lower 12 bits of CR3 */ +#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL)) + +/* Mask for all the PCID-related bits in CR3: */ +#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK) +#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) + +#if defined(CONFIG_PAGE_TABLE_ISOLATION) && defined(CONFIG_X86_64) +/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */ +#define X86_CR3_PCID_ASID_USER (_AC(0x80,UL)) + +#define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN) +#define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER) +#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN) +#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER) +#else +#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL)) +/* + * PCIDs are unsupported on 32-bit and none of these bits can be + * set in CR3: + */ +#define X86_CR3_PCID_KERN_FLUSH (0) +#define X86_CR3_PCID_USER_FLUSH (0) +#define X86_CR3_PCID_KERN_NOFLUSH (0) +#define X86_CR3_PCID_USER_NOFLUSH (0) +#endif + /* * The cache modes defined here are used to translate between pure SW usage * and the HW defined cache mode bits and/or PAT entries. diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 83db0eae99797da9fca22c59eee3fb1ed12ca373..cb866ae1bc5d5f49c77d93e8553a7dc0feffcb5a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -156,8 +156,8 @@ extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; extern struct tss_struct doublefault_tss; -extern __u32 cpu_caps_cleared[NCAPINTS]; -extern __u32 cpu_caps_set[NCAPINTS]; +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); @@ -308,7 +308,7 @@ struct tss_struct { } ____cacheline_aligned; -DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); +DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss); #ifdef CONFIG_X86_32 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); @@ -391,8 +391,6 @@ struct thread_struct { unsigned short gsindex; #endif - u32 status; /* thread synchronous flags */ - #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; @@ -596,7 +594,7 @@ static inline void sync_core(void) { int tmp; -#ifdef CONFIG_M486 +#ifdef CONFIG_X86_32 /* * Do a CPUID if available, otherwise do a jump. The jump * can conveniently enough be the jump around CPUID. 
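The X86_CR3_PCID_* definitions above combine three ingredients into a single CR3 value: the pgd's physical address, bit 12 selecting the shadow (user) half, the PCID in bits 0-11, and bit 63 suppressing the implicit TLB flush on load (the "bts $63" in the asm macros). A hedged sketch of how a user-mode CR3 value would be assembled from them — build_user_cr3() is a hypothetical helper for illustration, not code from the patch, and the constants mirror the definitions above:

/* Illustration only: compose a KAISER user-mode CR3 value. */
#include <stdio.h>

#define KAISER_SHADOW_PGD_OFFSET 0x1000ULL	/* bit 12: shadow pgd half */
#define X86_CR3_PCID_NOFLUSH     (1ULL << 63)	/* skip implicit TLB flush */
#define X86_CR3_PCID_ASID_USER   0x80ULL	/* user ASID, as defined above */

/* Hypothetical helper, not part of the patch. */
static unsigned long long build_user_cr3(unsigned long long pgd_pa, int noflush)
{
	/* physical pgd address | shadow half | user PCID */
	unsigned long long cr3 = pgd_pa | KAISER_SHADOW_PGD_OFFSET |
				 X86_CR3_PCID_ASID_USER;

	if (noflush)
		cr3 |= X86_CR3_PCID_NOFLUSH;
	return cr3;
}

int main(void)
{
	printf("user cr3 = %#llx\n", build_user_cr3(0x1234000ULL, 1));
	return 0;
}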
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index fac9a5c0abe94b233b72b35bca8c7a665847b694..6847d85400a8b738ca2adf00ea95fdf8f349fafc 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -100,6 +100,7 @@ #define REQUIRED_MASK15 0 #define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define REQUIRED_MASK18 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index e3c95e8e61c5fca9808524ba47d539bd0377ece2..03eedc21246d5b65d39ffb45ce27c588151d7488 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, * TS_COMPAT is set for 32-bit syscall entries and then * remains set until we return to user mode. */ - if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) /* * Sign-extend the value so (int)-EFOO becomes (long)-EFOO * and will match correctly in comparisons. @@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread.status & TS_COMPAT) + if (task->thread_info.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; @@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread.status & TS_COMPAT) + if (task->thread_info.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 91dfcafe27a662cba41d162bf374d9632961406e..bad25bb80679fe79cec042a0644d1205db5498fc 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); asmlinkage long sys_iopl(unsigned int); /* kernel/ldt.c */ -asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); +asmlinkage long sys_modify_ldt(int, void __user *, unsigned long); /* kernel/signal.c */ asmlinkage long sys_rt_sigreturn(void); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ad6f5eb07a95bd221fe4e13c8cdb3af0cd27aa37..89978b9c667a68bd8ec1d384e2a94884f8780af7 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -54,6 +54,7 @@ struct task_struct; struct thread_info { unsigned long flags; /* low level flags */ + u32 status; /* thread synchronous flags */ }; #define INIT_THREAD_INFO(tsk) \ @@ -152,17 +153,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -static inline unsigned long current_stack_pointer(void) -{ - unsigned long sp; -#ifdef CONFIG_X86_64 - asm("mov %%rsp,%0" : "=g" (sp)); -#else - asm("mov %%esp,%0" : "=g" (sp)); -#endif - return sp; -} - /* * Walks up the stack frames to make sure that the specified object is * entirely contained by a single stack frame. 
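The current_stack_pointer() helper is removed above because later hunks in this series (see the irq_32.c changes below) read current_stack_pointer as a plain variable rather than calling a function. A sketch of the register-variable idiom that presumably replaces it elsewhere in the series (the exact kernel definition is not visible in this excerpt; DEMO_ASM_SP and the DEMO_THREAD_SIZE value are assumptions for illustration):

/*
 * Bind the symbol directly to the stack-pointer register; every read
 * then compiles to a plain register access instead of a call and a mov.
 * Uses the GCC global register variable extension.
 */
#ifdef __x86_64__
# define DEMO_ASM_SP "rsp"
#else
# define DEMO_ASM_SP "esp"
#endif

register unsigned long current_stack_pointer asm(DEMO_ASM_SP);

#define DEMO_THREAD_SIZE (2 * 4096UL)	/* illustrative; the real value is config-dependent */

/* usage mirroring the irq_32.c hunks below: mask down to the stack base */
static inline void *demo_current_stack(void)
{
	return (void *)(current_stack_pointer & ~(DEMO_THREAD_SIZE - 1));
}

The parentheses-free form also explains the mechanical s/current_stack_pointer()/current_stack_pointer/ changes in the irq_32.c hunks further down.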
@@ -224,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack, #define in_ia32_syscall() true #else #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ - current->thread.status & TS_COMPAT) + current_thread_info()->status & TS_COMPAT) #endif /* diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index fc5abff9b7fd63d6b3a01a18061be8b3f752d109..94146f665a3ce9f251425c6b6c859393ce365653 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -7,6 +7,7 @@ #include #include #include +#include static inline void __invpcid(unsigned long pcid, unsigned long addr, unsigned long type) @@ -65,10 +66,8 @@ static inline void invpcid_flush_all_nonglobals(void) #endif struct tlb_state { -#ifdef CONFIG_SMP struct mm_struct *active_mm; int state; -#endif /* * Access to this CR4 shadow and to H/W CR4 is protected by @@ -133,6 +132,24 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) cr4_set_bits(mask); } +/* + * Declare a couple of kaiser interfaces here for convenience, + * to avoid the need for asm/kaiser.h in unexpected places. + */ +#ifdef CONFIG_PAGE_TABLE_ISOLATION +extern int kaiser_enabled; +extern void kaiser_setup_pcid(void); +extern void kaiser_flush_tlb_on_return_to_user(void); +#else +#define kaiser_enabled 0 +static inline void kaiser_setup_pcid(void) +{ +} +static inline void kaiser_flush_tlb_on_return_to_user(void) +{ +} +#endif + static inline void __native_flush_tlb(void) { /* @@ -141,6 +158,8 @@ static inline void __native_flush_tlb(void) * back: */ preempt_disable(); + if (kaiser_enabled) + kaiser_flush_tlb_on_return_to_user(); native_write_cr3(native_read_cr3()); preempt_enable(); } @@ -150,20 +169,27 @@ static inline void __native_flush_tlb_global_irq_disabled(void) unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); - /* clear PGE */ - native_write_cr4(cr4 & ~X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); + if (cr4 & X86_CR4_PGE) { + /* clear PGE and flush TLB of all entries */ + native_write_cr4(cr4 & ~X86_CR4_PGE); + /* restore PGE as it was before */ + native_write_cr4(cr4); + } else { + /* do it with cr3, letting kaiser flush user PCID */ + __native_flush_tlb(); + } } static inline void __native_flush_tlb_global(void) { unsigned long flags; - if (static_cpu_has(X86_FEATURE_INVPCID)) { + if (this_cpu_has(X86_FEATURE_INVPCID)) { /* * Using INVPCID is considerably faster than a pair of writes * to CR4 sandwiched inside an IRQ flag save/restore. + * + * Note, this works with CR4.PCIDE=0 or 1. */ invpcid_flush_all(); return; @@ -175,23 +201,52 @@ static inline void __native_flush_tlb_global(void) * be called from deep inside debugging code.) */ raw_local_irq_save(flags); - __native_flush_tlb_global_irq_disabled(); - raw_local_irq_restore(flags); } static inline void __native_flush_tlb_single(unsigned long addr) { - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + /* + * SIMICS #GP's if you run INVPCID with type 2/3 + * and X86_CR4_PCIDE clear. Shame! + * + * The ASIDs used below are hard-coded. But, we must not + * call invpcid(type=1/2) before CR4.PCIDE=1. Just call + * invlpg in the case we are called early. + */ + + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { + if (kaiser_enabled) + kaiser_flush_tlb_on_return_to_user(); + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + return; + } + /* Flush the address out of both PCIDs. 
*/ + /* + * An optimization here might be to determine addresses + * that are only kernel-mapped and only flush the kernel + * ASID. But, userspace flushes are probably much more + * important performance-wise. + * + * Make sure to do only a single invpcid when KAISER is + * disabled and we have only a single ASID. + */ + if (kaiser_enabled) + invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); + invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); } static inline void __flush_tlb_all(void) { - if (boot_cpu_has(X86_FEATURE_PGE)) - __flush_tlb_global(); - else - __flush_tlb(); + __flush_tlb_global(); + /* + * Note: if we somehow had PCID but not PGE, then this wouldn't work -- + * we'd end up flushing kernel translations for the current ASID but + * we might fail to flush kernel translations for other cached ASIDs. + * + * To avoid this issue, we force PCID off if PGE is off. + */ } static inline void __flush_tlb_one(unsigned long addr) @@ -205,7 +260,6 @@ static inline void __flush_tlb_one(unsigned long addr) /* * TLB flushing: * - * - flush_tlb() flushes the current mm struct TLBs * - flush_tlb_all() flushes all processes TLBs * - flush_tlb_mm(mm) flushes the specified mm context TLB's * - flush_tlb_page(vma, vmaddr) flushes one page @@ -217,84 +271,6 @@ static inline void __flush_tlb_one(unsigned long addr) * and page-granular flushes are available only on i486 and up. */ -#ifndef CONFIG_SMP - -/* "_up" is for UniProcessor. - * - * This is a helper for other header functions. *Not* intended to be called - * directly. All global TLB flushes need to either call this, or to bump the - * vm statistics themselves. - */ -static inline void __flush_tlb_up(void) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - __flush_tlb(); -} - -static inline void flush_tlb_all(void) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - __flush_tlb_all(); -} - -static inline void flush_tlb(void) -{ - __flush_tlb_up(); -} - -static inline void local_flush_tlb(void) -{ - __flush_tlb_up(); -} - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - if (mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_one(addr); -} - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void flush_tlb_mm_range(struct mm_struct *mm, - unsigned long start, unsigned long end, unsigned long vmflag) -{ - if (mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, - unsigned long end) -{ -} - -static inline void reset_lazy_tlbstate(void) -{ -} - -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - flush_tlb_all(); -} - -#else /* SMP */ - -#include - #define local_flush_tlb() __flush_tlb() #define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL) @@ -303,13 +279,14 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) extern void flush_tlb_all(void); -extern void flush_tlb_current_task(void); -extern void flush_tlb_page(struct vm_area_struct *, unsigned long); extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag); extern void flush_tlb_kernel_range(unsigned long start, 
unsigned long end); -#define flush_tlb() flush_tlb_current_task() +static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) +{ + flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE); +} void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, @@ -324,8 +301,6 @@ static inline void reset_lazy_tlbstate(void) this_cpu_write(cpu_tlbstate.active_mm, &init_mm); } -#endif /* SMP */ - #ifndef CONFIG_PARAVIRT #define flush_tlb_others(mask, mm, start, end) \ native_flush_tlb_others(mask, mm, start, end) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 01fd0a7f48cd2f97dac2c1fd20e809e17975ad34..688315b7a922c13a5030c2190a31d50b27ac140a 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -92,6 +92,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *, long); #endif +dotraplinkage void do_mce(struct pt_regs *, long); static inline int get_si_code(unsigned long condition) { diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 0bd651e65cd15f75cb050dbcd694bdbfc415b24b..0c8784091b4baf3d3d5aee992601794ec48a34fc 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -123,6 +123,11 @@ extern int __get_user_bad(void); #define __uaccess_begin() stac() #define __uaccess_end() clac() +#define __uaccess_begin_nospec() \ +({ \ + stac(); \ + barrier_nospec(); \ +}) /* * This is a type: either unsigned long, if the argument fits into @@ -432,7 +437,7 @@ do { \ ({ \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -474,6 +479,10 @@ struct __large_struct { unsigned long buf[100]; }; __uaccess_begin(); \ barrier(); +#define uaccess_try_nospec do { \ + current->thread.uaccess_err = 0; \ + __uaccess_begin_nospec(); \ + #define uaccess_catch(err) \ __uaccess_end(); \ (err) |= (current->thread.uaccess_err ? 
-EFAULT : 0); \ @@ -538,7 +547,7 @@ struct __large_struct { unsigned long buf[100]; }; * get_user_ex(...); * } get_user_catch(err) */ -#define get_user_try uaccess_try +#define get_user_try uaccess_try_nospec #define get_user_catch(err) uaccess_catch(err) #define get_user_ex(x, ptr) do { \ @@ -573,7 +582,7 @@ extern void __cmpxchg_wrong_size(void) __typeof__(ptr) __uval = (uval); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ switch (size) { \ case 1: \ { \ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 7d3bdd1ed6977b5e1f69dc8ba3e3d6cfa8f861a3..d6d245088dd56dc223e4456eba2a4eaa860c5a49 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -102,17 +102,17 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) switch (n) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u8 *)to, from, 1, ret, 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u16 *)to, from, 2, ret, 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u32 *)to, from, 4, ret, 4); __uaccess_end(); return ret; @@ -130,17 +130,17 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, switch (n) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u8 *)to, from, 1, ret, 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u16 *)to, from, 2, ret, 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u32 *)to, from, 4, ret, 4); __uaccess_end(); return ret; diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 673059a109fee067a6739704470f947fd7acae57..6e5cc08134babfd16bf6ae30d3b4e7f81af26341 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -59,31 +59,31 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) return copy_user_generic(dst, (__force void *)src, size); switch (size) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u8 *)dst, (u8 __user *)src, ret, "b", "b", "=q", 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u16 *)dst, (u16 __user *)src, ret, "w", "w", "=r", 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u32 *)dst, (u32 __user *)src, ret, "l", "k", "=r", 4); __uaccess_end(); return ret; case 8: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 8); __uaccess_end(); return ret; case 10: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 10); if (likely(!ret)) @@ -93,7 +93,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) __uaccess_end(); return ret; case 16: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 16); if (likely(!ret)) diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index 6ba66ee79710ffdc19806994df22375859b95736..62210da19a929a5fef69e7f268471533a1076285 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -12,12 +12,15 
@@ extern void map_vsyscall(void); * Returns true if handled. */ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); +extern bool vsyscall_enabled(void); #else static inline void map_vsyscall(void) {} static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) { return false; } +static inline bool vsyscall_enabled(void) { return false; } #endif +extern unsigned long vsyscall_pgprot; #endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index dc8a00a93f73b5a147b07cf162fd00828dfb231a..1fdcb8cda74a0f332cd5593818207d9b704cb3b7 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -215,9 +216,9 @@ privcmd_call(unsigned call, __HYPERCALL_5ARG(a1, a2, a3, a4, a5); stac(); - asm volatile("call *%[call]" + asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM - : [call] "a" (&hypercall_page[call]) + : [thunk_target] "a" (&hypercall_page[call]) : __HYPERCALL_CLOBBER5); clac(); diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index 567de50a4c2a59c1a85ed4ee6025498352227dc1..6768d1321016d372e58c0f6c59cba1553a3a12e8 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -77,7 +77,8 @@ #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT) #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */ #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT) -#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */ +#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ +#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) /* * Intel CPU features in CR4 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b89bef95f63b13cc55aa8985763bb3ea998afb57..0a1e8a67cc998a5c2f0bca55a40a38b4a837e706 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -335,13 +335,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, + u8 trigger, u32 gsi); + static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { - int ioapic; - int pin; - struct mpc_intsrc mp_irq; - /* * Check bus_irq boundary. */ @@ -350,14 +349,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, return; } - /* - * Convert 'gsi' to 'ioapic.pin'. 
- */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = mp_find_ioapic_pin(ioapic, gsi); - /* * TBD: This check is for faulty timer entries, where the override * erroneously sets the trigger to level, resulting in a HUGE @@ -366,16 +357,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, if ((bus_irq == 0) && (trigger == 3)) trigger = 1; - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (trigger << 2) | polarity; - mp_irq.srcbus = MP_ISA_BUS; - mp_irq.srcbusirq = bus_irq; /* IRQ */ - mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */ - mp_irq.dstirq = pin; /* INTIN# */ - - mp_save_irq(&mp_irq); - + if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0) + return; /* * Reset default identity mapping if gsi is also an legacy IRQ, * otherwise there will be more than one entry with the same GSI @@ -422,6 +405,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, return 0; } +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, + u8 trigger, u32 gsi) +{ + struct mpc_intsrc mp_irq; + int ioapic, pin; + + /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */ + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) { + pr_warn("Failed to find ioapic for gsi : %u\n", gsi); + return ioapic; + } + + pin = mp_find_ioapic_pin(ioapic, gsi); + + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger << 2) | polarity; + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.srcbusirq = bus_irq; + mp_irq.dstapic = mpc_ioapic_id(ioapic); + mp_irq.dstirq = pin; + + mp_save_irq(&mp_irq); + + return 0; +} + static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { @@ -466,7 +477,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; - mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + if (bus_irq < NR_IRQS_LEGACY) + mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + else + mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* @@ -720,7 +735,7 @@ static void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5cb272a7a5a32eccbbee61a23c50ab9b5a80a0fb..03b6e5c6cf2310316cca63d1aa981580cd0ef077 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str) } __setup("noreplace-smp", setup_noreplace_smp); -#ifdef CONFIG_PARAVIRT -static int __initdata_or_module noreplace_paravirt = 0; - -static int __init setup_noreplace_paravirt(char *str) -{ - noreplace_paravirt = 1; - return 1; -} -__setup("noreplace-paravirt", setup_noreplace_paravirt); -#endif - #define DPRINTK(fmt, args...) 
\ do { \ if (debug_alternative) \ @@ -340,9 +329,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) { unsigned long flags; + int i; - if (instr[0] != 0x90) - return; + for (i = 0; i < a->padlen; i++) { + if (instr[i] != 0x90) + return; + } local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); @@ -585,9 +577,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *p; char insnbuf[MAX_PATCH_LEN]; - if (noreplace_paravirt) - return; - for (p = start; p < end; p++) { unsigned int used; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f3557a1eb562fbe6e46b2e3db3289ca8535b1b6c..b5229abd1629f163f14bbf30ef9e871691d505aa 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -361,14 +361,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irq_data->chip_data = data; irq_data->hwirq = virq + i; err = assign_irq_vector_policy(virq + i, node, data, info); - if (err) + if (err) { + irq_data->chip_data = NULL; + free_apic_chip_data(data); goto error; + } } return 0; error: - x86_vector_free_irqs(domain, virq, i + 1); + x86_vector_free_irqs(domain, virq, i); return err; } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4a8697f7d4ef804921c7422fac02f28c4b8b0c2b..33b63670bf09e6d34c1f194cfe3600d9ea16dcb4 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -20,13 +20,11 @@ obj-y := intel_cacheinfo.o scattered.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o +obj-y += bugs.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_X86_32) += bugs.o -obj-$(CONFIG_X86_64) += bugs_64.o - obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2b4cf04239b6c011a9d9a5ef84bdca5ef073a69e..1b89f0c4251e5a1413674c60dbe35e7fbbeb9923 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -782,8 +782,32 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K8); if (cpu_has(c, X86_FEATURE_XMM2)) { - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + unsigned long long val; + int ret; + + /* + * A serializing LFENCE has less overhead than MFENCE, so + * use it for execution serialization. On families which + * don't have that MSR, LFENCE is already serializing. + * msr_set_bit() uses the safe accessors, too, even if the MSR + * is not present. + */ + msr_set_bit(MSR_F10H_DECFG, + MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + + /* + * Verify that the MSR write was successful (could be running + * under a hypervisor) and only then assume that LFENCE is + * serializing. 
+ */ + ret = rdmsrl_safe(MSR_F10H_DECFG, &val); + if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) { + /* A serializing LFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + } else { + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } } /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bd17db15a2c1ef07412c73f064f282a3f55c9313..957ad443b786126d2060e178a0524fe24e09046d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -9,6 +9,11 @@ */ #include #include +#include +#include + +#include +#include #include #include #include @@ -16,15 +21,25 @@ #include #include #include +#include +#include +#include + +static void __init spectre_v2_select_mitigation(void); void __init check_bugs(void) { identify_boot_cpu(); -#ifndef CONFIG_SMP - pr_info("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif + if (!IS_ENABLED(CONFIG_SMP)) { + pr_info("CPU: "); + print_cpu_info(&boot_cpu_data); + } + + /* Select the proper spectre mitigation before patching alternatives */ + spectre_v2_select_mitigation(); + +#ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. * @@ -40,4 +55,284 @@ void __init check_bugs(void) alternative_instructions(); fpu__init_check_bugs(); +#else /* CONFIG_X86_64 */ + alternative_instructions(); + + /* + * Make sure the first 2MB area is not mapped by huge pages + * There are typically fixed size MTRRs in there and overlapping + * MTRRs into large pages causes slow downs. + * + * Right now we don't do that with gbpages because there seems + * very little benefit for that case. + */ + if (!direct_gbpages) + set_memory_4k((unsigned long)__va(0), 1); +#endif +} + +/* The kernel command line selection */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +static const char *spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", + [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", +}; + +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; + +#ifdef RETPOLINE +static bool spectre_v2_bad_module; + +bool retpoline_module_ok(bool has_retpoline) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) + return true; + + pr_err("System may be vulnerable to spectre v2\n"); + spectre_v2_bad_module = true; + return false; +} + +static inline const char *spectre_v2_module_string(void) +{ + return spectre_v2_bad_module ? 
" - vulnerable module loaded" : ""; +} +#else +static inline const char *spectre_v2_module_string(void) { return ""; } +#endif + +static void __init spec2_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s selected on command line.\n", reason); +} + +static void __init spec2_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s selected on command line.\n", reason); +} + +static inline bool retp_compiler(void) +{ + return __is_defined(RETPOLINE); +} + +static inline bool match_option(const char *arg, int arglen, const char *opt) +{ + int len = strlen(opt); + + return len == arglen && !strncmp(arg, opt, len); +} + +static const struct { + const char *option; + enum spectre_v2_mitigation_cmd cmd; + bool secure; +} mitigation_options[] = { + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, +}; + +static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) +{ + char arg[20]; + int ret, i; + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; + + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_NONE; + else { + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, + sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + } + + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + !IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + return SPECTRE_V2_CMD_AUTO; + } + + if (mitigation_options[i].secure) + spec2_print_if_secure(mitigation_options[i].option); + else + spec2_print_if_insecure(mitigation_options[i].option); + + return cmd; +} + +/* Check for Skylake-like CPUs (for RSB handling) */ +static bool __init is_skylake_era(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6) { + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: + return true; + } + } + return false; } + +static void __init spectre_v2_select_mitigation(void) +{ + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); + enum spectre_v2_mitigation mode = SPECTRE_V2_NONE; + + /* + * If the CPU is not affected and the command line mode is NONE or AUTO + * then nothing to do. 
+ */ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) && + (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO)) + return; + + switch (cmd) { + case SPECTRE_V2_CMD_NONE: + return; + + case SPECTRE_V2_CMD_FORCE: + case SPECTRE_V2_CMD_AUTO: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; + case SPECTRE_V2_CMD_RETPOLINE_AMD: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_amd; + break; + case SPECTRE_V2_CMD_RETPOLINE_GENERIC: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_generic; + break; + case SPECTRE_V2_CMD_RETPOLINE: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; + } + pr_err("kernel not compiled with retpoline; no mitigation available!"); + return; + +retpoline_auto: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + retpoline_amd: + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("LFENCE not serializing. Switching to generic retpoline\n"); + goto retpoline_generic; + } + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : + SPECTRE_V2_RETPOLINE_MINIMAL_AMD; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } else { + retpoline_generic: + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : + SPECTRE_V2_RETPOLINE_MINIMAL; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } + + spectre_v2_enabled = mode; + pr_info("%s\n", spectre_v2_strings[mode]); + + /* + * If neither SMEP nor KPTI is available, there is a risk of + * hitting userspace addresses in the RSB after a context switch + * from a shallow call stack to a deeper one. To prevent this, fill + * the entire RSB, even when using IBRS. + * + * Skylake-era CPUs have a separate issue with *underflow* of the + * RSB, when they will predict 'ret' targets from the generic BTB. + * The proper mitigation for this is IBRS. If IBRS is not supported + * or deactivated in favour of retpolines, the RSB fill on context + * switch is required. + */ + if ((!boot_cpu_has(X86_FEATURE_KAISER) && + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Filling RSB on context switch\n"); + } + + /* Initialize Indirect Branch Prediction Barrier if supported */ + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + pr_info("Enabling Indirect Branch Prediction Barrier\n"); + } +} + +#undef pr_fmt + +#ifdef CONFIG_SYSFS +ssize_t cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + return sprintf(buf, "Not affected\n"); + if (boot_cpu_has(X86_FEATURE_KAISER)) + return sprintf(buf, "Mitigation: PTI\n"); + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) + return sprintf(buf, "Not affected\n"); + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_USE_IBPB) ?
", IBPB" : "", + spectre_v2_module_string()); +} +#endif diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c deleted file mode 100644 index a972ac4c7e7df05238e78b08e093cded47640fcd..0000000000000000000000000000000000000000 --- a/arch/x86/kernel/cpu/bugs_64.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * Copyright (C) 2000 SuSE - */ - -#include -#include -#include -#include -#include -#include -#include - -void __init check_bugs(void) -{ - identify_boot_cpu(); -#if !defined(CONFIG_SMP) - pr_info("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - alternative_instructions(); - - /* - * Make sure the first 2MB area is not mapped by huge pages - * There are typically fixed size MTRRs in there and overlapping - * MTRRs into large pages causes slow downs. - * - * Right now we don't do that with gbpages because there seems - * very little benefit for that case. - */ - if (!direct_gbpages) - set_memory_4k((unsigned long)__va(0), 1); -} diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4eece91ada374d1dc25c39b763861c755354d060..08e89ed6aa87b23035c59107bd4ecab4efdfe861 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -44,6 +44,8 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -93,7 +95,7 @@ static const struct cpu_dev default_cpu = { static const struct cpu_dev *this_cpu = &default_cpu; -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { +DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 /* * We need valid kernel segments for data and code in long mode too @@ -163,6 +165,24 @@ static int __init x86_mpx_setup(char *s) } __setup("nompx", x86_mpx_setup); +#ifdef CONFIG_X86_64 +static int __init x86_pcid_setup(char *s) +{ + /* require an exact match without trailing characters */ + if (strlen(s)) + return 0; + + /* do not emit a message if the feature is not present */ + if (!boot_cpu_has(X86_FEATURE_PCID)) + return 1; + + setup_clear_cpu_cap(X86_FEATURE_PCID); + pr_info("nopcid: PCID feature disabled\n"); + return 1; +} +__setup("nopcid", x86_pcid_setup); +#endif + static int __init x86_noinvpcid_setup(char *s) { /* noinvpcid doesn't accept parameters */ @@ -306,6 +326,39 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) } } +static void setup_pcid(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PCID)) { + if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) { + cr4_set_bits(X86_CR4_PCIDE); + /* + * INVPCID has two "groups" of types: + * 1/2: Invalidate an individual address + * 3/4: Invalidate all contexts + * + * 1/2 take a PCID, but 3/4 do not. So, 3/4 + * ignore the PCID argument in the descriptor. + * But, we have to be careful not to call 1/2 + * with an actual non-zero PCID in them before + * we do the above cr4_set_bits(). + */ + if (cpu_has(c, X86_FEATURE_INVPCID)) + set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE); + } else { + /* + * flush_tlb_all(), as currently implemented, won't + * work if PCID is on but PGE is not. Since that + * combination doesn't exist on real hardware, there's + * no reason to try to fully support it, but it's + * polite to avoid corrupting data if we're on + * an improperly configured VM. + */ + clear_cpu_cap(c, X86_FEATURE_PCID); + } + } + kaiser_setup_pcid(); +} + /* * Protection Keys are not available in 32-bit mode. 
*/ @@ -429,8 +482,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } -__u32 cpu_caps_cleared[NCAPINTS]; -__u32 cpu_caps_set[NCAPINTS]; +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; void load_percpu_segment(int cpu) { @@ -655,6 +708,36 @@ void cpu_detect(struct cpuinfo_x86 *c) } } +static void apply_forced_caps(struct cpuinfo_x86 *c) +{ + int i; + + for (i = 0; i < NCAPINTS + NBUGINTS; i++) { + c->x86_capability[i] &= ~cpu_caps_cleared[i]; + c->x86_capability[i] |= cpu_caps_set[i]; + } +} + +static void init_speculation_control(struct cpuinfo_x86 *c) +{ + /* + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. + * + * We use the AMD bits in 0x8000_0008 EBX as the generic hardware + * features, which are visible in /proc/cpuinfo and used by the + * kernel. So set those accordingly from the Intel bits. + */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -676,6 +759,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); c->x86_capability[CPUID_7_0_EBX] = ebx; c->x86_capability[CPUID_7_ECX] = ecx; + c->x86_capability[CPUID_7_EDX] = edx; } /* Extended state features: level 0x0000000d */ @@ -748,6 +832,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); init_scattered_cpuid_features(c); + init_speculation_control(c); } static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) @@ -776,6 +861,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } +static const __initconst struct x86_cpu_id cpu_no_speculation[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_CENTAUR, 5 }, + { X86_VENDOR_INTEL, 5 }, + { X86_VENDOR_NSC, 5 }, + { X86_VENDOR_ANY, 4 }, + {} +}; + +static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { + { X86_VENDOR_AMD }, + {} +}; + +static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) +{ + u64 ia32_cap = 0; + + if (x86_match_cpu(cpu_no_meltdown)) + return false; + + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + /* Rogue Data Cache Load? No! */ + if (ia32_cap & ARCH_CAP_RDCL_NO) + return false; + + return true; +} + /* * Do minimum CPU detection early. 
* Fields really needed: vendor, cpuid_level, family, model, mask, @@ -821,7 +941,23 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) } setup_force_cpu_cap(X86_FEATURE_ALWAYS); + + if (!x86_match_cpu(cpu_no_speculation)) { + if (cpu_vulnerable_to_meltdown(c)) + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + } + fpu__init_system(c); + +#ifdef CONFIG_X86_32 + /* + * Regardless of whether PCID is enumerated, the SDM says + * that it can't be enabled in 32-bit mode. + */ + setup_clear_cpu_cap(X86_FEATURE_PCID); +#endif } void __init early_cpu_init(void) @@ -1035,10 +1171,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); /* Clear/Set all flags overridden by options, after probe */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } + apply_forced_caps(c); #ifdef CONFIG_X86_64 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); @@ -1064,6 +1197,9 @@ static void identify_cpu(struct cpuinfo_x86 *c) setup_smep(c); setup_smap(c); + /* Set up PCID */ + setup_pcid(c); + /* * The vendor-specific functions might have changed features. * Now we do "generic changes." @@ -1097,10 +1233,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) * Clear/Set all flags overridden by options, need do it * before following smp all cpus cap AND. */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } + apply_forced_caps(c); /* * On SMP, boot_cpu_data holds the common feature set between @@ -1325,7 +1458,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { [DEBUG_STACK - 1] = DEBUG_STKSZ }; -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks +DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); /* May not be marked __init: used by software suspend */ @@ -1483,6 +1616,14 @@ void cpu_init(void) * try to read it. */ cr4_init_shadow(); + if (!kaiser_enabled) { + /* + * secondary_startup_64() deferred setting PGE in cr4: + * probe_page_size_mask() sets it on the boot cpu, + * but it needs to be set on each secondary cpu. + */ + cr4_set_bits(X86_CR4_PGE); + } /* * Load microcode on this cpu if a valid microcode is available. diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fcd484d2bb034a4533d3c4355d3ace82c92060dd..4097b43cba2da0c5ebabfc807cb504d116049dbf 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -61,6 +61,59 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) } } +/* + * Early microcode releases for the Spectre v2 mitigation were broken. 
+ * Information taken from; + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf + * - https://kb.vmware.com/s/article/52345 + * - Microcode revisions observed in the wild + * - Release note from 20180108 microcode release + */ +struct sku_microcode { + u8 model; + u8 stepping; + u32 microcode; +}; +static const struct sku_microcode spectre_bad_microcodes[] = { + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, + { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, + { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, + { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, + { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, + { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, + { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, + { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, + /* Updated in the 20180108 release; blacklist until we know otherwise */ + { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, + /* Observed in the wild */ + { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, + { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, +}; + +static bool bad_spectre_microcode(struct cpuinfo_x86 *c) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { + if (c->x86_model == spectre_bad_microcodes[i].model && + c->x86_mask == spectre_bad_microcodes[i].stepping) + return (c->microcode <= spectre_bad_microcodes[i].microcode); + } + return false; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -87,6 +140,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } + /* Now if any of them are set, check the blacklist and clear the lot */ + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_INTEL_STIBP) || + cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { + pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); + setup_clear_cpu_cap(X86_FEATURE_IBRS); + setup_clear_cpu_cap(X86_FEATURE_IBPB); + setup_clear_cpu_cap(X86_FEATURE_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); + } + /* * Atom erratum AAE44/AAF40/AAG38/AAH41: * diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index de6626c18e427b771ea902451ae77ab52e233915..be633715650212059e8a67eb7a4974b188d5e076 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -934,6 +934,8 @@ static int __populate_cache_leaves(unsigned int cpu) ci_leaf_init(this_leaf++, &id4_regs); __cache_cpumap_setup(cpu, idx, &id4_regs); } + this_cpu_ci->cpu_map_populated = true; + return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8ca5f8ad008eb2541b5745600572c1ad08b117c8..fe5cd6ea1f0e9638d93feff57992ee1941534b7d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1754,6 +1754,11 @@ static void unexpected_machine_check(struct 
pt_regs *regs, long error_code) void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; +dotraplinkage void do_mce(struct pt_regs *regs, long error_code) +{ + machine_check_vector(regs, error_code); +} + /* * Called for each booted CPU to set up machine checks. * Must be called with preempt off: diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 017bda12caaed9c46f60fccc90f05d861e58e1ba..b74bb29db6b94038598fb4f7931ba95a60d8d53b 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -592,6 +592,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, #define F14H_MPB_MAX_SIZE 1824 #define F15H_MPB_MAX_SIZE 4096 #define F16H_MPB_MAX_SIZE 3458 +#define F17H_MPB_MAX_SIZE 3200 switch (family) { case 0x14: @@ -603,6 +604,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, case 0x16: max_size = F16H_MPB_MAX_SIZE; break; + case 0x17: + max_size = F17H_MPB_MAX_SIZE; + break; default: max_size = F1XH_MPB_MAX_SIZE; break; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 5ce5155f0695a60f77e9a7add2360dc7bfa0903f..0afaf00b029bb0f5d499b7d158d603321a660030 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -43,7 +43,7 @@ #define MICROCODE_VERSION "2.01" static struct microcode_ops *microcode_ops; -static bool dis_ucode_ldr; +static bool dis_ucode_ldr = true; /* * Synchronization. @@ -73,6 +73,7 @@ struct cpu_info_ctx { static bool __init check_loader_disabled_bsp(void) { static const char *__dis_opt_str = "dis_ucode_ldr"; + u32 a, b, c, d; #ifdef CONFIG_X86_32 const char *cmdline = (const char *)__pa_nodebug(boot_command_line); @@ -85,8 +86,20 @@ static bool __init check_loader_disabled_bsp(void) bool *res = &dis_ucode_ldr; #endif - if (cmdline_find_option_bool(cmdline, option)) - *res = true; + a = 1; + c = 0; + native_cpuid(&a, &b, &c, &d); + + /* + * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not + * completely accurate as xen pv guests don't see that CPUID bit set but + * that's good enough as they don't land on the BSP path anyway. 
+ */ + if (c & BIT(31)) + return *res; + + if (cmdline_find_option_bool(cmdline, option) <= 0) + *res = false; return *res; } @@ -114,9 +127,7 @@ void __init load_ucode_bsp(void) { int vendor; unsigned int family; - - if (check_loader_disabled_bsp()) - return; + bool intel = true; if (!have_cpuid_p()) return; @@ -126,16 +137,27 @@ void __init load_ucode_bsp(void) switch (vendor) { case X86_VENDOR_INTEL: - if (family >= 6) - load_ucode_intel_bsp(); + if (family < 6) + return; break; + case X86_VENDOR_AMD: - if (family >= 0x10) - load_ucode_amd_bsp(family); + if (family < 0x10) + return; + intel = false; break; + default: - break; + return; } + + if (check_loader_disabled_bsp()) + return; + + if (intel) + load_ucode_intel_bsp(); + else + load_ucode_amd_bsp(family); } static bool check_loader_disabled_ap(void) @@ -154,9 +176,6 @@ void load_ucode_ap(void) if (check_loader_disabled_ap()) return; - if (!have_cpuid_p()) - return; - vendor = x86_cpuid_vendor(); family = x86_cpuid_family(); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 13dbcc0f9d0326a1755760f8f11e8b59da514441..f90f17610f627fc5f35a62b63e8501ca74ebc008 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -40,6 +40,9 @@ #include #include +/* last level cache size per core */ +static int llc_size_per_core; + /* * Temporary microcode blobs pointers storage. We note here during early load * the pointers to microcode blobs we've got from whatever storage (detached @@ -1051,8 +1054,19 @@ static bool is_blacklisted(unsigned int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); - if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) { - pr_err_once("late loading on model 79 is disabled.\n"); + /* + * Late loading on model 79 with microcode revision less than 0x0b000021 + * and LLC size per core bigger than 2.5MB may result in a system hang. + * This behavior is documented in item BDF90, #334165 (Intel Xeon + * Processor E7-8800/4800 v4 Product Family). 
+ */ + if (c->x86 == 6 && + c->x86_model == INTEL_FAM6_BROADWELL_X && + c->x86_mask == 0x01 && + llc_size_per_core > 2621440 && + c->microcode < 0x0b000021) { + pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); + pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); return true; } @@ -1116,6 +1130,15 @@ static struct microcode_ops microcode_intel_ops = { .microcode_fini_cpu = microcode_fini_cpu, }; +static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) +{ + u64 llc_size = c->x86_cache_size * 1024; + + do_div(llc_size, c->x86_max_cores); + + return (int)llc_size; +} + struct microcode_ops * __init init_intel_microcode(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -1126,6 +1149,8 @@ struct microcode_ops * __init init_intel_microcode(void) return NULL; } + llc_size_per_core = calc_llc_size_per_core(c); + return µcode_intel_ops; } diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 1db8dc490b665e751f43f3411cc21079eea75ae2..afbb52532791fc168bbdad61bab774e4e80880b6 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,9 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 04f89caef9c4926c40092aed725db848adc2379e..e33b38541be31dcc091fd97d1f4bd29a0ba7fe0e 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -41,6 +41,7 @@ #include #include #include +#include /* * Note: we only need 6*8 = 48 bytes for the espfix stack, but round @@ -126,6 +127,15 @@ void __init init_espfix_bsp(void) /* Install the espfix pud into the kernel page directory */ pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); + /* + * Just copy the top-level PGD that is mapping the espfix + * area to ensure it is mapped into the shadow user page + * tables. 
+ */ + if (kaiser_enabled) { + set_pgd(native_get_shadow_pgd(pgd_p), + __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page))); + } /* Randomize the locations */ init_espfix_random(); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 095ef7ddd6ae4d1c6d5476d6e63561963786e793..abfbb61b18b8ff3e7049ce2d6a35882cf331c4a9 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1077,6 +1077,7 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, * Add back in the features that came in from userspace: */ xsave->header.xfeatures |= xfeatures; + xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xsave->header.xfeatures; return 0; } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b4421cc191b056727f8f8c0def78a750b319a1c4..67cd7c1b99daac6bbe3a7cc03673e610a5720fd8 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -190,8 +190,8 @@ ENTRY(secondary_startup_64) movq $(init_level4_pgt - __START_KERNEL_map), %rax 1: - /* Enable PAE mode and PGE */ - movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx + /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */ + movl $(X86_CR4_PAE | X86_CR4_PSE), %ecx movq %rcx, %cr4 /* Setup early boot stage 4 level pagetables. */ @@ -405,6 +405,27 @@ GLOBAL(early_recursion_flag) .balign PAGE_SIZE; \ GLOBAL(name) +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Each PGD needs to be 8k long and 8k aligned. We do not + * ever go out to userspace with these, so we do not + * strictly *need* the second page, but this allows us to + * have a single set_pgd() implementation that does not + * need to worry about whether it has 4k or 8k to work + * with. + * + * This ensures PGDs are 8k long: + */ +#define KAISER_USER_PGD_FILL 512 +/* This ensures they are 8k-aligned: */ +#define NEXT_PGD_PAGE(name) \ + .balign 2 * PAGE_SIZE; \ +GLOBAL(name) +#else +#define NEXT_PGD_PAGE(name) NEXT_PAGE(name) +#define KAISER_USER_PGD_FILL 0 +#endif + /* Automate the creation of 1 to 1 mapping pmd entries */ #define PMDS(START, PERM, COUNT) \ i = 0 ; \ @@ -414,9 +435,10 @@ GLOBAL(name) .endr __INITDATA -NEXT_PAGE(early_level4_pgt) +NEXT_PGD_PAGE(early_level4_pgt) .fill 511,8,0 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE + .fill KAISER_USER_PGD_FILL,8,0 NEXT_PAGE(early_dynamic_pgts) .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 @@ -424,16 +446,18 @@ NEXT_PAGE(early_dynamic_pgts) .data #ifndef CONFIG_XEN -NEXT_PAGE(init_level4_pgt) +NEXT_PGD_PAGE(init_level4_pgt) .fill 512,8,0 + .fill KAISER_USER_PGD_FILL,8,0 #else -NEXT_PAGE(init_level4_pgt) +NEXT_PGD_PAGE(init_level4_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE .org init_level4_pgt + L4_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE + .fill KAISER_USER_PGD_FILL,8,0 NEXT_PAGE(level3_ident_pgt) .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE @@ -444,6 +468,7 @@ NEXT_PAGE(level2_ident_pgt) */ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) #endif + .fill KAISER_USER_PGD_FILL,8,0 NEXT_PAGE(level3_kernel_pgt) .fill L3_START_KERNEL,8,0 diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 932348fbb6ea05aff88f004a0feeaa5092b0d1f6..9512529e8eabcd3ff8d6a4538774b022c6cfabda 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int 
timer) irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_irq(hdev->irq); + disable_hardirq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1f38d9a4d9deaf707af2b7e658bd9e9022ba8d75..2763573ee1d2fea75a10eb7c0e78eb572e398268 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -19,6 +19,7 @@ #include #include +#include #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -54,17 +55,17 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack); static void call_on_stack(void *func, void *stack) { asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" + CALL_NOSPEC "movl %%ebx,%%esp \n" : "=b" (stack) : "0" (stack), - "D"(func) + [thunk_target] "D"(func) : "memory", "cc", "edx", "ecx", "eax"); } static inline void *current_stack(void) { - return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); + return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); } static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) @@ -88,17 +89,17 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) /* Save the next esp at the bottom of the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer(); + *prev_esp = current_stack_pointer; if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" + CALL_NOSPEC "movl %%ebx,%%esp \n" : "=a" (arg1), "=b" (isp) : "0" (desc), "1" (isp), - "D" (desc->handle_irq) + [thunk_target] "D" (desc->handle_irq) : "memory", "cc", "ecx"); return 1; } @@ -139,7 +140,7 @@ void do_softirq_own_stack(void) /* Push the previous esp onto the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer(); + *prev_esp = current_stack_pointer; call_on_stack(__do_softirq, isp); } diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 1423ab1b0312269fd3130392a94a74db755cc2b5..f480b38a03c35b0e2ffa377d4d84aaa4f42f58c0 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -51,7 +51,7 @@ static struct irqaction irq2 = { .flags = IRQF_NO_THREAD, }; -DEFINE_PER_CPU(vector_irq_t, vector_irq) = { +DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = { [0 ... 
NR_VECTORS - 1] = VECTOR_UNUSED, }; diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 5f8f0b3cc6740bc63a925c5f54e168baa8c39add..2c0b0b645a74b14dfd2b2aa00cc35678e86e5d0c 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -26,7 +26,7 @@ #include "common.h" static nokprobe_inline -int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, +void __skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, unsigned long orig_ip) { /* @@ -41,20 +41,21 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, __this_cpu_write(current_kprobe, NULL); if (orig_ip) regs->ip = orig_ip; - return 1; } int skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - if (kprobe_ftrace(p)) - return __skip_singlestep(p, regs, kcb, 0); - else - return 0; + if (kprobe_ftrace(p)) { + __skip_singlestep(p, regs, kcb, 0); + preempt_enable_no_resched(); + return 1; + } + return 0; } NOKPROBE_SYMBOL(skip_singlestep); -/* Ftrace callback handler for kprobes */ +/* Ftrace callback handler for kprobes -- called under preempt disabled */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) { @@ -77,13 +78,17 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ regs->ip = ip + sizeof(kprobe_opcode_t); + /* To emulate trap based kprobes, preempt_disable here */ + preempt_disable(); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) + if (!p->pre_handler || !p->pre_handler(p, regs)) { __skip_singlestep(p, regs, kcb, orig_ip); + preempt_enable_no_resched(); + } /* * If pre_handler returns !0, it sets regs->ip and - * resets current kprobe. + * resets the current kprobe, and keeps the preempt count +1. */ } end: diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 4d74f7386a6192e539c8be3d878a5d5c5fc5c8f0..dc20da1c78f09588ec8462217b8b7b818f9c75c5 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "common.h" @@ -192,7 +193,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) } /* Check whether insn is indirect jump */ -static int insn_is_indirect_jump(struct insn *insn) +static int __insn_is_indirect_jump(struct insn *insn) { return ((insn->opcode.bytes[0] == 0xff && (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ @@ -226,6 +227,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) return (start <= target && target <= start + len); } +static int insn_is_indirect_jump(struct insn *insn) +{ + int ret = __insn_is_indirect_jump(insn); + +#ifdef CONFIG_RETPOLINE + /* + * Jump to x86_indirect_thunk_* is treated as an indirect jump. + * Note that even with CONFIG_RETPOLINE=y, a kernel compiled with an + * older gcc may still use indirect jumps. So we add this check instead + * of replacing the indirect-jump check.
+ */ + if (!ret) + ret = insn_jump_into_range(insn, + (unsigned long)__indirect_thunk_start, + (unsigned long)__indirect_thunk_end - + (unsigned long)__indirect_thunk_start); +#endif + return ret; +} + /* Decode whole function to ensure any instructions don't jump into target */ static int can_optimize(unsigned long paddr) { diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 6707039b9032d98ebdb21b37769d4e400fa80cd0..8bc68cfc0d33fcdc014e19c01b5f016c5afaa8a3 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,9 +12,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -33,11 +35,21 @@ static void flush_ldt(void *current_mm) set_ldt(pc->ldt->entries, pc->ldt->size); } +static void __free_ldt_struct(struct ldt_struct *ldt) +{ + if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(ldt->entries); + else + free_page((unsigned long)ldt->entries); + kfree(ldt); +} + /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ static struct ldt_struct *alloc_ldt_struct(int size) { struct ldt_struct *new_ldt; int alloc_size; + int ret; if (size > LDT_ENTRIES) return NULL; @@ -65,7 +77,13 @@ static struct ldt_struct *alloc_ldt_struct(int size) return NULL; } + ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size, + __PAGE_KERNEL); new_ldt->size = size; + if (ret) { + __free_ldt_struct(new_ldt); + return NULL; + } return new_ldt; } @@ -91,12 +109,10 @@ static void free_ldt_struct(struct ldt_struct *ldt) if (likely(!ldt)) return; + kaiser_remove_mapping((unsigned long)ldt->entries, + ldt->size * LDT_ENTRY_SIZE); paravirt_free_ldt(ldt->entries, ldt->size); - if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(ldt->entries); - else - free_page((unsigned long)ldt->entries); - kfree(ldt); + __free_ldt_struct(ldt); } /* @@ -271,8 +287,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) return error; } -asmlinkage int sys_modify_ldt(int func, void __user *ptr, - unsigned long bytecount) +SYSCALL_DEFINE3(modify_ldt, int, func, void __user *, ptr, + unsigned long, bytecount) { int ret = -ENOSYS; @@ -290,5 +306,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr, ret = write_ldt(ptr, bytecount, 0); break; } - return ret; + /* + * The SYSCALL_DEFINE() macros give us an 'unsigned long' + * return type, but the ABI for sys_modify_ldt() expects + * 'int'. This cast gives us an int-sized value in %rax + * for the return code. The 'unsigned' is necessary so + * the compiler does not try to sign-extend the negative + * return codes into the high half of the register when + * taking the value from int->long. + */ + return (unsigned int)ret; } diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 7b0d3da52fb42f288a947f1befe8886319b37ec5..287ec3bc141fc5192d2f4b09c7e24463952aa57a 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -8,7 +8,7 @@ #include #include #include - +#include .code64 .section .entry.text, "ax" @@ -290,8 +290,9 @@ trace: * ip and parent ip are used and the list function is called when * function tracing is enabled.
*/ - call *ftrace_trace_function + movq ftrace_trace_function, %r8 + CALL_NOSPEC %r8 restore_mcount_regs jmp fgraph_trace @@ -334,5 +335,5 @@ GLOBAL(return_to_handler) movq 8(%rsp), %rdx movq (%rsp), %rax addq $24, %rsp - jmp *%rdi + JMP_NOSPEC %rdi #endif diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index bb3840cedb4f00c0479b8e97844816a83091ed31..ee43b36075c7f9b6e9b262887491593d531712d2 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -9,7 +9,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); -DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); @@ -59,7 +58,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); PATCH_SITE(pv_cpu_ops, clts); - PATCH_SITE(pv_mmu_ops, flush_tlb_single); PATCH_SITE(pv_cpu_ops, wbinvd); #if defined(CONFIG_PARAVIRT_SPINLOCKS) case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index fc7cf64587f2c8a85c744b22f7b320f2777798b2..54b2711f2dbc28041e3bb9e48ca6576f9521a7d7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -41,7 +41,7 @@ * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { +__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = { .x86_tss = { .sp0 = TOP_OF_INIT_STACK, #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0887d2ae3797906def2e8a51d83e6e1b5962c4a7..dffe81d3c261506ab814d6da1311dcfa3ae0af3f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -538,7 +538,7 @@ void set_personality_ia32(bool x32) current->personality &= ~READ_IMPLIES_EXEC; /* in_compat_syscall() uses the presence of the x32 syscall bit flag to determine compat status */ - current->thread.status &= ~TS_COMPAT; + current_thread_info()->status &= ~TS_COMPAT; } else { set_thread_flag(TIF_IA32); clear_thread_flag(TIF_X32); @@ -546,7 +546,7 @@ void set_personality_ia32(bool x32) current->mm->context.ia32_compat = TIF_IA32; current->personality |= force_personality32; /* Prepare the first "return" to user space */ - current->thread.status |= TS_COMPAT; + current_thread_info()->status |= TS_COMPAT; } } EXPORT_SYMBOL_GPL(set_personality_ia32); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0e63c0267f99c3360906e8b27a2a1fa646a65dde..e497d374412af6a9c687e1937e12e29decc2140c 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) */ regs->orig_ax = value; if (syscall_get_nr(child, regs) >= 0) - child->thread.status |= TS_I386_REGS_POKED; + child->thread_info.status |= TS_I386_REGS_POKED; break; case offsetof(struct user32, regs.eflags): diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 067f9813fd2cf7c15d5a1d297b537eedf6ca7959..ce020a69bba951774fff825b7e1042eb991148fa 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -106,6 +106,10 @@ void __noreturn 
machine_real_restart(unsigned int type) load_cr3(initial_page_table); #else write_cr3(real_mode_header->trampoline_pgd); + + /* Exiting long mode will fail if CR4.PCIDE is set. */ + if (static_cpu_has(X86_FEATURE_PCID)) + cr4_clear_bits(X86_CR4_PCIDE); #endif /* Jump to the identity-mapped low memory code */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index feaab07fa1247833dfd30300504ce785762ead1e..6b55012d02a372d1c02116d3cdb1246368058a22 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -114,6 +114,7 @@ #include #include #include +#include /* * max_low_pfn_mapped: highest direct mapped pfn under 4GB @@ -1019,6 +1020,12 @@ void __init setup_arch(char **cmdline_p) */ init_hypervisor_platform(); + /* + * This needs to happen right after XENPV is set on xen and + * kaiser_enabled is checked below in cleanup_highmap(). + */ + kaiser_check_boottime_disable(); + x86_init.resources.probe_roms(); /* after parse_early_param, so could debug it */ diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 763af1d0de64d8f8bbd323453e9ca7d266864c13..b1a5d252d482a8dbbbb2f741b48e1873b5713fbe 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -785,7 +785,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) * than the tracee. */ #ifdef CONFIG_IA32_EMULATION - if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9fe7b9e1ae3056d737ab6f55bfec41b8c1123c85..e803d72ef5253bc67f7d08e29f55ea9adcad5144 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -115,25 +115,16 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) spin_lock_irqsave(&rtc_lock, flags); CMOS_WRITE(0xa, 0xf); spin_unlock_irqrestore(&rtc_lock, flags); - local_flush_tlb(); - pr_debug("1.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = start_eip >> 4; - pr_debug("2.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = start_eip & 0xf; - pr_debug("3.\n"); } static inline void smpboot_restore_warm_reset_vector(void) { unsigned long flags; - /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - /* * Paranoid: Set warm reset code and vector here back * to default values. diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 8402907825b02706f87283c45b5e27086a9cce0b..21454e254a4cc3377c2281de2f461fe19c20f1a5 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -134,6 +134,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, return -1; set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); pte_unmap(pte); + + /* + * PTI poisons low addresses in the kernel page tables in the + * name of making them unusable for userspace. To execute + * code at such a low address, the poison must be cleared. + * + * Note: 'pgd' actually gets set in pud_alloc(). 
+ */ + pgd->pgd &= ~_PAGE_NX; + return 0; } diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c index 1c113db9ed573c3d8723fccc177431861df2bd19..2bb5ee464df3ea1855ee38e7232a38c3d10c5794 100644 --- a/arch/x86/kernel/tracepoint.c +++ b/arch/x86/kernel/tracepoint.c @@ -9,10 +9,12 @@ #include atomic_t trace_idt_ctr = ATOMIC_INIT(0); +__aligned(PAGE_SIZE) struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) trace_idt_table }; /* No need to be aligned, but done to keep all IDTs defined the same way. */ +__aligned(PAGE_SIZE) gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss; static int trace_irq_vector_refcount; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index bd4e3d4d3625ceeb4e4e356371feb444cea94fb6..322f433fbc76b362050e1f962aff6237cf2dcb46 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -153,7 +153,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) * from double_fault. */ BUG_ON((unsigned long)(current_top_of_stack() - - current_stack_pointer()) >= THREAD_SIZE); + current_stack_pointer) >= THREAD_SIZE); preempt_enable_no_resched(); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 44bf5cf417d34fd5372868c4d4cf3969510982c5..d07a9390023e0194f05e313a899df6f5a6cf0629 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -693,7 +693,6 @@ unsigned long native_calibrate_tsc(void) case INTEL_FAM6_KABYLAKE_DESKTOP: crystal_khz = 24000; /* 24.0 MHz */ break; - case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ATOM_DENVERTON: crystal_khz = 25000; /* 25.0 MHz */ break; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 01f30e56f99e57c2e215b2925ffcc205ea651ee3..4b3012888ada93be7ba094ed82d0a520855b8cc2 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -191,7 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) pte_unmap_unlock(pte, ptl); out: up_write(&mm->mmap_sem); - flush_tlb(); + flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, 0UL); } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index dbf67f64d5ecf76cee128b2c89d43a4267b57223..c7194e97c3d47975ef36e785d3a39e97eec47328 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -105,6 +105,13 @@ SECTIONS SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) + +#ifdef CONFIG_RETPOLINE + __indirect_thunk_start = .; + *(.text.__x86.indirect_thunk) + __indirect_thunk_end = .; +#endif + /* End of text section */ _etext = .; } :text = 0x9090 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 91af75e37306819f3897a95373ac51aecd73b01d..93f924de06cf1c70385137ffe5fb0973191bab2b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -355,6 +355,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + /* cpuid 0x80000008.ebx */ + const u32 kvm_cpuid_8000_0008_ebx_x86_features = + F(IBPB) | F(IBRS); + /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | @@ -376,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/; + /* cpuid 7.0.edx*/ + const u32 kvm_cpuid_7_0_edx_x86_features = + F(SPEC_CTRL) | F(ARCH_CAPABILITIES); + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -458,12 +466,14 @@ 
static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); + entry->edx &= kvm_cpuid_7_0_edx_x86_features; + cpuid_mask(&entry->edx, CPUID_7_EDX); } else { entry->ebx = 0; entry->ecx = 0; + entry->edx = 0; } entry->eax = 0; - entry->edx = 0; break; } case 9: @@ -607,7 +617,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, if (!g_phys_as) g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); - entry->ebx = entry->edx = 0; + entry->edx = 0; + /* IBRS and IBPB aren't necessarily present in hardware cpuid */ + if (boot_cpu_has(X86_FEATURE_IBPB)) + entry->ebx |= F(IBPB); + if (boot_cpu_has(X86_FEATURE_IBRS)) + entry->ebx |= F(IBRS); + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); break; } case 0x80000019: diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 9368fecca3ee89de71b987313f4b370d7830dec0..d1beb7156704beaa97a00ac7d15128e2533d5c7e 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -160,6 +160,37 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) return best && (best->edx & bit(X86_FEATURE_RDTSCP)); } +static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && (best->ebx & bit(X86_FEATURE_IBPB))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); +} + +static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && (best->ebx & bit(X86_FEATURE_IBRS))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); +} + +static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); +} + + /* * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9606a9d6b1a912a2f213eef3dedf851cc50c3259..c8d573822e60e81b7e32c78b8e7dd5a175a305ee 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "x86.h" #include "tss.h" @@ -1012,8 +1013,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; - asm("push %[flags]; popf; call *%[fastop]" - : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); + asm("push %[flags]; popf; " CALL_NOSPEC + : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); return rc; } @@ -2395,9 +2396,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) } static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, - u64 cr0, u64 cr4) + u64 cr0, u64 cr3, u64 cr4) { int bad; + u64 pcid; + + /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */ + pcid = 0; + if (cr4 & X86_CR4_PCIDE) { + pcid = cr3 & 0xfff; + cr3 &= ~0xfff; + } + + bad = ctxt->ops->set_cr(ctxt, 3, cr3); + if (bad) + return X86EMUL_UNHANDLEABLE; /* * First enable PAE, long mode needs it before CR0.PG = 1 is set. 
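The PCID handling added to rsm_enter_protected_mode() above encodes an architectural ordering rule that is easy to miss in diff form: CR4.PCIDE may only be set while CR3[11:0] == 0. A condensed sketch of just that rule follows (CR0/PAE steps omitted; the helper name is hypothetical and this is an illustration, not code from the patch):

/*
 * Illustrative only: strip the PCID before the first CR3 write,
 * load CR4 (which may set PCIDE), then rewrite CR3 with the PCID
 * restored. Doing it in any other order faults.
 */
static int rsm_restore_cr3_cr4_sketch(struct x86_emulate_ctxt *ctxt,
				      u64 cr3, u64 cr4)
{
	u64 pcid = 0;

	if (cr4 & X86_CR4_PCIDE) {
		pcid = cr3 & 0xfff;	/* remember the PCID ... */
		cr3 &= ~0xfff;		/* ... and strip it for now */
	}
	if (ctxt->ops->set_cr(ctxt, 3, cr3))	/* CR3 without PCID */
		return X86EMUL_UNHANDLEABLE;
	if (ctxt->ops->set_cr(ctxt, 4, cr4))	/* may set CR4.PCIDE */
		return X86EMUL_UNHANDLEABLE;
	if (pcid && ctxt->ops->set_cr(ctxt, 3, cr3 | pcid))
		return X86EMUL_UNHANDLEABLE;	/* restore the PCID */
	return X86EMUL_CONTINUE;
}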
@@ -2416,6 +2429,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, bad = ctxt->ops->set_cr(ctxt, 4, cr4); if (bad) return X86EMUL_UNHANDLEABLE; + if (pcid) { + bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid); + if (bad) + return X86EMUL_UNHANDLEABLE; + } + } return X86EMUL_CONTINUE; @@ -2426,11 +2445,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) struct desc_struct desc; struct desc_ptr dt; u16 selector; - u32 val, cr0, cr4; + u32 val, cr0, cr3, cr4; int i; cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); - ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8)); + cr3 = GET_SMSTATE(u32, smbase, 0x7ff8); ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); @@ -2472,14 +2491,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); - return rsm_enter_protected_mode(ctxt, cr0, cr4); + return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); } static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) { struct desc_struct desc; struct desc_ptr dt; - u64 val, cr0, cr4; + u64 val, cr0, cr3, cr4; u32 base3; u16 selector; int i, r; @@ -2496,7 +2515,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); cr0 = GET_SMSTATE(u64, smbase, 0x7f58); - ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50)); + cr3 = GET_SMSTATE(u64, smbase, 0x7f50); cr4 = GET_SMSTATE(u64, smbase, 0x7f48); ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); val = GET_SMSTATE(u64, smbase, 0x7ed0); @@ -2524,7 +2543,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) dt.address = GET_SMSTATE(u64, smbase, 0x7e68); ctxt->ops->set_gdt(ctxt, &dt); - r = rsm_enter_protected_mode(ctxt, cr0, cr4); + r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); if (r != X86EMUL_CONTINUE) return r; @@ -4972,6 +4991,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) bool op_prefix = false; bool has_seg_override = false; struct opcode opcode; + u16 dummy; + struct desc_struct desc; ctxt->memop.type = OP_NONE; ctxt->memopp = NULL; @@ -4990,6 +5011,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) switch (mode) { case X86EMUL_MODE_REAL: case X86EMUL_MODE_VM86: + def_op_bytes = def_ad_bytes = 2; + ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS); + if (desc.d) + def_op_bytes = def_ad_bytes = 4; + break; case X86EMUL_MODE_PROT16: def_op_bytes = def_ad_bytes = 2; break; @@ -5286,9 +5312,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) if (!(ctxt->d & ByteOp)) fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; - asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" + asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), - [fastop]"+S"(fop), ASM_CALL_CONSTRAINT + [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT : "c"(ctxt->src2.val)); ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 6e219e5c07d27c5dc41786953b1114b1e475e346..5f810bb80802ce1c7640e7616786e86fa596a738 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) index == RTC_GSI) { if 
(kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, e->fields.dest_mode) || - (e->fields.trig_mode == IOAPIC_EDGE_TRIG && - kvm_apic_pending_eoi(vcpu, e->fields.vector))) + kvm_apic_pending_eoi(vcpu, e->fields.vector)) __set_bit(e->fields.vector, ioapic_handled_vectors); } @@ -279,6 +278,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; bool mask_before, mask_after; + int old_remote_irr, old_delivery_status; union kvm_ioapic_redirect_entry *e; switch (ioapic->ioregsel) { @@ -301,14 +301,28 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) return; e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; + /* Preserve read-only fields */ + old_remote_irr = e->fields.remote_irr; + old_delivery_status = e->fields.delivery_status; if (ioapic->ioregsel & 1) { e->bits &= 0xffffffff; e->bits |= (u64) val << 32; } else { e->bits &= ~0xffffffffULL; e->bits |= (u32) val; - e->fields.remote_irr = 0; } + e->fields.remote_irr = old_remote_irr; + e->fields.delivery_status = old_delivery_status; + + /* + * Some OSes (Linux, Xen) assume that Remote IRR bit will + * be cleared by IOAPIC hardware when the entry is configured + * as edge-triggered. This behavior is used to simulate an + * explicit EOI on IOAPICs that don't have the EOI register. + */ + if (e->fields.trig_mode == IOAPIC_EDGE_TRIG) + e->fields.remote_irr = 0; + mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3f05c044720b7a6ab5258603a3577dd2054b9110..b24b3c6d686ea6426dcef858167ae9ec0090aa88 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -246,9 +246,14 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) recalculate_apic_map(apic->vcpu->kvm); } +static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) +{ + return ((id >> 4) << 16) | (1 << (id & 0xf)); +} + static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) { - u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); + u32 ldr = kvm_apic_calc_x2apic_ldr(id); kvm_lapic_set_reg(apic, APIC_ID, id); kvm_lapic_set_reg(apic, APIC_LDR, ldr); @@ -2029,6 +2034,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, { if (apic_x2apic_mode(vcpu->arch.apic)) { u32 *id = (u32 *)(s->regs + APIC_ID); + u32 *ldr = (u32 *)(s->regs + APIC_LDR); if (vcpu->kvm->arch.x2apic_format) { if (*id != vcpu->vcpu_id) @@ -2039,6 +2045,10 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, else *id <<= 24; } + + /* In x2APIC mode, the LDR is fixed and based on the id */ + if (set) + *ldr = kvm_apic_calc_x2apic_ldr(*id); } return 0; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d29c745f10ad14a02c6e9f5563ef81e2f9c8cf24..0a324e120942db8a9a0aca17a9497cfd8b9393df 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5052,13 +5052,13 @@ int kvm_mmu_module_init(void) { pte_list_desc_cache = kmem_cache_create("pte_list_desc", sizeof(struct pte_list_desc), - 0, 0, NULL); + 0, SLAB_ACCOUNT, NULL); if (!pte_list_desc_cache) goto nomem; mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", sizeof(struct kvm_mmu_page), - 0, 0, NULL); + 0, SLAB_ACCOUNT, NULL); if (!mmu_page_header_cache) goto nomem; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4fbf0c94f2d1a07d70e27f2a6670dddea74cf8cc..be644afab1bbd55feccdbde5163a77596b8898e6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -44,6 +44,7 @@ #include #include #include 
+#include #include #include "trace.h" @@ -182,6 +183,8 @@ struct vcpu_svm { u64 gs_base; } host; + u64 spec_ctrl; + u32 *msrpm; ulong nmi_iret_rip; @@ -247,6 +250,8 @@ static const struct svm_direct_access_msrs { { .index = MSR_CSTAR, .always = true }, { .index = MSR_SYSCALL_MASK, .always = true }, #endif + { .index = MSR_IA32_SPEC_CTRL, .always = false }, + { .index = MSR_IA32_PRED_CMD, .always = false }, { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, @@ -509,6 +514,7 @@ struct svm_cpu_data { struct kvm_ldttss_desc *tss_desc; struct page *save_area; + struct vmcb *current_vmcb; }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); @@ -860,6 +866,25 @@ static bool valid_msr_intercept(u32 index) return false; } +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) +{ + u8 bit_write; + unsigned long tmp; + u32 offset; + u32 *msrpm; + + msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: + to_svm(vcpu)->msrpm; + + offset = svm_msrpm_offset(msr); + bit_write = 2 * (msr & 0x0f) + 1; + tmp = msrpm[offset]; + + BUG_ON(offset == MSR_INVALID); + + return !!test_bit(bit_write, &tmp); +} + static void set_msr_interception(u32 *msrpm, unsigned msr, int read, int write) { @@ -1382,6 +1407,9 @@ static void avic_vm_destroy(struct kvm *kvm) unsigned long flags; struct kvm_arch *vm_data = &kvm->arch; + if (!avic) + return; + avic_free_vm_id(vm_data->avic_vm_id); if (vm_data->avic_logical_id_table_page) @@ -1531,6 +1559,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + svm->spec_ctrl = 0; + if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; @@ -1640,11 +1670,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); + /* + * The vmcb page can be recycled, causing a false negative in + * svm_vcpu_load(). So do a full IBPB now. 
+ */ + indirect_branch_prediction_barrier(); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int i; if (unlikely(cpu != vcpu->cpu)) { @@ -1673,6 +1709,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (static_cpu_has(X86_FEATURE_RDTSCP)) wrmsrl(MSR_TSC_AUX, svm->tsc_aux); + if (sd->current_vmcb != svm->vmcb) { + sd->current_vmcb = svm->vmcb; + indirect_branch_prediction_barrier(); + } avic_vcpu_load(vcpu, cpu); } @@ -2149,6 +2189,8 @@ static int ud_interception(struct vcpu_svm *svm) int er; er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; @@ -3502,6 +3544,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_VM_CR: msr_info->data = svm->nested.vm_cr_msr; break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibrs(vcpu)) + return 1; + + msr_info->data = svm->spec_ctrl; + break; case MSR_IA32_UCODE_REV: msr_info->data = 0x01000065; break; @@ -3593,6 +3642,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; + case MSR_IA32_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has_ibrs(vcpu)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + svm->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_svm_vmrun_msrpm. + * We update the L1 MSR bit as well since it will end up + * touching the MSR anyway now. + */ + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); + break; + case MSR_IA32_PRED_CMD: + if (!msr->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + if (is_guest_mode(vcpu)) + break; + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); + break; case MSR_STAR: svm->vmcb->save.star = data; break; @@ -4820,6 +4912,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + if (svm->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + asm volatile ( "push %%" _ASM_BP "; \n\t" "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" @@ -4863,6 +4964,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%[svm]) \n\t" "mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t" +#endif + /* + * Clear host registers marked as clobbered to prevent + * speculative use. 
+ */ + "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t" + "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t" + "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t" + "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t" + "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t" +#ifdef CONFIG_X86_64 + "xor %%r8, %%r8 \n\t" + "xor %%r9, %%r9 \n\t" + "xor %%r10, %%r10 \n\t" + "xor %%r11, %%r11 \n\t" + "xor %%r12, %%r12 \n\t" + "xor %%r13, %%r13 \n\t" + "xor %%r14, %%r14 \n\t" + "xor %%r15, %%r15 \n\t" #endif "pop %%" _ASM_BP : @@ -4893,6 +5013,30 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + + if (svm->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, svm->host.gs_base); #else diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5dd5664e3e55426d4df2bee983cc6122ff3a2217..04c1e0f038e765cf607261866122711fb74e60f7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "kvm_cache_regs.h" #include "x86.h" @@ -48,6 +49,7 @@ #include #include #include +#include #include "trace.h" #include "pmu.h" @@ -108,6 +110,14 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +#define MSR_TYPE_R 1 +#define MSR_TYPE_W 2 +#define MSR_TYPE_RW 3 + +#define MSR_BITMAP_MODE_X2APIC 1 +#define MSR_BITMAP_MODE_X2APIC_APICV 2 +#define MSR_BITMAP_MODE_LM 4 + #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ @@ -172,7 +182,6 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 -#define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; @@ -190,6 +199,7 @@ struct loaded_vmcs { struct vmcs *shadow_vmcs; int cpu; int launched; + unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; }; @@ -206,7 +216,7 @@ struct shared_msr_entry { * stored in guest memory specified by VMPTRLD, but is opaque to the guest, * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. * More than one of these structures may exist, if L1 runs multiple L2 guests. - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the * underlying hardware which will be used to run L2. * This structure is packed to ensure that its layout is identical across * machines (necessary for live migration). 
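Because vmcs12 is copied to and from guest memory unmodified, its size and packed layout are effectively ABI across every host a VM may migrate between. A compile-time guard of the following shape is the usual way to keep such a structure honest; it is shown only as an illustrative sketch, not something this patch adds:

/*
 * Illustrative only: vmcs12 must keep fitting in the 4KiB VMCS12_SIZE
 * region, since it is migrated byte-for-byte and must look identical
 * on every host.
 */
static inline void vmcs12_size_check(void)
{
	BUILD_BUG_ON(sizeof(struct vmcs12) > VMCS12_SIZE);
}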
@@ -385,13 +395,6 @@ struct __packed vmcs12 { */ #define VMCS12_SIZE 0x1000 -/* Used to remember the last vmcs02 used for some recently used vmcs12s */ -struct vmcs02_list { - struct list_head list; - gpa_t vmptr; - struct loaded_vmcs vmcs02; -}; - /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -418,15 +421,15 @@ struct nested_vmx { */ bool sync_shadow_vmcs; - /* vmcs02_list cache of VMCSs recently used to run L2 guests */ - struct list_head vmcs02_pool; - int vmcs02_num; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; + + struct loaded_vmcs vmcs02; + /* - * Guest pages referred to in vmcs02 with host-physical pointers, so - * we must keep them pinned while L2 runs. + * Guest pages referred to in the vmcs02 with host-physical + * pointers, so we must keep them pinned while L2 runs. */ struct page *apic_access_page; struct page *virtual_apic_page; @@ -435,8 +438,6 @@ struct nested_vmx { bool pi_pending; u16 posted_intr_nv; - unsigned long *msr_bitmap; - struct hrtimer preemption_timer; bool preemption_timer_expired; @@ -537,6 +538,7 @@ struct vcpu_vmx { unsigned long host_rsp; u8 fail; bool nmi_known_unmasked; + u8 msr_bitmap_mode; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -548,6 +550,10 @@ struct vcpu_vmx { u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; #endif + + u64 arch_capabilities; + u64 spec_ctrl; + u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; /* @@ -855,13 +861,18 @@ static const unsigned short vmcs_field_to_offset_table[] = { static inline short vmcs_field_to_offset(unsigned long field) { - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); + const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); + unsigned short offset; - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || - vmcs_field_to_offset_table[field] == 0) + BUILD_BUG_ON(size > SHRT_MAX); + if (field >= size) return -ENOENT; - return vmcs_field_to_offset_table[field]; + field = array_index_nospec(field, size); + offset = vmcs_field_to_offset_table[field]; + if (offset == 0) + return -ENOENT; + return offset; } static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) @@ -903,6 +914,9 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -922,12 +936,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); static unsigned long *vmx_io_bitmap_a; static unsigned long *vmx_io_bitmap_b; -static unsigned long *vmx_msr_bitmap_legacy; -static unsigned long *vmx_msr_bitmap_longmode; -static unsigned long *vmx_msr_bitmap_legacy_x2apic; -static unsigned long *vmx_msr_bitmap_longmode_x2apic; -static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive; -static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive; static unsigned long *vmx_vmread_bitmap; static unsigned long *vmx_vmwrite_bitmap; @@ -1199,6 +1207,11 @@ static inline bool cpu_has_vmx_invvpid_global(void) return vmx_capability.vpid & 
VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; } +static inline bool cpu_has_vmx_invvpid(void) +{ + return vmx_capability.vpid & VMX_VPID_INVVPID_BIT; +} + static inline bool cpu_has_vmx_ept(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -1839,6 +1852,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } +/* + * Check if MSR is intercepted for currently loaded MSR bitmap. + */ +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + +/* + * Check if MSR is intercepted for L01 MSR bitmap. + */ +static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -2248,6 +2307,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; vmcs_load(vmx->loaded_vmcs->vmcs); + indirect_branch_prediction_barrier(); } if (!already_loaded) { @@ -2516,36 +2576,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) vmx->guest_msrs[from] = tmp; } -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) -{ - unsigned long *msr_bitmap; - - if (is_guest_mode(vcpu)) - msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; - else if (cpu_has_secondary_exec_ctrls() && - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic; - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive; - } - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode; - else - msr_bitmap = vmx_msr_bitmap_legacy; - } - - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); -} - /* * Set up the vmcs to automatically save and restore system * msrs. 
Don't touch the 64-bit msrs if the guest is in legacy @@ -2586,7 +2616,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx->save_nmsrs = save_nmsrs; if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(&vmx->vcpu); + vmx_update_msr_bitmap(&vmx->vcpu); } /* @@ -2975,6 +3005,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibrs(vcpu)) + return 1; + + msr_info->data = to_vmx(vcpu)->spec_ctrl; + break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has_arch_capabilities(vcpu)) + return 1; + msr_info->data = to_vmx(vcpu)->arch_capabilities; + break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -3079,6 +3122,68 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibrs(vcpu)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + vmx->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. We update the vmcs01 here for L1 as well + * since it will end up touching the MSR anyway now. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_RW); + break; + case MSR_IA32_PRED_CMD: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. 
+ */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, + MSR_TYPE_W); + break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vmx->arch_capabilities = data; + break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -3518,11 +3623,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) return vmcs; } -static struct vmcs *alloc_vmcs(void) -{ - return alloc_vmcs_cpu(raw_smp_processor_id()); -} - static void free_vmcs(struct vmcs *vmcs) { free_pages((unsigned long)vmcs, vmcs_config.order); @@ -3538,9 +3638,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; + if (loaded_vmcs->msr_bitmap) + free_page((unsigned long)loaded_vmcs->msr_bitmap); WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } +static struct vmcs *alloc_vmcs(void) +{ + return alloc_vmcs_cpu(raw_smp_processor_id()); +} + +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) +{ + loaded_vmcs->vmcs = alloc_vmcs(); + if (!loaded_vmcs->vmcs) + return -ENOMEM; + + loaded_vmcs->shadow_vmcs = NULL; + loaded_vmcs_init(loaded_vmcs); + + if (cpu_has_vmx_msr_bitmap()) { + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!loaded_vmcs->msr_bitmap) + goto out_vmcs; + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); + } + return 0; + +out_vmcs: + free_loaded_vmcs(loaded_vmcs); + return -ENOMEM; +} + static void free_kvm_area(void) { int cpu; @@ -3816,6 +3945,12 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); } +static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu) +{ + if (enable_ept) + vmx_flush_tlb(vcpu); +} + static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; @@ -4541,10 +4676,8 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -#define MSR_TYPE_R 1 -#define MSR_TYPE_W 2 -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4578,8 +4711,8 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, } } -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4613,6 +4746,15 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, } } +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type, bool value) +{ + if (value) + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); + else + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); +} + /* * If a msr is allowed by L0, we should check whether it is allowed by L1. * The corresponding bit will be cleared unless both of L0 and L1 allow it. 
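The rule stated above collapses to one bitwise OR per word, since a set bit in a VMX MSR bitmap means "intercept": L2 gets direct access only when both L0 and L1 leave the bit clear. A hypothetical helper, shown only to make the merge semantics concrete:

/*
 * Illustrative only: the effective (vmcs02) bitmap must intercept
 * whenever either L0 or L1 wants the exit, so the pass-through set
 * is the intersection of the two pass-through sets.
 */
static void merge_msr_bitmap_word(unsigned long *msr_bitmap_l02,
				  const unsigned long *msr_bitmap_l0,
				  const unsigned long *msr_bitmap_l1,
				  unsigned int word)
{
	msr_bitmap_l02[word] = msr_bitmap_l0[word] | msr_bitmap_l1[word];
}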
@@ -4659,58 +4801,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) { - if (!longmode_only) - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, - msr, MSR_TYPE_R | MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, - msr, MSR_TYPE_R | MSR_TYPE_W); -} + u8 mode = 0; -static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) -{ - if (apicv_active) { - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_R); - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_R); - } else { - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, - msr, MSR_TYPE_R); - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, - msr, MSR_TYPE_R); + if (cpu_has_secondary_exec_ctrls() && + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { + mode |= MSR_BITMAP_MODE_X2APIC; + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) + mode |= MSR_BITMAP_MODE_X2APIC_APICV; } + + if (is_long_mode(vcpu)) + mode |= MSR_BITMAP_MODE_LM; + + return mode; } -static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) + +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, + u8 mode) { - if (apicv_active) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_R); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_R); - } else { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, - msr, MSR_TYPE_R); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, - msr, MSR_TYPE_R); + int msr; + + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; + } + + if (mode & MSR_BITMAP_MODE_X2APIC) { + /* + * TPR reads and writes can be virtualized even if virtual interrupt + * delivery is not in use. 
+ */ + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); + } } } -static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active) +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) { - if (apicv_active) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_W); - } else { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, - msr, MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, - msr, MSR_TYPE_W); - } + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; + u8 mode = vmx_msr_bitmap_mode(vcpu); + u8 changed = mode ^ vmx->msr_bitmap_mode; + + if (!changed) + return; + + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, + !(mode & MSR_BITMAP_MODE_LM)); + + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); + + vmx->msr_bitmap_mode = mode; } static bool vmx_get_enable_apicv(void) @@ -4718,30 +4870,45 @@ static bool vmx_get_enable_apicv(void) return enable_apicv; } -static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + gfn_t gfn; + + /* + * Don't need to mark the APIC access page dirty; it is never + * written to by the CPU during APIC virtualization. + */ + + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } + + if (nested_cpu_has_posted_intr(vmcs12)) { + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } +} + + +static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); int max_irr; void *vapic_page; u16 status; - if (vmx->nested.pi_desc && - vmx->nested.pi_pending) { - vmx->nested.pi_pending = false; - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) - return 0; - - max_irr = find_last_bit( - (unsigned long *)vmx->nested.pi_desc->pir, 256); + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) + return; - if (max_irr == 256) - return 0; + vmx->nested.pi_pending = false; + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) + return; + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); + if (max_irr != 256) { vapic_page = kmap(vmx->nested.virtual_apic_page); - if (!vapic_page) { - WARN_ON(1); - return -ENOMEM; - } __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); kunmap(vmx->nested.virtual_apic_page); @@ -4752,7 +4919,8 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_INTR_STATUS, status); } } - return 0; + + nested_mark_vmcs12_pages_dirty(vcpu); } static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) @@ -4799,14 +4967,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu) && vector == vmx->nested.posted_intr_nv) { - /* the PIR and ON have been set by L1. 
*/ - kvm_vcpu_trigger_posted_interrupt(vcpu); /* * If a posted intr is not recognized by hardware, * we will accomplish it in the next vmentry. */ vmx->nested.pi_pending = true; kvm_make_request(KVM_REQ_EVENT, vcpu); + /* the PIR and ON have been set by L1. */ + if (!kvm_vcpu_trigger_posted_interrupt(vcpu)) + kvm_vcpu_kick(vcpu); return 0; } return -1; @@ -4939,7 +5108,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) } if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static u32 vmx_exec_control(struct vcpu_vmx *vmx) @@ -5028,7 +5197,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); } if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ @@ -5102,6 +5271,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); @@ -5130,6 +5301,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u64 cr0; vmx->rmode.vm86_active = 0; + vmx->spec_ctrl = 0; vmx->soft_vnmi_blocked = 0; @@ -5174,7 +5346,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } - vmcs_writel(GUEST_RFLAGS, 0x02); + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); @@ -5502,6 +5674,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) return 1; } er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); return 1; @@ -6235,7 +6409,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) return 1; - err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); + err = emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { ++vcpu->stat.mmio_exits; @@ -6357,7 +6531,7 @@ static void wakeup_handler(void) static __init int hardware_setup(void) { - int r = -ENOMEM, i, msr; + int r = -ENOMEM, i; rdmsrl_safe(MSR_EFER, &host_efer); @@ -6372,67 +6546,33 @@ static __init int hardware_setup(void) if (!vmx_io_bitmap_b) goto out; - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy) - goto out1; - - vmx_msr_bitmap_legacy_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy_x2apic) - goto out2; - - vmx_msr_bitmap_legacy_x2apic_apicv_inactive = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive) - goto out3; - - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode) - goto out4; - - vmx_msr_bitmap_longmode_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode_x2apic) - goto out5; - - vmx_msr_bitmap_longmode_x2apic_apicv_inactive = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive) - goto out6; - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmread_bitmap) - goto out7; + goto out1; vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmwrite_bitmap) - goto out8; + goto out2; memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); 
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - /* - * Allow direct access to the PC debug port (it is often used for I/O - * delays, but the vmexits simply slow things down). - */ memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); - clear_bit(0x80, vmx_io_bitmap_a); memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; - goto out9; + goto out3; } if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); - if (!cpu_has_vmx_vpid()) + if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || + !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) enable_vpid = 0; + if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) @@ -6480,47 +6620,8 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - - memcpy(vmx_msr_bitmap_legacy_x2apic, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic, - vmx_msr_bitmap_longmode, PAGE_SIZE); - memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, - vmx_msr_bitmap_longmode, PAGE_SIZE); - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - /* - * enable_apicv && kvm_vcpu_apicv_active() - */ - for (msr = 0x800; msr <= 0x8ff; msr++) - vmx_disable_intercept_msr_read_x2apic(msr, true); - - /* TMCCT */ - vmx_enable_intercept_msr_read_x2apic(0x839, true); - /* TPR */ - vmx_disable_intercept_msr_write_x2apic(0x808, true); - /* EOI */ - vmx_disable_intercept_msr_write_x2apic(0x80b, true); - /* SELF-IPI */ - vmx_disable_intercept_msr_write_x2apic(0x83f, true); - - /* - * (enable_apicv && !kvm_vcpu_apicv_active()) || - * !enable_apicv - */ - /* TPR */ - vmx_disable_intercept_msr_read_x2apic(0x808, false); - vmx_disable_intercept_msr_write_x2apic(0x808, false); - if (enable_ept) { kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, (enable_ept_ad_bits) ? 
VMX_EPT_ACCESS_BIT : 0ull, @@ -6566,22 +6667,10 @@ static __init int hardware_setup(void) return alloc_kvm_area(); -out9: - free_page((unsigned long)vmx_vmwrite_bitmap); -out8: - free_page((unsigned long)vmx_vmread_bitmap); -out7: - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); -out6: - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); -out5: - free_page((unsigned long)vmx_msr_bitmap_longmode); -out4: - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); out3: - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); + free_page((unsigned long)vmx_vmwrite_bitmap); out2: - free_page((unsigned long)vmx_msr_bitmap_legacy); + free_page((unsigned long)vmx_vmread_bitmap); out1: free_page((unsigned long)vmx_io_bitmap_b); out: @@ -6592,12 +6681,6 @@ static __init int hardware_setup(void) static __exit void hardware_unsetup(void) { - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); - free_page((unsigned long)vmx_msr_bitmap_legacy); - free_page((unsigned long)vmx_msr_bitmap_longmode); free_page((unsigned long)vmx_io_bitmap_b); free_page((unsigned long)vmx_io_bitmap_a); free_page((unsigned long)vmx_vmwrite_bitmap); @@ -6644,94 +6727,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) return handle_nop(vcpu); } -/* - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. - * We could reuse a single VMCS for all the L2 guests, but we also want the - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this - * allows keeping them loaded on the processor, and in the future will allow - * optimizations where prepare_vmcs02 doesn't need to set all the fields on - * every entry if they never change. - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. - * - * The following functions allocate and free a vmcs02 in this pool. - */ - -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmx->nested.current_vmptr) { - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { - /* Recycle the least recently used VMCS. 
*/ - item = list_last_entry(&vmx->nested.vmcs02_pool, - struct vmcs02_list, list); - item->vmptr = vmx->nested.current_vmptr; - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - /* Create a new VMCS */ - item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); - if (!item) - return NULL; - item->vmcs02.vmcs = alloc_vmcs(); - item->vmcs02.shadow_vmcs = NULL; - if (!item->vmcs02.vmcs) { - kfree(item); - return NULL; - } - loaded_vmcs_init(&item->vmcs02); - item->vmptr = vmx->nested.current_vmptr; - list_add(&(item->list), &(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num++; - return &item->vmcs02; -} - -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmptr) { - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - return; - } -} - -/* - * Free all VMCSs saved for this vcpu, except the one pointed by - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs - * must be &vmx->vmcs01. - */ -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item, *n; - - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { - /* - * Something will leak if the above WARN triggers. Better than - * a use-after-free. - */ - if (vmx->loaded_vmcs == &item->vmcs02) - continue; - - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - } -} - /* * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), * set the success or error code of an emulated VMX instruction, as specified @@ -7006,6 +7001,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) struct vmcs *shadow_vmcs; const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + int r; /* The Intel VMX Instruction Reference lists a bunch of bits that * are prerequisite to running VMXON, most notably cr4.VMXE must be @@ -7045,12 +7041,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } - if (cpu_has_vmx_msr_bitmap()) { - vmx->nested.msr_bitmap = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx->nested.msr_bitmap) - goto out_msr_bitmap; - } + r = alloc_loaded_vmcs(&vmx->nested.vmcs02); + if (r < 0) + goto out_vmcs02; vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) @@ -7067,9 +7060,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) vmx->vmcs01.shadow_vmcs = shadow_vmcs; } - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num = 0; - hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; @@ -7084,9 +7074,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) kfree(vmx->nested.cached_vmcs12); out_cached_vmcs12: - free_page((unsigned long)vmx->nested.msr_bitmap); + free_loaded_vmcs(&vmx->nested.vmcs02); -out_msr_bitmap: +out_vmcs02: return -ENOMEM; } @@ -7162,17 +7152,13 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.vmxon = false; free_vpid(vmx->nested.vpid02); nested_release_vmcs12(vmx); - if (vmx->nested.msr_bitmap) { - free_page((unsigned long)vmx->nested.msr_bitmap); - vmx->nested.msr_bitmap = NULL; - } if (enable_shadow_vmcs) { vmcs_clear(vmx->vmcs01.shadow_vmcs); 
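/*
 * Editor's note: handle_vmon() above now calls alloc_loaded_vmcs(),
 * which is defined outside this excerpt.  Judging from its call
 * sites it pairs the VMCS with a per-VMCS MSR bitmap page, roughly
 * as follows (a sketch under that assumption, not a hunk from this
 * patch):
 *
 *	static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 *	{
 *		loaded_vmcs->vmcs = alloc_vmcs();
 *		if (!loaded_vmcs->vmcs)
 *			return -ENOMEM;
 *
 *		loaded_vmcs->shadow_vmcs = NULL;
 *		loaded_vmcs_init(loaded_vmcs);
 *
 *		if (cpu_has_vmx_msr_bitmap()) {
 *			loaded_vmcs->msr_bitmap = (unsigned long *)
 *				__get_free_page(GFP_KERNEL);
 *			if (!loaded_vmcs->msr_bitmap) {
 *				free_loaded_vmcs(loaded_vmcs);
 *				return -ENOMEM;
 *			}
 *			memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
 *		}
 *		return 0;
 *	}
 */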
free_vmcs(vmx->vmcs01.shadow_vmcs); vmx->vmcs01.shadow_vmcs = NULL; } kfree(vmx->nested.cached_vmcs12); - /* Unpin physical memory we referred to in current vmcs02 */ + /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { nested_release_page(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; @@ -7188,7 +7174,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.pi_desc = NULL; } - nested_free_all_saved_vmcss(vmx); + free_loaded_vmcs(&vmx->nested.vmcs02); } /* Emulate the VMXOFF instruction */ @@ -7206,9 +7192,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) static int handle_vmclear(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 zero = 0; gpa_t vmptr; - struct vmcs12 *vmcs12; - struct page *page; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -7219,24 +7204,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - /* - * For accurate processor emulation, VMCLEAR beyond available - * physical memory should do nothing at all. However, it is - * possible that a nested vmx bug, not a guest hypervisor bug, - * resulted in this case, so let's shut down before doing any - * more damage: - */ - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - return 1; - } - vmcs12 = kmap(page); - vmcs12->launch_state = 0; - kunmap(page); - nested_release_page(page); - - nested_free_vmcs02(vmx, vmptr); + kvm_vcpu_write_guest(vcpu, + vmptr + offsetof(struct vmcs12, launch_state), + &zero, sizeof(zero)); skip_emulated_instruction(vcpu); nested_vmx_succeed(vcpu); @@ -8024,6 +7994,19 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) vmcs_read32(VM_EXIT_INTR_ERROR_CODE), KVM_ISA_VMX); + /* + * The host physical addresses of some pages of guest memory + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC + * Page). The CPU may write to these pages via their host + * physical address while L2 is running, bypassing any + * address-translation-based dirty tracking (e.g. EPT write + * protection). + * + * Mark them dirty on every exit from L2 to prevent them from + * getting out of sync with dirty tracking. 
+ */ + nested_mark_vmcs12_pages_dirty(vcpu); + if (vmx->nested.nested_run_pending) return false; @@ -8511,10 +8494,11 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } else { sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmx_flush_tlb_ept_only(vcpu); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) @@ -8536,8 +8520,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) */ if (!is_guest_mode(vcpu) || !nested_cpu_has2(get_vmcs12(&vmx->vcpu), - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { vmcs_write64(APIC_ACCESS_ADDR, hpa); + vmx_flush_tlb_ept_only(vcpu); + } } static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) @@ -8667,14 +8653,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) #endif "pushf\n\t" __ASM_SIZE(push) " $%c[cs]\n\t" - "call *%[entry]\n\t" + CALL_NOSPEC : #ifdef CONFIG_X86_64 [sp]"=&r"(tmp), #endif ASM_CALL_CONSTRAINT : - [entry]"r"(entry), + THUNK_TARGET(entry), [ss]"i"(__KERNEL_DS), [cs]"i"(__KERNEL_CS) ); @@ -8900,6 +8886,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_arm_hv_timer(vcpu); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + if (vmx->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + vmx->__launched = vmx->loaded_vmcs->launched; asm( /* Store host registers */ @@ -8948,6 +8943,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Save guest registers, load host registers, keep flags */ "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" "pop %0 \n\t" + "setbe %c[fail](%0)\n\t" "mov %%" _ASM_AX ", %c[rax](%0) \n\t" "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" __ASM_SIZE(pop) " %c[rcx](%0) \n\t" @@ -8964,12 +8960,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%0) \n\t" "mov %%r14, %c[r14](%0) \n\t" "mov %%r15, %c[r15](%0) \n\t" + "xor %%r8d, %%r8d \n\t" + "xor %%r9d, %%r9d \n\t" + "xor %%r10d, %%r10d \n\t" + "xor %%r11d, %%r11d \n\t" + "xor %%r12d, %%r12d \n\t" + "xor %%r13d, %%r13d \n\t" + "xor %%r14d, %%r14d \n\t" + "xor %%r15d, %%r15d \n\t" #endif "mov %%cr2, %%" _ASM_AX " \n\t" "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" + "xor %%eax, %%eax \n\t" + "xor %%ebx, %%ebx \n\t" + "xor %%esi, %%esi \n\t" + "xor %%edi, %%edi \n\t" "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" - "setbe %c[fail](%0) \n\t" ".pushsection .rodata \n\t" ".global vmx_return \n\t" "vmx_return: " _ASM_PTR " 2b \n\t" @@ -9006,6 +9013,30 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. 
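+ *
+ * Editor's sketch of msr_write_intercepted(): it is defined earlier
+ * in the file and not visible in this excerpt; the body below is an
+ * assumption consistent with the call sites - it simply tests the
+ * write-intercept bit for the MSR in the current bitmap page:
+ *
+ *	static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
+ *	{
+ *		unsigned long *msr_bitmap;
+ *		int f = sizeof(unsigned long);
+ *
+ *		if (!cpu_has_vmx_msr_bitmap())
+ *			return true;
+ *
+ *		msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
+ *
+ *		if (msr <= 0x1fff)
+ *			return !!test_bit(msr, msr_bitmap + 0x800 / f);
+ *		else if (msr >= 0xc0000000 && msr <= 0xc0001fff)
+ *			return !!test_bit(msr & 0x1fff,
+ *					  msr_bitmap + 0xc00 / f);
+ *
+ *		return true;
+ *	}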
+ */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + + if (vmx->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (debugctlmsr) update_debugctlmsr(debugctlmsr); @@ -9116,6 +9147,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + unsigned long *msr_bitmap; int cpu; if (!vmx) @@ -9148,17 +9180,24 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx->guest_msrs) goto free_pml; - vmx->loaded_vmcs = &vmx->vmcs01; - vmx->loaded_vmcs->vmcs = alloc_vmcs(); - vmx->loaded_vmcs->shadow_vmcs = NULL; - if (!vmx->loaded_vmcs->vmcs) - goto free_msrs; if (!vmm_exclusive) kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); - loaded_vmcs_init(vmx->loaded_vmcs); + err = alloc_loaded_vmcs(&vmx->vmcs01); if (!vmm_exclusive) kvm_cpu_vmxoff(); + if (err < 0) + goto free_msrs; + msr_bitmap = vmx->vmcs01.msr_bitmap; + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); + vmx->msr_bitmap_mode = 0; + + vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; @@ -9552,23 +9591,31 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, int msr; struct page *page; unsigned long *msr_bitmap_l1; - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + /* + * pred_cmd & spec_ctrl are trying to verify two things: + * + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This + * ensures that we do not accidentally generate an L02 MSR bitmap + * from the L12 MSR bitmap that is too permissive. + * 2. That L1 or L2s have actually used the MSR. This avoids + * unnecessarily merging of the bitmap if the MSR is unused. This + * works properly because we only update the L01 MSR bitmap lazily. + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only + * updated to reflect this when L1 (or its L2s) actually write to + * the MSR. + */ + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); - /* This shortcut is ok because we support only x2APIC MSRs so far. 
*/ - if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && + !pred_cmd && !spec_ctrl) return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); - if (!page) { - WARN_ON(1); + if (!page) return false; - } msr_bitmap_l1 = (unsigned long *)kmap(page); - if (!msr_bitmap_l1) { - nested_release_page_clean(page); - WARN_ON(1); - return false; - } memset(msr_bitmap_l0, 0xff, PAGE_SIZE); @@ -9595,6 +9642,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, MSR_TYPE_W); } } + + if (spec_ctrl) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_R | MSR_TYPE_W); + + if (pred_cmd) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PRED_CMD, + MSR_TYPE_W); + kunmap(page); nested_release_page_clean(page); @@ -10074,6 +10134,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); + if (enable_vpid) { /* * There is no direct mapping between vpid02 and vpid12, the @@ -10111,6 +10174,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (nested_cpu_has_ept(vmcs12)) { kvm_mmu_unload(vcpu); nested_ept_init_mmu_context(vcpu); + } else if (nested_cpu_has2(vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + vmx_flush_tlb_ept_only(vcpu); } if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) @@ -10166,7 +10232,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); int cpu; - struct loaded_vmcs *vmcs02; bool ia32e; u32 msr_entry_idx; @@ -10306,17 +10371,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * the nested entry. 
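*
* Editor's note: with the vmcs02 pool removed, struct nested_vmx is
* assumed (the struct hunk is outside this excerpt) to carry the one
* instance directly:
*
*	struct nested_vmx {
*		...
*		struct loaded_vmcs vmcs02;
*		...
*	};
*
* which is what the &vmx->nested.vmcs02 reference below relies on.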
*/ - vmcs02 = nested_get_current_vmcs02(vmx); - if (!vmcs02) - return -ENOMEM; - enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); cpu = get_cpu(); - vmx->loaded_vmcs = vmcs02; + vmx->loaded_vmcs = &vmx->nested.vmcs02; vmx_vcpu_put(vcpu); vmx_vcpu_load(vcpu, cpu); vcpu->cpu = cpu; @@ -10468,7 +10529,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) return 0; } - return vmx_complete_nested_posted_interrupt(vcpu); + vmx_complete_nested_posted_interrupt(vcpu); + return 0; } static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) @@ -10779,7 +10841,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_write64(GUEST_IA32_DEBUGCTL, 0); if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) @@ -10830,10 +10892,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vm_exit_controls_reset_shadow(vmx); vmx_segment_cache_clear(vmx); - /* if no vmcs02 cache requested, remove the one we used */ - if (VMCS02_POOL_SIZE == 0) - nested_free_vmcs02(vmx, vmx->nested.current_vmptr); - load_vmcs12_host_state(vcpu, vmcs12); /* Update any VMCS fields that might have changed while L2 ran */ @@ -10851,6 +10909,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx->nested.change_vmcs01_virtual_x2apic_mode = false; vmx_set_virtual_x2apic_mode(vcpu, vcpu->arch.apic_base & X2APIC_ENABLE); + } else if (!nested_cpu_has_ept(vmcs12) && + nested_cpu_has2(vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + vmx_flush_tlb_ept_only(vcpu); } /* This is needed for same reason as it was needed in prepare_vmcs02 */ @@ -10900,8 +10962,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, */ static void vmx_leave_nested(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.nested_run_pending = 0; nested_vmx_vmexit(vcpu, -1, 0, 0); + } free_nested(to_vmx(vcpu)); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 595f8149c0d9eec391a94740b3183179def16f6d..75f756eac9797b03aa77437e9ffe6cd45b8a06eb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -773,7 +773,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ - if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu)) + if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) || + !is_long_mode(vcpu)) return 1; } @@ -974,6 +975,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save; @@ -1750,10 +1752,13 @@ static u64 __get_kvmclock_ns(struct kvm *kvm) /* both __this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); - kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, - &hv_clock.tsc_shift, - &hv_clock.tsc_to_system_mul); - ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + if (__this_cpu_read(cpu_tsc_khz)) { + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + } else + ret = ktime_get_boot_ns() + ka->kvmclock_offset; put_cpu(); @@ -1797,6 
+1802,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) */ BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); + if (guest_hv_clock.version & 1) + ++guest_hv_clock.version; /* first time write, random junk */ + vcpu->hv_clock.version = guest_hv_clock.version + 1; kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, @@ -4260,7 +4268,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) addr, n, v)) && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) break; - trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v); handled += n; addr += n; len -= n; @@ -4513,7 +4521,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) { if (vcpu->mmio_read_completed) { trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, - vcpu->mmio_fragments[0].gpa, *(u64 *)val); + vcpu->mmio_fragments[0].gpa, val); vcpu->mmio_read_completed = 0; return 1; } @@ -4535,14 +4543,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) { - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val); return vcpu_mmio_write(vcpu, gpa, bytes, val); } static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, void *val, int bytes) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL); return X86EMUL_IO_NEEDED; } @@ -5304,7 +5312,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - r = EMULATE_FAIL; + r = EMULATE_USER_EXIT; } kvm_queue_exception(vcpu, UD_VECTOR); @@ -5576,6 +5584,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) return EMULATE_DONE; + if (ctxt->have_exception && inject_emulated_exception(vcpu)) + return EMULATE_DONE; if (emulation_type & EMULTYPE_SKIP) return EMULATE_FAIL; return handle_emulation_failure(vcpu); @@ -6521,6 +6531,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) kvm_x86_ops->tlb_flush(vcpu); } +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) +{ + unsigned long apic_address; + + /* + * The physical address of apic access page is stored in the VMCS. + * Update it when it becomes invalid. 
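+ *
+ * For instance (illustrative numbers, not from the patch): with the
+ * default APIC base at gpa 0xfee00000, apic_address is the hva
+ * backing gfn 0xfee00, and the half-open check below
+ *
+ *	start <= apic_address && apic_address < end
+ *
+ * matches the [start, end) convention of mmu-notifier ranges.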
+ */ + apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (start <= apic_address && apic_address < end) + kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); +} + void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { struct page *page = NULL; @@ -7113,7 +7137,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) #endif kvm_rip_write(vcpu, regs->rip); - kvm_set_rflags(vcpu, regs->rflags); + kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED); vcpu->arch.exception.pending = false; @@ -8424,11 +8448,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, { struct x86_exception fault; - trace_kvm_async_pf_ready(work->arch.token, work->gva); if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); + trace_kvm_async_pf_ready(work->arch.token, work->gva); if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 34a74131a12c58ef9f38712261900093b53220f1..4ad7c4dd311c90e466a6cb63d687c4d855f4a395 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -25,6 +25,8 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +lib-$(CONFIG_RETPOLINE) += retpoline.o +OBJECT_FILES_NON_STANDARD_retpoline.o :=y obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 4d34bb548b41ebdddc20fcf464aad4cb44c6a763..46e71a74e6129b861c233ccf86f452b485e62d59 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -29,7 +29,8 @@ #include #include #include - +#include + /* * computes a partial checksum, e.g. for TCP/UDP fragments */ @@ -156,7 +157,7 @@ ENTRY(csum_partial) negl %ebx lea 45f(%ebx,%ebx,2), %ebx testl %esi, %esi - jmp *%ebx + JMP_NOSPEC %ebx # Handle 2-byte-aligned regions 20: addw (%esi), %ax @@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic) andl $-32,%edx lea 3f(%ebx,%ebx), %ebx testl %esi, %esi - jmp *%ebx + JMP_NOSPEC %ebx 1: addl $64,%esi addl $64,%edi SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 5cc78bf572325fb1c5b5d6f854bfe878c55dfeb3..3261abb21ef4f5e0d9c9239cc86651ee96b0a74a 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c @@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, return 0; /* Buffer overrun */ } +/* + * Find a non-boolean option (i.e. option=argument). In accordance with + * standard Linux practice, if this option is repeated, this returns the + * last instance on the command line. + * + * @cmdline: the cmdline string + * @max_cmdline_size: the maximum size of cmdline + * @option: option string to look for + * @buffer: memory buffer to return the option argument + * @bufsize: size of the supplied memory buffer + * + * Returns the length of the argument (regardless of if it was + * truncated to fit in the buffer), or -1 on not found. 
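+ *
+ * Usage sketch (editor's illustration; the command line shown is
+ * hypothetical):
+ *
+ *	char arg[5];
+ *	int len;
+ *
+ *	// boot_command_line = "ro pti=on pti=off quiet"
+ *	len = cmdline_find_option(boot_command_line, "pti",
+ *				  arg, sizeof(arg));
+ *	// len == 3, arg == "off": the last instance wins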
+ */ +static int +__cmdline_find_option(const char *cmdline, int max_cmdline_size, + const char *option, char *buffer, int bufsize) +{ + char c; + int pos = 0, len = -1; + const char *opptr = NULL; + char *bufptr = buffer; + enum { + st_wordstart = 0, /* Start of word/after whitespace */ + st_wordcmp, /* Comparing this word */ + st_wordskip, /* Miscompare, skip */ + st_bufcpy, /* Copying this to buffer */ + } state = st_wordstart; + + if (!cmdline) + return -1; /* No command line */ + + /* + * This 'pos' check ensures we do not overrun + * a non-NULL-terminated 'cmdline' + */ + while (pos++ < max_cmdline_size) { + c = *(char *)cmdline++; + if (!c) + break; + + switch (state) { + case st_wordstart: + if (myisspace(c)) + break; + + state = st_wordcmp; + opptr = option; + /* fall through */ + + case st_wordcmp: + if ((c == '=') && !*opptr) { + /* + * We matched all the way to the end of the + * option we were looking for, prepare to + * copy the argument. + */ + len = 0; + bufptr = buffer; + state = st_bufcpy; + break; + } else if (c == *opptr++) { + /* + * We are currently matching, so continue + * to the next character on the cmdline. + */ + break; + } + state = st_wordskip; + /* fall through */ + + case st_wordskip: + if (myisspace(c)) + state = st_wordstart; + break; + + case st_bufcpy: + if (myisspace(c)) { + state = st_wordstart; + } else { + /* + * Increment len, but don't overrun the + * supplied buffer and leave room for the + * NULL terminator. + */ + if (++len < bufsize) + *bufptr++ = c; + } + break; + } + } + + if (bufsize) + *bufptr = '\0'; + + return len; +} + int cmdline_find_option_bool(const char *cmdline, const char *option) { return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); } + +int cmdline_find_option(const char *cmdline, const char *option, char *buffer, + int bufsize) +{ + return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, + buffer, bufsize); +} diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 073d1f1a620bd6797973eaa05338d918c9dd7b54..9758524ee99f79260b2feb1307a20af9957a13a4 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -93,6 +93,13 @@ static void delay_mwaitx(unsigned long __loops) { u64 start, end, delay, loops = __loops; + /* + * Timer value of 0 causes MWAITX to wait indefinitely, unless there + * is a store on the memory monitored by MONITORX. 
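+ *
+ * Editor's illustration of the case being guarded against, written
+ * with the same helpers the loop below uses:
+ *
+ *	__monitorx(raw_cpu_ptr(&cpu_tsc_khz), 0, 0);
+ *	__mwaitx(MWAITX_DISABLE_CSTATES, 0, MWAITX_ECX_TIMER_ENABLE);
+ *
+ * With 0 in EBX the timeout never fires, so only a store to the
+ * monitored line could wake the CPU again.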
+ */ + if (loops == 0) + return; + start = rdtsc_ordered(); for (;;) { diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 37b62d4121481df990741fac20ee48c2e68492e7..b12b214713a603c851615b984d625a3113d497de 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -39,6 +39,8 @@ ENTRY(__get_user_1) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 1: movzbl (%_ASM_AX),%edx xor %eax,%eax @@ -53,6 +55,8 @@ ENTRY(__get_user_2) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 2: movzwl -1(%_ASM_AX),%edx xor %eax,%eax @@ -67,6 +71,8 @@ ENTRY(__get_user_4) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 3: movl -3(%_ASM_AX),%edx xor %eax,%eax @@ -82,6 +88,8 @@ ENTRY(__get_user_8) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 4: movq -7(%_ASM_AX),%rdx xor %eax,%eax @@ -93,6 +101,8 @@ ENTRY(__get_user_8) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user_8 + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 4: movl -7(%_ASM_AX),%edx 5: movl -3(%_ASM_AX),%ecx diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S new file mode 100644 index 0000000000000000000000000000000000000000..480edc3a5e03002dd6f0a0316477cbd7b0971cc8 --- /dev/null +++ b/arch/x86/lib/retpoline.S @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include +#include + +.macro THUNK reg + .section .text.__x86.indirect_thunk + +ENTRY(__x86_indirect_thunk_\reg) + CFI_STARTPROC + JMP_NOSPEC %\reg + CFI_ENDPROC +ENDPROC(__x86_indirect_thunk_\reg) +.endm + +/* + * Despite being an assembler file we can't just use .irp here + * because __KSYM_DEPS__ only uses the C preprocessor and would + * only see one instance of "__x86_indirect_thunk_\reg" rather + * than one per register with the correct names. So we do it + * the simple and nasty way... + */ +#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) +#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) +#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) + +GENERATE_THUNK(_ASM_AX) +GENERATE_THUNK(_ASM_BX) +GENERATE_THUNK(_ASM_CX) +GENERATE_THUNK(_ASM_DX) +GENERATE_THUNK(_ASM_SI) +GENERATE_THUNK(_ASM_DI) +GENERATE_THUNK(_ASM_BP) +#ifdef CONFIG_64BIT +GENERATE_THUNK(r8) +GENERATE_THUNK(r9) +GENERATE_THUNK(r10) +GENERATE_THUNK(r11) +GENERATE_THUNK(r12) +GENERATE_THUNK(r13) +GENERATE_THUNK(r14) +GENERATE_THUNK(r15) +#endif + +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. 
Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * Google experimented with loop-unrolling and this turned out to be + * the optimal version - two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +.macro STUFF_RSB nr:req sp:req + mov $(\nr / 2), %_ASM_BX + .align 16 +771: + call 772f +773: /* speculation trap */ + pause + lfence + jmp 773b + .align 16 +772: + call 774f +775: /* speculation trap */ + pause + lfence + jmp 775b + .align 16 +774: + dec %_ASM_BX + jnz 771b + add $((BITS_PER_LONG/8) * \nr), \sp +.endm + +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +ENTRY(__fill_rsb) + STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP + ret +END(__fill_rsb) +EXPORT_SYMBOL_GPL(__fill_rsb) + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ + +ENTRY(__clear_rsb) + STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP + ret +END(__clear_rsb) +EXPORT_SYMBOL_GPL(__clear_rsb) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 3bc7baf2a711fa3a88066ee3c3527c441216441b..5c06dbffc52f3e2fca8cbf872e3faddc526abdc5 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -570,12 +570,12 @@ do { \ unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel(to, from, n); - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_intel_nocache(to, from, n); @@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr #else __copy_user(to, from, n); #endif - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 96d2b847e09ea504fc3ac824d347651a3bc880b9..c548b46100cbf6d7a4f05a922a7dab4586451986 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -37,5 +37,5 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o obj-$(CONFIG_X86_INTEL_MPX) += mpx.o obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o -obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o - +obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +obj-$(CONFIG_PAGE_TABLE_ISOLATION) += kaiser.o diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c228ec8edf23ca483a669eed0668d345bfe904d4..3d7f60fdb836c3a22aad1c64bbc57f179e3040ff 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -191,14 +191,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really * faulted on a pte with its pkey=4. 
*/ -static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) +static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info, + u32 *pkey) { /* This is effectively an #ifdef */ if (!boot_cpu_has(X86_FEATURE_OSPKE)) return; /* Fault not from Protection Keys: nothing to do */ - if (si_code != SEGV_PKUERR) + if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV)) return; /* * force_sig_info_fault() is called from a number of @@ -237,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, lsb = PAGE_SHIFT; info.si_addr_lsb = lsb; - fill_sig_info_pkey(si_code, &info, pkey); + fill_sig_info_pkey(si_signo, si_code, &info, pkey); force_sig_info(si_signo, &info, tsk); } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 889e7619a0914d87ad49dbb5b960933e5dd316ad..f92bdb9f4e4688c500b3bbfa85069e41cb56b76c 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -177,7 +177,7 @@ static void __init probe_page_size_mask(void) cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ - if (boot_cpu_has(X86_FEATURE_PGE)) { + if (boot_cpu_has(X86_FEATURE_PGE) && !kaiser_enabled) { cr4_set_bits_and_update_boot(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; } else @@ -764,13 +764,11 @@ void __init zone_sizes_init(void) } DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { -#ifdef CONFIG_SMP .active_mm = &init_mm, .state = 0, -#endif .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ }; -EXPORT_SYMBOL_GPL(cpu_tlbstate); +EXPORT_PER_CPU_SYMBOL(cpu_tlbstate); void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3e27ded6ac653f6cd0a3fa8b8fd18bd11e2f400e..7df8e3a79dc0cb6b31e7ff9d27f03b2b5949f675 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -324,6 +324,16 @@ void __init cleanup_highmap(void) continue; if (vaddr < (unsigned long) _text || vaddr > end) set_pmd(pmd, __pmd(0)); + else if (kaiser_enabled) { + /* + * level2_kernel_pgt is initialized with _PAGE_GLOBAL: + * clear that now. This is not important, so long as + * CR4.PGE remains clear, but it removes an anomaly. + * Physical mapping setup below avoids _PAGE_GLOBAL + * by use of massage_pgprot() inside pfn_pte() etc. + */ + set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL)); + } } } diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c new file mode 100644 index 0000000000000000000000000000000000000000..ec678aafa3f866bdd44b011e1e8b80de0ff3106a --- /dev/null +++ b/arch/x86/mm/kaiser.c @@ -0,0 +1,483 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef pr_fmt +#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt + +#include +#include /* to verify its kaiser declarations */ +#include +#include +#include +#include +#include + +int kaiser_enabled __read_mostly = 1; +EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ + +__visible +DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + +/* + * These can have bit 63 set, so we can not just use a plain "or" + * instruction to get their value or'd into CR3. It would take + * another register. So, we use a memory reference to these instead. + * + * This is also handy because systems that do not support PCIDs + * just end up or'ing a 0 into their CR3, which does no harm. 
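+ *
+ * In C terms the exit-to-user CR3 switch then amounts to (editor's
+ * sketch of what the entry assembly does, not code from this patch):
+ *
+ *	unsigned long cr3 = __pa(current->mm->pgd);	// kernel pgd
+ *
+ *	cr3 |= this_cpu_read(x86_cr3_pcid_user);	// shadow-pgd
+ *							// offset, PCID,
+ *							// NOFLUSH bit
+ *	write_cr3(cr3);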
+ */ +DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user); + +/* + * At runtime, the only things we map are some things for CPU + * hotplug, and stacks for new processes. No two CPUs will ever + * be populating the same addresses, so we only need to ensure + * that we protect between two CPUs trying to allocate and + * populate the same page table page. + * + * Only take this lock when doing a set_p[4um]d(), but it is not + * needed for doing a set_pte(). We assume that only the *owner* + * of a given allocation will be doing this for _their_ + * allocation. + * + * This ensures that once a system has been running for a while + * and there have been stacks all over and these page tables + * are fully populated, there will be no further acquisitions of + * this lock. + */ +static DEFINE_SPINLOCK(shadow_table_allocation_lock); + +/* + * Returns -1 on error. + */ +static inline unsigned long get_pa_from_mapping(unsigned long vaddr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset_k(vaddr); + /* + * We made all the kernel PGDs present in kaiser_init(). + * We expect them to stay that way. + */ + BUG_ON(pgd_none(*pgd)); + /* + * PGDs are either 512GB or 128TB on all x86_64 + * configurations. We don't handle these. + */ + BUG_ON(pgd_large(*pgd)); + + pud = pud_offset(pgd, vaddr); + if (pud_none(*pud)) { + WARN_ON_ONCE(1); + return -1; + } + + if (pud_large(*pud)) + return (pud_pfn(*pud) << PAGE_SHIFT) | (vaddr & ~PUD_PAGE_MASK); + + pmd = pmd_offset(pud, vaddr); + if (pmd_none(*pmd)) { + WARN_ON_ONCE(1); + return -1; + } + + if (pmd_large(*pmd)) + return (pmd_pfn(*pmd) << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK); + + pte = pte_offset_kernel(pmd, vaddr); + if (pte_none(*pte)) { + WARN_ON_ONCE(1); + return -1; + } + + return (pte_pfn(*pte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK); +} + +/* + * This is a relatively normal page table walk, except that it + * also tries to allocate page tables pages along the way. + * + * Returns a pointer to a PTE on success, or NULL on failure. + */ +static pte_t *kaiser_pagetable_walk(unsigned long address, bool user) +{ + pmd_t *pmd; + pud_t *pud; + pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + unsigned long prot = _KERNPG_TABLE; + + if (pgd_none(*pgd)) { + WARN_ONCE(1, "All shadow pgds should have been populated"); + return NULL; + } + BUILD_BUG_ON(pgd_large(*pgd) != 0); + + if (user) { + /* + * The vsyscall page is the only page that will have + * _PAGE_USER set. Catch everything else. 
+ */ + BUG_ON(address != VSYSCALL_ADDR); + + set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); + prot = _PAGE_TABLE; + } + + pud = pud_offset(pgd, address); + /* The shadow page tables do not use large mappings: */ + if (pud_large(*pud)) { + WARN_ON(1); + return NULL; + } + if (pud_none(*pud)) { + unsigned long new_pmd_page = __get_free_page(gfp); + if (!new_pmd_page) + return NULL; + spin_lock(&shadow_table_allocation_lock); + if (pud_none(*pud)) { + set_pud(pud, __pud(prot | __pa(new_pmd_page))); + __inc_zone_page_state(virt_to_page((void *) + new_pmd_page), NR_KAISERTABLE); + } else + free_page(new_pmd_page); + spin_unlock(&shadow_table_allocation_lock); + } + + pmd = pmd_offset(pud, address); + /* The shadow page tables do not use large mappings: */ + if (pmd_large(*pmd)) { + WARN_ON(1); + return NULL; + } + if (pmd_none(*pmd)) { + unsigned long new_pte_page = __get_free_page(gfp); + if (!new_pte_page) + return NULL; + spin_lock(&shadow_table_allocation_lock); + if (pmd_none(*pmd)) { + set_pmd(pmd, __pmd(prot | __pa(new_pte_page))); + __inc_zone_page_state(virt_to_page((void *) + new_pte_page), NR_KAISERTABLE); + } else + free_page(new_pte_page); + spin_unlock(&shadow_table_allocation_lock); + } + + return pte_offset_kernel(pmd, address); +} + +static int kaiser_add_user_map(const void *__start_addr, unsigned long size, + unsigned long flags) +{ + int ret = 0; + pte_t *pte; + unsigned long start_addr = (unsigned long )__start_addr; + unsigned long address = start_addr & PAGE_MASK; + unsigned long end_addr = PAGE_ALIGN(start_addr + size); + unsigned long target_address; + + /* + * It is convenient for callers to pass in __PAGE_KERNEL etc, + * and there is no actual harm from setting _PAGE_GLOBAL, so + * long as CR4.PGE is not set. But it is nonetheless troubling + * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser" + * requires that not to be #defined to 0): so mask it off here. + */ + flags &= ~_PAGE_GLOBAL; + if (!(__supported_pte_mask & _PAGE_NX)) + flags &= ~_PAGE_NX; + + for (; address < end_addr; address += PAGE_SIZE) { + target_address = get_pa_from_mapping(address); + if (target_address == -1) { + ret = -EIO; + break; + } + pte = kaiser_pagetable_walk(address, flags & _PAGE_USER); + if (!pte) { + ret = -ENOMEM; + break; + } + if (pte_none(*pte)) { + set_pte(pte, __pte(flags | target_address)); + } else { + pte_t tmp; + set_pte(&tmp, __pte(flags | target_address)); + WARN_ON_ONCE(!pte_same(*pte, tmp)); + } + } + return ret; +} + +static int kaiser_add_user_map_ptrs(const void *start, const void *end, unsigned long flags) +{ + unsigned long size = end - start; + + return kaiser_add_user_map(start, size, flags); +} + +/* + * Ensure that the top level of the (shadow) page tables are + * entirely populated. This ensures that all processes that get + * forked have the same entries. This way, we do not have to + * ever go set up new entries in older processes. + * + * Note: we never free these, so there are no updates to them + * after this. + */ +static void __init kaiser_init_all_pgds(void) +{ + pgd_t *pgd; + int i = 0; + + pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0)); + for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) { + pgd_t new_pgd; + pud_t *pud = pud_alloc_one(&init_mm, + PAGE_OFFSET + i * PGDIR_SIZE); + if (!pud) { + WARN_ON(1); + break; + } + inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE); + new_pgd = __pgd(_KERNPG_TABLE |__pa(pud)); + /* + * Make sure not to stomp on some other pgd entry. 
+ */ + if (!pgd_none(pgd[i])) { + WARN_ON(1); + continue; + } + set_pgd(pgd + i, new_pgd); + } +} + +#define kaiser_add_user_map_early(start, size, flags) do { \ + int __ret = kaiser_add_user_map(start, size, flags); \ + WARN_ON(__ret); \ +} while (0) + +#define kaiser_add_user_map_ptrs_early(start, end, flags) do { \ + int __ret = kaiser_add_user_map_ptrs(start, end, flags); \ + WARN_ON(__ret); \ +} while (0) + +void __init kaiser_check_boottime_disable(void) +{ + bool enable = true; + char arg[5]; + int ret; + + if (boot_cpu_has(X86_FEATURE_XENPV)) + goto silent_disable; + + ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); + if (ret > 0) { + if (!strncmp(arg, "on", 2)) + goto enable; + + if (!strncmp(arg, "off", 3)) + goto disable; + + if (!strncmp(arg, "auto", 4)) + goto skip; + } + + if (cmdline_find_option_bool(boot_command_line, "nopti")) + goto disable; + +skip: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + goto disable; + +enable: + if (enable) + setup_force_cpu_cap(X86_FEATURE_KAISER); + + return; + +disable: + pr_info("disabled\n"); + +silent_disable: + kaiser_enabled = 0; + setup_clear_cpu_cap(X86_FEATURE_KAISER); +} + +/* + * If anything in here fails, we will likely die on one of the + * first kernel->user transitions and init will die. But, we + * will have most of the kernel up by then and should be able to + * get a clean warning out of it. If we BUG_ON() here, we run + * the risk of being before we have good console output. + */ +void __init kaiser_init(void) +{ + int cpu; + + if (!kaiser_enabled) + return; + + kaiser_init_all_pgds(); + + /* + * Note that this sets _PAGE_USER and it needs to happen when the + * pagetable hierarchy gets created, i.e., early. Otherwise + * kaiser_pagetable_walk() will encounter initialized PTEs in the + * hierarchy and not set the proper permissions, leading to the + * pagefaults with page-protection violations when trying to read the + * vsyscall page. For example. + */ + if (vsyscall_enabled()) + kaiser_add_user_map_early((void *)VSYSCALL_ADDR, + PAGE_SIZE, + vsyscall_pgprot); + + for_each_possible_cpu(cpu) { + void *percpu_vaddr = __per_cpu_user_mapped_start + + per_cpu_offset(cpu); + unsigned long percpu_sz = __per_cpu_user_mapped_end - + __per_cpu_user_mapped_start; + kaiser_add_user_map_early(percpu_vaddr, percpu_sz, + __PAGE_KERNEL); + } + + /* + * Map the entry/exit text section, which is needed at + * switches from user to and from kernel. 
+ */ + kaiser_add_user_map_ptrs_early(__entry_text_start, __entry_text_end, + __PAGE_KERNEL_RX); + +#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) + kaiser_add_user_map_ptrs_early(__irqentry_text_start, + __irqentry_text_end, + __PAGE_KERNEL_RX); +#endif + kaiser_add_user_map_early((void *)idt_descr.address, + sizeof(gate_desc) * NR_VECTORS, + __PAGE_KERNEL_RO); +#ifdef CONFIG_TRACING + kaiser_add_user_map_early(&trace_idt_descr, + sizeof(trace_idt_descr), + __PAGE_KERNEL); + kaiser_add_user_map_early(&trace_idt_table, + sizeof(gate_desc) * NR_VECTORS, + __PAGE_KERNEL); +#endif + kaiser_add_user_map_early(&debug_idt_descr, sizeof(debug_idt_descr), + __PAGE_KERNEL); + kaiser_add_user_map_early(&debug_idt_table, + sizeof(gate_desc) * NR_VECTORS, + __PAGE_KERNEL); + + pr_info("enabled\n"); +} + +/* Add a mapping to the shadow mapping, and synchronize the mappings */ +int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) +{ + if (!kaiser_enabled) + return 0; + return kaiser_add_user_map((const void *)addr, size, flags); +} + +void kaiser_remove_mapping(unsigned long start, unsigned long size) +{ + extern void unmap_pud_range_nofree(pgd_t *pgd, + unsigned long start, unsigned long end); + unsigned long end = start + size; + unsigned long addr, next; + pgd_t *pgd; + + if (!kaiser_enabled) + return; + pgd = native_get_shadow_pgd(pgd_offset_k(start)); + for (addr = start; addr < end; pgd++, addr = next) { + next = pgd_addr_end(addr, end); + unmap_pud_range_nofree(pgd, addr, next); + } +} + +/* + * Page table pages are page-aligned. The lower half of the top + * level is used for userspace and the top half for the kernel. + * This returns true for user pages that need to get copied into + * both the user and kernel copies of the page tables, and false + * for kernel pages that should only be in the kernel copy. + */ +static inline bool is_userspace_pgd(pgd_t *pgdp) +{ + return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2); +} + +pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) +{ + if (!kaiser_enabled) + return pgd; + /* + * Do we need to also populate the shadow pgd? Check _PAGE_USER to + * skip cases like kexec and EFI which make temporary low mappings. + */ + if (pgd.pgd & _PAGE_USER) { + if (is_userspace_pgd(pgdp)) { + native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; + /* + * Even if the entry is *mapping* userspace, ensure + * that userspace can not use it. This way, if we + * get out to userspace running on the kernel CR3, + * userspace will crash instead of running. + */ + if (__supported_pte_mask & _PAGE_NX) + pgd.pgd |= _PAGE_NX; + } + } else if (!pgd.pgd) { + /* + * pgd_clear() cannot check _PAGE_USER, and is even used to + * clear corrupted pgd entries: so just rely on cases like + * kexec and EFI never to be using pgd_clear(). + */ + if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) && + is_userspace_pgd(pgdp)) + native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; + } + return pgd; +} + +void kaiser_setup_pcid(void) +{ + unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET; + + if (this_cpu_has(X86_FEATURE_PCID)) + user_cr3 |= X86_CR3_PCID_USER_NOFLUSH; + /* + * These variables are used by the entry/exit + * code to change PCID and pgd and TLB flushing. + */ + this_cpu_write(x86_cr3_pcid_user, user_cr3); +} + +/* + * Make a note that this cpu will need to flush USER tlb on return to user. + * If cpu does not have PCID, then the NOFLUSH bit will never have been set. 
+ */ +void kaiser_flush_tlb_on_return_to_user(void) +{ + if (this_cpu_has(X86_FEATURE_PCID)) + this_cpu_write(x86_cr3_pcid_user, + X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); +} +EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index aed206475aa7c04892443646efa6e88a9e5f4d24..319183d936024d8292fcb3739c58381cac1a73b7 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -189,6 +189,6 @@ void __meminit init_trampoline(void) *pud_tramp = *pud; } - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); + /* Avoid set_pgd(), in case it's complicated by CONFIG_PAGE_TABLE_ISOLATION */ + trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)); } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e3353c97d0862d2a20bdd060fc229af2de8324bb..73dcb0e18c1b81c30f4330d5472d017fe02842fa 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(cpa_lock); #define CPA_FLUSHTLB 1 #define CPA_ARRAY 2 #define CPA_PAGES_ARRAY 4 +#define CPA_FREE_PAGETABLES 8 #ifdef CONFIG_PROC_FS static unsigned long direct_pages_count[PG_LEVEL_NUM]; @@ -729,10 +730,13 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte, return 0; } -static bool try_to_free_pte_page(pte_t *pte) +static bool try_to_free_pte_page(struct cpa_data *cpa, pte_t *pte) { int i; + if (!(cpa->flags & CPA_FREE_PAGETABLES)) + return false; + for (i = 0; i < PTRS_PER_PTE; i++) if (!pte_none(pte[i])) return false; @@ -741,10 +745,13 @@ static bool try_to_free_pte_page(pte_t *pte) return true; } -static bool try_to_free_pmd_page(pmd_t *pmd) +static bool try_to_free_pmd_page(struct cpa_data *cpa, pmd_t *pmd) { int i; + if (!(cpa->flags & CPA_FREE_PAGETABLES)) + return false; + for (i = 0; i < PTRS_PER_PMD; i++) if (!pmd_none(pmd[i])) return false; @@ -753,7 +760,9 @@ static bool try_to_free_pmd_page(pmd_t *pmd) return true; } -static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) +static bool unmap_pte_range(struct cpa_data *cpa, pmd_t *pmd, + unsigned long start, + unsigned long end) { pte_t *pte = pte_offset_kernel(pmd, start); @@ -764,22 +773,23 @@ static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) pte++; } - if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) { + if (try_to_free_pte_page(cpa, (pte_t *)pmd_page_vaddr(*pmd))) { pmd_clear(pmd); return true; } return false; } -static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, +static void __unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, pmd_t *pmd, unsigned long start, unsigned long end) { - if (unmap_pte_range(pmd, start, end)) - if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) + if (unmap_pte_range(cpa, pmd, start, end)) + if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud))) pud_clear(pud); } -static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) +static void unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, + unsigned long start, unsigned long end) { pmd_t *pmd = pmd_offset(pud, start); @@ -790,7 +800,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; unsigned long pre_end = min_t(unsigned long, end, next_page); - __unmap_pmd_range(pud, pmd, start, pre_end); + __unmap_pmd_range(cpa, pud, pmd, start, pre_end); start = pre_end; pmd++; @@ -803,7 +813,8 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) 
if (pmd_large(*pmd)) pmd_clear(pmd); else - __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); + __unmap_pmd_range(cpa, pud, pmd, + start, start + PMD_SIZE); start += PMD_SIZE; pmd++; @@ -813,17 +824,19 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) * 4K leftovers? */ if (start < end) - return __unmap_pmd_range(pud, pmd, start, end); + return __unmap_pmd_range(cpa, pud, pmd, start, end); /* * Try again to free the PMD page if haven't succeeded above. */ if (!pud_none(*pud)) - if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) + if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud))) pud_clear(pud); } -static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) +static void __unmap_pud_range(struct cpa_data *cpa, pgd_t *pgd, + unsigned long start, + unsigned long end) { pud_t *pud = pud_offset(pgd, start); @@ -834,7 +847,7 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; unsigned long pre_end = min_t(unsigned long, end, next_page); - unmap_pmd_range(pud, start, pre_end); + unmap_pmd_range(cpa, pud, start, pre_end); start = pre_end; pud++; @@ -848,7 +861,7 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) if (pud_large(*pud)) pud_clear(pud); else - unmap_pmd_range(pud, start, start + PUD_SIZE); + unmap_pmd_range(cpa, pud, start, start + PUD_SIZE); start += PUD_SIZE; pud++; @@ -858,7 +871,7 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) * 2M leftovers? */ if (start < end) - unmap_pmd_range(pud, start, end); + unmap_pmd_range(cpa, pud, start, end); /* * No need to try to free the PUD page because we'll free it in @@ -866,6 +879,24 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) */ } +static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) +{ + struct cpa_data cpa = { + .flags = CPA_FREE_PAGETABLES, + }; + + __unmap_pud_range(&cpa, pgd, start, end); +} + +void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end) +{ + struct cpa_data cpa = { + .flags = 0, + }; + + __unmap_pud_range(&cpa, pgd, start, end); +} + static int alloc_pte_page(pmd_t *pmd) { pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3feec5af4e67c096b9bd663edc4a94fb587f67bb..209b9465e97a9ad4583087afbe3cd0b508b1990d 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -344,14 +344,15 @@ static inline void _pgd_free(pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } #else + static inline pgd_t *_pgd_alloc(void) { - return (pgd_t *)__get_free_page(PGALLOC_GFP); + return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); } static inline void _pgd_free(pgd_t *pgd) { - free_page((unsigned long)pgd); + free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); } #endif /* CONFIG_X86_PAE */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 75fb01109f9453a8fbf66105ff2c0c9d9789722c..578973ade71b016c3537dc7a864a60753b5e36a7 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -6,16 +6,17 @@ #include #include #include +#include #include #include #include #include #include -#include +#include /* - * Smarter SMP flushing macros. + * TLB flushing, formerly SMP-only * c/o Linus Torvalds. 
* * These mean you can really definitely utterly forget about @@ -28,14 +29,42 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ -#ifdef CONFIG_SMP - struct flush_tlb_info { struct mm_struct *flush_mm; unsigned long flush_start; unsigned long flush_end; }; +static void load_new_mm_cr3(pgd_t *pgdir) +{ + unsigned long new_mm_cr3 = __pa(pgdir); + + if (kaiser_enabled) { + /* + * We reuse the same PCID for different tasks, so we must + * flush all the entries for the PCID out when we change tasks. + * Flush KERN below, flush USER when returning to userspace in + * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro. + * + * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could + * do it here, but can only be used if X86_FEATURE_INVPCID is + * available - and many machines support pcid without invpcid. + * + * If X86_CR3_PCID_KERN_FLUSH actually added something, then it + * would be needed in the write_cr3() below - if PCIDs enabled. + */ + BUILD_BUG_ON(X86_CR3_PCID_KERN_FLUSH); + kaiser_flush_tlb_on_return_to_user(); + } + + /* + * Caution: many callers of this function expect + * that load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask writes. + */ + write_cr3(new_mm_cr3); +} + /* * We cannot call mmdrop() because we are in interrupt context, * instead update mm->cpu_vm_mask. @@ -47,7 +76,7 @@ void leave_mm(int cpu) BUG(); if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); - load_cr3(swapper_pg_dir); + load_new_mm_cr3(swapper_pg_dir); /* * This gets called in the idle path where RCU * functions differently. Tracing normally @@ -59,8 +88,6 @@ void leave_mm(int cpu) } EXPORT_SYMBOL_GPL(leave_mm); -#endif /* CONFIG_SMP */ - void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -83,7 +110,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * mapped in the new pgd, we'll double-fault. Forcibly * map it. */ - unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); + unsigned int stack_pgd_index = pgd_index(current_stack_pointer); pgd_t *pgd = next->pgd + stack_pgd_index; @@ -91,10 +118,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, set_pgd(pgd, init_mm.pgd[stack_pgd_index]); } -#ifdef CONFIG_SMP this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); this_cpu_write(cpu_tlbstate.active_mm, next); -#endif cpumask_set_cpu(cpu, mm_cpumask(next)); @@ -126,7 +151,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * ordering guarantee we need. * */ - load_cr3(next->pgd); + load_new_mm_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); @@ -152,9 +177,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (unlikely(prev->context.ldt != next->context.ldt)) load_mm_ldt(next); #endif - } -#ifdef CONFIG_SMP - else { + } else { this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next); @@ -175,17 +198,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * As above, load_cr3() is serializing and orders TLB * fills with respect to the mm_cpumask write. 
*/ - load_cr3(next->pgd); + load_new_mm_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); load_mm_cr4(next); load_mm_ldt(next); } } -#endif } -#ifdef CONFIG_SMP - /* * The flush IPI assumes that a thread switch happens in this order: * [cpu0: the cpu that switches] @@ -287,23 +307,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask, smp_call_function_many(cpumask, flush_tlb_func, &info, 1); } -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - - preempt_disable(); - - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - - /* This is an implicit full barrier that synchronizes with switch_mm. */ - local_flush_tlb(); - - trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); - if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); - preempt_enable(); -} - /* * See Documentation/x86/tlb.txt for details. We choose 33 * because it is large enough to cover the vast majority (at @@ -324,6 +327,12 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long base_pages_to_flush = TLB_FLUSH_ALL; preempt_disable(); + + if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) + base_pages_to_flush = (end - start) >> PAGE_SHIFT; + if (base_pages_to_flush > tlb_single_page_flush_ceiling) + base_pages_to_flush = TLB_FLUSH_ALL; + if (current->active_mm != mm) { /* Synchronize with switch_mm. */ smp_mb(); @@ -340,15 +349,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, goto out; } - if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) - base_pages_to_flush = (end - start) >> PAGE_SHIFT; - /* * Both branches below are implicit full barriers (MOV to CR or * INVLPG) that synchronize with switch_mm. */ - if (base_pages_to_flush > tlb_single_page_flush_ceiling) { - base_pages_to_flush = TLB_FLUSH_ALL; + if (base_pages_to_flush == TLB_FLUSH_ALL) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); } else { @@ -369,33 +374,6 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, preempt_enable(); } -void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) -{ - struct mm_struct *mm = vma->vm_mm; - - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) { - /* - * Implicit full barrier (INVLPG) that synchronizes - * with switch_mm. - */ - __flush_tlb_one(start); - } else { - leave_mm(smp_processor_id()); - - /* Synchronize with switch_mm. 
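/*
 * Example sketch (not part of the patch): the flush_tlb_mm_range() hunks
 * above hoist the page-count computation so the "full flush vs per-page
 * INVLPG" decision is settled once, before any barrier-bearing branch.
 * Reduced model of that decision; 33 is the default ceiling documented in
 * Documentation/x86/tlb.txt, and ex_* names are invented.
 */
#define EX_FLUSH_ALL  (~0ul)
#define EX_PAGE_SHIFT 12

static unsigned long ex_tlb_flush_ceiling = 33;

static unsigned long ex_pages_to_flush(unsigned long start, unsigned long end,
                                       int is_hugetlb)
{
        unsigned long pages = EX_FLUSH_ALL;

        if (end != EX_FLUSH_ALL && !is_hugetlb)
                pages = (end - start) >> EX_PAGE_SHIFT;
        if (pages > ex_tlb_flush_ceiling)
                pages = EX_FLUSH_ALL;     /* one full flush beats many INVLPGs */
        return pages;
}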
*/ - smp_mb(); - } - } - - if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE); - - preempt_enable(); -} - static void do_flush_tlb_all(void *info) { count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); @@ -480,5 +458,3 @@ static int __init create_tlb_single_page_flush_ceiling(void) return 0; } late_initcall(create_tlb_single_page_flush_ceiling); - -#endif /* CONFIG_SMP */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 15f74361592366b0990e4fbecfd0cbb758e2cf25..7840331d30568665413d0a3319a98e26401ffd6f 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -278,10 +278,10 @@ static void emit_bpf_tail_call(u8 **pprog) /* if (index >= array->map.max_entries) * goto out; */ - EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ + EMIT2(0x89, 0xD2); /* mov edx, edx */ + EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); - EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ -#define OFFSET1 47 /* number of bytes to jump */ +#define OFFSET1 43 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -290,21 +290,20 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 36 +#define OFFSET2 32 EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ /* prog = array->ptrs[index]; */ - EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ + EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ offsetof(struct bpf_array, ptrs)); - EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ /* if (prog == NULL) * goto out; */ - EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */ + EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ #define OFFSET3 10 EMIT2(X86_JE, OFFSET3); /* je out */ label3 = cnt; diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index bb461cfd01abc78cdc45c6e69f013128e04ccdb4..526536c81ddc41d395fd971d909a3b687e46d989 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c @@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void) * We should get host bridge information from ACPI unless the BIOS * doesn't support it. 
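/*
 * Example sketch (not part of the patch): C rendering of the tightened
 * tail-call guard the JIT emits above, with the struct reduced to the two
 * fields the generated code touches. The explicit 32-bit truncation
 * mirrors the new "mov edx, edx", and the NULL check mirrors
 * "test rax, rax". ex_* names are invented.
 */
#include <stdint.h>

struct ex_bpf_array {
        uint32_t max_entries;   /* at offsetof(struct bpf_array, map.max_entries) */
        void *ptrs[];           /* at offsetof(struct bpf_array, ptrs) */
};

static void *ex_tail_call_prog(struct ex_bpf_array *arr, uint64_t index)
{
        uint32_t idx = (uint32_t)index;   /* mov edx, edx: drop high bits */

        if (idx >= arr->max_entries)      /* cmp [rsi+16], edx; jbe out */
                return 0;
        return arr->ptrs[idx];            /* caller tests the result for NULL */
}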
*/ - if (acpi_os_get_root_pointer()) + if (!acpi_disabled && acpi_os_get_root_pointer()) return 0; #endif diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 2f25a363068cf9723e8b418e8c1942a6d3ca4029..dcb2d9d185a2292f10753dcd00d1afe99c830ff7 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -142,7 +142,7 @@ int __init efi_alloc_page_tables(void) return 0; gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_page(gfp_mask); + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) return -ENOMEM; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 9e42842e924a6470015be1c1fd1e9af57942d740..0f0175186f1bf2c37867833166494846e25a5b6d 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1848,7 +1848,6 @@ static void pq_init(int node, int pnode) ops.write_payload_first(pnode, first); ops.write_payload_last(pnode, last); - ops.write_g_sw_ack(pnode, 0xffffUL); /* in effect, all msg_type's are set to MSG_NOOP */ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c index 836a1eb5df436bdad88fe3b4ae59b512f9573b19..3ee234b6234dd6eaf0a3e61b8d4d99677a5a7078 100644 --- a/arch/x86/um/ldt.c +++ b/arch/x86/um/ldt.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm) mm->arch.ldt.entry_count = 0; } -int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , + unsigned long , bytecount) { - return do_modify_ldt_skas(func, ptr, bytecount); + /* See non-um modify_ldt() for why we do this cast */ + return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 8f1f7efa848cf6c5ad9360c3188fa22c224ec090..2bea87cc0ff2340c271ebbdb3e81ba0f983640d6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -444,6 +444,12 @@ static void __init xen_init_cpuid_mask(void) ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */ (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + /* + * Xen PV would need some work to support PCID: CR3 handling as well + * as xen_flush_tlb_others() would need updating. 
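/*
 * Example sketch (not part of the patch): kernel cpufeature bits are
 * packed as (word * 32 + bit), so the "% 32" in the Xen hunk below
 * recovers the bit position inside a single CPUID register when masking a
 * leaf. Sketch assuming PCID's usual encoding of word 4, bit 17
 * (CPUID.1:ECX[17]); the ex_ name is invented.
 */
#define EX_X86_FEATURE_PCID (4 * 32 + 17)

static unsigned int ex_mask_out_pcid(unsigned int leaf1_ecx_mask)
{
        return leaf1_ecx_mask & ~(1u << (EX_X86_FEATURE_PCID % 32));
}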
+ */ + cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_PCID % 32)); /* disable PCID */ + if (!xen_initial_domain()) cpuid_leaf1_edx_mask &= ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */ diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h index b39531babec03e778e53c481d8b7b5374c67449a..72bfc1cbc2b546578d902866d76256fffa7451fe 100644 --- a/arch/xtensa/include/asm/futex.h +++ b/arch/xtensa/include/asm/futex.h @@ -109,7 +109,6 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { int ret = 0; - u32 prev; if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; @@ -120,26 +119,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, __asm__ __volatile__ ( " # futex_atomic_cmpxchg_inatomic\n" - "1: l32i %1, %3, 0\n" - " mov %0, %5\n" - " wsr %1, scompare1\n" - "2: s32c1i %0, %3, 0\n" - "3:\n" + " wsr %5, scompare1\n" + "1: s32c1i %1, %4, 0\n" + " s32i %1, %6, 0\n" + "2:\n" " .section .fixup,\"ax\"\n" " .align 4\n" - "4: .long 3b\n" - "5: l32r %1, 4b\n" - " movi %0, %6\n" + "3: .long 2b\n" + "4: l32r %1, 3b\n" + " movi %0, %7\n" " jx %1\n" " .previous\n" " .section __ex_table,\"a\"\n" - " .long 1b,5b,2b,5b\n" + " .long 1b,4b\n" " .previous\n" - : "+r" (ret), "=&r" (prev), "+m" (*uaddr) - : "r" (uaddr), "r" (oldval), "r" (newval), "I" (-EFAULT) + : "+r" (ret), "+r" (newval), "+m" (*uaddr), "+m" (*uval) + : "r" (uaddr), "r" (oldval), "r" (uval), "I" (-EFAULT) : "memory"); - *uval = prev; return ret; } diff --git a/block/badblocks.c b/block/badblocks.c index 6ebcef28231486ae2b9dcefadfe61412b2112f11..2fe6c117ac962b3d235b9c0f2250f9e1b3c6daf4 100644 --- a/block/badblocks.c +++ b/block/badblocks.c @@ -178,7 +178,7 @@ int badblocks_set(struct badblocks *bb, sector_t s, int sectors, if (bb->shift < 0) /* badblocks are disabled */ - return 0; + return 1; if (bb->shift) { /* round the start down, and the end up */ diff --git a/block/blk-core.c b/block/blk-core.c index 1e63ee12d44fb67c48f08b44cfa273f812ab8ed4..b2e57642dbb9d23cab0adfdc2f9335bd7f4c37b3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -529,8 +529,8 @@ void blk_set_queue_dying(struct request_queue *q) blk_queue_for_each_rl(rl, q) { if (rl->rq_pool) { - wake_up(&rl->wait[BLK_RW_SYNC]); - wake_up(&rl->wait[BLK_RW_ASYNC]); + wake_up_all(&rl->wait[BLK_RW_SYNC]); + wake_up_all(&rl->wait[BLK_RW_ASYNC]); } } } @@ -3583,76 +3583,43 @@ int __init blk_dev_init(void) * TODO : If necessary, we can make the histograms per-cpu and aggregate * them when printing them out. 
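/*
 * Example sketch (not part of the patch): badblocks_set() reports 0 for
 * "range recorded" and non-zero for "could not record", so with the table
 * disabled (shift < 0) it must fail rather than pretend the bad range was
 * acknowledged, which is what the one-line fix above restores. Reduced
 * model of the entry check and the shift rounding; ex_* names are invented.
 */
static int ex_badblocks_set(int shift, unsigned long long s, int sectors)
{
        if (shift < 0)          /* badblocks are disabled */
                return 1;       /* caller must treat the range as unhandled */
        if (shift) {
                /* round the start down, and the end up */
                unsigned long long end = s + sectors;

                s >>= shift;
                end = (end + (1ull << shift) - 1) >> shift;
                sectors = (int)(end - s);
        }
        /* ... insert [s, s + sectors) into the table ... */
        return 0;
}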
*/ -void -blk_zero_latency_hist(struct io_latency_state *s) -{ - memset(s->latency_y_axis_read, 0, - sizeof(s->latency_y_axis_read)); - memset(s->latency_y_axis_write, 0, - sizeof(s->latency_y_axis_write)); - s->latency_reads_elems = 0; - s->latency_writes_elems = 0; -} -EXPORT_SYMBOL(blk_zero_latency_hist); - ssize_t -blk_latency_hist_show(struct io_latency_state *s, char *buf) +blk_latency_hist_show(char* name, struct io_latency_state *s, char *buf, + int buf_size) { int i; int bytes_written = 0; u_int64_t num_elem, elem; int pct; - - num_elem = s->latency_reads_elems; - if (num_elem > 0) { - bytes_written += scnprintf(buf + bytes_written, - PAGE_SIZE - bytes_written, - "IO svc_time Read Latency Histogram (n = %llu):\n", - num_elem); - for (i = 0; - i < ARRAY_SIZE(latency_x_axis_us); - i++) { - elem = s->latency_y_axis_read[i]; - pct = div64_u64(elem * 100, num_elem); - bytes_written += scnprintf(buf + bytes_written, - PAGE_SIZE - bytes_written, - "\t< %5lluus%15llu%15d%%\n", - latency_x_axis_us[i], - elem, pct); - } - /* Last element in y-axis table is overflow */ - elem = s->latency_y_axis_read[i]; - pct = div64_u64(elem * 100, num_elem); - bytes_written += scnprintf(buf + bytes_written, - PAGE_SIZE - bytes_written, - "\t> %5dms%15llu%15d%%\n", 10, - elem, pct); - } - num_elem = s->latency_writes_elems; - if (num_elem > 0) { - bytes_written += scnprintf(buf + bytes_written, - PAGE_SIZE - bytes_written, - "IO svc_time Write Latency Histogram (n = %llu):\n", - num_elem); - for (i = 0; - i < ARRAY_SIZE(latency_x_axis_us); - i++) { - elem = s->latency_y_axis_write[i]; - pct = div64_u64(elem * 100, num_elem); - bytes_written += scnprintf(buf + bytes_written, - PAGE_SIZE - bytes_written, - "\t< %5lluus%15llu%15d%%\n", - latency_x_axis_us[i], - elem, pct); - } - /* Last element in y-axis table is overflow */ - elem = s->latency_y_axis_write[i]; - pct = div64_u64(elem * 100, num_elem); - bytes_written += scnprintf(buf + bytes_written, - PAGE_SIZE - bytes_written, - "\t> %5dms%15llu%15d%%\n", 10, - elem, pct); + u_int64_t average; + + num_elem = s->latency_elems; + if (num_elem > 0) { + average = div64_u64(s->latency_sum, s->latency_elems); + bytes_written += scnprintf(buf + bytes_written, + buf_size - bytes_written, + "IO svc_time %s Latency Histogram (n = %llu," + " average = %llu):\n", name, num_elem, average); + for (i = 0; + i < ARRAY_SIZE(latency_x_axis_us); + i++) { + elem = s->latency_y_axis[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t< %6lluus%15llu%15d%%\n", + latency_x_axis_us[i], + elem, pct); + } + /* Last element in y-axis table is overflow */ + elem = s->latency_y_axis[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t>=%6lluus%15llu%15d%%\n", + latency_x_axis_us[i - 1], elem, pct); } + return bytes_written; } EXPORT_SYMBOL(blk_latency_hist_show); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 01fb455d3377472c6201d6755716b961e5b9620a..8c0894e0713b99b66ae6840f27a85ce776dd77ad 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -429,7 +429,7 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) kobject_init(&hctx->kobj, &blk_mq_hw_ktype); } -static void blk_mq_sysfs_init(struct request_queue *q) +void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; int cpu; @@ -449,8 +449,6 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q) 
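/*
 * Example sketch (not part of the patch): the rewritten helper above
 * collapses the separate read and write tables into one state and adds an
 * average line. Standalone model of the same bucket arithmetic, with
 * plain 64-bit division standing in for div64_u64(); the bucket bounds
 * and ex_* names are invented.
 */
#include <stdint.h>
#include <stdio.h>

static const uint64_t ex_x_axis_us[] = { 100, 1000, 10000 };
#define EX_NBUCKETS (sizeof(ex_x_axis_us) / sizeof(ex_x_axis_us[0]))

struct ex_latency_state {
        uint64_t elems;                   /* samples recorded */
        uint64_t sum;                     /* total latency, for the average */
        uint64_t y_axis[EX_NBUCKETS + 1]; /* final slot counts overflow */
};

static void ex_hist_show(const char *name, const struct ex_latency_state *s)
{
        size_t i;

        if (!s->elems)
                return;
        printf("IO svc_time %s Latency Histogram (n = %llu, average = %llu):\n",
               name, (unsigned long long)s->elems,
               (unsigned long long)(s->sum / s->elems));
        for (i = 0; i < EX_NBUCKETS; i++)
                printf("\t< %6lluus%15llu%15llu%%\n",
                       (unsigned long long)ex_x_axis_us[i],
                       (unsigned long long)s->y_axis[i],
                       (unsigned long long)(s->y_axis[i] * 100 / s->elems));
        printf("\t>=%6lluus%15llu%15llu%%\n",
               (unsigned long long)ex_x_axis_us[EX_NBUCKETS - 1],
               (unsigned long long)s->y_axis[EX_NBUCKETS],
               (unsigned long long)(s->y_axis[EX_NBUCKETS] * 100 / s->elems));
}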
blk_mq_disable_hotplug(); - blk_mq_sysfs_init(q); - ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); if (ret < 0) goto out; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index dcf5ce3ba4bff3068486d8596c5941e9f382d228..4bc701b32ce24c6ddda10fcbb266e5d07f41cd74 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -311,6 +311,9 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) for (i = 0; i < set->nr_hw_queues; i++) { struct blk_mq_tags *tags = set->tags[i]; + if (!tags) + continue; + for (j = 0; j < tags->nr_tags; j++) { if (!tags->rqs[j]) continue; diff --git a/block/blk-mq.c b/block/blk-mq.c index 7b597ec4e9c52b41c46efdd17268d687a294ffb1..10f8f94b7f20d7871f84a8c90a792a8b54f22e6d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1707,7 +1707,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); struct blk_mq_hw_ctx *hctx; - memset(__ctx, 0, sizeof(*__ctx)); __ctx->cpu = i; spin_lock_init(&__ctx->lock); INIT_LIST_HEAD(&__ctx->rq_list); @@ -1970,6 +1969,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->queue_ctx) goto err_exit; + /* init q->mq_kobj and sw queues' kobjects */ + blk_mq_sysfs_init(q); + q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node); if (!q->queue_hw_ctx) diff --git a/block/blk-mq.h b/block/blk-mq.h index e5d25249028c746c2876c668aa040bcf352b70c6..c55bcf67b9560fba32e4e53318ba149dba4320ad 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -50,6 +50,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, /* * sysfs helpers */ +extern void blk_mq_sysfs_init(struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); diff --git a/crypto/Kconfig b/crypto/Kconfig index 84d71482bf080288d2c379b03dbb02894ba3b4c5..ab0d93ab56952229b61ecbcd49464886412c38d8 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -120,7 +120,7 @@ config CRYPTO_DH config CRYPTO_ECDH tristate "ECDH algorithm" - select CRYTPO_KPP + select CRYPTO_KPP help Generic implementation of the ECDH algorithm diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f5e18c2a48527bb3f5bbdc5202b37577689710b3..ca50eeb13097cfb77b271b99ca4b248b974fcbaa 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -149,7 +149,7 @@ EXPORT_SYMBOL_GPL(af_alg_release_parent); static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - const u32 forbidden = CRYPTO_ALG_INTERNAL; + const u32 allowed = CRYPTO_ALG_KERN_DRIVER_ONLY; struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct sockaddr_alg *sa = (void *)uaddr; @@ -157,6 +157,10 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) void *private; int err; + /* If caller uses non-allowed flag, return error. 
*/ + if ((sa->salg_feat & ~allowed) || (sa->salg_mask & ~allowed)) + return -EINVAL; + if (sock->state == SS_CONNECTED) return -EINVAL; @@ -175,9 +179,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (IS_ERR(type)) return PTR_ERR(type); - private = type->bind(sa->salg_name, - sa->salg_feat & ~forbidden, - sa->salg_mask & ~forbidden); + private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask); if (IS_ERR(private)) { module_put(type->owner); return PTR_ERR(private); diff --git a/crypto/ahash.c b/crypto/ahash.c index cce0268a13fefa785ad2207447b1f3b8b16d53e7..f3fa104de47998e899baeb07c38a09f0e19c6691 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -625,5 +625,16 @@ struct hash_alg_common *ahash_attr_alg(struct rtattr *rta, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(ahash_attr_alg); +bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg) +{ + struct crypto_alg *alg = &halg->base; + + if (alg->cra_type != &crypto_ahash_type) + return crypto_shash_alg_has_setkey(__crypto_shash_alg(alg)); + + return __crypto_ahash_alg(alg)->setkey != NULL; +} +EXPORT_SYMBOL_GPL(crypto_hash_alg_has_setkey); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous cryptographic hash type"); diff --git a/crypto/algapi.c b/crypto/algapi.c index 1fad2a6b3bbbf0d1d4ee07f585bdc4d501467b5d..5c098ffa7d3d857deeafff9581df6e1bc5f2f4f5 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, spawn->alg = NULL; spawns = &inst->alg.cra_users; + + /* + * We may encounter an unregistered instance here, since + * an instance's spawns are set up prior to the instance + * being registered. An unregistered instance will have + * NULL ->cra_users.next, since ->cra_users isn't + * properly initialized until registration. But an + * unregistered instance cannot have any users, so treat + * it the same as ->cra_users being empty. 
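/*
 * Example sketch (not part of the patch): the alg_bind() check above
 * flips a denylist ("clear the forbidden bit") into an allowlist ("reject
 * anything outside the permitted set"), so unknown future flags fail
 * closed. Generic model of that test; the flag value here is invented,
 * the real one is CRYPTO_ALG_KERN_DRIVER_ONLY.
 */
#include <errno.h>
#include <stdint.h>

#define EX_ALLOWED_FLAGS 0x00000010u   /* assumed bit, not the kernel value */

static int ex_check_bind_flags(uint32_t feat, uint32_t mask)
{
        if ((feat & ~EX_ALLOWED_FLAGS) || (mask & ~EX_ALLOWED_FLAGS))
                return -EINVAL;        /* reject rather than sanitize */
        return 0;
}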
+ */ + if (spawns->next == NULL) + break; } } while ((spawns = crypto_more_spawns(alg, &stack, &top, &secondary_spawns))); diff --git a/crypto/api.c b/crypto/api.c index bbc147cb5dec87affad82510412459c075056f86..e5c1abfd451f4dc7668b87fe730ac14d2d8a2ea1 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "internal.h" LIST_HEAD(crypto_alg_list); @@ -611,5 +612,17 @@ int crypto_has_alg(const char *name, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(crypto_has_alg); +void crypto_req_done(struct crypto_async_request *req, int err) +{ + struct crypto_wait *wait = req->data; + + if (err == -EINPROGRESS) + return; + + wait->err = err; + complete(&wait->completion); +} +EXPORT_SYMBOL_GPL(crypto_req_done); + MODULE_DESCRIPTION("Cryptographic core API"); MODULE_LICENSE("GPL"); diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 2ffd69769466082eaf55cdfe71fb67704e0364af..5a37962d21994c2f4ee55f31a40970cda1c4d439 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -150,7 +150,7 @@ static int pkcs7_find_key(struct pkcs7_message *pkcs7, pr_devel("Sig %u: Found cert serial match X.509[%u]\n", sinfo->index, certix); - if (x509->pub->pkey_algo != sinfo->sig->pkey_algo) { + if (strcmp(x509->pub->pkey_algo, sinfo->sig->pkey_algo) != 0) { pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n", sinfo->index); continue; diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index c80765b211cf0fae7a91c35494dc9ed45247eafe..029f7051f2beae25286f4601948303408ed75849 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -408,6 +408,8 @@ int x509_extract_key_data(void *context, size_t hdrlen, ctx->cert->pub->pkey_algo = "rsa"; /* Discard the BIT STRING metadata */ + if (vlen < 1 || *(const u8 *)value != 0) + return -EBADMSG; ctx->key = value + 1; ctx->key_size = vlen - 1; return 0; diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index fb732296cd36437950e9228baaecce4373a329eb..e16009a8da9ca580c230a74536eeda8be7b128ca 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -125,7 +125,7 @@ int x509_check_for_self_signed(struct x509_certificate *cert) } ret = -EKEYREJECTED; - if (cert->pub->pkey_algo != cert->sig->pkey_algo) + if (strcmp(cert->pub->pkey_algo, cert->sig->pkey_algo) != 0) goto out; ret = public_key_verify_signature(cert->pub, cert->sig); diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index e899ef51dc8eda2dec93f98a1260977f04e9d4b2..cb1c3a3287b09a69896f02ddd007f9fbda5a81e5 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -610,6 +610,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, algt->mask)); if (IS_ERR(poly)) return PTR_ERR(poly); + poly_hash = __crypto_hash_alg_common(poly); + + err = -EINVAL; + if (poly_hash->digestsize != POLY1305_DIGEST_SIZE) + goto out_put_poly; err = -ENOMEM; inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); @@ -618,7 +623,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, ctx = aead_instance_ctx(inst); ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize; - poly_hash = __crypto_hash_alg_common(poly); err = crypto_init_ahash_spawn(&ctx->poly, poly_hash, aead_crypto_instance(inst)); if (err) diff --git a/crypto/cryptd.c b/crypto/cryptd.c 
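/*
 * Example sketch (not part of the patch): crypto_req_done() added above
 * is the completion half of a synchronous-wait pair; upstream pairs it
 * with a crypto_wait_req()-style helper. Userspace model of how a caller
 * drives an async request to completion with it; the busy-wait stands in
 * for wait_for_completion(), and ex_* names are invented.
 */
#include <errno.h>

struct ex_crypto_wait {
        volatile int completed;
        int err;
};

static void ex_req_done(struct ex_crypto_wait *wait, int err)
{
        if (err == -EINPROGRESS)       /* intermediate callback: keep waiting */
                return;
        wait->err = err;
        wait->completed = 1;           /* complete(&wait->completion) in-kernel */
}

static int ex_wait_req(int ret, struct ex_crypto_wait *wait)
{
        if (ret == -EINPROGRESS || ret == -EBUSY) {
                while (!wait->completed)
                        ;              /* wait_for_completion() in-kernel */
                ret = wait->err;
        }
        return ret;
}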
index 0c654e59f2157bc13e387a3ebfdcdc8b07b8170a..af9ad45d19090bac622f3cca00910e8ffd3207cd 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -691,7 +691,8 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.finup = cryptd_hash_finup_enqueue; inst->alg.export = cryptd_hash_export; inst->alg.import = cryptd_hash_import; - inst->alg.setkey = cryptd_hash_setkey; + if (crypto_shash_alg_has_setkey(salg)) + inst->alg.setkey = cryptd_hash_setkey; inst->alg.digest = cryptd_hash_digest_enqueue; err = ahash_register_instance(tmpl, inst); diff --git a/crypto/hmac.c b/crypto/hmac.c index 72e38c098bb3184d3886ac4054f123dca3bb49ac..ba07fb6221aee61be48bed8c4a1b994f3d61a0cb 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -194,11 +194,15 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb) salg = shash_attr_alg(tb[1], 0, 0); if (IS_ERR(salg)) return PTR_ERR(salg); + alg = &salg->base; + /* The underlying hash algorithm must be unkeyed */ err = -EINVAL; + if (crypto_shash_alg_has_setkey(salg)) + goto out_put_alg; + ds = salg->digestsize; ss = salg->statesize; - alg = &salg->base; if (ds > alg->cra_blocksize || ss < alg->cra_blocksize) goto out_put_alg; diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index c207458d62993350d9a0cfca5e1deca573b09c8c..6e9389c8bfbd644bcfdca559f63668edb3126b4a 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -80,6 +80,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue, pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); + spin_lock_init(&cpu_queue->q_lock); } return 0; } @@ -103,15 +104,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue, int cpu, err; struct mcryptd_cpu_queue *cpu_queue; - cpu = get_cpu(); - cpu_queue = this_cpu_ptr(queue->cpu_queue); - rctx->tag.cpu = cpu; + cpu_queue = raw_cpu_ptr(queue->cpu_queue); + spin_lock(&cpu_queue->q_lock); + cpu = smp_processor_id(); + rctx->tag.cpu = smp_processor_id(); err = crypto_enqueue_request(&cpu_queue->queue, request); pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", cpu, cpu_queue, request); + spin_unlock(&cpu_queue->q_lock); queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); - put_cpu(); return err; } @@ -160,16 +162,11 @@ static void mcryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); i = 0; while (i < MCRYPTD_BATCH || single_task_running()) { - /* - * preempt_disable/enable is used to prevent - * being preempted by mcryptd_enqueue_request() - */ - local_bh_disable(); - preempt_disable(); + + spin_lock_bh(&cpu_queue->q_lock); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); - preempt_enable(); - local_bh_enable(); + spin_unlock_bh(&cpu_queue->q_lock); if (!req) { mcryptd_opportunistic_flush(); @@ -184,7 +181,7 @@ static void mcryptd_queue_worker(struct work_struct *work) ++i; } if (cpu_queue->queue.qlen) - queue_work(kcrypto_wq, &cpu_queue->work); + queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work); } void mcryptd_flusher(struct work_struct *__work) @@ -537,7 +534,8 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.finup = mcryptd_hash_finup_enqueue; inst->alg.export = mcryptd_hash_export; inst->alg.import = mcryptd_hash_import; - inst->alg.setkey = mcryptd_hash_setkey; + if (crypto_hash_alg_has_setkey(halg)) + inst->alg.setkey = 
mcryptd_hash_setkey; inst->alg.digest = mcryptd_hash_digest_enqueue; err = ahash_register_instance(tmpl, inst); diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index ee9cfb99fe256af06ae7ad5d946c3b76d90de1e9..f8ec3d4ba4a80f8eefed739d9e8a852865a7ac02 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -254,6 +254,14 @@ static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm) crypto_free_aead(ctx->child); } +static void pcrypt_free(struct aead_instance *inst) +{ + struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst); + + crypto_drop_aead(&ctx->spawn); + kfree(inst); +} + static int pcrypt_init_instance(struct crypto_instance *inst, struct crypto_alg *alg) { @@ -319,6 +327,8 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.encrypt = pcrypt_aead_encrypt; inst->alg.decrypt = pcrypt_aead_decrypt; + inst->free = pcrypt_free; + err = aead_register_instance(tmpl, inst); if (err) goto out_drop_aead; @@ -349,14 +359,6 @@ static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb) return -EINVAL; } -static void pcrypt_free(struct crypto_instance *inst) -{ - struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst); - - crypto_drop_aead(&ctx->spawn); - kfree(inst); -} - static int pcrypt_cpumask_change_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -469,7 +471,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt) static struct crypto_template pcrypt_tmpl = { .name = "pcrypt", .create = pcrypt_create, - .free = pcrypt_free, .module = THIS_MODULE, }; diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index 2df9835dfbc0c6e039c522460926cee88991809a..bca99238948f14680eb85747d4ce4264ff077c8d 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -51,17 +51,6 @@ int crypto_poly1305_init(struct shash_desc *desc) } EXPORT_SYMBOL_GPL(crypto_poly1305_init); -int crypto_poly1305_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen) -{ - /* Poly1305 requires a unique key for each tag, which implies that - * we can't set it on the tfm that gets accessed by multiple users - * simultaneously. Instead we expect the key as the first 32 bytes in - * the update() call. */ - return -ENOTSUPP; -} -EXPORT_SYMBOL_GPL(crypto_poly1305_setkey); - static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key) { /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ @@ -80,6 +69,11 @@ static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key) dctx->s[3] = le32_to_cpuvp(key + 12); } +/* + * Poly1305 requires a unique key for each tag, which implies that we can't set + * it on the tfm that gets accessed by multiple users simultaneously. Instead we + * expect the key as the first 32 bytes in the update() call. 
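/*
 * Example sketch (not part of the patch): since the tfm-level setkey was
 * dropped above, the one-time Poly1305 key rides in front of the message
 * stream. Simplified model of a desc-level consumer that peels r and s
 * (16 bytes each) off the start of update() data and reports how many
 * leading bytes it consumed; the kernel version also copes with key
 * material arriving in smaller pieces. ex_* names are invented.
 */
#include <stddef.h>
#include <string.h>

struct ex_poly_desc {
        unsigned char r[16], s[16];
        unsigned int rset, sset;       /* how much key material is absorbed */
};

static size_t ex_setdesckey(struct ex_poly_desc *d,
                            const unsigned char *src, size_t len)
{
        size_t used = 0;

        if (!d->rset && len >= 16) {
                memcpy(d->r, src, 16); /* first 16 bytes: r (to be clamped) */
                d->rset = 1;
                src += 16;
                len -= 16;
                used += 16;
        }
        if (d->rset && !d->sset && len >= 16) {
                memcpy(d->s, src, 16); /* next 16 bytes: s */
                d->sset = 1;
                used += 16;
        }
        return used;                   /* remaining bytes are message data */
}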
+ */ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { @@ -285,7 +279,6 @@ static struct shash_alg poly1305_alg = { .init = crypto_poly1305_init, .update = crypto_poly1305_update, .final = crypto_poly1305_final, - .setkey = crypto_poly1305_setkey, .descsize = sizeof(struct poly1305_desc_ctx), .base = { .cra_name = "poly1305", diff --git a/crypto/rsa_helper.c b/crypto/rsa_helper.c index 0b66dc8246068aa084dd0b44210b04dee5f2bccb..cad395d70d78e18527866bf1a3f6452c338e4c7c 100644 --- a/crypto/rsa_helper.c +++ b/crypto/rsa_helper.c @@ -30,7 +30,7 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag, return -EINVAL; if (fips_enabled) { - while (!*ptr && n_sz) { + while (n_sz && !*ptr) { ptr++; n_sz--; } diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c index f550b5d9463074b16670129341de59e069f8509c..d7da0eea5622af96f63300ad45c35450951551e1 100644 --- a/crypto/salsa20_generic.c +++ b/crypto/salsa20_generic.c @@ -188,13 +188,6 @@ static int encrypt(struct blkcipher_desc *desc, salsa20_ivsetup(ctx, walk.iv); - if (likely(walk.nbytes == nbytes)) - { - salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, - walk.src.virt.addr, nbytes); - return blkcipher_walk_done(desc, &walk, 0); - } - while (walk.nbytes >= 64) { salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, walk.src.virt.addr, diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index 7e8ed96236cefa794ca39684c9c2592ca49f7976..a68be626017c2b7eb9785e02b6806f8ed665f7ae 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -18,6 +18,7 @@ #include #include #include +#include #define KECCAK_ROUNDS 24 @@ -149,7 +150,7 @@ static int sha3_update(struct shash_desc *desc, const u8 *data, unsigned int i; for (i = 0; i < sctx->rsizw; i++) - sctx->st[i] ^= ((u64 *) src)[i]; + sctx->st[i] ^= get_unaligned_le64(src + 8 * i); keccakf(sctx->st); done += sctx->rsiz; @@ -174,7 +175,7 @@ static int sha3_final(struct shash_desc *desc, u8 *out) sctx->buf[sctx->rsiz - 1] |= 0x80; for (i = 0; i < sctx->rsizw; i++) - sctx->st[i] ^= ((u64 *) sctx->buf)[i]; + sctx->st[i] ^= get_unaligned_le64(sctx->buf + 8 * i); keccakf(sctx->st); diff --git a/crypto/shash.c b/crypto/shash.c index 4d8a671d1614b5bc733bf65864793957660ef87f..9bd5044d467b28b6f430d08ef6e8c45559b51779 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -24,11 +24,12 @@ static const struct crypto_type crypto_shash_type; -static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) +int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) { return -ENOSYS; } +EXPORT_SYMBOL_GPL(shash_no_setkey); static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index ae22f05d5936c724f830cd00afee303a85836dd5..2a07341aca462ae120de5d494b9bce4ec8e59ee5 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -223,11 +223,13 @@ static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE], } sg_init_table(sg, np + 1); - np--; + if (rem) + np--; for (k = 0; k < np; k++) sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE); - sg_set_buf(&sg[k + 1], xbuf[k], rem); + if (rem) + sg_set_buf(&sg[k + 1], xbuf[k], rem); } static void test_aead_speed(const char *algo, int enc, unsigned int secs, @@ -342,7 +344,7 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, } sg_init_aead(sg, xbuf, - *b_size + (enc ? authsize : 0)); + *b_size + (enc ? 
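/*
 * Example sketch (not part of the patch): the sha3_generic hunks above
 * swap direct u64 casts, which fault on strict-alignment CPUs, for
 * get_unaligned_le64(). A portable byte-wise equivalent that is safe for
 * any alignment and host byte order; the ex_ name is invented.
 */
#include <stddef.h>
#include <stdint.h>

static uint64_t ex_get_unaligned_le64(const void *p)
{
        const unsigned char *b = p;
        uint64_t v = 0;
        size_t i;

        for (i = 0; i < 8; i++)
                v |= (uint64_t)b[i] << (8 * i);  /* assemble little-endian */
        return v;
}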
0 : authsize)); sg_init_aead(sgout, xoutbuf, *b_size + (enc ? authsize : 0)); @@ -350,7 +352,9 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, sg_set_buf(&sg[0], assoc, aad_size); sg_set_buf(&sgout[0], assoc, aad_size); - aead_request_set_crypt(req, sg, sgout, *b_size, iv); + aead_request_set_crypt(req, sg, sgout, + *b_size + (enc ? 0 : authsize), + iv); aead_request_set_ad(req, aad_size); if (secs) diff --git a/drivers/Kconfig b/drivers/Kconfig index e1e2066cecdb602e44f4a7103aa35827da9bbdd5..de581c13ec9ad1e63d2904f1c8c1c0011472f5f7 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -202,4 +202,6 @@ source "drivers/hwtracing/intel_th/Kconfig" source "drivers/fpga/Kconfig" +source "drivers/tee/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 733bf0b2613ffd0cd6087b0af9bb103b8d1e0898..9a90575c3f38de3811b6f262fabbdafa0466fdd8 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -174,3 +174,4 @@ obj-$(CONFIG_STM) += hwtracing/stm/ obj-$(CONFIG_ANDROID) += android/ obj-$(CONFIG_NVMEM) += nvmem/ obj-$(CONFIG_FPGA) += fpga/ +obj-$(CONFIG_TEE) += tee/ diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 3de3b6b8f0f1b756adc455229aedba9731113e5d..f43a586236eaa3490fc30e2aab51c50f9ac113a3 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -182,11 +182,6 @@ int __weak arch_register_cpu(int cpu) void __weak arch_unregister_cpu(int cpu) {} -int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) -{ - return -ENODEV; -} - static int acpi_processor_hotadd_init(struct acpi_processor *pr) { unsigned long long sta; diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c index 691814dfed313123b739bcb5a4a53e58d6d32ddc..943702dd9517eca601e09784db01204c10ef9f6e 100644 --- a/drivers/acpi/acpica/nsutils.c +++ b/drivers/acpi/acpica/nsutils.c @@ -594,25 +594,20 @@ struct acpi_namespace_node *acpi_ns_validate_handle(acpi_handle handle) void acpi_ns_terminate(void) { acpi_status status; + union acpi_operand_object *prev; + union acpi_operand_object *next; ACPI_FUNCTION_TRACE(ns_terminate); -#ifdef ACPI_EXEC_APP - { - union acpi_operand_object *prev; - union acpi_operand_object *next; + /* Delete any module-level code blocks */ - /* Delete any module-level code blocks */ - - next = acpi_gbl_module_code_list; - while (next) { - prev = next; - next = next->method.mutex; - prev->method.mutex = NULL; /* Clear the Mutex (cheated) field */ - acpi_ut_remove_reference(prev); - } + next = acpi_gbl_module_code_list; + while (next) { + prev = next; + next = next->method.mutex; + prev->method.mutex = NULL; /* Clear the Mutex (cheated) field */ + acpi_ut_remove_reference(prev); } -#endif /* * Free the entire namespace -- all nodes and all objects diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index ec4f507b524fa81343c44d0da039a19e468da5c4..4558cc73abf348a37680e8e62f4200dcc0e9f69a 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -1020,7 +1020,7 @@ static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count, /* The record may be cleared by others, try read next record */ if (len == -ENOENT) goto skip; - else if (len < sizeof(*rcd)) { + else if (len < 0 || len < sizeof(*rcd)) { rc = -EIO; goto out; } diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 56190d00fd87042461f430c5aa1cdd82fb60dc8d..0a3ca20f99af30221293df5ccf1e4cbd8292bd6e 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1197,7 +1197,6 @@ static int 
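/*
 * Example sketch (not part of the patch): the erst_reader() fix above
 * guards a classic signed/unsigned trap. Comparing a negative ssize_t
 * against sizeof() converts the negative value to a huge unsigned one, so
 * "len < sizeof(*rcd)" alone never fires on error returns; the added
 * "len < 0" catches them. Self-contained demonstration:
 */
#include <stdio.h>
#include <sys/types.h>

int main(void)
{
        ssize_t len = -5;                  /* e.g. a propagated error code */

        printf("%d\n", len < sizeof(int));            /* 0: check misses */
        printf("%d\n", len < 0 || len < sizeof(int)); /* 1: error caught */
        return 0;
}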
__init acpi_init(void) acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); - acpi_set_processor_mapping(); return 0; } diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index 7b2c48fde4e2b19f7134c1900931ff0695469ad9..201c7ceb70524d2bfab0235e8825b7dbf3094664 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -146,6 +146,10 @@ static int create_pnp_modalias(struct acpi_device *acpi_dev, char *modalias, int count; struct acpi_hardware_id *id; + /* Avoid unnecessarily loading modules for non present devices. */ + if (!acpi_device_is_present(acpi_dev)) + return 0; + /* * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 73c9c7fa9001434ec2bd8b1815260404a3308176..f06317d6fc38e7c20d5d255273deb7bea92289bc 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -99,13 +99,13 @@ static int find_child_checks(struct acpi_device *adev, bool check_children) return -ENODEV; /* - * If the device has a _HID (or _CID) returning a valid ACPI/PNP - * device ID, it is better to make it look less attractive here, so that - * the other device with the same _ADR value (that may not have a valid - * device ID) can be matched going forward. [This means a second spec - * violation in a row, so whatever we do here is best effort anyway.] + * If the device has a _HID returning a valid ACPI/PNP device ID, it is + * better to make it look less attractive here, so that the other device + * with the same _ADR value (that may not have a valid device ID) can be + * matched going forward. [This means a second spec violation in a row, + * so whatever we do here is best effort anyway.] */ - return sta_present && list_empty(&adev->pnp.ids) ? + return sta_present && !adev->pnp.type.platform_id ? FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; } diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index f3bc901ac930c9b61edb2b65b5d0e55d32236ecf..b1815b20a99c0dcda0b385d9fef0c4321cf83125 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1390,6 +1390,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, dev_name(&adev_dimm->dev)); return -ENXIO; } + /* + * Record nfit_mem for the notification path to track back to + * the nfit sysfs attributes for this dimm device object. 
+ */ + dev_set_drvdata(&adev_dimm->dev, nfit_mem); /* * Until standardization materializes we need to consider 4 @@ -1446,9 +1451,11 @@ static void shutdown_dimm_notify(void *data) sysfs_put(nfit_mem->flags_attr); nfit_mem->flags_attr = NULL; } - if (adev_dimm) + if (adev_dimm) { acpi_remove_notify_handler(adev_dimm->handle, ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify); + dev_set_drvdata(&adev_dimm->dev, NULL); + } } mutex_unlock(&acpi_desc->init_mutex); } @@ -1528,6 +1535,9 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) struct kernfs_node *nfit_kernfs; nvdimm = nfit_mem->nvdimm; + if (!nvdimm) + continue; + nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit"); if (nfit_kernfs) nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs, diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 5c78ee1860b0ad390671e8f1b1c624339f7414ed..fd59ae871db3b4e2034fbec95373a852ddc9047c 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -280,79 +280,6 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) } EXPORT_SYMBOL_GPL(acpi_get_cpuid); -#ifdef CONFIG_ACPI_HOTPLUG_CPU -static bool __init -map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid) -{ - int type, id; - u32 acpi_id; - acpi_status status; - acpi_object_type acpi_type; - unsigned long long tmp; - union acpi_object object = { 0 }; - struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; - - status = acpi_get_type(handle, &acpi_type); - if (ACPI_FAILURE(status)) - return false; - - switch (acpi_type) { - case ACPI_TYPE_PROCESSOR: - status = acpi_evaluate_object(handle, NULL, NULL, &buffer); - if (ACPI_FAILURE(status)) - return false; - acpi_id = object.processor.proc_id; - - /* validate the acpi_id */ - if(acpi_processor_validate_proc_id(acpi_id)) - return false; - break; - case ACPI_TYPE_DEVICE: - status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp); - if (ACPI_FAILURE(status)) - return false; - acpi_id = tmp; - break; - default: - return false; - } - - type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0; - - *phys_id = __acpi_get_phys_id(handle, type, acpi_id, false); - id = acpi_map_cpuid(*phys_id, acpi_id); - - if (id < 0) - return false; - *cpuid = id; - return true; -} - -static acpi_status __init -set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context, - void **rv) -{ - phys_cpuid_t phys_id; - int cpu_id; - - if (!map_processor(handle, &phys_id, &cpu_id)) - return AE_ERROR; - - acpi_map_cpu2node(handle, cpu_id, phys_id); - return AE_OK; -} - -void __init acpi_set_processor_mapping(void) -{ - /* Set persistent cpu <-> node mapping for all processors. 
*/ - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, set_processor_node_mapping, - NULL, NULL, NULL); -} -#else -void __init acpi_set_processor_mapping(void) {} -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base, u64 *phys_addr, int *ioapic_id) diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 2fa8304171e09b70e501dcab7892a9b443c84695..7a3431018e0ab4ce96dc055abf31444e498b04bb 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -275,8 +275,8 @@ static int acpi_smbus_hc_add(struct acpi_device *device) device->driver_data = hc; acpi_ec_add_query_handler(hc->ec, hc->query_bit, NULL, smbus_alarm, hc); - printk(KERN_INFO PREFIX "SBS HC: EC = 0x%p, offset = 0x%0x, query_bit = 0x%0x\n", - hc->ec, hc->offset, hc->query_bit); + dev_info(&device->dev, "SBS HC: offset = 0x%0x, query_bit = 0x%0x\n", + hc->offset, hc->query_bit); return 0; } diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 1ef2f68bfcb2fb1c7cea1538e7a2c163e193142b..bb48a7b6ee2d30f8f52ba7bf9ccbe9ebb318708d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -512,8 +512,6 @@ struct binder_priority { * (protected by @inner_lock) * @todo: list of work for this process * (protected by @inner_lock) - * @wait: wait queue head to wait for proc work - * (invariant after initialized) * @stats: per-process binder statistics * (atomics, no lock needed) * @delivered_death: list of delivered death notification @@ -554,7 +552,6 @@ struct binder_proc { bool is_dead; struct list_head todo; - wait_queue_head_t wait; struct binder_stats stats; struct list_head delivered_death; int max_threads; @@ -4538,6 +4535,18 @@ static int binder_thread_release(struct binder_proc *proc, if (t) spin_lock(&t->lock); } + + /* + * If this thread used poll, make sure we remove the waitqueue + * from any epoll data structures holding it with POLLFREE. + * waitqueue_active() is safe to use here because we're holding + * the inner lock. 
+ */ + if ((thread->looper & BINDER_LOOPER_STATE_POLL) && + waitqueue_active(&thread->wait)) { + wake_up_poll(&thread->wait, POLLHUP | POLLFREE); + } + binder_inner_proc_unlock(thread->proc); if (send_reply) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index c94038206c3aec59ed4535978a0f47d4cfca99fd..9b46ef4c851e1114b16eede6e5f924aad6804dc9 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -265,9 +265,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x3b23), board_ahci }, /* PCH AHCI */ { PCI_VDEVICE(INTEL, 0x3b24), board_ahci }, /* PCH RAID */ { PCI_VDEVICE(INTEL, 0x3b25), board_ahci }, /* PCH RAID */ - { PCI_VDEVICE(INTEL, 0x3b29), board_ahci }, /* PCH AHCI */ + { PCI_VDEVICE(INTEL, 0x3b29), board_ahci }, /* PCH M AHCI */ { PCI_VDEVICE(INTEL, 0x3b2b), board_ahci }, /* PCH RAID */ - { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH RAID */ + { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH M RAID */ { PCI_VDEVICE(INTEL, 0x3b2f), board_ahci }, /* PCH AHCI */ { PCI_VDEVICE(INTEL, 0x19b0), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x19b1), board_ahci }, /* DNV AHCI */ @@ -290,9 +290,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x19cE), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x19cF), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x1c02), board_ahci }, /* CPT AHCI */ - { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT AHCI */ + { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT M AHCI */ { PCI_VDEVICE(INTEL, 0x1c04), board_ahci }, /* CPT RAID */ - { PCI_VDEVICE(INTEL, 0x1c05), board_ahci }, /* CPT RAID */ + { PCI_VDEVICE(INTEL, 0x1c05), board_ahci }, /* CPT M RAID */ { PCI_VDEVICE(INTEL, 0x1c06), board_ahci }, /* CPT RAID */ { PCI_VDEVICE(INTEL, 0x1c07), board_ahci }, /* CPT RAID */ { PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */ @@ -301,20 +301,20 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* PBG RAID */ { PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */ { PCI_VDEVICE(INTEL, 0x1e02), board_ahci }, /* Panther Point AHCI */ - { PCI_VDEVICE(INTEL, 0x1e03), board_ahci }, /* Panther Point AHCI */ + { PCI_VDEVICE(INTEL, 0x1e03), board_ahci }, /* Panther Point M AHCI */ { PCI_VDEVICE(INTEL, 0x1e04), board_ahci }, /* Panther Point RAID */ { PCI_VDEVICE(INTEL, 0x1e05), board_ahci }, /* Panther Point RAID */ { PCI_VDEVICE(INTEL, 0x1e06), board_ahci }, /* Panther Point RAID */ - { PCI_VDEVICE(INTEL, 0x1e07), board_ahci }, /* Panther Point RAID */ + { PCI_VDEVICE(INTEL, 0x1e07), board_ahci }, /* Panther Point M RAID */ { PCI_VDEVICE(INTEL, 0x1e0e), board_ahci }, /* Panther Point RAID */ { PCI_VDEVICE(INTEL, 0x8c02), board_ahci }, /* Lynx Point AHCI */ - { PCI_VDEVICE(INTEL, 0x8c03), board_ahci }, /* Lynx Point AHCI */ + { PCI_VDEVICE(INTEL, 0x8c03), board_ahci }, /* Lynx Point M AHCI */ { PCI_VDEVICE(INTEL, 0x8c04), board_ahci }, /* Lynx Point RAID */ - { PCI_VDEVICE(INTEL, 0x8c05), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x8c05), board_ahci }, /* Lynx Point M RAID */ { PCI_VDEVICE(INTEL, 0x8c06), board_ahci }, /* Lynx Point RAID */ - { PCI_VDEVICE(INTEL, 0x8c07), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x8c07), board_ahci }, /* Lynx Point M RAID */ { PCI_VDEVICE(INTEL, 0x8c0e), board_ahci }, /* Lynx Point RAID */ - { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci }, /* Lynx Point M RAID */ { PCI_VDEVICE(INTEL, 0x9c02), 
board_ahci }, /* Lynx Point-LP AHCI */ { PCI_VDEVICE(INTEL, 0x9c03), board_ahci }, /* Lynx Point-LP AHCI */ { PCI_VDEVICE(INTEL, 0x9c04), board_ahci }, /* Lynx Point-LP RAID */ @@ -355,21 +355,21 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x9c87), board_ahci }, /* Wildcat Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x9c8f), board_ahci }, /* Wildcat Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x8c82), board_ahci }, /* 9 Series AHCI */ - { PCI_VDEVICE(INTEL, 0x8c83), board_ahci }, /* 9 Series AHCI */ + { PCI_VDEVICE(INTEL, 0x8c83), board_ahci }, /* 9 Series M AHCI */ { PCI_VDEVICE(INTEL, 0x8c84), board_ahci }, /* 9 Series RAID */ - { PCI_VDEVICE(INTEL, 0x8c85), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c85), board_ahci }, /* 9 Series M RAID */ { PCI_VDEVICE(INTEL, 0x8c86), board_ahci }, /* 9 Series RAID */ - { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series M RAID */ { PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */ - { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series M RAID */ { PCI_VDEVICE(INTEL, 0x9d03), board_ahci }, /* Sunrise Point-LP AHCI */ { PCI_VDEVICE(INTEL, 0x9d05), board_ahci }, /* Sunrise Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x9d07), board_ahci }, /* Sunrise Point-LP RAID */ { PCI_VDEVICE(INTEL, 0xa102), board_ahci }, /* Sunrise Point-H AHCI */ - { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */ + { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H M AHCI */ { PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa106), board_ahci }, /* Sunrise Point-H RAID */ - { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H M RAID */ { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/ @@ -383,6 +383,11 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa356), board_ahci }, /* Cannon Lake PCH-H RAID */ + { PCI_VDEVICE(INTEL, 0x0f22), board_ahci }, /* Bay Trail AHCI */ + { PCI_VDEVICE(INTEL, 0x0f23), board_ahci }, /* Bay Trail AHCI */ + { PCI_VDEVICE(INTEL, 0x22a3), board_ahci }, /* Cherry Trail AHCI */ + { PCI_VDEVICE(INTEL, 0x5ae3), board_ahci }, /* Apollo Lake AHCI */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 33e363dcc63bf8581f9d1cc38f666083e88c9f4b..aee39524375c0316054933b2a1579afe11c1b41a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4322,6 +4322,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { * https://bugzilla.kernel.org/show_bug.cgi?id=121671 */ { "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 }, + { "LITEON EP1-*", NULL, ATA_HORKAGE_MAX_SEC_1024 }, /* Devices we expect to fail diagnostics */ diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 051b6158d1b7f45b7116b9debc98376a9d4d240f..8d22acdf90f0bc546ee70bcdd7d2e8245065ca34 100644 --- 
a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1481,7 +1481,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) break; default: - WARN_ON_ONCE(1); return AC_ERR_SYSTEM; } diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 5fc81e240c242a12fe972b7054c5d5162cdd4881..e55f418d6ab9c385394d7e26ed73289e351e3d45 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2802,7 +2802,7 @@ static int hrz_probe(struct pci_dev *pci_dev, return err; out_free_irq: - free_irq(dev->irq, dev); + free_irq(irq, dev); out_free: kfree(dev); out_release: diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index 10e1b9eee10eaf6ab4e5ac82b81d2ecfea83c10a..f03cf1df8d6bc1cfb58c7938384bed12577cd083 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -121,6 +121,7 @@ config CFAG12864B_RATE config IMG_ASCII_LCD tristate "Imagination Technologies ASCII LCD Display" + depends on HAS_IOMEM default y if MIPS_MALTA || MIPS_SEAD3 select SYSCON help diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c index 83f1439e57fd8cb5d40e0a9d435a8a7fe62939c4..6e8eaa7fe7a6ff7adcda5d660db0e6e5a07f744a 100644 --- a/drivers/auxdisplay/img-ascii-lcd.c +++ b/drivers/auxdisplay/img-ascii-lcd.c @@ -442,3 +442,7 @@ static struct platform_driver img_ascii_lcd_driver = { .remove = img_ascii_lcd_remove, }; module_platform_driver(img_ascii_lcd_driver); + +MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display"); +MODULE_AUTHOR("Paul Burton "); +MODULE_LICENSE("GPL"); diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index d02e7c0f5bfdff1c6c56372113e230e55be54f5f..0651010bba217c53ef52dca2531a216756089235 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -235,6 +235,9 @@ config GENERIC_CPU_DEVICES config GENERIC_CPU_AUTOPROBE bool +config GENERIC_CPU_VULNERABILITIES + bool + config SOC_BUS bool diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index e9fd32e91668992e478f31423c1e56be0724e1d2..70e13cf06ed0988b70352797cdf1e0843b8b577c 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -16,6 +16,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/ +#include #include #include #include @@ -104,9 +105,16 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf, *sib_leaf; unsigned int index; - int ret; + int ret = 0; + + if (this_cpu_ci->cpu_map_populated) + return 0; - ret = cache_setup_of_node(cpu); + if (of_have_populated_dt()) + ret = cache_setup_of_node(cpu); + else if (!acpi_disabled) + /* No cache property/hierarchy support yet in ACPI */ + ret = -ENOTSUPP; if (ret) return ret; @@ -203,8 +211,7 @@ static int detect_cache_attributes(unsigned int cpu) */ ret = cache_shared_cpu_map_setup(cpu); if (ret) { - pr_warn("Unable to detect cache hierarchy from DT for CPU %d\n", - cpu); + pr_warn("Unable to detect cache hierarchy for CPU %d\n", cpu); goto free_ci; } return 0; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 4c28e1a0978666e3f228a005661981e44205bd49..56b6c8508a8976c31abe84b780781a8d5b8978dd 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -499,10 +499,58 @@ static void __init cpu_dev_register_generic(void) #endif } +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES + +ssize_t __weak cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); +static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); +static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); + +static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, + &dev_attr_spectre_v1.attr, + &dev_attr_spectre_v2.attr, + NULL +}; + +static const struct attribute_group cpu_root_vulnerabilities_group = { + .name = "vulnerabilities", + .attrs = cpu_root_vulnerabilities_attrs, +}; + +static void __init cpu_register_vulnerabilities(void) +{ + if (sysfs_create_group(&cpu_subsys.dev_root->kobj, + &cpu_root_vulnerabilities_group)) + pr_err("Unable to register CPU vulnerabilities\n"); +} + +#else +static inline void cpu_register_vulnerabilities(void) { } +#endif + void __init cpu_dev_init(void) { if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups)) panic("Failed to register CPU subsystem"); cpu_dev_register_generic(); + cpu_register_vulnerabilities(); } diff --git a/drivers/base/isa.c b/drivers/base/isa.c index cd6ccdcf9df0c5ef2a37fabb2b4b7bbfa3c88755..372d10af26009dfae317affd625c756a2700fef1 100644 --- a/drivers/base/isa.c +++ b/drivers/base/isa.c @@ -39,7 +39,7 @@ static int isa_bus_probe(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->probe) + if (isa_driver && isa_driver->probe) return isa_driver->probe(dev, to_isa_dev(dev)->id); return 0; @@ -49,7 +49,7 @@ static int isa_bus_remove(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->remove) + if (isa_driver && isa_driver->remove) return isa_driver->remove(dev, to_isa_dev(dev)->id); return 0; @@ -59,7 +59,7 @@ static void isa_bus_shutdown(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->shutdown) + if (isa_driver && isa_driver->shutdown) isa_driver->shutdown(dev, to_isa_dev(dev)->id); } @@ -67,7 +67,7 @@ static int 
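/*
 * Example sketch (not part of the patch): the attribute group registered
 * above lands under the cpu subsystem root, so userspace reads each
 * mitigation state as a one-line text file. Minimal reader for the three
 * files the group exposes; it simply skips the directory on kernels that
 * predate this change.
 */
#include <stdio.h>

int main(void)
{
        static const char *const names[] =
                { "meltdown", "spectre_v1", "spectre_v2" };
        char path[96], line[128];
        unsigned int i;

        for (i = 0; i < 3; i++) {
                FILE *f;

                snprintf(path, sizeof(path),
                         "/sys/devices/system/cpu/vulnerabilities/%s",
                         names[i]);
                f = fopen(path, "r");
                if (!f)
                        continue;          /* group absent on old kernels */
                if (fgets(line, sizeof(line), f))
                        printf("%s: %s", names[i], line);
                fclose(f);
        }
        return 0;
}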
isa_bus_suspend(struct device *dev, pm_message_t state) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->suspend) + if (isa_driver && isa_driver->suspend) return isa_driver->suspend(dev, to_isa_dev(dev)->id, state); return 0; @@ -77,7 +77,7 @@ static int isa_bus_resume(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->resume) + if (isa_driver && isa_driver->resume) return isa_driver->resume(dev, to_isa_dev(dev)->id); return 0; diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 6441dfda489f80a404bacd6132d8900b9d98c6bf..a7c5b79371a7c015211328e123cccd9e706c8e2f 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -331,7 +331,7 @@ int dev_pm_opp_get_opp_count(struct device *dev) opp_table = _find_opp_table(dev); if (IS_ERR(opp_table)) { count = PTR_ERR(opp_table); - dev_err(dev, "%s: OPP table not found (%d)\n", + dev_dbg(dev, "%s: OPP table not found (%d)\n", __func__, count); goto out_unlock; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 24d6cefceb3272be99f908631efef23fae3a3e3e..402254d262474aecf93098e1c596619d169abc6c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1558,9 +1558,8 @@ static int lo_open(struct block_device *bdev, fmode_t mode) return err; } -static void lo_release(struct gendisk *disk, fmode_t mode) +static void __lo_release(struct loop_device *lo) { - struct loop_device *lo = disk->private_data; int err; if (atomic_dec_return(&lo->lo_refcnt)) @@ -1586,6 +1585,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode) mutex_unlock(&lo->lo_ctl_mutex); } +static void lo_release(struct gendisk *disk, fmode_t mode) +{ + mutex_lock(&loop_index_mutex); + __lo_release(disk->private_data); + mutex_unlock(&loop_index_mutex); +} + static const struct block_device_operations lo_fops = { .owner = THIS_MODULE, .open = lo_open, diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 98b767d3171e91fb59f4b9c53d507c96266b0b02..4d30da26906064642be13628d677c10f64cce9e1 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -272,6 +272,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd) int result, flags; struct nbd_request request; unsigned long size = blk_rq_bytes(req); + struct bio *bio; u32 type; if (req->cmd_type == REQ_TYPE_DRV_PRIV) @@ -305,16 +306,20 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd) return -EIO; } - if (type == NBD_CMD_WRITE) { - struct req_iterator iter; + if (type != NBD_CMD_WRITE) + return 0; + + flags = 0; + bio = req->bio; + while (bio) { + struct bio *next = bio->bi_next; + struct bvec_iter iter; struct bio_vec bvec; - /* - * we are really probing at internals to determine - * whether to set MSG_MORE or not... - */ - rq_for_each_segment(bvec, req, iter) { - flags = 0; - if (!rq_iter_last(bvec, iter)) + + bio_for_each_segment(bvec, bio, iter) { + bool is_last = !next && bio_iter_last(bvec, iter); + + if (is_last) flags = MSG_MORE; dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", cmd, bvec.bv_len); @@ -325,7 +330,16 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd) result); return -EIO; } + /* + * The completion might already have come in, + * so break for the last one instead of letting + * the iterator do it. This prevents use-after-free + * of the bio. 
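The rewritten nbd write path walks the bio chain itself so it can tell, per segment, whether more payload follows: every segment except the very last is sent with MSG_MORE, letting TCP coalesce the protocol header with its payload, and the loop breaks on the final segment because the completion may free the request and its bios as soon as the last byte is handed to the socket. For readers unfamiliar with MSG_MORE, a rough sketch of what the socket side of such a helper can look like (a generic, hypothetical helper, not nbd's internal send routine):

#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/net.h>
#include <linux/socket.h>

/* Send one bio_vec; 'more' tells the stack further data follows, so it
 * may hold back a short TCP segment and coalesce it with what comes next. */
static int example_send_bvec(struct socket *sock, struct bio_vec *bvec,
                             bool more)
{
        struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
        void *addr = kmap(bvec->bv_page);
        struct kvec iov = {
                .iov_base = addr + bvec->bv_offset,
                .iov_len = bvec->bv_len,
        };
        int ret;

        if (more)
                msg.msg_flags |= MSG_MORE;
        ret = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
        kunmap(bvec->bv_page);
        return ret;
}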
+ */ + if (is_last) + break; } + bio = next; } return 0; } @@ -654,7 +668,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return nbd_size_set(nbd, bdev, nbd->blksize, arg); case NBD_SET_TIMEOUT: - nbd->tag_set.timeout = arg * HZ; + if (arg) { + nbd->tag_set.timeout = arg * HZ; + blk_queue_rq_timeout(nbd->disk->queue, arg * HZ); + } return 0; case NBD_SET_FLAGS: diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 90fa4ac149dbe04e79c6dc2d954225b6f988079b..7e4ef050279666f7691a4da6b3ce20d89de441f8 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2779,7 +2779,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) pd->pkt_dev = MKDEV(pktdev_major, idx); ret = pkt_new_dev(pd, dev); if (ret) - goto out_new_dev; + goto out_mem2; /* inherit events of the host device */ disk->events = pd->bdev->bd_disk->events; @@ -2797,8 +2797,6 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) mutex_unlock(&ctl_mutex); return 0; -out_new_dev: - blk_cleanup_queue(disk->queue); out_mem2: put_disk(disk); out_mem: diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 24f4b544d270c1079357e50a8d9313f0e83d9983..e32badd26c8a3e16c5ae1503d1cd7efc9ab1f0ee 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4511,7 +4511,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) segment_size = rbd_obj_bytes(&rbd_dev->header); blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); q->limits.max_sectors = queue_max_hw_sectors(q); - blk_queue_max_segments(q, segment_size / SECTOR_SIZE); + blk_queue_max_segments(q, USHRT_MAX); blk_queue_max_segment_size(q, segment_size); blk_queue_io_min(q, segment_size); blk_queue_io_opt(q, segment_size); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c9914d6539687de9f2c827acac879aca1f2e0a4f..b7c0b69a02f54b198c0d1836752d0362731d0395 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1286,6 +1286,8 @@ static int zram_add(void) blk_queue_io_min(zram->disk->queue, PAGE_SIZE); blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); zram->disk->queue->limits.discard_granularity = PAGE_SIZE; + zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE; + zram->disk->queue->limits.chunk_sectors = 0; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); /* * zram_bio_discard() will clear all logical blocks if logical block diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index 1cb958e199ebc36a31e4a09b38eb3f39f2f969af..94e914a33a9990de4ef8a82760014a0730911f6e 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -291,6 +292,14 @@ static int btsdio_probe(struct sdio_func *func, tuple = tuple->next; } + /* BCM43341 devices soldered onto the PCB (non-removable) use an + * uart connection for bluetooth, ignore the BT SDIO interface. 
+ */ + if (func->vendor == SDIO_VENDOR_ID_BROADCOM && + func->device == SDIO_DEVICE_ID_BROADCOM_43341 && + !mmc_card_is_removable(func->card->host)) + return -ENODEV; + data = devm_kzalloc(&func->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 693028659cccea3994159ecc89bb7583de053055..3257647d4f74cb67efb90b81f361c07254604c70 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -23,6 +23,7 @@ #include #include +#include <linux/usb/quirks.h> #include #include @@ -369,8 +370,8 @@ static const struct usb_device_id blacklist_table[] = { #define BTUSB_FIRMWARE_LOADED 7 #define BTUSB_FIRMWARE_FAILED 8 #define BTUSB_BOOTING 9 -#define BTUSB_RESET_RESUME 10 -#define BTUSB_DIAG_RUNNING 11 +#define BTUSB_DIAG_RUNNING 10 +#define BTUSB_OOB_WAKE_ENABLED 11 struct btusb_data { struct hci_dev *hdev; @@ -2928,9 +2929,9 @@ static int btusb_probe(struct usb_interface *intf, /* QCA Rome devices lose their updated firmware over suspend, * but the USB hub doesn't notice any status change. - * Explicitly request a device reset on resume. + * Explicitly request a device reset on resume. */ - set_bit(BTUSB_RESET_RESUME, &data->flags); + interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; } #ifdef CONFIG_BT_HCIBTUSB_RTL @@ -2941,7 +2942,7 @@ static int btusb_probe(struct usb_interface *intf, * but the USB hub doesn't notice any status change. * Explicitly request a device reset on resume. */ - set_bit(BTUSB_RESET_RESUME, &data->flags); + interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; } #endif @@ -3098,14 +3099,6 @@ static int btusb_suspend(struct usb_interface *intf, pm_message_t message) btusb_stop_traffic(data); usb_kill_anchored_urbs(&data->tx_anchor); - /* Optionally request a device reset on resume, but only when - * wakeups are disabled. If wakeups are enabled we assume the - * device will stay powered up throughout suspend.
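The btusb hunks here and just below replace the driver-private BTUSB_RESET_RESUME flag, which only took effect in btusb_suspend() and only when wakeup was disabled, with the USB core's generic quirk, so the core now applies reset-resume handling for the device unconditionally. Reduced to an illustrative sketch (hypothetical helper name):

#include <linux/usb.h>
#include <linux/usb/quirks.h>

/* Hypothetical probe fragment: a device that loses state over suspend
 * asks the USB core, not the driver, to reset it on resume. */
static void example_request_reset_resume(struct usb_interface *intf)
{
        interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
}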
- */ - if (test_bit(BTUSB_RESET_RESUME, &data->flags) && - !device_may_wakeup(&data->udev->dev)) - data->udev->reset_resume = 1; - return 0; } diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 890082315054d0c2043151a4ea25e107fe61b1bb..10f56133b2816020f4bd657634a120ebf0744912 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -1755,14 +1755,17 @@ static int cci_pmu_probe(struct platform_device *pdev) raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock); mutex_init(&cci_pmu->reserve_mutex); atomic_set(&cci_pmu->active_events, 0); - cpumask_set_cpu(smp_processor_id(), &cci_pmu->cpus); + cpumask_set_cpu(get_cpu(), &cci_pmu->cpus); ret = cci_pmu_init(cci_pmu, pdev); - if (ret) + if (ret) { + put_cpu(); return ret; + } cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE, &cci_pmu->node); + put_cpu(); pr_info("ARM %s PMU driver probed", cci_pmu->model->name); return 0; } diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index aee83462b796bde9b8f6b2e7d500e9d5d5320288..45d7ecc66b22bbef478dd0808fc69128efa7fbc2 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -1271,11 +1271,16 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) int len = snprintf(NULL, 0, "ccn_%d", ccn->dt.id); name = devm_kzalloc(ccn->dev, len + 1, GFP_KERNEL); + if (!name) { + err = -ENOMEM; + goto error_choose_name; + } snprintf(name, len + 1, "ccn_%d", ccn->dt.id); } /* Perf driver registration */ ccn->dt.pmu = (struct pmu) { + .module = THIS_MODULE, .attr_groups = arm_ccn_pmu_attr_groups, .task_ctx_nr = perf_invalid_context, .event_init = arm_ccn_pmu_event_init, @@ -1297,7 +1302,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) } /* Pick one CPU which we will use to collect data from CCN... */ - cpumask_set_cpu(smp_processor_id(), &ccn->dt.cpu); + cpumask_set_cpu(get_cpu(), &ccn->dt.cpu); /* Also make sure that the overflow interrupt is handled by this CPU */ if (ccn->irq) { @@ -1314,10 +1319,13 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE, &ccn->dt.node); + put_cpu(); return 0; error_pmu_register: error_set_affinity: + put_cpu(); +error_choose_name: ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); for (i = 0; i < ccn->num_xps; i++) writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL); @@ -1578,8 +1586,8 @@ static int __init arm_ccn_init(void) static void __exit arm_ccn_exit(void) { - cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE); platform_driver_unregister(&arm_ccn_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE); } module_init(arm_ccn_init); diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 795c9d9c96a6d5ae08c036a221ae1f9a4b1f9c1b..2051d926e3037947386bfa64ba04a236a8ed313c 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -178,6 +178,7 @@ static struct bus_type sunxi_rsb_bus = { .match = sunxi_rsb_device_match, .probe = sunxi_rsb_device_probe, .remove = sunxi_rsb_device_remove, + .uevent = of_device_uevent_modalias, }; static void sunxi_rsb_dev_release(struct device *dev) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index a112c0146012e2ba1fb58b54be59d758d0349108..e0a53156b782f086e944ac333652873faf71cad1 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -241,6 +241,9 @@ struct smi_info { /* The timer for this si. 
*/ struct timer_list si_timer; + /* This flag is set, if the timer can be set */ + bool timer_can_start; + /* This flag is set, if the timer is running (timer_pending() isn't enough) */ bool timer_running; @@ -416,6 +419,8 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info) static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) { + if (!smi_info->timer_can_start) + return; smi_info->last_timeout_jiffies = jiffies; mod_timer(&smi_info->si_timer, new_val); smi_info->timer_running = true; @@ -435,21 +440,18 @@ static void start_new_msg(struct smi_info *smi_info, unsigned char *msg, smi_info->handlers->start_transaction(smi_info->si_sm, msg, size); } -static void start_check_enables(struct smi_info *smi_info, bool start_timer) +static void start_check_enables(struct smi_info *smi_info) { unsigned char msg[2]; msg[0] = (IPMI_NETFN_APP_REQUEST << 2); msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD; - if (start_timer) - start_new_msg(smi_info, msg, 2); - else - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); + start_new_msg(smi_info, msg, 2); smi_info->si_state = SI_CHECKING_ENABLES; } -static void start_clear_flags(struct smi_info *smi_info, bool start_timer) +static void start_clear_flags(struct smi_info *smi_info) { unsigned char msg[3]; @@ -458,10 +460,7 @@ static void start_clear_flags(struct smi_info *smi_info, bool start_timer) msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD; msg[2] = WDT_PRE_TIMEOUT_INT; - if (start_timer) - start_new_msg(smi_info, msg, 3); - else - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3); + start_new_msg(smi_info, msg, 3); smi_info->si_state = SI_CLEARING_FLAGS; } @@ -496,11 +495,11 @@ static void start_getting_events(struct smi_info *smi_info) * Note that we cannot just use disable_irq(), since the interrupt may * be shared. */ -static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer) +static inline bool disable_si_irq(struct smi_info *smi_info) { if ((smi_info->irq) && (!smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = true; - start_check_enables(smi_info, start_timer); + start_check_enables(smi_info); return true; } return false; @@ -510,7 +509,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info) { if ((smi_info->irq) && (smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = false; - start_check_enables(smi_info, true); + start_check_enables(smi_info); return true; } return false; @@ -528,7 +527,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info) msg = ipmi_alloc_smi_msg(); if (!msg) { - if (!disable_si_irq(smi_info, true)) + if (!disable_si_irq(smi_info)) smi_info->si_state = SI_NORMAL; } else if (enable_si_irq(smi_info)) { ipmi_free_smi_msg(msg); @@ -544,7 +543,7 @@ static void handle_flags(struct smi_info *smi_info) /* Watchdog pre-timeout */ smi_inc_stat(smi_info, watchdog_pretimeouts); - start_clear_flags(smi_info, true); + start_clear_flags(smi_info); smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT; if (smi_info->intf) ipmi_smi_watchdog_pretimeout(smi_info->intf); @@ -927,7 +926,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, * disable and messages disabled. */ if (smi_info->supports_event_msg_buff || smi_info->irq) { - start_check_enables(smi_info, true); + start_check_enables(smi_info); } else { smi_info->curr_msg = alloc_msg_handle_irq(smi_info); if (!smi_info->curr_msg) @@ -1234,6 +1233,7 @@ static int smi_start_processing(void *send_info, /* Set up the timer that drives the interface. 
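The timer_can_start flag being introduced in these hunks closes a teardown race: del_timer_sync() kills a pending timer, but cannot stop other code from re-arming it a moment later. Gating every mod_timer() call on a flag that is cleared before del_timer_sync() makes the stop permanent. As a generic sketch with a hypothetical struct example, not the ipmi_si code itself:

#include <linux/timer.h>

struct example {
        struct timer_list timer;
        bool timer_can_start;
};

static void example_mod_timer(struct example *ex, unsigned long expires)
{
        if (!ex->timer_can_start)
                return;                 /* shutting down: refuse to re-arm */
        mod_timer(&ex->timer, expires);
}

static void example_stop_timer(struct example *ex)
{
        ex->timer_can_start = false;    /* future re-arms become no-ops */
        del_timer_sync(&ex->timer);     /* kills any arm that raced the flag */
}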
*/ setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi); + new_smi->timer_can_start = true; smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES); /* Try to claim any interrupts. */ @@ -3448,10 +3448,12 @@ static void check_for_broken_irqs(struct smi_info *smi_info) check_set_rcv_irq(smi_info); } -static inline void wait_for_timer_and_thread(struct smi_info *smi_info) +static inline void stop_timer_and_thread(struct smi_info *smi_info) { if (smi_info->thread != NULL) kthread_stop(smi_info->thread); + + smi_info->timer_can_start = false; if (smi_info->timer_running) del_timer_sync(&smi_info->si_timer); } @@ -3593,7 +3595,7 @@ static int try_smi_init(struct smi_info *new_smi) * Start clearing the flags before we enable interrupts or the * timer to avoid racing with the timer. */ - start_clear_flags(new_smi, false); + start_clear_flags(new_smi); /* * IRQ is defined to be set when non-zero. req_events will @@ -3671,7 +3673,7 @@ static int try_smi_init(struct smi_info *new_smi) return 0; out_err_stop_timer: - wait_for_timer_and_thread(new_smi); + stop_timer_and_thread(new_smi); out_err: new_smi->interrupt_disabled = true; @@ -3865,7 +3867,7 @@ static void cleanup_one_si(struct smi_info *to_clean) */ if (to_clean->irq_cleanup) to_clean->irq_cleanup(to_clean); - wait_for_timer_and_thread(to_clean); + stop_timer_and_thread(to_clean); /* * Timeouts are stopped, now make sure the interrupts are off @@ -3876,7 +3878,7 @@ static void cleanup_one_si(struct smi_info *to_clean) poll(to_clean); schedule_timeout_uninterruptible(1); } - disable_si_irq(to_clean, false); + disable_si_irq(to_clean); while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) { poll(to_clean); schedule_timeout_uninterruptible(1); diff --git a/drivers/clk/hisilicon/clk-hi6220.c b/drivers/clk/hisilicon/clk-hi6220.c index c0e8e1f196aae4f15c39bf39a725db8fd192f101..2bfaf22e6ffc09ddffeea2d5f9442006df34137a 100644 --- a/drivers/clk/hisilicon/clk-hi6220.c +++ b/drivers/clk/hisilicon/clk-hi6220.c @@ -144,7 +144,7 @@ static struct hisi_gate_clock hi6220_separated_gate_clks_sys[] __initdata = { { HI6220_BBPPLL_SEL, "bbppll_sel", "pll0_bbp_gate", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 9, 0, }, { HI6220_MEDIA_PLL_SRC, "media_pll_src", "pll_media_gate", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 10, 0, }, { HI6220_MMC2_SEL, "mmc2_sel", "mmc2_mux1", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 11, 0, }, - { HI6220_CS_ATB_SYSPLL, "cs_atb_syspll", "syspll", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 12, 0, }, + { HI6220_CS_ATB_SYSPLL, "cs_atb_syspll", "syspll", CLK_SET_RATE_PARENT|CLK_IS_CRITICAL, 0x270, 12, 0, }, }; static struct hisi_mux_clock hi6220_mux_clks_sys[] __initdata = { diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c index ce8ea10407e48647aed9fdcf8d98c30ab29bd514..93a19667003d464b9423606c9196acf87b2ac5b5 100644 --- a/drivers/clk/imx/clk-imx6q.c +++ b/drivers/clk/imx/clk-imx6q.c @@ -487,7 +487,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) clk[IMX6QDL_CLK_GPU2D_CORE] = imx_clk_gate2("gpu2d_core", "gpu2d_core_podf", base + 0x6c, 24); clk[IMX6QDL_CLK_GPU3D_CORE] = imx_clk_gate2("gpu3d_core", "gpu3d_core_podf", base + 0x6c, 26); clk[IMX6QDL_CLK_HDMI_IAHB] = imx_clk_gate2("hdmi_iahb", "ahb", base + 0x70, 0); - clk[IMX6QDL_CLK_HDMI_ISFR] = imx_clk_gate2("hdmi_isfr", "video_27m", base + 0x70, 4); + clk[IMX6QDL_CLK_HDMI_ISFR] = imx_clk_gate2("hdmi_isfr", "mipi_core_cfg", base + 0x70, 4); clk[IMX6QDL_CLK_I2C1] = imx_clk_gate2("i2c1", "ipg_per", base + 0x70, 6); 
clk[IMX6QDL_CLK_I2C2] = imx_clk_gate2("i2c2", "ipg_per", base + 0x70, 8); clk[IMX6QDL_CLK_I2C3] = imx_clk_gate2("i2c3", "ipg_per", base + 0x70, 10); diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h index 9f24fcfa304f9c975b63af33e6597722a403afbd..e425e50173c5ac1eeff3e430916ba0dfc0f9b0ce 100644 --- a/drivers/clk/mediatek/clk-mtk.h +++ b/drivers/clk/mediatek/clk-mtk.h @@ -185,6 +185,7 @@ struct mtk_pll_data { uint32_t pcw_reg; int pcw_shift; const struct mtk_pll_div_table *div_table; + const char *parent_name; }; void mtk_clk_register_plls(struct device_node *node, diff --git a/drivers/clk/mediatek/clk-pll.c b/drivers/clk/mediatek/clk-pll.c index 0c2deac17ce958fd415bf80a67944b12b5ca39c1..1502384a30938ef0f501549770aeece66b8dcd56 100644 --- a/drivers/clk/mediatek/clk-pll.c +++ b/drivers/clk/mediatek/clk-pll.c @@ -302,7 +302,10 @@ static struct clk *mtk_clk_register_pll(const struct mtk_pll_data *data, init.name = data->name; init.ops = &mtk_pll_ops; - init.parent_names = &parent_name; + if (data->parent_name) + init.parent_names = &data->parent_name; + else + init.parent_names = &parent_name; init.num_parents = 1; clk = clk_register(NULL, &pll->hw); diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c index df97e25aec76b505ddb88ab7d4b3dc76475fb035..9fe0939c1273ea64eeb13a9e9082e9bcc859960a 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c @@ -608,7 +608,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", lcd_ch1_parents, 0x150, 0, 4, 24, 2, BIT(31), CLK_SET_RATE_PARENT); -static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(30), 0); +static SUNXI_CCU_GATE(hdmi_ddc_clk, "ddc", "osc24M", 0x150, BIT(30), 0); static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0); diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c index 6041bdba2e971ecb93c3047c14854e676750bf5e..f69f9e8c6f38060f9eae357e89a0f549e68c1b64 100644 --- a/drivers/clk/sunxi/clk-sun9i-mmc.c +++ b/drivers/clk/sunxi/clk-sun9i-mmc.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev, return 0; } +static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev, + unsigned long id) +{ + sun9i_mmc_reset_assert(rcdev, id); + udelay(10); + sun9i_mmc_reset_deassert(rcdev, id); + + return 0; +} + static const struct reset_control_ops sun9i_mmc_reset_ops = { .assert = sun9i_mmc_reset_assert, .deassert = sun9i_mmc_reset_deassert, + .reset = sun9i_mmc_reset_reset, }; static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev) diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 8e2db5ead8da683500826a2968bd165ca88943b6..af520d81525fa9856f9550c73a2de820e9309f24 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -963,7 +963,7 @@ static void __init tegra30_super_clk_init(void) * U71 divider of cclk_lp. 
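The sun9i-mmc hunk above wires up a .reset callback composed of assert, a short settle delay, and deassert. That matters because the reset core can only service one-shot reset requests when the controller implements .reset; consumers otherwise get an error and have to open-code the sequence. A consumer-side sketch, with a hypothetical device and no claim about any particular MMC driver:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/reset.h>

static int example_pulse_reset(struct device *dev)
{
        struct reset_control *rstc = devm_reset_control_get(dev, NULL);

        if (IS_ERR(rstc))
                return PTR_ERR(rstc);

        /* One complete assert/delay/deassert cycle via the controller's
         * .reset op; the core reports an error if the op is missing. */
        return reset_control_reset(rstc);
}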
*/ clk = tegra_clk_register_divider("pll_p_out3_cclklp", "pll_p_out3", - clk_base + SUPER_CCLKG_DIVIDER, 0, + clk_base + SUPER_CCLKLP_DIVIDER, 0, TEGRA_DIVIDER_INT, 16, 8, 1, NULL); clk_register_clkdev(clk, "pll_p_out3_cclklp", NULL); diff --git a/drivers/clk/uniphier/clk-uniphier-sys.c b/drivers/clk/uniphier/clk-uniphier-sys.c index 5d029991047ddb7fdda7807d11d2d3dc99978279..481225adef8772406ae07604383f1d8bf6ff1836 100644 --- a/drivers/clk/uniphier/clk-uniphier-sys.c +++ b/drivers/clk/uniphier/clk-uniphier-sys.c @@ -98,7 +98,7 @@ const struct uniphier_clk_data uniphier_sld8_sys_clk_data[] = { const struct uniphier_clk_data uniphier_pro5_sys_clk_data[] = { UNIPHIER_CLK_FACTOR("spll", -1, "ref", 120, 1), /* 2400 MHz */ UNIPHIER_CLK_FACTOR("dapll1", -1, "ref", 128, 1), /* 2560 MHz */ - UNIPHIER_CLK_FACTOR("dapll2", -1, "ref", 144, 125), /* 2949.12 MHz */ + UNIPHIER_CLK_FACTOR("dapll2", -1, "dapll1", 144, 125), /* 2949.12 MHz */ UNIPHIER_CLK_FACTOR("uart", 0, "dapll2", 1, 40), UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 48), UNIPHIER_PRO5_SYS_CLK_SD, diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index e2c6e43cf8ca31e27af1b6c8630f7a0c590e8b7a..0bed22ff57a875a153eddc4c39c31d8c3fb12fdc 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -305,6 +305,14 @@ config ARM_ARCH_TIMER_EVTSTREAM This must be disabled for hardware validation purposes to detect any hardware anomalies of missing events. +config ARM_ARCH_TIMER_VCT_ACCESS + bool "Support for ARM architected timer virtual counter access in userspace" + default !ARM64 + depends on ARM_ARCH_TIMER + help + This option enables support for reading the ARM architected timer's + virtual counter in userspace. + config FSL_ERRATUM_A008585 bool "Workaround for Freescale/NXP Erratum A-008585" default y diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a2503db7e533e2f83dfb5937e846b8c916b80054..e3bc592b10556bf150049d6d6f1b702b3ef4b71f 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -449,7 +449,10 @@ static void arch_counter_set_user_access(void) | ARCH_TIMER_USR_PCT_ACCESS_EN); /* Enable user access to the virtual counter */ - cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; + if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_VCT_ACCESS)) + cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; + else + cntkctl &= ~ARCH_TIMER_USR_VCT_ACCESS_EN; arch_timer_set_cntkctl(cntkctl); } diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 1b2574c4fb979e4a2597e5ef9e86b9177fcf6b98..b167cc634fae353e8d98c67b6ec35b89f16f1a5f 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -16,6 +16,7 @@ #include #include #include +#include #define TIM_CR1 0x00 #define TIM_DIER 0x0c @@ -106,6 +107,10 @@ static int __init stm32_clockevent_init(struct device_node *np) unsigned long rate, max_delta; int irq, ret, bits, prescaler = 1; + data = kmemdup(&clock_event_ddata, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + clk = of_clk_get(np, 0); if (IS_ERR(clk)) { ret = PTR_ERR(clk); @@ -156,8 +161,8 @@ static int __init stm32_clockevent_init(struct device_node *np) writel_relaxed(prescaler - 1, data->base + TIM_PSC); writel_relaxed(TIM_EGR_UG, data->base + TIM_EGR); - writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); writel_relaxed(0, data->base + TIM_SR); + writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); data->periodic_top = DIV_ROUND_CLOSEST(rate, prescaler * HZ); @@ -184,6 +189,7 @@ static 
int __init stm32_clockevent_init(struct device_node *np) err_clk_enable: clk_put(clk); err_clk_get: + kfree(data); return ret; } diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index e8fe5b950945440381f54a14f8eacf65be38edd5..1170bfea63eb8a5ce6dcfe095cbbe6484988afc5 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -303,6 +303,7 @@ endif if MIPS config LOONGSON2_CPUFREQ tristate "Loongson2 CPUFreq Driver" + depends on LEMOTE_MACH2F help This option adds a CPUFreq driver for loongson processors which support software configurable cpu frequency. @@ -315,6 +316,7 @@ config LOONGSON2_CPUFREQ config LOONGSON1_CPUFREQ tristate "Loongson1 CPUFreq Driver" + depends on LOONGSON1_LS1B help This option adds a CPUFreq driver for loongson1 processors which support software configurable cpu frequency. diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 837a281cf767e0d72ae11f81272ba5f570287614..434e729d94eee2970bf34d39630372ba90f619be 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1022,11 +1022,19 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -static int add_cpu_dev_symlink(struct cpufreq_policy *policy, - struct device *dev) +static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu) { + struct device *dev = get_cpu_device(cpu); + + if (!dev) + return; + + if (cpumask_test_and_set_cpu(cpu, policy->real_cpus)) + return; + dev_dbg(dev, "%s: Adding symlink\n", __func__); - return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); + if (sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq")) + dev_err(dev, "cpufreq symlink creation failed\n"); } static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, @@ -1288,10 +1296,10 @@ static int cpufreq_online(unsigned int cpu) policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; - write_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu(j, policy->related_cpus) + for_each_cpu(j, policy->related_cpus) { per_cpu(cpufreq_cpu_data, j) = policy; - write_unlock_irqrestore(&cpufreq_driver_lock, flags); + add_cpu_dev_symlink(policy, j); + } } else { policy->min = policy->user_policy.min; policy->max = policy->user_policy.max; @@ -1388,13 +1396,15 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->exit) cpufreq_driver->exit(policy); + + for_each_cpu(j, policy->real_cpus) + remove_cpu_dev_symlink(policy, get_cpu_device(j)); + out_free_policy: cpufreq_policy_free(policy, !new_policy); return ret; } -static int cpufreq_offline(unsigned int cpu); - /** * cpufreq_add_dev - the cpufreq interface for a CPU device. * @dev: CPU device. 
@@ -1416,16 +1426,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* Create sysfs link on CPU registration */ policy = per_cpu(cpufreq_cpu_data, cpu); - if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus)) - return 0; + if (policy) + add_cpu_dev_symlink(policy, cpu); - ret = add_cpu_dev_symlink(policy, dev); - if (ret) { - cpumask_clear_cpu(cpu, policy->real_cpus); - cpufreq_offline(cpu); - } - - return ret; + return 0; } static int cpufreq_offline(unsigned int cpu) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 7fe442ca38f4b793cef0f70731a62e215d5d37fc..854a567811002329e9b39068ad8d4834e9b8cf71 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -164,6 +164,24 @@ static int powernv_cpuidle_driver_init(void) drv->state_count += 1; } + /* + * On the PowerNV platform cpu_present may be less than cpu_possible in + * cases when firmware detects the CPU, but it is not available to the + * OS. If CONFIG_HOTPLUG_CPU=n, then such CPUs are not hotplugable at + * run time and hence cpu_devices are not created for those CPUs by the + * generic topology_init(). + * + * drv->cpumask defaults to cpu_possible_mask in + * __cpuidle_driver_init(). This breaks cpuidle on PowerNV where + * cpu_devices are not created for CPUs in cpu_possible_mask that + * cannot be hot-added later at run time. + * + * Trying cpuidle_register_device() on a CPU without a cpu_device is + * incorrect, so pass a correct CPU mask to the generic cpuidle driver. + */ + + drv->cpumask = (struct cpumask *)cpu_present_mask; + return 0; } diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 78ab946d946a7cf21d0ec77d6fdefc9d21b639f9..439f460ac08215c6e7d71fc62b8ad2a343f1a419 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -189,6 +189,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, return -EBUSY; } target_state = &drv->states[index]; + broadcast = false; } /* Take note of the planned idle state. */ diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 832a2c3f01ffccf691842507955078299799eb86..9e98a5fbbc1d7fadafc7861ceae1967d8032f956 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -613,6 +613,18 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; + /* + * Return if cpu_device is not setup for this CPU. + * + * This could happen if the arch did not set up cpu_device + * since this CPU is not in cpu_present mask and the + * driver did not send a correct CPU mask during registration. 
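These two cpuidle changes are halves of one fix: the powernv driver narrows drv->cpumask to CPUs that actually received a cpu_device, and the sysfs layer (the comment continues just below) refuses to build attributes for a CPU whose device is missing rather than dereferencing a NULL kobject. On the driver side, the registration pattern looks roughly like this; the driver and its idle states are hypothetical and elided:

#include <linux/cpuidle.h>
#include <linux/cpumask.h>
#include <linux/module.h>

static struct cpuidle_driver example_idle_driver = {
        .name = "example_idle",
        .owner = THIS_MODULE,
        /* .states[] and .state_count filled in as usual */
};

static int __init example_idle_init(void)
{
        /* Restrict to CPUs that topology_init() created devices for. */
        example_idle_driver.cpumask = (struct cpumask *)cpu_present_mask;
        return cpuidle_register(&example_idle_driver, NULL);
}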
+ * Without this check we would end up passing bogus + * value for &cpu_dev->kobj in kobject_init_and_add() + */ + if (!cpu_dev) + return -ENODEV; + kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); if (!kdev) return -ENOMEM; diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h index ecfdcfe3698d62204ffa3f4f9096d0769c4094b5..4f41d6da5acca7d1dedbb3419619a8b327b53564 100644 --- a/drivers/crypto/amcc/crypto4xx_core.h +++ b/drivers/crypto/amcc/crypto4xx_core.h @@ -34,12 +34,12 @@ #define PPC405EX_CE_RESET 0x00000008 #define CRYPTO4XX_CRYPTO_PRIORITY 300 -#define PPC4XX_LAST_PD 63 -#define PPC4XX_NUM_PD 64 -#define PPC4XX_LAST_GD 1023 +#define PPC4XX_NUM_PD 256 +#define PPC4XX_LAST_PD (PPC4XX_NUM_PD - 1) #define PPC4XX_NUM_GD 1024 -#define PPC4XX_LAST_SD 63 -#define PPC4XX_NUM_SD 64 +#define PPC4XX_LAST_GD (PPC4XX_NUM_GD - 1) +#define PPC4XX_NUM_SD 256 +#define PPC4XX_LAST_SD (PPC4XX_NUM_SD - 1) #define PPC4XX_SD_BUFFER_SIZE 2048 #define PD_ENTRY_INUSE 1 diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 98468b96c32f8155c4d71e75e079e81366b55e08..2ca101ac0c177e868a4b20f124baac0c691f4239 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -228,12 +228,16 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, * without any error (HW optimizations for later * CAAM eras), then try again. */ + if (ret) + break; + rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) || - !(rdsta_val & (1 << sh_idx))) + !(rdsta_val & (1 << sh_idx))) { ret = -EAGAIN; - if (ret) break; + } + dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx); /* Clear the contents before recreating the descriptor */ memset(desc, 0x00, CAAM_CMD_SZ * 7); diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index 5d4c05074a5c222b03011db9b0cc41b1078a7725..e2bcacc1a921675cf30f70a40816e1306a8c3ef9 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -41,6 +41,7 @@ struct caam_drv_private_jr { struct device *dev; int ridx; struct caam_job_ring __iomem *rregs; /* JobR's register space */ + struct tasklet_struct irqtask; int irq; /* One per queue */ /* Number of scatterlist crypt transforms active on the JobR */ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 757c27f9953d52aa4b662f317b7c0d5ff508a685..9e7f28122bb7fe5a88eb80d7ed6f9dcbe98cd0d5 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -73,6 +73,8 @@ static int caam_jr_shutdown(struct device *dev) ret = caam_reset_hw_jr(dev); + tasklet_kill(&jrp->irqtask); + /* Release interrupt */ free_irq(jrp->irq, dev); @@ -128,7 +130,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) /* * Check the output ring for ready responses, kick - * the threaded irq if jobs done. + * tasklet if jobs done. 
*/ irqstate = rd_reg32(&jrp->rregs->jrintstatus); if (!irqstate) @@ -150,13 +152,18 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) /* Have valid interrupt at this point, just ACK and trigger */ wr_reg32(&jrp->rregs->jrintstatus, irqstate); - return IRQ_WAKE_THREAD; + preempt_disable(); + tasklet_schedule(&jrp->irqtask); + preempt_enable(); + + return IRQ_HANDLED; } -static irqreturn_t caam_jr_threadirq(int irq, void *st_dev) +/* Deferred service handler, run as interrupt-fired tasklet */ +static void caam_jr_dequeue(unsigned long devarg) { int hw_idx, sw_idx, i, head, tail; - struct device *dev = st_dev; + struct device *dev = (struct device *)devarg; struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg); u32 *userdesc, userstatus; @@ -230,8 +237,6 @@ static irqreturn_t caam_jr_threadirq(int irq, void *st_dev) /* reenable / unmask IRQs */ clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); - - return IRQ_HANDLED; } /** @@ -389,10 +394,11 @@ static int caam_jr_init(struct device *dev) jrp = dev_get_drvdata(dev); + tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev); + /* Connect job ring interrupt handler. */ - error = request_threaded_irq(jrp->irq, caam_jr_interrupt, - caam_jr_threadirq, IRQF_SHARED, - dev_name(dev), dev); + error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED, + dev_name(dev), dev); if (error) { dev_err(dev, "can't connect JobR %d interrupt (%d)\n", jrp->ridx, jrp->irq); @@ -454,6 +460,7 @@ static int caam_jr_init(struct device *dev) out_free_irq: free_irq(jrp->irq, dev); out_kill_deq: + tasklet_kill(&jrp->irqtask); return error; } diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index c5aac25a57380ba8e6976538cb76ee5b72e6429c..b365ad78ac27accff12e0ad9099cf92cb75bc84a 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1620,6 +1620,7 @@ static int queue_cache_init(void) CWQ_ENTRY_SIZE, 0, NULL); if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) { kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); + queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL; return -ENOMEM; } return 0; @@ -1629,6 +1630,8 @@ static void queue_cache_destroy(void) { kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]); + queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL; + queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL; } static int spu_queue_register(struct spu_queue *p, unsigned long q_type) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index dce1af0ce85ce8ec6dbd7184f02776cb173c41f0..a668286d62cb17aca693c525c1703594ae688aac 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -805,8 +805,9 @@ static int s5p_aes_probe(struct platform_device *pdev) dev_warn(dev, "feed control interrupt is not available.\n"); goto err_irq; } - err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt, - IRQF_SHARED, pdev->name, pdev); + err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL, + s5p_aes_interrupt, IRQF_ONESHOT, + pdev->name, pdev); if (err < 0) { dev_warn(dev, "feed control interrupt is not available.\n"); goto err_irq; diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index e2d323fa24379c4ddea9f2ce841bdc9319e3b9d0..1c8d79d93098fbec5272ff210597a75be6a13d10 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1232,12 +1232,11 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, sg_link_tbl_len += authsize; } - sg_count = 
talitos_sg_map(dev, areq->src, cryptlen, edesc, - &desc->ptr[4], sg_count, areq->assoclen, - tbl_off); + ret = talitos_sg_map(dev, areq->src, sg_link_tbl_len, edesc, + &desc->ptr[4], sg_count, areq->assoclen, tbl_off); - if (sg_count > 1) { - tbl_off += sg_count; + if (ret > 1) { + tbl_off += ret; sync_needed = true; } @@ -1248,14 +1247,15 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE); } - sg_count = talitos_sg_map(dev, areq->dst, cryptlen, edesc, - &desc->ptr[5], sg_count, areq->assoclen, - tbl_off); + ret = talitos_sg_map(dev, areq->dst, cryptlen, edesc, &desc->ptr[5], + sg_count, areq->assoclen, tbl_off); if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) to_talitos_ptr_ext_or(&desc->ptr[5], authsize, is_sec1); - if (sg_count > 1) { + /* ICV data */ + if (ret > 1) { + tbl_off += ret; edesc->icv_ool = true; sync_needed = true; @@ -1265,9 +1265,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, sizeof(struct talitos_ptr) + authsize; /* Add an entry to the link table for ICV data */ - tbl_ptr += sg_count - 1; - to_talitos_ptr_ext_set(tbl_ptr, 0, is_sec1); - tbl_ptr++; + to_talitos_ptr_ext_set(tbl_ptr - 1, 0, is_sec1); to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN, is_sec1); to_talitos_ptr_len(tbl_ptr, authsize, is_sec1); @@ -1275,18 +1273,33 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, /* icv data follows link tables */ to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + offset, is_sec1); + } else { + dma_addr_t addr = edesc->dma_link_tbl; + + if (is_sec1) + addr += areq->assoclen + cryptlen; + else + addr += sizeof(struct talitos_ptr) * tbl_off; + + to_talitos_ptr(&desc->ptr[6], addr, is_sec1); + to_talitos_ptr_len(&desc->ptr[6], authsize, is_sec1); + } + } else if (!(desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)) { + ret = talitos_sg_map(dev, areq->dst, authsize, edesc, + &desc->ptr[6], sg_count, areq->assoclen + + cryptlen, + tbl_off); + if (ret > 1) { + tbl_off += ret; + edesc->icv_ool = true; + sync_needed = true; + } else { + edesc->icv_ool = false; } } else { edesc->icv_ool = false; } - /* ICV data */ - if (!(desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)) { - to_talitos_ptr_len(&desc->ptr[6], authsize, is_sec1); - to_talitos_ptr(&desc->ptr[6], edesc->dma_link_tbl + - areq->assoclen + cryptlen, is_sec1); - } - /* iv out */ if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, @@ -1494,12 +1507,20 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, const u8 *key, unsigned int keylen) { struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); + u32 tmp[DES_EXPKEY_WORDS]; if (keylen > TALITOS_MAX_KEY_SIZE) { crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } + if (unlikely(crypto_ablkcipher_get_flags(cipher) & + CRYPTO_TFM_REQ_WEAK_KEY) && + !des_ekey(tmp, key)) { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_WEAK_KEY); + return -EINVAL; + } + memcpy(&ctx->key, key, keylen); ctx->keylen = keylen; @@ -2614,7 +2635,7 @@ static struct talitos_alg_template driver_algs[] = { .ivsize = AES_BLOCK_SIZE, } }, - .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + .desc_hdr_template = DESC_HDR_TYPE_AESU_CTR_NONSNOOP | DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_CTR, }, @@ -3047,6 +3068,11 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, t_alg->algt.alg.aead.setkey = aead_setkey; t_alg->algt.alg.aead.encrypt = aead_encrypt; 
t_alg->algt.alg.aead.decrypt = aead_decrypt; + if (!(priv->features & TALITOS_FTR_SHA224_HWINIT) && + !strncmp(alg->cra_name, "authenc(hmac(sha224)", 20)) { + kfree(t_alg); + return ERR_PTR(-ENOTSUPP); + } break; case CRYPTO_ALG_TYPE_AHASH: alg = &t_alg->algt.alg.hash.halg.base; diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c index 094548bcb9cf6fa8bffe616e3c2e3f88cf72e4ee..883b3bea143c1123e8559fb37eb943fd2d27a9cf 100644 --- a/drivers/dma-buf/fence.c +++ b/drivers/dma-buf/fence.c @@ -279,6 +279,31 @@ int fence_add_callback(struct fence *fence, struct fence_cb *cb, } EXPORT_SYMBOL(fence_add_callback); +/** + * fence_get_status - returns the status upon completion + * @fence: [in] the fence to query + * + * This wraps fence_get_status_locked() to return the error status + * condition on a signaled fence. See fence_get_status_locked() for more + * details. + * + * Returns 0 if the fence has not yet been signaled, 1 if the fence has + * been signaled without an error condition, or a negative error code + * if the fence has been completed in err. + */ +int fence_get_status(struct fence *fence) +{ + unsigned long flags; + int status; + + spin_lock_irqsave(fence->lock, flags); + status = fence_get_status_locked(fence); + spin_unlock_irqrestore(fence->lock, flags); + + return status; +} +EXPORT_SYMBOL(fence_get_status); + /** * fence_remove_callback - remove a callback from the signaling list * @fence: [in] the fence to wait on @@ -529,6 +554,7 @@ fence_init(struct fence *fence, const struct fence_ops *ops, fence->context = context; fence->seqno = seqno; fence->flags = 0UL; + fence->error = 0; trace_fence_init(fence); } diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 0cb8d9d8784a00143e42e550bf26c2eeaccf0e56..9dc86d3303aedf5ecf378fa6daed87831177546c 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -96,9 +96,9 @@ struct sync_timeline *sync_timeline_create(const char *name) obj->context = fence_context_alloc(1); strlcpy(obj->name, name, sizeof(obj->name)); - INIT_LIST_HEAD(&obj->child_list_head); - INIT_LIST_HEAD(&obj->active_list_head); - spin_lock_init(&obj->child_list_lock); + obj->pt_tree = RB_ROOT; + INIT_LIST_HEAD(&obj->pt_list); + spin_lock_init(&obj->lock); sync_timeline_debug_add(obj); @@ -125,68 +125,6 @@ static void sync_timeline_put(struct sync_timeline *obj) kref_put(&obj->kref, sync_timeline_free); } -/** - * sync_timeline_signal() - signal a status change on a sync_timeline - * @obj: sync_timeline to signal - * @inc: num to increment on timeline->value - * - * A sync implementation should call this any time one of it's fences - * has signaled or has an error condition. - */ -static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) -{ - unsigned long flags; - struct sync_pt *pt, *next; - - trace_sync_timeline(obj); - - spin_lock_irqsave(&obj->child_list_lock, flags); - - obj->value += inc; - - list_for_each_entry_safe(pt, next, &obj->active_list_head, - active_list) { - if (fence_is_signaled_locked(&pt->base)) - list_del_init(&pt->active_list); - } - - spin_unlock_irqrestore(&obj->child_list_lock, flags); -} - -/** - * sync_pt_create() - creates a sync pt - * @parent: fence's parent sync_timeline - * @size: size to allocate for this pt - * @inc: value of the fence - * - * Creates a new sync_pt as a child of @parent. @size bytes will be - * allocated allowing for implementation specific data to be kept after - * the generic sync_timeline struct. Returns the sync_pt object or - * NULL in case of error. 
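fence_get_status(), added above, collapses "has this fence signaled, and with what result?" into one locked read: 0 for still active, 1 for signaled cleanly, negative for completed in error. A caller-side sketch, assuming the matching declaration lands in linux/fence.h alongside this patch:

#include <linux/fence.h>
#include <linux/printk.h>

static void example_report_fence(struct fence *f)
{
        int status = fence_get_status(f);

        if (status == 0)
                pr_debug("fence still active\n");
        else if (status > 0)
                pr_debug("fence signaled without error\n");
        else
                pr_warn("fence completed with error %d\n", status);
}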
- */ -static struct sync_pt *sync_pt_create(struct sync_timeline *obj, int size, - unsigned int value) -{ - unsigned long flags; - struct sync_pt *pt; - - if (size < sizeof(*pt)) - return NULL; - - pt = kzalloc(size, GFP_KERNEL); - if (!pt) - return NULL; - - spin_lock_irqsave(&obj->child_list_lock, flags); - sync_timeline_get(obj); - fence_init(&pt->base, &timeline_fence_ops, &obj->child_list_lock, - obj->context, value); - list_add_tail(&pt->child_list, &obj->child_list_head); - INIT_LIST_HEAD(&pt->active_list); - spin_unlock_irqrestore(&obj->child_list_lock, flags); - return pt; -} - static const char *timeline_fence_get_driver_name(struct fence *fence) { return "sw_sync"; @@ -203,13 +141,17 @@ static void timeline_fence_release(struct fence *fence) { struct sync_pt *pt = fence_to_sync_pt(fence); struct sync_timeline *parent = fence_parent(fence); - unsigned long flags; - spin_lock_irqsave(fence->lock, flags); - list_del(&pt->child_list); - if (!list_empty(&pt->active_list)) - list_del(&pt->active_list); - spin_unlock_irqrestore(fence->lock, flags); + if (!list_empty(&pt->link)) { + unsigned long flags; + + spin_lock_irqsave(fence->lock, flags); + if (!list_empty(&pt->link)) { + list_del(&pt->link); + rb_erase(&pt->node, &parent->pt_tree); + } + spin_unlock_irqrestore(fence->lock, flags); + } sync_timeline_put(parent); fence_free(fence); @@ -219,18 +161,11 @@ static bool timeline_fence_signaled(struct fence *fence) { struct sync_timeline *parent = fence_parent(fence); - return (fence->seqno > parent->value) ? false : true; + return !__fence_is_later(fence->seqno, parent->value); } static bool timeline_fence_enable_signaling(struct fence *fence) { - struct sync_pt *pt = fence_to_sync_pt(fence); - struct sync_timeline *parent = fence_parent(fence); - - if (timeline_fence_signaled(fence)) - return false; - - list_add_tail(&pt->active_list, &parent->active_list_head); return true; } @@ -238,7 +173,7 @@ static void timeline_fence_disable_signaling(struct fence *fence) { struct sync_pt *pt = container_of(fence, struct sync_pt, base); - list_del_init(&pt->active_list); + list_del_init(&pt->link); } static void timeline_fence_value_str(struct fence *fence, @@ -267,6 +202,107 @@ static const struct fence_ops timeline_fence_ops = { .timeline_value_str = timeline_fence_timeline_value_str, }; +/** + * sync_timeline_signal() - signal a status change on a sync_timeline + * @obj: sync_timeline to signal + * @inc: num to increment on timeline->value + * + * A sync implementation should call this any time one of it's fences + * has signaled or has an error condition. + */ +static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) +{ + struct sync_pt *pt, *next; + + trace_sync_timeline(obj); + + spin_lock_irq(&obj->lock); + + obj->value += inc; + + list_for_each_entry_safe(pt, next, &obj->pt_list, link) { + if (!timeline_fence_signaled(&pt->base)) + break; + + list_del_init(&pt->link); + rb_erase(&pt->node, &obj->pt_tree); + + /* + * A signal callback may release the last reference to this + * fence, causing it to be freed. That operation has to be + * last to avoid a use after free inside this loop, and must + * be after we remove the fence from the timeline in order to + * prevent deadlocking on timeline->lock inside + * timeline_fence_release(). 
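The sw_sync rework that follows replaces the flat child list with an rbtree keyed by seqno plus a sorted list, so signalling can stop at the first still-pending point instead of scanning every fence. The insertion in sync_pt_create() below follows the canonical kernel rbtree pattern; in isolation, with a hypothetical item type, it reads:

#include <linux/rbtree.h>

struct example_item {
        struct rb_node node;
        unsigned int seqno;
};

static void example_insert(struct rb_root *root, struct example_item *new)
{
        struct rb_node **p = &root->rb_node, *parent = NULL;

        while (*p) {                    /* walk down to the insertion leaf */
                struct example_item *other;

                parent = *p;
                other = rb_entry(parent, struct example_item, node);
                if (new->seqno < other->seqno)
                        p = &parent->rb_left;
                else
                        p = &parent->rb_right;
        }
        rb_link_node(&new->node, parent, p);    /* splice in as a leaf */
        rb_insert_color(&new->node, root);      /* rebalance and recolor */
}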
+ */ + fence_signal_locked(&pt->base); + } + + spin_unlock_irq(&obj->lock); +} + +/** + * sync_pt_create() - creates a sync pt + * @parent: fence's parent sync_timeline + * @inc: value of the fence + * + * Creates a new sync_pt as a child of @parent. @size bytes will be + * allocated allowing for implementation specific data to be kept after + * the generic sync_timeline struct. Returns the sync_pt object or + * NULL in case of error. + */ +static struct sync_pt *sync_pt_create(struct sync_timeline *obj, + unsigned int value) +{ + struct sync_pt *pt; + + pt = kzalloc(sizeof(*pt), GFP_KERNEL); + if (!pt) + return NULL; + + sync_timeline_get(obj); + fence_init(&pt->base, &timeline_fence_ops, &obj->lock, + obj->context, value); + INIT_LIST_HEAD(&pt->link); + + spin_lock_irq(&obj->lock); + if (!fence_is_signaled_locked(&pt->base)) { + struct rb_node **p = &obj->pt_tree.rb_node; + struct rb_node *parent = NULL; + + while (*p) { + struct sync_pt *other; + int cmp; + + parent = *p; + other = rb_entry(parent, typeof(*pt), node); + cmp = value - other->base.seqno; + if (cmp > 0) { + p = &parent->rb_right; + } else if (cmp < 0) { + p = &parent->rb_left; + } else { + if (fence_get_rcu(&other->base)) { + fence_put(&pt->base); + pt = other; + goto unlock; + } + p = &parent->rb_left; + } + } + rb_link_node(&pt->node, parent, p); + rb_insert_color(&pt->node, &obj->pt_tree); + + parent = rb_next(&pt->node); + list_add_tail(&pt->link, + parent ? &rb_entry(parent, typeof(*pt), node)->link : &obj->pt_list); + } +unlock: + spin_unlock_irq(&obj->lock); + + return pt; +} + /* * *WARNING* * @@ -293,8 +329,16 @@ static int sw_sync_debugfs_open(struct inode *inode, struct file *file) static int sw_sync_debugfs_release(struct inode *inode, struct file *file) { struct sync_timeline *obj = file->private_data; + struct sync_pt *pt, *next; + + spin_lock_irq(&obj->lock); + + list_for_each_entry_safe(pt, next, &obj->pt_list, link) { + fence_set_error(&pt->base, -ENOENT); + fence_signal_locked(&pt->base); + } - smp_wmb(); + spin_unlock_irq(&obj->lock); sync_timeline_put(obj); return 0; @@ -317,7 +361,7 @@ static long sw_sync_ioctl_create_fence(struct sync_timeline *obj, goto err; } - pt = sync_pt_create(obj, sizeof(*pt), data.value); + pt = sync_pt_create(obj, data.value); if (!pt) { err = -ENOMEM; goto err; @@ -353,6 +397,11 @@ static long sw_sync_ioctl_inc(struct sync_timeline *obj, unsigned long arg) if (copy_from_user(&value, (void __user *)arg, sizeof(value))) return -EFAULT; + while (value > INT_MAX) { + sync_timeline_signal(obj, INT_MAX); + value -= INT_MAX; + } + sync_timeline_signal(obj, value); return 0; diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index 2dd4c3db6caa83ceff101bb5405baea8c2ef4874..858263dbecd4c38091d29e709008dadbf79fd052 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -62,29 +62,29 @@ void sync_file_debug_remove(struct sync_file *sync_file) static const char *sync_status_str(int status) { - if (status == 0) - return "signaled"; + if (status < 0) + return "error"; if (status > 0) - return "active"; + return "signaled"; - return "error"; + return "active"; } -static void sync_print_fence(struct seq_file *s, struct fence *fence, bool show) +static void sync_print_fence(struct seq_file *s, + struct fence *fence, bool show) { - int status = 1; struct sync_timeline *parent = fence_parent(fence); + int status; - if (fence_is_signaled_locked(fence)) - status = fence->status; + status = fence_get_status_locked(fence); seq_printf(s, " %s%sfence 
%s", show ? parent->name : "", show ? "_" : "", sync_status_str(status)); - if (status <= 0) { + if (status) { struct timespec64 ts64 = ktime_to_timespec64(fence->timestamp); @@ -116,17 +116,15 @@ static void sync_print_fence(struct seq_file *s, struct fence *fence, bool show) static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) { struct list_head *pos; - unsigned long flags; seq_printf(s, "%s: %d\n", obj->name, obj->value); - spin_lock_irqsave(&obj->child_list_lock, flags); - list_for_each(pos, &obj->child_list_head) { - struct sync_pt *pt = - container_of(pos, struct sync_pt, child_list); + spin_lock_irq(&obj->lock); + list_for_each(pos, &obj->pt_list) { + struct sync_pt *pt = container_of(pos, struct sync_pt, link); sync_print_fence(s, &pt->base, false); } - spin_unlock_irqrestore(&obj->child_list_lock, flags); + spin_unlock_irq(&obj->lock); } static void sync_print_sync_file(struct seq_file *s, @@ -135,7 +133,7 @@ static void sync_print_sync_file(struct seq_file *s, int i; seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name, - sync_status_str(!fence_is_signaled(sync_file->fence))); + sync_status_str(fence_get_status(sync_file->fence))); if (fence_is_array(sync_file->fence)) { struct fence_array *array = to_fence_array(sync_file->fence); @@ -149,12 +147,11 @@ static void sync_print_sync_file(struct seq_file *s, static int sync_debugfs_show(struct seq_file *s, void *unused) { - unsigned long flags; struct list_head *pos; seq_puts(s, "objs:\n--------------\n"); - spin_lock_irqsave(&sync_timeline_list_lock, flags); + spin_lock_irq(&sync_timeline_list_lock); list_for_each(pos, &sync_timeline_list_head) { struct sync_timeline *obj = container_of(pos, struct sync_timeline, @@ -163,11 +160,11 @@ static int sync_debugfs_show(struct seq_file *s, void *unused) sync_print_obj(s, obj); seq_puts(s, "\n"); } - spin_unlock_irqrestore(&sync_timeline_list_lock, flags); + spin_unlock_irq(&sync_timeline_list_lock); seq_puts(s, "fences:\n--------------\n"); - spin_lock_irqsave(&sync_file_list_lock, flags); + spin_lock_irq(&sync_file_list_lock); list_for_each(pos, &sync_file_list_head) { struct sync_file *sync_file = container_of(pos, struct sync_file, sync_file_list); @@ -175,7 +172,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused) sync_print_sync_file(s, sync_file); seq_puts(s, "\n"); } - spin_unlock_irqrestore(&sync_file_list_lock, flags); + spin_unlock_irq(&sync_file_list_lock); return 0; } diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index d269aa6783aaa57c3137ee93b02c72c620a6fd0f..9615dc0385b5830e1afcea441b658a6dfc26dc11 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -14,6 +14,7 @@ #define _LINUX_SYNC_H #include +#include #include #include @@ -24,43 +25,41 @@ * struct sync_timeline - sync object * @kref: reference count on fence. * @name: name of the sync_timeline. 
Useful for debugging - * @child_list_head: list of children sync_pts for this sync_timeline - * @child_list_lock: lock protecting @child_list_head and fence.status - * @active_list_head: list of active (unsignaled/errored) sync_pts + * @lock: lock protecting @pt_list and @value + * @pt_tree: rbtree of active (unsignaled/errored) sync_pts + * @pt_list: list of active (unsignaled/errored) sync_pts * @sync_timeline_list: membership in global sync_timeline_list */ struct sync_timeline { struct kref kref; char name[32]; - /* protected by child_list_lock */ + /* protected by lock */ u64 context; int value; - struct list_head child_list_head; - spinlock_t child_list_lock; - - struct list_head active_list_head; + struct rb_root pt_tree; + struct list_head pt_list; + spinlock_t lock; struct list_head sync_timeline_list; }; static inline struct sync_timeline *fence_parent(struct fence *fence) { - return container_of(fence->lock, struct sync_timeline, - child_list_lock); + return container_of(fence->lock, struct sync_timeline, lock); } /** * struct sync_pt - sync_pt object * @base: base fence object - * @child_list: sync timeline child's list - * @active_list: sync timeline active child's list + * @link: link on the sync timeline's list + * @node: node in the sync timeline's tree */ struct sync_pt { struct fence base; - struct list_head child_list; - struct list_head active_list; + struct list_head link; + struct rb_node node; }; #ifdef CONFIG_SW_SYNC diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 1beebf8beb3e7cc43efe35190183d3523875e5bf..c835f6216922ed06b1ba0aba5d7abeeb4ccdea65 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -370,10 +370,8 @@ static void sync_fill_fence_info(struct fence *fence, sizeof(info->obj_name)); strlcpy(info->driver_name, fence->ops->get_driver_name(fence), sizeof(info->driver_name)); - if (fence_is_signaled(fence)) - info->status = fence->status >= 0 ? 1 : fence->status; - else - info->status = 0; + + info->status = fence_get_status(fence); info->timestamp_ns = ktime_to_ns(fence->timestamp); } diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 6b535262ac5d76972bb525b49f3cf160fcf54aa0..3db94e81bc14a8aa5f3b37d66bd56eb04e7f8d54 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1107,12 +1107,14 @@ static struct dmaengine_unmap_pool *__get_unmap_pool(int nr) switch (order) { case 0 ... 1: return &unmap_pool[0]; +#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID) case 2 ... 4: return &unmap_pool[1]; case 5 ... 
7: return &unmap_pool[2]; case 8: return &unmap_pool[3]; +#endif default: BUG(); return NULL; diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index fbb75514dfb4422a86904c5c94439f661805558e..ebe72a4665875a0d3a81176004a1f6c0fc9e4184 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -158,6 +158,12 @@ MODULE_PARM_DESC(run, "Run the test (default: false)"); #define PATTERN_OVERWRITE 0x20 #define PATTERN_COUNT_MASK 0x1f +/* poor man's completion - we want to use wait_event_freezable() on it */ +struct dmatest_done { + bool done; + wait_queue_head_t *wait; +}; + struct dmatest_thread { struct list_head node; struct dmatest_info *info; @@ -166,6 +172,8 @@ struct dmatest_thread { u8 **srcs; u8 **dsts; enum dma_transaction_type type; + wait_queue_head_t done_wait; + struct dmatest_done test_done; bool done; }; @@ -326,18 +334,25 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start, return error_count; } -/* poor man's completion - we want to use wait_event_freezable() on it */ -struct dmatest_done { - bool done; - wait_queue_head_t *wait; -}; static void dmatest_callback(void *arg) { struct dmatest_done *done = arg; - - done->done = true; - wake_up_all(done->wait); + struct dmatest_thread *thread = + container_of(done, struct dmatest_thread, test_done); + if (!thread->done) { + done->done = true; + wake_up_all(done->wait); + } else { + /* + * If thread->done, it means that this callback occurred + * after the parent thread has cleaned up. This can + * happen in the case that driver doesn't implement + * the terminate_all() functionality and a dma operation + * did not occur within the timeout period + */ + WARN(1, "dmatest: Kernel memory may be corrupted!!\n"); + } } static unsigned int min_odd(unsigned int x, unsigned int y) @@ -408,9 +423,8 @@ static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len) */ static int dmatest_func(void *data) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait); struct dmatest_thread *thread = data; - struct dmatest_done done = { .wait = &done_wait }; + struct dmatest_done *done = &thread->test_done; struct dmatest_info *info; struct dmatest_params *params; struct dma_chan *chan; @@ -637,9 +651,9 @@ static int dmatest_func(void *data) continue; } - done.done = false; + done->done = false; tx->callback = dmatest_callback; - tx->callback_param = &done; + tx->callback_param = done; cookie = tx->tx_submit(tx); if (dma_submit_error(cookie)) { @@ -652,21 +666,12 @@ static int dmatest_func(void *data) } dma_async_issue_pending(chan); - wait_event_freezable_timeout(done_wait, done.done, + wait_event_freezable_timeout(thread->done_wait, done->done, msecs_to_jiffies(params->timeout)); status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); - if (!done.done) { - /* - * We're leaving the timed out dma operation with - * dangling pointer to done_wait. To make this - * correct, we'll need to allocate wait_done for - * each test iteration and perform "who's gonna - * free it this time?" dancing. For now, just - * leave it dangling. 
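The dmatest rework around this point is a lifetime fix: the completion a DMA callback writes to used to live on the worker's stack, so a callback arriving after a timeout scribbled on a dead stack frame. Moving the wait state into the long-lived per-thread structure lets a late callback be detected and warned about instead of corrupting memory. The shape of the fix, as a sketch with hypothetical example_* types:

#include <linux/kernel.h>
#include <linux/wait.h>

struct example_done {
        bool done;
        wait_queue_head_t *wait;
};

struct example_thread {
        wait_queue_head_t done_wait;    /* outlives every test iteration */
        struct example_done test_done;
        bool done;                      /* thread has left its test loop */
};

static void example_dma_callback(void *arg)
{
        struct example_done *done = arg;
        struct example_thread *t =
                container_of(done, struct example_thread, test_done);

        if (!t->done) {
                done->done = true;
                wake_up_all(done->wait);
        } else {
                /* A timed-out descriptor completed after cleanup. */
                WARN(1, "late DMA callback after thread cleanup\n");
        }
}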
- */ - WARN(1, "dmatest: Kernel stack may be corrupted!!\n"); + if (!done->done) { dmaengine_unmap_put(um); result("test timed out", total_tests, src_off, dst_off, len, 0); @@ -747,7 +752,7 @@ static int dmatest_func(void *data) dmatest_KBs(runtime, total_len), ret); /* terminate all transfers on specified channels */ - if (ret) + if (ret || failed_tests) dmaengine_terminate_all(chan); thread->done = true; @@ -807,6 +812,8 @@ static int dmatest_add_threads(struct dmatest_info *info, thread->info = info; thread->chan = dtc->chan; thread->type = type; + thread->test_done.wait = &thread->done_wait; + init_waitqueue_head(&thread->done_wait); smp_wmb(); thread->task = kthread_create(dmatest_func, thread, "%s-%s%u", dma_chan_name(chan), op, i); diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 9f3dbc8c63d27e733aadc975ca90a74328513333..fb2e7476d96b448d577d657940e32b1bd11e4282 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1694,7 +1694,6 @@ static bool _chan_ns(const struct pl330_dmac *pl330, int i) static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330) { struct pl330_thread *thrd = NULL; - unsigned long flags; int chans, i; if (pl330->state == DYING) @@ -1702,8 +1701,6 @@ static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330) chans = pl330->pcfg.num_chan; - spin_lock_irqsave(&pl330->lock, flags); - for (i = 0; i < chans; i++) { thrd = &pl330->channels[i]; if ((thrd->free) && (!_manager_ns(thrd) || @@ -1721,8 +1718,6 @@ static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330) thrd = NULL; } - spin_unlock_irqrestore(&pl330->lock, flags); - return thrd; } @@ -1740,7 +1735,6 @@ static inline void _free_event(struct pl330_thread *thrd, int ev) static void pl330_release_channel(struct pl330_thread *thrd) { struct pl330_dmac *pl330; - unsigned long flags; if (!thrd || thrd->free) return; @@ -1752,10 +1746,8 @@ static void pl330_release_channel(struct pl330_thread *thrd) pl330 = thrd->dmac; - spin_lock_irqsave(&pl330->lock, flags); _free_event(thrd, thrd->ev); thrd->free = true; - spin_unlock_irqrestore(&pl330->lock, flags); } /* Initialize the structure for PL330 configuration, that can be used @@ -2120,20 +2112,20 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan) struct pl330_dmac *pl330 = pch->dmac; unsigned long flags; - spin_lock_irqsave(&pch->lock, flags); + spin_lock_irqsave(&pl330->lock, flags); dma_cookie_init(chan); pch->cyclic = false; pch->thread = pl330_request_channel(pl330); if (!pch->thread) { - spin_unlock_irqrestore(&pch->lock, flags); + spin_unlock_irqrestore(&pl330->lock, flags); return -ENOMEM; } tasklet_init(&pch->task, pl330_tasklet, (unsigned long) pch); - spin_unlock_irqrestore(&pch->lock, flags); + spin_unlock_irqrestore(&pl330->lock, flags); return 1; } @@ -2236,12 +2228,13 @@ static int pl330_pause(struct dma_chan *chan) static void pl330_free_chan_resources(struct dma_chan *chan) { struct dma_pl330_chan *pch = to_pchan(chan); + struct pl330_dmac *pl330 = pch->dmac; unsigned long flags; tasklet_kill(&pch->task); pm_runtime_get_sync(pch->dmac->ddma.dev); - spin_lock_irqsave(&pch->lock, flags); + spin_lock_irqsave(&pl330->lock, flags); pl330_release_channel(pch->thread); pch->thread = NULL; @@ -2249,7 +2242,7 @@ static void pl330_free_chan_resources(struct dma_chan *chan) if (pch->cyclic) list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool); - spin_unlock_irqrestore(&pch->lock, flags); + spin_unlock_irqrestore(&pl330->lock, flags); 
pm_runtime_mark_last_busy(pch->dmac->ddma.dev); pm_runtime_put_autosuspend(pch->dmac->ddma.dev); } diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 307547f4848db818c254cbf67c63fe527fc6a189..ae3f60be7759e80a68a2354968fa0521222f1790 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -884,7 +884,7 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c, struct virt_dma_desc *vdesc; enum dma_status status; unsigned long flags; - u32 residue; + u32 residue = 0; status = dma_cookie_status(c, cookie, state); if ((status == DMA_COMPLETE) || (!state)) @@ -892,16 +892,12 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c, spin_lock_irqsave(&chan->vchan.lock, flags); vdesc = vchan_find_desc(&chan->vchan, cookie); - if (cookie == chan->desc->vdesc.tx.cookie) { + if (chan->desc && cookie == chan->desc->vdesc.tx.cookie) residue = stm32_dma_desc_residue(chan, chan->desc, chan->next_sg); - } else if (vdesc) { + else if (vdesc) residue = stm32_dma_desc_residue(chan, to_stm32_dma_desc(vdesc), 0); - } else { - residue = 0; - } - dma_set_residue(state, residue); spin_unlock_irqrestore(&chan->vchan.lock, flags); @@ -976,21 +972,18 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec, struct stm32_dma_chan *chan; struct dma_chan *c; - if (dma_spec->args_count < 3) + if (dma_spec->args_count < 4) return NULL; cfg.channel_id = dma_spec->args[0]; cfg.request_line = dma_spec->args[1]; cfg.stream_config = dma_spec->args[2]; - cfg.threshold = 0; + cfg.threshold = dma_spec->args[3]; if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) || (cfg.request_line >= STM32_DMA_MAX_REQUEST_ID)) return NULL; - if (dma_spec->args_count > 3) - cfg.threshold = dma_spec->args[3]; - chan = &dmadev->chan[cfg.channel_id]; c = dma_get_slave_channel(&chan->vchan.chan); diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c index 88a00d06def686bbf721a5581cf9c5a899b1e9c1..43e88d85129e4fb6de951f8bbd55581f6a5a5fcf 100644 --- a/drivers/dma/ti-dma-crossbar.c +++ b/drivers/dma/ti-dma-crossbar.c @@ -49,12 +49,12 @@ struct ti_am335x_xbar_data { struct ti_am335x_xbar_map { u16 dma_line; - u16 mux_val; + u8 mux_val; }; -static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u16 val) +static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u8 val) { - writeb_relaxed(val & 0x1f, iomem + event); + writeb_relaxed(val, iomem + event); } static void ti_am335x_xbar_free(struct device *dev, void *route_data) @@ -105,7 +105,7 @@ static void *ti_am335x_xbar_route_allocate(struct of_phandle_args *dma_spec, } map->dma_line = (u16)dma_spec->args[0]; - map->mux_val = (u16)dma_spec->args[2]; + map->mux_val = (u8)dma_spec->args[2]; dma_spec->args[2] = 0; dma_spec->args_count = 2; diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 72e07e3cf718fb760c62cda09023b199857bcb13..16e0eb523439a8193df821056b68d3bd6a3a431f 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -227,7 +227,7 @@ #define NREC_RDWR(x) (((x)>>11) & 1) #define NREC_RANK(x) (((x)>>8) & 0x7) #define NRECMEMB 0xC0 -#define NREC_CAS(x) (((x)>>16) & 0xFFFFFF) +#define NREC_CAS(x) (((x)>>16) & 0xFFF) #define NREC_RAS(x) ((x) & 0x7FFF) #define NRECFGLOG 0xC4 #define NREEECFBDA 0xC8 @@ -371,7 +371,7 @@ struct i5000_error_info { /* These registers are input ONLY if there was a * Non-Recoverable Error */ u16 nrecmema; /* Non-Recoverable Mem log A */ - u16 nrecmemb; /* Non-Recoverable Mem log B */ + u32 nrecmemb; /* Non-Recoverable Mem 
log B */ }; @@ -407,7 +407,7 @@ static void i5000_get_error_info(struct mem_ctl_info *mci, NERR_FAT_FBD, &info->nerr_fat_fbd); pci_read_config_word(pvt->branchmap_werrors, NRECMEMA, &info->nrecmema); - pci_read_config_word(pvt->branchmap_werrors, + pci_read_config_dword(pvt->branchmap_werrors, NRECMEMB, &info->nrecmemb); /* Clear the error bits, by writing them back */ @@ -1293,7 +1293,7 @@ static int i5000_init_csrows(struct mem_ctl_info *mci) dimm->mtype = MEM_FB_DDR2; /* ask what device type on this row */ - if (MTR_DRAM_WIDTH(mtr)) + if (MTR_DRAM_WIDTH(mtr) == 8) dimm->dtype = DEV_X8; else dimm->dtype = DEV_X4; diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 6ef6ad1ba16ef3351217074f2ba63316a07276e4..2ea2f32e608be9f916a038301cd90c0260a8d508 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -368,7 +368,7 @@ struct i5400_error_info { /* These registers are input ONLY if there was a Non-Rec Error */ u16 nrecmema; /* Non-Recoverable Mem log A */ - u16 nrecmemb; /* Non-Recoverable Mem log B */ + u32 nrecmemb; /* Non-Recoverable Mem log B */ }; @@ -458,7 +458,7 @@ static void i5400_get_error_info(struct mem_ctl_info *mci, NERR_FAT_FBD, &info->nerr_fat_fbd); pci_read_config_word(pvt->branchmap_werrors, NRECMEMA, &info->nrecmema); - pci_read_config_word(pvt->branchmap_werrors, + pci_read_config_dword(pvt->branchmap_werrors, NRECMEMB, &info->nrecmemb); /* Clear the error bits, by writing them back */ @@ -1207,13 +1207,14 @@ static int i5400_init_dimms(struct mem_ctl_info *mci) dimm->nr_pages = size_mb << 8; dimm->grain = 8; - dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4; + dimm->dtype = MTR_DRAM_WIDTH(mtr) == 8 ? + DEV_X8 : DEV_X4; dimm->mtype = MEM_FB_DDR2; /* * The eccc mechanism is SDDC (aka SECC), with * is similar to Chipkill. */ - dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ? + dimm->edac_mode = MTR_DRAM_WIDTH(mtr) == 8 ? 
EDAC_S8ECD8ED : EDAC_S4ECD4ED; ndimms++; } diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index cda6dab5067a57709393fad7913a2e76a531a23b..6b65a102b49ded103b8169131a41089e7db2332a 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -79,6 +79,7 @@ static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci) if (!pvt->inject) int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx)); else { + int_reg.u64 = 0; if (pvt->error_type == 1) int_reg.s.sec_err = 1; if (pvt->error_type == 2) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 54775221a01fbe73d6a184ac903ccafc7e76c50c..3c47e6361d8127faeacce4d8b88df28d849a1eb2 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2510,6 +2510,7 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, break; case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA: pvt->pci_ta = pdev; + break; case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS: pvt->pci_ras = pdev; break; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index a4944e22f294b321fba70d572dc4996fcbb838f7..2f47c5b5f4cb8fb837f4c154a976112e06ea37d2 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -120,8 +120,7 @@ static ssize_t systab_show(struct kobject *kobj, return str - buf; } -static struct kobj_attribute efi_attr_systab = - __ATTR(systab, 0400, systab_show, NULL); +static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400); #define EFI_FIELD(var) efi.var @@ -385,7 +384,6 @@ int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md) return 0; } } - pr_err_once("requested map not found.\n"); return -ENOENT; } diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index 14914074f716ed41cc3fccc42c1e333810ecf890..311c9d0e8cbb126a7d4edfa25374d7e9cd9d30b2 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -106,7 +106,7 @@ static const struct sysfs_ops esre_attr_ops = { }; /* Generic ESRT Entry ("ESRE") support. 
*/ -static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf) +static ssize_t fw_class_show(struct esre_entry *entry, char *buf) { char *str = buf; @@ -117,18 +117,16 @@ static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf) return str - buf; } -static struct esre_attribute esre_fw_class = __ATTR(fw_class, 0400, - esre_fw_class_show, NULL); +static struct esre_attribute esre_fw_class = __ATTR_RO_MODE(fw_class, 0400); #define esre_attr_decl(name, size, fmt) \ -static ssize_t esre_##name##_show(struct esre_entry *entry, char *buf) \ +static ssize_t name##_show(struct esre_entry *entry, char *buf) \ { \ return sprintf(buf, fmt "\n", \ le##size##_to_cpu(entry->esre.esre1->name)); \ } \ \ -static struct esre_attribute esre_##name = __ATTR(name, 0400, \ - esre_##name##_show, NULL) +static struct esre_attribute esre_##name = __ATTR_RO_MODE(name, 0400) esre_attr_decl(fw_type, 32, "%u"); esre_attr_decl(fw_version, 32, "%u"); @@ -193,14 +191,13 @@ static int esre_create_sysfs_entry(void *esre, int entry_num) /* support for displaying ESRT fields at the top level */ #define esrt_attr_decl(name, size, fmt) \ -static ssize_t esrt_##name##_show(struct kobject *kobj, \ +static ssize_t name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf)\ { \ return sprintf(buf, fmt "\n", le##size##_to_cpu(esrt->name)); \ } \ \ -static struct kobj_attribute esrt_##name = __ATTR(name, 0400, \ - esrt_##name##_show, NULL) +static struct kobj_attribute esrt_##name = __ATTR_RO_MODE(name, 0400) esrt_attr_decl(fw_resource_count, 32, "%u"); esrt_attr_decl(fw_resource_count_max, 32, "%u"); @@ -254,7 +251,7 @@ void __init efi_esrt_init(void) rc = efi_mem_desc_lookup(efi.esrt, &md); if (rc < 0) { - pr_err("ESRT header is not in the memory map.\n"); + pr_warn("ESRT header is not in the memory map.\n"); return; } @@ -431,7 +428,7 @@ static int __init esrt_sysfs_init(void) err_remove_esrt: kobject_put(esrt_kobj); err: - kfree(esrt); + memunmap(esrt); esrt = NULL; return error; } diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c index 8e64b77aeac95e43c0e0571694f42bbe6c8ba73f..f377609ff141bca733bf498babc25f9d215aefad 100644 --- a/drivers/firmware/efi/runtime-map.c +++ b/drivers/firmware/efi/runtime-map.c @@ -63,11 +63,11 @@ static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr, return map_attr->show(entry, buf); } -static struct map_attribute map_type_attr = __ATTR_RO(type); -static struct map_attribute map_phys_addr_attr = __ATTR_RO(phys_addr); -static struct map_attribute map_virt_addr_attr = __ATTR_RO(virt_addr); -static struct map_attribute map_num_pages_attr = __ATTR_RO(num_pages); -static struct map_attribute map_attribute_attr = __ATTR_RO(attribute); +static struct map_attribute map_type_attr = __ATTR_RO_MODE(type, 0400); +static struct map_attribute map_phys_addr_attr = __ATTR_RO_MODE(phys_addr, 0400); +static struct map_attribute map_virt_addr_attr = __ATTR_RO_MODE(virt_addr, 0400); +static struct map_attribute map_num_pages_attr = __ATTR_RO_MODE(num_pages, 0400); +static struct map_attribute map_attribute_attr = __ATTR_RO_MODE(attribute, 0400); /* * These are default attributes that are added for every memmap entry. 
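The sysfs conversions in the efi/esrt.c and efi/runtime-map.c hunks above all hinge on the show helpers being renamed to the bare <attr>_show pattern: __ATTR_RO_MODE() builds both the sysfs file name and the .show callback from the attribute name by stringification and token pasting, so prefixed helpers such as esre_fw_class_show() would no longer resolve. A minimal sketch of the mechanism, assuming the __ATTR_RO_MODE() definition that include/linux/sysfs.h gained alongside this series:

#define __ATTR_RO_MODE(_name, _mode) {					\
	.attr	= { .name = __stringify(_name),				\
		    .mode = VERIFY_OCTAL_PERMISSIONS(_mode) },		\
	.show	= _name##_show,						\
}

/* A declaration generated by esrt_attr_decl() above, for example: */
static struct kobj_attribute esrt_fw_resource_count =
	__ATTR_RO_MODE(fw_resource_count, 0400);

/* expands to an attribute whose sysfs file is named "fw_resource_count"
 * (mode 0400) and whose .show callback is fw_resource_count_show(),
 * which is why every show helper lost its esrt_/esre_ prefix. */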
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c index 5bddbd507ca9f105aa18cfe5f43b673b676d551d..3fe6a21e05a5718d8769bf2dd505cb5968f41207 100644 --- a/drivers/gpio/gpio-altera.c +++ b/drivers/gpio/gpio-altera.c @@ -90,21 +90,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d, altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d)); - if (type == IRQ_TYPE_NONE) + if (type == IRQ_TYPE_NONE) { + irq_set_handler_locked(d, handle_bad_irq); return 0; - if (type == IRQ_TYPE_LEVEL_HIGH && - altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH) - return 0; - if (type == IRQ_TYPE_EDGE_RISING && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING) - return 0; - if (type == IRQ_TYPE_EDGE_FALLING && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING) - return 0; - if (type == IRQ_TYPE_EDGE_BOTH && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH) + } + if (type == altera_gc->interrupt_trigger) { + if (type == IRQ_TYPE_LEVEL_HIGH) + irq_set_handler_locked(d, handle_level_irq); + else + irq_set_handler_locked(d, handle_simple_irq); return 0; - + } + irq_set_handler_locked(d, handle_bad_irq); return -EINVAL; } @@ -230,7 +227,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); } - static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc) { struct altera_gpio_chip *altera_gc; @@ -310,7 +306,7 @@ static int altera_gpio_probe(struct platform_device *pdev) altera_gc->interrupt_trigger = reg; ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0, - handle_simple_irq, IRQ_TYPE_NONE); + handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(&pdev->dev, "could not add irqchip\n"); diff --git a/drivers/gpio/gpio-ath79.c b/drivers/gpio/gpio-ath79.c index dc37dbe4b46d8889bfaddeac0c492dd6ed56090c..a83e97e15c142e8d198803148593634ba2652716 100644 --- a/drivers/gpio/gpio-ath79.c +++ b/drivers/gpio/gpio-ath79.c @@ -323,3 +323,6 @@ static struct platform_driver ath79_gpio_driver = { }; module_platform_driver(ath79_gpio_driver); + +MODULE_DESCRIPTION("Atheros AR71XX/AR724X/AR913X GPIO API support"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpio/gpio-iop.c b/drivers/gpio/gpio-iop.c index 98c7ff2a76e76a24194f2c3278af42f5d5c4ff9c..8d62db447ec1baff473836ee75b33fdda64c2d7d 100644 --- a/drivers/gpio/gpio-iop.c +++ b/drivers/gpio/gpio-iop.c @@ -58,3 +58,7 @@ static int __init iop3xx_gpio_init(void) return platform_driver_register(&iop3xx_gpio_driver); } arch_initcall(iop3xx_gpio_init); + +MODULE_DESCRIPTION("GPIO handling for Intel IOP3xx processors"); +MODULE_AUTHOR("Lennert Buytenhek "); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index adba614b39656791fbdfc03292ac597c5eb8b2e5..abb5a27525110c1ea11e4bc85b43e6baf5bc56d1 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -190,6 +190,16 @@ static void stmpe_gpio_irq_sync_unlock(struct irq_data *d) }; int i, j; + /* + * STMPE1600: to be able to get IRQ from pins, + * a read must be done on GPMR register, or a write in + * GPSR or GPCR registers + */ + if (stmpe->partnum == STMPE1600) { + stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_LSB]); + stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_CSB]); + } + for (i = 0; i < CACHE_NR_REGS; i++) { /* STMPE801 and STMPE1600 don't have RE and FE registers */ if ((stmpe->partnum == STMPE801 || @@ -227,21 +237,11 @@ static void stmpe_gpio_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct stmpe_gpio 
*stmpe_gpio = gpiochip_get_data(gc); - struct stmpe *stmpe = stmpe_gpio->stmpe; int offset = d->hwirq; int regoffset = offset / 8; int mask = BIT(offset % 8); stmpe_gpio->regs[REG_IE][regoffset] |= mask; - - /* - * STMPE1600 workaround: to be able to get IRQ from pins, - * a read must be done on GPMR register, or a write in - * GPSR or GPCR registers - */ - if (stmpe->partnum == STMPE1600) - stmpe_reg_read(stmpe, - stmpe->regs[STMPE_IDX_GPMR_LSB + regoffset]); } static void stmpe_dbg_show_one(struct seq_file *s, diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 063d176baa24a887ee4c561c32449e1c425c0b82..f3c3680963b94ba2ec52f0b9e9bab208c84a2f68 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -705,6 +705,9 @@ static irqreturn_t lineevent_irq_thread(int irq, void *p) struct gpioevent_data ge; int ret, level; + /* Do not leak kernel stack to userspace */ + memset(&ge, 0, sizeof(ge)); + ge.timestamp = ktime_get_real_ns(); level = gpiod_get_value_cansleep(le->desc); diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile index 8363cb57915b0b726c704b8be37805ecef2a18ee..8a08e81ee90d579774ca96bc70853093ba623f09 100644 --- a/drivers/gpu/drm/amd/acp/Makefile +++ b/drivers/gpu/drm/amd/acp/Makefile @@ -3,6 +3,4 @@ # of AMDSOC/AMDGPU drm driver. # It provides the HW control for ACP related functionalities. -subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include - AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 1a0a5f7cccbc2661348bc6d1263ac8fa9ac47c99..47951f4775b94bc55bf42b19091db5be5a00cbc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -367,29 +367,50 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; + unsigned long end_jiffies; uint32_t sdma_base_addr; + uint32_t data; m = get_sdma_mqd(mqd); sdma_base_addr = get_sdma_base_addr(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, - m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, - m->sdma_rlc_rb_base); + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + if (m->sdma_engine_id) { + data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); + } else { + data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); + } + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, + m->sdma_rlc_doorbell); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdma_rlc_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdma_rlc_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, 
m->sdma_rlc_rb_rptr_addr_hi); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, - m->sdma_rlc_doorbell); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdma_rlc_rb_cntl); @@ -493,9 +514,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index f8fdbd1378a7536848ed7d4d4b1e1a8dc4bbcaf1..26afdffab5a06ccc4fee8322ef074366b6edb57e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1788,34 +1788,32 @@ void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev) WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]); } -/* Atom needs data in little endian format - * so swap as appropriate when copying data to - * or from atom. Note that atom operates on - * dw units. +/* Atom needs data in little endian format so swap as appropriate when copying + * data to or from atom. Note that atom operates on dw units. + * + * Use to_le=true when sending data to atom and provide at least + * ALIGN(num_bytes,4) bytes in the dst buffer. + * + * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4) + * bytes in the src buffer. */ void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) { #ifdef __BIG_ENDIAN - u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ - u32 *dst32, *src32; + u32 src_tmp[5], dst_tmp[5]; int i; + u8 align_num_bytes = ALIGN(num_bytes, 4); - memcpy(src_tmp, src, num_bytes); - src32 = (u32 *)src_tmp; - dst32 = (u32 *)dst_tmp; if (to_le) { - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = cpu_to_le32(src32[i]); - memcpy(dst, dst_tmp, num_bytes); + memcpy(src_tmp, src, num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = cpu_to_le32(src_tmp[i]); + memcpy(dst, dst_tmp, align_num_bytes); } else { - u8 dws = num_bytes & ~3; - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = le32_to_cpu(src32[i]); - memcpy(dst, dst_tmp, dws); - if (num_bytes % 4) { - for (i = 0; i < (num_bytes % 4); i++) - dst[dws+i] = dst_tmp[dws+i]; - } + memcpy(src_tmp, src, align_num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = le32_to_cpu(src_tmp[i]); + memcpy(dst, dst_tmp, num_bytes); } #else memcpy(dst, src, num_bytes); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index bfb4b91869e7e68a6142ef25b0cf2bf686e06b79..cb505f66d3aab7f2cd7757bfd4a67100dc161686 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -240,6 +240,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) for (; i >= 0; i--) drm_free_large(p->chunks[i].kdata); kfree(p->chunks); + p->chunks = NULL; + p->nchunks = 0; put_ctx: amdgpu_ctx_put(p->ctx); free_chunk: @@ -414,6 +416,10 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, if (candidate == lobj) break; + /* We can't move pinned BOs here */ + if (bo->pin_count) + continue; + other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); /* Check if this BO is in one of the domains we need space for */ diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e41d4baebf86ea39b7e604071d72b0ca29ded612..ce9797b6f9c75091e099062a3ef18c69aacb6a20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2020,8 +2020,11 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } r = amdgpu_late_init(adev); - if (r) + if (r) { + if (fbcon) + console_unlock(); return r; + } /* pin cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 743a12df6971bd7457f2aaaeaecb899bf2cbed6a..3bb2b9b5ef9c5e8d1038dd572699c77b7bc974c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -648,7 +648,7 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) uint32_t allocated = 0; uint32_t tmp, handle = 0; uint32_t *size = &tmp; - int i, r, idx = 0; + int i, r = 0, idx = 0; r = amdgpu_cs_sysvm_access_required(p); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 968c4260d7a7e0ccfa94f4a3f069acc9f73e1830..47503759906b8e1f6ab0b2d20dcbcc3fcf333e5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -744,7 +744,7 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - int r; + int r = 0; r = amdgpu_vm_update_pd_or_shadow(adev, vm, true); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index c2bd9f045532b3f24b6f34004233ec9ed4e70614..6d75fd0e3105f297c9a3bd435a7f28c528c67d74 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -565,11 +565,8 @@ static const struct drm_encoder_helper_funcs dce_virtual_encoder_helper_funcs = static void dce_virtual_encoder_destroy(struct drm_encoder *encoder) { - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - - kfree(amdgpu_encoder->enc_priv); drm_encoder_cleanup(encoder); - kfree(amdgpu_encoder); + kfree(encoder); } static const struct drm_encoder_funcs dce_virtual_encoder_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 50f0cf2788b73f43c7dce0fe280bf19293cd4259..7522f796f19b64edc729a04971600254a79020de 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -182,7 +182,7 @@ static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev, WREG32(mmVCE_UENC_CLOCK_GATING_2, data); data = RREG32(mmVCE_UENC_REG_CLOCK_GATING); - data &= ~0xffc00000; + data &= ~0x3ff; WREG32(mmVCE_UENC_REG_CLOCK_GATING, data); data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index d83de985e88cf882a150f2823a08632a7e2e5594..8577a563600fe3368e132e83ab4e4f1cfdcba60d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -215,8 +215,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, BUG_ON(!mm || !mqd || !q); m = get_sdma_mqd(mqd); - m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) << - SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 
1) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index e1fb40b84c72f84f48a13be1e2b467d6dff80bb0..5425c68d028774f50af6d9c390e6803f62b4ff37 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -205,6 +205,24 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: + if (dev->dqm->queue_count >= + CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) { + pr_err("Over-subscription is not allowed for SDMA.\n"); + retval = -EPERM; + goto err_create_queue; + } + + retval = create_cp_queue(pqm, dev, &q, properties, f, *qid); + if (retval != 0) + goto err_create_queue; + pqn->q = q; + pqn->kq = NULL; + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, + &q->properties.vmid); + pr_debug("DQM returned %d for create_queue\n", retval); + print_queue(q); + break; + case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c index 4477c55a58e32f903d33005cf9f2cc13a07215ba..a8b59b3decd8b9f66df71c5674919b1169e15f09 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c @@ -850,9 +850,9 @@ static int init_over_drive_limits( const ATOM_Tonga_POWERPLAYTABLE *powerplay_table) { hwmgr->platform_descriptor.overdriveLimit.engineClock = - le16_to_cpu(powerplay_table->ulMaxODEngineClock); + le32_to_cpu(powerplay_table->ulMaxODEngineClock); hwmgr->platform_descriptor.overdriveLimit.memoryClock = - le16_to_cpu(powerplay_table->ulMaxODMemoryClock); + le32_to_cpu(powerplay_table->ulMaxODMemoryClock); hwmgr->platform_descriptor.minOverdriveVDDC = 0; hwmgr->platform_descriptor.maxOverdriveVDDC = 0; diff --git a/drivers/gpu/drm/armada/Makefile b/drivers/gpu/drm/armada/Makefile index 26412d2f8c980cf1fbee7ce865c137d354f2ab95..ffd67361577280117dfffedc7b94bda7470b5ee2 100644 --- a/drivers/gpu/drm/armada/Makefile +++ b/drivers/gpu/drm/armada/Makefile @@ -4,5 +4,3 @@ armada-y += armada_510.o armada-$(CONFIG_DEBUG_FS) += armada_debugfs.o obj-$(CONFIG_DRM_ARMADA) := armada.o - -CFLAGS_armada_trace.o := -I$(src) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 44d476ea6d2e000520c042a54297b266fdc5f308..f64f35cdc2ff11b4ecbcbf526877a0b3c993cce0 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -97,7 +97,7 @@ #define DP0_ACTIVEVAL 0x0650 #define DP0_SYNCVAL 0x0654 #define DP0_MISC 0x0658 -#define TU_SIZE_RECOMMENDED (0x3f << 16) /* LSCLK cycles per TU */ +#define TU_SIZE_RECOMMENDED (63) /* LSCLK cycles per TU */ #define BPC_6 (0 << 5) #define BPC_8 (1 << 5) @@ -318,7 +318,7 @@ static ssize_t tc_aux_transfer(struct drm_dp_aux *aux, tmp = (tmp << 8) | buf[i]; i++; if (((i % 4) == 0) || (i == size)) { - tc_write(DP0_AUXWDATA(i >> 2), tmp); + tc_write(DP0_AUXWDATA((i - 1) >> 2), tmp); tmp = 0; } } @@ -603,8 +603,15 @@ static int tc_get_display_props(struct tc_data *tc) ret = drm_dp_link_probe(&tc->aux, &tc->link.base); if (ret < 0) goto err_dpcd_read; - if 
((tc->link.base.rate != 162000) && (tc->link.base.rate != 270000)) - goto err_dpcd_inval; + if (tc->link.base.rate != 162000 && tc->link.base.rate != 270000) { + dev_dbg(tc->dev, "Falling back to 2.7 Gbps rate\n"); + tc->link.base.rate = 270000; + } + + if (tc->link.base.num_lanes > 2) { + dev_dbg(tc->dev, "Falling back to 2 lanes\n"); + tc->link.base.num_lanes = 2; + } ret = drm_dp_dpcd_readb(&tc->aux, DP_MAX_DOWNSPREAD, tmp); if (ret < 0) @@ -637,9 +644,6 @@ static int tc_get_display_props(struct tc_data *tc) err_dpcd_read: dev_err(tc->dev, "failed to read DPCD: %d\n", ret); return ret; -err_dpcd_inval: - dev_err(tc->dev, "invalid DPCD\n"); - return -EINVAL; } static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) @@ -655,6 +659,14 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) int lower_margin = mode->vsync_start - mode->vdisplay; int vsync_len = mode->vsync_end - mode->vsync_start; + /* + * Recommended maximum number of symbols transferred in a transfer unit: + * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, + * (output active video bandwidth in bytes)) + * Must be less than tu_size. + */ + max_tu_symbol = TU_SIZE_RECOMMENDED - 1; + dev_dbg(tc->dev, "set mode %dx%d\n", mode->hdisplay, mode->vdisplay); dev_dbg(tc->dev, "H margin %d,%d sync %d\n", @@ -664,13 +676,18 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) dev_dbg(tc->dev, "total: %dx%d\n", mode->htotal, mode->vtotal); - /* LCD Ctl Frame Size */ - tc_write(VPCTRL0, (0x40 << 20) /* VSDELAY */ | + /* + * LCD Ctl Frame Size + * the datasheet is not clear about vsdelay in case of DPI; + * assume we do not need any delay when DPI is the source of + * sync signals + */ + tc_write(VPCTRL0, (0 << 20) /* VSDELAY */ | OPXLFMT_RGB888 | FRMSYNC_DISABLED | MSF_DISABLED); - tc_write(HTIM01, (left_margin << 16) | /* H back porch */ - (hsync_len << 0)); /* Hsync */ - tc_write(HTIM02, (right_margin << 16) | /* H front porch */ - (mode->hdisplay << 0)); /* width */ + tc_write(HTIM01, (ALIGN(left_margin, 2) << 16) | /* H back porch */ + (ALIGN(hsync_len, 2) << 0)); /* Hsync */ + tc_write(HTIM02, (ALIGN(right_margin, 2) << 16) | /* H front porch */ + (ALIGN(mode->hdisplay, 2) << 0)); /* width */ tc_write(VTIM01, (upper_margin << 16) | /* V back porch */ (vsync_len << 0)); /* Vsync */ tc_write(VTIM02, (lower_margin << 16) | /* V front porch */ @@ -689,7 +706,7 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) /* DP Main Stream Attributes */ vid_sync_dly = hsync_len + left_margin + mode->hdisplay; tc_write(DP0_VIDSYNCDELAY, - (0x003e << 16) | /* thresh_dly */ + (max_tu_symbol << 16) | /* thresh_dly */ (vid_sync_dly << 0)); tc_write(DP0_TOTALVAL, (mode->vtotal << 16) | (mode->htotal)); @@ -705,14 +722,8 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW | DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888); - /* - * Recommended maximum number of symbols transferred in a transfer unit: - * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, - * (output active video bandwidth in bytes)) - * Must be less than tu_size. 
- */ - max_tu_symbol = TU_SIZE_RECOMMENDED - 1; - tc_write(DP0_MISC, (max_tu_symbol << 23) | TU_SIZE_RECOMMENDED | BPC_8); + tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | + BPC_8); return 0; err: @@ -808,8 +819,6 @@ static int tc_main_link_setup(struct tc_data *tc) unsigned int rate; u32 dp_phy_ctrl; int timeout; - bool aligned; - bool ready; u32 value; int ret; u8 tmp[8]; @@ -954,16 +963,15 @@ static int tc_main_link_setup(struct tc_data *tc) ret = drm_dp_dpcd_read_link_status(aux, tmp + 2); if (ret < 0) goto err_dpcd_read; - ready = (tmp[2] == ((DP_CHANNEL_EQ_BITS << 4) | /* Lane1 */ - DP_CHANNEL_EQ_BITS)); /* Lane0 */ - aligned = tmp[4] & DP_INTERLANE_ALIGN_DONE; - } while ((--timeout) && !(ready && aligned)); + } while ((--timeout) && + !(drm_dp_channel_eq_ok(tmp + 2, tc->link.base.num_lanes))); if (timeout == 0) { /* Read DPCD 0x200-0x201 */ ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT, tmp, 2); if (ret < 0) goto err_dpcd_read; + dev_err(dev, "channel(s) EQ not ok\n"); dev_info(dev, "0x0200 SINK_COUNT: 0x%02x\n", tmp[0]); dev_info(dev, "0x0201 DEVICE_SERVICE_IRQ_VECTOR: 0x%02x\n", tmp[1]); @@ -974,10 +982,6 @@ static int tc_main_link_setup(struct tc_data *tc) dev_info(dev, "0x0206 ADJUST_REQUEST_LANE0_1: 0x%02x\n", tmp[6]); - if (!ready) - dev_err(dev, "Lane0/1 not ready\n"); - if (!aligned) - dev_err(dev, "Lane0/1 not aligned\n"); return -EAGAIN; } @@ -1105,7 +1109,10 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge, static int tc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - /* Accept any mode */ + /* DPI interface clock limitation: up to 154 MHz */ + if (mode->clock > 154000) + return MODE_CLOCK_HIGH; + return MODE_OK; } diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 6ca1f3117fe8d524fbb4dfed89692c527c5bd41c..6dd09c306bc1a8d76e33910a63b798df2fa6803b 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -46,7 +46,8 @@ enum decon_flag_bits { BIT_CLKS_ENABLED, BIT_IRQS_ENABLED, BIT_WIN_UPDATED, - BIT_SUSPENDED + BIT_SUSPENDED, + BIT_REQUEST_UPDATE }; struct decon_context { @@ -315,6 +316,7 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, /* window enable */ decon_set_bits(ctx, DECON_WINCONx(win), WINCONx_ENWIN_F, ~0); + set_bit(BIT_REQUEST_UPDATE, &ctx->flags); } static void decon_disable_plane(struct exynos_drm_crtc *crtc, @@ -327,6 +329,7 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc, return; decon_set_bits(ctx, DECON_WINCONx(win), WINCONx_ENWIN_F, 0); + set_bit(BIT_REQUEST_UPDATE, &ctx->flags); } static void decon_atomic_flush(struct exynos_drm_crtc *crtc) @@ -340,8 +343,8 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) for (i = ctx->first_win; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); - /* standalone update */ - decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0); + if (test_and_clear_bit(BIT_REQUEST_UPDATE, &ctx->flags)) + decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0); if (ctx->out_type & IFTYPE_I80) set_bit(BIT_WIN_UPDATED, &ctx->flags); diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index f2ae72ba7d5ac87e9d3d399fca5453b2799215e0..2abc47b554ab8456f866be5ef5de3d11f85577bc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -246,6 +246,15 @@ struct exynos_drm_gem *exynos_drm_gem_create(struct 
drm_device *dev, if (IS_ERR(exynos_gem)) return exynos_gem; + if (!is_drm_iommu_supported(dev) && (flags & EXYNOS_BO_NONCONTIG)) { + /* + * when no IOMMU is available, all allocated buffers are + * contiguous anyway, so drop EXYNOS_BO_NONCONTIG flag + */ + flags &= ~EXYNOS_BO_NONCONTIG; + DRM_WARN("Non-contiguous allocation is not supported without IOMMU, falling back to contiguous buffer\n"); + } + /* set memory type and cache attribute from user side. */ exynos_gem->flags = flags; diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index cc2fde2ae5eff272c8f5e15548b61b525a54c6cc..c9eef0f51d31a2126e4f7238e0c86f077ffdebbb 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -243,7 +243,6 @@ static int fsl_dcu_drm_pm_suspend(struct device *dev) return PTR_ERR(fsl_dev->state); } - clk_disable_unprepare(fsl_dev->pix_clk); clk_disable_unprepare(fsl_dev->clk); return 0; @@ -266,6 +265,7 @@ static int fsl_dcu_drm_pm_resume(struct device *dev) if (fsl_dev->tcon) fsl_tcon_bypass_enable(fsl_dev->tcon); fsl_dcu_drm_init_planes(fsl_dev->drm); + enable_irq(fsl_dev->irq); drm_atomic_helper_resume(fsl_dev->drm, fsl_dev->state); console_lock(); @@ -273,7 +273,6 @@ static int fsl_dcu_drm_pm_resume(struct device *dev) console_unlock(); drm_kms_helper_poll_enable(fsl_dev->drm); - enable_irq(fsl_dev->irq); return 0; } diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c index 7e7a4d43d6b698f2326e0e8d2765b0c27bb80164..0f563c95452069a27529e9249037b00e913c9ce0 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c @@ -521,9 +521,12 @@ static void ade_crtc_atomic_begin(struct drm_crtc *crtc, { struct ade_crtc *acrtc = to_ade_crtc(crtc); struct ade_hw_ctx *ctx = acrtc->ctx; + struct drm_display_mode *mode = &crtc->state->mode; + struct drm_display_mode *adj_mode = &crtc->state->adjusted_mode; if (!ctx->power_on) (void)ade_power_up(ctx); + ade_ldi_set_mode(acrtc, mode, adj_mode); } static void ade_crtc_atomic_flush(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 7fdc42e5aac82dfa1080e04f7d7420ff90b39ec3..74163a928cbac80bb78f674682e4f4dda4b99a52 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5063,6 +5063,12 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, */ final->t8 = 1; final->t9 = 1; + + /* + * HW has only a 100msec granularity for t11_t12 so round it up + * accordingly. 
+ */ + final->t11_t12 = roundup(final->t11_t12, 100 * 10); } static void diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 79aab9ad6faa826416abd8004f35ce78d9f62c2c..6769aa1b6922407123663e8f88666aee9c4daeda 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -430,7 +430,9 @@ static bool gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && - !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 && + msgs[i].addr == msgs[i + 1].addr && + !(msgs[i].flags & I2C_M_RD) && + (msgs[i].len == 1 || msgs[i].len == 2) && (msgs[i + 1].flags & I2C_M_RD)); } diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index 4ceed7a9762f1a8309f84441154550878a8e4bbc..4b83e9eeab06df1027e330407d350ace99a1536b 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -638,7 +638,8 @@ static int omap_dmm_probe(struct platform_device *dev) match = of_match_node(dmm_of_match, dev->dev.of_node); if (!match) { dev_err(&dev->dev, "failed to find matching device node\n"); - return -ENODEV; + ret = -ENODEV; + goto fail; } omap_dmm->plat_data = match->data; diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c index af267c35d813cc7548f060ef5771d6cd4232b4c9..ee5883f59be5a1992c6bdd20c751285079f5d3c1 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c +++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c @@ -147,9 +147,6 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer, struct drm_gem_object *obj = buffer->priv; int ret = 0; - if (WARN_ON(!obj->filp)) - return -EINVAL; - ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 27cb42467b2024881b7d5ea9cc9ca146a666e713..6f65846b1783c9be1fc74db2b975c7df753c070f 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -369,6 +369,7 @@ static int panel_simple_remove(struct device *dev) drm_panel_remove(&panel->base); panel_simple_disable(&panel->base); + panel_simple_unprepare(&panel->base); if (panel->ddc) put_device(&panel->ddc->dev); @@ -384,6 +385,7 @@ static void panel_simple_shutdown(struct device *dev) struct panel_simple *panel = dev_get_drvdata(dev); panel_simple_disable(&panel->base); + panel_simple_unprepare(&panel->base); } static const struct drm_display_mode ampire_am800480r3tmqwa1h_mode = { diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 432cb46f6a34a10753e30317562ce2df5315a890..fd7682bf335dcc2b9760c980827233325797cb82 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -45,34 +45,32 @@ static char *pre_emph_names[] = { /***** radeon AUX functions *****/ -/* Atom needs data in little endian format - * so swap as appropriate when copying data to - * or from atom. Note that atom operates on - * dw units. +/* Atom needs data in little endian format so swap as appropriate when copying + * data to or from atom. Note that atom operates on dw units. + * + * Use to_le=true when sending data to atom and provide at least + * ALIGN(num_bytes,4) bytes in the dst buffer. + * + * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4) + * bytes in the src buffer. 
*/ void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) { #ifdef __BIG_ENDIAN - u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ - u32 *dst32, *src32; + u32 src_tmp[5], dst_tmp[5]; int i; + u8 align_num_bytes = ALIGN(num_bytes, 4); - memcpy(src_tmp, src, num_bytes); - src32 = (u32 *)src_tmp; - dst32 = (u32 *)dst_tmp; if (to_le) { - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = cpu_to_le32(src32[i]); - memcpy(dst, dst_tmp, num_bytes); + memcpy(src_tmp, src, num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = cpu_to_le32(src_tmp[i]); + memcpy(dst, dst_tmp, align_num_bytes); } else { - u8 dws = num_bytes & ~3; - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = le32_to_cpu(src32[i]); - memcpy(dst, dst_tmp, dws); - if (num_bytes % 4) { - for (i = 0; i < (num_bytes % 4); i++) - dst[dws+i] = dst_tmp[dws+i]; - } + memcpy(src_tmp, src, align_num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = le32_to_cpu(src_tmp[i]); + memcpy(dst, dst_tmp, num_bytes); } #else memcpy(dst, src, num_bytes); diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 0daad446d2c754768f501604ff3bc8fe623731aa..af84705b82ed55b1d9072c461ed901aab7de8f93 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -252,7 +252,6 @@ static int radeonfb_create(struct drm_fb_helper *helper, } info->par = rfbdev; - info->skip_vt_switch = true; ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 13ba73fd9b68849bd34d27207eb282eb6ff62793..8bd9e6c371d150ced85580470f44bb584d189b0c 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3029,6 +3029,16 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, max_sclk = 75000; max_mclk = 80000; } + } else if (rdev->family == CHIP_OLAND) { + if ((rdev->pdev->revision == 0xC7) || + (rdev->pdev->revision == 0x80) || + (rdev->pdev->revision == 0x81) || + (rdev->pdev->revision == 0x83) || + (rdev->pdev->revision == 0x87) || + (rdev->pdev->device == 0x6604) || + (rdev->pdev->device == 0x6605)) { + max_sclk = 75000; + } } /* Apply dpm quirks */ while (p && p->chip_device != 0) { diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index a2ec6d8796a094e73bb888be55f08a9d9daf3532..3322b157106dde518df47c4edcd20d4465904879 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -392,6 +392,31 @@ static void rcar_du_crtc_start(struct rcar_du_crtc *rcrtc) rcrtc->started = true; } +static void rcar_du_crtc_disable_planes(struct rcar_du_crtc *rcrtc) +{ + struct rcar_du_device *rcdu = rcrtc->group->dev; + struct drm_crtc *crtc = &rcrtc->crtc; + u32 status; + /* Make sure vblank interrupts are enabled. */ + drm_crtc_vblank_get(crtc); + /* + * Disable planes and calculate how many vertical blanking interrupts we + * have to wait for. If a vertical blanking interrupt has been triggered + * but not processed yet, we don't know whether it occurred before or + * after the planes got disabled. We thus have to wait for two vblank + * interrupts in that case. + */ + spin_lock_irq(&rcrtc->vblank_lock); + rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0); + status = rcar_du_crtc_read(rcrtc, DSSR); + rcrtc->vblank_count = status & DSSR_VBK ? 
2 : 1; + spin_unlock_irq(&rcrtc->vblank_lock); + if (!wait_event_timeout(rcrtc->vblank_wait, rcrtc->vblank_count == 0, + msecs_to_jiffies(100))) + dev_warn(rcdu->dev, "vertical blanking timeout\n"); + drm_crtc_vblank_put(crtc); +} + static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc) { struct drm_crtc *crtc = &rcrtc->crtc; @@ -400,17 +425,16 @@ static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc) return; /* Disable all planes and wait for the change to take effect. This is - * required as the DSnPR registers are updated on vblank, and no vblank - * will occur once the CRTC is stopped. Disabling planes when starting - * the CRTC thus wouldn't be enough as it would start scanning out - * immediately from old frame buffers until the next vblank. + * required as the plane enable registers are updated on vblank, and no + * vblank will occur once the CRTC is stopped. Disabling planes when + * starting the CRTC thus wouldn't be enough as it would start scanning + * out immediately from old frame buffers until the next vblank. * * This increases the CRTC stop delay, especially when multiple CRTCs * are stopped in one operation as we now wait for one vblank per CRTC. * Whether this can be improved needs to be researched. */ - rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0); - drm_crtc_wait_one_vblank(crtc); + rcar_du_crtc_disable_planes(rcrtc); /* Disable vertical blanking interrupt reporting. We first need to wait * for page flip completion before stopping the CRTC as userspace @@ -548,10 +572,25 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg) irqreturn_t ret = IRQ_NONE; u32 status; + spin_lock(&rcrtc->vblank_lock); + status = rcar_du_crtc_read(rcrtc, DSSR); rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK); - if (status & DSSR_FRM) { + if (status & DSSR_VBK) { + /* + * Wake up the vblank wait if the counter reaches 0. This must + * be protected by the vblank_lock to avoid races in + * rcar_du_crtc_disable_planes(). 
+ */ + if (rcrtc->vblank_count) { + if (--rcrtc->vblank_count == 0) + wake_up(&rcrtc->vblank_wait); + } + } + spin_unlock(&rcrtc->vblank_lock); + + if (status & DSSR_VBK) { drm_crtc_handle_vblank(&rcrtc->crtc); rcar_du_crtc_finish_page_flip(rcrtc); ret = IRQ_HANDLED; @@ -606,6 +645,8 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) } init_waitqueue_head(&rcrtc->flip_wait); + init_waitqueue_head(&rcrtc->vblank_wait); + spin_lock_init(&rcrtc->vblank_lock); rcrtc->group = rgrp; rcrtc->mmio_offset = mmio_offsets[index]; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h index 6f08b7e7db06708cfa910fd7655d3d52ee3f78b7..48bef05b4c625401bedb4446a48aee9953493f0f 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h @@ -15,6 +15,7 @@ #define __RCAR_DU_CRTC_H__ #include +#include #include #include @@ -33,6 +34,9 @@ struct rcar_du_vsp; * @started: whether the CRTC has been started and is running * @event: event to post when the pending page flip completes * @flip_wait: wait queue used to signal page flip completion + * @vblank_lock: protects vblank_wait and vblank_count + * @vblank_wait: wait queue used to signal vertical blanking + * @vblank_count: number of vertical blanking interrupts to wait for * @outputs: bitmask of the outputs (enum rcar_du_output) driven by this CRTC * @group: CRTC group this CRTC belongs to */ @@ -48,6 +52,10 @@ struct rcar_du_crtc { struct drm_pending_vblank_event *event; wait_queue_head_t flip_wait; + spinlock_t vblank_lock; + wait_queue_head_t vblank_wait; + unsigned int vblank_count; + unsigned int outputs; struct rcar_du_group *group; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index bf6e21655c576aed2810193e576441e69af406b9..7d22f9874d5f6a3b19547651bcd634208c9e7f51 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -473,6 +473,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, INIT_LIST_HEAD(&fbo->lru); INIT_LIST_HEAD(&fbo->swap); INIT_LIST_HEAD(&fbo->io_reserve_lru); + mutex_init(&fbo->wu_mutex); fbo->moving = NULL; drm_vma_node_reset(&fbo->vma_node); atomic_set(&fbo->cpu_writers, 0); diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 094bc6a475c1773923dfe8225cba9886fc9e5026..d96c084d3a76dddd3b6dc13677fe720db400ecad 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -225,6 +225,9 @@ vc4_irq_uninstall(struct drm_device *dev) /* Clear any pending interrupts we might have left. */ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); + /* Finish any interrupt handler still in flight. */ + disable_irq(dev->irq); + cancel_work_sync(&vc4->overflow_mem_work); } diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 7cc346ad9b0baed63701d1fae8f0306aa7713129..ce7c21d250cf9ed99d0b1eaa401e91629c459e0f 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -173,6 +173,9 @@ static int vc4_v3d_runtime_resume(struct device *dev) struct vc4_dev *vc4 = v3d->vc4; vc4_v3d_init_hw(vc4->dev); + + /* We disabled the IRQ as part of vc4_irq_uninstall in suspend. 
*/ + enable_irq(vc4->dev->irq); vc4_irq_postinstall(vc4->dev); return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index fefb9d995d2cb3d0da0c2972dabf1926e3b52047..81f5a552e32f7ae86c2e70bdceb0d6920207852b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2729,6 +2729,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv, } view_type = vmw_view_cmd_to_type(header->id); + if (view_type == vmw_view_max) + return -EINVAL; cmd = container_of(header, typeof(*cmd), header); ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, user_surface_converter, diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index cd4599c0523bf62e2c4ebd886c51763fb6307bb5..8eed456a67bed0f4611ba7b06bcbae366e71ceda 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -175,11 +175,11 @@ config HID_CHERRY Support for Cherry Cymotion keyboard. config HID_CHICONY - tristate "Chicony Tactical pad" + tristate "Chicony devices" depends on HID default !EXPERT ---help--- - Support for Chicony Tactical pad. + Support for Chicony Tactical pad and special keys on Chicony keyboards. config HID_CORSAIR tristate "Corsair devices" @@ -190,6 +190,7 @@ config HID_CORSAIR Supported devices: - Vengeance K90 + - Scimitar PRO RGB config HID_PRODIKEYS tristate "Prodikeys PC-MIDI Keyboard support" diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c index bc3cec199feefdf437d0c0141c5ff6f73aa10308..f04ed9aabc3f9fea0baf5b074acd83b6d07527c6 100644 --- a/drivers/hid/hid-chicony.c +++ b/drivers/hid/hid-chicony.c @@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, + { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, { } }; MODULE_DEVICE_TABLE(hid, ch_devices); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 4f3f5749b0c15858cab72b10faf2789b57c82c03..03cac5731afcb3ddd247e83b9b22fe8bdb422ecb 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1872,6 +1872,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) }, { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) }, @@ -1906,6 +1907,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) }, { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, + { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) }, @@ -2105,6 +2107,7 @@ static const struct hid_device_id 
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) },
@@ -2362,7 +2365,6 @@ static const struct hid_device_id hid_ignore_list[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) },
 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0400) },
-	{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0401) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC5UH) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC4UM) },
@@ -2632,6 +2634,17 @@ bool hid_ignore(struct hid_device *hdev)
 		    strncmp(hdev->name, "www.masterkit.ru MA901", 22) == 0)
 			return true;
 		break;
+	case USB_VENDOR_ID_ELAN:
+		/*
+		 * Many Elan devices have a product id of 0x0401 and are handled
+		 * by the elan_i2c input driver. But the ACPI HID ELAN0800 dev
+		 * is not (and cannot be) handled by that driver ->
+		 * Ignore all 0x0401 devs except for the ELAN0800 dev.
+		 */
+		if (hdev->product == 0x0401 &&
+		    strncmp(hdev->name, "ELAN0800", 8) != 0)
+			return true;
+		break;
 	}
 
 	if (hdev->type == HID_TYPE_USBMOUSE &&
diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c
index c0303f61c26a94f1998f6883d42a0fc8cb41f432..9ba5d98a118042a52dc40b895c3b2e8df67c0b39 100644
--- a/drivers/hid/hid-corsair.c
+++ b/drivers/hid/hid-corsair.c
@@ -3,8 +3,10 @@
  *
  * Supported devices:
  *  - Vengeance K90 Keyboard
+ *  - Scimitar PRO RGB Gaming Mouse
  *
  * Copyright (c) 2015 Clement Vuchener
+ * Copyright (c) 2017 Oscar Campos
  */
 
 /*
@@ -670,10 +672,51 @@ static int corsair_input_mapping(struct hid_device *dev,
 	return 0;
 }
 
+/*
+ * The report descriptor of the Corsair Scimitar RGB Pro gaming mouse is
+ * not parseable as it defines two consecutive Logical Minimum items for
+ * the Usage Page (Consumer) in rdesc bytes 75 and 77: byte 77 is 0x16,
+ * which should obviously be 0x26 for a 16-bit Logical Maximum. This
+ * prevents proper parsing of the report descriptor due to the Logical
+ * Minimum being larger than the Logical Maximum.
+ *
+ * This driver fixes the report descriptor for:
+ * - USB ID 1b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse
+ */
+
+static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+					unsigned int *rsize)
+{
+	struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+
+	if (intf->cur_altsetting->desc.bInterfaceNumber == 1) {
+		/*
+		 * The Corsair Scimitar RGB Pro report descriptor is broken and
+		 * defines two different Logical Minimum items for the Consumer
+		 * Application. Byte 77 should be 0x26, introducing a 16-bit
+		 * integer for the Logical Maximum, but it is 0x16 instead
+		 * (Logical Minimum).
+		 */
+		switch (hdev->product) {
+		case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB:
+			if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16
+			    && rdesc[78] == 0xff && rdesc[79] == 0x0f) {
+				hid_info(hdev, "Fixing up report descriptor\n");
+				rdesc[77] = 0x26;
+			}
+			break;
+		}
+
+	}
+	return rdesc;
+}
+
 static const struct hid_device_id corsair_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90),
 		.driver_data = CORSAIR_USE_K90_MACRO |
 			       CORSAIR_USE_K90_BACKLIGHT },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR,
+			 USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
 	{}
 };
 
@@ -686,10 +729,14 @@ static struct hid_driver corsair_driver = {
 	.event = corsair_event,
 	.remove = corsair_remove,
 	.input_mapping = corsair_input_mapping,
+	.report_fixup = corsair_mouse_report_fixup,
 };
 
 module_hid_driver(corsair_driver);
 
 MODULE_LICENSE("GPL");
+/* Original K90 driver author */
 MODULE_AUTHOR("Clement Vuchener");
+/* Scimitar PRO RGB driver author */
+MODULE_AUTHOR("Oscar Campos");
 MODULE_DESCRIPTION("HID driver for Corsair devices");
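Note how corsair_mouse_report_fixup() only patches byte 77 after checking the descriptor length and the neighbouring bytes, so any firmware revision with a different descriptor is left untouched. A standalone sketch of that defensive fixup pattern follows; the buffer contents are fabricated to mimic the documented offsets and this is not the driver code itself.

#include <stdint.h>
#include <stdio.h>

/*
 * Patch a known-bad byte in a report descriptor, but only when the
 * descriptor looks exactly like the broken revision: correct length and
 * the expected bytes around the patch point. Otherwise leave it alone.
 */
static void fixup_rdesc(uint8_t *rdesc, size_t rsize)
{
	/* Offsets/values mirror the Scimitar fix: 0x16 (Logical Minimum)
	 * where 0x26 (Logical Maximum) was clearly intended. */
	if (rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16 &&
	    rdesc[78] == 0xff && rdesc[79] == 0x0f) {
		printf("fixing up report descriptor\n");
		rdesc[77] = 0x26;	/* Logical Maximum, 2-byte operand */
	}
}

int main(void)
{
	uint8_t rdesc[172] = { 0 };

	/* Fabricate the broken byte pattern at the documented offsets. */
	rdesc[75] = 0x15;
	rdesc[77] = 0x16;
	rdesc[78] = 0xff;
	rdesc[79] = 0x0f;

	fixup_rdesc(rdesc, sizeof(rdesc));
	printf("rdesc[77] = %#x\n", rdesc[77]);	/* now 0x26 */
	return 0;
}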
diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c
index e06c1344c9138e1f6bb1ffafa5f22eff11fe7a17..7af77818efc3f3232015c8df9333c367579cd57b 100644
--- a/drivers/hid/hid-cp2112.c
+++ b/drivers/hid/hid-cp2112.c
@@ -188,6 +188,8 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 				 HID_REQ_GET_REPORT);
 	if (ret != CP2112_GPIO_CONFIG_LENGTH) {
 		hid_err(hdev, "error requesting GPIO config: %d\n", ret);
+		if (ret >= 0)
+			ret = -EIO;
 		goto exit;
 	}
 
@@ -197,8 +199,10 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 	ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf,
 				 CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT,
 				 HID_REQ_SET_REPORT);
-	if (ret < 0) {
+	if (ret != CP2112_GPIO_CONFIG_LENGTH) {
 		hid_err(hdev, "error setting GPIO config: %d\n", ret);
+		if (ret >= 0)
+			ret = -EIO;
 		goto exit;
 	}
 
@@ -206,7 +210,7 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 exit:
 	mutex_unlock(&dev->lock);
-	return ret < 0 ? ret : -EIO;
+	return ret;
 }
 
 static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 08fd3f831d62cded30b1bc82c2efb16176306b11..244b97c1b74ee48554c8b1ef01d6db1e4ada448c 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -277,6 +277,9 @@
 #define USB_DEVICE_ID_CORSAIR_K70RGB	0x1b13
 #define USB_DEVICE_ID_CORSAIR_STRAFE	0x1b15
 #define USB_DEVICE_ID_CORSAIR_K65RGB	0x1b17
+#define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE	0x1b38
+#define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE	0x1b39
+#define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB	0x1b3e
 
 #define USB_VENDOR_ID_CREATIVELABS	0x041e
 #define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51	0x322c
@@ -558,6 +561,7 @@
 
 #define USB_VENDOR_ID_JESS		0x0c45
 #define USB_DEVICE_ID_JESS_YUREX	0x1010
+#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD	0x5112
 
 #define USB_VENDOR_ID_JESS2		0x0f30
 #define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD	0x0111
@@ -1076,6 +1080,7 @@
 
 #define USB_VENDOR_ID_XIN_MO			0x16c0
 #define USB_DEVICE_ID_XIN_MO_DUAL_ARCADE	0x05e1
+#define USB_DEVICE_ID_THT_2P_ARCADE		0x75e1
 
 #define USB_VENDOR_ID_XIROKU		0x1477
 #define USB_DEVICE_ID_XIROKU_SPX	0x1006
diff --git a/drivers/hid/hid-xinmo.c b/drivers/hid/hid-xinmo.c
index 7df5227a7e61d6ff79acd62cb009e29ffa4b79d8..9ad7731d2e10dad45268b55fbcf9dfc025570682 100644
--- a/drivers/hid/hid-xinmo.c
+++ b/drivers/hid/hid-xinmo.c
@@ -46,6 +46,7 @@ static int xinmo_event(struct hid_device *hdev, struct hid_field *field,
 
 static const struct hid_device_id xinmo_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
 	{ }
 };
 
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 2b1620797959d0148ec5c79c703a2b5208f74253..1916f80a692de04ce6e0d2070d91247af169a04d 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -80,6 +80,9 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
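The hid-cp2112.c change a few hunks up closes an error-propagation hole: hid_hw_raw_request() may return a positive short-transfer count, and the old "return ret < 0 ? ret : -EIO" tail could misreport such cases. The fix normalizes short transfers to -EIO at the point of detection so every exit path can end with a plain "return ret". A condensed, runnable model of the pattern, with the transfer function simulated:

#include <errno.h>
#include <stdio.h>

#define CONFIG_LEN 5

/* Stand-in for hid_hw_raw_request(): bytes transferred, or -errno. */
static int raw_request(int behave)
{
	return behave;	/* caller picks: full, short, or negative */
}

/*
 * Normalize at the call site: anything other than a full transfer is an
 * error, and short positive counts become -EIO immediately, so the
 * function can finish with a plain "return ret".
 */
static int gpio_direction_input(int behave)
{
	int ret = raw_request(behave);

	if (ret != CONFIG_LEN) {
		fprintf(stderr, "error requesting config: %d\n", ret);
		if (ret >= 0)
			ret = -EIO;
		return ret;
	}
	return 0;
}

int main(void)
{
	printf("full transfer  -> %d\n", gpio_direction_input(CONFIG_LEN));
	printf("short transfer -> %d\n", gpio_direction_input(2));
	printf("hard failure   -> %d\n", gpio_direction_input(-5));
	return 0;
}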
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index d72dfb2bbdb8e205f6ed8214c46368a49cf7037a..7a4d39ce51d914b8eecfa101f7d13249c02f1fb2 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2192,23 +2192,23 @@ static void wacom_remote_destroy_one(struct wacom *wacom, unsigned int index)
 	int i;
 	unsigned long flags;
 
-	spin_lock_irqsave(&remote->remote_lock, flags);
-	remote->remotes[index].registered = false;
-	spin_unlock_irqrestore(&remote->remote_lock, flags);
+	for (i = 0; i < WACOM_MAX_REMOTES; i++) {
+		if (remote->remotes[i].serial == serial) {
 
-	if (remote->remotes[index].battery.battery)
-		devres_release_group(&wacom->hdev->dev,
-				     &remote->remotes[index].battery.bat_desc);
+			spin_lock_irqsave(&remote->remote_lock, flags);
+			remote->remotes[i].registered = false;
+			spin_unlock_irqrestore(&remote->remote_lock, flags);
 
-	if (remote->remotes[index].group.name)
-		devres_release_group(&wacom->hdev->dev,
-				     &remote->remotes[index]);
+			if (remote->remotes[i].battery.battery)
+				devres_release_group(&wacom->hdev->dev,
+						     &remote->remotes[i].battery.bat_desc);
+
+			if (remote->remotes[i].group.name)
+				devres_release_group(&wacom->hdev->dev,
+						     &remote->remotes[i]);
 
-	for (i = 0; i < WACOM_MAX_REMOTES; i++) {
-		if (remote->remotes[i].serial == serial) {
 			remote->remotes[i].serial = 0;
 			remote->remotes[i].group.name = NULL;
-			remote->remotes[i].registered = false;
 			remote->remotes[i].battery.battery = NULL;
 			wacom->led.groups[i].select = WACOM_STATUS_UNKNOWN;
 		}
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index e0a8216ecf2b8b646bb0dbbe5339d58efc09ee0e..13c32eb4073856fb8fc53859e2e30cfb312f8c3f 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include <asm/nospec-branch.h>
 #include "hyperv_vmbus.h"
 
 /* The one and only */
@@ -103,9 +104,10 @@ u64 hv_do_hypercall(u64 control, void *input, void *output)
 		return (u64)ULLONG_MAX;
 
 	__asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
-	__asm__ __volatile__("call *%3" : "=a" (hv_status) :
+	__asm__ __volatile__(CALL_NOSPEC :
+			     "=a" (hv_status) :
 			     "c" (control), "d" (input_address),
-			     "m" (hypercall_page));
+			     THUNK_TARGET(hypercall_page));
 
 	return hv_status;
 
@@ -123,11 +125,12 @@ u64 hv_do_hypercall(u64 control, void *input, void *output)
 	if (!hypercall_page)
 		return (u64)ULLONG_MAX;
 
-	__asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
+	__asm__ __volatile__ (CALL_NOSPEC : "=d"(hv_status_hi),
 			      "=a"(hv_status_lo) : "d" (control_hi),
 			      "a" (control_lo), "b" (input_address_hi),
 			      "c" (input_address_lo), "D"(output_address_hi),
-			      "S"(output_address_lo), "m" (hypercall_page));
+			      "S"(output_address_lo),
+			      THUNK_TARGET(hypercall_page));
 
 	return hv_status_lo | ((u64)hv_status_hi << 32);
 #endif /* !x86_64 */
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 75126e4e3f055519568c68e4f5c31ac3dc9c58dd..44420073eddadcd89054385fabc88dcd237dfe59 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -61,7 +61,6 @@ static DECLARE_WORK(fcopy_send_work, fcopy_send_data);
 static const char fcopy_devname[] = "vmbus/hv_fcopy";
 static u8 *recv_buffer;
 static struct hvutil_transport *hvt;
-static struct completion release_event;
 /*
  * This state maintains the version number registered by the daemon.
  */
@@ -322,7 +321,6 @@ static void fcopy_on_reset(void)
 
 	if (cancel_delayed_work_sync(&fcopy_timeout_work))
 		fcopy_respond_to_host(HV_E_FAIL);
-	complete(&release_event);
 }
 
 int hv_fcopy_init(struct hv_util_service *srv)
 {
 	recv_buffer = srv->recv_buffer;
 	fcopy_transaction.recv_channel = srv->channel;
 
-	init_completion(&release_event);
 	/*
 	 * When this driver loads, the user level daemon that
 	 * processes the host requests may not yet be running.
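This hv_fcopy hunk, together with the matching hv_kvp and hv_snapshot hunks below, drops the per-driver release_event completions: the hv_utils_transport.c changes further down make the transport itself wait for the daemon's file descriptor to be released before freeing anything. A small pthread-based analogy of that "wait for the last opener before free" hand-off, with all names invented for illustration:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Userspace analog of hvutil_transport and its 'release' completion. */
struct transport {
	pthread_mutex_t lock;
	pthread_cond_t released;
	int destroying;		/* like HVUTIL_TRANSPORT_DESTROY */
	int fd_open;
};

/* fd release path: if teardown already started, signal the waiter. */
static void transport_release(struct transport *t)
{
	pthread_mutex_lock(&t->lock);
	t->fd_open = 0;
	if (t->destroying)
		pthread_cond_signal(&t->released);
	pthread_mutex_unlock(&t->lock);
}

/* Teardown path: wait for any open fd to go away before freeing. */
static void transport_destroy(struct transport *t)
{
	pthread_mutex_lock(&t->lock);
	t->destroying = 1;
	while (t->fd_open)
		pthread_cond_wait(&t->released, &t->lock);
	pthread_mutex_unlock(&t->lock);
	free(t);		/* safe: no release path can race us now */
}

static void *daemon_thread(void *arg)
{
	usleep(100000);		/* daemon holds the device for a while */
	transport_release(arg);
	return NULL;
}

int main(void)
{
	struct transport *t = calloc(1, sizeof(*t));
	pthread_t d;

	pthread_mutex_init(&t->lock, NULL);
	pthread_cond_init(&t->released, NULL);
	t->fd_open = 1;

	pthread_create(&d, NULL, daemon_thread, t);
	transport_destroy(t);	/* blocks until the daemon lets go */
	puts("transport freed after release");
	return pthread_join(d, NULL);
}

Centralizing the wait in the transport removes the duplicated completion logic from the three utility drivers and closes the same use-after-free window in one place.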
@@ -352,5 +349,4 @@ void hv_fcopy_deinit(void) fcopy_transaction.state = HVUTIL_DEVICE_DYING; cancel_delayed_work_sync(&fcopy_timeout_work); hvutil_transport_destroy(hvt); - wait_for_completion(&release_event); } diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 3abfc5983c97c87f6457111a937d4f330ba185e1..5e1fdc8d32ab0c03d25bfcf7d3fbadbcedc6f2eb 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -88,7 +88,6 @@ static DECLARE_WORK(kvp_sendkey_work, kvp_send_key); static const char kvp_devname[] = "vmbus/hv_kvp"; static u8 *recv_buffer; static struct hvutil_transport *hvt; -static struct completion release_event; /* * Register the kernel component with the user-level daemon. * As part of this registration, pass the LIC version number. @@ -717,7 +716,6 @@ static void kvp_on_reset(void) if (cancel_delayed_work_sync(&kvp_timeout_work)) kvp_respond_to_host(NULL, HV_E_FAIL); kvp_transaction.state = HVUTIL_DEVICE_INIT; - complete(&release_event); } int @@ -726,7 +724,6 @@ hv_kvp_init(struct hv_util_service *srv) recv_buffer = srv->recv_buffer; kvp_transaction.recv_channel = srv->channel; - init_completion(&release_event); /* * When this driver loads, the user level daemon that * processes the host requests may not yet be running. @@ -750,5 +747,4 @@ void hv_kvp_deinit(void) cancel_delayed_work_sync(&kvp_timeout_work); cancel_work_sync(&kvp_sendkey_work); hvutil_transport_destroy(hvt); - wait_for_completion(&release_event); } diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index a76e3db0d01f9791e0e4dab7db3fb3ce99f13fc0..a6707133c297c7a1435b1bdcda446256671fc080 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -66,7 +66,6 @@ static int dm_reg_value; static const char vss_devname[] = "vmbus/hv_vss"; static __u8 *recv_buffer; static struct hvutil_transport *hvt; -static struct completion release_event; static void vss_timeout_func(struct work_struct *dummy); static void vss_handle_request(struct work_struct *dummy); @@ -331,13 +330,11 @@ static void vss_on_reset(void) if (cancel_delayed_work_sync(&vss_timeout_work)) vss_respond_to_host(HV_E_FAIL); vss_transaction.state = HVUTIL_DEVICE_INIT; - complete(&release_event); } int hv_vss_init(struct hv_util_service *srv) { - init_completion(&release_event); if (vmbus_proto_version < VERSION_WIN8_1) { pr_warn("Integration service 'Backup (volume snapshot)'" " not supported on this host version.\n"); @@ -368,5 +365,4 @@ void hv_vss_deinit(void) cancel_delayed_work_sync(&vss_timeout_work); cancel_work_sync(&vss_handle_request_work); hvutil_transport_destroy(hvt); - wait_for_completion(&release_event); } diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c index c235a95152671104cd7042a451b16ccd0d0d06ab..4402a71e23f7f7277c6d1561c4a8db5d46489b8d 100644 --- a/drivers/hv/hv_utils_transport.c +++ b/drivers/hv/hv_utils_transport.c @@ -182,10 +182,11 @@ static int hvt_op_release(struct inode *inode, struct file *file) * connects back. 
*/ hvt_reset(hvt); - mutex_unlock(&hvt->lock); if (mode_old == HVUTIL_TRANSPORT_DESTROY) - hvt_transport_free(hvt); + complete(&hvt->release); + + mutex_unlock(&hvt->lock); return 0; } @@ -304,6 +305,7 @@ struct hvutil_transport *hvutil_transport_init(const char *name, init_waitqueue_head(&hvt->outmsg_q); mutex_init(&hvt->lock); + init_completion(&hvt->release); spin_lock(&hvt_list_lock); list_add(&hvt->list, &hvt_list); @@ -351,6 +353,8 @@ void hvutil_transport_destroy(struct hvutil_transport *hvt) if (hvt->cn_id.idx > 0 && hvt->cn_id.val > 0) cn_del_callback(&hvt->cn_id); - if (mode_old != HVUTIL_TRANSPORT_CHARDEV) - hvt_transport_free(hvt); + if (mode_old == HVUTIL_TRANSPORT_CHARDEV) + wait_for_completion(&hvt->release); + + hvt_transport_free(hvt); } diff --git a/drivers/hv/hv_utils_transport.h b/drivers/hv/hv_utils_transport.h index d98f5225c3e693468fdb27ccc03de8d17e2a8576..79afb626e1668981beaa740b4e7d618f22ab194d 100644 --- a/drivers/hv/hv_utils_transport.h +++ b/drivers/hv/hv_utils_transport.h @@ -41,6 +41,7 @@ struct hvutil_transport { int outmsg_len; /* its length */ wait_queue_head_t outmsg_q; /* poll/read wait queue */ struct mutex lock; /* protects struct members */ + struct completion release; /* synchronize with fd release */ }; struct hvutil_transport *hvutil_transport_init(const char *name, diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c index cccef87963e050afb99181d63d3ed5dcf401d5a1..975c43d446f8593d0e701efeaf8da133717cff74 100644 --- a/drivers/hwmon/asus_atk0110.c +++ b/drivers/hwmon/asus_atk0110.c @@ -646,6 +646,9 @@ static int atk_read_value(struct atk_sensor_data *sensor, u64 *value) else err = atk_read_value_new(sensor, value); + if (err) + return err; + sensor->is_valid = true; sensor->last_updated = jiffies; sensor->cached_value = *value; diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c index 1bf22eff0b08687250445e3685cdafe921baf8ed..0f1f6421845fdb9a63e50dc1a9c5407a0074bd6b 100644 --- a/drivers/hwmon/jc42.c +++ b/drivers/hwmon/jc42.c @@ -22,6 +22,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include #include #include @@ -45,6 +46,7 @@ static const unsigned short normal_i2c[] = { #define JC42_REG_TEMP 0x05 #define JC42_REG_MANID 0x06 #define JC42_REG_DEVICEID 0x07 +#define JC42_REG_SMBUS 0x22 /* NXP and Atmel, possibly others? */ /* Status bits in temperature register */ #define JC42_ALARM_CRIT_BIT 15 @@ -73,6 +75,9 @@ static const unsigned short normal_i2c[] = { #define ONS_MANID 0x1b09 /* ON Semiconductor */ #define STM_MANID 0x104a /* ST Microelectronics */ +/* SMBUS register */ +#define SMBUS_STMOUT BIT(7) /* SMBus time-out, active low */ + /* Supported chips */ /* Analog Devices */ @@ -476,6 +481,22 @@ static int jc42_probe(struct i2c_client *client, const struct i2c_device_id *id) data->extended = !!(cap & JC42_CAP_RANGE); + if (device_property_read_bool(dev, "smbus-timeout-disable")) { + int smbus; + + /* + * Not all chips support this register, but from a + * quick read of various datasheets no chip appears + * incompatible with the below attempt to disable + * the timeout. And the whole thing is opt-in... 
+		 */
+		smbus = i2c_smbus_read_word_swapped(client, JC42_REG_SMBUS);
+		if (smbus < 0)
+			return smbus;
+
+		i2c_smbus_write_word_swapped(client, JC42_REG_SMBUS,
+					     smbus | SMBUS_STMOUT);
+	}
+
 	config = i2c_smbus_read_word_swapped(client, JC42_REG_CONFIG);
 	if (config < 0)
 		return config;
diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c
index c1b9275978f9d9ee9172e99e569de7dca48b491d..281491cca5103ad4b82e2f2a52be6f2408262bdb 100644
--- a/drivers/hwmon/max31790.c
+++ b/drivers/hwmon/max31790.c
@@ -311,7 +311,7 @@ static int max31790_write_pwm(struct device *dev, u32 attr, int channel,
 		data->pwm[channel] = val << 8;
 		err = i2c_smbus_write_word_swapped(client,
 						   MAX31790_REG_PWMOUT(channel),
-						   val);
+						   data->pwm[channel]);
 		break;
 	case hwmon_pwm_enable:
 		fan_config = data->fan_config[channel];
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index ba59eaef2e075a74cb15940fdc095673c15346a4..d013acf3f83a3a2115d0e7db3931dea6c38e5766 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -20,6 +20,7 @@
  */
 
 #include
+#include <linux/math64.h>
 #include
 #include
 #include
@@ -476,8 +477,8 @@ static long pmbus_reg2data_linear(struct pmbus_data *data,
 static long pmbus_reg2data_direct(struct pmbus_data *data,
 				  struct pmbus_sensor *sensor)
 {
-	long val = (s16) sensor->data;
-	long m, b, R;
+	s64 b, val = (s16)sensor->data;
+	s32 m, R;
 
 	m = data->info->m[sensor->class];
 	b = data->info->b[sensor->class];
@@ -505,11 +506,12 @@ static long pmbus_reg2data_direct(struct pmbus_data *data,
 		R--;
 	}
 	while (R < 0) {
-		val = DIV_ROUND_CLOSEST(val, 10);
+		val = div_s64(val + 5LL, 10L);	/* round closest */
 		R++;
 	}
 
-	return (val - b) / m;
+	val = div_s64(val - b, m);
+	return clamp_val(val, LONG_MIN, LONG_MAX);
 }
 
 /*
@@ -629,7 +631,8 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data,
 static u16 pmbus_data2reg_direct(struct pmbus_data *data,
 				 struct pmbus_sensor *sensor, long val)
 {
-	long m, b, R;
+	s64 b, val64 = val;
+	s32 m, R;
 
 	m = data->info->m[sensor->class];
 	b = data->info->b[sensor->class];
@@ -646,18 +649,18 @@ static u16 pmbus_data2reg_direct(struct pmbus_data *data,
 		R -= 3;		/* Adjust R and b for data in milli-units */
 		b *= 1000;
 	}
-	val = val * m + b;
+	val64 = val64 * m + b;
 
 	while (R > 0) {
-		val *= 10;
+		val64 *= 10;
 		R--;
 	}
 	while (R < 0) {
-		val = DIV_ROUND_CLOSEST(val, 10);
+		val64 = div_s64(val64 + 5LL, 10L);	/* round closest */
 		R++;
 	}
 
-	return val;
+	return (u16)clamp_val(val64, S16_MIN, S16_MAX);
 }
 
 static u16 pmbus_data2reg_vid(struct pmbus_data *data,
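The pmbus_core.c changes above exist because the DIRECT-format conversion, X = (Y * 10^-R - b) / m on read and Y = (m * X + b) * 10^R on write, can overflow a 32-bit long for large m and b, and DIV_ROUND_CLOSEST is not usable with 64-bit operands on 32-bit builds; hence the s64 intermediates, div_s64 with an explicit "+5" rounding step, and clamping on the way out. A self-contained model of both directions; the coefficients are made up, and plain C99 64-bit division stands in for div_s64 (both truncate toward zero, so negative values round with the same asymmetry as the kernel code):

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

/* reg -> real value: X = (Y * 10^-R - b) / m, all in 64-bit. */
static long reg2data_direct(int16_t reg, int32_t m, int64_t b, int32_t R)
{
	int64_t val = reg;

	for (; R > 0; R--)
		val *= 10;
	for (; R < 0; R++)
		val = (val + 5) / 10;	/* like div_s64(val + 5, 10) */
	val = (val - b) / m;
	if (val > LONG_MAX)
		return LONG_MAX;	/* clamp_val() analog */
	if (val < LONG_MIN)
		return LONG_MIN;
	return (long)val;
}

/* real value -> reg: Y = (m * X + b) * 10^R, clamped to s16. */
static uint16_t data2reg_direct(long val, int32_t m, int64_t b, int32_t R)
{
	int64_t v = (int64_t)val * m + b;

	for (; R > 0; R--)
		v *= 10;
	for (; R < 0; R++)
		v = (v + 5) / 10;
	if (v > INT16_MAX)
		v = INT16_MAX;
	if (v < INT16_MIN)
		v = INT16_MIN;
	return (uint16_t)v;
}

int main(void)
{
	/* Made-up coefficients in the style of a PMBus datasheet. */
	int32_t m = 16, R = -2;
	int64_t b = 0;
	int16_t reg = 12345;

	long value = reg2data_direct(reg, m, b, R);
	printf("reg %d -> value %ld -> reg %u\n",
	       reg, value, (unsigned)data2reg_direct(value, m, b, R));
	return 0;
}

The round trip is deliberately lossy (each division by 10 discards information), which is inherent to the DIRECT encoding rather than a bug in the conversion.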
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 2cd7c718198a82e3a2aeecc5aed1c3a5643157d7..df63315369386a6f74db16f3488ea5b5bd822a75 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -201,9 +201,23 @@ static void *etm_setup_aux(int event_cpu, void **pages,
 	event_data = alloc_event_data(event_cpu);
 	if (!event_data)
 		return NULL;
 	INIT_WORK(&event_data->work, free_event_data);
 
+	/*
+	 * In theory nothing prevents tracers in a trace session from being
+	 * associated with different sinks, nor having a sink per tracer. But
+	 * until we have HW with this kind of topology we need to assume tracers
+	 * in a trace session are using the same sink. Therefore go through
+	 * the coresight bus and pick the first enabled sink.
+	 *
+	 * When operated from sysFS, users are responsible for enabling the
+	 * sink, while from perf the perf tools do it based on the choice made
+	 * on the cmd line. As such the "enable_sink" flag in sysFS is reset.
+	 */
+	sink = coresight_get_enabled_sink(true);
+	if (!sink)
+		goto err;
+
 	mask = &event_data->mask;
 
 	/* Setup the path for each CPU in a trace session */
@@ -219,28 +233,15 @@ static void *etm_setup_aux(int event_cpu, void **pages,
 		 * list of devices from source to sink that can be
 		 * referenced later when the path is actually needed.
		 */
-		event_data->path[cpu] = coresight_build_path(csdev);
+		event_data->path[cpu] = coresight_build_path(csdev, sink);
 		if (IS_ERR(event_data->path[cpu]))
 			goto err;
 	}
 
-	/*
-	 * In theory nothing prevent tracers in a trace session from being
-	 * associated with different sinks, nor having a sink per tracer. But
-	 * until we have HW with this kind of topology and a way to convey
-	 * sink assignement from the perf cmd line we need to assume tracers
-	 * in a trace session are using the same sink. Therefore pick the sink
-	 * found at the end of the first available path.
-	 */
-	cpu = cpumask_first(mask);
-	/* Grab the sink at the end of the path */
-	sink = coresight_get_sink(event_data->path[cpu]);
-	if (!sink)
-		goto err;
-
 	if (!sink_ops(sink)->alloc_buffer)
 		goto err;
 
+	cpu = cpumask_first(mask);
 	/* Get the AUX specific data from the sink buffer */
 	event_data->snk_config = sink_ops(sink)->alloc_buffer(sink, cpu,
 							      pages,
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
index 4db8d6a4d0cbbe0545e5724b7de75fbae0438c68..83358ebad762ae86341bc431c58c4b72baf8b4c6 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -1026,7 +1026,8 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 	}
 
 	pm_runtime_put(&adev->dev);
-	dev_info(dev, "%s initialized\n", (char *)id->data);
+	dev_info(dev, "CPU%d: %s initialized\n",
+		 drvdata->cpu, (char *)id->data);
 
 	if (boot_enable) {
 		coresight_enable(drvdata->csdev);
@@ -1045,20 +1046,25 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 }
 
 static struct amba_id etm4_ids[] = {
-	{       /* ETM 4.0 - Cortex-A53  */
+	{
 		.id	= 0x000bb95d,
 		.mask	= 0x000fffff,
-		.data	= "ETM 4.0",
+		.data	= "Cortex-A53 ETM v4.0",
 	},
-	{       /* ETM 4.0 - Cortex-A57 */
+	{
 		.id	= 0x000bb95e,
 		.mask	= 0x000fffff,
-		.data	= "ETM 4.0",
+		.data	= "Cortex-A57 ETM v4.0",
 	},
-	{       /* ETM 4.0 - A72, Maia, HiSilicon */
+	{
 		.id	= 0x000bb95a,
 		.mask	= 0x000fffff,
-		.data	= "ETM 4.0",
+		.data	= "Cortex-A72 ETM v4.0",
+	},
+	{
+		.id	= 0x000bb959,
+		.mask	= 0x000fffff,
+		.data	= "Cortex-A73 ETM v4.0",
 	},
 	{ 0, 0},
 };
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index 196a14be4b3da95306ff0a8994e9ecdaad86162b..ef9d8e93e3b2e0b6959107b57b2689695e33604d 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -111,7 +111,9 @@ static inline void CS_UNLOCK(void __iomem *addr)
 void coresight_disable_path(struct list_head *path);
 int coresight_enable_path(struct list_head *path, u32 mode);
 struct coresight_device *coresight_get_sink(struct list_head *path);
-struct list_head *coresight_build_path(struct coresight_device *csdev);
+struct coresight_device *coresight_get_enabled_sink(bool reset);
+struct list_head *coresight_build_path(struct coresight_device *csdev,
+				       struct coresight_device *sink);
 void
coresight_release_path(struct list_head *path); #ifdef CONFIG_CORESIGHT_SOURCE_ETM3X diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index d6941ea24d8df1152baf39177e9ac9fea733f499..1549436e249242064fe1238957e7d00f4bf5ba3c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -70,7 +70,7 @@ static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) * When operating in sysFS mode the content of the buffer needs to be * read before the TMC is disabled. */ - if (local_read(&drvdata->mode) == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) tmc_etb_dump_hw(drvdata); tmc_disable_hw(drvdata); @@ -103,19 +103,14 @@ static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } -static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev, u32 mode) +static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) { int ret = 0; bool used = false; char *buf = NULL; - long val; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - /* This shouldn't be happening */ - if (WARN_ON(mode != CS_MODE_SYSFS)) - return -EINVAL; - /* * If we don't have a buffer release the lock and allocate memory. * Otherwise keep the lock and move along. @@ -138,13 +133,12 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev, u32 mode) goto out; } - val = local_xchg(&drvdata->mode, mode); /* * In sysFS mode we can have multiple writers per sink. Since this * sink is already enabled no memory is needed and the HW need not be * touched. */ - if (val == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) goto out; /* @@ -163,6 +157,7 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev, u32 mode) drvdata->buf = buf; } + drvdata->mode = CS_MODE_SYSFS; tmc_etb_enable_hw(drvdata); out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -177,34 +172,29 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev, u32 mode) return ret; } -static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, u32 mode) +static int tmc_enable_etf_sink_perf(struct coresight_device *csdev) { int ret = 0; - long val; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - /* This shouldn't be happening */ - if (WARN_ON(mode != CS_MODE_PERF)) - return -EINVAL; - spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { ret = -EINVAL; goto out; } - val = local_xchg(&drvdata->mode, mode); /* * In Perf mode there can be only one writer per sink. There * is also no need to continue if the ETB/ETR is already operated * from sysFS. 
	 */
-	if (val != CS_MODE_DISABLED) {
+	if (drvdata->mode != CS_MODE_DISABLED) {
 		ret = -EINVAL;
 		goto out;
 	}
 
+	drvdata->mode = CS_MODE_PERF;
 	tmc_etb_enable_hw(drvdata);
 out:
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
@@ -216,9 +206,9 @@ static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode)
 {
 	switch (mode) {
 	case CS_MODE_SYSFS:
-		return tmc_enable_etf_sink_sysfs(csdev, mode);
+		return tmc_enable_etf_sink_sysfs(csdev);
 	case CS_MODE_PERF:
-		return tmc_enable_etf_sink_perf(csdev, mode);
+		return tmc_enable_etf_sink_perf(csdev);
 	}
 
 	/* We shouldn't be here */
@@ -227,7 +217,6 @@ static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode)
 
 static void tmc_disable_etf_sink(struct coresight_device *csdev)
 {
-	long val;
 	unsigned long flags;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
@@ -237,10 +226,11 @@ static void tmc_disable_etf_sink(struct coresight_device *csdev)
 		return;
 	}
 
-	val = local_xchg(&drvdata->mode, CS_MODE_DISABLED);
 	/* Disable the TMC only if it needs to */
-	if (val != CS_MODE_DISABLED)
+	if (drvdata->mode != CS_MODE_DISABLED) {
 		tmc_etb_disable_hw(drvdata);
+		drvdata->mode = CS_MODE_DISABLED;
+	}
 
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
@@ -260,7 +250,7 @@ static int tmc_enable_etf_link(struct coresight_device *csdev,
 	}
 
 	tmc_etf_enable_hw(drvdata);
-	local_set(&drvdata->mode, CS_MODE_SYSFS);
+	drvdata->mode = CS_MODE_SYSFS;
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
 	dev_info(drvdata->dev, "TMC-ETF enabled\n");
@@ -280,7 +270,7 @@ static void tmc_disable_etf_link(struct coresight_device *csdev,
 	}
 
 	tmc_etf_disable_hw(drvdata);
-	local_set(&drvdata->mode, CS_MODE_DISABLED);
+	drvdata->mode = CS_MODE_DISABLED;
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
 	dev_info(drvdata->dev, "TMC disabled\n");
@@ -383,7 +373,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
 		return;
 
 	/* This shouldn't happen */
-	if (WARN_ON_ONCE(local_read(&drvdata->mode) != CS_MODE_PERF))
+	if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF))
 		return;
 
 	CS_UNLOCK(drvdata->base);
@@ -504,7 +494,6 @@ const struct coresight_ops tmc_etf_cs_ops = {
 
 int tmc_read_prepare_etb(struct tmc_drvdata *drvdata)
 {
-	long val;
 	enum tmc_mode mode;
 	int ret = 0;
 	unsigned long flags;
@@ -528,9 +517,8 @@ int tmc_read_prepare_etb(struct tmc_drvdata *drvdata)
 		goto out;
 	}
 
-	val = local_read(&drvdata->mode);
 	/* Don't interfere if operated from Perf */
-	if (val == CS_MODE_PERF) {
+	if (drvdata->mode == CS_MODE_PERF) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -542,7 +530,7 @@ int tmc_read_prepare_etb(struct tmc_drvdata *drvdata)
 	}
 
 	/* Disable the TMC if need be */
-	if (val == CS_MODE_SYSFS)
+	if (drvdata->mode == CS_MODE_SYSFS)
 		tmc_etb_disable_hw(drvdata);
 
 	drvdata->reading = true;
@@ -573,7 +561,7 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata)
 	}
 
 	/* Re-enable the TMC if need be */
-	if (local_read(&drvdata->mode) == CS_MODE_SYSFS) {
+	if (drvdata->mode == CS_MODE_SYSFS) {
 		/*
		 * The trace run will continue with the same allocated trace
		 * buffer. As such zero-out the buffer so that we don't end
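Throughout coresight-tmc-etf.c above (and tmc-etr.c below), drvdata->mode changes from a local_t manipulated with local_xchg()/local_read() into a plain u32; that is safe because every access now happens with drvdata->spinlock held, and it removes the window where the old code published the new mode before the hardware was actually enabled. A toy, runnable illustration of that "state and hardware toggled inside one critical section" rule, with a pthread mutex standing in for the spinlock (names invented):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

enum mode { MODE_DISABLED, MODE_SYSFS, MODE_PERF };

struct sink {
	pthread_mutex_t lock;	/* analog of drvdata->spinlock */
	enum mode mode;		/* plain int: only touched under lock */
	int hw_enabled;
};

/* Exclusive perf-style enable: fails if anyone else owns the sink. */
static int sink_enable_perf(struct sink *s)
{
	int ret = 0;

	pthread_mutex_lock(&s->lock);
	if (s->mode != MODE_DISABLED) {
		ret = -EINVAL;
	} else {
		/* Mode and HW change in the same critical section, so no
		 * observer can see mode == PERF with the HW still off. */
		s->mode = MODE_PERF;
		s->hw_enabled = 1;
	}
	pthread_mutex_unlock(&s->lock);
	return ret;
}

static void sink_disable(struct sink *s)
{
	pthread_mutex_lock(&s->lock);
	if (s->mode != MODE_DISABLED) {
		s->hw_enabled = 0;
		s->mode = MODE_DISABLED;
	}
	pthread_mutex_unlock(&s->lock);
}

int main(void)
{
	struct sink s = { .lock = PTHREAD_MUTEX_INITIALIZER };

	printf("first enable:  %d\n", sink_enable_perf(&s));	/* 0 */
	printf("second enable: %d\n", sink_enable_perf(&s));	/* -EINVAL */
	sink_disable(&s);
	printf("after disable: %d\n", sink_enable_perf(&s));	/* 0 */
	return 0;
}

Once a lock already serializes the callers, the lock-free atomics were pure overhead and, worse, hid the ordering bug the patch fixes.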
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 886ea83c68e0cf2233398f46cb00edbe188c56b6..2db48576be5bb50b925c0f872925d948054275ac 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -15,11 +15,30 @@
  * this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/circ_buf.h>
 #include
 #include
+#include <linux/slab.h>
+
 #include "coresight-priv.h"
 #include "coresight-tmc.h"
 
+/**
+ * struct cs_etr_buffers - keep track of a recording session's specifics
+ * @tmc:	generic portion of the TMC buffers
+ * @paddr:	the physical address of a DMA'able contiguous memory area
+ * @vaddr:	the virtual address associated to @paddr
+ * @size:	how much memory we have, starting at @paddr
+ * @dev:	the device @vaddr has been tied to
+ */
+struct cs_etr_buffers {
+	struct cs_buffers	tmc;
+	dma_addr_t		paddr;
+	void __iomem		*vaddr;
+	u32			size;
+	struct device		*dev;
+};
+
 static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
 {
 	u32 axictl;
@@ -86,26 +105,22 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
 	 * When operating in sysFS mode the content of the buffer needs to be
 	 * read before the TMC is disabled.
 	 */
-	if (local_read(&drvdata->mode) == CS_MODE_SYSFS)
+	if (drvdata->mode == CS_MODE_SYSFS)
 		tmc_etr_dump_hw(drvdata);
 	tmc_disable_hw(drvdata);
 
 	CS_LOCK(drvdata->base);
 }
 
-static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev, u32 mode)
+static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
 {
 	int ret = 0;
 	bool used = false;
-	long val;
 	unsigned long flags;
 	void __iomem *vaddr = NULL;
 	dma_addr_t paddr;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
-	/* This shouldn't be happening */
-	if (WARN_ON(mode != CS_MODE_SYSFS))
-		return -EINVAL;
 
 	/*
 	 * If we don't have a buffer release the lock and allocate memory.
@@ -134,13 +149,12 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev, u32 mode)
 		goto out;
 	}
 
-	val = local_xchg(&drvdata->mode, mode);
 	/*
 	 * In sysFS mode we can have multiple writers per sink. Since this
 	 * sink is already enabled no memory is needed and the HW need not be
 	 * touched.
 	 */
-	if (val == CS_MODE_SYSFS)
+	if (drvdata->mode == CS_MODE_SYSFS)
 		goto out;
 
 	/*
@@ -155,8 +169,7 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev, u32 mode)
 		drvdata->buf = drvdata->vaddr;
 	}
 
-	memset(drvdata->vaddr, 0, drvdata->size);
-
+	drvdata->mode = CS_MODE_SYSFS;
 	tmc_etr_enable_hw(drvdata);
 out:
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
@@ -171,34 +184,29 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev, u32 mode)
 	return ret;
 }
 
-static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, u32 mode)
+static int tmc_enable_etr_sink_perf(struct coresight_device *csdev)
 {
 	int ret = 0;
-	long val;
 	unsigned long flags;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
-	/* This shouldn't be happening */
-	if (WARN_ON(mode != CS_MODE_PERF))
-		return -EINVAL;
-
 	spin_lock_irqsave(&drvdata->spinlock, flags);
 	if (drvdata->reading) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	val = local_xchg(&drvdata->mode, mode);
 	/*
 	 * In Perf mode there can be only one writer per sink. There
 	 * is also no need to continue if the ETR is already operated
 	 * from sysFS.
*/ - if (val != CS_MODE_DISABLED) { + if (drvdata->mode != CS_MODE_DISABLED) { ret = -EINVAL; goto out; } + drvdata->mode = CS_MODE_PERF; tmc_etr_enable_hw(drvdata); out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -210,9 +218,9 @@ static int tmc_enable_etr_sink(struct coresight_device *csdev, u32 mode) { switch (mode) { case CS_MODE_SYSFS: - return tmc_enable_etr_sink_sysfs(csdev, mode); + return tmc_enable_etr_sink_sysfs(csdev); case CS_MODE_PERF: - return tmc_enable_etr_sink_perf(csdev, mode); + return tmc_enable_etr_sink_perf(csdev); } /* We shouldn't be here */ @@ -221,7 +229,6 @@ static int tmc_enable_etr_sink(struct coresight_device *csdev, u32 mode) static void tmc_disable_etr_sink(struct coresight_device *csdev) { - long val; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -231,19 +238,244 @@ static void tmc_disable_etr_sink(struct coresight_device *csdev) return; } - val = local_xchg(&drvdata->mode, CS_MODE_DISABLED); /* Disable the TMC only if it needs to */ - if (val != CS_MODE_DISABLED) + if (drvdata->mode != CS_MODE_DISABLED) { tmc_etr_disable_hw(drvdata); + drvdata->mode = CS_MODE_DISABLED; + } spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_info(drvdata->dev, "TMC-ETR disabled\n"); } +static void *tmc_alloc_etr_buffer(struct coresight_device *csdev, int cpu, + void **pages, int nr_pages, bool overwrite) +{ + int node; + struct cs_etr_buffers *buf; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (cpu == -1) + cpu = smp_processor_id(); + node = cpu_to_node(cpu); + + /* Allocate memory structure for interaction with Perf */ + buf = kzalloc_node(sizeof(struct cs_etr_buffers), GFP_KERNEL, node); + if (!buf) + return NULL; + + buf->dev = drvdata->dev; + buf->size = drvdata->size; + buf->vaddr = dma_alloc_coherent(buf->dev, buf->size, + &buf->paddr, GFP_KERNEL); + if (!buf->vaddr) { + kfree(buf); + return NULL; + } + + buf->tmc.snapshot = overwrite; + buf->tmc.nr_pages = nr_pages; + buf->tmc.data_pages = pages; + + return buf; +} + +static void tmc_free_etr_buffer(void *config) +{ + struct cs_etr_buffers *buf = config; + + dma_free_coherent(buf->dev, buf->size, buf->vaddr, buf->paddr); + kfree(buf); +} + +static int tmc_set_etr_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config) +{ + int ret = 0; + unsigned long head; + struct cs_etr_buffers *buf = sink_config; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + /* wrap head around to the amount of space we have */ + head = handle->head & ((buf->tmc.nr_pages << PAGE_SHIFT) - 1); + + /* find the page to write to */ + buf->tmc.cur = head / PAGE_SIZE; + + /* and offset within that page */ + buf->tmc.offset = head % PAGE_SIZE; + + local_set(&buf->tmc.data_size, 0); + + /* Tell the HW where to put the trace data */ + drvdata->vaddr = buf->vaddr; + drvdata->paddr = buf->paddr; + memset(drvdata->vaddr, 0, drvdata->size); + + return ret; +} + +static unsigned long tmc_reset_etr_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config, bool *lost) +{ + long size = 0; + struct cs_etr_buffers *buf = sink_config; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (buf) { + /* + * In snapshot mode ->data_size holds the new address of the + * ring buffer's head. The size itself is the whole address + * range since we want the latest information. 
+ */ + if (buf->tmc.snapshot) { + size = buf->tmc.nr_pages << PAGE_SHIFT; + handle->head = local_xchg(&buf->tmc.data_size, size); + } + + /* + * Tell the tracer PMU how much we got in this run and if + * something went wrong along the way. Nobody else can use + * this cs_etr_buffers instance until we are done. As such + * resetting parameters here and squaring off with the ring + * buffer API in the tracer PMU is fine. + */ + *lost = !!local_xchg(&buf->tmc.lost, 0); + size = local_xchg(&buf->tmc.data_size, 0); + } + + /* Get ready for another run */ + drvdata->vaddr = NULL; + drvdata->paddr = 0; + + return size; +} + +static void tmc_update_etr_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config) +{ + int i, cur; + u32 *buf_ptr; + u32 read_ptr, write_ptr; + u32 status, to_read; + unsigned long offset; + struct cs_buffers *buf = sink_config; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (!buf) + return; + + /* This shouldn't happen */ + if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF)) + return; + + CS_UNLOCK(drvdata->base); + + tmc_flush_and_stop(drvdata); + + read_ptr = readl_relaxed(drvdata->base + TMC_RRP); + write_ptr = readl_relaxed(drvdata->base + TMC_RWP); + + /* + * Get a hold of the status register and see if a wrap around + * has occurred. If so adjust things accordingly. + */ + status = readl_relaxed(drvdata->base + TMC_STS); + if (status & TMC_STS_FULL) { + local_inc(&buf->lost); + to_read = drvdata->size; + } else { + to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size); + } + + /* + * The TMC RAM buffer may be bigger than the space available in the + * perf ring buffer (handle->size). If so advance the RRP so that we + * get the latest trace data. + */ + if (to_read > handle->size) { + u32 buffer_start, mask = 0; + + /* Read buffer start address in system memory */ + buffer_start = readl_relaxed(drvdata->base + TMC_DBALO); + + /* + * The value written to RRP must be byte-address aligned to + * the width of the trace memory databus _and_ to a frame + * boundary (16 byte), whichever is the biggest. For example, + * for 32-bit, 64-bit and 128-bit wide trace memory, the four + * LSBs must be 0s. For 256-bit wide trace memory, the five + * LSBs must be 0s. + */ + switch (drvdata->memwidth) { + case TMC_MEM_INTF_WIDTH_32BITS: + case TMC_MEM_INTF_WIDTH_64BITS: + case TMC_MEM_INTF_WIDTH_128BITS: + mask = GENMASK(31, 5); + break; + case TMC_MEM_INTF_WIDTH_256BITS: + mask = GENMASK(31, 6); + break; + } + + /* + * Make sure the new size is aligned in accordance with the + * requirement explained above. + */ + to_read = handle->size & mask; + /* Move the RAM read pointer up */ + read_ptr = (write_ptr + drvdata->size) - to_read; + /* Make sure we are still within our limits */ + if (read_ptr > (buffer_start + (drvdata->size - 1))) + read_ptr -= drvdata->size; + /* Tell the HW */ + writel_relaxed(read_ptr, drvdata->base + TMC_RRP); + local_inc(&buf->lost); + } + + cur = buf->cur; + offset = buf->offset; + + /* for every byte to read */ + for (i = 0; i < to_read; i += 4) { + buf_ptr = buf->data_pages[cur] + offset; + *buf_ptr = readl_relaxed(drvdata->base + TMC_RRD); + + offset += 4; + if (offset >= PAGE_SIZE) { + offset = 0; + cur++; + /* wrap around at the end of the buffer */ + cur &= buf->nr_pages - 1; + } + } + + /* + * In snapshot mode all we have to do is communicate to + * perf_aux_output_end() the address of the current head. 
In full + * trace mode the same function expects a size to move rb->aux_head + * forward. + */ + if (buf->snapshot) + local_set(&buf->data_size, (cur * PAGE_SIZE) + offset); + else + local_add(to_read, &buf->data_size); + + CS_LOCK(drvdata->base); +} + static const struct coresight_ops_sink tmc_etr_sink_ops = { .enable = tmc_enable_etr_sink, .disable = tmc_disable_etr_sink, + .alloc_buffer = tmc_alloc_etr_buffer, + .free_buffer = tmc_free_etr_buffer, + .set_buffer = tmc_set_etr_buffer, + .reset_buffer = tmc_reset_etr_buffer, + .update_buffer = tmc_update_etr_buffer, }; const struct coresight_ops tmc_etr_cs_ops = { @@ -253,7 +485,6 @@ const struct coresight_ops tmc_etr_cs_ops = { int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) { int ret = 0; - long val; unsigned long flags; /* config types are set a boot time and never change */ @@ -266,9 +497,8 @@ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) goto out; } - val = local_read(&drvdata->mode); /* Don't interfere if operated from Perf */ - if (val == CS_MODE_PERF) { + if (drvdata->mode == CS_MODE_PERF) { ret = -EINVAL; goto out; } @@ -280,7 +510,7 @@ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) } /* Disable the TMC if need be */ - if (val == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) tmc_etr_disable_hw(drvdata); drvdata->reading = true; @@ -303,7 +533,7 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) spin_lock_irqsave(&drvdata->spinlock, flags); /* RE-enable the TMC if need be */ - if (local_read(&drvdata->mode) == CS_MODE_SYSFS) { + if (drvdata->mode == CS_MODE_SYSFS) { /* * The trace run will continue with the same allocated trace * buffer. The trace buffer is cleared in tmc_etr_enable_hw(), diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 44b3ae346118f45b7dc5d2dd79ed550cee8d8144..51c01851533e01ab90a7fc068d267269340dd52d 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -117,7 +117,7 @@ struct tmc_drvdata { void __iomem *vaddr; u32 size; u32 len; - local_t mode; + u32 mode; enum tmc_config_type config_type; enum tmc_mem_intf_width memwidth; u32 trigger_cntr; diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 0673baf0f2f5b93bbe9c14c2a7c9a7d390f338b7..ff579a7c6d008346021d9e2709d8d0c4b0bea3fa 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -46,8 +46,11 @@ #define TPIU_ITATBCTR0 0xef8 /** register definition **/ +/* FFSR - 0x300 */ +#define FFSR_FT_STOPPED BIT(1) /* FFCR - 0x304 */ #define FFCR_FON_MAN BIT(6) +#define FFCR_STOP_FI BIT(12) /** * @base: memory mapped base address for this component. @@ -85,10 +88,14 @@ static void tpiu_disable_hw(struct tpiu_drvdata *drvdata) { CS_UNLOCK(drvdata->base); - /* Clear formatter controle reg. 
*/ - writel_relaxed(0x0, drvdata->base + TPIU_FFCR); + /* Clear formatter and stop on flush */ + writel_relaxed(FFCR_STOP_FI, drvdata->base + TPIU_FFCR); /* Generate manual flush */ - writel_relaxed(FFCR_FON_MAN, drvdata->base + TPIU_FFCR); + writel_relaxed(FFCR_STOP_FI | FFCR_FON_MAN, drvdata->base + TPIU_FFCR); + /* Wait for flush to complete */ + coresight_timeout(drvdata->base, TPIU_FFCR, FFCR_FON_MAN, 0); + /* Wait for formatter to stop */ + coresight_timeout(drvdata->base, TPIU_FFSR, FFSR_FT_STOPPED, 1); CS_LOCK(drvdata->base); } diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 7bf00a0beb6f14b4c69cdaf619a2d981e58f612a..0c37356e417ca66131717e83f359357f092df6d3 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -368,6 +368,52 @@ struct coresight_device *coresight_get_sink(struct list_head *path) return csdev; } +static int coresight_enabled_sink(struct device *dev, void *data) +{ + bool *reset = data; + struct coresight_device *csdev = to_coresight_device(dev); + + if ((csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) && + csdev->activated) { + /* + * Now that we have a handle on the sink for this session, + * disable the sysFS "enable_sink" flag so that possible + * concurrent perf session that wish to use another sink don't + * trip on it. Doing so has no ramification for the current + * session. + */ + if (*reset) + csdev->activated = false; + + return 1; + } + + return 0; +} + +/** + * coresight_get_enabled_sink - returns the first enabled sink found on the bus + * @deactivate: Whether the 'enable_sink' flag should be reset + * + * When operated from perf the deactivate parameter should be set to 'true'. + * That way the "enabled_sink" flag of the sink that was selected can be reset, + * allowing for other concurrent perf sessions to choose a different sink. + * + * When operated from sysFS users have full control and as such the deactivate + * parameter should be set to 'false', hence mandating users to explicitly + * clear the flag. + */ +struct coresight_device *coresight_get_enabled_sink(bool deactivate) +{ + struct device *dev = NULL; + + dev = bus_find_device(&coresight_bustype, NULL, &deactivate, + coresight_enabled_sink); + + return dev ? to_coresight_device(dev) : NULL; +} + /** * _coresight_build_path - recursively build a path from a @csdev to a sink. * @csdev: The device to start from. @@ -380,6 +426,7 @@ struct coresight_device *coresight_get_sink(struct list_head *path) * last one. */ static int _coresight_build_path(struct coresight_device *csdev, + struct coresight_device *sink, struct list_head *path) { int i; @@ -387,15 +434,15 @@ static int _coresight_build_path(struct coresight_device *csdev, struct coresight_node *node; /* An activated sink has been found. 
Enqueue the element */ - if ((csdev->type == CORESIGHT_DEV_TYPE_SINK || - csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) && csdev->activated) + if (csdev == sink) goto out; /* Not a sink - recursively explore each port found on this element */ for (i = 0; i < csdev->nr_outport; i++) { struct coresight_device *child_dev = csdev->conns[i].child_dev; - if (child_dev && _coresight_build_path(child_dev, path) == 0) { + if (child_dev && + _coresight_build_path(child_dev, sink, path) == 0) { found = true; break; } @@ -422,18 +469,22 @@ static int _coresight_build_path(struct coresight_device *csdev, return 0; } -struct list_head *coresight_build_path(struct coresight_device *csdev) +struct list_head *coresight_build_path(struct coresight_device *source, + struct coresight_device *sink) { struct list_head *path; int rc; + if (!sink) + return ERR_PTR(-EINVAL); + path = kzalloc(sizeof(struct list_head), GFP_KERNEL); if (!path) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(path); - rc = _coresight_build_path(csdev, path); + rc = _coresight_build_path(source, sink, path); if (rc) { kfree(path); return ERR_PTR(rc); @@ -497,6 +548,7 @@ static int coresight_validate_source(struct coresight_device *csdev, int coresight_enable(struct coresight_device *csdev) { int cpu, ret = 0; + struct coresight_device *sink; struct list_head *path; mutex_lock(&coresight_mutex); @@ -508,7 +560,17 @@ int coresight_enable(struct coresight_device *csdev) if (csdev->enable) goto out; - path = coresight_build_path(csdev); + /* + * Search for a valid sink for this session but don't reset the + * "enable_sink" flag in sysFS. Users get to do that explicitly. + */ + sink = coresight_get_enabled_sink(false); + if (!sink) { + ret = -EINVAL; + goto out; + } + + path = coresight_build_path(csdev, sink); if (IS_ERR(path)) { pr_err("building path(s) failed\n"); ret = PTR_ERR(path); diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 63b5db4e407096d204615c4c8d4a907640b94d76..e0f3244505d3d6eb4f8d0935868410ed3d3da394 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -95,6 +95,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9da6), .driver_data = (kernel_ulong_t)0, }, + { + /* Gemini Lake */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e), + .driver_data = (kernel_ulong_t)0, + }, { 0 }, }; diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 686971263bef19a2fcc159432a6e6e46b1e16771..45d6771fac8ce1efcd093a710c6c3fad64ca33b8 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -962,10 +962,6 @@ static int cdns_i2c_probe(struct platform_device *pdev) goto err_clk_dis; } - ret = i2c_add_adapter(&id->adap); - if (ret < 0) - goto err_clk_dis; - /* * Cadence I2C controller has a bug wherein it generates * invalid read transaction after HW timeout in master receiver mode. 
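The coresight.c changes above make path building explicit: _coresight_build_path() now recurses toward one specific sink, stopping only when it reaches that exact device rather than the first activated sink it happens to meet, and coresight_get_enabled_sink() picks (and optionally deactivates) that sink up front. A compact, runnable model of the same depth-first walk over a coresight-style device graph; the structures are simplified, there is no reference counting, and all names are illustrative:

#include <stdbool.h>
#include <stdio.h>

#define MAX_PORTS 4

struct csdev {
	const char *name;
	int nr_outport;
	struct csdev *conns[MAX_PORTS];
};

/* Depth-first walk from a source toward one specific sink; the path is
 * printed while unwinding, mirroring how the kernel prepends nodes. */
static bool build_path(struct csdev *csdev, struct csdev *sink)
{
	if (csdev == sink) {
		printf("%s", csdev->name);
		return true;
	}
	for (int i = 0; i < csdev->nr_outport; i++) {
		struct csdev *child = csdev->conns[i];

		if (child && build_path(child, sink)) {
			printf(" <- %s", csdev->name);
			return true;
		}
	}
	return false;
}

int main(void)
{
	struct csdev etr = { .name = "tmc-etr" };
	struct csdev etf = { .name = "tmc-etf" };
	struct csdev funnel = { .name = "funnel", .nr_outport = 2,
				.conns = { &etf, &etr } };
	struct csdev etm = { .name = "etm0", .nr_outport = 1,
			     .conns = { &funnel } };

	/* Even though the funnel reaches the ETF first, asking for the
	 * ETR explicitly selects the other branch. */
	if (build_path(&etm, &etr))
		putchar('\n');
	return 0;
}

Passing the sink explicitly is what lets a perf session and a sysFS session coexist on topologies with more than one sink, which the old "stop at the first activated sink" walk could not guarantee.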
@@ -975,6 +971,10 @@ static int cdns_i2c_probe(struct platform_device *pdev) */ cdns_i2c_writereg(CDNS_I2C_TIMEOUT_MAX, CDNS_I2C_TIME_OUT_OFFSET); + ret = i2c_add_adapter(&id->adap); + if (ret < 0) + goto err_clk_dis; + dev_info(&pdev->dev, "%u kHz mmio %08lx irq %d\n", id->i2c_clk / 1000, (unsigned long)r_mem->start, id->irq); diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index eb3627f35d12002776447982d493835fe36dc064..e6fe21a6135b11197c12b0786af3374a0b092b34 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1592,6 +1592,9 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Default timeout in interrupt mode: 200 ms */ priv->adapter.timeout = HZ / 5; + if (dev->irq == IRQ_NOTCONNECTED) + priv->features &= ~FEATURE_IRQ; + if (priv->features & FEATURE_IRQ) { u16 pcictl, pcists; diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c index 8f11d347b3ec482815e37d3170fa6abef4537c31..c811af4c8d817bcf353068bf2e3f95f56953155b 100644 --- a/drivers/i2c/busses/i2c-riic.c +++ b/drivers/i2c/busses/i2c-riic.c @@ -218,8 +218,12 @@ static irqreturn_t riic_tend_isr(int irq, void *data) } if (riic->is_last || riic->err) { - riic_clear_set_bit(riic, 0, ICIER_SPIE, RIIC_ICIER); + riic_clear_set_bit(riic, ICIER_TEIE, ICIER_SPIE, RIIC_ICIER); writeb(ICCR2_SP, riic->base + RIIC_ICCR2); + } else { + /* Transfer is complete, but do not send STOP */ + riic_clear_set_bit(riic, ICIER_TEIE, 0, RIIC_ICIER); + complete(&riic->msg_done); } return IRQ_HANDLED; diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index 472641fc890c9bef608cca285adb615e45b48823..af05e20c986b54e1f68b5152f4c75e03e4e5470e 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -269,6 +269,7 @@ int ads1015_get_adc_result(struct ads1015_data *data, int chan, int *val) conv_time = DIV_ROUND_UP(USEC_PER_SEC, data->data_rate[dr_old]); conv_time += DIV_ROUND_UP(USEC_PER_SEC, data->data_rate[dr]); + conv_time += conv_time / 10; /* 10% internal clock inaccuracy */ usleep_range(conv_time, conv_time + 1); data->conv_invalid = false; } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 809a028001027ca5b7e730dbc9b9e738e504912d..a09d6eed3b888c7b70298586d1dbfab706ce0625 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1482,7 +1482,7 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, return id_priv; } -static inline int cma_user_data_offset(struct rdma_id_private *id_priv) +static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv) { return cma_family(id_priv) == AF_IB ? 
0 : sizeof(struct cma_hdr); } @@ -1877,7 +1877,8 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_id_private *listen_id, *conn_id = NULL; struct rdma_cm_event event; struct net_device *net_dev; - int offset, ret; + u8 offset; + int ret; listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); if (IS_ERR(listen_id)) @@ -3309,7 +3310,8 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, struct ib_cm_sidr_req_param req; struct ib_cm_id *id; void *private_data; - int offset, ret; + u8 offset; + int ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); @@ -3366,7 +3368,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, struct rdma_route *route; void *private_data; struct ib_cm_id *id; - int offset, ret; + u8 offset; + int ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index a754fc727de52181aed8b4e59146a9e356a0411a..ff12b8d176ced89fdfc3e427498dcd80a7150773 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -196,7 +196,7 @@ void ib_free_cq(struct ib_cq *cq) irq_poll_disable(&cq->iop); break; case IB_POLL_WORKQUEUE: - flush_work(&cq->work); + cancel_work_sync(&cq->work); break; default: WARN_ON_ONCE(1); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 19c6477af19f1416d17c15363e239307542b438d..a856371bbe58c17a42f9cdc2e427b7ccdd8c307e 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -575,10 +575,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, ret = -EAGAIN; goto skip_cqe; } - if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) { + if (unlikely(!CQE_STATUS(hw_cqe) && + CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) { t4_set_wq_in_error(wq); - hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN)); - goto proc_cqe; + hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN)); } goto proc_cqe; } diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 862381aa83c824bb8712e408df39e49f47f11975..b55adf53c7587910cd26ad23fafca01cdb09f85b 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -171,7 +171,7 @@ struct t4_cqe { __be32 msn; } rcqe; struct { - u32 stag; + __be32 stag; u16 nada2; u16 cidx; } scqe; diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 010c709ba3bb539169b2b3112584a960d20a0e55..58c531db4f4aa0eceec5b86d8adb80bc4e75a374 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -675,8 +675,8 @@ struct fw_ri_fr_nsmr_tpte_wr { __u16 wrid; __u8 r1[3]; __u8 len16; - __u32 r2; - __u32 stag; + __be32 r2; + __be32 stag; struct fw_ri_tpte tpte; __u64 pbl[2]; }; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 24d0820873cf2df220892b9523907aeb9485eb8b..4682909b021b2f182e5b86273dd378b8bdfef011 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9769,7 +9769,7 @@ int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which) goto unimplemented; case HFI1_IB_CFG_OP_VLS: - val = ppd->vls_operational; + val = ppd->actual_vls_operational; break; case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */ val = VL_ARB_HIGH_PRIO_TABLE_SIZE; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 
6fd043b1d71413fbcebe8a209121a9cee50a5586..7db2001775cba009e4c8c9fc7de37af931bbd140 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -159,6 +159,9 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, return NOTIFY_DONE; iwdev = &hdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; + netdev = iwdev->ldev->netdev; upper_dev = netdev_master_upper_dev_get(netdev); if (netdev != event_netdev) @@ -231,6 +234,9 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, return NOTIFY_DONE; iwdev = &hdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; + netdev = iwdev->ldev->netdev; if (netdev != event_netdev) return NOTIFY_DONE; @@ -280,6 +286,8 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void * if (!iwhdl) return NOTIFY_DONE; iwdev = &iwhdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; p = (__be32 *)neigh->primary_key; i40iw_copy_ip_ntohl(local_ipaddr, p); if (neigh->nud_state & NUD_VALID) { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index c22454383976c4dda19eee489a9f6e93eabd9251..709d6491d2435c417756eed7761824397c84aafc 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1669,7 +1669,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->mtu_msgmax = (IB_MTU_4096 << 5) | ilog2(dev->dev->caps.max_gso_sz); else - context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; + context->mtu_msgmax = (IB_MTU_4096 << 5) | 13; } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { pr_err("path MTU (%u) is invalid\n", diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 786f640fc4626a514612a6cdbab85abd736fac24..5e29fbd3a5a0bc929f1439a2fce6a2d970895b41 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2514,6 +2514,8 @@ static int create_umr_res(struct mlx5_ib_dev *dev) qp->real_qp = qp; qp->uobject = NULL; qp->qp_type = MLX5_IB_QPT_REG_UMR; + qp->send_cq = init_attr->send_cq; + qp->recv_cq = init_attr->recv_cq; attr->qp_state = IB_QPS_INIT; attr->port_num = 1; @@ -2573,6 +2575,18 @@ static int create_umr_res(struct mlx5_ib_dev *dev) return ret; } +static u8 mlx5_get_umr_fence(u8 umr_fence_cap) +{ + switch (umr_fence_cap) { + case MLX5_CAP_UMR_FENCE_NONE: + return MLX5_FENCE_MODE_NONE; + case MLX5_CAP_UMR_FENCE_SMALL: + return MLX5_FENCE_MODE_INITIATOR_SMALL; + default: + return MLX5_FENCE_MODE_STRONG_ORDERING; + } +} + static int create_dev_resources(struct mlx5_ib_resources *devr) { struct ib_srq_init_attr attr; @@ -3099,6 +3113,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mlx5_ib_internal_fill_odp_caps(dev); + dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); + if (MLX5_CAP_GEN(mdev, imaicl)) { dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 86e1e08125ff2e92bf03e6556021fb4010d4182c..d5cc954e8ac2dfabc1cd4d7a3e0972c1abb4d835 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -345,7 +345,7 @@ struct mlx5_ib_qp { struct mlx5_ib_wq rq; u8 sq_signal_bits; - u8 fm_cache; + u8 next_fence; struct mlx5_ib_wq sq; /* serialize qp state modifications @@ -643,6 +643,7 @@ struct mlx5_ib_dev { struct list_head qp_list; /* Array with num_ports elements 
*/ struct mlx5_ib_port *port; + u8 umr_fence; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2665414b48750baf98279c26d86425a095596d44..fdd156101a72af0e15583bc107ef2b4aadb0a6ce 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3755,24 +3755,6 @@ static void mlx5_bf_copy(u64 __iomem *dst, u64 *src, } } -static u8 get_fence(u8 fence, struct ib_send_wr *wr) -{ - if (unlikely(wr->opcode == IB_WR_LOCAL_INV && - wr->send_flags & IB_SEND_FENCE)) - return MLX5_FENCE_MODE_STRONG_ORDERING; - - if (unlikely(fence)) { - if (wr->send_flags & IB_SEND_FENCE) - return MLX5_FENCE_MODE_SMALL_AND_FENCE; - else - return fence; - } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) { - return MLX5_FENCE_MODE_FENCE; - } - - return 0; -} - static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, struct ib_send_wr *wr, unsigned *idx, @@ -3801,8 +3783,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, u8 size, unsigned idx, u64 wr_id, - int nreq, u8 fence, u8 next_fence, - u32 mlx5_opcode) + int nreq, u8 fence, u32 mlx5_opcode) { u8 opmod = 0; @@ -3810,7 +3791,6 @@ static void finish_wqe(struct mlx5_ib_qp *qp, mlx5_opcode | ((u32)opmod << 24)); ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); ctrl->fm_ce_se |= fence; - qp->fm_cache = next_fence; if (unlikely(qp->wq_sig)) ctrl->signature = wq_sig(ctrl); @@ -3870,7 +3850,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - fence = qp->fm_cache; num_sge = wr->num_sge; if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); @@ -3887,6 +3866,19 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } + if (wr->opcode == IB_WR_LOCAL_INV || + wr->opcode == IB_WR_REG_MR) { + fence = dev->umr_fence; + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + } else if (wr->send_flags & IB_SEND_FENCE) { + if (qp->next_fence) + fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; + else + fence = MLX5_FENCE_MODE_FENCE; + } else { + fence = qp->next_fence; + } + switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; @@ -3913,7 +3905,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; case IB_WR_LOCAL_INV: - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); set_linv_wr(qp, &seg, &size); @@ -3921,7 +3912,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_REG_MR: - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = cpu_to_be32(reg_wr(wr)->key); err = set_reg_wr(qp, reg_wr(wr), &seg, &size); @@ -3944,9 +3934,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_UMR); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_UMR); /* * SET_PSV WQEs are not signaled and solicited * on error @@ -3971,9 +3960,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_SET_PSV); err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, 
nreq); if (err) { @@ -3983,7 +3971,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire, mr->sig->psv_wire.psv_idx, &seg, &size); @@ -3993,9 +3980,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_SET_PSV); + qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; num_sge = 0; goto skip_psv; @@ -4100,8 +4087,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - get_fence(fence, wr), next_fence, + qp->next_fence = next_fence; + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence, mlx5_ib_opcode[wr->opcode]); skip_psv: if (0) diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c index 63890ebb72bdff1c525e87786f59b124b2ea8d3f..eccf7039aaca755e0dfbc8135cfd0de7dc84e9a6 100644 --- a/drivers/infiniband/hw/qedr/qedr_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_cm.c @@ -404,9 +404,9 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev, } if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h)) - packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW; - else packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB; + else + packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW; packet->roce_mode = roce_mode; memcpy(packet->header.vaddr, ud_header_buffer, header_size); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 4ba019e3dc56c0f81b9daf6f0370b8c5fdcb2ea5..35d5b89decb431237a651099819acd25bbded561 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1653,7 +1653,7 @@ static int qedr_update_qp_state(struct qedr_dev *dev, int status = 0; if (new_state == qp->state) - return 1; + return 0; switch (qp->state) { case QED_ROCE_QP_STATE_RESET: diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index e202b8142759f58e0c06317ce1e4f3d798c54f37..6b712eecbd37d9ca5a0bf312028dc7b23c1c2fd8 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -170,9 +170,9 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, spin_lock_irq(&rdi->mmap_offset_lock); if (rdi->mmap_offset == 0) - rdi->mmap_offset = PAGE_SIZE; + rdi->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA); ip->offset = rdi->mmap_offset; - rdi->mmap_offset += size; + rdi->mmap_offset += ALIGN(size, SHMLBA); spin_unlock_irq(&rdi->mmap_offset_lock); INIT_LIST_HEAD(&ip->pending_mmaps); diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index c572a4c09359c9f91fcf7f55466fadff012fbc82..bd812e00988ed32f5517ce952a0e2186c4678f87 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -156,10 +156,10 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, spin_lock_bh(&rxe->mmap_offset_lock); if (rxe->mmap_offset == 0) - rxe->mmap_offset = PAGE_SIZE; + rxe->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA); ip->info.offset = rxe->mmap_offset; - rxe->mmap_offset += size; + rxe->mmap_offset += ALIGN(size, SHMLBA); spin_unlock_bh(&rxe->mmap_offset_lock); diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 
ee26a1b1b4edb314852ad519e2c23990e912bcc0..1c4e5b2e683508a04fbaaa98c4cd2ad95fdbc644 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -412,6 +412,8 @@ void *rxe_alloc(struct rxe_pool *pool) elem = kmem_cache_zalloc(pool_cache(pool), (pool->flags & RXE_POOL_ATOMIC) ? GFP_ATOMIC : GFP_KERNEL); + if (!elem) + return NULL; elem->pool = pool; kref_init(&elem->ref_cnt); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 9d084780ac91faa41b464241c543dca2361c3429..5b0ca35c06ab85558d9f61c0bd72cff3ac3a7c4d 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -726,11 +726,11 @@ int rxe_requester(void *arg) ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; - kfree_skb(skb); rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { + kfree_skb(skb); rxe_run_task(&qp->req.task, 1); goto exit; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 7705820cdac61a73b8a16406e7811c3865c5e649..8c0ddd7165ae5b23580cb628d602f38752bcf0ca 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -799,18 +799,17 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) /* Unreachable */ WARN_ON(1); - /* We successfully processed this new request. */ - qp->resp.msn++; - /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; - if (pkt->mask & RXE_COMP_MASK) + if (pkt->mask & RXE_COMP_MASK) { + /* We successfully processed this new request. */ + qp->resp.msn++; return RESPST_COMPLETE; - else if (qp_type(qp) == IB_QPT_RC) + } else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 830fecb6934c8edf60a4c1d138cd4815be4b1314..335bd2c9e16ec6c53c8fb00f92c2d1abe23b879e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1177,10 +1177,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev); if (level == IPOIB_FLUSH_HEAVY) { + rtnl_lock(); if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) ipoib_ib_dev_stop(dev); - if (ipoib_ib_dev_open(dev) != 0) + + result = ipoib_ib_dev_open(dev); + rtnl_unlock(); + if (result) return; + if (netif_queue_stopped(dev)) netif_start_queue(dev); } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 0be6a7c5ddb5aea294be7c10a8247895adbc2dde..cb48e22afff724c0810bc6e8f7ed51695c6f18e9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -430,6 +430,7 @@ struct iser_fr_desc { struct list_head list; struct iser_reg_resources rsc; struct iser_pi_context *pi_ctx; + struct list_head all_list; }; /** @@ -443,6 +444,7 @@ struct iser_fr_pool { struct list_head list; spinlock_t lock; int size; + struct list_head all_list; }; /** diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index a4b791dfaa1db4d2aa5158b5d4e4fc23bd3799ba..bc6f5bb6c524aca894ef9b3a50d2c36a2659bcf9 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -362,6 +362,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, int i, ret; 
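/*
 * Illustration (editor's sketch, not part of the surrounding iser patch):
 * the all_list change keeps every fastreg descriptor on a second list that
 * normal operation never empties, so the teardown path can free descriptors
 * even while some are checked out of the free list. A minimal sketch of that
 * two-list pattern, assuming only <linux/list.h> and <linux/slab.h>; the
 * demo_* names are hypothetical.
 */
#include <linux/list.h>
#include <linux/slab.h>

struct demo_desc {
	struct list_head list;		/* free-list linkage, removed while in use */
	struct list_head all_list;	/* permanent linkage, removed only at teardown */
};

struct demo_pool {
	struct list_head list;		/* descriptors currently available */
	struct list_head all_list;	/* every descriptor ever allocated */
};

static int demo_pool_grow(struct demo_pool *pool)
{
	struct demo_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);

	if (!desc)
		return -ENOMEM;
	list_add_tail(&desc->list, &pool->list);
	list_add_tail(&desc->all_list, &pool->all_list);
	return 0;
}

static void demo_pool_destroy(struct demo_pool *pool)
{
	struct demo_desc *desc, *tmp;

	/* Walk all_list, not the free list, so in-flight descriptors are freed too. */
	list_for_each_entry_safe(desc, tmp, &pool->all_list, all_list) {
		list_del(&desc->all_list);
		kfree(desc);
	}
}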
INIT_LIST_HEAD(&fr_pool->list); + INIT_LIST_HEAD(&fr_pool->all_list); spin_lock_init(&fr_pool->lock); fr_pool->size = 0; for (i = 0; i < cmds_max; i++) { @@ -373,6 +374,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, } list_add_tail(&desc->list, &fr_pool->list); + list_add_tail(&desc->all_list, &fr_pool->all_list); fr_pool->size++; } @@ -392,13 +394,13 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) struct iser_fr_desc *desc, *tmp; int i = 0; - if (list_empty(&fr_pool->list)) + if (list_empty(&fr_pool->all_list)) return; iser_info("freeing conn %p fr pool\n", ib_conn); - list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) { - list_del(&desc->list); + list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) { + list_del(&desc->all_list); iser_free_reg_res(&desc->rsc); if (desc->pi_ctx) iser_free_pi_ctx(desc->pi_ctx); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 39d28375aa37d98958f7debd24ded524920e351b..0983470929bda8a1f29549e0ff8ad24c8f44aeed 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -747,6 +747,7 @@ isert_connect_error(struct rdma_cm_id *cma_id) { struct isert_conn *isert_conn = cma_id->qp->qp_context; + ib_drain_qp(isert_conn->qp); list_del_init(&isert_conn->node); isert_conn->cm_id = NULL; isert_put_conn(isert_conn); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index b9748970df4aab15502b7c7cc7541c4deca8c87c..29ab814693fc3ee917aa58013ca6d3a8d95062cb 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -992,8 +992,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) return -ENOMEM; attr->qp_state = IB_QPS_INIT; - attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; attr->port_num = ch->sport->port; attr->pkey_index = 0; diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c index caa5a62c42fbe0b55e0230931d7d833922fdaab9..15929d8624592b5e17a3d94d2f37704670bc226a 100644 --- a/drivers/input/misc/twl4030-vibra.c +++ b/drivers/input/misc/twl4030-vibra.c @@ -178,12 +178,14 @@ static SIMPLE_DEV_PM_OPS(twl4030_vibra_pm_ops, twl4030_vibra_suspend, twl4030_vibra_resume); static bool twl4030_vibra_check_coexist(struct twl4030_vibra_data *pdata, - struct device_node *node) + struct device_node *parent) { + struct device_node *node; + if (pdata && pdata->coexist) return true; - node = of_find_node_by_name(node, "codec"); + node = of_get_child_by_name(parent, "codec"); if (node) { of_node_put(node); return true; diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c index 5690eb7ff954d23390d966d2f5c7137513a68b9f..15e0d352c4cc2445b2fb05636e1e7eb950001d91 100644 --- a/drivers/input/misc/twl6040-vibra.c +++ b/drivers/input/misc/twl6040-vibra.c @@ -248,8 +248,7 @@ static int twl6040_vibra_probe(struct platform_device *pdev) int vddvibr_uV = 0; int error; - of_node_get(twl6040_core_dev->of_node); - twl6040_core_node = of_find_node_by_name(twl6040_core_dev->of_node, + twl6040_core_node = of_get_child_by_name(twl6040_core_dev->of_node, "vibra"); if (!twl6040_core_node) { dev_err(&pdev->dev, "parent of node is missing?\n"); diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index f26807c75be46a494ed09019552ad2d1a21205f7..af83d2e349131588305f167b7364ef8ab3e9a9c6 100644 --- 
a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1247,29 +1247,32 @@ static int alps_decode_ss4_v2(struct alps_fields *f, case SS4_PACKET_ID_MULTI: if (priv->flags & ALPS_BUTTONPAD) { if (IS_SS4PLUS_DEV(priv->dev_id)) { - f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0); - f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + f->mt[2].x = SS4_PLUS_BTL_MF_X_V2(p, 0); + f->mt[3].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + no_data_x = SS4_PLUS_MFPACKET_NO_AX_BL; } else { f->mt[2].x = SS4_BTL_MF_X_V2(p, 0); f->mt[3].x = SS4_BTL_MF_X_V2(p, 1); + no_data_x = SS4_MFPACKET_NO_AX_BL; } + no_data_y = SS4_MFPACKET_NO_AY_BL; f->mt[2].y = SS4_BTL_MF_Y_V2(p, 0); f->mt[3].y = SS4_BTL_MF_Y_V2(p, 1); - no_data_x = SS4_MFPACKET_NO_AX_BL; - no_data_y = SS4_MFPACKET_NO_AY_BL; } else { if (IS_SS4PLUS_DEV(priv->dev_id)) { - f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0); - f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1); + f->mt[2].x = SS4_PLUS_STD_MF_X_V2(p, 0); + f->mt[3].x = SS4_PLUS_STD_MF_X_V2(p, 1); + no_data_x = SS4_PLUS_MFPACKET_NO_AX; } else { - f->mt[0].x = SS4_STD_MF_X_V2(p, 0); - f->mt[1].x = SS4_STD_MF_X_V2(p, 1); + f->mt[2].x = SS4_STD_MF_X_V2(p, 0); + f->mt[3].x = SS4_STD_MF_X_V2(p, 1); + no_data_x = SS4_MFPACKET_NO_AX; } + no_data_y = SS4_MFPACKET_NO_AY; + f->mt[2].y = SS4_STD_MF_Y_V2(p, 0); f->mt[3].y = SS4_STD_MF_Y_V2(p, 1); - no_data_x = SS4_MFPACKET_NO_AX; - no_data_y = SS4_MFPACKET_NO_AY; } f->first_mp = 0; diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h index 7931237171451c1a729458589f62a43d5526a78d..9bc2babd92569b43a419559f99d6c247780ef6df 100644 --- a/drivers/input/mouse/alps.h +++ b/drivers/input/mouse/alps.h @@ -120,10 +120,12 @@ enum SS4_PACKET_ID { #define SS4_IS_5F_DETECTED(_b) ((_b[2] & 0x10) == 0x10) -#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */ -#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */ -#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coordinate value */ -#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coordinate value */ +#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */ +#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */ +#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coord value */ +#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coord value */ +#define SS4_PLUS_MFPACKET_NO_AX 4080 /* SS4 PLUS, X */ +#define SS4_PLUS_MFPACKET_NO_AX_BL 4088 /* Buttonless SS4 PLUS, X */ /* * enum V7_PACKET_ID - defines the packet type for V7 diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index cd834da5934ac34b23775bacf241773d16955bea..59603a5728f7b78553c5702d10105700f07d868e 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1609,7 +1609,7 @@ static int elantech_set_properties(struct elantech_data *etd) case 5: etd->hw_version = 3; break; - case 6 ... 14: + case 6 ... 
15: etd->hw_version = 4; break; default: diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 7e2dc5e566320fd60566a0efd0bc5cf779ff9b33..0b49f29bf0dab42f2af61a5bf5efee968ca5ca0b 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -383,6 +383,9 @@ int trackpoint_detect(struct psmouse *psmouse, bool set_properties) if (trackpoint_read(&psmouse->ps2dev, TP_EXT_BTN, &button_info)) { psmouse_warn(psmouse, "failed to get extended button data, assuming 3 buttons\n"); button_info = 0x33; + } else if (!button_info) { + psmouse_warn(psmouse, "got 0 in extended button data, assuming 3 buttons\n"); + button_info = 0x33; } psmouse->private = kzalloc(sizeof(struct trackpoint_data), GFP_KERNEL); diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index dbf09836ff3060600303064c96194b760c0e738d..d1051e3ce819fff642eeb47e7ccb65d21b5ea927 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -520,6 +520,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "IC4I"), }, }, + { + /* TUXEDO BU1406 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), + DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"), + }, + }, { } }; diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c index 251ff2aa0633170a7c1bcf09c1ba194893a53cd9..7a0dbce4dae9399e0555f9e430d363858b5a65d8 100644 --- a/drivers/input/touchscreen/88pm860x-ts.c +++ b/drivers/input/touchscreen/88pm860x-ts.c @@ -126,7 +126,7 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, int data, n, ret; if (!np) return -ENODEV; - np = of_find_node_by_name(np, "touch"); + np = of_get_child_by_name(np, "touch"); if (!np) { dev_err(&pdev->dev, "Can't find touch node\n"); return -EINVAL; @@ -144,13 +144,13 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, if (data) { ret = pm860x_reg_write(i2c, PM8607_GPADC_MISC1, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } /* set tsi prebias time */ if (!of_property_read_u32(np, "marvell,88pm860x-tsi-prebias", &data)) { ret = pm860x_reg_write(i2c, PM8607_TSI_PREBIAS, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } /* set prebias & prechg time of pen detect */ data = 0; @@ -161,10 +161,18 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, if (data) { ret = pm860x_reg_write(i2c, PM8607_PD_PREBIAS, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } of_property_read_u32(np, "marvell,88pm860x-resistor-X", res_x); + + of_node_put(np); + return 0; + +err_put_node: + of_node_put(np); + + return -EINVAL; } #else #define pm860x_touch_dt_init(x, y, z) (-1) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1a0b110f12c0576e694b3811c36eadde837b0a83..0c910a8635810cc0a7edf51e248dff9ded7d3b1e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3211,7 +3211,7 @@ static void amd_iommu_apply_dm_region(struct device *dev, unsigned long start, end; start = IOVA_PFN(region->start); - end = IOVA_PFN(region->start + region->length); + end = IOVA_PFN(region->start + region->length - 1); WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL); } diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index d3d975ae24b74be0266f480043690d832787a3d6..7f294f785ce60550d3e2c65133495a9b44afcfcc 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1547,13 +1547,15 @@ 
static int arm_smmu_domain_finalise(struct iommu_domain *domain) domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; domain->geometry.aperture_end = (1UL << ias) - 1; domain->geometry.force_aperture = true; - smmu_domain->pgtbl_ops = pgtbl_ops; ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); - if (ret < 0) + if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); + return ret; + } - return ret; + smmu_domain->pgtbl_ops = pgtbl_ops; + return 0; } static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) @@ -1580,7 +1582,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) { - int i; + int i, j; struct arm_smmu_master_data *master = fwspec->iommu_priv; struct arm_smmu_device *smmu = master->smmu; @@ -1588,6 +1590,13 @@ static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) u32 sid = fwspec->ids[i]; __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); + /* Bridged PCI devices may end up with duplicated IDs */ + for (j = 0; j < i; j++) + if (fwspec->ids[j] == sid) + break; + if (j < i) + continue; + arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste); } diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index c7820b3ea80ee6f0c06348f176050ba688e0a885..beef59eb94fa7e2d867baf733b1185f329133a3a 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -543,7 +543,10 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, if (is_sysmmu_active(data) && data->version >= MAKE_MMU_VER(3, 3)) { clk_enable(data->clk_master); if (sysmmu_block(data)) { - __sysmmu_tlb_invalidate_entry(data, iova, 1); + if (data->version >= MAKE_MMU_VER(5, 0)) + __sysmmu_tlb_invalidate(data); + else + __sysmmu_tlb_invalidate_entry(data, iova, 1); sysmmu_unblock(data); } clk_disable(data->clk_master); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 002f8a421efa8f927a8c39b2520be8f381351025..88bbc8ccc5e33f5649891396e9810b8e06ab063f 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2245,10 +2245,12 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, uint64_t tmp; if (!sg_res) { + unsigned int pgoff = sg->offset & ~PAGE_MASK; + sg_res = aligned_nrpages(sg->offset, sg->length); - sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; + sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff; sg->dma_length = sg->length; - pteval = page_to_phys(sg_page(sg)) | prot; + pteval = (sg_phys(sg) - pgoff) | prot; phys_pfn = pteval >> VTD_PAGE_SHIFT; } @@ -3894,7 +3896,7 @@ static int intel_nontranslate_map_sg(struct device *hddev, for_each_sg(sglist, sg, nelems, i) { BUG_ON(!sg_page(sg)); - sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset; + sg->dma_address = sg_phys(sg); sg->dma_length = sg->length; } return nelems; diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index f50e51c1a9c88630b03ca9095a2d6e732f671aee..d68a552cfe8d44c9bfb1f9586a3030b9946be7bf 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -418,8 +418,12 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, pte |= ARM_V7S_ATTR_NS_TABLE; __arm_v7s_set_pte(ptep, pte, 1, cfg); - } else { + } else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { cptep = iopte_deref(pte, lvl); + } else { + /* We require an unmap first */ + WARN_ON(!selftest_running); + return -EEXIST; } /* 
Rinse, repeat */ diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index b8aeb07684836c902a9f87829985f05a44fdcc5d..68c6050d1efb988dcd1cf4c8f6ec1dec9dbc9c1f 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -703,7 +703,7 @@ static struct platform_driver mtk_iommu_driver = { .probe = mtk_iommu_probe, .remove = mtk_iommu_remove, .driver = { - .name = "mtk-iommu", + .name = "mtk-iommu-v1", .of_match_table = mtk_iommu_of_ids, .pm = &mtk_iommu_pm_ops, } diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index bc0af3307bbfd6612d2ae18d37a15c8d87a6ea39..910cb5e23371b77c21397584655dc41014b72763 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -258,6 +258,7 @@ config IRQ_MXS config MVEBU_ODMI bool + select GENERIC_MSI_IRQ_DOMAIN config MVEBU_PIC bool diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 05bbf171df37720b7a3185e4e56dd8ee318beaf2..1070b7b959f2d1f60996492398aa2bb6ae0ac06c 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -199,7 +199,7 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { int i, size, reserved = 0; - u32 max = 0, entry; + u32 max = 0, entry, reg_size; const __be32 *irqsr; int ret = -ENOMEM; @@ -276,9 +276,9 @@ static int __init crossbar_of_init(struct device_node *node) if (!cb->register_offsets) goto err_irq_map; - of_property_read_u32(node, "ti,reg-size", &size); + of_property_read_u32(node, "ti,reg-size", ®_size); - switch (size) { + switch (reg_size) { case 1: cb->write = crossbar_writeb; break; @@ -304,7 +304,7 @@ static int __init crossbar_of_init(struct device_node *node) continue; cb->register_offsets[i] = reserved; - reserved += size; + reserved += reg_size; } of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map); diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 823f6985b2603198ff7b7ea7c8869db6032ba168..dd7e38ac29bdd8fd9c8a58e34ed471e1a731bde5 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -1032,6 +1032,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data) sizeof(avmb1_carddef)))) return -EFAULT; cdef.cardtype = AVM_CARDTYPE_B1; + cdef.cardnr = 0; } else { if ((retval = copy_from_user(&cdef, data, sizeof(avmb1_extcarddef)))) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 3fba31cea66e052d94ed63255e3fb3807979962d..537903bf9add0f9b3c797e7e138254bfb7a906a6 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -477,7 +477,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, if (b == -1) goto err; - k->ptr[i] = PTR(ca->buckets[b].gen, + k->ptr[i] = MAKE_PTR(ca->buckets[b].gen, bucket_to_sector(c, b), ca->sb.nr_this_dev); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 2efdce07247cca54db812dd64e82146fb0526d07..cac297f8170e26b9f02fd1b5ca02d6a966946380 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -803,7 +803,10 @@ int bch_btree_cache_alloc(struct cache_set *c) c->shrink.scan_objects = bch_mca_scan; c->shrink.seeks = 4; c->shrink.batch = c->btree_pages * 2; - register_shrinker(&c->shrink); + + if (register_shrinker(&c->shrink)) + pr_warn("bcache: %s: could not register shrinker", + __func__); return 0; } diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 243de0bf15cdbf359965dff55cac4762b804362f..4bf15182c4da21c31cc8a4751a027ac436e54b20 
100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -584,7 +584,7 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey return false; for (i = 0; i < KEY_PTRS(l); i++) - if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || + if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) return false; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 6925023e12d45656ce6802ea5237c0069b9beb29..08f20b7cd1999e769685fd8fb6deb47515e82b6d 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -508,7 +508,7 @@ static void journal_reclaim(struct cache_set *c) continue; ja->cur_idx = next; - k->ptr[n++] = PTR(0, + k->ptr[n++] = MAKE_PTR(0, bucket_to_sector(c, ca->sb.d[ja->cur_idx]), ca->sb.nr_this_dev); } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index e0f1c6d534fe9aed6888d0dcb641459dc49e3d94..edb8d1a1a69f72e01130456ba1e963b319872541 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -468,6 +468,7 @@ struct search { unsigned recoverable:1; unsigned write:1; unsigned read_dirty_data:1; + unsigned cache_missed:1; unsigned long start_time; @@ -653,6 +654,7 @@ static inline struct search *search_alloc(struct bio *bio, s->orig_bio = bio; s->cache_miss = NULL; + s->cache_missed = 0; s->d = d; s->recoverable = 1; s->write = op_is_write(bio_op(bio)); @@ -703,7 +705,14 @@ static void cached_dev_read_error(struct closure *cl) struct search *s = container_of(cl, struct search, cl); struct bio *bio = &s->bio.bio; - if (s->recoverable) { + /* + * If read request hit dirty data (s->read_dirty_data is true), + * then recovery a failed read request from cached device may + * get a stale data back. So read failure recovery is only + * permitted when read request hit clean data in cache device, + * or when cache read race happened. + */ + if (s->recoverable && !s->read_dirty_data) { /* Retry from the backing device: */ trace_bcache_read_retry(s->orig_bio); @@ -764,7 +773,7 @@ static void cached_dev_read_done_bh(struct closure *cl) struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); bch_mark_cache_accounting(s->iop.c, s->d, - !s->cache_miss, s->iop.bypass); + !s->cache_missed, s->iop.bypass); trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass); if (s->iop.error) @@ -783,6 +792,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); struct bio *miss, *cache_bio; + s->cache_missed = 1; + if (s->cache_miss || s->iop.bypass) { miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split); ret = miss == bio ? 
MAP_DONE : MAP_CONTINUE; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f4557f558b24daa11186a0a4b73af8edfab04808..28ce342348a9b5c87a996dd883f3e3a8c39a4dee 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2091,6 +2091,7 @@ static void bcache_exit(void) if (bcache_major) unregister_blkdev(bcache_major, "bcache"); unregister_reboot_notifier(&reboot); + mutex_destroy(&bch_register_lock); } static int __init bcache_init(void) @@ -2109,14 +2110,15 @@ static int __init bcache_init(void) bcache_major = register_blkdev(0, "bcache"); if (bcache_major < 0) { unregister_reboot_notifier(&reboot); + mutex_destroy(&bch_register_lock); return bcache_major; } if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) || !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || - sysfs_create_files(bcache_kobj, files) || bch_request_init() || - bch_debug_init(bcache_kobj)) + bch_debug_init(bcache_kobj) || + sysfs_create_files(bcache_kobj, files)) goto err; return 0; diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index fb02c3979bf47f62a2ee69846a3b678bcc7956ba..f7ff408567ad56fc6a54c9bb27257529478138c9 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -2084,6 +2084,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, for (k = 0; k < page; k++) { kfree(new_bp[k].map); } + kfree(new_bp); /* restore some fields from old_counts */ bitmap->counts.bp = old_counts.bp; @@ -2134,6 +2135,14 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, block += old_blocks; } + if (bitmap->counts.bp != old_counts.bp) { + unsigned long k; + for (k = 0; k < old_counts.pages; k++) + if (!old_counts.bp[k].hijacked) + kfree(old_counts.bp[k].map); + kfree(old_counts.bp); + } + if (!init) { int i; while (block < (chunks << chunkshift)) { diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 7643f72adb1c8898397c45f20651e6060908098e..3ec647e8b9c6e3358daad7f376e8f4f02f2c66f4 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1554,7 +1554,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, int l; struct dm_buffer *b, *tmp; unsigned long freed = 0; - unsigned long count = nr_to_scan; + unsigned long count = c->n_buffers[LIST_CLEAN] + + c->n_buffers[LIST_DIRTY]; unsigned long retain_target = get_retain_buffers(c); for (l = 0; l < LIST_SIZE; l++) { @@ -1591,6 +1592,7 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { struct dm_bufio_client *c; unsigned long count; + unsigned long retain_target; c = container_of(shrink, struct dm_bufio_client, shrinker); if (sc->gfp_mask & __GFP_FS) @@ -1599,8 +1601,9 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) return 0; count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; + retain_target = get_retain_buffers(c); dm_bufio_unlock(c); - return count; + return (count < retain_target) ? 
0 : (count - retain_target); } /* diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 4477bf930cf4480a4f70da9081fb912a0d726784..e976f4f393341e5d44b3d8982a15684fe20b208c 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -81,10 +81,14 @@ #define SECTOR_TO_BLOCK_SHIFT 3 /* + * For btree insert: * 3 for btree insert + * 2 for btree lookup used within space map + * For btree remove: + * 2 for shadow spine + + * 4 for rebalance 3 child node */ -#define THIN_MAX_CONCURRENT_LOCKS 5 +#define THIN_MAX_CONCURRENT_LOCKS 6 /* This should be plenty */ #define SPACE_MAP_ROOT_SIZE 128 diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 2b13117fb918cbe27775ba61cc68c6f78e5408ff..ba7edcdd09cec7f607d7b9f203ba5417fe20f10d 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -974,6 +974,7 @@ static int leave(struct mddev *mddev) lockres_free(cinfo->bitmap_lockres); unlock_all_bitmaps(mddev); dlm_release_lockspace(cinfo->lockspace, 2); + kfree(cinfo); return 0; } diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 7a75b5010f735c236de0944006ad680b95211802..e4ececd3df004b2222f39e2984229c49ef310e63 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -678,23 +678,8 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) pn->keys[1] = rn->keys[0]; memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64)); - /* - * rejig the spine. This is ugly, since it knows too - * much about the spine - */ - if (s->nodes[0] != new_parent) { - unlock_block(s->info, s->nodes[0]); - s->nodes[0] = new_parent; - } - if (key < le64_to_cpu(rn->keys[0])) { - unlock_block(s->info, right); - s->nodes[1] = left; - } else { - unlock_block(s->info, left); - s->nodes[1] = right; - } - s->count = 2; - + unlock_block(s->info, left); + unlock_block(s->info, right); return 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7aea0221530c54319fc2bf07d85cad390259feb9..475a7a1bcfe07be9d53a3368049e19f0e12fdf3d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1689,8 +1689,11 @@ static void ops_complete_reconstruct(void *stripe_head_ref) struct r5dev *dev = &sh->dev[i]; if (dev->written || i == pd_idx || i == qd_idx) { - if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) + if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) { set_bit(R5_UPTODATE, &dev->flags); + if (test_bit(STRIPE_EXPAND_READY, &sh->state)) + set_bit(R5_Expanded, &dev->flags); + } if (fua) set_bit(R5_WantFUA, &dev->flags); if (sync) diff --git a/drivers/media/dvb-frontends/ascot2e.c b/drivers/media/dvb-frontends/ascot2e.c index ad304eed656d7e16f14261281145706fe2e73330..c61227cfff2583f746a9595949ffa5f7213844d1 100644 --- a/drivers/media/dvb-frontends/ascot2e.c +++ b/drivers/media/dvb-frontends/ascot2e.c @@ -155,7 +155,9 @@ static int ascot2e_write_regs(struct ascot2e_priv *priv, static int ascot2e_write_reg(struct ascot2e_priv *priv, u8 reg, u8 val) { - return ascot2e_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return ascot2e_write_regs(priv, reg, &tmp, 1); } static int ascot2e_read_regs(struct ascot2e_priv *priv, diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c index fd0f25ee251f8dd86ce496551d6c8cbc6b083f4b..b97647cd7dc61b3703c44636614a15a87f765fcd 100644 --- a/drivers/media/dvb-frontends/cxd2841er.c +++ b/drivers/media/dvb-frontends/cxd2841er.c @@ -261,7 +261,9 
@@ static int cxd2841er_write_regs(struct cxd2841er_priv *priv, static int cxd2841er_write_reg(struct cxd2841er_priv *priv, u8 addr, u8 reg, u8 val) { - return cxd2841er_write_regs(priv, addr, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return cxd2841er_write_regs(priv, addr, reg, &tmp, 1); } static int cxd2841er_read_regs(struct cxd2841er_priv *priv, diff --git a/drivers/media/dvb-frontends/helene.c b/drivers/media/dvb-frontends/helene.c index dc43c5f6d0ea450c96b8e8c27d4533b0ed59c1a6..e06bcd4b3ddc0379a9d4dda6eaaf8f9a74318de2 100644 --- a/drivers/media/dvb-frontends/helene.c +++ b/drivers/media/dvb-frontends/helene.c @@ -331,7 +331,9 @@ static int helene_write_regs(struct helene_priv *priv, static int helene_write_reg(struct helene_priv *priv, u8 reg, u8 val) { - return helene_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return helene_write_regs(priv, reg, &tmp, 1); } static int helene_read_regs(struct helene_priv *priv, diff --git a/drivers/media/dvb-frontends/horus3a.c b/drivers/media/dvb-frontends/horus3a.c index 0c089b5986a1928faceabdedf65034396f549fcd..4ebddc895137d93bd69370009e66bd209eb22b96 100644 --- a/drivers/media/dvb-frontends/horus3a.c +++ b/drivers/media/dvb-frontends/horus3a.c @@ -89,7 +89,9 @@ static int horus3a_write_regs(struct horus3a_priv *priv, static int horus3a_write_reg(struct horus3a_priv *priv, u8 reg, u8 val) { - return horus3a_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return horus3a_write_regs(priv, reg, &tmp, 1); } static int horus3a_enter_power_save(struct horus3a_priv *priv) diff --git a/drivers/media/dvb-frontends/itd1000.c b/drivers/media/dvb-frontends/itd1000.c index cadcae4cff89122dbf90cad2d02cfa9fa13854b4..ac9d2591bb6fca292cca0fc44171ef1d9a4a7575 100644 --- a/drivers/media/dvb-frontends/itd1000.c +++ b/drivers/media/dvb-frontends/itd1000.c @@ -99,8 +99,9 @@ static int itd1000_read_reg(struct itd1000_state *state, u8 reg) static inline int itd1000_write_reg(struct itd1000_state *state, u8 r, u8 v) { - int ret = itd1000_write_regs(state, r, &v, 1); - state->shadow[r] = v; + u8 tmp = v; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + int ret = itd1000_write_regs(state, r, &tmp, 1); + state->shadow[r] = tmp; return ret; } diff --git a/drivers/media/dvb-frontends/mt312.c b/drivers/media/dvb-frontends/mt312.c index fc08429c99b7debbe2848c0d2f76e1c6d15aa8ee..7824926a37440fbc6fc39618b80710b1cb44d88c 100644 --- a/drivers/media/dvb-frontends/mt312.c +++ b/drivers/media/dvb-frontends/mt312.c @@ -142,7 +142,10 @@ static inline int mt312_readreg(struct mt312_state *state, static inline int mt312_writereg(struct mt312_state *state, const enum mt312_reg_addr reg, const u8 val) { - return mt312_write(state, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + + return mt312_write(state, reg, &tmp, 1); } static inline u32 mt312_div(u32 a, u32 b) diff --git a/drivers/media/dvb-frontends/stb0899_drv.c b/drivers/media/dvb-frontends/stb0899_drv.c index 3d171b0e00c2a742d41737b176418d40d155093d..3deddbcaa8b76303fa3d9d1a1afab0efee777aaf 100644 --- a/drivers/media/dvb-frontends/stb0899_drv.c +++ b/drivers/media/dvb-frontends/stb0899_drv.c @@ -552,7 +552,8 @@ int stb0899_write_regs(struct stb0899_state *state, unsigned int reg, u8 *data, int stb0899_write_reg(struct stb0899_state *state, unsigned int reg, u8 data) { - return stb0899_write_regs(state, reg, &data, 1); + u8 tmp = 
data; + return stb0899_write_regs(state, reg, &tmp, 1); } /* diff --git a/drivers/media/dvb-frontends/stb6100.c b/drivers/media/dvb-frontends/stb6100.c index 5add1182c3cae6f5f172d0a4e57e82c46ea8ed74..4746b1e0d637bfeddd1bd830e1d05a9d46a0aa54 100644 --- a/drivers/media/dvb-frontends/stb6100.c +++ b/drivers/media/dvb-frontends/stb6100.c @@ -226,12 +226,14 @@ static int stb6100_write_reg_range(struct stb6100_state *state, u8 buf[], int st static int stb6100_write_reg(struct stb6100_state *state, u8 reg, u8 data) { + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + if (unlikely(reg >= STB6100_NUMREGS)) { dprintk(verbose, FE_ERROR, 1, "Invalid register offset 0x%x", reg); return -EREMOTEIO; } - data = (data & stb6100_template[reg].mask) | stb6100_template[reg].set; - return stb6100_write_reg_range(state, &data, reg, 1); + tmp = (tmp & stb6100_template[reg].mask) | stb6100_template[reg].set; + return stb6100_write_reg_range(state, &tmp, reg, 1); } diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c index abc379aea713f5e6a93ac34dc5531e1cbf6124b9..94cec81d0a5c7514ea17df15d8b392eb496dba2a 100644 --- a/drivers/media/dvb-frontends/stv0367.c +++ b/drivers/media/dvb-frontends/stv0367.c @@ -804,7 +804,9 @@ int stv0367_writeregs(struct stv0367_state *state, u16 reg, u8 *data, int len) static int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data) { - return stv0367_writeregs(state, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv0367_writeregs(state, reg, &tmp, 1); } static u8 stv0367_readreg(struct stv0367_state *state, u16 reg) diff --git a/drivers/media/dvb-frontends/stv090x.c b/drivers/media/dvb-frontends/stv090x.c index 25bdf6e0f9632d18a64c0c4e42bda005d0d80c51..f0377e2b341b7933122cc71ca595aeb0b6803909 100644 --- a/drivers/media/dvb-frontends/stv090x.c +++ b/drivers/media/dvb-frontends/stv090x.c @@ -761,7 +761,9 @@ static int stv090x_write_regs(struct stv090x_state *state, unsigned int reg, u8 static int stv090x_write_reg(struct stv090x_state *state, unsigned int reg, u8 data) { - return stv090x_write_regs(state, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv090x_write_regs(state, reg, &tmp, 1); } static int stv090x_i2c_gate_ctrl(struct stv090x_state *state, int enable) diff --git a/drivers/media/dvb-frontends/stv6110x.c b/drivers/media/dvb-frontends/stv6110x.c index c611ad210b5c264fe2f3c5f0dfa0b452f7ed5f6c..924f16fee1fb602c371141042d78b7d699321da7 100644 --- a/drivers/media/dvb-frontends/stv6110x.c +++ b/drivers/media/dvb-frontends/stv6110x.c @@ -97,7 +97,9 @@ static int stv6110x_write_regs(struct stv6110x_state *stv6110x, int start, u8 da static int stv6110x_write_reg(struct stv6110x_state *stv6110x, u8 reg, u8 data) { - return stv6110x_write_regs(stv6110x, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv6110x_write_regs(stv6110x, reg, &tmp, 1); } static int stv6110x_init(struct dvb_frontend *fe) diff --git a/drivers/media/dvb-frontends/ts2020.c b/drivers/media/dvb-frontends/ts2020.c index a9f6bbea6df368fbea92a47457dec33730bf90c3..103b9c824f1f241e5389ee7b6eaaa574b016caff 100644 --- a/drivers/media/dvb-frontends/ts2020.c +++ b/drivers/media/dvb-frontends/ts2020.c @@ -369,7 +369,7 @@ static int ts2020_read_tuner_gain(struct dvb_frontend *fe, unsigned v_agc, gain2 = clamp_t(long, gain2, 0, 13); v_agc = clamp_t(long, v_agc, 400, 1100); - *_gain = -(gain1 * 2330 + + *_gain = 
-((__s64)gain1 * 2330 + gain2 * 3500 + v_agc * 24 / 10 * 10 + 10000); @@ -387,7 +387,7 @@ static int ts2020_read_tuner_gain(struct dvb_frontend *fe, unsigned v_agc, gain3 = clamp_t(long, gain3, 0, 6); v_agc = clamp_t(long, v_agc, 600, 1600); - *_gain = -(gain1 * 2650 + + *_gain = -((__s64)gain1 * 2650 + gain2 * 3380 + gain3 * 2850 + v_agc * 176 / 100 * 10 - diff --git a/drivers/media/dvb-frontends/zl10039.c b/drivers/media/dvb-frontends/zl10039.c index f8c271be196c19180007560b73f7272f3c804719..0d2bef62ff0532fdc7e0cf999d98164c9072c35e 100644 --- a/drivers/media/dvb-frontends/zl10039.c +++ b/drivers/media/dvb-frontends/zl10039.c @@ -138,7 +138,9 @@ static inline int zl10039_writereg(struct zl10039_state *state, const enum zl10039_reg_addr reg, const u8 val) { - return zl10039_write(state, reg, &val, 1); + const u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return zl10039_write(state, reg, &tmp, 1); } static int zl10039_init(struct dvb_frontend *fe) diff --git a/drivers/media/platform/soc_camera/soc_scale_crop.c b/drivers/media/platform/soc_camera/soc_scale_crop.c index f77252d6ccd3a3c3caade41b2be891e5ddb4ed47..d29c24854c2c003c3691c39604a5cb4238fc3802 100644 --- a/drivers/media/platform/soc_camera/soc_scale_crop.c +++ b/drivers/media/platform/soc_camera/soc_scale_crop.c @@ -418,3 +418,7 @@ void soc_camera_calc_client_output(struct soc_camera_device *icd, mf->height = soc_camera_shift_scale(rect->height, shift, scale_v); } EXPORT_SYMBOL(soc_camera_calc_client_output); + +MODULE_DESCRIPTION("soc-camera scaling-cropping functions"); +MODULE_AUTHOR("Guennadi Liakhovetski "); +MODULE_LICENSE("GPL"); diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 6ebe89551961c5337e8022ab702e2dcf36f1e5b8..f4509ef9922b7908461615e2e397db67bab2ef94 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -446,6 +446,8 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) return -ERESTARTSYS; ir = irctls[iminor(inode)]; + mutex_unlock(&lirc_dev_lock); + if (!ir) { retval = -ENODEV; goto error; @@ -486,8 +488,6 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) } error: - mutex_unlock(&lirc_dev_lock); - nonseekable_open(inode, file); return retval; diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c index 0e8fb89896c40ee26b02df2db275a64a85cbe866..5c4aa247d650f0c6269e290961364b15115369b0 100644 --- a/drivers/media/usb/dvb-usb-v2/lmedm04.c +++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c @@ -504,18 +504,23 @@ static int lme2510_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, static int lme2510_return_status(struct dvb_usb_device *d) { - int ret = 0; + int ret; u8 *data; - data = kzalloc(10, GFP_KERNEL); + data = kzalloc(6, GFP_KERNEL); if (!data) return -ENOMEM; - ret |= usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), - 0x06, 0x80, 0x0302, 0x00, data, 0x0006, 200); - info("Firmware Status: %x (%x)", ret , data[2]); + ret = usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), + 0x06, 0x80, 0x0302, 0x00, + data, 0x6, 200); + if (ret != 6) + ret = -EINVAL; + else + ret = data[2]; + + info("Firmware Status: %6ph", data); - ret = (ret < 0) ? 
-ENODEV : data[2]; kfree(data); return ret; } @@ -1079,8 +1084,6 @@ static int dm04_lme2510_frontend_attach(struct dvb_usb_adapter *adap) if (adap->fe[0]) { info("FE Found M88RS2000"); - dvb_attach(ts2020_attach, adap->fe[0], &ts2020_config, - &d->i2c_adap); st->i2c_tuner_gate_w = 5; st->i2c_tuner_gate_r = 5; st->i2c_tuner_addr = 0x60; @@ -1146,17 +1149,18 @@ static int dm04_lme2510_tuner(struct dvb_usb_adapter *adap) ret = st->tuner_config; break; case TUNER_RS2000: - ret = st->tuner_config; + if (dvb_attach(ts2020_attach, adap->fe[0], + &ts2020_config, &d->i2c_adap)) + ret = st->tuner_config; break; default: break; } - if (ret) + if (ret) { info("TUN Found %s tuner", tun_msg[ret]); - else { - info("TUN No tuner found --- resetting device"); - lme_coldreset(d); + } else { + info("TUN No tuner found"); return -ENODEV; } @@ -1200,6 +1204,7 @@ static int lme2510_get_adapter_count(struct dvb_usb_device *d) static int lme2510_identify_state(struct dvb_usb_device *d, const char **name) { struct lme2510_state *st = d->priv; + int status; usb_reset_configuration(d->udev); @@ -1208,12 +1213,16 @@ static int lme2510_identify_state(struct dvb_usb_device *d, const char **name) st->dvb_usb_lme2510_firmware = dvb_usb_lme2510_firmware; - if (lme2510_return_status(d) == 0x44) { + status = lme2510_return_status(d); + if (status == 0x44) { *name = lme_firmware_switch(d, 0); return COLD; } - return 0; + if (status != 0x47) + return -EINVAL; + + return WARM; } static int lme2510_get_stream_config(struct dvb_frontend *fe, u8 *ts_type, diff --git a/drivers/media/usb/dvb-usb/cxusb.c b/drivers/media/usb/dvb-usb/cxusb.c index 9fd43a37154cedb24576903a5e2eceae99719227..b20f03d86e001efa78f8dcd617fb39dd8818c89d 100644 --- a/drivers/media/usb/dvb-usb/cxusb.c +++ b/drivers/media/usb/dvb-usb/cxusb.c @@ -820,6 +820,8 @@ static int dvico_bluebird_xc2028_callback(void *ptr, int component, case XC2028_RESET_CLK: deb_info("%s: XC2028_RESET_CLK %d\n", __func__, arg); break; + case XC2028_I2C_FLUSH: + break; default: deb_info("%s: unknown command %d, arg %d\n", __func__, command, arg); diff --git a/drivers/media/usb/dvb-usb/dib0700_devices.c b/drivers/media/usb/dvb-usb/dib0700_devices.c index caa55402052e05c360ac574f6ce8eb5f248a76c3..2868766893c8535d038c8be72e1187cfe244b2e6 100644 --- a/drivers/media/usb/dvb-usb/dib0700_devices.c +++ b/drivers/media/usb/dvb-usb/dib0700_devices.c @@ -431,6 +431,7 @@ static int stk7700ph_xc3028_callback(void *ptr, int component, state->dib7000p_ops.set_gpio(adap->fe_adap[0].fe, 8, 0, 1); break; case XC2028_RESET_CLK: + case XC2028_I2C_FLUSH: break; default: err("%s: unknown command %d, arg %d\n", __func__, diff --git a/drivers/media/usb/dvb-usb/dibusb-common.c b/drivers/media/usb/dvb-usb/dibusb-common.c index 8207e6900656bef61de4f0df10b37b5cc6110f98..bcacb0f220282d9c3537e8847b178d7f0ec71727 100644 --- a/drivers/media/usb/dvb-usb/dibusb-common.c +++ b/drivers/media/usb/dvb-usb/dibusb-common.c @@ -223,8 +223,20 @@ EXPORT_SYMBOL(dibusb_i2c_algo); int dibusb_read_eeprom_byte(struct dvb_usb_device *d, u8 offs, u8 *val) { - u8 wbuf[1] = { offs }; - return dibusb_i2c_msg(d, 0x50, wbuf, 1, val, 1); + u8 *buf; + int rc; + + buf = kmalloc(2, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf[0] = offs; + + rc = dibusb_i2c_msg(d, 0x50, &buf[0], 1, &buf[1], 1); + *val = buf[1]; + kfree(buf); + + return rc; } EXPORT_SYMBOL(dibusb_read_eeprom_byte); diff --git a/drivers/media/usb/hdpvr/hdpvr-core.c b/drivers/media/usb/hdpvr/hdpvr-core.c index 
a61d8fd63c123e08f80ad88a5bd6c25588710dce..a20b60ac66ca46e556592c17fab009cfe1aa3824 100644 --- a/drivers/media/usb/hdpvr/hdpvr-core.c +++ b/drivers/media/usb/hdpvr/hdpvr-core.c @@ -295,7 +295,7 @@ static int hdpvr_probe(struct usb_interface *interface, /* register v4l2_device early so it can be used for printks */ if (v4l2_device_register(&interface->dev, &dev->v4l2_dev)) { dev_err(&interface->dev, "v4l2_device_register failed\n"); - goto error; + goto error_free_dev; } mutex_init(&dev->io_mutex); @@ -304,7 +304,7 @@ static int hdpvr_probe(struct usb_interface *interface, dev->usbc_buf = kmalloc(64, GFP_KERNEL); if (!dev->usbc_buf) { v4l2_err(&dev->v4l2_dev, "Out of memory\n"); - goto error; + goto error_v4l2_unregister; } init_waitqueue_head(&dev->wait_buffer); @@ -342,13 +342,13 @@ static int hdpvr_probe(struct usb_interface *interface, } if (!dev->bulk_in_endpointAddr) { v4l2_err(&dev->v4l2_dev, "Could not find bulk-in endpoint\n"); - goto error; + goto error_put_usb; } /* init the device */ if (hdpvr_device_init(dev)) { v4l2_err(&dev->v4l2_dev, "device init failed\n"); - goto error; + goto error_put_usb; } mutex_lock(&dev->io_mutex); @@ -356,7 +356,7 @@ static int hdpvr_probe(struct usb_interface *interface, mutex_unlock(&dev->io_mutex); v4l2_err(&dev->v4l2_dev, "allocating transfer buffers failed\n"); - goto error; + goto error_put_usb; } mutex_unlock(&dev->io_mutex); @@ -364,7 +364,7 @@ static int hdpvr_probe(struct usb_interface *interface, retval = hdpvr_register_i2c_adapter(dev); if (retval < 0) { v4l2_err(&dev->v4l2_dev, "i2c adapter register failed\n"); - goto error; + goto error_free_buffers; } client = hdpvr_register_ir_rx_i2c(dev); @@ -397,13 +397,17 @@ static int hdpvr_probe(struct usb_interface *interface, reg_fail: #if IS_ENABLED(CONFIG_I2C) i2c_del_adapter(&dev->i2c_adapter); +error_free_buffers: #endif + hdpvr_free_buffers(dev); +error_put_usb: + usb_put_dev(dev->udev); + kfree(dev->usbc_buf); +error_v4l2_unregister: + v4l2_device_unregister(&dev->v4l2_dev); +error_free_dev: + kfree(dev); error: - if (dev) { - flush_work(&dev->worker); - /* this frees allocated memory */ - hdpvr_delete(dev); - } return retval; } diff --git a/drivers/media/usb/usbtv/usbtv-core.c b/drivers/media/usb/usbtv/usbtv-core.c index dc76fd41e00fc4c0875670619bc2543c24c9210f..0324633ede42e80c11c1e8be1a260ec4546bd20a 100644 --- a/drivers/media/usb/usbtv/usbtv-core.c +++ b/drivers/media/usb/usbtv/usbtv-core.c @@ -141,6 +141,7 @@ static void usbtv_disconnect(struct usb_interface *intf) static struct usb_device_id usbtv_id_table[] = { { USB_DEVICE(0x1b71, 0x3002) }, + { USB_DEVICE(0x1f71, 0x3301) }, {} }; MODULE_DEVICE_TABLE(usb, usbtv_id_table); diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index dc51dd86377de9162325fcade0a030a8403df62f..48a39222fdf902a21a007688213c1dac9c58a85a 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -18,8 +18,18 @@ #include #include #include +#include +#include #include +/* Use the same argument order as copy_in_user */ +#define assign_in_user(to, from) \ +({ \ + typeof(*from) __assign_tmp; \ + \ + get_user(__assign_tmp, from) || put_user(__assign_tmp, to); \ +}) + static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long ret = -ENOIOCTLCMD; @@ -33,131 +43,88 @@ static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct v4l2_clip32 { struct v4l2_rect c; - compat_caddr_t next; + compat_caddr_t 
next; }; struct v4l2_window32 { struct v4l2_rect w; - __u32 field; /* enum v4l2_field */ + __u32 field; /* enum v4l2_field */ __u32 chromakey; compat_caddr_t clips; /* actually struct v4l2_clip32 * */ __u32 clipcount; compat_caddr_t bitmap; + __u8 global_alpha; }; -static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) -{ - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_window32)) || - copy_from_user(&kp->w, &up->w, sizeof(up->w)) || - get_user(kp->field, &up->field) || - get_user(kp->chromakey, &up->chromakey) || - get_user(kp->clipcount, &up->clipcount)) - return -EFAULT; - if (kp->clipcount > 2048) - return -EINVAL; - if (kp->clipcount) { - struct v4l2_clip32 __user *uclips; - struct v4l2_clip __user *kclips; - int n = kp->clipcount; - compat_caddr_t p; - - if (get_user(p, &up->clips)) - return -EFAULT; - uclips = compat_ptr(p); - kclips = compat_alloc_user_space(n * sizeof(struct v4l2_clip)); - kp->clips = kclips; - while (--n >= 0) { - if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) - return -EFAULT; - if (put_user(n ? kclips + 1 : NULL, &kclips->next)) - return -EFAULT; - uclips += 1; - kclips += 1; - } - } else - kp->clips = NULL; - return 0; -} - -static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) -{ - if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) || - put_user(kp->field, &up->field) || - put_user(kp->chromakey, &up->chromakey) || - put_user(kp->clipcount, &up->clipcount)) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pix_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format_mplane))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pix_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) +static int get_v4l2_window32(struct v4l2_window __user *kp, + struct v4l2_window32 __user *up, + void __user *aux_buf, u32 aux_space) { - if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format_mplane))) + struct v4l2_clip32 __user *uclips; + struct v4l2_clip __user *kclips; + compat_caddr_t p; + u32 clipcount; + + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + copy_in_user(&kp->w, &up->w, sizeof(up->w)) || + assign_in_user(&kp->field, &up->field) || + assign_in_user(&kp->chromakey, &up->chromakey) || + assign_in_user(&kp->global_alpha, &up->global_alpha) || + get_user(clipcount, &up->clipcount) || + put_user(clipcount, &kp->clipcount)) return -EFAULT; - return 0; -} + if (clipcount > 2048) + return -EINVAL; + if (!clipcount) + return put_user(NULL, &kp->clips); -static inline int get_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vbi_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_vbi_format))) + if (get_user(p, &up->clips)) return -EFAULT; - return 0; -} - -static inline int put_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vbi_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_vbi_format))) + uclips = compat_ptr(p); + if (aux_space < clipcount * sizeof(*kclips)) return -EFAULT; - return 
0; -} - -static inline int get_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_sliced_vbi_format))) + kclips = aux_buf; + if (put_user(kclips, &kp->clips)) return -EFAULT; - return 0; -} -static inline int put_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_sliced_vbi_format))) - return -EFAULT; + while (clipcount--) { + if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) + return -EFAULT; + if (put_user(clipcount ? kclips + 1 : NULL, &kclips->next)) + return -EFAULT; + uclips++; + kclips++; + } return 0; } -static inline int get_v4l2_sdr_format(struct v4l2_sdr_format *kp, struct v4l2_sdr_format __user *up) +static int put_v4l2_window32(struct v4l2_window __user *kp, + struct v4l2_window32 __user *up) { - if (copy_from_user(kp, up, sizeof(struct v4l2_sdr_format))) + struct v4l2_clip __user *kclips = kp->clips; + struct v4l2_clip32 __user *uclips; + compat_caddr_t p; + u32 clipcount; + + if (copy_in_user(&up->w, &kp->w, sizeof(kp->w)) || + assign_in_user(&up->field, &kp->field) || + assign_in_user(&up->chromakey, &kp->chromakey) || + assign_in_user(&up->global_alpha, &kp->global_alpha) || + get_user(clipcount, &kp->clipcount) || + put_user(clipcount, &up->clipcount)) return -EFAULT; - return 0; -} + if (!clipcount) + return 0; -static inline int put_v4l2_sdr_format(struct v4l2_sdr_format *kp, struct v4l2_sdr_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_sdr_format))) + if (get_user(p, &up->clips)) return -EFAULT; + uclips = compat_ptr(p); + while (clipcount--) { + if (copy_in_user(&uclips->c, &kclips->c, sizeof(uclips->c))) + return -EFAULT; + uclips++; + kclips++; + } return 0; } @@ -191,97 +158,158 @@ struct v4l2_create_buffers32 { __u32 reserved[8]; }; -static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int __bufsize_v4l2_format(struct v4l2_format32 __user *up, u32 *size) +{ + u32 type; + + if (get_user(type, &up->type)) + return -EFAULT; + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_OVERLAY: + case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: { + u32 clipcount; + + if (get_user(clipcount, &up->fmt.win.clipcount)) + return -EFAULT; + if (clipcount > 2048) + return -EINVAL; + *size = clipcount * sizeof(struct v4l2_clip); + return 0; + } + default: + *size = 0; + return 0; + } +} + +static int bufsize_v4l2_format(struct v4l2_format32 __user *up, u32 *size) { - if (get_user(kp->type, &up->type)) + if (!access_ok(VERIFY_READ, up, sizeof(*up))) return -EFAULT; + return __bufsize_v4l2_format(up, size); +} - switch (kp->type) { +static int __get_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up, + void __user *aux_buf, u32 aux_space) +{ + u32 type; + + if (get_user(type, &up->type) || put_user(type, &kp->type)) + return -EFAULT; + + switch (type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return get_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix); + return copy_in_user(&kp->fmt.pix, &up->fmt.pix, + sizeof(kp->fmt.pix)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return get_v4l2_pix_format_mplane(&kp->fmt.pix_mp, - &up->fmt.pix_mp); + return copy_in_user(&kp->fmt.pix_mp, &up->fmt.pix_mp, + sizeof(kp->fmt.pix_mp)) ? 
-EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_OVERLAY: case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: - return get_v4l2_window32(&kp->fmt.win, &up->fmt.win); + return get_v4l2_window32(&kp->fmt.win, &up->fmt.win, + aux_buf, aux_space); case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: - return get_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi); + return copy_in_user(&kp->fmt.vbi, &up->fmt.vbi, + sizeof(kp->fmt.vbi)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return get_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced); + return copy_in_user(&kp->fmt.sliced, &up->fmt.sliced, + sizeof(kp->fmt.sliced)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - return get_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); + return copy_in_user(&kp->fmt.sdr, &up->fmt.sdr, + sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: - pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); return -EINVAL; } } -static int get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int get_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up, + void __user *aux_buf, u32 aux_space) { - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_format32))) + if (!access_ok(VERIFY_READ, up, sizeof(*up))) return -EFAULT; - return __get_v4l2_format32(kp, up); + return __get_v4l2_format32(kp, up, aux_buf, aux_space); } -static int get_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) +static int bufsize_v4l2_create(struct v4l2_create_buffers32 __user *up, + u32 *size) { - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_create_buffers32)) || - copy_from_user(kp, up, offsetof(struct v4l2_create_buffers32, format))) + if (!access_ok(VERIFY_READ, up, sizeof(*up))) return -EFAULT; - return __get_v4l2_format32(&kp->format, &up->format); + return __bufsize_v4l2_format(&up->format, size); } -static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int get_v4l2_create32(struct v4l2_create_buffers __user *kp, + struct v4l2_create_buffers32 __user *up, + void __user *aux_buf, u32 aux_space) { - if (put_user(kp->type, &up->type)) + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + copy_in_user(kp, up, + offsetof(struct v4l2_create_buffers32, format))) return -EFAULT; + return __get_v4l2_format32(&kp->format, &up->format, + aux_buf, aux_space); +} + +static int __put_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up) +{ + u32 type; - switch (kp->type) { + if (get_user(type, &kp->type)) + return -EFAULT; + + switch (type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return put_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix); + return copy_in_user(&up->fmt.pix, &kp->fmt.pix, + sizeof(kp->fmt.pix)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return put_v4l2_pix_format_mplane(&kp->fmt.pix_mp, - &up->fmt.pix_mp); + return copy_in_user(&up->fmt.pix_mp, &kp->fmt.pix_mp, + sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_OVERLAY: case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: return put_v4l2_window32(&kp->fmt.win, &up->fmt.win); case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: - return put_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi); + return copy_in_user(&up->fmt.vbi, &kp->fmt.vbi, + sizeof(kp->fmt.vbi)) ? 
-EFAULT : 0; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return put_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced); + return copy_in_user(&up->fmt.sliced, &kp->fmt.sliced, + sizeof(kp->fmt.sliced)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - return put_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); + return copy_in_user(&up->fmt.sdr, &kp->fmt.sdr, + sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: - pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); return -EINVAL; } } -static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int put_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_format32))) + if (!access_ok(VERIFY_WRITE, up, sizeof(*up))) return -EFAULT; return __put_v4l2_format32(kp, up); } -static int put_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) +static int put_v4l2_create32(struct v4l2_create_buffers __user *kp, + struct v4l2_create_buffers32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_create_buffers32)) || - copy_to_user(up, kp, offsetof(struct v4l2_create_buffers32, format)) || - copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || + copy_in_user(up, kp, + offsetof(struct v4l2_create_buffers32, format)) || + copy_in_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; return __put_v4l2_format32(&kp->format, &up->format); } @@ -295,25 +323,28 @@ struct v4l2_standard32 { __u32 reserved[4]; }; -static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) +static int get_v4l2_standard32(struct v4l2_standard __user *kp, + struct v4l2_standard32 __user *up) { /* other fields are not set by the user, nor used by the driver */ - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_standard32)) || - get_user(kp->index, &up->index)) + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + assign_in_user(&kp->index, &up->index)) return -EFAULT; return 0; } -static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) +static int put_v4l2_standard32(struct v4l2_standard __user *kp, + struct v4l2_standard32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_standard32)) || - put_user(kp->index, &up->index) || - put_user(kp->id, &up->id) || - copy_to_user(up->name, kp->name, 24) || - copy_to_user(&up->frameperiod, &kp->frameperiod, sizeof(kp->frameperiod)) || - put_user(kp->framelines, &up->framelines) || - copy_to_user(up->reserved, kp->reserved, 4 * sizeof(__u32))) - return -EFAULT; + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || + assign_in_user(&up->index, &kp->index) || + assign_in_user(&up->id, &kp->id) || + copy_in_user(up->name, kp->name, sizeof(up->name)) || + copy_in_user(&up->frameperiod, &kp->frameperiod, + sizeof(up->frameperiod)) || + assign_in_user(&up->framelines, &kp->framelines) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved))) + return -EFAULT; return 0; } @@ -352,134 +383,186 @@ struct v4l2_buffer32 { __u32 reserved; }; -static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, - enum v4l2_memory memory) +static int get_v4l2_plane32(struct v4l2_plane __user *up, + struct v4l2_plane32 __user *up32, + enum v4l2_memory memory) { - void __user *up_pln; - compat_long_t p; + compat_ulong_t p; 
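+	/*
+	 * Editor's note (illustrative comment, not part of the original
+	 * patch): the v4l2_plane 'm' union overlays mem_offset (__u32),
+	 * userptr (unsigned long) and fd (__s32). Only userptr changes
+	 * width between a 32-bit caller and a 64-bit kernel, so it alone
+	 * needs the get_user()/compat_ptr()/put_user() round trip below;
+	 * the fixed-size members can be copied bit-for-bit with
+	 * copy_in_user().
+	 */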
if (copy_in_user(up, up32, 2 * sizeof(__u32)) || - copy_in_user(&up->data_offset, &up32->data_offset, - sizeof(__u32))) + copy_in_user(&up->data_offset, &up32->data_offset, + sizeof(up->data_offset))) return -EFAULT; - if (memory == V4L2_MEMORY_USERPTR) { - if (get_user(p, &up32->m.userptr)) - return -EFAULT; - up_pln = compat_ptr(p); - if (put_user((unsigned long)up_pln, &up->m.userptr)) + switch (memory) { + case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: + if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, + sizeof(up32->m.mem_offset))) return -EFAULT; - } else if (memory == V4L2_MEMORY_DMABUF) { - if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(int))) + break; + case V4L2_MEMORY_USERPTR: + if (get_user(p, &up32->m.userptr) || + put_user((unsigned long)compat_ptr(p), &up->m.userptr)) return -EFAULT; - } else { - if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, - sizeof(__u32))) + break; + case V4L2_MEMORY_DMABUF: + if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(up32->m.fd))) return -EFAULT; + break; } return 0; } -static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, - enum v4l2_memory memory) +static int put_v4l2_plane32(struct v4l2_plane __user *up, + struct v4l2_plane32 __user *up32, + enum v4l2_memory memory) { + unsigned long p; + if (copy_in_user(up32, up, 2 * sizeof(__u32)) || - copy_in_user(&up32->data_offset, &up->data_offset, - sizeof(__u32))) + copy_in_user(&up32->data_offset, &up->data_offset, + sizeof(up->data_offset))) return -EFAULT; - /* For MMAP, driver might've set up the offset, so copy it back. - * USERPTR stays the same (was userspace-provided), so no copying. */ - if (memory == V4L2_MEMORY_MMAP) + switch (memory) { + case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset, - sizeof(__u32))) + sizeof(up->m.mem_offset))) return -EFAULT; - /* For DMABUF, driver might've set up the fd, so copy it back. 
*/ - if (memory == V4L2_MEMORY_DMABUF) - if (copy_in_user(&up32->m.fd, &up->m.fd, - sizeof(int))) + break; + case V4L2_MEMORY_USERPTR: + if (get_user(p, &up->m.userptr) || + put_user((compat_ulong_t)ptr_to_compat((__force void *)p), + &up32->m.userptr)) + return -EFAULT; + break; + case V4L2_MEMORY_DMABUF: + if (copy_in_user(&up32->m.fd, &up->m.fd, sizeof(up->m.fd))) return -EFAULT; + break; + } return 0; } -static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user *up) +static int bufsize_v4l2_buffer(struct v4l2_buffer32 __user *up, u32 *size) { + u32 type; + u32 length; + + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + get_user(type, &up->type) || + get_user(length, &up->length)) + return -EFAULT; + + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + if (length > VIDEO_MAX_PLANES) + return -EINVAL; + + /* + * We don't really care if userspace decides to kill itself + * by passing a very big length value + */ + *size = length * sizeof(struct v4l2_plane); + } else { + *size = 0; + } + return 0; +} + +static int get_v4l2_buffer32(struct v4l2_buffer __user *kp, + struct v4l2_buffer32 __user *up, + void __user *aux_buf, u32 aux_space) +{ + u32 type; + u32 length; + enum v4l2_memory memory; struct v4l2_plane32 __user *uplane32; struct v4l2_plane __user *uplane; compat_caddr_t p; - int num_planes; int ret; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_buffer32)) || - get_user(kp->index, &up->index) || - get_user(kp->type, &up->type) || - get_user(kp->flags, &up->flags) || - get_user(kp->memory, &up->memory) || - get_user(kp->length, &up->length)) - return -EFAULT; + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + assign_in_user(&kp->index, &up->index) || + get_user(type, &up->type) || + put_user(type, &kp->type) || + assign_in_user(&kp->flags, &up->flags) || + get_user(memory, &up->memory) || + put_user(memory, &kp->memory) || + get_user(length, &up->length) || + put_user(length, &kp->length)) + return -EFAULT; - if (V4L2_TYPE_IS_OUTPUT(kp->type)) - if (get_user(kp->bytesused, &up->bytesused) || - get_user(kp->field, &up->field) || - get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - get_user(kp->timestamp.tv_usec, - &up->timestamp.tv_usec)) + if (V4L2_TYPE_IS_OUTPUT(type)) + if (assign_in_user(&kp->bytesused, &up->bytesused) || + assign_in_user(&kp->field, &up->field) || + assign_in_user(&kp->timestamp.tv_sec, + &up->timestamp.tv_sec) || + assign_in_user(&kp->timestamp.tv_usec, + &up->timestamp.tv_usec)) return -EFAULT; - if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { - num_planes = kp->length; + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + u32 num_planes = length; + if (num_planes == 0) { - kp->m.planes = NULL; - /* num_planes == 0 is legal, e.g. when userspace doesn't - * need planes array on DQBUF*/ - return 0; + /* + * num_planes == 0 is legal, e.g. 
when userspace doesn't + * need planes array on DQBUF + */ + return put_user(NULL, &kp->m.planes); } + if (num_planes > VIDEO_MAX_PLANES) + return -EINVAL; if (get_user(p, &up->m.planes)) return -EFAULT; uplane32 = compat_ptr(p); if (!access_ok(VERIFY_READ, uplane32, - num_planes * sizeof(struct v4l2_plane32))) + num_planes * sizeof(*uplane32))) return -EFAULT; - /* We don't really care if userspace decides to kill itself - * by passing a very big num_planes value */ - uplane = compat_alloc_user_space(num_planes * - sizeof(struct v4l2_plane)); - kp->m.planes = (__force struct v4l2_plane *)uplane; + /* + * We don't really care if userspace decides to kill itself + * by passing a very big num_planes value + */ + if (aux_space < num_planes * sizeof(*uplane)) + return -EFAULT; + + uplane = aux_buf; + if (put_user((__force struct v4l2_plane *)uplane, + &kp->m.planes)) + return -EFAULT; - while (--num_planes >= 0) { - ret = get_v4l2_plane32(uplane, uplane32, kp->memory); + while (num_planes--) { + ret = get_v4l2_plane32(uplane, uplane32, memory); if (ret) return ret; - ++uplane; - ++uplane32; + uplane++; + uplane32++; } } else { - switch (kp->memory) { + switch (memory) { case V4L2_MEMORY_MMAP: - if (get_user(kp->m.offset, &up->m.offset)) + case V4L2_MEMORY_OVERLAY: + if (assign_in_user(&kp->m.offset, &up->m.offset)) return -EFAULT; break; - case V4L2_MEMORY_USERPTR: - { - compat_long_t tmp; + case V4L2_MEMORY_USERPTR: { + compat_ulong_t userptr; - if (get_user(tmp, &up->m.userptr)) - return -EFAULT; - - kp->m.userptr = (unsigned long)compat_ptr(tmp); - } - break; - case V4L2_MEMORY_OVERLAY: - if (get_user(kp->m.offset, &up->m.offset)) + if (get_user(userptr, &up->m.userptr) || + put_user((unsigned long)compat_ptr(userptr), + &kp->m.userptr)) return -EFAULT; break; + } case V4L2_MEMORY_DMABUF: - if (get_user(kp->m.fd, &up->m.fd)) + if (assign_in_user(&kp->m.fd, &up->m.fd)) return -EFAULT; break; } @@ -488,65 +571,70 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user return 0; } -static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user *up) +static int put_v4l2_buffer32(struct v4l2_buffer __user *kp, + struct v4l2_buffer32 __user *up) { + u32 type; + u32 length; + enum v4l2_memory memory; struct v4l2_plane32 __user *uplane32; struct v4l2_plane __user *uplane; compat_caddr_t p; - int num_planes; int ret; - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_buffer32)) || - put_user(kp->index, &up->index) || - put_user(kp->type, &up->type) || - put_user(kp->flags, &up->flags) || - put_user(kp->memory, &up->memory)) - return -EFAULT; + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || + assign_in_user(&up->index, &kp->index) || + get_user(type, &kp->type) || + put_user(type, &up->type) || + assign_in_user(&up->flags, &kp->flags) || + get_user(memory, &kp->memory) || + put_user(memory, &up->memory)) + return -EFAULT; - if (put_user(kp->bytesused, &up->bytesused) || - put_user(kp->field, &up->field) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || - copy_to_user(&up->timecode, &kp->timecode, sizeof(struct v4l2_timecode)) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->reserved2, &up->reserved2) || - put_user(kp->reserved, &up->reserved) || - put_user(kp->length, &up->length)) - return -EFAULT; + if (assign_in_user(&up->bytesused, &kp->bytesused) || + assign_in_user(&up->field, &kp->field) || + assign_in_user(&up->timestamp.tv_sec, &kp->timestamp.tv_sec) || + 
assign_in_user(&up->timestamp.tv_usec, &kp->timestamp.tv_usec) || + copy_in_user(&up->timecode, &kp->timecode, sizeof(kp->timecode)) || + assign_in_user(&up->sequence, &kp->sequence) || + assign_in_user(&up->reserved2, &kp->reserved2) || + assign_in_user(&up->reserved, &kp->reserved) || + get_user(length, &kp->length) || + put_user(length, &up->length)) + return -EFAULT; + + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + u32 num_planes = length; - if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { - num_planes = kp->length; if (num_planes == 0) return 0; - uplane = (__force struct v4l2_plane __user *)kp->m.planes; + if (get_user(uplane, ((__force struct v4l2_plane __user **)&kp->m.planes))) + return -EFAULT; if (get_user(p, &up->m.planes)) return -EFAULT; uplane32 = compat_ptr(p); - while (--num_planes >= 0) { - ret = put_v4l2_plane32(uplane, uplane32, kp->memory); + while (num_planes--) { + ret = put_v4l2_plane32(uplane, uplane32, memory); if (ret) return ret; ++uplane; ++uplane32; } } else { - switch (kp->memory) { + switch (memory) { case V4L2_MEMORY_MMAP: - if (put_user(kp->m.offset, &up->m.offset)) + case V4L2_MEMORY_OVERLAY: + if (assign_in_user(&up->m.offset, &kp->m.offset)) return -EFAULT; break; case V4L2_MEMORY_USERPTR: - if (put_user(kp->m.userptr, &up->m.userptr)) - return -EFAULT; - break; - case V4L2_MEMORY_OVERLAY: - if (put_user(kp->m.offset, &up->m.offset)) + if (assign_in_user(&up->m.userptr, &kp->m.userptr)) return -EFAULT; break; case V4L2_MEMORY_DMABUF: - if (put_user(kp->m.fd, &up->m.fd)) + if (assign_in_user(&up->m.fd, &kp->m.fd)) return -EFAULT; break; } @@ -558,7 +646,7 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user struct v4l2_framebuffer32 { __u32 capability; __u32 flags; - compat_caddr_t base; + compat_caddr_t base; struct { __u32 width; __u32 height; @@ -571,30 +659,33 @@ struct v4l2_framebuffer32 { } fmt; }; -static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_framebuffer32 __user *up) +static int get_v4l2_framebuffer32(struct v4l2_framebuffer __user *kp, + struct v4l2_framebuffer32 __user *up) { - u32 tmp; - - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_framebuffer32)) || - get_user(tmp, &up->base) || - get_user(kp->capability, &up->capability) || - get_user(kp->flags, &up->flags) || - copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt))) - return -EFAULT; - kp->base = (__force void *)compat_ptr(tmp); + compat_caddr_t tmp; + + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + get_user(tmp, &up->base) || + put_user((__force void *)compat_ptr(tmp), &kp->base) || + assign_in_user(&kp->capability, &up->capability) || + assign_in_user(&kp->flags, &up->flags) || + copy_in_user(&kp->fmt, &up->fmt, sizeof(kp->fmt))) + return -EFAULT; return 0; } -static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_framebuffer32 __user *up) +static int put_v4l2_framebuffer32(struct v4l2_framebuffer __user *kp, + struct v4l2_framebuffer32 __user *up) { - u32 tmp = (u32)((unsigned long)kp->base); - - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_framebuffer32)) || - put_user(tmp, &up->base) || - put_user(kp->capability, &up->capability) || - put_user(kp->flags, &up->flags) || - copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt))) - return -EFAULT; + void *base; + + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || + get_user(base, &kp->base) || + put_user(ptr_to_compat(base), &up->base) || + assign_in_user(&up->capability, &kp->capability) || + assign_in_user(&up->flags, &kp->flags) || + copy_in_user(&up->fmt, 
&kp->fmt, sizeof(kp->fmt))) + return -EFAULT; return 0; } @@ -606,21 +697,26 @@ struct v4l2_input32 { __u32 tuner; /* Associated tuner */ compat_u64 std; __u32 status; - __u32 reserved[4]; + __u32 capabilities; + __u32 reserved[3]; }; -/* The 64-bit v4l2_input struct has extra padding at the end of the struct. - Otherwise it is identical to the 32-bit version. */ -static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) +/* + * The 64-bit v4l2_input struct has extra padding at the end of the struct. + * Otherwise it is identical to the 32-bit version. + */ +static inline int get_v4l2_input32(struct v4l2_input __user *kp, + struct v4l2_input32 __user *up) { - if (copy_from_user(kp, up, sizeof(struct v4l2_input32))) + if (copy_in_user(kp, up, sizeof(*up))) return -EFAULT; return 0; } -static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) +static inline int put_v4l2_input32(struct v4l2_input __user *kp, + struct v4l2_input32 __user *up) { - if (copy_to_user(up, kp, sizeof(struct v4l2_input32))) + if (copy_in_user(up, kp, sizeof(*up))) return -EFAULT; return 0; } @@ -644,58 +740,95 @@ struct v4l2_ext_control32 { }; } __attribute__ ((packed)); -/* The following function really belong in v4l2-common, but that causes - a circular dependency between modules. We need to think about this, but - for now this will do. */ - -/* Return non-zero if this control is a pointer type. Currently only - type STRING is a pointer type. */ -static inline int ctrl_is_pointer(u32 id) +/* Return true if this control is a pointer type. */ +static inline bool ctrl_is_pointer(struct file *file, u32 id) { - switch (id) { - case V4L2_CID_RDS_TX_PS_NAME: - case V4L2_CID_RDS_TX_RADIO_TEXT: - return 1; - default: - return 0; + struct video_device *vdev = video_devdata(file); + struct v4l2_fh *fh = NULL; + struct v4l2_ctrl_handler *hdl = NULL; + struct v4l2_query_ext_ctrl qec = { id }; + const struct v4l2_ioctl_ops *ops = vdev->ioctl_ops; + + if (test_bit(V4L2_FL_USES_V4L2_FH, &vdev->flags)) + fh = file->private_data; + + if (fh && fh->ctrl_handler) + hdl = fh->ctrl_handler; + else if (vdev->ctrl_handler) + hdl = vdev->ctrl_handler; + + if (hdl) { + struct v4l2_ctrl *ctrl = v4l2_ctrl_find(hdl, id); + + return ctrl && ctrl->is_ptr; } + + if (!ops || !ops->vidioc_query_ext_ctrl) + return false; + + return !ops->vidioc_query_ext_ctrl(file, fh, &qec) && + (qec.flags & V4L2_CTRL_FLAG_HAS_PAYLOAD); +} + +static int bufsize_v4l2_ext_controls(struct v4l2_ext_controls32 __user *up, + u32 *size) +{ + u32 count; + + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + get_user(count, &up->count)) + return -EFAULT; + if (count > V4L2_CID_MAX_CTRLS) + return -EINVAL; + *size = count * sizeof(struct v4l2_ext_control); + return 0; } -static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up) +static int get_v4l2_ext_controls32(struct file *file, + struct v4l2_ext_controls __user *kp, + struct v4l2_ext_controls32 __user *up, + void __user *aux_buf, u32 aux_space) { struct v4l2_ext_control32 __user *ucontrols; struct v4l2_ext_control __user *kcontrols; - int n; + u32 count; + u32 n; compat_caddr_t p; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_ext_controls32)) || - get_user(kp->which, &up->which) || - get_user(kp->count, &up->count) || - get_user(kp->error_idx, &up->error_idx) || - copy_from_user(kp->reserved, up->reserved, - sizeof(kp->reserved))) - return -EFAULT; - n = kp->count; - if (n == 0) { - kp->controls = NULL; - 
return 0; - } + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + assign_in_user(&kp->which, &up->which) || + get_user(count, &up->count) || + put_user(count, &kp->count) || + assign_in_user(&kp->error_idx, &up->error_idx) || + copy_in_user(kp->reserved, up->reserved, sizeof(kp->reserved))) + return -EFAULT; + + if (count == 0) + return put_user(NULL, &kp->controls); + if (count > V4L2_CID_MAX_CTRLS) + return -EINVAL; if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_READ, ucontrols, - n * sizeof(struct v4l2_ext_control32))) + if (!access_ok(VERIFY_READ, ucontrols, count * sizeof(*ucontrols))) + return -EFAULT; + if (aux_space < count * sizeof(*kcontrols)) return -EFAULT; - kcontrols = compat_alloc_user_space(n * sizeof(struct v4l2_ext_control)); - kp->controls = (__force struct v4l2_ext_control *)kcontrols; - while (--n >= 0) { + kcontrols = aux_buf; + if (put_user((__force struct v4l2_ext_control *)kcontrols, + &kp->controls)) + return -EFAULT; + + for (n = 0; n < count; n++) { u32 id; if (copy_in_user(kcontrols, ucontrols, sizeof(*ucontrols))) return -EFAULT; + if (get_user(id, &kcontrols->id)) return -EFAULT; - if (ctrl_is_pointer(id)) { + + if (ctrl_is_pointer(file, id)) { void __user *s; if (get_user(p, &ucontrols->string)) @@ -710,43 +843,55 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return 0; } -static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up) +static int put_v4l2_ext_controls32(struct file *file, + struct v4l2_ext_controls __user *kp, + struct v4l2_ext_controls32 __user *up) { struct v4l2_ext_control32 __user *ucontrols; - struct v4l2_ext_control __user *kcontrols = - (__force struct v4l2_ext_control __user *)kp->controls; - int n = kp->count; + struct v4l2_ext_control __user *kcontrols; + u32 count; + u32 n; compat_caddr_t p; - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_ext_controls32)) || - put_user(kp->which, &up->which) || - put_user(kp->count, &up->count) || - put_user(kp->error_idx, &up->error_idx) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) - return -EFAULT; - if (!kp->count) - return 0; + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || + assign_in_user(&up->which, &kp->which) || + get_user(count, &kp->count) || + put_user(count, &up->count) || + assign_in_user(&up->error_idx, &kp->error_idx) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved)) || + get_user(kcontrols, &kp->controls)) + return -EFAULT; + if (!count) + return 0; if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_WRITE, ucontrols, - n * sizeof(struct v4l2_ext_control32))) + if (!access_ok(VERIFY_WRITE, ucontrols, count * sizeof(*ucontrols))) return -EFAULT; - while (--n >= 0) { - unsigned size = sizeof(*ucontrols); + for (n = 0; n < count; n++) { + unsigned int size = sizeof(*ucontrols); u32 id; - if (get_user(id, &kcontrols->id)) + if (get_user(id, &kcontrols->id) || + put_user(id, &ucontrols->id) || + assign_in_user(&ucontrols->size, &kcontrols->size) || + copy_in_user(&ucontrols->reserved2, &kcontrols->reserved2, + sizeof(ucontrols->reserved2))) return -EFAULT; - /* Do not modify the pointer when copying a pointer control. - The contents of the pointer was changed, not the pointer - itself. */ - if (ctrl_is_pointer(id)) + + /* + * Do not modify the pointer when copying a pointer control. + * The contents of the pointer was changed, not the pointer + * itself. 
+ */ + if (ctrl_is_pointer(file, id)) size -= sizeof(ucontrols->value64); + if (copy_in_user(ucontrols, kcontrols, size)) return -EFAULT; + ucontrols++; kcontrols++; } @@ -766,18 +911,19 @@ struct v4l2_event32 { __u32 reserved[8]; }; -static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *up) +static int put_v4l2_event32(struct v4l2_event __user *kp, + struct v4l2_event32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_event32)) || - put_user(kp->type, &up->type) || - copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || - put_user(kp->pending, &up->pending) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || - put_user(kp->id, &up->id) || - copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32))) - return -EFAULT; + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || + assign_in_user(&up->type, &kp->type) || + copy_in_user(&up->u, &kp->u, sizeof(kp->u)) || + assign_in_user(&up->pending, &kp->pending) || + assign_in_user(&up->sequence, &kp->sequence) || + assign_in_user(&up->timestamp.tv_sec, &kp->timestamp.tv_sec) || + assign_in_user(&up->timestamp.tv_nsec, &kp->timestamp.tv_nsec) || + assign_in_user(&up->id, &kp->id) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved))) + return -EFAULT; return 0; } @@ -789,32 +935,35 @@ struct v4l2_edid32 { compat_caddr_t edid; }; -static int get_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) +static int get_v4l2_edid32(struct v4l2_edid __user *kp, + struct v4l2_edid32 __user *up) { - u32 tmp; - - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_edid32)) || - get_user(kp->pad, &up->pad) || - get_user(kp->start_block, &up->start_block) || - get_user(kp->blocks, &up->blocks) || - get_user(tmp, &up->edid) || - copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) - return -EFAULT; - kp->edid = (__force u8 *)compat_ptr(tmp); + compat_uptr_t tmp; + + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + assign_in_user(&kp->pad, &up->pad) || + assign_in_user(&kp->start_block, &up->start_block) || + assign_in_user(&kp->blocks, &up->blocks) || + get_user(tmp, &up->edid) || + put_user(compat_ptr(tmp), &kp->edid) || + copy_in_user(kp->reserved, up->reserved, sizeof(kp->reserved))) + return -EFAULT; return 0; } -static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) +static int put_v4l2_edid32(struct v4l2_edid __user *kp, + struct v4l2_edid32 __user *up) { - u32 tmp = (u32)((unsigned long)kp->edid); - - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_edid32)) || - put_user(kp->pad, &up->pad) || - put_user(kp->start_block, &up->start_block) || - put_user(kp->blocks, &up->blocks) || - put_user(tmp, &up->edid) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) - return -EFAULT; + void *edid; + + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || + assign_in_user(&up->pad, &kp->pad) || + assign_in_user(&up->start_block, &kp->start_block) || + assign_in_user(&up->blocks, &kp->blocks) || + get_user(edid, &kp->edid) || + put_user(ptr_to_compat(edid), &up->edid) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved))) + return -EFAULT; return 0; } @@ -830,7 +979,7 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) #define VIDIOC_ENUMINPUT32 _IOWR('V', 26, struct v4l2_input32) #define VIDIOC_G_EDID32 _IOWR('V', 40, struct v4l2_edid32) #define VIDIOC_S_EDID32 _IOWR('V', 41, struct v4l2_edid32) 
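+/*
+ * Editor's note (illustrative comment, not part of the original patch):
+ * _IOWR() encodes the size of the argument type into the ioctl number, so
+ * VIDIOC_G_EDID32 above (built from struct v4l2_edid32) yields a different
+ * cmd value than the native VIDIOC_G_EDID (built from struct v4l2_edid).
+ * That is how do_video_ioctl() below can recognize a 32-bit caller and
+ * translate the argument before handing the native cmd to native_ioctl().
+ */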
-#define VIDIOC_TRY_FMT32 _IOWR('V', 64, struct v4l2_format32) +#define VIDIOC_TRY_FMT32 _IOWR('V', 64, struct v4l2_format32) #define VIDIOC_G_EXT_CTRLS32 _IOWR('V', 71, struct v4l2_ext_controls32) #define VIDIOC_S_EXT_CTRLS32 _IOWR('V', 72, struct v4l2_ext_controls32) #define VIDIOC_TRY_EXT_CTRLS32 _IOWR('V', 73, struct v4l2_ext_controls32) @@ -846,22 +995,23 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) #define VIDIOC_G_OUTPUT32 _IOR ('V', 46, s32) #define VIDIOC_S_OUTPUT32 _IOWR('V', 47, s32) +static int alloc_userspace(unsigned int size, u32 aux_space, + void __user **up_native) +{ + *up_native = compat_alloc_user_space(size + aux_space); + if (!*up_native) + return -ENOMEM; + if (clear_user(*up_native, size)) + return -EFAULT; + return 0; +} + static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - union { - struct v4l2_format v2f; - struct v4l2_buffer v2b; - struct v4l2_framebuffer v2fb; - struct v4l2_input v2i; - struct v4l2_standard v2s; - struct v4l2_ext_controls v2ecs; - struct v4l2_event v2ev; - struct v4l2_create_buffers v2crt; - struct v4l2_edid v2edid; - unsigned long vx; - int vi; - } karg; void __user *up = compat_ptr(arg); + void __user *up_native = NULL; + void __user *aux_buf; + u32 aux_space; int compatible_arg = 1; long err = 0; @@ -900,30 +1050,52 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_STREAMOFF: case VIDIOC_S_INPUT: case VIDIOC_S_OUTPUT: - err = get_user(karg.vi, (s32 __user *)up); + err = alloc_userspace(sizeof(unsigned int), 0, &up_native); + if (!err && assign_in_user((unsigned int __user *)up_native, + (compat_uint_t __user *)up)) + err = -EFAULT; compatible_arg = 0; break; case VIDIOC_G_INPUT: case VIDIOC_G_OUTPUT: + err = alloc_userspace(sizeof(unsigned int), 0, &up_native); compatible_arg = 0; break; case VIDIOC_G_EDID: case VIDIOC_S_EDID: - err = get_v4l2_edid32(&karg.v2edid, up); + err = alloc_userspace(sizeof(struct v4l2_edid), 0, &up_native); + if (!err) + err = get_v4l2_edid32(up_native, up); compatible_arg = 0; break; case VIDIOC_G_FMT: case VIDIOC_S_FMT: case VIDIOC_TRY_FMT: - err = get_v4l2_format32(&karg.v2f, up); + err = bufsize_v4l2_format(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_format), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_format); + err = get_v4l2_format32(up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; case VIDIOC_CREATE_BUFS: - err = get_v4l2_create32(&karg.v2crt, up); + err = bufsize_v4l2_create(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_create_buffers), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_create_buffers); + err = get_v4l2_create32(up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; @@ -931,36 +1103,63 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_QUERYBUF: case VIDIOC_QBUF: case VIDIOC_DQBUF: - err = get_v4l2_buffer32(&karg.v2b, up); + err = bufsize_v4l2_buffer(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_buffer), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_buffer); + err = get_v4l2_buffer32(up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; case VIDIOC_S_FBUF: - err = get_v4l2_framebuffer32(&karg.v2fb, up); + err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, + &up_native); + if (!err) + err = 
get_v4l2_framebuffer32(up_native, up); compatible_arg = 0; break; case VIDIOC_G_FBUF: + err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, + &up_native); compatible_arg = 0; break; case VIDIOC_ENUMSTD: - err = get_v4l2_standard32(&karg.v2s, up); + err = alloc_userspace(sizeof(struct v4l2_standard), 0, + &up_native); + if (!err) + err = get_v4l2_standard32(up_native, up); compatible_arg = 0; break; case VIDIOC_ENUMINPUT: - err = get_v4l2_input32(&karg.v2i, up); + err = alloc_userspace(sizeof(struct v4l2_input), 0, &up_native); + if (!err) + err = get_v4l2_input32(up_native, up); compatible_arg = 0; break; case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - err = get_v4l2_ext_controls32(&karg.v2ecs, up); + err = bufsize_v4l2_ext_controls(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_ext_controls), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_ext_controls); + err = get_v4l2_ext_controls32(file, up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; case VIDIOC_DQEVENT: + err = alloc_userspace(sizeof(struct v4l2_event), 0, &up_native); compatible_arg = 0; break; } @@ -969,22 +1168,26 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar if (compatible_arg) err = native_ioctl(file, cmd, (unsigned long)up); - else { - mm_segment_t old_fs = get_fs(); + else + err = native_ioctl(file, cmd, (unsigned long)up_native); - set_fs(KERNEL_DS); - err = native_ioctl(file, cmd, (unsigned long)&karg); - set_fs(old_fs); - } + if (err == -ENOTTY) + return err; - /* Special case: even after an error we need to put the - results back for these ioctls since the error_idx will - contain information on which control failed. */ + /* + * Special case: even after an error we need to put the + * results back for these ioctls since the error_idx will + * contain information on which control failed. 
+ */ switch (cmd) { case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - if (put_v4l2_ext_controls32(&karg.v2ecs, up)) + if (put_v4l2_ext_controls32(file, up_native, up)) + err = -EFAULT; + break; + case VIDIOC_S_EDID: + if (put_v4l2_edid32(up_native, up)) err = -EFAULT; break; } @@ -996,44 +1199,46 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_S_OUTPUT: case VIDIOC_G_INPUT: case VIDIOC_G_OUTPUT: - err = put_user(((s32)karg.vi), (s32 __user *)up); + if (assign_in_user((compat_uint_t __user *)up, + ((unsigned int __user *)up_native))) + err = -EFAULT; break; case VIDIOC_G_FBUF: - err = put_v4l2_framebuffer32(&karg.v2fb, up); + err = put_v4l2_framebuffer32(up_native, up); break; case VIDIOC_DQEVENT: - err = put_v4l2_event32(&karg.v2ev, up); + err = put_v4l2_event32(up_native, up); break; case VIDIOC_G_EDID: - case VIDIOC_S_EDID: - err = put_v4l2_edid32(&karg.v2edid, up); + err = put_v4l2_edid32(up_native, up); break; case VIDIOC_G_FMT: case VIDIOC_S_FMT: case VIDIOC_TRY_FMT: - err = put_v4l2_format32(&karg.v2f, up); + err = put_v4l2_format32(up_native, up); break; case VIDIOC_CREATE_BUFS: - err = put_v4l2_create32(&karg.v2crt, up); + err = put_v4l2_create32(up_native, up); break; + case VIDIOC_PREPARE_BUF: case VIDIOC_QUERYBUF: case VIDIOC_QBUF: case VIDIOC_DQBUF: - err = put_v4l2_buffer32(&karg.v2b, up); + err = put_v4l2_buffer32(up_native, up); break; case VIDIOC_ENUMSTD: - err = put_v4l2_standard32(&karg.v2s, up); + err = put_v4l2_standard32(up_native, up); break; case VIDIOC_ENUMINPUT: - err = put_v4l2_input32(&karg.v2i, up); + err = put_v4l2_input32(up_native, up); break; } return err; diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index c52d94c018bb25839bfc3e5aebe0e8910bdfcc2f..4510e8a3724408a4110b2d2dbb42db5df86f0598 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -2862,8 +2862,11 @@ video_usercopy(struct file *file, unsigned int cmd, unsigned long arg, /* Handles IOCTL */ err = func(file, cmd, parg); - if (err == -ENOIOCTLCMD) + if (err == -ENOTTY || err == -ENOIOCTLCMD) { err = -ENOTTY; + goto out; + } + if (err == 0) { if (cmd == VIDIOC_DQBUF) trace_v4l2_dqbuf(video_devdata(file)->minor, parg); diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index 5457c361ad586424050c98958bdefd22a4c17db7..bf0fe0137dfed2c893001abb8cfe8b83861dae08 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -1947,9 +1947,7 @@ static int gpmc_probe_onenand_child(struct platform_device *pdev, if (!of_property_read_u32(child, "dma-channel", &val)) gpmc_onenand_data->dma_channel = val; - gpmc_onenand_init(gpmc_onenand_data); - - return 0; + return gpmc_onenand_init(gpmc_onenand_data); } #else static int gpmc_probe_onenand_child(struct platform_device *pdev, diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index a518832ed5f5ba93a45639ca0ddc03612b37f0c1..59dbdaa24c28767e0b7fa43a9cdfc6b222c3fcf7 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -664,6 +664,7 @@ static int cros_ec_spi_probe(struct spi_device *spi) sizeof(struct ec_response_get_protocol_info); ec_dev->dout_size = sizeof(struct ec_host_request); + ec_spi->last_transfer_ns = ktime_get_ns(); err = cros_ec_register(ec_dev); if (err) { diff --git a/drivers/mfd/fsl-imx25-tsadc.c b/drivers/mfd/fsl-imx25-tsadc.c index 77b2675cf8f5df53b035ae1f3a373b7cb50754b8..92e176009ffe319703a74b88e3ea50831f188ed8 100644 --- 
a/drivers/mfd/fsl-imx25-tsadc.c +++ b/drivers/mfd/fsl-imx25-tsadc.c @@ -183,6 +183,19 @@ static int mx25_tsadc_probe(struct platform_device *pdev) return 0; } +static int mx25_tsadc_remove(struct platform_device *pdev) +{ + struct mx25_tsadc *tsadc = platform_get_drvdata(pdev); + int irq = platform_get_irq(pdev, 0); + + if (irq) { + irq_set_chained_handler_and_data(irq, NULL, NULL); + irq_domain_remove(tsadc->domain); + } + + return 0; +} + static const struct of_device_id mx25_tsadc_ids[] = { { .compatible = "fsl,imx25-tsadc" }, { /* Sentinel */ } @@ -194,6 +207,7 @@ static struct platform_driver mx25_tsadc_driver = { .of_match_table = of_match_ptr(mx25_tsadc_ids), }, .probe = mx25_tsadc_probe, + .remove = mx25_tsadc_remove, }; module_platform_driver(mx25_tsadc_driver); diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c index 0a1606480023f90886f25d6af2ef039e0356951c..cc832d309599a0b6985e747c8546e80d40f121e7 100644 --- a/drivers/mfd/twl4030-audio.c +++ b/drivers/mfd/twl4030-audio.c @@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void) EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk); static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata, - struct device_node *node) + struct device_node *parent) { + struct device_node *node; + if (pdata && pdata->codec) return true; - if (of_find_node_by_name(node, "codec")) + node = of_get_child_by_name(parent, "codec"); + if (node) { + of_node_put(node); return true; + } return false; } diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c index 1beb722f608080c8fd232c8fc6b21429fa2d8f6d..e1e69a480c5620856e45f841890e32520af53bda 100644 --- a/drivers/mfd/twl4030-power.c +++ b/drivers/mfd/twl4030-power.c @@ -701,6 +701,7 @@ static struct twl4030_ins omap3_wrst_seq[] = { TWL_RESOURCE_RESET(RES_MAIN_REF), TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R2), TWL_RESOURCE_RESET(RES_VUSB_3V1), + TWL_RESOURCE_RESET(RES_VMMC1), TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R1), TWL_RESOURCE_GROUP_RESET(RES_GRP_RC, RES_TYPE_ALL, RES_TYPE2_R0), TWL_RESOURCE_ON(RES_RESET), diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c index d66502d36ba0b3202d1c15c08540fa8aade42a32..dd19f17a1b637543965dd94e64d0d44b9178f64c 100644 --- a/drivers/mfd/twl6040.c +++ b/drivers/mfd/twl6040.c @@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = { }; -static bool twl6040_has_vibra(struct device_node *node) +static bool twl6040_has_vibra(struct device_node *parent) { -#ifdef CONFIG_OF - if (of_find_node_by_name(node, "vibra")) + struct device_node *node; + + node = of_get_child_by_name(parent, "vibra"); + if (node) { + of_node_put(node); return true; -#endif + } + return false; } diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index eef202d4399b01f037d93dbacf11a7d6c0f453fe..a5422f483ad57cbcd9760346fdf053a04c7329e0 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1758,6 +1758,9 @@ static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, /* There should only be one entry, but go through the list * anyway */ + if (afu->phb == NULL) + return result; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { if (!afu_dev->driver) continue; @@ -1801,6 +1804,11 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, /* Only participate in EEH if we are on a virtual PHB */ if (afu->phb == NULL) return PCI_ERS_RESULT_NONE; + + /* + * Tell the AFU drivers; but we don't care what they + * say, we're going away. 
+ */ cxl_vphb_error_detected(afu, state); } return PCI_ERS_RESULT_DISCONNECT; } @@ -1941,6 +1949,9 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) if (cxl_afu_select_best_mode(afu)) goto err; + if (afu->phb == NULL) + continue; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { /* Reset the device context. * TODO: make this less disruptive @@ -2003,6 +2014,9 @@ static void cxl_pci_resume(struct pci_dev *pdev) for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; + if (afu->phb == NULL) + continue; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { if (afu_dev->driver && afu_dev->driver->err_handler && afu_dev->driver->err_handler->resume)
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 051b14766ef964984da6ff487db81f5a782cb3ba..d8a485f1798b58df0c7d077e31d5c1c3065c1f5b 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -365,7 +365,8 @@ static ssize_t at24_eeprom_read_mac(struct at24_data *at24, char *buf, memset(msg, 0, sizeof(msg)); msg[0].addr = client->addr; msg[0].buf = addrbuf; - addrbuf[0] = 0x90 + offset; + /* EUI-48 starts from 0x9a, EUI-64 from 0x98 */ + addrbuf[0] = 0xa0 - at24->chip.byte_len + offset; msg[0].len = 1; msg[1].addr = client->addr; msg[1].flags = I2C_M_RD; @@ -506,6 +507,9 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) if (unlikely(!count)) return count; + if (off + count > at24->chip.byte_len) + return -EINVAL; + /* * Read data from chip, protecting against concurrent updates * from this host, but not from other I2C masters. @@ -538,6 +542,9 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count) if (unlikely(!count)) return -EINVAL; + if (off + count > at24->chip.byte_len) + return -EINVAL; + /* * Write data to chip, protecting against concurrent updates * from this host, but not from other I2C masters. @@ -638,6 +645,16 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_warn(&client->dev, "page_size looks suspicious (no power of 2)!\n"); + /* + * REVISIT: the size of the EUI-48 byte array is 6 in at24mac402, while + * the call to ilog2() in AT24_DEVICE_MAGIC() rounds it down to 4. + * + * Eventually we'll get rid of the magic values altogether in favor of + * real structs, but for now just manually set the right size. + */ + if (chip.flags & AT24_FLAG_MAC && chip.byte_len == 4) + chip.byte_len = 6; + /* Use I2C operations unless we're stuck with SMBus extensions. */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { if (chip.flags & AT24_FLAG_ADDR16) @@ -766,7 +783,7 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) at24->nvmem_config.reg_read = at24_read; at24->nvmem_config.reg_write = at24_write; at24->nvmem_config.priv = at24; - at24->nvmem_config.stride = 4; + at24->nvmem_config.stride = 1; at24->nvmem_config.word_size = 1; at24->nvmem_config.size = chip.byte_len;
diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c index 7d69dd5c2b75b3b67d3e67bf44d45aa4ad8a1c36..42a513c97239089fc2b08a5ac0f7e92cd96d0543 100644 --- a/drivers/misc/uid_sys_stats.c +++ b/drivers/misc/uid_sys_stats.c @@ -1,4 +1,4 @@ -/* drivers/misc/uid_cputime.c +/* drivers/misc/uid_sys_stats.c * * Copyright (C) 2014 - 2015 Google, Inc.
* diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index c64266f5a399b3c6ee2535d2b8539460a75ed5e2..60ebe5b4500b06400155d14f776f89d689377cea 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -155,6 +155,9 @@ static int mmc_bus_suspend(struct device *dev) return ret; ret = host->bus_ops->suspend(host); + if (ret) + pm_generic_resume(dev); + return ret; } diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 40ddc3e69a4d09c807781828b1352a9c200914e9..00b9d7ef392f89fe3c5d92593047d8780a6f8579 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -209,9 +209,10 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq) completion = ktime_get(); delta_us = ktime_us_delta(completion, mrq->io_start); - blk_update_latency_hist(&host->io_lat_s, - (mrq->data->flags & MMC_DATA_READ), - delta_us); + blk_update_latency_hist( + (mrq->data->flags & MMC_DATA_READ) ? + &host->io_lat_read : + &host->io_lat_write, delta_us); } #endif } @@ -3100,8 +3101,14 @@ static ssize_t latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mmc_host *host = cls_dev_to_mmc_host(dev); + size_t written_bytes; - return blk_latency_hist_show(&host->io_lat_s, buf); + written_bytes = blk_latency_hist_show("Read", &host->io_lat_read, + buf, PAGE_SIZE); + written_bytes += blk_latency_hist_show("Write", &host->io_lat_write, + buf + written_bytes, PAGE_SIZE - written_bytes); + + return written_bytes; } /* @@ -3119,9 +3126,10 @@ latency_hist_store(struct device *dev, struct device_attribute *attr, if (kstrtol(buf, 0, &value)) return -EINVAL; - if (value == BLK_IO_LAT_HIST_ZERO) - blk_zero_latency_hist(&host->io_lat_s); - else if (value == BLK_IO_LAT_HIST_ENABLE || + if (value == BLK_IO_LAT_HIST_ZERO) { + memset(&host->io_lat_read, 0, sizeof(host->io_lat_read)); + memset(&host->io_lat_write, 0, sizeof(host->io_lat_write)); + } else if (value == BLK_IO_LAT_HIST_ENABLE || value == BLK_IO_LAT_HIST_DISABLE) host->latency_hist_enabled = value; return count; diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index e0218b3808d7b01f8ec97037f0cf77b477eecff6..3e5954f8734d6a2a1ff73abbabce26465a60adea 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -763,7 +763,7 @@ MMC_DEV_ATTR(enhanced_area_offset, "%llu\n", MMC_DEV_ATTR(enhanced_area_size, "%u\n", card->ext_csd.enhanced_area_size); MMC_DEV_ATTR(raw_rpmb_size_mult, "%#x\n", card->ext_csd.raw_rpmb_size_mult); MMC_DEV_ATTR(rel_sectors, "%#x\n", card->ext_csd.rel_sectors); -MMC_DEV_ATTR(ocr, "%08x\n", card->ocr); +MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr); static ssize_t mmc_fwrev_show(struct device *dev, struct device_attribute *attr, diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index a0aa64eac123c5f6225a0b8d3fddf614cf71ab4e..ad709340e6639a4ee5647bf8be129996ebbff075 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -683,7 +683,7 @@ MMC_DEV_ATTR(manfid, "0x%06x\n", card->cid.manfid); MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name); MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid); MMC_DEV_ATTR(serial, "0x%08x\n", card->cid.serial); -MMC_DEV_ATTR(ocr, "%08x\n", card->ocr); +MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr); static ssize_t mmc_dsr_show(struct device *dev, diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 84e9afcb5c099bd73f080f3c8660fd7780bc3d02..6f9535e5e584b4469f0fd01bc5c2ad625d255e50 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -579,7 +579,7 @@ static void msdc_set_mclk(struct 
msdc_host *host, unsigned char timing, u32 hz) } } sdr_set_field(host->base + MSDC_CFG, MSDC_CFG_CKMOD | MSDC_CFG_CKDIV, - (mode << 8) | (div % 0xff)); + (mode << 8) | div); sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_CKPDN); while (!(readl(host->base + MSDC_CFG) & MSDC_CFG_CKSTB)) cpu_relax(); @@ -1562,7 +1562,7 @@ static int msdc_drv_probe(struct platform_device *pdev) host->src_clk_freq = clk_get_rate(host->src_clk); /* Set host parameters to mmc */ mmc->ops = &mt_msdc_ops; - mmc->f_min = host->src_clk_freq / (4 * 255); + mmc->f_min = DIV_ROUND_UP(host->src_clk_freq, 4 * 255); mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23; /* MMC core transfer sizes tunable parameters */
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 90ed2e12d345d4ee91f6088aecac306897e2c266..80c89a31d7902299fc2133ae03702173557129b7 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -642,6 +642,21 @@ static int sdhci_msm_probe(struct platform_device *pdev) CORE_VENDOR_SPEC_CAPABILITIES0); } + /* + * Power on reset state may trigger power irq if previous status of + * PWRCTL was either BUS_ON or IO_HIGH_V. So before enabling pwr irq + * interrupt in GIC, any pending power irq interrupt should be + * acknowledged. Otherwise power irq interrupt handler would be + * fired prematurely. + */ + sdhci_msm_voltage_switch(host); + + /* + * Ensure that above writes are propagated before interrupt enablement + * in GIC. + */ + mb(); + /* Setup IRQ for handling power/voltage tasks with PMIC */ msm_host->pwr_irq = platform_get_irq_byname(pdev, "pwr_irq"); if (msm_host->pwr_irq < 0) { @@ -651,6 +666,9 @@ static int sdhci_msm_probe(struct platform_device *pdev) goto clk_disable; } + /* Enable pwr irq interrupts */ + writel_relaxed(INT_MASK, msm_host->core_mem + CORE_PWRCTL_MASK); + ret = devm_request_threaded_irq(&pdev->dev, msm_host->pwr_irq, NULL, sdhci_msm_pwr_irq, IRQF_ONESHOT, dev_name(&pdev->dev), host);
diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index d9fab2222eb3d8bec67a00b964e9ed567d81f844..1a4a790054e4d5c89b71dc7e6559dac281fb5574 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -2193,16 +2193,9 @@ static int brcmnand_setup_dev(struct brcmnand_host *host) if (ctrl->nand_version >= 0x0702) tmp |= ACC_CONTROL_RD_ERASED; tmp &= ~ACC_CONTROL_FAST_PGM_RDIN; - if (ctrl->features & BRCMNAND_HAS_PREFETCH) { - /* - * FIXME: Flash DMA + prefetch may see spurious erased-page ECC - * errors - */ - if (has_flash_dma(ctrl)) - tmp &= ~ACC_CONTROL_PREFETCH; - else - tmp |= ACC_CONTROL_PREFETCH; - } + if (ctrl->features & BRCMNAND_HAS_PREFETCH) + tmp &= ~ACC_CONTROL_PREFETCH; + nand_writereg(ctrl, offs, tmp); return 0;
diff --git a/drivers/mtd/nand/denali_pci.c b/drivers/mtd/nand/denali_pci.c index de31514df282223ba1c61f13a68631609fbe9b8d..d38527e0a2f2f7fe55830b291f4c3cc3a5a2d674 100644 --- a/drivers/mtd/nand/denali_pci.c +++ b/drivers/mtd/nand/denali_pci.c @@ -119,3 +119,7 @@ static struct pci_driver denali_pci_driver = { }; module_pci_driver(denali_pci_driver); + +MODULE_DESCRIPTION("PCI driver for Denali NAND controller"); +MODULE_AUTHOR("Intel Corporation and its suppliers"); +MODULE_LICENSE("GPL v2");
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index a77cfd74a92e5eca26ede3e0eeb5917aa263ab6f..21c03086bb7f610c82aee1b26ac9515d5a2e6082 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2320,6 +2320,7 @@
EXPORT_SYMBOL(nand_write_oob_syndrome); static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { + unsigned int max_bitflips = 0; int page, realpage, chipnr; struct nand_chip *chip = mtd_to_nand(mtd); struct mtd_ecc_stats stats; @@ -2377,6 +2378,8 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, nand_wait_ready(mtd); } + max_bitflips = max_t(unsigned int, max_bitflips, ret); + readlen -= len; if (!readlen) break; @@ -2402,7 +2405,7 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, if (mtd->ecc_stats.failed - stats.failed) return -EBADMSG; - return mtd->ecc_stats.corrected - stats.corrected ? -EUCLEAN : 0; + return max_bitflips; } /** diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index b121bf4ed73a2d50aca1935cffe2e6bf3639c26d..3b8911cd3a199d986ffa1c4b70857f5e104a0434 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -950,6 +950,7 @@ static void prepare_start_command(struct pxa3xx_nand_info *info, int command) switch (command) { case NAND_CMD_READ0: + case NAND_CMD_READOOB: case NAND_CMD_PAGEPROG: info->use_ecc = 1; break; diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index f9b2a771096bb4b745a7db661b66ee6d35b5523d..e26c4f880df66fae301d7fa8e58a1145698b94f1 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -1835,8 +1835,14 @@ static int sunxi_nand_hw_common_ecc_ctrl_init(struct mtd_info *mtd, /* Add ECC info retrieval from DT */ for (i = 0; i < ARRAY_SIZE(strengths); i++) { - if (ecc->strength <= strengths[i]) + if (ecc->strength <= strengths[i]) { + /* + * Update ecc->strength value with the actual strength + * that will be used by the ECC engine. + */ + ecc->strength = strengths[i]; break; + } } if (i >= ARRAY_SIZE(strengths)) { diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index d1e6931c132f79a5a7a57557c75644170c6ee4a6..46913ef25bc02127d3437e272164eb58144242da 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -99,6 +99,8 @@ struct ubiblock { /* Linked list of all ubiblock instances */ static LIST_HEAD(ubiblock_devices); +static DEFINE_IDR(ubiblock_minor_idr); +/* Protects ubiblock_devices and ubiblock_minor_idr */ static DEFINE_MUTEX(devices_mutex); static int ubiblock_major; @@ -353,8 +355,6 @@ static struct blk_mq_ops ubiblock_mq_ops = { .init_request = ubiblock_init_request, }; -static DEFINE_IDR(ubiblock_minor_idr); - int ubiblock_create(struct ubi_volume_info *vi) { struct ubiblock *dev; @@ -367,14 +367,15 @@ int ubiblock_create(struct ubi_volume_info *vi) /* Check that the volume isn't already handled */ mutex_lock(&devices_mutex); if (find_dev_nolock(vi->ubi_num, vi->vol_id)) { - mutex_unlock(&devices_mutex); - return -EEXIST; + ret = -EEXIST; + goto out_unlock; } - mutex_unlock(&devices_mutex); dev = kzalloc(sizeof(struct ubiblock), GFP_KERNEL); - if (!dev) - return -ENOMEM; + if (!dev) { + ret = -ENOMEM; + goto out_unlock; + } mutex_init(&dev->dev_mutex); @@ -439,14 +440,13 @@ int ubiblock_create(struct ubi_volume_info *vi) goto out_free_queue; } - mutex_lock(&devices_mutex); list_add_tail(&dev->list, &ubiblock_devices); - mutex_unlock(&devices_mutex); /* Must be the last step: anyone can call file ops from now on */ add_disk(dev->gd); dev_info(disk_to_dev(dev->gd), "created from ubi%d:%d(%s)", dev->ubi_num, dev->vol_id, vi->name); + mutex_unlock(&devices_mutex); return 0; out_free_queue: @@ -459,6 +459,8 @@ int ubiblock_create(struct ubi_volume_info *vi) 
put_disk(dev->gd); out_free_dev: kfree(dev); +out_unlock: + mutex_unlock(&devices_mutex); return ret; } @@ -480,30 +482,36 @@ static void ubiblock_cleanup(struct ubiblock *dev) int ubiblock_remove(struct ubi_volume_info *vi) { struct ubiblock *dev; + int ret; mutex_lock(&devices_mutex); dev = find_dev_nolock(vi->ubi_num, vi->vol_id); if (!dev) { - mutex_unlock(&devices_mutex); - return -ENODEV; + ret = -ENODEV; + goto out_unlock; } /* Found a device, let's lock it so we can check if it's busy */ mutex_lock(&dev->dev_mutex); if (dev->refcnt > 0) { - mutex_unlock(&dev->dev_mutex); - mutex_unlock(&devices_mutex); - return -EBUSY; + ret = -EBUSY; + goto out_unlock_dev; } /* Remove from device list */ list_del(&dev->list); - mutex_unlock(&devices_mutex); - ubiblock_cleanup(dev); mutex_unlock(&dev->dev_mutex); + mutex_unlock(&devices_mutex); + kfree(dev); return 0; + +out_unlock_dev: + mutex_unlock(&dev->dev_mutex); +out_unlock: + mutex_unlock(&devices_mutex); + return ret; } static int ubiblock_resize(struct ubi_volume_info *vi) @@ -632,6 +640,7 @@ static void ubiblock_remove_all(void) struct ubiblock *next; struct ubiblock *dev; + mutex_lock(&devices_mutex); list_for_each_entry_safe(dev, next, &ubiblock_devices, list) { /* The module is being forcefully removed */ WARN_ON(dev->desc); @@ -640,6 +649,7 @@ static void ubiblock_remove_all(void) ubiblock_cleanup(dev); kfree(dev); } + mutex_unlock(&devices_mutex); } int __init ubiblock_init(void) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index b5b8cd6f481c5434cc0442496db24317702542ad..668b46202507ce7f0b59a998d2b34e7d92cc2bbc 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1528,6 +1528,46 @@ static void shutdown_work(struct ubi_device *ubi) } } +/** + * erase_aeb - erase the PEB described by a UBI attach info PEB + * @ubi: UBI device description object + * @aeb: UBI attach info PEB + * @sync: If true, erase synchronously. Otherwise, schedule it for erasure + */ +static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync) +{ + struct ubi_wl_entry *e; + int err; + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi->lookuptbl[e->pnum] = e; + + if (sync) { + err = sync_erase(ubi, e, false); + if (err) + goto out_free; + + wl_tree_add(e, &ubi->free); + ubi->free_count++; + } else { + err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false); + if (err) + goto out_free; + } + + return 0; + +out_free: + wl_entry_destroy(ubi, e); + + return err; +} + /** * ubi_wl_init - initialize the WL sub-system using attaching information.
* @ubi: UBI device description object @@ -1566,18 +1606,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { cond_resched(); - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) + err = erase_aeb(ubi, aeb, false); + if (err) goto out_free; - e->pnum = aeb->pnum; - e->ec = aeb->ec; - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { - wl_entry_destroy(ubi, e); - goto out_free; - } - found_pebs++; } @@ -1635,6 +1667,8 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) ubi_assert(!ubi->lookuptbl[e->pnum]); ubi->lookuptbl[e->pnum] = e; } else { + bool sync = false; + /* * Usually old Fastmap PEBs are scheduled for erasure * and we don't have to care about them but if we face @@ -1644,18 +1678,21 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (ubi->lookuptbl[aeb->pnum]) continue; - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) - goto out_free; + /* + * The fastmap update code might not find a free PEB for + * writing the fastmap anchor to and then reuses the + * current fastmap anchor PEB. When this PEB gets erased + * and a power cut happens before it is written again we + * must make sure that the fastmap attach code doesn't + * find any outdated fastmap anchors, hence we erase the + * outdated fastmap anchor PEBs synchronously here. + */ + if (aeb->vol_id == UBI_FM_SB_VOLUME_ID) + sync = true; - e->pnum = aeb->pnum; - e->ec = aeb->ec; - ubi_assert(!ubi->lookuptbl[e->pnum]); - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { - wl_entry_destroy(ubi, e); + err = erase_aeb(ubi, aeb, sync); + if (err) goto out_free; - } } found_pebs++; diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c index e90c6a7333d76b4f11e3fccfdcad094be141108e..2e4649655181df1e6ada5f8bfd526d3ebbf1af8a 100644 --- a/drivers/net/appletalk/ipddp.c +++ b/drivers/net/appletalk/ipddp.c @@ -191,7 +191,7 @@ static netdev_tx_t ipddp_xmit(struct sk_buff *skb, struct net_device *dev) */ static int ipddp_create(struct ipddp_route *new_rt) { - struct ipddp_route *rt = kmalloc(sizeof(*rt), GFP_KERNEL); + struct ipddp_route *rt = kzalloc(sizeof(*rt), GFP_KERNEL); if (rt == NULL) return -ENOMEM; diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 6749b1829469411315dedac1634b7be974cf21d8..4d01d7bc24ef889670cee9b4675baf6ee3d8b21d 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -652,6 +652,9 @@ static int ti_hecc_rx_poll(struct napi_struct *napi, int quota) mbx_mask = hecc_read(priv, HECC_CANMIM); mbx_mask |= HECC_TX_MBOX_MASK; hecc_write(priv, HECC_CANMIM, mbx_mask); + } else { + /* repoll is done only if whole budget is used */ + num_pkts = quota; } return num_pkts; diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index b3d02759c226bbde64ea3bdcdd3314ca646b743b..b00358297424604634489a9b106bf313c06b7fc9 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -288,6 +288,8 @@ static void ems_usb_read_interrupt_callback(struct urb *urb) case -ECONNRESET: /* unlink */ case -ENOENT: + case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 9fdb0f0bfa06a00a2ade5e74daef15798ab7d4a1..c6dcf93675c00585bd960f579988ea1c2d348d50 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ 
-393,6 +393,8 @@ static void esd_usb2_read_bulk_callback(struct urb *urb) break; case -ENOENT: + case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index eea9aea14b003cefad6c4b2dcf96e17a54063aad..5d5012337d9eea9a6139dae399ba373a95217dfc 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev) dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)", rc); - return rc; + return (rc > 0) ? 0 : rc; } static void gs_usb_xmit_callback(struct urb *urb) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 4224e066cb16664e64b6efe6755ab21d2989d742..c9d61a6dfb7a368c74c564919c3b66a9c0e71abc 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -609,8 +609,8 @@ static int kvaser_usb_wait_msg(const struct kvaser_usb *dev, u8 id, } if (pos + tmp->len > actual_len) { - dev_err(dev->udev->dev.parent, - "Format error\n"); + dev_err_ratelimited(dev->udev->dev.parent, + "Format error\n"); break; } @@ -813,6 +813,7 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv, if (err) { netdev_err(netdev, "Error transmitting URB\n"); usb_unanchor_urb(urb); + kfree(buf); usb_free_urb(urb); return err; } @@ -1325,6 +1326,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) case 0: break; case -ENOENT: + case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; default: @@ -1333,7 +1336,7 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) goto resubmit_urb; } - while (pos <= urb->actual_length - MSG_HEADER_LEN) { + while (pos <= (int)(urb->actual_length - MSG_HEADER_LEN)) { msg = urb->transfer_buffer + pos; /* The Kvaser firmware can only read and write messages that @@ -1352,7 +1355,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) } if (pos + msg->len > urb->actual_length) { - dev_err(dev->udev->dev.parent, "Format error\n"); + dev_err_ratelimited(dev->udev->dev.parent, + "Format error\n"); break; } @@ -1768,6 +1772,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, spin_unlock_irqrestore(&priv->tx_contexts_lock, flags); usb_unanchor_urb(urb); + kfree(buf); stats->tx_dropped++; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 304732550f0a628a7fa9ae9c94e67113d9a869a0..7f5ec40e2b4dcaf5e96bdd84f8dd3eaf2aaecdf3 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -184,7 +184,7 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) void *cmd_head = pcan_usb_fd_cmd_buffer(dev); int err = 0; u8 *packet_ptr; - int i, n = 1, packet_len; + int packet_len; ptrdiff_t cmd_len; /* usb device unregistered? 
*/ @@ -201,17 +201,13 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) } packet_ptr = cmd_head; + packet_len = cmd_len; /* firmware is not able to re-assemble 512 bytes buffer in full-speed */ - if ((dev->udev->speed != USB_SPEED_HIGH) && - (cmd_len > PCAN_UFD_LOSPD_PKT_SIZE)) { - packet_len = PCAN_UFD_LOSPD_PKT_SIZE; - n += cmd_len / packet_len; - } else { - packet_len = cmd_len; - } + if (unlikely(dev->udev->speed != USB_SPEED_HIGH)) + packet_len = min(packet_len, PCAN_UFD_LOSPD_PKT_SIZE); - for (i = 0; i < n; i++) { + do { err = usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, PCAN_USBPRO_EP_CMDOUT), @@ -224,7 +220,12 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) } packet_ptr += packet_len; - } + cmd_len -= packet_len; + + if (cmd_len < PCAN_UFD_LOSPD_PKT_SIZE) + packet_len = cmd_len; + + } while (packet_len > 0); return err; } diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index d000cb62d6ae8c68233e123bc63686d3ab71dd67..27861c417c9404c9c3df841daac95015978f3a69 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -524,6 +524,8 @@ static void usb_8dev_read_bulk_callback(struct urb *urb) break; case -ENOENT: + case -EPIPE: + case -EPROTO: case -ESHUTDOWN: return; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index be7ec5a76a54102eecabe99c45ffcf00a49a7713..744ed6ddaf373964a2b3526a2b73613932c73c87 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1023,15 +1023,6 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, goto out; } - /* Insert TSB and checksum infos */ - if (priv->tsb_en) { - skb = bcm_sysport_insert_tsb(skb, dev); - if (!skb) { - ret = NETDEV_TX_OK; - goto out; - } - } - /* The Ethernet switch we are interfaced with needs packets to be at * least 64 bytes (including FCS) otherwise they will be discarded when * they enter the switch port logic. When Broadcom tags are enabled, we @@ -1039,13 +1030,21 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, * (including FCS and tag) because the length verification is done after * the Broadcom tag is stripped off the ingress packet. */ - if (skb_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) { + if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) { ret = NETDEV_TX_OK; goto out; } - skb_len = skb->len < ETH_ZLEN + ENET_BRCM_TAG_LEN ? 
- ETH_ZLEN + ENET_BRCM_TAG_LEN : skb->len; + /* Insert TSB and checksum infos */ + if (priv->tsb_en) { + skb = bcm_sysport_insert_tsb(skb, dev); + if (!skb) { + ret = NETDEV_TX_OK; + goto out; + } + } + + skb_len = skb->len; mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE); if (dma_mapping_error(kdev, mapping)) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 4febe60eadc2ef57e592943ef6b3515a22bd3111..5d958b5bb8b124ed62824a2f7bedd26848e142f1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13293,17 +13293,15 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA; - /* VF with OLD Hypervisor or old PF do not support filtering */ if (IS_PF(bp)) { if (chip_is_e1x) bp->accept_any_vlan = true; else dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; -#ifdef CONFIG_BNX2X_SRIOV - } else if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) { - dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; -#endif } + /* For VF we'll know whether to enable VLAN filtering after + * getting a response to CHANNEL_TLV_ACQUIRE from PF. + */ dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX; dev->features |= NETIF_F_HIGHDMA; @@ -13735,7 +13733,7 @@ static int bnx2x_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) if (!netif_running(bp->dev)) { DP(BNX2X_MSG_PTP, "PTP adjfreq called while the interface is down\n"); - return -EFAULT; + return -ENETDOWN; } if (ppb < 0) { @@ -13794,6 +13792,12 @@ static int bnx2x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); + if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP adjtime called while the interface is down\n"); + return -ENETDOWN; + } + DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta); timecounter_adjtime(&bp->timecounter, delta); @@ -13806,6 +13810,12 @@ static int bnx2x_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); u64 ns; + if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP gettime called while the interface is down\n"); + return -ENETDOWN; + } + ns = timecounter_read(&bp->timecounter); DP(BNX2X_MSG_PTP, "PTP gettime called, ns = %llu\n", ns); @@ -13821,6 +13831,12 @@ static int bnx2x_ptp_settime(struct ptp_clock_info *ptp, struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); u64 ns; + if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP settime called while the interface is down\n"); + return -ENETDOWN; + } + ns = timespec64_to_ns(ts); DP(BNX2X_MSG_PTP, "PTP settime called, ns = %llu\n", ns); @@ -13988,6 +14004,14 @@ static int bnx2x_init_one(struct pci_dev *pdev, rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count); if (rc) goto init_one_freemem; + +#ifdef CONFIG_BNX2X_SRIOV + /* VF with OLD Hypervisor or old PF do not support filtering */ + if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) { + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + } +#endif } /* Enable SRIOV if capability found in configuration space */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 
3f77d0863543fe4cb90cfe9599d60aab96d75df3..c6e059119b22e1b12351274973f2d86ee6ffc33e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -434,7 +434,9 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp, /* Add/Remove the filter */ rc = bnx2x_config_vlan_mac(bp, &ramrod); - if (rc && rc != -EEXIST) { + if (rc == -EEXIST) + return 0; + if (rc) { BNX2X_ERR("Failed to %s %s\n", filter->add ? "add" : "delete", (filter->type == BNX2X_VF_FILTER_VLAN_MAC) ? @@ -444,6 +446,8 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp, return rc; } + filter->applied = true; + return 0; } @@ -471,6 +475,8 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf, BNX2X_ERR("Managed only %d/%d filters - rolling back\n", i, filters->count + 1); while (--i >= 0) { + if (!filters->filters[i].applied) + continue; filters->filters[i].add = !filters->filters[i].add; bnx2x_vf_mac_vlan_config(bp, vf, qid, &filters->filters[i], diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index 7a6d406f4c111774ea606c98253608273ae8787b..888d0b6632e86f2f7ab7e2f9e605be87fa4c7061 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -114,6 +114,7 @@ struct bnx2x_vf_mac_vlan_filter { (BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/ bool add; + bool applied; u8 *mac; u16 vid; }; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index bfae300cf25ff881292dc36ad56e51e37132cd76..c2d327d9dff0ce7b89d25b448b3a05b75ddcb69c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -868,7 +868,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) struct bnx2x *bp = netdev_priv(dev); struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters; struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp; - int rc, i = 0; + int rc = 0, i = 0; struct netdev_hw_addr *ha; if (bp->state != BNX2X_STATE_OPEN) { @@ -883,6 +883,15 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) /* Get Rx mode requested */ DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags); + /* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */ + if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) { + DP(NETIF_MSG_IFUP, + "VF supports not more than %d multicast MAC addresses\n", + PFVF_MAX_MULTICAST_PER_VF); + rc = -EINVAL; + goto out; + } + netdev_for_each_mc_addr(ha, dev) { DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n", bnx2x_mc_addr(ha)); @@ -890,16 +899,6 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) i++; } - /* We support four PFVF_MAX_MULTICAST_PER_VF mcast - * addresses tops - */ - if (i >= PFVF_MAX_MULTICAST_PER_VF) { - DP(NETIF_MSG_IFUP, - "VF supports not more than %d multicast MAC addresses\n", - PFVF_MAX_MULTICAST_PER_VF); - return -EINVAL; - } - req->n_multicast = i; req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED; req->vf_qid = 0; @@ -924,7 +923,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) out: bnx2x_vfpf_finalize(bp, &req->first_tlv); - return 0; + return rc; } /* request pf to add a vlan for the vf */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5d2cf56aed0e17efd9141bddbae3d1b03ef82a15..bbb3641eddcb699b67172c530c5721e35ec3eb40 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2381,6 +2381,18 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr) return 0; } +static void bnxt_init_cp_rings(struct bnxt *bp) +{ + int i; + + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring; + struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; + + ring->fw_ring_id = INVALID_HW_RING_ID; + } +} + static int bnxt_init_rx_rings(struct bnxt *bp) { int i, rc = 0; @@ -4700,6 +4712,7 @@ static int bnxt_shutdown_nic(struct bnxt *bp, bool irq_re_init) static int bnxt_init_nic(struct bnxt *bp, bool irq_re_init) { + bnxt_init_cp_rings(bp); bnxt_init_rx_rings(bp); bnxt_init_tx_rings(bp); bnxt_init_ring_grps(bp, irq_re_init); @@ -5132,8 +5145,9 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp) bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) & PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK; } - link_info->support_auto_speeds = - le16_to_cpu(resp->supported_speeds_auto_mode); + if (resp->supported_speeds_auto_mode) + link_info->support_auto_speeds = + le16_to_cpu(resp->supported_speeds_auto_mode); hwrm_phy_qcaps_exit: mutex_unlock(&bp->hwrm_cmd_lock); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index a7e04ff4eaedefac2e01f6ecb90f224d63f50f6f..cde4b96f3153f7cbfee033fb8d41c9040ec592f0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1843,8 +1843,8 @@ static int bnxt_get_module_eeprom(struct net_device *dev, /* Read A2 portion of the EEPROM */ if (length) { start -= ETH_MODULE_SFF_8436_LEN; - bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, start, - length, data); + rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, + start, length, data); } return rc; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 0975af2903ef6e6532506135108a5d33918f6e17..3480b3078775fa15c39a1e87892b25af4fb7226a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1,7 +1,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014 Broadcom Corporation + * Copyright (c) 2014-2017 Broadcom * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -778,8 +778,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { STAT_GENET_RUNT("rx_runt_bytes", mib.rx_runt_bytes), /* Misc UniMAC counters */ STAT_GENET_MISC("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt, - UMAC_RBUF_OVFL_CNT), - STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT), + UMAC_RBUF_OVFL_CNT_V1), + STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, + UMAC_RBUF_ERR_CNT_V1), STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT), STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed), @@ -821,6 +822,45 @@ static void bcmgenet_get_strings(struct net_device *dev, u32 stringset, } } +static u32 bcmgenet_update_stat_misc(struct bcmgenet_priv *priv, u16 offset) +{ + u16 new_offset; + u32 val; + + switch (offset) { + case UMAC_RBUF_OVFL_CNT_V1: + if (GENET_IS_V2(priv)) + new_offset = RBUF_OVFL_CNT_V2; + else + new_offset = RBUF_OVFL_CNT_V3PLUS; + + val = bcmgenet_rbuf_readl(priv, new_offset); + /* clear if overflowed */ + if (val == ~0) + 
bcmgenet_rbuf_writel(priv, 0, new_offset); + break; + case UMAC_RBUF_ERR_CNT_V1: + if (GENET_IS_V2(priv)) + new_offset = RBUF_ERR_CNT_V2; + else + new_offset = RBUF_ERR_CNT_V3PLUS; + + val = bcmgenet_rbuf_readl(priv, new_offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_rbuf_writel(priv, 0, new_offset); + break; + default: + val = bcmgenet_umac_readl(priv, offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_umac_writel(priv, 0, offset); + break; + } + + return val; +} + static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) { int i, j = 0; @@ -836,19 +876,28 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) case BCMGENET_STAT_NETDEV: case BCMGENET_STAT_SOFT: continue; - case BCMGENET_STAT_MIB_RX: - case BCMGENET_STAT_MIB_TX: case BCMGENET_STAT_RUNT: - if (s->type != BCMGENET_STAT_MIB_RX) - offset = BCMGENET_STAT_OFFSET; + offset += BCMGENET_STAT_OFFSET; + /* fall through */ + case BCMGENET_STAT_MIB_TX: + offset += BCMGENET_STAT_OFFSET; + /* fall through */ + case BCMGENET_STAT_MIB_RX: val = bcmgenet_umac_readl(priv, UMAC_MIB_START + j + offset); + offset = 0; /* Reset Offset */ break; case BCMGENET_STAT_MISC: - val = bcmgenet_umac_readl(priv, s->reg_offset); - /* clear if overflowed */ - if (val == ~0) - bcmgenet_umac_writel(priv, 0, s->reg_offset); + if (GENET_IS_V1(priv)) { + val = bcmgenet_umac_readl(priv, s->reg_offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_umac_writel(priv, 0, + s->reg_offset); + } else { + val = bcmgenet_update_stat_misc(priv, + s->reg_offset); + } break; } @@ -2464,24 +2513,28 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) /* Interrupt bottom half */ static void bcmgenet_irq_task(struct work_struct *work) { + unsigned long flags; + unsigned int status; struct bcmgenet_priv *priv = container_of( work, struct bcmgenet_priv, bcmgenet_irq_work); netif_dbg(priv, intr, priv->dev, "%s\n", __func__); - if (priv->irq0_stat & UMAC_IRQ_MPD_R) { - priv->irq0_stat &= ~UMAC_IRQ_MPD_R; + spin_lock_irqsave(&priv->lock, flags); + status = priv->irq0_stat; + priv->irq0_stat = 0; + spin_unlock_irqrestore(&priv->lock, flags); + + if (status & UMAC_IRQ_MPD_R) { netif_dbg(priv, wol, priv->dev, "magic packet detected, waking up\n"); bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC); } /* Link UP/DOWN event */ - if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) { + if (status & UMAC_IRQ_LINK_EVENT) phy_mac_interrupt(priv->phydev, - !!(priv->irq0_stat & UMAC_IRQ_LINK_UP)); - priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT; - } + !!(status & UMAC_IRQ_LINK_UP)); } /* bcmgenet_isr1: handle Rx and Tx priority queues */ @@ -2490,22 +2543,21 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) struct bcmgenet_priv *priv = dev_id; struct bcmgenet_rx_ring *rx_ring; struct bcmgenet_tx_ring *tx_ring; - unsigned int index; + unsigned int index, status; - /* Save irq status for bottom-half processing. 
*/ - priv->irq1_stat = - bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & + /* Read irq status */ + status = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS); /* clear interrupts */ - bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR); + bcmgenet_intrl2_1_writel(priv, status, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, - "%s: IRQ=0x%x\n", __func__, priv->irq1_stat); + "%s: IRQ=0x%x\n", __func__, status); /* Check Rx priority queue interrupts */ for (index = 0; index < priv->hw_params->rx_queues; index++) { - if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) + if (!(status & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) continue; rx_ring = &priv->rx_rings[index]; @@ -2518,7 +2570,7 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) /* Check Tx priority queue interrupts */ for (index = 0; index < priv->hw_params->tx_queues; index++) { - if (!(priv->irq1_stat & BIT(index))) + if (!(status & BIT(index))) continue; tx_ring = &priv->tx_rings[index]; @@ -2538,19 +2590,20 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) struct bcmgenet_priv *priv = dev_id; struct bcmgenet_rx_ring *rx_ring; struct bcmgenet_tx_ring *tx_ring; + unsigned int status; + unsigned long flags; - /* Save irq status for bottom-half processing. */ - priv->irq0_stat = - bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) & + /* Read irq status */ + status = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) & ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); /* clear interrupts */ - bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR); + bcmgenet_intrl2_0_writel(priv, status, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, - "IRQ=0x%x\n", priv->irq0_stat); + "IRQ=0x%x\n", status); - if (priv->irq0_stat & UMAC_IRQ_RXDMA_DONE) { + if (status & UMAC_IRQ_RXDMA_DONE) { rx_ring = &priv->rx_rings[DESC_INDEX]; if (likely(napi_schedule_prep(&rx_ring->napi))) { @@ -2559,7 +2612,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } } - if (priv->irq0_stat & UMAC_IRQ_TXDMA_DONE) { + if (status & UMAC_IRQ_TXDMA_DONE) { tx_ring = &priv->tx_rings[DESC_INDEX]; if (likely(napi_schedule_prep(&tx_ring->napi))) { @@ -2568,22 +2621,23 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } } - if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | - UMAC_IRQ_PHY_DET_F | - UMAC_IRQ_LINK_EVENT | - UMAC_IRQ_HFB_SM | - UMAC_IRQ_HFB_MM | - UMAC_IRQ_MPD_R)) { - /* all other interested interrupts handled in bottom half */ - schedule_work(&priv->bcmgenet_irq_work); - } - if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) && - priv->irq0_stat & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { - priv->irq0_stat &= ~(UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR); + status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { wake_up(&priv->wq); } + /* all other interested interrupts handled in bottom half */ + status &= (UMAC_IRQ_LINK_EVENT | + UMAC_IRQ_MPD_R); + if (status) { + /* Save irq status for bottom-half processing. 
*/ + spin_lock_irqsave(&priv->lock, flags); + priv->irq0_stat |= status; + spin_unlock_irqrestore(&priv->lock, flags); + + schedule_work(&priv->bcmgenet_irq_work); + } + return IRQ_HANDLED; } @@ -2808,6 +2862,8 @@ static int bcmgenet_open(struct net_device *dev) err_fini_dma: bcmgenet_fini_dma(priv); err_clk_disable: + if (priv->internal_phy) + bcmgenet_power_down(priv, GENET_POWER_PASSIVE); clk_disable_unprepare(priv->clk); return ret; } @@ -3184,6 +3240,12 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) */ gphy_rev = reg & 0xffff; + /* This is reserved so should require special treatment */ + if (gphy_rev == 0 || gphy_rev == 0x01ff) { + pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev); + return; + } + /* This is the good old scheme, just GPHY major, no minor nor patch */ if ((gphy_rev & 0xf0) != 0) priv->gphy_rev = gphy_rev << 8; @@ -3192,12 +3254,6 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) else if ((gphy_rev & 0xff00) != 0) priv->gphy_rev = gphy_rev; - /* This is reserved so should require special treatment */ - else if (gphy_rev == 0 || gphy_rev == 0x01ff) { - pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev); - return; - } - #ifdef CONFIG_PHYS_ADDR_T_64BIT if (!(params->flags & GENET_HAS_40BITS)) pr_warn("GENET does not support 40-bits PA\n"); @@ -3240,6 +3296,7 @@ static int bcmgenet_probe(struct platform_device *pdev) const void *macaddr; struct resource *r; int err = -EIO; + const char *phy_mode_str; /* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */ dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1, @@ -3283,6 +3340,8 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } + spin_lock_init(&priv->lock); + SET_NETDEV_DEV(dev, &pdev->dev); dev_set_drvdata(&pdev->dev, dev); ether_addr_copy(dev->dev_addr, macaddr); @@ -3345,6 +3404,13 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->clk_eee = NULL; } + /* If this is an internal GPHY, power it on now, before UniMAC is + * brought out of reset as absolutely no UniMAC activity is allowed + */ + if (dn && !of_property_read_string(dn, "phy-mode", &phy_mode_str) && + !strcasecmp(phy_mode_str, "internal")) + bcmgenet_power_up(priv, GENET_POWER_PASSIVE); + err = reset_umac(priv); if (err) goto err_clk_disable; @@ -3511,6 +3577,8 @@ static int bcmgenet_resume(struct device *d) return 0; out_clk_disable: + if (priv->internal_phy) + bcmgenet_power_down(priv, GENET_POWER_PASSIVE); clk_disable_unprepare(priv->clk); return ret; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 1e2dc34d331a49e05a8fc9a66156dfeeb00ee10f..db7f289d65ae2abd1589446ee0cadc00ffbf0254 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Broadcom Corporation + * Copyright (c) 2014-2017 Broadcom * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -214,7 +214,9 @@ struct bcmgenet_mib_counters { #define MDIO_REG_SHIFT 16 #define MDIO_REG_MASK 0x1F -#define UMAC_RBUF_OVFL_CNT 0x61C +#define UMAC_RBUF_OVFL_CNT_V1 0x61C +#define RBUF_OVFL_CNT_V2 0x80 +#define RBUF_OVFL_CNT_V3PLUS 0x94 #define UMAC_MPD_CTRL 0x620 #define MPD_EN (1 << 0) @@ -224,7 +226,9 @@ struct bcmgenet_mib_counters { #define UMAC_MPD_PW_MS 0x624 #define UMAC_MPD_PW_LS 0x628 -#define UMAC_RBUF_ERR_CNT 0x634 +#define UMAC_RBUF_ERR_CNT_V1 0x634 
+#define RBUF_ERR_CNT_V2 0x84 +#define RBUF_ERR_CNT_V3PLUS 0x98 #define UMAC_MDF_ERR_CNT 0x638 #define UMAC_MDF_CTRL 0x650 #define UMAC_MDF_ADDR 0x654 @@ -619,11 +623,13 @@ struct bcmgenet_priv { struct work_struct bcmgenet_irq_work; int irq0; int irq1; - unsigned int irq0_stat; - unsigned int irq1_stat; int wol_irq; bool wol_irq_disabled; + /* shared status */ + spinlock_t lock; + unsigned int irq0_stat; + /* HW descriptors/checksum variables */ bool desc_64b_en; bool desc_rxchk_en; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index edae2dcc4927ebe7024f349060ba5728aa6412c7..bb22d325e9656c50630df4829d7ab9f1b37a25c8 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14226,7 +14226,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) /* Reset PHY, otherwise the read DMA engine will be in a mode that * breaks all requests to 256 bytes. */ - if (tg3_asic_rev(tp) == ASIC_REV_57766) + if (tg3_asic_rev(tp) == ASIC_REV_57766 || + tg3_asic_rev(tp) == ASIC_REV_5717 || + tg3_asic_rev(tp) == ASIC_REV_5719) reset_phy = true; err = tg3_restart_hw(tp, reset_phy); diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 9e59663a6eadb012de6f4a4474484800401fce3b..0f6811860ad51de9b871e806f3f254a1abfcf2eb 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1930,13 +1930,13 @@ static void bfa_ioc_send_enable(struct bfa_ioc *ioc) { struct bfi_ioc_ctrl_req enable_req; - struct timeval tv; bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ, bfa_ioc_portid(ioc)); enable_req.clscode = htons(ioc->clscode); - do_gettimeofday(&tv); - enable_req.tv_sec = ntohl(tv.tv_sec); + enable_req.rsvd = htons(0); + /* overflow in 2106 */ + enable_req.tv_sec = ntohl(ktime_get_real_seconds()); bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req)); } @@ -1947,6 +1947,10 @@ bfa_ioc_send_disable(struct bfa_ioc *ioc) bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ, bfa_ioc_portid(ioc)); + disable_req.clscode = htons(ioc->clscode); + disable_req.rsvd = htons(0); + /* overflow in 2106 */ + disable_req.tv_sec = ntohl(ktime_get_real_seconds()); bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req)); } diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c index 05c1c1dd7751bd720fac026876c7fcf7392eca03..cebfe3bd086e36f60f717579f03037058b1d1d9e 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c +++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c @@ -325,7 +325,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf, return PTR_ERR(kern_buf); rc = sscanf(kern_buf, "%x:%x", &addr, &len); - if (rc < 2) { + if (rc < 2 || len > UINT_MAX >> 2) { netdev_warn(bnad->netdev, "failed to read user buffer\n"); kfree(kern_buf); return -EINVAL; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c index 67befedef7098ddbde763738eb7940116efa283e..578c7f8f11bf23add2ac4d3c2263e371b4509136 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c @@ -116,8 +116,7 @@ void xcv_setup_link(bool link_up, int link_speed) int speed = 2; if (!xcv) { - dev_err(&xcv->pdev->dev, - "XCV init not done, probe may have failed\n"); + pr_err("XCV init not done, probe may have failed\n"); return; } diff --git 
a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index 0f0de5b63622d8ebfb98c124d5768e806a3689e8..d04a6c1634452034217e9dc4ab8771bf02899b86 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -133,17 +133,15 @@ cxgb_find_route6(struct cxgb4_lld_info *lldi, if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) fl6.flowi6_oif = sin6_scope_id; dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!cxgb_our_interface(lldi, get_real_dev, - ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { + if (dst->error || + (!cxgb_our_interface(lldi, get_real_dev, + ip6_dst_idev(dst)->dev) && + !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK))) { dst_release(dst); - dst = NULL; + return NULL; } } -out: return dst; } EXPORT_SYMBOL(cxgb_find_route6); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 5626908f3f7af0235c186abc2cc36914f6c9af6b..b2eeecb26939bc8f8b1dda2921ef1e79fa181ac5 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -275,8 +275,7 @@ static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac) /* Check if mac has already been added as part of uc-list */ for (i = 0; i < adapter->uc_macs; i++) { - if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN], - mac)) { + if (ether_addr_equal(adapter->uc_list[i].mac, mac)) { /* mac already added, skip addition */ adapter->pmac_id[0] = adapter->pmac_id[i + 1]; return 0; @@ -363,8 +362,10 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) status = -EPERM; goto err; } -done: + + /* Remember currently programmed MAC */ ether_addr_copy(adapter->dev_mac, addr->sa_data); +done: ether_addr_copy(netdev->dev_addr, addr->sa_data); dev_info(dev, "MAC address changed to %pM\n", addr->sa_data); return 0; @@ -1679,14 +1680,12 @@ static void be_clear_mc_list(struct be_adapter *adapter) static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx) { - if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN], - adapter->dev_mac)) { + if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) { adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0]; return 0; } - return be_cmd_pmac_add(adapter, - (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN], + return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac, adapter->if_handle, &adapter->pmac_id[uc_idx + 1], 0); } @@ -1722,9 +1721,8 @@ static void be_set_uc_list(struct be_adapter *adapter) } if (adapter->update_uc_list) { - i = 1; /* First slot is claimed by the Primary MAC */ - /* cache the uc-list in adapter array */ + i = 0; netdev_for_each_uc_addr(ha, netdev) { ether_addr_copy(adapter->uc_list[i].mac, ha->addr); i++; @@ -3639,8 +3637,10 @@ static void be_disable_if_filters(struct be_adapter *adapter) { /* Don't delete MAC on BE3 VFs without FILTMGMT privilege */ if (!BEx_chip(adapter) || !be_virtfn(adapter) || - check_privilege(adapter, BE_PRIV_FILTMGMT)) + check_privilege(adapter, BE_PRIV_FILTMGMT)) { be_dev_mac_del(adapter, adapter->pmac_id[0]); + eth_zero_addr(adapter->dev_mac); + } be_clear_uc_list(adapter); be_clear_mc_list(adapter); @@ -3794,12 +3794,27 @@ static int be_enable_if_filters(struct be_adapter *adapter) if (status) return status; - /* Don't add MAC on BE3 VFs without FILTMGMT privilege */ - if (!BEx_chip(adapter) || !be_virtfn(adapter) || - check_privilege(adapter, BE_PRIV_FILTMGMT)) 
{ + /* Normally this condition is true, as ->dev_mac is zeroed. + * But on BE3 VFs the initial MAC is pre-programmed by the PF and + * a subsequent be_dev_mac_add() can fail (after a fresh boot) + */ + if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) { + int old_pmac_id = -1; + + /* Remember old programmed MAC if any - can happen on BE3 VF */ + if (!is_zero_ether_addr(adapter->dev_mac)) + old_pmac_id = adapter->pmac_id[0]; + status = be_dev_mac_add(adapter, adapter->netdev->dev_addr); if (status) return status; + + /* Delete the old programmed MAC as we successfully programmed + * a new MAC + */ + if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0]) + be_dev_mac_del(adapter, old_pmac_id); + ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr); } @@ -4573,6 +4588,10 @@ static int be_mac_setup(struct be_adapter *adapter) memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN); memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN); + + /* Initial MAC for BE3 VFs is already programmed by PF */ + if (BEx_chip(adapter) && be_virtfn(adapter)) + memcpy(adapter->dev_mac, mac, ETH_ALEN); } return 0; @@ -4714,6 +4733,15 @@ int be_update_queues(struct be_adapter *adapter) be_schedule_worker(adapter); + /* + * The IF was destroyed and re-created. We need to clear + * all promiscuous flags valid for the destroyed IF. + * Without this, promisc mode is not restored during + * be_open() because the driver thinks that it is + * already enabled in HW. + */ + adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS; + if (netif_running(netdev)) status = be_open(netdev); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 12aef1b15356b00ad0139d6cb26f264ace84925d..91709187125953d8c85590eb31efb0d5a0774301 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -172,10 +172,12 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #endif /* CONFIG_M5272 */ /* The FEC stores dest/src/type/vlan, data, and checksum for receive packets. + * + * 2048-byte skbufs are allocated. However, alignment requirements + * vary between FEC variants. Worst case is 64, so round down by 64. */ -#define PKT_MAXBUF_SIZE 1522 +#define PKT_MAXBUF_SIZE (round_down(2048 - 64, 64)) #define PKT_MINBUF_SIZE 64 -#define PKT_MAXBLR_SIZE 1536 /* FEC receive acceleration */ #define FEC_RACC_IPDIS (1 << 1) @@ -813,6 +815,12 @@ static void fec_enet_bd_init(struct net_device *dev) for (i = 0; i < txq->bd.ring_size; i++) { /* Initialize the BD for every fragment in the page.
*/ bdp->cbd_sc = cpu_to_fec16(0); + if (bdp->cbd_bufaddr && + !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); if (txq->tx_skbuff[i]) { dev_kfree_skb_any(txq->tx_skbuff[i]); txq->tx_skbuff[i] = NULL; @@ -847,7 +855,7 @@ static void fec_enet_enable_ring(struct net_device *ndev) for (i = 0; i < fep->num_rx_queues; i++) { rxq = fep->rx_queue[i]; writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i)); - writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i)); + writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i)); /* enable DMA1/2 */ if (i) @@ -2923,6 +2931,7 @@ static void set_multicast_list(struct net_device *ndev) struct netdev_hw_addr *ha; unsigned int i, bit, data, crc, tmp; unsigned char hash; + unsigned int hash_high = 0, hash_low = 0; if (ndev->flags & IFF_PROMISC) { tmp = readl(fep->hwp + FEC_R_CNTRL); @@ -2945,11 +2954,7 @@ static void set_multicast_list(struct net_device *ndev) return; } - /* Clear filter and add the addresses in hash register - */ - writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); - writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW); - + /* Add the addresses in hash register */ netdev_for_each_mc_addr(ha, ndev) { /* calculate crc32 value of mac address */ crc = 0xffffffff; @@ -2967,16 +2972,14 @@ static void set_multicast_list(struct net_device *ndev) */ hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f; - if (hash > 31) { - tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH); - tmp |= 1 << (hash - 32); - writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); - } else { - tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_LOW); - tmp |= 1 << hash; - writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_LOW); - } + if (hash > 31) + hash_high |= 1 << (hash - 32); + else + hash_low |= 1 << hash; } + + writel(hash_high, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); + writel(hash_low, fep->hwp + FEC_GRP_HASH_TABLE_LOW); } /* Set a MAC change in hardware. 
*/ diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b8778e7b1f792f2e0138425e8f2afa1f4d7ef65c..7c6c1468628bfbd9029cde2233c0aa2083c7b5c3 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -404,7 +404,7 @@ static int ibmvnic_open(struct net_device *netdev) send_map_query(adapter); for (i = 0; i < rxadd_subcrqs; i++) { init_rx_pool(adapter, &adapter->rx_pool[i], - IBMVNIC_BUFFS_PER_POOL, i, + adapter->req_rx_add_entries_per_subcrq, i, be64_to_cpu(size_array[i]), 1); if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) { dev_err(dev, "Couldn't alloc rx pool\n"); @@ -419,23 +419,23 @@ static int ibmvnic_open(struct net_device *netdev) for (i = 0; i < tx_subcrqs; i++) { tx_pool = &adapter->tx_pool[i]; tx_pool->tx_buff = - kcalloc(adapter->max_tx_entries_per_subcrq, + kcalloc(adapter->req_tx_entries_per_subcrq, sizeof(struct ibmvnic_tx_buff), GFP_KERNEL); if (!tx_pool->tx_buff) goto tx_pool_alloc_failed; if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff, - adapter->max_tx_entries_per_subcrq * + adapter->req_tx_entries_per_subcrq * adapter->req_mtu)) goto tx_ltb_alloc_failed; tx_pool->free_map = - kcalloc(adapter->max_tx_entries_per_subcrq, + kcalloc(adapter->req_tx_entries_per_subcrq, sizeof(int), GFP_KERNEL); if (!tx_pool->free_map) goto tx_fm_alloc_failed; - for (j = 0; j < adapter->max_tx_entries_per_subcrq; j++) + for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++) tx_pool->free_map[j] = j; tx_pool->consumer_index = 0; @@ -705,6 +705,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; struct ibmvnic_tx_buff *tx_buff = NULL; + struct ibmvnic_sub_crq_queue *tx_scrq; struct ibmvnic_tx_pool *tx_pool; unsigned int tx_send_failed = 0; unsigned int tx_map_failed = 0; @@ -724,6 +725,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) int ret = 0; tx_pool = &adapter->tx_pool[queue_num]; + tx_scrq = adapter->tx_scrq[queue_num]; txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb)); handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + be32_to_cpu(adapter->login_rsp_buf-> @@ -744,7 +746,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_pool->consumer_index = (tx_pool->consumer_index + 1) % - adapter->max_tx_entries_per_subcrq; + adapter->req_tx_entries_per_subcrq; tx_buff = &tx_pool->tx_buff[index]; tx_buff->skb = skb; @@ -817,7 +819,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) if (tx_pool->consumer_index == 0) tx_pool->consumer_index = - adapter->max_tx_entries_per_subcrq - 1; + adapter->req_tx_entries_per_subcrq - 1; else tx_pool->consumer_index--; @@ -826,6 +828,14 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ret = NETDEV_TX_BUSY; goto out; } + + atomic_inc(&tx_scrq->used); + + if (atomic_read(&tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) { + netdev_info(netdev, "Stopping queue %d\n", queue_num); + netif_stop_subqueue(netdev, queue_num); + } + tx_packets++; tx_bytes += skb->len; txq->trans_start = jiffies; @@ -1220,6 +1230,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->adapter = adapter; scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs); scrq->cur = 0; + atomic_set(&scrq->used, 0); scrq->rx_skb_top = NULL; spin_lock_init(&scrq->lock); @@ -1368,14 +1379,28 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, 
DMA_TO_DEVICE); } - if (txbuff->last_frag) + if (txbuff->last_frag) { + atomic_dec(&scrq->used); + + if (atomic_read(&scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + netif_subqueue_stopped(adapter->netdev, + txbuff->skb)) { + netif_wake_subqueue(adapter->netdev, + scrq->pool_index); + netdev_dbg(adapter->netdev, + "Started queue %d\n", + scrq->pool_index); + } + dev_kfree_skb_any(txbuff->skb); + } adapter->tx_pool[pool].free_map[adapter->tx_pool[pool]. producer_index] = index; adapter->tx_pool[pool].producer_index = (adapter->tx_pool[pool].producer_index + 1) % - adapter->max_tx_entries_per_subcrq; + adapter->req_tx_entries_per_subcrq; } /* remove tx_comp scrq*/ next->tx_comp.first = 0; diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index dd775d951b739eed4cd27985c055cacef0e56b6b..892eda346e54ae31f198200d824b8df6c100179d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -863,6 +863,7 @@ struct ibmvnic_sub_crq_queue { spinlock_t lock; struct sk_buff *rx_skb_top; struct ibmvnic_adapter *adapter; + atomic_t used; }; struct ibmvnic_long_term_buff { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index f3aaca743ea3ff3a2250547e47fb8d1c275f0cea..8a48656a376bb473fd24e679fe8108e8deb56112 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1364,6 +1364,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. + * + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link + * up). **/ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) { @@ -1379,7 +1382,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 0; + return 1; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1611,10 +1614,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * different link partner. 
*/ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) + if (ret_val) { e_dbg("Error configuring flow control\n"); + return ret_val; + } - return ret_val; + return 1; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 4d19e46f7c5573857327d280f97a352985583bea..3693ae104c2a1e7b487cd86696e653bcb2b3019f 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -508,8 +508,8 @@ s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid); int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac); int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, u8 qos, __be16 vlan_proto); -int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate, - int unused); +int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, + int __always_unused min_rate, int max_rate); int fm10k_ndo_get_vf_config(struct net_device *netdev, int vf_idx, struct ifla_vf_info *ivi); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 5f4dac0d36ef1511e7587c53d1ec2800b5d5c814..e72fd52bacfe0c015ad35e1759b03b32fe05c2ed 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -126,6 +126,9 @@ s32 fm10k_iov_mbx(struct fm10k_intfc *interface) struct fm10k_mbx_info *mbx = &vf_info->mbx; u16 glort = vf_info->glort; + /* process the SM mailbox first to drain outgoing messages */ + hw->mbx.ops.process(hw, &hw->mbx); + /* verify port mapping is valid, if not reset port */ if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) hw->iov.ops.reset_lport(hw, vf_info); @@ -482,7 +485,7 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, } int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, - int __always_unused unused, int rate) + int __always_unused min_rate, int max_rate) { struct fm10k_intfc *interface = netdev_priv(netdev); struct fm10k_iov_data *iov_data = interface->iov_data; @@ -493,14 +496,15 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, return -EINVAL; /* rate limit cannot be less than 10Mbs or greater than link speed */ - if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX)) + if (max_rate && + (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX)) return -EINVAL; /* store values */ - iov_data->vf_info[vf_idx].rate = rate; + iov_data->vf_info[vf_idx].rate = max_rate; /* update hardware configuration */ - hw->iov.ops.configure_tc(hw, vf_idx, rate); + hw->iov.ops.configure_tc(hw, vf_idx, max_rate); return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2caafebb0295f04f3305bd1dd208b97548ebe4bb..becffd15c092609afb0c12be2a851d177f2d7674 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4217,8 +4217,12 @@ static void i40e_napi_enable_all(struct i40e_vsi *vsi) if (!vsi->netdev) return; - for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_enable(&vsi->q_vectors[q_idx]->napi); + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_enable(&q_vector->napi); + } } /** @@ -4232,8 +4236,12 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi) if (!vsi->netdev) return; - 
for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_disable(&vsi->q_vectors[q_idx]->napi); + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_disable(&q_vector->napi); + } } /** diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 16839600fb78753a2e9e3a5ea7ec09113c690237..3a61491421b1d69c255c3cadbbbddb0bdd4a8b0f 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3102,6 +3102,8 @@ static int igb_sw_init(struct igb_adapter *adapter) /* Setup and initialize a copy of the hw vlan table array */ adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), GFP_ATOMIC); + if (!adapter->shadow_vfta) + return -ENOMEM; /* This call may decrease the number of queues */ if (igb_init_interrupt_scheme(adapter, true)) { @@ -3271,7 +3273,7 @@ static int __igb_close(struct net_device *netdev, bool suspending) int igb_close(struct net_device *netdev) { - if (netif_device_present(netdev)) + if (netif_device_present(netdev) || netdev->dismantle) return __igb_close(netdev, false); return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 77d3039283f67ce380e93b660c4dfe264888ab42..ad3362293cbd6ab4db0301babf655af61fed9417 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -3696,10 +3696,10 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, fw_cmd.ver_build = build; fw_cmd.ver_sub = sub; fw_cmd.hdr.checksum = 0; - fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, - (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); fw_cmd.pad = 0; fw_cmd.pad2 = 0; + fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, + (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { ret_val = ixgbe_host_interface_command(hw, &fw_cmd, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 60f0bf779073bcbaff7b7afd2b1794593c884f4c..77a60aa5dc7e96cdd96dff15efe8a101d7788f3f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -617,6 +617,8 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, /* convert offset from words to bytes */ buffer.address = cpu_to_be32((offset + current_word) * 2); buffer.length = cpu_to_be16(words_to_read * 2); + buffer.pad2 = 0; + buffer.pad3 = 0; status = ixgbe_host_interface_command(hw, &buffer, sizeof(buffer), diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index a0d1b084ecec9820aead869a281289a2d4249d9c..7aeb7fedb364b393e1102a9cf5a1ab38290774ed 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -232,7 +232,8 @@ static int orion_mdio_probe(struct platform_device *pdev) dev->regs + MVMDIO_ERR_INT_MASK); } else if (dev->err_interrupt == -EPROBE_DEFER) { - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto out_mdio; } mutex_init(&dev->lock); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 6ea10a9f33e894bb339a12fe42191cb76a2d517f..fa463268d019a343890e67b0d96210e57db97cab 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1182,6 +1182,10 @@ static void 
mvneta_port_disable(struct mvneta_port *pp) val &= ~MVNETA_GMAC0_PORT_ENABLE; mvreg_write(pp, MVNETA_GMAC_CTRL_0, val); + pp->link = 0; + pp->duplex = -1; + pp->speed = 0; + udelay(200); } @@ -1905,9 +1909,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo, if (!mvneta_rxq_desc_is_first_last(rx_status) || (rx_status & MVNETA_RXD_ERR_SUMMARY)) { + mvneta_rx_error(pp, rx_desc); err_drop_frame: dev->stats.rx_errors++; - mvneta_rx_error(pp, rx_desc); /* leave the descriptor untouched */ continue; } @@ -2922,7 +2926,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp) { int queue; - for (queue = 0; queue < txq_number; queue++) + for (queue = 0; queue < rxq_number; queue++) mvneta_rxq_deinit(pp, &pp->rxqs[queue]); } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index e36bebcab3f228794b462fd834e592f07c651ad4..dae9dcfa8f369a735be8ed70f02d4a2e333067b0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2304,6 +2304,17 @@ static int sync_toggles(struct mlx4_dev *dev) rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { /* PCI might be offline */ + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) { + mlx4_warn(dev, + "communication channel is offline\n"); + return -EIO; + } + msleep(100); wr_toggle = swab32(readl(&priv->mfunc.comm-> slave_write)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index d4d97ca12e83ba768616f6de34c55686218b3c45..f9897d17f01da0301dd484d54fbd60e1618afab3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -251,13 +251,9 @@ static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; - u64 tmp_rounded = - roundup_pow_of_two(max_val_cycles) > max_val_cycles ? - roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX; - u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? - max_val_cycles : tmp_rounded; + u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1); /* calculate max possible multiplier in order to fit in 64bit */ - u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); + u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded); /* This comes from the reverse of clocksource_khz2mult */ return ilog2(div_u64(max_mul * freq_khz, 1000000)); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 727122de7df0de75c8cfe268a2b19d5bddd4cf54..5411ca48978ad62306d76dd6103b4c39a32430f2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1940,6 +1940,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); if (!offline_bit) return 0; + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) + break; + /* There are cases as part of AER/Reset flow that PF needs * around 100 msec to load. 
We therefore sleep for 100 msec * to allow other tasks to make use of that CPU during this @@ -3954,6 +3962,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(priv); int active_vfs = 0; + if (mlx4_is_slave(dev)) + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; + mutex_lock(&persist->interface_state_mutex); persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; mutex_unlock(&persist->interface_state_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index f7fabecc104f1995ecd0462d2dc4b102b48b06c1..4c3f1cb7e2c90d3259b5074800f066a8059ec0da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -367,7 +367,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: - case MLX5_CMD_OP_SET_RATE_LIMIT: + case MLX5_CMD_OP_SET_PP_RATE_LIMIT: case MLX5_CMD_OP_QUERY_RATE_LIMIT: case MLX5_CMD_OP_ALLOC_PD: case MLX5_CMD_OP_ALLOC_UAR: @@ -502,7 +502,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); - MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT); MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); MLX5_COMMAND_STR_CASE(ALLOC_PD); MLX5_COMMAND_STR_CASE(DEALLOC_PD); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9d3722930c954505735dfeea898f2b89673c0e26..38981db43bc356bfec2c4f84a8c577632a34cb2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3038,6 +3038,7 @@ static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, struct sk_buff *skb, netdev_features_t features) { + unsigned int offset = 0; struct udphdr *udph; u16 proto; u16 port = 0; @@ -3047,7 +3048,7 @@ static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, proto = ip_hdr(skb)->protocol; break; case htons(ETH_P_IPV6): - proto = ipv6_hdr(skb)->nexthdr; + proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); break; default: goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4de3c28b054787577d423428e97a307eb4e495b1..331a6ca4856da20f2a70372dd377a96e926b0d9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1015,7 +1015,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, u32 *match_criteria) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct list_head *prev = ft->node.children.prev; + struct list_head *prev = &ft->node.children; unsigned int candidate_index = 0; struct mlx5_flow_group *fg; void *match_criteria_addr; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b3309f2ed7dc435003fd3e448335acb0c615f620..981cd1d84a5b5e62a3160b362c7a1f39f9824fed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1283,6 +1283,7 @@ static int init_one(struct pci_dev *pdev, if (err) goto clean_load; + pci_save_state(pdev); return 0; clean_load: @@ -1331,9 +1332,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev 
*pdev, mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - /* In case of kernel call save the pci state and drain the health wq */ + /* In case of kernel call drain the health wq */ if (state) { - pci_save_state(pdev); mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); } @@ -1385,6 +1385,7 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) pci_set_master(pdev); pci_restore_state(pdev); + pci_save_state(pdev); if (wait_vital(pdev)) { dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index d0a4005fe63a425869b71feb3626e998a34b0d39..9346f3985edf3c44ed80927d719bab5efe06057f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -303,8 +303,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, err_cmd: memset(din, 0, sizeof(din)); memset(dout, 0, sizeof(dout)); - MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); - MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, qpn, qp->qpn); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index 104902a93a0b577f22ecd43824c6c42b5a9b059d..2be9ec5fd651d5e3aea58437d2fbc7c3ea37ba6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -60,16 +60,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, return ret_entry; } -static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, +static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev, u32 rate, u16 index) { - u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0}; - MLX5_SET(set_rate_limit_in, in, opcode, - MLX5_CMD_OP_SET_RATE_LIMIT); - MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); - MLX5_SET(set_rate_limit_in, in, rate_limit, rate); + MLX5_SET(set_pp_rate_limit_in, in, opcode, + MLX5_CMD_OP_SET_PP_RATE_LIMIT); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -108,7 +108,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index) entry->refcount++; } else { /* new rate limit */ - err = mlx5_set_rate_limit_cmd(dev, rate, entry->index); + err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index); if (err) { mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n", rate, err); @@ -144,7 +144,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate) entry->refcount--; if (!entry->refcount) { /* need to remove rate */ - mlx5_set_rate_limit_cmd(dev, 0, entry->index); + mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index); entry->rate = 0; } @@ -197,8 +197,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) /* Clear all configured rates */ for (i = 0; i < table->max_size; i++) if (table->rl_entry[i].rate) - mlx5_set_rate_limit_cmd(dev, 0, - table->rl_entry[i].index); + mlx5_set_pp_rate_limit_cmd(dev, 0, + table->rl_entry[i].index); kfree(dev->priv.rl_table.rl_entry); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c 
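[Editor's note] The vxlan.c/vxlan.h hunks that follow convert the mlx5 VXLAN port database to reference counting, so a duplicate offload request bumps a count instead of racing a delete, and the entry is only removed and freed by its last user; the firmware command and kfree() are deferred until after the spinlock is dropped. A minimal sketch of that pattern under BH-safe locking — port_db, port_entry and hw_del_port_cmd() are illustrative stand-ins, not the driver's symbols:

	#include <linux/atomic.h>
	#include <linux/radix-tree.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>
	#include <linux/types.h>

	struct port_db {		/* stand-in for the driver's port database */
		spinlock_t lock;
		struct radix_tree_root tree;
	};

	struct port_entry {		/* stand-in for the per-port object */
		atomic_t refcount;
		u16 udp_port;
	};

	/* Hypothetical firmware call; it may sleep, so it must run unlocked. */
	static void hw_del_port_cmd(u16 port)
	{
	}

	static void port_del(struct port_db *db, u16 port)
	{
		struct port_entry *e;
		bool remove = false;

		spin_lock_bh(&db->lock);
		e = radix_tree_lookup(&db->tree, port);
		if (e && atomic_dec_and_test(&e->refcount)) {
			/* Last reference: unpublish under the lock... */
			radix_tree_delete(&db->tree, port);
			remove = true;
		}
		spin_unlock_bh(&db->lock);

		/* ...but do the heavy teardown outside it. */
		if (remove) {
			hw_del_port_cmd(port);
			kfree(e);
		}
	}
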
index 07a9ba6cfc70a11f7b4c05c73c1b32a704b7e6ba..2f74953e4561511e23d8fe3219db89104e3dd9e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port) struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; struct mlx5e_vxlan *vxlan; - spin_lock(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); vxlan = radix_tree_lookup(&vxlan_db->tree, port); - spin_unlock(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); return vxlan; } @@ -88,8 +88,12 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) struct mlx5e_vxlan *vxlan; int err; - if (mlx5e_vxlan_lookup_port(priv, port)) + mutex_lock(&priv->state_lock); + vxlan = mlx5e_vxlan_lookup_port(priv, port); + if (vxlan) { + atomic_inc(&vxlan->refcount); goto free_work; + } if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port)) goto free_work; @@ -99,10 +103,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) goto err_delete_port; vxlan->udp_port = port; + atomic_set(&vxlan->refcount, 1); - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan); - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); if (err) goto err_free; @@ -113,35 +118,39 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) err_delete_port: mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); free_work: + mutex_unlock(&priv->state_lock); kfree(vxlan_work); } -static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port) +static void mlx5e_vxlan_del_port(struct work_struct *work) { + struct mlx5e_vxlan_work *vxlan_work = + container_of(work, struct mlx5e_vxlan_work, work); + struct mlx5e_priv *priv = vxlan_work->priv; struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; + u16 port = vxlan_work->port; struct mlx5e_vxlan *vxlan; + bool remove = false; - spin_lock_irq(&vxlan_db->lock); - vxlan = radix_tree_delete(&vxlan_db->tree, port); - spin_unlock_irq(&vxlan_db->lock); - + mutex_lock(&priv->state_lock); + spin_lock_bh(&vxlan_db->lock); + vxlan = radix_tree_lookup(&vxlan_db->tree, port); if (!vxlan) - return; - - mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port); - - kfree(vxlan); -} + goto out_unlock; -static void mlx5e_vxlan_del_port(struct work_struct *work) -{ - struct mlx5e_vxlan_work *vxlan_work = - container_of(work, struct mlx5e_vxlan_work, work); - struct mlx5e_priv *priv = vxlan_work->priv; - u16 port = vxlan_work->port; + if (atomic_dec_and_test(&vxlan->refcount)) { + radix_tree_delete(&vxlan_db->tree, port); + remove = true; + } - __mlx5e_vxlan_core_del_port(priv, port); +out_unlock: + spin_unlock_bh(&vxlan_db->lock); + if (remove) { + mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); + kfree(vxlan); + } + mutex_unlock(&priv->state_lock); kfree(vxlan_work); } @@ -171,12 +180,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv) struct mlx5e_vxlan *vxlan; unsigned int port = 0; - spin_lock_irq(&vxlan_db->lock); + /* Lockless since we are the only radix-tree consumers, wq is disabled */ while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) { port = vxlan->udp_port; - spin_unlock_irq(&vxlan_db->lock); - __mlx5e_vxlan_core_del_port(priv, (u16)port); - spin_lock_irq(&vxlan_db->lock); + radix_tree_delete(&vxlan_db->tree, port); + mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); + kfree(vxlan); } - spin_unlock_irq(&vxlan_db->lock); } diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h index 5def12c048e38992e7edd9f870233e369ef4580e..5ef6ae7d568abcd1410bc403b526628a634799cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h @@ -36,6 +36,7 @@ #include "en.h" struct mlx5e_vxlan { + atomic_t refcount; u16 udp_port; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 6460c7256f2ba0260245f69200bc176167b4c6ee..a01e6c0d0cd1a26a65f875a1d5a39f0a9208055d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -788,7 +788,7 @@ static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid) #define MLXSW_REG_SPVM_ID 0x200F #define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */ #define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */ -#define MLXSW_REG_SPVM_REC_MAX_COUNT 256 +#define MLXSW_REG_SPVM_REC_MAX_COUNT 255 #define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \ MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT) @@ -1757,7 +1757,7 @@ static inline void mlxsw_reg_sfmr_pack(char *payload, #define MLXSW_REG_SPVMLR_ID 0x2020 #define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */ #define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */ -#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256 +#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255 #define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \ MLXSW_REG_SPVMLR_REC_LEN * \ MLXSW_REG_SPVMLR_REC_MAX_COUNT) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9e31a33901545f4518c31edf3a99d4ca84762898..16556011d5711a11dbf637346fe6e3353f875d71 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -765,11 +765,8 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, dipn = htonl(dip); dev = mlxsw_sp->rifs[rif]->dev; n = neigh_lookup(&arp_tbl, &dipn, dev); - if (!n) { - netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n", - &dip); + if (!n) return; - } netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); neigh_event_send(n, NULL); @@ -1328,9 +1325,9 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, bool removing) { - if (!removing && !nh->should_offload) + if (!removing) nh->should_offload = 1; - else if (removing && nh->offloaded) + else nh->should_offload = 0; nh->update = 1; } diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 4367dd6879a22791a1bef1726ba8d770b03cebca..0622fd03941b85ffb60699c708f09d80441ec529 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "moxart_ether.h" @@ -278,6 +279,13 @@ static int moxart_rx_poll(struct napi_struct *napi, int budget) return rx; } +static int moxart_tx_queue_space(struct net_device *ndev) +{ + struct moxart_mac_priv_t *priv = netdev_priv(ndev); + + return CIRC_SPACE(priv->tx_head, priv->tx_tail, TX_DESC_NUM); +} + static void moxart_tx_finished(struct net_device *ndev) { struct moxart_mac_priv_t *priv = netdev_priv(ndev); @@ -297,6 +305,9 @@ static void moxart_tx_finished(struct net_device *ndev) tx_tail = TX_NEXT(tx_tail); } priv->tx_tail = 
tx_tail; + if (netif_queue_stopped(ndev) && + moxart_tx_queue_space(ndev) >= TX_WAKE_THRESHOLD) + netif_wake_queue(ndev); } static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id) @@ -324,13 +335,18 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct moxart_mac_priv_t *priv = netdev_priv(ndev); void *desc; unsigned int len; - unsigned int tx_head = priv->tx_head; + unsigned int tx_head; u32 txdes1; int ret = NETDEV_TX_BUSY; + spin_lock_irq(&priv->txlock); + + tx_head = priv->tx_head; desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head); - spin_lock_irq(&priv->txlock); + if (moxart_tx_queue_space(ndev) == 1) + netif_stop_queue(ndev); + if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) { net_dbg_ratelimited("no TX space for packet\n"); priv->stats.tx_dropped++; diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h index 93a9563ac7c6730eec8240ac187a86b9831c9741..afc32ec998c043957f0b472080d22d01600edf2a 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.h +++ b/drivers/net/ethernet/moxa/moxart_ether.h @@ -59,6 +59,7 @@ #define TX_NEXT(N) (((N) + 1) & (TX_DESC_NUM_MASK)) #define TX_BUF_SIZE 1600 #define TX_BUF_SIZE_MAX (TX_DESC1_BUF_SIZE_MASK+1) +#define TX_WAKE_THRESHOLD 16 #define RX_DESC_NUM 64 #define RX_DESC_NUM_MASK (RX_DESC_NUM-1) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 0c42c240b5cfdff66dc8ef021b2401812a0fec17..ed014bdbbabde4a6b529f8f0f8e7707bb72fd6f4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -373,8 +373,9 @@ static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn, u32 page_sz = p_mgr->clients[ILT_CLI_CDUC].p_size.val; u32 cxt_size = CONN_CXT_SIZE(p_hwfn); u32 elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size; + u32 align = elems_per_page * DQ_RANGE_ALIGN; - p_conn->cid_count = roundup(p_conn->cid_count, elems_per_page); + p_conn->cid_count = roundup(p_conn->cid_count, align); } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 62ae55bd81b8cc2940d957e6122471e29f92a6ea..a3360cbdb30bddae65fbc0cfc983dc06d6aa5a70 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -187,6 +187,8 @@ static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn, /* If need to reuse or there's no replacement buffer, repost this */ if (rc) goto out_post; + dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, + cdev->ll2->rx_size, DMA_FROM_DEVICE); skb = build_skb(buffer->data, 0); if (!skb) { @@ -441,7 +443,7 @@ qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn, static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn, union core_rx_cqe_union *p_cqe, - unsigned long lock_flags, + unsigned long *p_lock_flags, bool b_last_cqe) { struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; @@ -462,10 +464,10 @@ static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, "Mismatch between active_descq and the LL2 Rx chain\n"); list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); - spin_unlock_irqrestore(&p_rx->lock, lock_flags); + spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags); qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id, p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe); - spin_lock_irqsave(&p_rx->lock, lock_flags); + spin_lock_irqsave(&p_rx->lock, *p_lock_flags); return 0; } @@ -505,7 +507,8 @@ static int 
qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) break; case CORE_RX_CQE_TYPE_REGULAR: rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn, - cqe, flags, b_last_cqe); + cqe, &flags, + b_last_cqe); break; default: rc = -EIO; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index bdbcd2b088a01e5eaf85a1c0d9b581c221c6ca8c..c3c28f0960e50e4aa1147d1bb7c8b639e0c2615d 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -3849,7 +3849,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) struct list_head *head = &mbx->cmd_q; struct qlcnic_cmd_args *cmd = NULL; - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); while (!list_empty(head)) { cmd = list_entry(head->next, struct qlcnic_cmd_args, list); @@ -3860,7 +3860,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) qlcnic_83xx_notify_cmd_completion(adapter, cmd); } - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); } static int qlcnic_83xx_check_mbx_status(struct qlcnic_adapter *adapter) @@ -3896,12 +3896,12 @@ static void qlcnic_83xx_dequeue_mbx_cmd(struct qlcnic_adapter *adapter, { struct qlcnic_mailbox *mbx = adapter->ahw->mailbox; - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); list_del(&cmd->list); mbx->num_cmds--; - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); qlcnic_83xx_notify_cmd_completion(adapter, cmd); } @@ -3966,7 +3966,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, init_completion(&cmd->completion); cmd->rsp_opcode = QLC_83XX_MBX_RESPONSE_UNKNOWN; - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); list_add_tail(&cmd->list, &mbx->cmd_q); mbx->num_cmds++; @@ -3974,7 +3974,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, *timeout = cmd->total_cmds * QLC_83XX_MBX_TIMEOUT; queue_work(mbx->work_q, &mbx->work); - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); return 0; } @@ -4070,15 +4070,15 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work) mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT; spin_unlock_irqrestore(&mbx->aen_lock, flags); - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); if (list_empty(head)) { - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); return; } cmd = list_entry(head->next, struct qlcnic_cmd_args, list); - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); mbx_ops->encode_cmd(adapter, cmd); mbx_ops->nofity_fw(adapter, QLC_83XX_MBX_REQUEST); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 2c4350a1c6296f57dfceee070c94b31eeeaea011..18e68c91e65130023f277fa136159de2320b7984 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1387,7 +1387,7 @@ DECLARE_RTL_COND(rtl_ocp_tx_cond) { void __iomem *ioaddr = tp->mmio_addr; - return RTL_R8(IBISR0) & 0x02; + return RTL_R8(IBISR0) & 0x20; } static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) @@ -1395,7 +1395,7 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) void __iomem *ioaddr = tp->mmio_addr; RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01); - rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000); + rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000); RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20); RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01); } @@ -2222,19 +2222,14 @@ 
static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd) void __iomem *ioaddr = tp->mmio_addr; dma_addr_t paddr = tp->counters_phys_addr; u32 cmd; - bool ret; RTL_W32(CounterAddrHigh, (u64)paddr >> 32); + RTL_R32(CounterAddrHigh); cmd = (u64)paddr & DMA_BIT_MASK(32); RTL_W32(CounterAddrLow, cmd); RTL_W32(CounterAddrLow, cmd | counter_cmd); - ret = rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); - - RTL_W32(CounterAddrLow, 0); - RTL_W32(CounterAddrHigh, 0); - - return ret; + return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); } static bool rtl8169_reset_counters(struct net_device *dev) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 11623aad0e8e558ec7d5ffd324c7623752ec9750..10d3a9f6349e1e43f9ddcb40ac3dbac406741ff9 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -941,14 +941,10 @@ static int ravb_poll(struct napi_struct *napi, int budget) /* Receive error message handling */ priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; - if (priv->rx_over_errors != ndev->stats.rx_over_errors) { + if (priv->rx_over_errors != ndev->stats.rx_over_errors) ndev->stats.rx_over_errors = priv->rx_over_errors; - netif_err(priv, rx_err, ndev, "Receive Descriptor Empty\n"); - } - if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) { + if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; - netif_err(priv, rx_err, ndev, "Receive FIFO Overflow\n"); - } out: return budget - quota; } diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 2140dedab7124b77b2b7d8fe968165636cb921e1..b6816ae00b7aa307242ff68b20f773519ee8f4f9 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3087,18 +3087,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev) /* ioremap the TSU registers */ if (mdp->cd->tsu) { struct resource *rtsu; + rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1); - mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu); - if (IS_ERR(mdp->tsu_addr)) { - ret = PTR_ERR(mdp->tsu_addr); + if (!rtsu) { + dev_err(&pdev->dev, "no TSU resource\n"); + ret = -ENODEV; + goto out_release; + } + /* We can only request the TSU region for the first port + * of the two sharing this TSU for the probe to succeed... 
+ */ + if (devno % 2 == 0 && + !devm_request_mem_region(&pdev->dev, rtsu->start, + resource_size(rtsu), + dev_name(&pdev->dev))) { + dev_err(&pdev->dev, "can't request TSU resource.\n"); + ret = -EBUSY; + goto out_release; + } + mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start, + resource_size(rtsu)); + if (!mdp->tsu_addr) { + dev_err(&pdev->dev, "TSU region ioremap() failed.\n"); + ret = -ENOMEM; goto out_release; } mdp->port = devno % 2; ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER; } - /* initialize first or needed device */ - if (!devno || pd->needs_init) { + /* Need to init only the first port of the two sharing a TSU */ + if (devno % 2 == 0) { if (mdp->cd->chip_reset) mdp->cd->chip_reset(ndev); diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 1d85109cb8ed23c5faddd4fdc3862be5406d0bd0..3d5d5d54c1033032b3129e0016929b95b44776c0 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -4967,7 +4967,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) * MCFW do not support VFs. */ rc = efx_ef10_vport_set_mac_address(efx); - } else { + } else if (rc) { efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC, sizeof(inbuf), NULL, 0, rc); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ef6bff820cf6e45f44ecd2c8da9162d5106024fb..98bbb91336e4dd8b67cd51c04ff79e31b68a6945 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -280,8 +280,14 @@ static void stmmac_eee_ctrl_timer(unsigned long arg) bool stmmac_eee_init(struct stmmac_priv *priv) { unsigned long flags; + int interface = priv->plat->interface; bool ret = false; + if ((interface != PHY_INTERFACE_MODE_MII) && + (interface != PHY_INTERFACE_MODE_GMII) && + !phy_interface_mode_is_rgmii(interface)) + goto out; + /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. 
*/ @@ -1795,6 +1801,7 @@ static int stmmac_open(struct net_device *dev) priv->dma_buf_sz = STMMAC_ALIGN(buf_sz); priv->rx_copybreak = STMMAC_RX_COPYBREAK; + priv->mss = 0; ret = alloc_dma_desc_resources(priv); if (ret < 0) { diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 6d68c8a8f4f2ac7f732ff6cfe862c71ae2460a27..da4ec575ccf9ba4aede92c0fd3a4842ed4257449 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -34,6 +34,7 @@ config XILINX_AXI_EMAC config XILINX_LL_TEMAC tristate "Xilinx LL TEMAC (LocalLink Tri-mode Ethernet MAC) driver" depends on (PPC || MICROBLAZE) + depends on !64BIT || BROKEN select PHYLIB ---help--- This driver supports the Xilinx 10/100/1000 LocalLink TEMAC diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index e46b1ebbbff4d88312510e42fc5e279c54ed34a4..7ea8ead4fd1c70a431ef3f8c96f3f3dccf23d18d 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1277,7 +1277,7 @@ static void fjes_netdev_setup(struct net_device *netdev) fjes_set_ethtool_ops(netdev); netdev->mtu = fjes_support_mtu[3]; netdev->flags |= IFF_BROADCAST; - netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER; + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; } static void fjes_irq_watch_task(struct work_struct *work) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index cebde074d19673e88f8edb01e69b15e5876df88b..cb206e5526c4cc606867c81aa21e4dc50eaa59a9 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -69,7 +69,6 @@ struct gtp_dev { struct socket *sock0; struct socket *sock1u; - struct net *net; struct net_device *dev; unsigned int hash_size; @@ -316,7 +315,7 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb) netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk); - xnet = !net_eq(gtp->net, dev_net(gtp->dev)); + xnet = !net_eq(sock_net(sk), dev_net(gtp->dev)); switch (udp_sk(sk)->encap_type) { case UDP_ENCAP_GTP0: @@ -612,7 +611,7 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) pktinfo.fl4.saddr, pktinfo.fl4.daddr, pktinfo.iph->tos, ip4_dst_hoplimit(&pktinfo.rt->dst), - htons(IP_DF), + 0, pktinfo.gtph_port, pktinfo.gtph_port, true, false); break; @@ -658,7 +657,7 @@ static void gtp_link_setup(struct net_device *dev) static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize); static void gtp_hashtable_free(struct gtp_dev *gtp); static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp, - int fd_gtp0, int fd_gtp1, struct net *src_net); + int fd_gtp0, int fd_gtp1); static int gtp_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) @@ -675,7 +674,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, fd0 = nla_get_u32(data[IFLA_GTP_FD0]); fd1 = nla_get_u32(data[IFLA_GTP_FD1]); - err = gtp_encap_enable(dev, gtp, fd0, fd1, src_net); + err = gtp_encap_enable(dev, gtp, fd0, fd1); if (err < 0) goto out_err; @@ -821,7 +820,7 @@ static void gtp_hashtable_free(struct gtp_dev *gtp) } static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp, - int fd_gtp0, int fd_gtp1, struct net *src_net) + int fd_gtp0, int fd_gtp1) { struct udp_tunnel_sock_cfg tuncfg = {NULL}; struct socket *sock0, *sock1u; @@ -858,7 +857,6 @@ static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp, gtp->sock0 = sock0; gtp->sock1u = sock1u; - gtp->net = src_net; tuncfg.sk_user_data = gtp; tuncfg.encap_rcv = gtp_encap_recv; diff --git 
a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index b4e990743e1da0cb3a946f5473d02cce7447bd1a..980e38524418df7cb08fd765c1b6755914934fdf 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -404,7 +404,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) struct dst_entry *dst; int err, ret = NET_XMIT_DROP; struct flowi6 fl6 = { - .flowi6_iif = dev->ifindex, + .flowi6_oif = dev->ifindex, .daddr = ip6h->daddr, .saddr = ip6h->saddr, .flowi6_flags = FLOWI_FLAG_ANYSRC, diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c index a0849f49bbec7401c3c6a366e0ca5c8d98fb651f..c0192f97ecc846dcb85fb67b77115dca0962e4e8 100644 --- a/drivers/net/irda/vlsi_ir.c +++ b/drivers/net/irda/vlsi_ir.c @@ -418,8 +418,9 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr memset(rd, 0, sizeof(*rd)); rd->hw = hwmap + i; rd->buf = kmalloc(len, GFP_KERNEL|GFP_DMA); - if (rd->buf == NULL || - !(busaddr = pci_map_single(pdev, rd->buf, len, dir))) { + if (rd->buf) + busaddr = pci_map_single(pdev, rd->buf, len, dir); + if (rd->buf == NULL || pci_dma_mapping_error(pdev, busaddr)) { if (rd->buf) { net_err_ratelimited("%s: failed to create PCI-MAP for %p\n", __func__, rd->buf); @@ -430,8 +431,7 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr rd = r->rd + j; busaddr = rd_get_addr(rd); rd_set_addr_status(rd, 0, 0); - if (busaddr) - pci_unmap_single(pdev, busaddr, len, dir); + pci_unmap_single(pdev, busaddr, len, dir); kfree(rd->buf); rd->buf = NULL; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index dc8ccac0a01df3d54025669bfc25b8d227b831a3..6d55049cd3dcb1c9cf4a2031d50dd6bb4278f178 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -452,7 +452,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) struct macvlan_dev, list); else vlan = macvlan_hash_lookup(port, eth->h_dest); - if (vlan == NULL) + if (!vlan || vlan->mode == MACVLAN_MODE_SOURCE) return RX_HANDLER_PASS; dev = vlan->dev; diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index a52b560e428b86cd5e782749725875a127a52ea7..3603eec7217fc80338d2b45ee7986b08b2796904 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -166,7 +166,7 @@ static int at803x_set_wol(struct phy_device *phydev, mac = (const u8 *) ndev->dev_addr; if (!is_valid_ether_addr(mac)) - return -EFAULT; + return -EINVAL; for (i = 0; i < 3; i++) { phy_write(phydev, AT803X_MMD_ACCESS_CONTROL, diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 2229188286558f7af147be57a5eddc02fde8d807..2032a6de026bfd26c6ded8a20a83ca034473e640 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -624,6 +624,7 @@ static int ksz9031_read_status(struct phy_device *phydev) phydev->link = 0; if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) phydev->drv->config_intr(phydev); + return genphy_config_aneg(phydev); } return 0; @@ -1020,7 +1021,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id = PHY_ID_KSZ8795, .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8795", - .features = (SUPPORTED_Pause | SUPPORTED_Asym_Pause), + .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index 93ffedfa299412f78af2c72fedc991101a52a451..1e2d4f1179da31ed1e458af5e29cc77314f875ad 100644 --- 
a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -491,13 +491,14 @@ static int ks8995_probe(struct spi_device *spi) if (err) return err; - ks->regs_attr.size = ks->chip->regs_size; memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr)); + ks->regs_attr.size = ks->chip->regs_size; err = ks8995_reset(ks); if (err) return err; + sysfs_attr_init(&ks->regs_attr.attr); err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr); if (err) { dev_err(&spi->dev, "unable to create sysfs file, err=%d\n", diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 440d5f42810f38258d70fae326871b89b6b6ac2d..fc4c2ccc3d22508e8b3b217ec511368204478b72 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -958,6 +958,7 @@ static __net_exit void ppp_exit_net(struct net *net) unregister_netdevice_many(&list); rtnl_unlock(); + mutex_destroy(&pn->all_ppp_mutex); idr_destroy(&pn->units_idr); } @@ -1001,17 +1002,18 @@ static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set) if (!ifname_is_set) snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ppp->file.index); + mutex_unlock(&pn->all_ppp_mutex); + ret = register_netdevice(ppp->dev); if (ret < 0) goto err_unit; atomic_inc(&ppp_unit_count); - mutex_unlock(&pn->all_ppp_mutex); - return 0; err_unit: + mutex_lock(&pn->all_ppp_mutex); unit_put(&pn->units_idr, ppp->file.index); err: mutex_unlock(&pn->all_ppp_mutex); diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 4ddae8118c8566e4de07a16b136af13597fe4f3e..dc36c2ec1d1039c89011a6a8d5b7d862fafd5e6b 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -842,6 +842,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, struct pppoe_hdr *ph; struct net_device *dev; char *start; + int hlen; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { @@ -860,16 +861,16 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, if (total_len > (dev->mtu + dev->hard_header_len)) goto end; - - skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32, - 0, GFP_KERNEL); + hlen = LL_RESERVED_SPACE(dev); + skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + + dev->needed_tailroom, 0, GFP_KERNEL); if (!skb) { error = -ENOMEM; goto end; } /* Reserve space for headers. */ - skb_reserve(skb, dev->hard_header_len); + skb_reserve(skb, hlen); skb_reset_network_header(skb); skb->dev = dev; @@ -930,7 +931,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) /* Copy the data if there is no space for the header or if it's * read-only. 
*/ - if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len)) + if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) goto abort; __skb_push(skb, sizeof(*ph)); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 053986983b1b219cafa702d48e9f83a856383fc3..5f516687be488c7fbacacdc145651157137aa306 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -525,6 +525,14 @@ static void tun_queue_purge(struct tun_file *tfile) skb_queue_purge(&tfile->sk.sk_error_queue); } +static void tun_cleanup_tx_array(struct tun_file *tfile) +{ + if (tfile->tx_array.ring.queue) { + skb_array_cleanup(&tfile->tx_array); + memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); + } +} + static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; @@ -566,8 +574,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - if (tun) - skb_array_cleanup(&tfile->tx_array); + tun_cleanup_tx_array(tfile); sock_put(&tfile->sk); } } @@ -606,11 +613,13 @@ static void tun_detach_all(struct net_device *dev) /* Drop read queue */ tun_queue_purge(tfile); sock_put(&tfile->sk); + tun_cleanup_tx_array(tfile); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_enable_queue(tfile); tun_queue_purge(tfile); sock_put(&tfile->sk); + tun_cleanup_tx_array(tfile); } BUG_ON(tun->numdisabled != 0); @@ -2369,6 +2378,8 @@ static int tun_chr_open(struct inode *inode, struct file * file) sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); + memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); + return 0; } diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c index e221bfcee76b40a3ad7ba60ec4d348f4b8f4cc73..947bea81d924124c3827e87f75e732e35adb2acd 100644 --- a/drivers/net/usb/cx82310_eth.c +++ b/drivers/net/usb/cx82310_eth.c @@ -293,12 +293,9 @@ static struct sk_buff *cx82310_tx_fixup(struct usbnet *dev, struct sk_buff *skb, { int len = skb->len; - if (skb_headroom(skb) < 2) { - struct sk_buff *skb2 = skb_copy_expand(skb, 2, 0, flags); + if (skb_cow_head(skb, 2)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } skb_push(skb, 2); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index f33460cec79f394238e8f489453d19d722245009..c53385a0052f1229b6d8b8ad64d179816e915fc7 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2197,6 +2197,7 @@ static int lan78xx_reset(struct lan78xx_net *dev) buf = DEFAULT_BURST_CAP_SIZE / FS_USB_PKT_SIZE; dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE; dev->rx_qlen = 4; + dev->tx_qlen = 4; } ret = lan78xx_write_reg(dev, BURST_CAP, buf); @@ -2419,14 +2420,9 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev, { u32 tx_cmd_a, tx_cmd_b; - if (skb_headroom(skb) < TX_OVERHEAD) { - struct sk_buff *skb2; - - skb2 = skb_copy_expand(skb, TX_OVERHEAD, 0, flags); + if (skb_cow_head(skb, TX_OVERHEAD)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } if (lan78xx_linearize(skb) < 0) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 9cf11c83993afbd89d30a587eb802934a6b3190e..e1e5e84384573f77d9a8881f4350c49abbc5eaac 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -74,9 +74,11 @@ static void qmi_wwan_netdev_setup(struct net_device *net) net->hard_header_len = 0; net->addr_len = 0; net->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + set_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: raw IP\n"); } else 
if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); + clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); } @@ -580,6 +582,10 @@ static const struct usb_device_id products[] = { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x69), .driver_info = (unsigned long)&qmi_wwan_info, }, + { /* Motorola Mapphone devices with MDM6600 */ + USB_VENDOR_AND_INTERFACE_INFO(0x22b8, USB_CLASS_VENDOR_SPEC, 0xfb, 0xff), + .driver_info = (unsigned long)&qmi_wwan_info, + }, /* 2. Combined interface devices matching on class+protocol */ { /* Huawei E367 and possibly others in "Windows mode" */ @@ -901,6 +907,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9091, 8)}, /* Sierra Wireless EM7565 */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ @@ -936,6 +943,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ + {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index afb953a258cd69009102b089a2fbcd0f9241fa1e..b2d7c7e32250e92ccc3239e30b0f51e26d6b24b0 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "8" +#define NET_VERSION "9" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." 
NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -501,6 +501,8 @@ enum rtl_register_content { #define RTL8153_RMS RTL8153_MAX_PACKET #define RTL8152_TX_TIMEOUT (5 * HZ) #define RTL8152_NAPI_WEIGHT 64 +#define rx_reserved_size(x) ((x) + VLAN_ETH_HLEN + CRC_SIZE + \ + sizeof(struct rx_desc) + RX_ALIGN) /* rtl8152 flags */ enum rtl8152_flags { @@ -1292,6 +1294,7 @@ static void intr_callback(struct urb *urb) } } else { if (netif_carrier_ok(tp->netdev)) { + netif_stop_queue(tp->netdev); set_bit(RTL8152_LINK_CHG, &tp->flags); schedule_delayed_work(&tp->schedule, 0); } @@ -1362,6 +1365,7 @@ static int alloc_all_mem(struct r8152 *tp) spin_lock_init(&tp->rx_lock); spin_lock_init(&tp->tx_lock); INIT_LIST_HEAD(&tp->tx_free); + INIT_LIST_HEAD(&tp->rx_done); skb_queue_head_init(&tp->tx_queue); skb_queue_head_init(&tp->rx_queue); @@ -2252,8 +2256,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp) static void r8153_set_rx_early_size(struct r8152 *tp) { - u32 mtu = tp->netdev->mtu; - u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8; + u32 ocp_data = (agg_buf_sz - rx_reserved_size(tp->netdev->mtu)) / 4; ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data); } @@ -3165,6 +3168,9 @@ static void set_carrier(struct r8152 *tp) napi_enable(&tp->napi); netif_wake_queue(netdev); netif_info(tp, link, netdev, "carrier on\n"); + } else if (netif_queue_stopped(netdev) && + skb_queue_len(&tp->tx_queue) < tp->tx_qlen) { + netif_wake_queue(netdev); } } else { if (netif_carrier_ok(netdev)) { @@ -3698,8 +3704,18 @@ static int rtl8152_resume(struct usb_interface *intf) tp->rtl_ops.autosuspend_en(tp, false); napi_disable(&tp->napi); set_bit(WORK_ENABLE, &tp->flags); - if (netif_carrier_ok(tp->netdev)) - rtl_start_rx(tp); + + if (netif_carrier_ok(tp->netdev)) { + if (rtl8152_get_speed(tp) & LINK_STATUS) { + rtl_start_rx(tp); + } else { + netif_carrier_off(tp->netdev); + tp->rtl_ops.disable(tp); + netif_info(tp, link, tp->netdev, + "linking down\n"); + } + } + napi_enable(&tp->napi); clear_bit(SELECTIVE_SUSPEND, &tp->flags); smp_mb__after_atomic(); diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 9af9799935dbbd4baed06eb03402803e0ce8d9c8..4cb9b11a545a9572ab6d248742ec5abee00fa7e4 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -2205,13 +2205,9 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev, { u32 tx_cmd_a, tx_cmd_b; - if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) { - struct sk_buff *skb2 = - skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags); + if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS; diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 4a1e9c489f1f455388ffee289d65e1d6b36cba42..aadfe1d1c37ee67e2a7d17c759db12dad248c41d 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -456,14 +456,9 @@ static struct sk_buff *sr9700_tx_fixup(struct usbnet *dev, struct sk_buff *skb, len = skb->len; - if (skb_headroom(skb) < SR_TX_OVERHEAD) { - struct sk_buff *skb2; - - skb2 = skb_copy_expand(skb, SR_TX_OVERHEAD, 0, flags); + if (skb_cow_head(skb, SR_TX_OVERHEAD)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } __skb_push(skb, SR_TX_OVERHEAD); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index d5071e364d40e1dba629a887d67a07c4ff66825c..4ab82b998a0f02bfc00bc96f2c275cedfdd354f2 100644 --- 
a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -485,7 +485,10 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) return -ENOLINK; } - skb = __netdev_alloc_skb_ip_align(dev->net, size, flags); + if (test_bit(EVENT_NO_IP_ALIGN, &dev->flags)) + skb = __netdev_alloc_skb(dev->net, size, flags); + else + skb = __netdev_alloc_skb_ip_align(dev->net, size, flags); if (!skb) { netif_dbg(dev, rx_err, dev->net, "no rx skb\n"); usbnet_defer_kevent (dev, EVENT_RX_MEMORY); diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index ef83ae3b0a44a4c854c02a5a66e048203ee7ce8e..4afba17e24031119563c88b0076b7037dd76165b 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1616,7 +1616,6 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, rq->rx_ring[i].basePA); rq->rx_ring[i].base = NULL; } - rq->buf_info[i] = NULL; } if (rq->data_ring.base) { @@ -1638,6 +1637,7 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, (rq->rx_ring[0].size + rq->rx_ring[1].size); dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0], rq->buf_info_pa); + rq->buf_info[0] = rq->buf_info[1] = NULL; } } diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index e7f5910a65191f4f013ae53db73d2a77510ae9e8..f8eb66ef2944ea9630237455cae4faf483b4f609 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -467,6 +467,9 @@ int i2400mu_probe(struct usb_interface *iface, struct i2400mu *i2400mu; struct usb_device *usb_dev = interface_to_usbdev(iface); + if (iface->cur_altsetting->desc.bNumEndpoints < 4) + return -ENODEV; + if (usb_dev->speed != USB_SPEED_HIGH) dev_err(dev, "device not connected as high speed\n"); diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 0b4c1562420fcb4535a70538eb5d574b65f83c85..ba1fe61e6ea6f80e6310111a37b4f73993655772 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -548,6 +548,11 @@ static int ath10k_htt_rx_crypto_param_len(struct ath10k *ar, return IEEE80211_TKIP_IV_LEN; case HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2: return IEEE80211_CCMP_HDR_LEN; + case HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2: + return IEEE80211_CCMP_256_HDR_LEN; + case HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2: + case HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2: + return IEEE80211_GCMP_HDR_LEN; case HTT_RX_MPDU_ENCRYPT_WEP128: case HTT_RX_MPDU_ENCRYPT_WAPI: break; @@ -573,6 +578,11 @@ static int ath10k_htt_rx_crypto_tail_len(struct ath10k *ar, return IEEE80211_TKIP_ICV_LEN; case HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2: return IEEE80211_CCMP_MIC_LEN; + case HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2: + return IEEE80211_CCMP_256_MIC_LEN; + case HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2: + case HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2: + return IEEE80211_GCMP_MIC_LEN; case HTT_RX_MPDU_ENCRYPT_WEP128: case HTT_RX_MPDU_ENCRYPT_WAPI: break; @@ -1024,9 +1034,21 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar, hdr = (void *)msdu->data; /* Tail */ - if (status->flag & RX_FLAG_IV_STRIPPED) + if (status->flag & RX_FLAG_IV_STRIPPED) { skb_trim(msdu, msdu->len - ath10k_htt_rx_crypto_tail_len(ar, enctype)); + } else { + /* MIC */ + if ((status->flag & RX_FLAG_MIC_STRIPPED) && + enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2) + skb_trim(msdu, msdu->len - 8); + + /* ICV */ + if (status->flag & RX_FLAG_ICV_STRIPPED && + enctype != HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2) + skb_trim(msdu, msdu->len - + 
ath10k_htt_rx_crypto_tail_len(ar, enctype)); + } /* MMIC */ if ((status->flag & RX_FLAG_MMIC_STRIPPED) && @@ -1048,7 +1070,8 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar, static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, struct sk_buff *msdu, struct ieee80211_rx_status *status, - const u8 first_hdr[64]) + const u8 first_hdr[64], + enum htt_rx_mpdu_encrypt_type enctype) { struct ieee80211_hdr *hdr; struct htt_rx_desc *rxd; @@ -1056,6 +1079,7 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; int l3_pad_bytes; + int bytes_aligned = ar->hw_params.decap_align_bytes; /* Delivered decapped frame: * [nwifi 802.11 header] <-- replaced with 802.11 hdr @@ -1084,6 +1108,14 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, /* push original 802.11 header */ hdr = (struct ieee80211_hdr *)first_hdr; hdr_len = ieee80211_hdrlen(hdr->frame_control); + + if (!(status->flag & RX_FLAG_IV_STRIPPED)) { + memcpy(skb_push(msdu, + ath10k_htt_rx_crypto_param_len(ar, enctype)), + (void *)hdr + round_up(hdr_len, bytes_aligned), + ath10k_htt_rx_crypto_param_len(ar, enctype)); + } + memcpy(skb_push(msdu, hdr_len), hdr, hdr_len); /* original 802.11 header has a different DA and in @@ -1144,6 +1176,7 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, u8 sa[ETH_ALEN]; int l3_pad_bytes; struct htt_rx_desc *rxd; + int bytes_aligned = ar->hw_params.decap_align_bytes; /* Delivered decapped frame: * [eth header] <-- replaced with 802.11 hdr & rfc1042/llc @@ -1172,6 +1205,14 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, /* push original 802.11 header */ hdr = (struct ieee80211_hdr *)first_hdr; hdr_len = ieee80211_hdrlen(hdr->frame_control); + + if (!(status->flag & RX_FLAG_IV_STRIPPED)) { + memcpy(skb_push(msdu, + ath10k_htt_rx_crypto_param_len(ar, enctype)), + (void *)hdr + round_up(hdr_len, bytes_aligned), + ath10k_htt_rx_crypto_param_len(ar, enctype)); + } + memcpy(skb_push(msdu, hdr_len), hdr, hdr_len); /* original 802.11 header has a different DA and in @@ -1185,12 +1226,14 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, struct sk_buff *msdu, struct ieee80211_rx_status *status, - const u8 first_hdr[64]) + const u8 first_hdr[64], + enum htt_rx_mpdu_encrypt_type enctype) { struct ieee80211_hdr *hdr; size_t hdr_len; int l3_pad_bytes; struct htt_rx_desc *rxd; + int bytes_aligned = ar->hw_params.decap_align_bytes; /* Delivered decapped frame: * [amsdu header] <-- replaced with 802.11 hdr @@ -1206,6 +1249,14 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, hdr = (struct ieee80211_hdr *)first_hdr; hdr_len = ieee80211_hdrlen(hdr->frame_control); + + if (!(status->flag & RX_FLAG_IV_STRIPPED)) { + memcpy(skb_push(msdu, + ath10k_htt_rx_crypto_param_len(ar, enctype)), + (void *)hdr + round_up(hdr_len, bytes_aligned), + ath10k_htt_rx_crypto_param_len(ar, enctype)); + } + memcpy(skb_push(msdu, hdr_len), hdr, hdr_len); } @@ -1240,13 +1291,15 @@ static void ath10k_htt_rx_h_undecap(struct ath10k *ar, is_decrypted); break; case RX_MSDU_DECAP_NATIVE_WIFI: - ath10k_htt_rx_h_undecap_nwifi(ar, msdu, status, first_hdr); + ath10k_htt_rx_h_undecap_nwifi(ar, msdu, status, first_hdr, + enctype); break; case RX_MSDU_DECAP_ETHERNET2_DIX: ath10k_htt_rx_h_undecap_eth(ar, msdu, status, first_hdr, enctype); break; case RX_MSDU_DECAP_8023_SNAP_LLC: - ath10k_htt_rx_h_undecap_snap(ar, msdu, status, first_hdr); + ath10k_htt_rx_h_undecap_snap(ar, msdu, status, 
first_hdr, + enctype); break; } } @@ -1289,7 +1342,8 @@ static void ath10k_htt_rx_h_csum_offload(struct sk_buff *msdu) static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, struct sk_buff_head *amsdu, - struct ieee80211_rx_status *status) + struct ieee80211_rx_status *status, + bool fill_crypt_header) { struct sk_buff *first; struct sk_buff *last; @@ -1299,7 +1353,6 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, enum htt_rx_mpdu_encrypt_type enctype; u8 first_hdr[64]; u8 *qos; - size_t hdr_len; bool has_fcs_err; bool has_crypto_err; bool has_tkip_err; @@ -1324,15 +1377,17 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, * decapped header. It'll be used for undecapping of each MSDU. */ hdr = (void *)rxd->rx_hdr_status; - hdr_len = ieee80211_hdrlen(hdr->frame_control); - memcpy(first_hdr, hdr, hdr_len); + memcpy(first_hdr, hdr, RX_HTT_HDR_STATUS_LEN); /* Each A-MSDU subframe will use the original header as the base and be * reported as a separate MSDU so strip the A-MSDU bit from QoS Ctl. */ hdr = (void *)first_hdr; - qos = ieee80211_get_qos_ctl(hdr); - qos[0] &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT; + + if (ieee80211_is_data_qos(hdr->frame_control)) { + qos = ieee80211_get_qos_ctl(hdr); + qos[0] &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT; + } /* Some attention flags are valid only in the last MSDU. */ last = skb_peek_tail(amsdu); @@ -1379,9 +1434,14 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, status->flag |= RX_FLAG_DECRYPTED; if (likely(!is_mgmt)) - status->flag |= RX_FLAG_IV_STRIPPED | - RX_FLAG_MMIC_STRIPPED; -} + status->flag |= RX_FLAG_MMIC_STRIPPED; + + if (fill_crypt_header) + status->flag |= RX_FLAG_MIC_STRIPPED | + RX_FLAG_ICV_STRIPPED; + else + status->flag |= RX_FLAG_IV_STRIPPED; + } skb_queue_walk(amsdu, msdu) { ath10k_htt_rx_h_csum_offload(msdu); @@ -1397,6 +1457,9 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, if (is_mgmt) continue; + if (fill_crypt_header) + continue; + hdr = (void *)msdu->data; hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED); } @@ -1407,6 +1470,9 @@ static void ath10k_htt_rx_h_deliver(struct ath10k *ar, struct ieee80211_rx_status *status) { struct sk_buff *msdu; + struct sk_buff *first_subframe; + + first_subframe = skb_peek(amsdu); while ((msdu = __skb_dequeue(amsdu))) { /* Setup per-MSDU flags */ @@ -1415,6 +1481,13 @@ static void ath10k_htt_rx_h_deliver(struct ath10k *ar, else status->flag |= RX_FLAG_AMSDU_MORE; + if (msdu == first_subframe) { + first_subframe = NULL; + status->flag &= ~RX_FLAG_ALLOW_SAME_PN; + } else { + status->flag |= RX_FLAG_ALLOW_SAME_PN; + } + ath10k_process_rx(ar, status, msdu); } } @@ -1557,7 +1630,7 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) ath10k_htt_rx_h_ppdu(ar, &amsdu, rx_status, 0xffff); ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0); ath10k_htt_rx_h_filter(ar, &amsdu, rx_status); - ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status); + ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true); ath10k_htt_rx_h_deliver(ar, &amsdu, rx_status); return num_msdus; @@ -1892,7 +1965,7 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) num_msdus += skb_queue_len(&amsdu); ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id); ath10k_htt_rx_h_filter(ar, &amsdu, status); - ath10k_htt_rx_h_mpdu(ar, &amsdu, status); + ath10k_htt_rx_h_mpdu(ar, &amsdu, status, false); ath10k_htt_rx_h_deliver(ar, &amsdu, status); break; case -EAGAIN: diff --git a/drivers/net/wireless/ath/ath10k/rx_desc.h b/drivers/net/wireless/ath/ath10k/rx_desc.h index 
034e7a54c5b2789b15ba59ff0dda2325d6c63ae2..e4878d0044bfee82b884637523c759bdb3df5ee1 100644 --- a/drivers/net/wireless/ath/ath10k/rx_desc.h +++ b/drivers/net/wireless/ath/ath10k/rx_desc.h @@ -239,6 +239,9 @@ enum htt_rx_mpdu_encrypt_type { HTT_RX_MPDU_ENCRYPT_WAPI = 5, HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2 = 6, HTT_RX_MPDU_ENCRYPT_NONE = 7, + HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2 = 8, + HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2 = 9, + HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2 = 10, }; #define RX_MPDU_START_INFO0_PEER_IDX_MASK 0x000007ff diff --git a/drivers/net/wireless/ath/ath9k/tx99.c b/drivers/net/wireless/ath/ath9k/tx99.c index 1fa7f844b5da3083d164be256d2b5384b3ba6371..8e9480cc33e15843b249f3d171dd91aa8875cb99 100644 --- a/drivers/net/wireless/ath/ath9k/tx99.c +++ b/drivers/net/wireless/ath/ath9k/tx99.c @@ -179,6 +179,9 @@ static ssize_t write_file_tx99(struct file *file, const char __user *user_buf, ssize_t len; int r; + if (count < 1) + return -EINVAL; + if (sc->cur_chan->nvifs > 1) return -EOPNOTSUPP; @@ -186,6 +189,8 @@ static ssize_t write_file_tx99(struct file *file, const char __user *user_buf, if (copy_from_user(buf, user_buf, len)) return -EFAULT; + buf[len] = '\0'; + if (strtobool(buf, &start)) return -EINVAL; diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index 6e5d9095b1956c1d792725591fe79a75fe676f38..a635fc6b1722b62455d31b893997c8abe556c914 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -71,8 +71,18 @@ MODULE_FIRMWARE("b43/ucode11.fw"); MODULE_FIRMWARE("b43/ucode13.fw"); MODULE_FIRMWARE("b43/ucode14.fw"); MODULE_FIRMWARE("b43/ucode15.fw"); +MODULE_FIRMWARE("b43/ucode16_lp.fw"); MODULE_FIRMWARE("b43/ucode16_mimo.fw"); +MODULE_FIRMWARE("b43/ucode24_lcn.fw"); +MODULE_FIRMWARE("b43/ucode25_lcn.fw"); +MODULE_FIRMWARE("b43/ucode25_mimo.fw"); +MODULE_FIRMWARE("b43/ucode26_mimo.fw"); +MODULE_FIRMWARE("b43/ucode29_mimo.fw"); +MODULE_FIRMWARE("b43/ucode33_lcn40.fw"); +MODULE_FIRMWARE("b43/ucode30_mimo.fw"); MODULE_FIRMWARE("b43/ucode5.fw"); +MODULE_FIRMWARE("b43/ucode40.fw"); +MODULE_FIRMWARE("b43/ucode42.fw"); MODULE_FIRMWARE("b43/ucode9.fw"); static int modparam_bad_frames_preempt; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 8e3c6f4bdaa04dfccddd9cc5e5040a4cece357a2..edffe5aeeeb167430f851567dbf357c2ab715795 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4080,8 +4080,8 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err, sdio_release_host(sdiodev->func[1]); fail: brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err); - device_release_driver(dev); device_release_driver(&sdiodev->func[2]->dev); + device_release_driver(dev); } struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 9789f3c5a78582177e17bfefa7a3a7034b53c9d7..f1231c0ea336616fecf6b7101039118fb09e6f9c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2320,7 +2320,7 @@ iwl_mvm_mac_release_buffered_frames(struct ieee80211_hw *hw, { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); - /* Called when we need to transmit (a) frame(s) from agg queue */ + /* Called when we need to transmit (a) frame(s) from agg or dqa queue */ 
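[Editor's note] The sta.c hunk below renames the 'agg' flag to 'single_sta_queue' and generalizes the accounting it guards to DQA queues as well: when up to 'cnt' frames are released for a service period, MORE_DATA is set only if frames remain buffered afterwards, and the count handed to firmware may not exceed what is actually queued. A toy illustration of that invariant — frames_to_release() is a made-up helper, not a driver function:

	#include <linux/kernel.h>	/* min() */
	#include <linux/types.h>

	/* Given @queued buffered frames and a service period of @cnt frames,
	 * return how many frames to tell the firmware to release and whether
	 * the MORE_DATA bit should be set.
	 */
	static u16 frames_to_release(u16 queued, u16 cnt, bool *more_data)
	{
		u16 release = min(queued, cnt);

		*more_data = queued > release;
		return release;
	}
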
iwl_mvm_sta_modify_sleep_tx_count(mvm, sta, reason, num_frames, tids, more_data, true); @@ -2340,7 +2340,8 @@ static void iwl_mvm_mac_sta_notify(struct ieee80211_hw *hw, for (tid = 0; tid < IWL_MAX_TID_COUNT; tid++) { struct iwl_mvm_tid_data *tid_data = &mvmsta->tid_data[tid]; - if (tid_data->state != IWL_AGG_ON && + if (!iwl_mvm_is_dqa_supported(mvm) && + tid_data->state != IWL_AGG_ON && tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA) continue; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index e64aeb4a22043002a4b5bd75dfb7dc86846d3c51..bdd1deed55a451b8c5bb05e6b433f2285eb4f1e7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -3032,7 +3032,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, struct ieee80211_sta *sta, enum ieee80211_frame_release_type reason, u16 cnt, u16 tids, bool more_data, - bool agg) + bool single_sta_queue) { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct iwl_mvm_add_sta_cmd cmd = { @@ -3052,14 +3052,14 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, for_each_set_bit(tid, &_tids, IWL_MAX_TID_COUNT) cmd.awake_acs |= BIT(tid_to_ucode_ac[tid]); - /* If we're releasing frames from aggregation queues then check if the - * all queues combined that we're releasing frames from have + /* If we're releasing frames from aggregation or dqa queues then check + * if all the queues that we're releasing frames from, combined, have: * - more frames than the service period, in which case more_data * needs to be set * - fewer than 'cnt' frames, in which case we need to adjust the * firmware command (but do that unconditionally) */ - if (agg) { + if (single_sta_queue) { int remaining = cnt; int sleep_tx_count; @@ -3069,7 +3069,8 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, u16 n_queued; tid_data = &mvmsta->tid_data[tid]; - if (WARN(tid_data->state != IWL_AGG_ON && + if (WARN(!iwl_mvm_is_dqa_supported(mvm) && + tid_data->state != IWL_AGG_ON && tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA, "TID %d state is %d\n", tid, tid_data->state)) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index e068d5355865fa219e69ed17035e027d1c1b6dfb..f65950e91ed57a0653d0e3789efadf2287d8611a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -545,7 +545,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, struct ieee80211_sta *sta, enum ieee80211_frame_release_type reason, u16 cnt, u16 tids, bool more_data, - bool agg); + bool single_sta_queue); int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta, bool drain); void iwl_mvm_sta_modify_disable_tx(struct iwl_mvm *mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 092ae0024f227b9f9db65d163a28dfa8bbd04488..7465d4db136fbeae6c7b859a37c47cd28196f63e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -7,7 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. 
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH - * Copyright(c) 2016 Intel Deutschland GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -34,6 +34,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -621,8 +622,10 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) * values. * Note that we don't need to make sure it isn't agg'd, since we're * TXing non-sta + * For DQA mode - we shouldn't increase it though */ - atomic_inc(&mvm->pending_frames[sta_id]); + if (!iwl_mvm_is_dqa_supported(mvm)) + atomic_inc(&mvm->pending_frames[sta_id]); return 0; } @@ -1009,11 +1012,8 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, spin_unlock(&mvmsta->lock); - /* Increase pending frames count if this isn't AMPDU */ - if ((iwl_mvm_is_dqa_supported(mvm) && - mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_ON && - mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_STARTING) || - (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu)) + /* Increase pending frames count if this isn't AMPDU or DQA queue */ + if (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu) atomic_inc(&mvm->pending_frames[mvmsta->sta_id]); return 0; @@ -1083,12 +1083,13 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm, lockdep_assert_held(&mvmsta->lock); if ((tid_data->state == IWL_AGG_ON || - tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA) && + tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA || + iwl_mvm_is_dqa_supported(mvm)) && iwl_mvm_tid_queued(tid_data) == 0) { /* - * Now that this aggregation queue is empty tell mac80211 so it - * knows we no longer have frames buffered for the station on - * this TID (for the TIM bitmap calculation.) + * Now that this aggregation or DQA queue is empty tell + * mac80211 so it knows we no longer have frames buffered for + * the station on this TID (for the TIM bitmap calculation.) */ ieee80211_sta_set_buffered(sta, tid, false); } @@ -1261,7 +1262,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, u8 skb_freed = 0; u16 next_reclaimed, seq_ctl; bool is_ndp = false; - bool txq_agg = false; /* Is this TXQ aggregated */ __skb_queue_head_init(&skbs); @@ -1287,6 +1287,10 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, info->flags |= IEEE80211_TX_STAT_ACK; break; case TX_STATUS_FAIL_DEST_PS: + /* In DQA, the FW should have stopped the queue and not + * return this status + */ + WARN_ON(iwl_mvm_is_dqa_supported(mvm)); info->flags |= IEEE80211_TX_STAT_TX_FILTERED; break; default: @@ -1391,15 +1395,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, bool send_eosp_ndp = false; spin_lock_bh(&mvmsta->lock); - if (iwl_mvm_is_dqa_supported(mvm)) { - enum iwl_mvm_agg_state state; - - state = mvmsta->tid_data[tid].state; - txq_agg = (state == IWL_AGG_ON || - state == IWL_EMPTYING_HW_QUEUE_DELBA); - } else { - txq_agg = txq_id >= mvm->first_agg_queue; - } if (!is_ndp) { tid_data->next_reclaimed = next_reclaimed; @@ -1456,11 +1451,11 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, * If the txq is not an AMPDU queue, there is no chance we freed * several skbs. Check that out... 
*/ - if (txq_agg) + if (iwl_mvm_is_dqa_supported(mvm) || txq_id >= mvm->first_agg_queue) goto out; /* We can't free more than one frame at once on a shared queue */ - WARN_ON(!iwl_mvm_is_dqa_supported(mvm) && (skb_freed > 1)); + WARN_ON(skb_freed > 1); /* If we have still frames for this STA nothing to do here */ if (!atomic_sub_and_test(skb_freed, &mvm->pending_frames[sta_id])) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index d04babd99b53803c2a0248d038bcad3fbfab36f0..ff5ce1ed03c4275a5664e64b50ac9ab9079e10ae 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1040,6 +1040,8 @@ unsigned int iwl_mvm_get_wd_timeout(struct iwl_mvm *mvm, return le32_to_cpu(txq_timer->p2p_go); case NL80211_IFTYPE_P2P_DEVICE: return le32_to_cpu(txq_timer->p2p_device); + case NL80211_IFTYPE_MONITOR: + return default_timeout; default: WARN_ON(1); return mvm->cfg->base_params->wd_timeout; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index d2a28a9d3209887cba994f4d6891a857c8eb53c6..4b462dc21c413a895431bc0cde566be115275003 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3047,6 +3047,7 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) { struct hwsim_new_radio_params param = { 0 }; const char *hwname = NULL; + int ret; param.reg_strict = info->attrs[HWSIM_ATTR_REG_STRICT_REG]; param.p2p_device = info->attrs[HWSIM_ATTR_SUPPORT_P2P_DEVICE]; @@ -3086,7 +3087,9 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) param.regd = hwsim_world_regdom_custom[idx]; } - return mac80211_hwsim_new_radio(info, &param); + ret = mac80211_hwsim_new_radio(info, &param); + kfree(hwname); + return ret; } static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index cb7365bdf6e0031f4ec48d5334df5f5721131c64..5b1d2e8402d9d5482085018186d947b59cbb35a4 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -113,10 +113,10 @@ struct xenvif_stats { * A subset of struct net_device_stats that contains only the * fields that are updated in netback.c for each queue.
*/ - unsigned int rx_bytes; - unsigned int rx_packets; - unsigned int tx_bytes; - unsigned int tx_packets; + u64 rx_bytes; + u64 rx_packets; + u64 tx_bytes; + u64 tx_packets; /* Additional stats used by xenvif */ unsigned long rx_gso_checksum_fixup; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 5bfaf5578810d20faf20c66ef400e2e52c4ba60e..618013e7f87bafd0f8fac0a2581ca46ff2901319 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -225,10 +225,10 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; - unsigned long rx_bytes = 0; - unsigned long rx_packets = 0; - unsigned long tx_bytes = 0; - unsigned long tx_packets = 0; + u64 rx_bytes = 0; + u64 rx_packets = 0; + u64 tx_bytes = 0; + u64 tx_packets = 0; unsigned int index; spin_lock(&vif->lock); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index cd442e46afb4f1adfd961f7ab03bafcf3fe980c1..1a9dadf7b3cc38b71e27dbb4250f86eade804afe 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -86,6 +86,8 @@ struct netfront_cb { /* IRQ name is queue name with "-tx" or "-rx" appended */ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3) +static DECLARE_WAIT_QUEUE_HEAD(module_unload_q); + struct netfront_stats { u64 packets; u64 bytes; @@ -1854,27 +1856,19 @@ static int talk_to_netback(struct xenbus_device *dev, xennet_destroy_queues(info); err = xennet_create_queues(info, &num_queues); - if (err < 0) - goto destroy_ring; + if (err < 0) { + xenbus_dev_fatal(dev, err, "creating queues"); + kfree(info->queues); + info->queues = NULL; + goto out; + } /* Create shared ring, alloc event channel -- for each queue */ for (i = 0; i < num_queues; ++i) { queue = &info->queues[i]; err = setup_netfront(dev, queue, feature_split_evtchn); - if (err) { - /* setup_netfront() will tidy up the current - * queue on error, but we need to clean up - * those already allocated. 
- */ - if (i > 0) { - rtnl_lock(); - netif_set_real_num_tx_queues(info->netdev, i); - rtnl_unlock(); - goto destroy_ring; - } else { - goto out; - } - } + if (err) + goto destroy_ring; } again: @@ -1964,9 +1958,9 @@ static int talk_to_netback(struct xenbus_device *dev, xenbus_transaction_end(xbt, 1); destroy_ring: xennet_disconnect_backend(info); - kfree(info->queues); - info->queues = NULL; + xennet_destroy_queues(info); out: + device_unregister(&dev->dev); return err; } @@ -2059,10 +2053,12 @@ static void netback_changed(struct xenbus_device *dev, break; case XenbusStateClosed: + wake_up_all(&module_unload_q); if (dev->state == XenbusStateClosed) break; /* Missed the backend's CLOSING state -- fallthrough */ case XenbusStateClosing: + wake_up_all(&module_unload_q); xenbus_frontend_closed(dev); break; } @@ -2168,6 +2164,20 @@ static int xennet_remove(struct xenbus_device *dev) dev_dbg(&dev->dev, "%s\n", dev->nodename); + if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) { + xenbus_switch_state(dev, XenbusStateClosing); + wait_event(module_unload_q, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosing); + + xenbus_switch_state(dev, XenbusStateClosed); + wait_event(module_unload_q, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosed || + xenbus_read_driver_state(dev->otherend) == + XenbusStateUnknown); + } + xennet_disconnect_backend(info); unregister_netdev(info->netdev); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 94733f73d37f12feaf0c0c35d0234d9807225830..7121453ec0475bc264b2bdb337ec61459ae6002a 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -183,13 +183,13 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, return ret; } -static int btt_log_read_pair(struct arena_info *arena, u32 lane, - struct log_entry *ent) +static int btt_log_group_read(struct arena_info *arena, u32 lane, + struct log_group *log) { - WARN_ON(!ent); + WARN_ON(!log); return arena_read_bytes(arena, - arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, - 2 * LOG_ENT_SIZE); + arena->logoff + (lane * LOG_GRP_SIZE), log, + LOG_GRP_SIZE); } static struct dentry *debugfs_root; @@ -229,6 +229,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); debugfs_create_x32("flags", S_IRUGO, d, &a->flags); + debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]); + debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]); } static void btt_debugfs_init(struct btt *btt) @@ -247,6 +249,11 @@ static void btt_debugfs_init(struct btt *btt) } } +static u32 log_seq(struct log_group *log, int log_idx) +{ + return le32_to_cpu(log->ent[log_idx].seq); +} + /* * This function accepts two log entries, and uses the * sequence number to find the 'older' entry. @@ -256,8 +263,10 @@ static void btt_debugfs_init(struct btt *btt) * * TODO The logic feels a bit kludge-y. make it better.. 
*/ -static int btt_log_get_old(struct log_entry *ent) +static int btt_log_get_old(struct arena_info *a, struct log_group *log) { + int idx0 = a->log_index[0]; + int idx1 = a->log_index[1]; int old; /* @@ -265,23 +274,23 @@ static int btt_log_get_old(struct log_entry *ent) * the next time, the following logic works out to put this * (next) entry into [1] */ - if (ent[0].seq == 0) { - ent[0].seq = cpu_to_le32(1); + if (log_seq(log, idx0) == 0) { + log->ent[idx0].seq = cpu_to_le32(1); return 0; } - if (ent[0].seq == ent[1].seq) + if (log_seq(log, idx0) == log_seq(log, idx1)) return -EINVAL; - if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) + if (log_seq(log, idx0) + log_seq(log, idx1) > 5) return -EINVAL; - if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { - if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) + if (log_seq(log, idx0) < log_seq(log, idx1)) { + if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1) old = 0; else old = 1; } else { - if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) + if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1) old = 1; else old = 0; @@ -306,17 +315,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane, { int ret; int old_ent, ret_ent; - struct log_entry log[2]; + struct log_group log; - ret = btt_log_read_pair(arena, lane, log); + ret = btt_log_group_read(arena, lane, &log); if (ret) return -EIO; - old_ent = btt_log_get_old(log); + old_ent = btt_log_get_old(arena, &log); if (old_ent < 0 || old_ent > 1) { dev_info(to_dev(arena), "log corruption (%d): lane %d seq [%d, %d]\n", - old_ent, lane, log[0].seq, log[1].seq); + old_ent, lane, log.ent[arena->log_index[0]].seq, + log.ent[arena->log_index[1]].seq); /* TODO set error state? */ return -EIO; } @@ -324,7 +334,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane, ret_ent = (old_flag ? old_ent : (1 - old_ent)); if (ent != NULL) - memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); + memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE); return ret_ent; } @@ -338,17 +348,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane, u32 sub, struct log_entry *ent) { int ret; - /* - * Ignore the padding in log_entry for calculating log_half. - * The entry is 'committed' when we write the sequence number, - * and we want to ensure that that is the last thing written. 
- * We don't bother writing the padding as that would be extra - * media wear and write amplification - */ - unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; - u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); + u32 group_slot = arena->log_index[sub]; + unsigned int log_half = LOG_ENT_SIZE / 2; void *src = ent; + u64 ns_off; + ns_off = arena->logoff + (lane * LOG_GRP_SIZE) + + (group_slot * LOG_ENT_SIZE); /* split the 16B write into atomic, durable halves */ ret = arena_write_bytes(arena, ns_off, src, log_half); if (ret) @@ -419,16 +425,16 @@ static int btt_log_init(struct arena_info *arena) { int ret; u32 i; - struct log_entry log, zerolog; + struct log_entry ent, zerolog; memset(&zerolog, 0, sizeof(zerolog)); for (i = 0; i < arena->nfree; i++) { - log.lba = cpu_to_le32(i); - log.old_map = cpu_to_le32(arena->external_nlba + i); - log.new_map = cpu_to_le32(arena->external_nlba + i); - log.seq = cpu_to_le32(LOG_SEQ_INIT); - ret = __btt_log_write(arena, i, 0, &log); + ent.lba = cpu_to_le32(i); + ent.old_map = cpu_to_le32(arena->external_nlba + i); + ent.new_map = cpu_to_le32(arena->external_nlba + i); + ent.seq = cpu_to_le32(LOG_SEQ_INIT); + ret = __btt_log_write(arena, i, 0, &ent); if (ret) return ret; ret = __btt_log_write(arena, i, 1, &zerolog); @@ -490,6 +496,123 @@ static int btt_freelist_init(struct arena_info *arena) return 0; } +static bool ent_is_padding(struct log_entry *ent) +{ + return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0) + && (ent->seq == 0); +} + +/* + * Detecting valid log indices: We read a log group (see the comments in btt.h + * for a description of a 'log_group' and its 'slots'), and iterate over its + * four slots. We expect that a padding slot will be all-zeroes, and use this + * to detect a padding slot vs. an actual entry. + * + * If a log_group is in the initial state, i.e. hasn't been used since the + * creation of this BTT layout, it will have three of the four slots with + * zeroes. We skip over these log_groups for the detection of log_index. If + * all log_groups are in the initial state (i.e. the BTT has never been + * written to), it is safe to assume the 'new format' of log entries in slots + * (0, 1). + */ +static int log_set_indices(struct arena_info *arena) +{ + bool idx_set = false, initial_state = true; + int ret, log_index[2] = {-1, -1}; + u32 i, j, next_idx = 0; + struct log_group log; + u32 pad_count = 0; + + for (i = 0; i < arena->nfree; i++) { + ret = btt_log_group_read(arena, i, &log); + if (ret < 0) + return ret; + + for (j = 0; j < 4; j++) { + if (!idx_set) { + if (ent_is_padding(&log.ent[j])) { + pad_count++; + continue; + } else { + /* Skip if index has been recorded */ + if ((next_idx == 1) && + (j == log_index[0])) + continue; + /* valid entry, record index */ + log_index[next_idx] = j; + next_idx++; + } + if (next_idx == 2) { + /* two valid entries found */ + idx_set = true; + } else if (next_idx > 2) { + /* too many valid indices */ + return -ENXIO; + } + } else { + /* + * once the indices have been set, just verify + * that all subsequent log groups are either in + * their initial state or follow the same + * indices. 
+ */ + if (j == log_index[0]) { + /* entry must be 'valid' */ + if (ent_is_padding(&log.ent[j])) + return -ENXIO; + } else if (j == log_index[1]) { + ; + /* + * log_index[1] can be padding if the + * lane never got used and it is still + * in the initial state (three 'padding' + * entries) + */ + } else { + /* entry must be invalid (padding) */ + if (!ent_is_padding(&log.ent[j])) + return -ENXIO; + } + } + } + /* + * If any of the log_groups have more than one valid, + * non-padding entry, then the we are no longer in the + * initial_state + */ + if (pad_count < 3) + initial_state = false; + pad_count = 0; + } + + if (!initial_state && !idx_set) + return -ENXIO; + + /* + * If all the entries in the log were in the initial state, + * assume new padding scheme + */ + if (initial_state) + log_index[1] = 1; + + /* + * Only allow the known permutations of log/padding indices, + * i.e. (0, 1), and (0, 2) + */ + if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2))) + ; /* known index possibilities */ + else { + dev_err(to_dev(arena), "Found an unknown padding scheme\n"); + return -ENXIO; + } + + arena->log_index[0] = log_index[0]; + arena->log_index[1] = log_index[1]; + dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]); + dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]); + return 0; +} + static int btt_rtt_init(struct arena_info *arena) { arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); @@ -545,8 +668,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, available -= 2 * BTT_PG_SIZE; /* The log takes a fixed amount of space based on nfree */ - logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), - BTT_PG_SIZE); + logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE); available -= logsize; /* Calculate optimal split between map and data area */ @@ -563,6 +685,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, arena->mapoff = arena->dataoff + datasize; arena->logoff = arena->mapoff + mapsize; arena->info2off = arena->logoff + logsize; + + /* Default log indices are (0,1) */ + arena->log_index[0] = 0; + arena->log_index[1] = 1; return arena; } @@ -653,6 +779,13 @@ static int discover_arenas(struct btt *btt) arena->external_lba_start = cur_nlba; parse_arena_meta(arena, super, cur_off); + ret = log_set_indices(arena); + if (ret) { + dev_err(to_dev(arena), + "Unable to deduce log/padding indices\n"); + goto out; + } + ret = btt_freelist_init(arena); if (ret) goto out; diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index b2f8651e5395f07f8e9fe117262fe8f131eea715..0f80b6b3d4a321460858f45920b4ef655fdc3121 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -26,6 +26,7 @@ #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) #define MAP_ENT_NORMAL 0xC0000000 +#define LOG_GRP_SIZE sizeof(struct log_group) #define LOG_ENT_SIZE sizeof(struct log_entry) #define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ #define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ @@ -44,12 +45,52 @@ enum btt_init_state { INIT_READY }; +/* + * A log group represents one log 'lane', and consists of four log entries. + * Two of the four entries are valid entries, and the remaining two are + * padding. Due to an old bug in the padding location, we need to perform a + * test to determine the padding scheme being used, and use that scheme + * thereafter. 
+ * + * In kernels prior to 4.15, 'log group' would have actual log entries at + * indices (0, 2) and padding at indices (1, 3), where as the correct/updated + * format has log entries at indices (0, 1) and padding at indices (2, 3). + * + * Old (pre 4.15) format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------+-----------------+ + * + * New format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | lba/old/new/seq | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | pad | pad | + * +-----------------+-----------------+ + * + * We detect during start-up which format is in use, and set + * arena->log_index[(0, 1)] with the detected format. + */ + struct log_entry { __le32 lba; __le32 old_map; __le32 new_map; __le32 seq; - __le64 padding[2]; +}; + +struct log_group { + struct log_entry ent[4]; }; struct btt_sb { @@ -117,6 +158,7 @@ struct aligned_lock { * @list: List head for list of arenas * @debugfs_dir: Debugfs dentry * @flags: Arena flags - may signify error states. + * @log_index: Indices of the valid log entries in a log_group * * arena_info is a per-arena handle. Once an arena is narrowed down for an * IO, this struct is passed around for the duration of the IO. @@ -147,6 +189,7 @@ struct arena_info { struct dentry *debugfs_dir; /* Arena flags */ u32 flags; + int log_index[2]; }; /** diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 71eb6c637b607feecc6dbd6c409564f3aa239e01..42abdd2391c9b029525f18e0b2fbbade60d9d50e 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -352,9 +352,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region) int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { u64 checksum, offset; - unsigned long align; enum nd_pfn_mode mode; struct nd_namespace_io *nsio; + unsigned long align, start_pad; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; struct nd_namespace_common *ndns = nd_pfn->ndns; const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); @@ -398,6 +398,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) align = le32_to_cpu(pfn_sb->align); offset = le64_to_cpu(pfn_sb->dataoff); + start_pad = le32_to_cpu(pfn_sb->start_pad); if (align == 0) align = 1UL << ilog2(offset); mode = le32_to_cpu(pfn_sb->mode); @@ -456,7 +457,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EBUSY; } - if ((align && !IS_ALIGNED(offset, align)) + if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align)) || !IS_ALIGNED(offset, PAGE_SIZE)) { dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled align: %#lx\n", diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fbeca065f18cdd1a07951f0196c84a25e5854100..719ee5fb2626670484f9e937f477d9bf3f74893a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1619,7 +1619,8 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) mutex_lock(&ctrl->namespaces_mutex); list_for_each_entry(ns, &ctrl->namespaces, list) { if (ns->ns_id == nsid) { - kref_get(&ns->kref); + if (!kref_get_unless_zero(&ns->kref)) + continue; ret = ns; break; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 8edafd8cb8ce1fb3cf0ce4912e40f28e2d0fc76d..5c52a61827659789e1f0c5829a6d595f080ce019 100644 --- 
a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -84,7 +84,7 @@ enum nvme_quirks { * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was * found empirically. */ -#define NVME_QUIRK_DELAY_AMOUNT 2000 +#define NVME_QUIRK_DELAY_AMOUNT 2300 enum nvme_ctrl_state { NVME_CTRL_NEW, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 54ea90f89b70530f5958639e6e6c1d1fabdf6cfb..e48ecb9303ca40c92917a68c2a41ae25e79ca040 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2109,6 +2109,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 55ce769cecee6c8e7a57e11b1d5bc1915c129771..c89d68a76f3d01964c69a14ddaaecb0f3a2925be 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -422,6 +422,13 @@ void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, ctrl->sqs[qid] = sq; } +static void nvmet_confirm_sq(struct percpu_ref *ref) +{ + struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref); + + complete(&sq->confirm_done); +} + void nvmet_sq_destroy(struct nvmet_sq *sq) { /* @@ -430,7 +437,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) */ if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq) nvmet_async_events_free(sq->ctrl); - percpu_ref_kill(&sq->ref); + percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); + wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); percpu_ref_exit(&sq->ref); @@ -458,6 +466,7 @@ int nvmet_sq_init(struct nvmet_sq *sq) return ret; } init_completion(&sq->free_done); + init_completion(&sq->confirm_done); return 0; } @@ -816,6 +825,9 @@ static void nvmet_ctrl_free(struct kref *ref) list_del(&ctrl->subsys_entry); mutex_unlock(&subsys->lock); + flush_work(&ctrl->async_event_work); + cancel_work_sync(&ctrl->fatal_err_work); + ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid); nvmet_subsys_put(subsys); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index d5df77d686b26a40414f192a1c74388651141fd1..e56ca3fb107eb16551c006ee573a34cca4c6e59b 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -223,8 +223,6 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx) static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, struct nvme_loop_iod *iod, unsigned int queue_idx) { - BUG_ON(queue_idx >= ctrl->queue_count); - iod->req.cmd = &iod->cmd; iod->req.rsp = &iod->rsp; iod->queue = &ctrl->queues[queue_idx]; @@ -288,9 +286,9 @@ static struct blk_mq_ops nvme_loop_admin_mq_ops = { static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) { + nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); } static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) @@ -314,6 +312,43 @@ static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) kfree(ctrl); } +static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) +{ + int i; + + for (i = 1; i < ctrl->queue_count; i++) + 
nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); +} + +static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; + int ret, i; + + nr_io_queues = min(opts->nr_io_queues, num_online_cpus()); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); + if (ret || !nr_io_queues) + return ret; + + dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues); + + for (i = 1; i <= nr_io_queues; i++) { + ctrl->queues[i].ctrl = ctrl; + ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); + if (ret) + goto out_destroy_queues; + + ctrl->queue_count++; + } + + return 0; + +out_destroy_queues: + nvme_loop_destroy_io_queues(ctrl); + return ret; +} + static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) { int error; @@ -385,17 +420,13 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) { - int i; - nvme_stop_keep_alive(&ctrl->ctrl); if (ctrl->queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); - - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + nvme_loop_destroy_io_queues(ctrl); } if (ctrl->ctrl.state == NVME_CTRL_LIVE) @@ -467,19 +498,14 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) if (ret) goto out_disable; - for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) { - ctrl->queues[i].ctrl = ctrl; - ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); - if (ret) - goto out_free_queues; - - ctrl->queue_count++; - } + ret = nvme_loop_init_io_queues(ctrl); + if (ret) + goto out_destroy_admin; - for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) { + for (i = 1; i < ctrl->queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) - goto out_free_queues; + goto out_destroy_io; } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -492,9 +518,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) return; -out_free_queues: - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); +out_destroy_io: + nvme_loop_destroy_io_queues(ctrl); +out_destroy_admin: nvme_loop_destroy_admin_queue(ctrl); out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); @@ -533,25 +559,12 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret, i; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret || !opts->nr_io_queues) + ret = nvme_loop_init_io_queues(ctrl); + if (ret) return ret; - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", - opts->nr_io_queues); - - for (i = 1; i <= opts->nr_io_queues; i++) { - ctrl->queues[i].ctrl = ctrl; - ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); - if (ret) - goto out_destroy_queues; - - ctrl->queue_count++; - } - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); ctrl->tag_set.ops = &nvme_loop_mq_ops; ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; @@ -575,7 +588,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) goto out_free_tagset; } - for (i = 1; i <= opts->nr_io_queues; i++) { + for (i = 1; i < ctrl->queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) goto out_cleanup_connect_q; @@ -588,8 +601,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) 
out_free_tagset: blk_mq_free_tag_set(&ctrl->tag_set); out_destroy_queues: - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + nvme_loop_destroy_io_queues(ctrl); return ret; } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 7655a351320f4e08eec2c1e67fef95f037ecd323..26b87dc843d289b890e32e59decb6fb97a249bb8 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -73,6 +73,7 @@ struct nvmet_sq { u16 qid; u16 size; struct completion free_done; + struct completion confirm_done; }; /** diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ca8ddc3fb19e524d9b18405cb902b81d3984374b..53bd3255086740785a52a808506cd3298ae9b379 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -703,11 +703,6 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue, { u16 status; - cmd->queue = queue; - cmd->n_rdma = 0; - cmd->req.port = queue->port; - - ib_dma_sync_single_for_cpu(queue->dev->device, cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length, DMA_FROM_DEVICE); @@ -760,9 +755,12 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) cmd->queue = queue; rsp = nvmet_rdma_get_rsp(queue); + rsp->queue = queue; rsp->cmd = cmd; rsp->flags = 0; rsp->req.cmd = cmd->nvme_cmd; + rsp->req.port = queue->port; + rsp->n_rdma = 0; if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) { unsigned long flags; diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index bc286cbbbc9b26d12d6fb57473a2d0c7051d97d6..1cced1d039d75cc97818ca87e4eac1ca7561aa1d 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -1656,3 +1656,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask) iounmap(base_addr); } + +/* + * The design of the Diva management card in rp34x0 machines (rp3410, rp3440) + * seems rushed, so that many built-in components simply don't work. + * The following quirks disable the serial AUX port and the built-in ATI RV100 + * Radeon 7000 graphics card which both don't have any external connectors and + * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as + * such makes those machines the only PARISC machines on which we can't use + * ttyS0 as boot console. 
+ */ +static void quirk_diva_ati_card(struct pci_dev *dev) +{ + if (dev->subsystem_vendor != PCI_VENDOR_ID_HP || + dev->subsystem_device != 0x1292) + return; + + dev_info(&dev->dev, "Hiding Diva built-in ATI card"); + dev->device = 0; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY, + quirk_diva_ati_card); + +static void quirk_diva_aux_disable(struct pci_dev *dev) +{ + if (dev->subsystem_vendor != PCI_VENDOR_ID_HP || + dev->subsystem_device != 0x1291) + return; + + dev_info(&dev->dev, "Hiding Diva built-in AUX serial device"); + dev->device = 0; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX, + quirk_diva_aux_disable); diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 47227820406d17f3e33f921effac2210e38ffd61..1d32fe2d97aa7cfefc136301ef07a8ebdaf4e53b 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -164,7 +164,6 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) pci_device_add(virtfn, virtfn->bus); mutex_unlock(&iov->dev->sriov->lock); - pci_bus_add_device(virtfn); sprintf(buf, "virtfn%u", id); rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); if (rc) @@ -175,6 +174,8 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE); + pci_bus_add_device(virtfn); + return 0; failed2: diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 8a68e2b554e1f60ef0a0ca506ede561eed3bb173..802997e2ddcceb8a5f39937440a2dc83bc4dc988 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -953,7 +953,12 @@ static int pci_pm_thaw_noirq(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - pci_update_current_state(pci_dev, PCI_D0); + /* + * pci_restore_state() requires the device to be in D0 (because of MSI + * restoration among other things), so force it into D0 in case the + * driver's "freeze" callbacks put it into a low-power state directly. + */ + pci_set_power_state(pci_dev, PCI_D0); pci_restore_state(pci_dev); if (drv && drv->pm && drv->pm->thaw_noirq) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e7d4048e81f214826a3e093cf21a0c79eeee5526..a87c8e1aef68132cddd5ae6d4e122ab4e44934a8 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4214,6 +4214,10 @@ static bool pci_bus_resetable(struct pci_bus *bus) { struct pci_dev *dev; + + if (bus->self && (bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET)) + return false; + list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET || (dev->subordinate && !pci_bus_resetable(dev->subordinate))) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index b1303b32053fe62b0d41fe5a468b975583c7eff0..057465adf0b686b37830fb7da32e3b7fa6330df7 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -390,7 +390,14 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, * If the error is reported by an end point, we think this * error is related to the upstream link of the end point. */ - pci_walk_bus(dev->bus, cb, &result_data); + if (state == pci_channel_io_normal) + /* + * the error is non fatal so the bus is ok, just invoke + * the callback for the function that logged the error. 
+ */ + cb(dev, &result_data); + else + pci_walk_bus(dev->bus, cb, &result_data); } return result_data.result; diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c index 4b703492376af72b6a85626d28d6427d4cd98eb0..00f61225386c853c5d6bc853b349e0b701cb867a 100644 --- a/drivers/pci/pcie/pme.c +++ b/drivers/pci/pcie/pme.c @@ -232,6 +232,9 @@ static void pcie_pme_work_fn(struct work_struct *work) break; pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta); + if (rtsta == (u32) ~0) + break; + if (rtsta & PCI_EXP_RTSTA_PME) { /* * Clear PME status of the port. If there are other @@ -279,7 +282,7 @@ static irqreturn_t pcie_pme_irq(int irq, void *context) spin_lock_irqsave(&data->lock, flags); pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta); - if (!(rtsta & PCI_EXP_RTSTA_PME)) { + if (rtsta == (u32) ~0 || !(rtsta & PCI_EXP_RTSTA_PME)) { spin_unlock_irqrestore(&data->lock, flags); return IRQ_NONE; } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 60bada90cd75604ae73fe7c363f598475addecfe..a98be6db7e93b53503849ec2ebaec8786d0625ea 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -932,7 +932,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass) child = pci_add_new_bus(bus, dev, max+1); if (!child) goto out; - pci_bus_insert_busn_res(child, max+1, 0xff); + pci_bus_insert_busn_res(child, max+1, + bus->busn_res.end); } max++; buses = (buses & 0xff000000) @@ -2136,6 +2137,10 @@ unsigned int pci_scan_child_bus(struct pci_bus *bus) if (bus->self && bus->self->is_hotplug_bridge && pci_hotplug_bus_size) { if (max - bus->busn_res.start < pci_hotplug_bus_size - 1) max = bus->busn_res.start + pci_hotplug_bus_size - 1; + + /* Do not allocate more buses than we have room left */ + if (max > bus->busn_res.end) + max = bus->busn_res.end; } /* diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index f9357e09e9b30131d631361a9d0092a1e19980bb..b6b9b5b74e3085a4be6876fec8cf7d6688d59b8c 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -19,9 +19,9 @@ static void pci_stop_dev(struct pci_dev *dev) pci_pme_active(dev, false); if (dev->is_added) { + device_release_driver(&dev->dev); pci_proc_detach_device(dev); pci_remove_sysfs_dev_files(dev); - device_release_driver(&dev->dev); dev->is_added = 0; } diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index a268f4d6f3e9002e753d18ecabab123feed37a29..48a365e303e5a967bf7d65d7d7d87abe98de2b23 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -395,6 +395,10 @@ static struct phy *_of_phy_get(struct device_node *np, int index) if (ret) return ERR_PTR(-ENODEV); + /* This phy type handled by the usb-phy subsystem for now */ + if (of_device_is_compatible(args.np, "usb-nop-xceiv")) + return ERR_PTR(-ENODEV); + mutex_lock(&phy_provider_mutex); phy_provider = of_phy_provider_lookup(args.np); if (IS_ERR(phy_provider) || !try_module_get(phy_provider->owner)) { diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index 671610c989b6740ca132d3937655d27b3ff78171..b0c0fa0444dd8f59f6e21ff3148dd501d369cef0 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -26,7 +26,8 @@ config DEBUG_PINCTRL config PINCTRL_ADI2 bool "ADI pin controller driver" - depends on BLACKFIN + depends on (BF54x || BF60x) + depends on !GPIO_ADI select PINMUX select IRQ_DOMAIN help diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 0d34d8a4ab14a22d9f3aa34c7047ccfc9dc19f90..e8c08eb9753010a28de00fa9987745a849b34a19 100644 --- 
a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1594,6 +1594,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) clear_bit(i, chip->irq_valid_mask); } + /* + * The same set of machines in chv_no_valid_mask[] have incorrectly + * configured GPIOs that generate spurious interrupts so we use + * this same list to apply another quirk for them. + * + * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953. + */ + if (!need_valid_mask) { + /* + * Mask all interrupts the community is able to generate + * but leave the ones that can only generate GPEs unmasked. + */ + chv_writel(GENMASK(31, pctrl->community->nirqs), + pctrl->regs + CHV_INTMASK); + } + /* Clear all interrupts */ chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index b40a074822cfedddd0ec62bf75d1c401b44ef200..df63b7d997e885466eff7e5a945c609ff235b481 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -368,6 +368,18 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } +static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) +{ + u32 value; + + /* Put the pad into GPIO mode */ + value = readl(padcfg0) & ~PADCFG0_PMODE_MASK; + /* Disable SCI/SMI/NMI generation */ + value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI); + value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI); + writel(value, padcfg0); +} + static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned pin) @@ -375,7 +387,6 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, struct intel_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); void __iomem *padcfg0; unsigned long flags; - u32 value; raw_spin_lock_irqsave(&pctrl->lock, flags); @@ -385,13 +396,7 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, } padcfg0 = intel_get_padcfg(pctrl, pin, PADCFG0); - /* Put the pad into GPIO mode */ - value = readl(padcfg0) & ~PADCFG0_PMODE_MASK; - /* Disable SCI/SMI/NMI generation */ - value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI); - value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI); - writel(value, padcfg0); - + intel_gpio_set_gpio_mode(padcfg0); /* Disable TX buffer and enable RX (this will be input) */ __intel_gpio_set_direction(padcfg0, true); @@ -770,6 +775,8 @@ static int intel_gpio_irq_type(struct irq_data *d, unsigned type) raw_spin_lock_irqsave(&pctrl->lock, flags); + intel_gpio_set_gpio_mode(reg); + value = readl(reg); value &= ~(PADCFG0_RXEVCFG_MASK | PADCFG0_RXINV); diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index b7bb371679692e5dd76186be130ed4b4a6a86cb3..50c45bdf93be85c86d5aa715fa6d0bf23fb6ce1e 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1285,6 +1285,22 @@ static void st_gpio_irq_unmask(struct irq_data *d) writel(BIT(d->hwirq), bank->base + REG_PIO_SET_PMASK); } +static int st_gpio_irq_request_resources(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + + st_gpio_direction_input(gc, d->hwirq); + + return gpiochip_lock_as_irq(gc, d->hwirq); +} + +static void st_gpio_irq_release_resources(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + + gpiochip_unlock_as_irq(gc, d->hwirq); +} + static int st_gpio_irq_set_type(struct irq_data *d, unsigned type) { struct gpio_chip *gc = 
irq_data_get_irq_chip_data(d); @@ -1438,12 +1454,14 @@ static struct gpio_chip st_gpio_template = { }; static struct irq_chip st_gpio_irqchip = { - .name = "GPIO", - .irq_disable = st_gpio_irq_mask, - .irq_mask = st_gpio_irq_mask, - .irq_unmask = st_gpio_irq_unmask, - .irq_set_type = st_gpio_irq_set_type, - .flags = IRQCHIP_SKIP_SET_WAKE, + .name = "GPIO", + .irq_request_resources = st_gpio_irq_request_resources, + .irq_release_resources = st_gpio_irq_release_resources, + .irq_disable = st_gpio_irq_mask, + .irq_mask = st_gpio_irq_mask, + .irq_unmask = st_gpio_irq_unmask, + .irq_set_type = st_gpio_irq_set_type, + .flags = IRQCHIP_SKIP_SET_WAKE, }; static int st_gpiolib_register_bank(struct st_pinctrl *info, diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c index 866aa3ce1ac95a3866526b648e1e806aef5eba71..6cf0006d4c8df4837e512ac1e81a32f7d709969e 100644 --- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c +++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c @@ -436,3 +436,7 @@ int pxa2xx_pinctrl_exit(struct platform_device *pdev) return 0; } EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit); + +MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>"); +MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c index 9f31bc1a47d01b9c88072f60444d53917a63af84..18716025b1dbcf0f3a5a6934bab416ca18277eab 100644 --- a/drivers/platform/x86/asus-wireless.c +++ b/drivers/platform/x86/asus-wireless.c @@ -97,6 +97,7 @@ static void asus_wireless_notify(struct acpi_device *adev, u32 event) return; } input_report_key(data->idev, KEY_RFKILL, 1); + input_sync(data->idev); input_report_key(data->idev, KEY_RFKILL, 0); input_sync(data->idev); } diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 09356684c32f6a78f03dbc5ef1dd7addc828bf08..abd9d83f6009473de8908ee397b36ca7d932ac14 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -240,6 +240,7 @@ static const struct dmi_system_id lis3lv02d_dmi_ids[] = { AXIS_DMI_MATCH("HDX18", "HP HDX 18", x_inverted), AXIS_DMI_MATCH("HPB432x", "HP ProBook 432", xy_rotated_left), AXIS_DMI_MATCH("HPB440G3", "HP ProBook 440 G3", x_inverted_usd), + AXIS_DMI_MATCH("HPB440G4", "HP ProBook 440 G4", x_inverted), AXIS_DMI_MATCH("HPB442x", "HP ProBook 442", xy_rotated_left), AXIS_DMI_MATCH("HPB452x", "HP ProBook 452", y_inverted), AXIS_DMI_MATCH("HPB522x", "HP ProBook 522", xy_swap), diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c index a47a41fc10ad77c427158811857da4bc21cd747f..b5b890127479f8f586880762828bbd447a383ddf 100644 --- a/drivers/platform/x86/intel_punit_ipc.c +++ b/drivers/platform/x86/intel_punit_ipc.c @@ -252,28 +252,28 @@ static int intel_punit_get_bars(struct platform_device *pdev) * - GTDRIVER_IPC BASE_IFACE */ res = platform_get_resource(pdev, IORESOURCE_MEM, 2); - if (res) { + if (res && resource_size(res) > 1) { addr = devm_ioremap_resource(&pdev->dev, res); if (!IS_ERR(addr)) punit_ipcdev->base[ISPDRIVER_IPC][BASE_DATA] = addr; } res = platform_get_resource(pdev, IORESOURCE_MEM, 3); - if (res) { + if (res && resource_size(res) > 1) { addr = devm_ioremap_resource(&pdev->dev, res); if (!IS_ERR(addr)) punit_ipcdev->base[ISPDRIVER_IPC][BASE_IFACE] = addr; } res = platform_get_resource(pdev, IORESOURCE_MEM, 4); - if (res) { + if (res && resource_size(res) > 1) { addr = devm_ioremap_resource(&pdev->dev, res); if (!IS_ERR(addr)) punit_ipcdev->base[GTDRIVER_IPC][BASE_DATA] =
addr; } res = platform_get_resource(pdev, IORESOURCE_MEM, 5); - if (res) { + if (res && resource_size(res) > 1) { addr = devm_ioremap_resource(&pdev->dev, res); if (!IS_ERR(addr)) punit_ipcdev->base[GTDRIVER_IPC][BASE_IFACE] = addr; diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index ceeb8c188ef33566e67a6603dfd8f75d5e440de9..00d82e8443bdd48de09ae4d1a05e91cd5e6252ac 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -848,5 +848,5 @@ static void __exit acpi_wmi_exit(void) pr_info("Mapper unloaded\n"); } -subsys_initcall(acpi_wmi_init); +subsys_initcall_sync(acpi_wmi_init); module_exit(acpi_wmi_exit); diff --git a/drivers/power/reset/zx-reboot.c b/drivers/power/reset/zx-reboot.c index b0b1eb3a78c289090fef365d2e5772a924659ae5..76153ac0706cb78438b918cd57aeb669825989a9 100644 --- a/drivers/power/reset/zx-reboot.c +++ b/drivers/power/reset/zx-reboot.c @@ -81,3 +81,7 @@ static struct platform_driver zx_reboot_driver = { }, }; module_platform_driver(zx_reboot_driver); + +MODULE_DESCRIPTION("ZTE SoCs reset driver"); +MODULE_AUTHOR("Jun Nie <jun.nie@linaro.org>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 9013a585507e8a80b4580c084445ec7bbde6e04c..f32fc704cb7ed778f48bcf95062875bc022dc7a3 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -964,7 +964,8 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode, req->sgt.sgl, req->sgt.nents, dir); if (nents == -EFAULT) { rmcd_error("Failed to map SG list"); - return -EFAULT; + ret = -EFAULT; + goto err_pg; } ret = do_dma_request(req, xfer, sync, nents); diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 84a52db9b05f905bae3c294b88d116e77327f8c5..6ebd42aad291f3c20f7a9c412982208cb219b12b 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -772,7 +772,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) } timerqueue_add(&rtc->timerqueue, &timer->node); - if (!next) { + if (!next || ktime_before(timer->node.expires, next->expires)) { struct rtc_wkalrm alarm; int err; alarm.time = rtc_ktime_to_tm(timer->node.expires); diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index 1227ceab61eecc408526c6eb542477058a46417a..a4b8b603c807da32671788d6d62edc860ccd6e39 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -422,7 +422,7 @@ static unsigned long pcf8563_clkout_recalc_rate(struct clk_hw *hw, return 0; buf &= PCF8563_REG_CLKO_F_MASK; - return clkout_rates[ret]; + return clkout_rates[buf]; } static long pcf8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate, diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index e1687e19c59f4ad57e6dd78d5f7e8702fa411d27..a30f24cb6c83c020f456836f085886ce9a7d3e94 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -308,7 +308,8 @@ static int pl031_remove(struct amba_device *adev) dev_pm_clear_wake_irq(&adev->dev); device_init_wakeup(&adev->dev, false); - free_irq(adev->irq[0], ldata); + if (adev->irq[0]) + free_irq(adev->irq[0], ldata); rtc_device_unregister(ldata->rtc); iounmap(ldata->base); kfree(ldata); @@ -381,12 +382,13 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) goto out_no_rtc; } - if (request_irq(adev->irq[0], pl031_interrupt, - vendor->irqflags, "rtc-pl031", ldata)) { - ret = -EIO; - goto out_no_irq; + if (adev->irq[0]) { + ret = request_irq(adev->irq[0], pl031_interrupt, +
vendor->irqflags, "rtc-pl031", ldata); + if (ret) + goto out_no_irq; + dev_pm_set_wake_irq(&adev->dev, adev->irq[0]); } - dev_pm_set_wake_irq(&adev->dev, adev->irq[0]); return 0; out_no_irq: diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index d55e6438bb5ef1a92bf4426323b9874fe253e58a..9b5fc502f6a1f17996f51c2fc48a81f7a3d1b1d7 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -576,9 +576,9 @@ enum qeth_cq { }; struct qeth_ipato { - int enabled; - int invert4; - int invert6; + bool enabled; + bool invert4; + bool invert6; struct list_head entries; }; @@ -969,7 +969,8 @@ int qeth_bridgeport_query_ports(struct qeth_card *card, int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role); int qeth_bridgeport_an_set(struct qeth_card *card, int enable); int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int); -int qeth_get_elements_no(struct qeth_card *, struct sk_buff *, int); +int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb, + int extra_elems, int data_offset); int qeth_get_elements_for_frags(struct sk_buff *); int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *, struct sk_buff *, struct qeth_hdr *, int, int, int); @@ -1004,6 +1005,9 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *, int qeth_set_features(struct net_device *, netdev_features_t); int qeth_recover_features(struct net_device *); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); +netdev_features_t qeth_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); /* exports for OSN */ int qeth_osn_assist(struct net_device *, void *, int); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 21ef8023430fe7c532da42e47fd7110898270d9c..df8f74cb14067738ebb598421d4a851a35e924c7 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -19,6 +19,11 @@ #include <linux/mii.h> #include <linux/kthread.h> #include <linux/slab.h> +#include <linux/if_vlan.h> +#include <linux/netdevice.h> +#include <linux/netdev_features.h> +#include <linux/skbuff.h> + #include <net/iucv/af_iucv.h> #include <net/dsfield.h> @@ -1470,9 +1475,9 @@ static int qeth_setup_card(struct qeth_card *card) qeth_set_intial_options(card); /* IP address takeover */ INIT_LIST_HEAD(&card->ipato.entries); - card->ipato.enabled = 0; - card->ipato.invert4 = 0; - card->ipato.invert6 = 0; + card->ipato.enabled = false; + card->ipato.invert4 = false; + card->ipato.invert6 = false; /* init QDIO stuff */ qeth_init_qdio_info(card); INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work); @@ -3837,6 +3842,7 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); * @card: qeth card structure, to check max. elems. * @skb: SKB address * @extra_elems: extra elems needed, to check against max. + * @data_offset: range starts at skb->data + data_offset * * Returns the number of pages, and thus QDIO buffer elements, needed to cover * skb data, including linear part and fragments. Checks if the result plus @@ -3844,10 +3850,10 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); * Note: extra_elems is not included in the returned result.
*/ int qeth_get_elements_no(struct qeth_card *card, - struct sk_buff *skb, int extra_elems) + struct sk_buff *skb, int extra_elems, int data_offset) { int elements = qeth_get_elements_for_range( - (addr_t)skb->data, + (addr_t)skb->data + data_offset, (addr_t)skb->data + skb_headlen(skb)) + qeth_get_elements_for_frags(skb); @@ -6240,6 +6246,32 @@ netdev_features_t qeth_fix_features(struct net_device *dev, } EXPORT_SYMBOL_GPL(qeth_fix_features); +netdev_features_t qeth_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + /* GSO segmentation builds skbs with + * a (small) linear part for the headers, and + * page frags for the data. + * Compared to a linear skb, the header-only part consumes an + * additional buffer element. This reduces buffer utilization, and + * hurts throughput. So compress small segments into one element. + */ + if (netif_needs_gso(skb, features)) { + /* match skb_segment(): */ + unsigned int doffset = skb->data - skb_mac_header(skb); + unsigned int hsize = skb_shinfo(skb)->gso_size; + unsigned int hroom = skb_headroom(skb); + + /* linearize only if resulting skb allocations are order-0: */ + if (SKB_DATA_ALIGN(hroom + doffset + hsize) <= SKB_MAX_HEAD(0)) + features &= ~NETIF_F_SG; + } + + return vlan_features_check(skb, features); +} +EXPORT_SYMBOL_GPL(qeth_features_check); + static int __init qeth_core_init(void) { int rc; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 8530477caab8ce64433702208648c15e7325ac2f..5082dfeacb95a15333a63655bdc231fb749dafe8 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -865,7 +865,7 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * chaining we can not send long frag lists */ if ((card->info.type != QETH_CARD_TYPE_IQD) && - !qeth_get_elements_no(card, new_skb, 0)) { + !qeth_get_elements_no(card, new_skb, 0, 0)) { int lin_rc = skb_linearize(new_skb); if (card->options.performance_stats) { @@ -910,7 +910,8 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) } } - elements = qeth_get_elements_no(card, new_skb, elements_needed); + elements = qeth_get_elements_no(card, new_skb, elements_needed, + (data_offset > 0) ? 
data_offset : 0); if (!elements) { if (data_offset >= 0) kmem_cache_free(qeth_core_header_cache, hdr); @@ -1084,6 +1085,7 @@ static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_stop = qeth_l2_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l2_hard_start_xmit, + .ndo_features_check = qeth_features_check, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l2_set_rx_mode, .ndo_do_ioctl = qeth_l2_do_ioctl, @@ -1128,6 +1130,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { card->dev->hw_features = NETIF_F_SG; card->dev->vlan_features = NETIF_F_SG; + card->dev->features |= NETIF_F_SG; /* OSA 3S and earlier has no RX/TX support */ if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) { card->dev->hw_features |= NETIF_F_IP_CSUM; @@ -1140,8 +1143,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) } card->info.broadcast_capable = 1; qeth_l2_request_initial_mac(card); - card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * - PAGE_SIZE; SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_l2_poll, QETH_NAPI_WEIGHT); netif_carrier_off(card->dev); diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h index 26f79533e62e33acffbbc6348d4f45c3049c6052..eedf9b01a496a4ff623ad1d94ac5e7313df2442b 100644 --- a/drivers/s390/net/qeth_l3.h +++ b/drivers/s390/net/qeth_l3.h @@ -80,7 +80,7 @@ void qeth_l3_del_vipa(struct qeth_card *, enum qeth_prot_versions, const u8 *); int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *); void qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions, const u8 *); -int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *, struct qeth_ipaddr *); +void qeth_l3_update_ipato(struct qeth_card *card); struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions); int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *); int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 03a2619166cafc65bf49a8cc49a820be972ac7bb..1487f8a0c575e5a256c3b75ca117bdc50c401412 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -168,8 +168,8 @@ static void qeth_l3_convert_addr_to_bits(u8 *addr, u8 *bits, int len) } } -int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, - struct qeth_ipaddr *addr) +static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, + struct qeth_ipaddr *addr) { struct qeth_ipato_entry *ipatoe; u8 addr_bits[128] = {0, }; @@ -178,6 +178,8 @@ int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, if (!card->ipato.enabled) return 0; + if (addr->type != QETH_IP_TYPE_NORMAL) + return 0; qeth_l3_convert_addr_to_bits((u8 *) &addr->u, addr_bits, (addr->proto == QETH_PROT_IPV4)? 4:16); @@ -293,8 +295,7 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) memcpy(addr, tmp_addr, sizeof(struct qeth_ipaddr)); addr->ref_counter = 1; - if (addr->type == QETH_IP_TYPE_NORMAL && - qeth_l3_is_addr_covered_by_ipato(card, addr)) { + if (qeth_l3_is_addr_covered_by_ipato(card, addr)) { QETH_CARD_TEXT(card, 2, "tkovaddr"); addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; } @@ -607,6 +608,27 @@ int qeth_l3_setrouting_v6(struct qeth_card *card) /* * IP address takeover related functions */ + +/** + * qeth_l3_update_ipato() - Update 'takeover' property, for all NORMAL IPs. + * + * Caller must hold ip_lock. 
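For orientation, the coverage test that these call sites rely on is a straight prefix match plus a polarity flip: the expanded address bits are compared against each entry for mask_bits positions, and invert4/invert6 decide whether a hit enables or exempts takeover. A hedged sketch (hypothetical helper; the driver compares bit strings built by qeth_l3_convert_addr_to_bits()):

#include <linux/string.h>
#include <linux/types.h>

/* addr_bits/entry_bits hold one bit per byte, as produced by the
 * driver's convert-to-bits helper, so memcmp over mask_bits bytes
 * compares exactly mask_bits address bits. */
static bool ipato_prefix_match(const u8 *addr_bits, const u8 *entry_bits,
			       unsigned int mask_bits, bool invert)
{
	bool match = !memcmp(addr_bits, entry_bits, mask_bits);

	return invert ? !match : match;
}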
+ */ +void qeth_l3_update_ipato(struct qeth_card *card) +{ + struct qeth_ipaddr *addr; + unsigned int i; + + hash_for_each(card->ip_htable, i, addr, hnode) { + if (addr->type != QETH_IP_TYPE_NORMAL) + continue; + if (qeth_l3_is_addr_covered_by_ipato(card, addr)) + addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; + else + addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; + } +} + static void qeth_l3_clear_ipato_list(struct qeth_card *card) { struct qeth_ipato_entry *ipatoe, *tmp; @@ -618,6 +640,7 @@ static void qeth_l3_clear_ipato_list(struct qeth_card *card) kfree(ipatoe); } + qeth_l3_update_ipato(card); spin_unlock_bh(&card->ip_lock); } @@ -642,8 +665,10 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card, } } - if (!rc) + if (!rc) { list_add_tail(&new->entry, &card->ipato.entries); + qeth_l3_update_ipato(card); + } spin_unlock_bh(&card->ip_lock); @@ -666,6 +691,7 @@ void qeth_l3_del_ipato_entry(struct qeth_card *card, (proto == QETH_PROT_IPV4)? 4:16) && (ipatoe->mask_bits == mask_bits)) { list_del(&ipatoe->entry); + qeth_l3_update_ipato(card); kfree(ipatoe); } } @@ -1416,6 +1442,7 @@ qeth_l3_add_mc_to_hash(struct qeth_card *card, struct in_device *in4_dev) tmp->u.a4.addr = im4->multiaddr; memcpy(tmp->mac, buf, sizeof(tmp->mac)); + tmp->is_multicast = 1; ipm = qeth_l3_ip_from_hash(card, tmp); if (ipm) { @@ -1593,7 +1620,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card, addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4); if (!addr) - return; + goto out; spin_lock_bh(&card->ip_lock); @@ -1607,6 +1634,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card, spin_unlock_bh(&card->ip_lock); kfree(addr); +out: in_dev_put(in_dev); } @@ -1631,7 +1659,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card, addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6); if (!addr) - return; + goto out; spin_lock_bh(&card->ip_lock); @@ -1646,6 +1674,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card, spin_unlock_bh(&card->ip_lock); kfree(addr); +out: in6_dev_put(in6_dev); #endif /* CONFIG_QETH_IPV6 */ } @@ -2609,17 +2638,13 @@ static void qeth_l3_fill_af_iucv_hdr(struct qeth_card *card, char daddr[16]; struct af_iucv_trans_hdr *iucv_hdr; - skb_pull(skb, 14); - card->dev->header_ops->create(skb, card->dev, 0, - card->dev->dev_addr, card->dev->dev_addr, - card->dev->addr_len); - skb_pull(skb, 14); - iucv_hdr = (struct af_iucv_trans_hdr *)skb->data; memset(hdr, 0, sizeof(struct qeth_hdr)); hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3; hdr->hdr.l3.ext_flags = 0; - hdr->hdr.l3.length = skb->len; + hdr->hdr.l3.length = skb->len - ETH_HLEN; hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST; + + iucv_hdr = (struct af_iucv_trans_hdr *) (skb->data + ETH_HLEN); memset(daddr, 0, sizeof(daddr)); daddr[0] = 0xfe; daddr[1] = 0x80; @@ -2823,10 +2848,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) if ((card->info.type == QETH_CARD_TYPE_IQD) && !skb_is_nonlinear(skb)) { new_skb = skb; - if (new_skb->protocol == ETH_P_AF_IUCV) - data_offset = 0; - else - data_offset = ETH_HLEN; + data_offset = ETH_HLEN; hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC); if (!hdr) goto tx_drop; @@ -2867,7 +2889,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) */ if ((card->info.type != QETH_CARD_TYPE_IQD) && ((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) || - (!use_tso && !qeth_get_elements_no(card, new_skb, 0)))) { + (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0)))) { int lin_rc = 
skb_linearize(new_skb); if (card->options.performance_stats) { @@ -2909,7 +2931,8 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) elements = use_tso ? qeth_l3_get_elements_no_tso(card, new_skb, hdr_elements) : - qeth_get_elements_no(card, new_skb, hdr_elements); + qeth_get_elements_no(card, new_skb, hdr_elements, + (data_offset > 0) ? data_offset : 0); if (!elements) { if (data_offset >= 0) kmem_cache_free(qeth_core_header_cache, hdr); @@ -3064,6 +3087,7 @@ static const struct net_device_ops qeth_l3_netdev_ops = { .ndo_stop = qeth_l3_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l3_hard_start_xmit, + .ndo_features_check = qeth_features_check, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l3_set_multicast_list, .ndo_do_ioctl = qeth_l3_do_ioctl, @@ -3120,6 +3144,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->vlan_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO; + card->dev->features |= NETIF_F_SG; } } } else if (card->info.type == QETH_CARD_TYPE_IQD) { @@ -3145,8 +3170,8 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; netif_keep_dst(card->dev); - card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * - PAGE_SIZE; + netif_set_gso_max_size(card->dev, (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * + PAGE_SIZE); SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_l3_poll, QETH_NAPI_WEIGHT); diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index cffe42f5775d0fd41791e38e67648498b7ca7585..d6bdfc6e905a5af4507c25d9c4162d7b42940f96 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -372,8 +372,8 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); - struct qeth_ipaddr *addr; - int i, rc = 0; + bool enable; + int rc = 0; if (!card) return -EINVAL; @@ -386,25 +386,18 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, } if (sysfs_streq(buf, "toggle")) { - card->ipato.enabled = (card->ipato.enabled)? 0 : 1; - } else if (sysfs_streq(buf, "1")) { - card->ipato.enabled = 1; - hash_for_each(card->ip_htable, i, addr, hnode) { - if ((addr->type == QETH_IP_TYPE_NORMAL) && - qeth_l3_is_addr_covered_by_ipato(card, addr)) - addr->set_flags |= - QETH_IPA_SETIP_TAKEOVER_FLAG; - } - } else if (sysfs_streq(buf, "0")) { - card->ipato.enabled = 0; - hash_for_each(card->ip_htable, i, addr, hnode) { - if (addr->set_flags & - QETH_IPA_SETIP_TAKEOVER_FLAG) - addr->set_flags &= - ~QETH_IPA_SETIP_TAKEOVER_FLAG; - } - } else + enable = !card->ipato.enabled; + } else if (kstrtobool(buf, &enable)) { rc = -EINVAL; + goto out; + } + + if (card->ipato.enabled != enable) { + card->ipato.enabled = enable; + spin_lock_bh(&card->ip_lock); + qeth_l3_update_ipato(card); + spin_unlock_bh(&card->ip_lock); + } out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; @@ -430,20 +423,27 @@ static ssize_t qeth_l3_dev_ipato_invert4_store(struct device *dev, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); + bool invert; int rc = 0; if (!card) return -EINVAL; mutex_lock(&card->conf_mutex); - if (sysfs_streq(buf, "toggle")) - card->ipato.invert4 = (card->ipato.invert4)? 
0 : 1; - else if (sysfs_streq(buf, "1")) - card->ipato.invert4 = 1; - else if (sysfs_streq(buf, "0")) - card->ipato.invert4 = 0; - else + if (sysfs_streq(buf, "toggle")) { + invert = !card->ipato.invert4; + } else if (kstrtobool(buf, &invert)) { rc = -EINVAL; + goto out; + } + + if (card->ipato.invert4 != invert) { + card->ipato.invert4 = invert; + spin_lock_bh(&card->ip_lock); + qeth_l3_update_ipato(card); + spin_unlock_bh(&card->ip_lock); + } +out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; } @@ -609,20 +609,27 @@ static ssize_t qeth_l3_dev_ipato_invert6_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); + bool invert; int rc = 0; if (!card) return -EINVAL; mutex_lock(&card->conf_mutex); - if (sysfs_streq(buf, "toggle")) - card->ipato.invert6 = (card->ipato.invert6)? 0 : 1; - else if (sysfs_streq(buf, "1")) - card->ipato.invert6 = 1; - else if (sysfs_streq(buf, "0")) - card->ipato.invert6 = 0; - else + if (sysfs_streq(buf, "toggle")) { + invert = !card->ipato.invert6; + } else if (kstrtobool(buf, &invert)) { rc = -EINVAL; + goto out; + } + + if (card->ipato.invert6 != invert) { + card->ipato.invert6 = invert; + spin_lock_bh(&card->ip_lock); + qeth_l3_update_ipato(card); + spin_unlock_bh(&card->ip_lock); + } +out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; } diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 0aeecec1f5eafa6390a020eb0174cb2582519524..e2962f15c189a38562e889aaf547cbf12a3d8841 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1416,13 +1416,13 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced) * will ensure that i/o is queisced and the card is flushed in that * case. 
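The three ipato stores reworked above now share one shape: "toggle" flips the current value, anything else must parse via kstrtobool(), and ip_lock is only taken to recompute takeover flags when the value actually changes. Distilled into a sketch (names are illustrative; a real sysfs store returns count on success):

#include <linux/kernel.h>	/* kstrtobool() */
#include <linux/string.h>	/* sysfs_streq() */

static int bool_attr_store(bool *flag, const char *buf,
			   void (*on_change)(void))
{
	bool v;

	if (sysfs_streq(buf, "toggle"))
		v = !*flag;
	else if (kstrtobool(buf, &v))
		return -EINVAL;

	if (*flag != v) {	/* no-op writes skip the locked update */
		*flag = v;
		on_change();
	}
	return 0;
}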
*/ + aac_free_irq(aac); aac_fib_map_free(aac); pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr, aac->comm_phys); aac->comm_addr = NULL; aac->comm_phys = 0; kfree(aac->queues); aac->queues = NULL; - aac_free_irq(aac); kfree(aac->fsa_dev); aac->fsa_dev = NULL; quirks = aac_get_driver_ident(index)->quirks; diff --git a/drivers/scsi/bfa/bfad_debugfs.c b/drivers/scsi/bfa/bfad_debugfs.c index 8dcd8c70c7ee088535a875c7ec2f04d72c8110ef..05f523971348a85d25abf39bc7ed4638471e2ae0 100644 --- a/drivers/scsi/bfa/bfad_debugfs.c +++ b/drivers/scsi/bfa/bfad_debugfs.c @@ -255,7 +255,8 @@ bfad_debugfs_write_regrd(struct file *file, const char __user *buf, struct bfad_s *bfad = port->bfad; struct bfa_s *bfa = &bfad->bfa; struct bfa_ioc_s *ioc = &bfa->ioc; - int addr, len, rc, i; + int addr, rc, i; + u32 len; u32 *regbuf; void __iomem *rb, *reg_addr; unsigned long flags; @@ -266,7 +267,7 @@ bfad_debugfs_write_regrd(struct file *file, const char __user *buf, return PTR_ERR(kern_buf); rc = sscanf(kern_buf, "%x:%x", &addr, &len); - if (rc < 2) { + if (rc < 2 || len > (UINT_MAX >> 2)) { printk(KERN_INFO "bfad[%d]: %s failed to read user buf\n", bfad->inst_no, __func__); diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index 0039bebaa9e24407bc00650fe0906a60f05247af..358ec32927baf824c1fdaf8a1a224e86051ec23e 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -1347,6 +1347,7 @@ static void release_offload_resources(struct cxgbi_sock *csk) csk, csk->state, csk->flags, csk->tid); cxgbi_sock_free_cpl_skbs(csk); + cxgbi_sock_purge_write_queue(csk); if (csk->wr_cred != csk->wr_max_cred) { cxgbi_sock_purge_wr_queue(csk); cxgbi_sock_reset_wr_list(csk); diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index a1d6ab76a51418f9a4f26ecbb76529ea1b775835..0b8db8a74d50923fab03d30c70d1d03a934c7b35 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -2951,7 +2951,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr, /* fill_cmd can't fail here, no data buffer to map. */ (void) fill_cmd(c, reset_type, h, NULL, 0, 0, scsi3addr, TYPE_MSG); - rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT); + rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT); if (rc) { dev_warn(&h->pdev->dev, "Failed to send reset command\n"); goto out; @@ -3686,7 +3686,7 @@ static int hpsa_get_volume_status(struct ctlr_info *h, * # (integer code indicating one of several NOT READY states * describing why a volume is to be kept offline) */ -static int hpsa_volume_offline(struct ctlr_info *h, +static unsigned char hpsa_volume_offline(struct ctlr_info *h, unsigned char scsi3addr[]) { struct CommandList *c; @@ -3707,7 +3707,7 @@ static int hpsa_volume_offline(struct ctlr_info *h, DEFAULT_TIMEOUT); if (rc) { cmd_free(h, c); - return 0; + return HPSA_VPD_LV_STATUS_UNSUPPORTED; } sense = c->err_info->SenseInfo; if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo)) @@ -3718,19 +3718,13 @@ static int hpsa_volume_offline(struct ctlr_info *h, cmd_status = c->err_info->CommandStatus; scsi_status = c->err_info->ScsiStatus; cmd_free(h, c); - /* Is the volume 'not ready'? 
*/ - if (cmd_status != CMD_TARGET_STATUS || - scsi_status != SAM_STAT_CHECK_CONDITION || - sense_key != NOT_READY || - asc != ASC_LUN_NOT_READY) { - return 0; - } /* Determine the reason for not ready state */ ldstat = hpsa_get_volume_status(h, scsi3addr); /* Keep volume offline in certain cases: */ switch (ldstat) { + case HPSA_LV_FAILED: case HPSA_LV_UNDERGOING_ERASE: case HPSA_LV_NOT_AVAILABLE: case HPSA_LV_UNDERGOING_RPI: @@ -3752,7 +3746,7 @@ static int hpsa_volume_offline(struct ctlr_info *h, default: break; } - return 0; + return HPSA_LV_OK; } /* @@ -3825,10 +3819,10 @@ static int hpsa_update_device_info(struct ctlr_info *h, /* Do an inquiry to the device to see what it is. */ if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff, (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { - /* Inquiry failed (msg printed already) */ dev_err(&h->pdev->dev, - "hpsa_update_device_info: inquiry failed\n"); - rc = -EIO; + "%s: inquiry failed, device will be skipped.\n", + __func__); + rc = HPSA_INQUIRY_FAILED; goto bail_out; } @@ -3857,15 +3851,20 @@ static int hpsa_update_device_info(struct ctlr_info *h, if ((this_device->devtype == TYPE_DISK || this_device->devtype == TYPE_ZBC) && is_logical_dev_addr_mode(scsi3addr)) { - int volume_offline; + unsigned char volume_offline; hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level); if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) hpsa_get_ioaccel_status(h, scsi3addr, this_device); volume_offline = hpsa_volume_offline(h, scsi3addr); - if (volume_offline < 0 || volume_offline > 0xff) - volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED; - this_device->volume_offline = volume_offline & 0xff; + this_device->volume_offline = volume_offline; + if (volume_offline == HPSA_LV_FAILED) { + rc = HPSA_LV_FAILED; + dev_err(&h->pdev->dev, + "%s: LV failed, device will be skipped.\n", + __func__); + goto bail_out; + } } else { this_device->raid_level = RAID_UNKNOWN; this_device->offload_config = 0; @@ -4353,8 +4352,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h) goto out; } if (rc) { - dev_warn(&h->pdev->dev, - "Inquiry failed, skipping device.\n"); + h->drv_req_rescan = 1; continue; } @@ -5532,7 +5530,7 @@ static void hpsa_scan_complete(struct ctlr_info *h) spin_lock_irqsave(&h->scan_lock, flags); h->scan_finished = 1; - wake_up_all(&h->scan_wait_queue); + wake_up(&h->scan_wait_queue); spin_unlock_irqrestore(&h->scan_lock, flags); } @@ -5550,11 +5548,23 @@ static void hpsa_scan_start(struct Scsi_Host *sh) if (unlikely(lockup_detected(h))) return hpsa_scan_complete(h); + /* + * If a scan is already waiting to run, no need to add another + */ + spin_lock_irqsave(&h->scan_lock, flags); + if (h->scan_waiting) { + spin_unlock_irqrestore(&h->scan_lock, flags); + return; + } + + spin_unlock_irqrestore(&h->scan_lock, flags); + /* wait until any scan already in progress is finished. 
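The new scan_waiting flag changes hpsa_scan_start() from "everyone queues" to "at most one caller parks behind the running scan"; later callers return immediately, since the already-pending scan will observe their device changes too. The resulting control flow, condensed into a sketch (not the literal driver code):

/* Returns true when the caller may skip: a rescan is already queued. */
static bool scan_queued_or_start(struct ctlr_info *h)
{
	unsigned long flags;

	spin_lock_irqsave(&h->scan_lock, flags);
	if (h->scan_waiting) {		/* someone is parked already */
		spin_unlock_irqrestore(&h->scan_lock, flags);
		return true;
	}
	while (!h->scan_finished) {	/* park behind the running scan */
		h->scan_waiting = 1;
		spin_unlock_irqrestore(&h->scan_lock, flags);
		wait_event(h->scan_wait_queue, h->scan_finished);
		spin_lock_irqsave(&h->scan_lock, flags);
	}
	h->scan_finished = 0;		/* our scan is now in progress */
	h->scan_waiting = 0;
	spin_unlock_irqrestore(&h->scan_lock, flags);
	return false;
}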
*/ while (1) { spin_lock_irqsave(&h->scan_lock, flags); if (h->scan_finished) break; + h->scan_waiting = 1; spin_unlock_irqrestore(&h->scan_lock, flags); wait_event(h->scan_wait_queue, h->scan_finished); /* Note: We don't need to worry about a race between this @@ -5564,6 +5574,7 @@ static void hpsa_scan_start(struct Scsi_Host *sh) */ } h->scan_finished = 0; /* mark scan as in progress */ + h->scan_waiting = 0; spin_unlock_irqrestore(&h->scan_lock, flags); if (unlikely(lockup_detected(h))) @@ -8802,6 +8813,7 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) init_waitqueue_head(&h->event_sync_wait_queue); mutex_init(&h->reset_mutex); h->scan_finished = 1; /* no scan currently in progress */ + h->scan_waiting = 0; pci_set_drvdata(pdev, h); h->ndevices = 0; @@ -9094,6 +9106,8 @@ static void hpsa_remove_one(struct pci_dev *pdev) destroy_workqueue(h->rescan_ctlr_wq); destroy_workqueue(h->resubmit_wq); + hpsa_delete_sas_host(h); + /* * Call before disabling interrupts. * scsi_remove_host can trigger I/O operations especially @@ -9128,8 +9142,6 @@ static void hpsa_remove_one(struct pci_dev *pdev) h->lockup_detected = NULL; /* init_one 2 */ /* (void) pci_disable_pcie_error_reporting(pdev); */ /* init_one 1 */ - hpsa_delete_sas_host(h); - kfree(h); /* init_one 1 */ } @@ -9621,9 +9633,9 @@ static void hpsa_free_sas_phy(struct hpsa_sas_phy *hpsa_sas_phy) struct sas_phy *phy = hpsa_sas_phy->phy; sas_port_delete_phy(hpsa_sas_phy->parent_port->port, phy); - sas_phy_free(phy); if (hpsa_sas_phy->added_to_port) list_del(&hpsa_sas_phy->phy_list_entry); + sas_phy_delete(phy); kfree(hpsa_sas_phy); } diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index 9ea162de80dcfa976b737cf209c4570de6003888..e16f2945f6acfe0b4dfc0311e57d9620af230d1f 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -203,6 +203,7 @@ struct ctlr_info { dma_addr_t errinfo_pool_dhandle; unsigned long *cmd_pool_bits; int scan_finished; + u8 scan_waiting : 1; spinlock_t scan_lock; wait_queue_head_t scan_wait_queue; diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index a584cdf0705846ef13a0375ecb2e1579513ecf92..5961705eef767526f66a6dbc1bbb1e7feec70c85 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h @@ -156,6 +156,7 @@ #define CFGTBL_BusType_Fibre2G 0x00000200l /* VPD Inquiry types */ +#define HPSA_INQUIRY_FAILED 0x02 #define HPSA_VPD_SUPPORTED_PAGES 0x00 #define HPSA_VPD_LV_DEVICE_ID 0x83 #define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1 @@ -166,6 +167,7 @@ /* Logical volume states */ #define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff #define HPSA_LV_OK 0x0 +#define HPSA_LV_FAILED 0x01 #define HPSA_LV_NOT_AVAILABLE 0x0b #define HPSA_LV_UNDERGOING_ERASE 0x0F #define HPSA_LV_UNDERGOING_RPI 0x12 diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index a530f08592cde6a4586594e1c290d4a03317c97f..4abd3fce5ab6c73c85ba361bd6665119dd4919d6 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1727,7 +1727,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { reason = FAILURE_SESSION_IN_RECOVERY; - sc->result = DID_REQUEUE; + sc->result = DID_REQUEUE << 16; goto fault; } diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 4df3cdcf88ceedd63eedaa215299c908008981b4..fc7addaf24da24760cb40fca30cb2308a104e60a 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -7782,7 +7782,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct 
lpfc_sli_ring *pring, did, vport->port_state, ndlp->nlp_flag); phba->fc_stat.elsRcvPRLI++; - if (vport->port_state < LPFC_DISC_AUTH) { + if ((vport->port_state < LPFC_DISC_AUTH) && + (vport->fc_flag & FC_FABRIC)) { rjt_err = LSRJT_UNABLE_TPC; rjt_exp = LSEXP_NOTHING_MORE; break; @@ -8185,11 +8186,17 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) spin_lock_irq(shost->host_lock); vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; spin_unlock_irq(shost->host_lock); - if (vport->port_type == LPFC_PHYSICAL_PORT - && !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) - lpfc_issue_init_vfi(vport); - else + if (mb->mbxStatus == MBX_NOT_FINISHED) + break; + if ((vport->port_type == LPFC_PHYSICAL_PORT) && + !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) { + if (phba->sli_rev == LPFC_SLI_REV4) + lpfc_issue_init_vfi(vport); + else + lpfc_initial_flogi(vport); + } else { lpfc_initial_fdisc(vport); + } break; } } else { diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index ed223937798a9d9653dda07daeb9f10c916a1b79..7d2ad633b6bc72485b755dc7776ce6cd6439d68b 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4784,7 +4784,8 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) lpfc_cancel_retry_delay_tmo(vport, ndlp); if ((ndlp->nlp_flag & NLP_DEFER_RM) && !(ndlp->nlp_flag & NLP_REG_LOGIN_SEND) && - !(ndlp->nlp_flag & NLP_RPI_REGISTERED)) { + !(ndlp->nlp_flag & NLP_RPI_REGISTERED) && + phba->sli_rev != LPFC_SLI_REV4) { /* For this case we need to cleanup the default rpi * allocated by the firmware. */ diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 55faa94637a946f843f8e4cde3e7aaf6d33507d7..2a436dff1589fd7dfd9bae0828cb5ff08841cdc2 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -3232,7 +3232,7 @@ struct lpfc_mbx_get_port_name { #define MB_CEQ_STATUS_QUEUE_FLUSHING 0x4 #define MB_CQE_STATUS_DMA_FAILED 0x5 -#define LPFC_MBX_WR_CONFIG_MAX_BDE 8 +#define LPFC_MBX_WR_CONFIG_MAX_BDE 1 struct lpfc_mbx_wr_object { struct mbox_header header; union { diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 289374cbcb47d393ad35aa1f9910eca6dc120187..468acab04d3dca9ebbeca8aaf892ed9f024dad69 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -4770,6 +4770,11 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) } else if (log_info == VIRTUAL_IO_FAILED_RETRY) { scmd->result = DID_RESET << 16; break; + } else if ((scmd->device->channel == RAID_CHANNEL) && + (scsi_state == (MPI2_SCSI_STATE_TERMINATED | + MPI2_SCSI_STATE_NO_SCSI_STATUS))) { + scmd->result = DID_RESET << 16; + break; } scmd->result = DID_SOFT_ERROR << 16; break; diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 658e4d15cb714bc118f30b748f85c6e1b40f29b2..ce4ac769a9a22407f0285b4bbe22ea9f7dbc162d 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -2707,13 +2707,9 @@ ql_dump_buffer(uint32_t level, scsi_qla_host_t *vha, int32_t id, "%-+5d 0 1 2 3 4 5 6 7 8 9 A B C D E F\n", size); ql_dbg(level, vha, id, "----- -----------------------------------------------\n"); - for (cnt = 0; cnt < size; cnt++, buf++) { - if (cnt % 16 == 0) - ql_dbg(level, vha, id, "%04x:", cnt & ~0xFU); - printk(" %02x", *buf); - if (cnt % 16 == 15) - printk("\n"); + for (cnt = 0; cnt < size; cnt += 16) { + ql_dbg(level, vha, id, "%04x: ", cnt); + 
print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, + buf + cnt, min(16U, size - cnt), false); } - if (cnt % 16 != 0) - printk("\n"); } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 91f5f55a8a9bac6d140cf6467b2c17890ccbf26d..59059ffbb98c06abc061d1485db2fb0624d27899 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -668,11 +668,9 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) { struct qla_hw_data *ha = vha->hw; struct qla_tgt_sess *sess = NULL; - uint32_t unpacked_lun, lun = 0; uint16_t loop_id; int res = 0; struct imm_ntfy_from_isp *n = (struct imm_ntfy_from_isp *)iocb; - struct atio_from_isp *a = (struct atio_from_isp *)iocb; unsigned long flags; loop_id = le16_to_cpu(n->u.isp24.nport_handle); @@ -725,11 +723,7 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) "loop_id %d)\n", vha->host_no, sess, sess->port_name, mcmd, loop_id); - lun = a->u.isp24.fcp_cmnd.lun; - unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); - - return qlt_issue_task_mgmt(sess, unpacked_lun, mcmd, - iocb, QLA24XX_MGMT_SEND_NACK); + return qlt_issue_task_mgmt(sess, 0, mcmd, iocb, QLA24XX_MGMT_SEND_NACK); } /* ha->tgt.sess_lock supposed to be held on entry */ diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index cf04a364fd8b35f869c31e44491d237ae1b3bc6e..2b0e61557317f4ab44b4e76c135ed908746a6c54 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -2996,11 +2996,11 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, if (-1 == ret) { write_unlock_irqrestore(&atomic_rw, iflags); return DID_ERROR << 16; - } else if (sdebug_verbose && (ret < (num * sdebug_sector_size))) + } else if (sdebug_verbose && !ndob && (ret < sdebug_sector_size)) sdev_printk(KERN_INFO, scp->device, - "%s: %s: cdb indicated=%u, IO sent=%d bytes\n", + "%s: %s: lb size=%u, IO sent=%d bytes\n", my_name, "write same", - num * sdebug_sector_size, ret); + sdebug_sector_size, ret); /* Copy first sector to remaining blocks */ for (i = 1 ; i < num ; i++) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 2464569253350b61fef59ed0583e8b4cd1a25944..26e6b05d05fc9bad52f61eb0275bb5d99f196621 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -160,7 +160,7 @@ static struct { {"DGC", "RAID", NULL, BLIST_SPARSELUN}, /* Dell PV 650F, storage on LUN 0 */ {"DGC", "DISK", NULL, BLIST_SPARSELUN}, /* Dell PV 650F, no storage on LUN 0 */ {"EMC", "Invista", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, - {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_FORCELUN}, + {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_REPORTLUN2}, {"EMULEX", "MD21/S2 ESDI", NULL, BLIST_SINGLELUN}, {"easyRAID", "16P", NULL, BLIST_NOREPORTLUN}, {"easyRAID", "X6P", NULL, BLIST_NOREPORTLUN}, diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d8099c7cab000144a24fe6edf6fd34d6aab86ec4..c7b770075caa4b00344c6d58564d8ff7b53c3751 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2041,11 +2041,13 @@ static void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) q->limits.cluster = 0; /* - * set a reasonable default alignment on word boundaries: the - * host and device may alter it using - * blk_queue_update_dma_alignment() later. 
+ * Set a reasonable default alignment: The larger of 32-byte (dword), + * which is a common minimum for HBAs, and the minimum DMA alignment, + * which is set by the platform. + * + * Devices that require a bigger alignment can increase it later. */ - blk_queue_dma_alignment(q, 0x03); + blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment()) - 1); } struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 09fa1fd0c4ce573375ba3de011ef5613e7254e6f..ace56c5e61e1136b31340c10dcb768bc20a7b7d2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -234,11 +234,15 @@ manage_start_stop_store(struct device *dev, struct device_attribute *attr, { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; + bool v; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - sdp->manage_start_stop = simple_strtoul(buf, NULL, 10); + if (kstrtobool(buf, &v)) + return -EINVAL; + + sdp->manage_start_stop = v; return count; } @@ -256,6 +260,7 @@ static ssize_t allow_restart_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + bool v; struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; @@ -265,7 +270,10 @@ allow_restart_store(struct device *dev, struct device_attribute *attr, if (sdp->type != TYPE_DISK) return -EINVAL; - sdp->allow_restart = simple_strtoul(buf, NULL, 10); + if (kstrtobool(buf, &v)) + return -EINVAL; + + sdp->allow_restart = v; return count; } diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 184c7db1e0cafd56daee1e97ef3dd11022698d59..cd9537ddc19f6122feee6101645cd647a2f94e06 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -149,7 +149,6 @@ typedef struct sg_fd { /* holds the state of a file descriptor */ struct list_head rq_list; /* head of request list */ struct fasync_struct *async_qp; /* used by asynchronous notification */ Sg_request req_arr[SG_MAX_QUEUE]; /* used as singly-linked list */ - char low_dma; /* as in parent but possibly overridden to 1 */ char force_packid; /* 1 -> pack_id input to read(), 0 -> ignored */ char cmd_q; /* 1 -> allow command queuing, 0 -> don't */ unsigned char next_cmd_len; /* 0: automatic, >0: use on next write() */ @@ -922,24 +921,14 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) /* strange ..., for backward compatibility */ return sfp->timeout_user; case SG_SET_FORCE_LOW_DMA: - result = get_user(val, ip); - if (result) - return result; - if (val) { - sfp->low_dma = 1; - if ((0 == sfp->low_dma) && !sfp->res_in_use) { - val = (int) sfp->reserve.bufflen; - sg_remove_scat(sfp, &sfp->reserve); - sg_build_reserve(sfp, val); - } - } else { - if (atomic_read(&sdp->detaching)) - return -ENODEV; - sfp->low_dma = sdp->device->host->unchecked_isa_dma; - } + /* + * N.B. This ioctl never worked properly, but failed to + * return an error value. So returning '0' to keep compability + * with legacy applications. 
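A subtlety in the __scsi_init_queue() hunk above: blk_queue_dma_alignment() takes a mask, i.e. alignment minus one, so the old hard-coded 0x03 meant 4-byte alignment. The new expression keeps 4 bytes as the floor but raises the requirement where non-coherent DMA demands cache-line alignment. Worked through (values illustrative):

#include <linux/dma-mapping.h>
#include <linux/kernel.h>

/* mask = alignment - 1:
 *   dma_get_cache_alignment() == 64 -> max(4, 64) - 1 = 0x3f
 *   dma_get_cache_alignment() == 1  -> max(4, 1)  - 1 = 0x03 (old default)
 */
static int default_dma_alignment_mask(void)
{
	return max(4, dma_get_cache_alignment()) - 1;
}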
+ */ return 0; case SG_GET_LOW_DMA: - return put_user((int) sfp->low_dma, ip); + return put_user((int) sdp->device->host->unchecked_isa_dma, ip); case SG_GET_SCSI_ID: if (!access_ok(VERIFY_WRITE, p, sizeof (sg_scsi_id_t))) return -EFAULT; @@ -1860,6 +1849,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) int sg_tablesize = sfp->parentdp->sg_tablesize; int blk_size = buff_size, order; gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; + struct sg_device *sdp = sfp->parentdp; if (blk_size < 0) return -EFAULT; @@ -1885,7 +1875,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) scatter_elem_sz_prev = num; } - if (sfp->low_dma) + if (sdp->device->host->unchecked_isa_dma) gfp_mask |= GFP_DMA; if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) @@ -2148,8 +2138,6 @@ sg_add_sfp(Sg_device * sdp) sfp->timeout = SG_DEFAULT_TIMEOUT; sfp->timeout_user = SG_DEFAULT_TIMEOUT_USER; sfp->force_packid = SG_DEF_FORCE_PACK_ID; - sfp->low_dma = (SG_DEF_FORCE_LOW_DMA == 0) ? - sdp->device->host->unchecked_isa_dma : 1; sfp->cmd_q = SG_DEF_COMMAND_Q; sfp->keep_orphan = SG_DEF_KEEP_ORPHAN; sfp->parentdp = sdp; @@ -2608,7 +2596,7 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) jiffies_to_msecs(fp->timeout), fp->reserve.bufflen, (int) fp->reserve.k_use_sg, - (int) fp->low_dma); + (int) sdp->device->host->unchecked_isa_dma); seq_printf(s, " cmd_q=%d f_packid=%d k_orphan=%d closed=0\n", (int) fp->cmd_q, (int) fp->force_packid, (int) fp->keep_orphan); diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index f0a015aca6086a462eb4995e72e8b21367a73c8e..fb4dafb18a2b5de1c366d86ab4843cda563f2d88 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3622,10 +3622,10 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, completion = ktime_get(); delta_us = ktime_us_delta(completion, req->lat_hist_io_start); - /* rq_data_dir() => true if WRITE */ - blk_update_latency_hist(&hba->io_lat_s, - (rq_data_dir(req) == READ), - delta_us); + blk_update_latency_hist( + (rq_data_dir(req) == READ) ? + &hba->io_lat_read : + &hba->io_lat_write, delta_us); } } /* Do not touch lrbp after scsi done */ @@ -5356,12 +5356,15 @@ static int ufshcd_config_vreg(struct device *dev, struct ufs_vreg *vreg, bool on) { int ret = 0; - struct regulator *reg = vreg->reg; - const char *name = vreg->name; + struct regulator *reg; + const char *name; int min_uV, uA_load; BUG_ON(!vreg); + reg = vreg->reg; + name = vreg->name; + if (regulator_count_voltages(reg) > 0) { min_uV = on ? 
vreg->min_uV : 0; ret = regulator_set_voltage(reg, min_uV, vreg->max_uV); @@ -6371,9 +6374,10 @@ latency_hist_store(struct device *dev, struct device_attribute *attr, if (kstrtol(buf, 0, &value)) return -EINVAL; - if (value == BLK_IO_LAT_HIST_ZERO) - blk_zero_latency_hist(&hba->io_lat_s); - else if (value == BLK_IO_LAT_HIST_ENABLE || + if (value == BLK_IO_LAT_HIST_ZERO) { + memset(&hba->io_lat_read, 0, sizeof(hba->io_lat_read)); + memset(&hba->io_lat_write, 0, sizeof(hba->io_lat_write)); + } else if (value == BLK_IO_LAT_HIST_ENABLE || value == BLK_IO_LAT_HIST_DISABLE) hba->latency_hist_enabled = value; return count; @@ -6384,8 +6388,14 @@ latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf) { struct ufs_hba *hba = dev_get_drvdata(dev); + size_t written_bytes; + + written_bytes = blk_latency_hist_show("Read", &hba->io_lat_read, + buf, PAGE_SIZE); + written_bytes += blk_latency_hist_show("Write", &hba->io_lat_write, + buf + written_bytes, PAGE_SIZE - written_bytes); - return blk_latency_hist_show(&hba->io_lat_s, buf); + return written_bytes; } static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR, diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index dbe68b27c6cb6bbc636c411227354052d8c86149..b35a5b9994123b6c661240cabcc507afb106e361 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -564,7 +564,8 @@ struct ufs_hba { enum bkops_status urgent_bkops_lvl; bool is_urgent_bkops_lvl_checked; int latency_hist_enabled; - struct io_latency_state io_lat_s; + struct io_latency_state io_lat_read; + struct io_latency_state io_lat_write; }; /* Returns true if clocks can be gated. Otherwise false */ diff --git a/drivers/soc/mediatek/mtk-pmic-wrap.c b/drivers/soc/mediatek/mtk-pmic-wrap.c index a5f10936fb9cbe15c0b7338108ee7e1cf830bb13..e929f5142862a0c9869b16e0df47229433614557 100644 --- a/drivers/soc/mediatek/mtk-pmic-wrap.c +++ b/drivers/soc/mediatek/mtk-pmic-wrap.c @@ -522,7 +522,7 @@ struct pmic_wrapper_type { u32 int_en_all; u32 spi_w; u32 wdt_src; - int has_bridge:1; + unsigned int has_bridge:1; int (*init_reg_clock)(struct pmic_wrapper *wrp); int (*init_soc_specific)(struct pmic_wrapper *wrp); }; diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 8e281e47afec73e39a7ed3ce84b900457e22f201..b7995474148c7eb91b2c2f87459b0c9bd446d3f2 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -365,7 +365,6 @@ config SPI_FSL_SPI config SPI_FSL_DSPI tristate "Freescale DSPI controller" select REGMAP_MMIO - depends on HAS_DMA depends on SOC_VF610 || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST help This enables support for the Freescale DSPI controller in master diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index c1eafbd7610a4bd498b4132cb4abb6256a592cb0..da51fed143cd8bd9d489dc1ec00711a3eb7d8337 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -553,7 +553,7 @@ static int spi_engine_probe(struct platform_device *pdev) static int spi_engine_remove(struct platform_device *pdev) { - struct spi_master *master = platform_get_drvdata(pdev); + struct spi_master *master = spi_master_get(platform_get_drvdata(pdev)); struct spi_engine *spi_engine = spi_master_get_devdata(master); int irq = platform_get_irq(pdev, 0); @@ -561,6 +561,8 @@ static int spi_engine_remove(struct platform_device *pdev) free_irq(irq, master); + spi_master_put(master); + writel_relaxed(0xff, spi_engine->base + SPI_ENGINE_REG_INT_PENDING); writel_relaxed(0x00, spi_engine->base + 
SPI_ENGINE_REG_INT_ENABLE); writel_relaxed(0x01, spi_engine->base + SPI_ENGINE_REG_RESET); diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index deb782f6556c119dc8dfe6760510e54df6c0d906..a6e34f05d44d486df625552a76d1d076673768e6 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1307,12 +1307,23 @@ static int spi_imx_remove(struct platform_device *pdev) { struct spi_master *master = platform_get_drvdata(pdev); struct spi_imx_data *spi_imx = spi_master_get_devdata(master); + int ret; spi_bitbang_stop(&spi_imx->bitbang); + ret = clk_enable(spi_imx->clk_per); + if (ret) + return ret; + + ret = clk_enable(spi_imx->clk_ipg); + if (ret) { + clk_disable(spi_imx->clk_per); + return ret; + } + writel(0, spi_imx->base + MXC_CSPICTRL); - clk_unprepare(spi_imx->clk_ipg); - clk_unprepare(spi_imx->clk_per); + clk_disable_unprepare(spi_imx->clk_ipg); + clk_disable_unprepare(spi_imx->clk_per); spi_imx_sdma_exit(spi_imx); spi_master_put(master); diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 1de3a772eb7d23a8a90b9b77704033ac4945e364..cbf02ebb30a22fb4bc7111269d0883d63d148969 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -862,7 +862,7 @@ static int sh_msiof_transfer_one(struct spi_master *master, break; copy32 = copy_bswap32; } else if (bits <= 16) { - if (l & 1) + if (l & 3) break; copy32 = copy_wswap32; } else { diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index bc7100b93dfcf0c24213f479f9a5fffc41666315..e0b9fe1d0e37d98a7243ca35a56b1d62e024e8b3 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) while (remaining_words) { int n_words, tx_words, rx_words; u32 sr; + int stalled; n_words = min(remaining_words, xspi->buffer_size); @@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) /* Read out all the data from the Rx FIFO */ rx_words = n_words; + stalled = 10; while (rx_words) { + if (rx_words == n_words && !(stalled--) && + !(sr & XSPI_SR_TX_EMPTY_MASK) && + (sr & XSPI_SR_RX_EMPTY_MASK)) { + dev_err(&spi->dev, + "Detected stall. 
Check C_SPI_MODE and C_SPI_MEMORY\n"); + xspi_init_hw(xspi); + return -EIO; + } + if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) { xilinx_spi_rx(xspi); rx_words--; diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 9435dc59cb7c5a30950defb75d7e8dba377c5fbd..eb41e8487fb0b04212e1f13d16fcbf3582788593 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -766,10 +766,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; case ASHMEM_SET_SIZE: ret = -EINVAL; + mutex_lock(&ashmem_mutex); if (!asma->file) { ret = 0; asma->size = (size_t)arg; } + mutex_unlock(&ashmem_mutex); break; case ASHMEM_GET_SIZE: ret = asma->size; diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c index 8dffd8a7e762d28d6448650c98f97e7874b6c43b..9f01427f35f919c9074988852f331d0d1333fa3f 100644 --- a/drivers/staging/greybus/light.c +++ b/drivers/staging/greybus/light.c @@ -924,6 +924,8 @@ static void __gb_lights_led_unregister(struct gb_channel *channel) return; led_classdev_unregister(cdev); + kfree(cdev->name); + cdev->name = NULL; channel->led = NULL; } diff --git a/drivers/staging/greybus/loopback.c b/drivers/staging/greybus/loopback.c index 29dc249b0c7446ca137829580845f685cbe3ccfd..3c2c233c2e49cb30b9664ca45185e30ee8555f57 100644 --- a/drivers/staging/greybus/loopback.c +++ b/drivers/staging/greybus/loopback.c @@ -1034,8 +1034,10 @@ static int gb_loopback_fn(void *data) error = gb_loopback_async_sink(gb, size); } - if (error) + if (error) { gb->error++; + gb->iteration_count++; + } } else { /* We are effectively single threaded here */ if (type == GB_LOOPBACK_TYPE_PING) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index 9e8802181452831c52b9a87428ccec02c353a806..e8d9db4d8179bd852a995d6a4d87f31acda6ba9b 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -824,14 +824,15 @@ struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cm return conn; failed_2: - kiblnd_destroy_conn(conn, true); + kiblnd_destroy_conn(conn); + LIBCFS_FREE(conn, sizeof(*conn)); failed_1: LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); failed_0: return NULL; } -void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn) +void kiblnd_destroy_conn(struct kib_conn *conn) { struct rdma_cm_id *cmid = conn->ibc_cmid; struct kib_peer *peer = conn->ibc_peer; @@ -894,8 +895,6 @@ void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn) rdma_destroy_id(cmid); atomic_dec(&net->ibn_nconns); } - - LIBCFS_FREE(conn, sizeof(*conn)); } int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h index 14576977200f8bb2112fa454854ec7351ad0d23a..30cb2f5b3c1596289ac9002e0d4b3bf852aa99e1 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h @@ -1018,7 +1018,7 @@ int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why); struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid, int state, int version); -void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn); +void kiblnd_destroy_conn(struct kib_conn *conn); void kiblnd_close_conn(struct kib_conn *conn, int error); void kiblnd_close_conn_locked(struct kib_conn *conn, int 
error); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 995f2dac7f264e14c8b7e64b04c757d626a8dab4..ea9a0c21d29d351ac2b53443e622f0274eb3bb02 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -3323,11 +3323,13 @@ kiblnd_connd(void *arg) spin_unlock_irqrestore(lock, flags); dropped_lock = 1; - kiblnd_destroy_conn(conn, !peer); + kiblnd_destroy_conn(conn); spin_lock_irqsave(lock, flags); - if (!peer) + if (!peer) { + kfree(conn); continue; + } conn->ibc_peer = peer; if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE) diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c index 436691814a5e31455e216795fc9d9bb7219c2a77..27333d973bcd6b7f3d7c67692d345bea50e628eb 100644 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c @@ -401,15 +401,13 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) result = VM_FAULT_LOCKED; break; case -ENODATA: + case -EAGAIN: case -EFAULT: result = VM_FAULT_NOPAGE; break; case -ENOMEM: result = VM_FAULT_OOM; break; - case -EAGAIN: - result = VM_FAULT_RETRY; - break; default: result = VM_FAULT_SIGBUS; break; diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 499d7bfe7147465332cd148310076ee4e61153d1..75e6d5e0504f5875e40cf221020fe5dff82fa575 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -608,8 +608,7 @@ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg, } memset(msg->msg + msg->len, 0, sizeof(msg->msg) - msg->len); if (msg->len == 1) { - if (cec_msg_initiator(msg) != 0xf || - cec_msg_destination(msg) == 0xf) { + if (cec_msg_destination(msg) == 0xf) { dprintk(1, "cec_transmit_msg: invalid poll message\n"); return -EINVAL; } @@ -634,7 +633,7 @@ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg, dprintk(1, "cec_transmit_msg: destination is the adapter itself\n"); return -EINVAL; } - if (cec_msg_initiator(msg) != 0xf && + if (msg->len > 1 && adap->is_configured && !cec_has_log_addr(adap, cec_msg_initiator(msg))) { dprintk(1, "cec_transmit_msg: initiator has unknown logical address %d\n", cec_msg_initiator(msg)); @@ -883,7 +882,7 @@ static int cec_config_log_addr(struct cec_adapter *adap, /* Send poll message */ msg.len = 1; - msg.msg[0] = 0xf0 | log_addr; + msg.msg[0] = (log_addr << 4) | log_addr; err = cec_transmit_msg_fh(adap, &msg, NULL, true); /* diff --git a/drivers/staging/rtl8188eu/core/rtw_cmd.c b/drivers/staging/rtl8188eu/core/rtw_cmd.c index f1f4788dbd86ce4a3f4ec5df9429883483183f42..6051a7ba0797733aadf65349ccdb57fae777c9de 100644 --- a/drivers/staging/rtl8188eu/core/rtw_cmd.c +++ b/drivers/staging/rtl8188eu/core/rtw_cmd.c @@ -342,7 +342,7 @@ u8 rtw_createbss_cmd(struct adapter *padapter) else RT_TRACE(_module_rtl871x_cmd_c_, _drv_info_, (" createbss for SSid:%s\n", pmlmepriv->assoc_ssid.Ssid)); - pcmd = kzalloc(sizeof(struct cmd_obj), GFP_KERNEL); + pcmd = kzalloc(sizeof(struct cmd_obj), GFP_ATOMIC); if (!pcmd) { res = _FAIL; goto exit; @@ -522,7 +522,7 @@ u8 rtw_disassoc_cmd(struct adapter *padapter, u32 deauth_timeout_ms, bool enqueu if (enqueue) { /* need enqueue, prepare cmd_obj and enqueue */ - cmdobj = kzalloc(sizeof(*cmdobj), GFP_KERNEL); + cmdobj = kzalloc(sizeof(*cmdobj), GFP_ATOMIC); if (!cmdobj) { res = _FAIL; kfree(param); 
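The cec_config_log_addr() change above is clearer with the header byte spelled out: a CEC message opens with one byte carrying the initiator in the high nibble and the destination in the low nibble, and polling a candidate address means the device addresses itself. Illustrative helper:

#include <linux/types.h>

/* Poll log_addr from itself: both nibbles equal. E.g. log_addr == 4
 * yields 0x44; the old 0xf0 | log_addr wrongly polled from the
 * unregistered initiator 0xf. */
static u8 cec_poll_header(u8 log_addr)
{
	return (log_addr << 4) | log_addr;
}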
diff --git a/drivers/staging/rtl8188eu/core/rtw_mlme.c b/drivers/staging/rtl8188eu/core/rtw_mlme.c index ee2dcd05010f6f07c9ff0e6f8648763bfd09dc00..0b60d1e0333e2e9e8fd9d04fec53066aebb6860e 100644 --- a/drivers/staging/rtl8188eu/core/rtw_mlme.c +++ b/drivers/staging/rtl8188eu/core/rtw_mlme.c @@ -107,10 +107,10 @@ void rtw_free_mlme_priv_ie_data(struct mlme_priv *pmlmepriv) void rtw_free_mlme_priv(struct mlme_priv *pmlmepriv) { - rtw_free_mlme_priv_ie_data(pmlmepriv); - - if (pmlmepriv) + if (pmlmepriv) { + rtw_free_mlme_priv_ie_data(pmlmepriv); vfree(pmlmepriv->free_bss_buf); + } } struct wlan_network *_rtw_alloc_network(struct mlme_priv *pmlmepriv) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 4de9dbc93380e8af46a1fb6eb60b1a6b5311da9e..c7bf8ab26192fc2c400f1405a7db3d76b0b7d570 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -1397,19 +1397,13 @@ static int rtw_wx_get_essid(struct net_device *dev, if ((check_fwstate(pmlmepriv, _FW_LINKED)) || (check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE))) { len = pcur_bss->Ssid.SsidLength; - - wrqu->essid.length = len; - memcpy(extra, pcur_bss->Ssid.Ssid, len); - - wrqu->essid.flags = 1; } else { - ret = -1; - goto exit; + len = 0; + *extra = 0; } - -exit: - + wrqu->essid.length = len; + wrqu->essid.flags = 1; return ret; } diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index f109eeac358d86954f5ee8caefc4a0b310725dce..ab96629b7889beb993f86d04e1b0fe63e1913cc4 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1698,10 +1698,11 @@ static int vt6655_suspend(struct pci_dev *pcid, pm_message_t state) MACbShutdown(priv); pci_disable_device(pcid); - pci_set_power_state(pcid, pci_choose_state(pcid, state)); spin_unlock_irqrestore(&priv->lock, flags); + pci_set_power_state(pcid, pci_choose_state(pcid, state)); + return 0; } diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 0d578297d9f9a3b91bba5ac20a5124eaacc67303..04d2b6e25503c59ec55ed9a9ff7de0570e97f8d6 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -841,6 +841,7 @@ static int iscsit_add_reject_from_cmd( unsigned char *buf) { struct iscsi_conn *conn; + const bool do_put = cmd->se_cmd.se_tfo != NULL; if (!cmd->conn) { pr_err("cmd->conn is NULL for ITT: 0x%08x\n", @@ -871,7 +872,7 @@ static int iscsit_add_reject_from_cmd( * Perform the kref_put now if se_cmd has already been setup by * scsit_setup_scsi_cmd() */ - if (cmd->se_cmd.se_tfo != NULL) { + if (do_put) { pr_debug("iscsi reject: calling target_put_sess_cmd >>>>>>\n"); target_put_sess_cmd(&cmd->se_cmd); } @@ -1939,7 +1940,6 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct iscsi_tmr_req *tmr_req; struct iscsi_tm *hdr; int out_of_order_cmdsn = 0, ret; - bool sess_ref = false; u8 function, tcm_function = TMR_UNKNOWN; hdr = (struct iscsi_tm *) buf; @@ -1981,18 +1981,17 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, buf); } + transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops, + conn->sess->se_sess, 0, DMA_NONE, + TCM_SIMPLE_TAG, cmd->sense_buffer + 2); + + target_get_sess_cmd(&cmd->se_cmd, true); + /* * TASK_REASSIGN for ERL=2 / connection stays inside of * LIO-Target $FABRIC_MOD */ if (function != ISCSI_TM_FUNC_TASK_REASSIGN) { - transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops, - 
conn->sess->se_sess, 0, DMA_NONE, - TCM_SIMPLE_TAG, cmd->sense_buffer + 2); - - target_get_sess_cmd(&cmd->se_cmd, true); - sess_ref = true; - switch (function) { case ISCSI_TM_FUNC_ABORT_TASK: tcm_function = TMR_ABORT_TASK; @@ -2131,12 +2130,8 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * For connection recovery, this is also the default action for * TMR TASK_REASSIGN. */ - if (sess_ref) { - pr_debug("Handle TMR, using sess_ref=true check\n"); - target_put_sess_cmd(&cmd->se_cmd); - } - iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + target_put_sess_cmd(&cmd->se_cmd); return 0; } EXPORT_SYMBOL(iscsit_handle_task_mgt_cmd); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 9cbbc9cf63fb70b346913407a6e3523f1d96dd90..8a4bc15bc3f565aa94c5146668943d204389f377 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1144,7 +1144,7 @@ static struct se_portal_group *lio_target_tiqn_addtpg( ret = core_tpg_register(wwn, &tpg->tpg_se_tpg, SCSI_PROTOCOL_ISCSI); if (ret < 0) - return NULL; + goto free_out; ret = iscsit_tpg_add_portal_group(tiqn, tpg); if (ret != 0) @@ -1156,6 +1156,7 @@ static struct se_portal_group *lio_target_tiqn_addtpg( return &tpg->tpg_se_tpg; out: core_tpg_deregister(&tpg->tpg_se_tpg); +free_out: kfree(tpg); return NULL; } diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 4c82bbe19003d083979fac3139ab91e3a15a01a0..ee5b29aed54bd9921a6cc0d8541906e6c27b879b 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1010,7 +1010,7 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) static void core_alua_do_transition_tg_pt_work(struct work_struct *work) { struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work, - struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work); + struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work); struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); @@ -1073,17 +1073,8 @@ static int core_alua_do_transition_tg_pt( /* * Flush any pending transitions */ - if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs && - atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == - ALUA_ACCESS_STATE_TRANSITION) { - /* Just in case */ - tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; - tg_pt_gp->tg_pt_gp_transition_complete = &wait; - flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work); - wait_for_completion(&wait); - tg_pt_gp->tg_pt_gp_transition_complete = NULL; - return 0; - } + if (!explicit) + flush_work(&tg_pt_gp->tg_pt_gp_transition_work); /* * Save the old primary ALUA access state, and set the current state @@ -1114,17 +1105,9 @@ static int core_alua_do_transition_tg_pt( atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt); spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) { - unsigned long transition_tmo; - - transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ; - queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq, - &tg_pt_gp->tg_pt_gp_transition_work, - transition_tmo); - } else { + schedule_work(&tg_pt_gp->tg_pt_gp_transition_work); + if (explicit) { tg_pt_gp->tg_pt_gp_transition_complete = &wait; - queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq, - &tg_pt_gp->tg_pt_gp_transition_work, 0); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = 
NULL; } @@ -1692,8 +1675,8 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex); spin_lock_init(&tg_pt_gp->tg_pt_gp_lock); atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0); - INIT_DELAYED_WORK(&tg_pt_gp->tg_pt_gp_transition_work, - core_alua_do_transition_tg_pt_work); + INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work, + core_alua_do_transition_tg_pt_work); tg_pt_gp->tg_pt_gp_dev = dev; atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED); @@ -1801,7 +1784,7 @@ void core_alua_free_tg_pt_gp( dev->t10_alua.alua_tg_pt_gps_counter--; spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work); + flush_work(&tg_pt_gp->tg_pt_gp_transition_work); /* * Allow a struct t10_alua_tg_pt_gp_member * referenced by diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 29f807b29e7490f0c6b65a0f095c350d158a8310..97928b42ad6253062cc048fee5436625f2e29343 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -466,6 +466,10 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) struct inode *inode = file->f_mapping->host; int ret; + if (!nolb) { + return 0; + } + if (cmd->se_dev->dev_attrib.pi_prot_type) { ret = fd_do_prot_unmap(cmd, lba, nolb); if (ret) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 47463c99c3181ed8e133b2d39ba9362d0196541a..df20921c233c18454dd86e68bb04809ee627eeff 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -56,8 +56,10 @@ void core_pr_dump_initiator_port( char *buf, u32 size) { - if (!pr_reg->isid_present_at_reg) + if (!pr_reg->isid_present_at_reg) { buf[0] = '\0'; + return; + } snprintf(buf, size, ",i,0x%s", pr_reg->pr_reg_isid); } diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 27dd1e12f246afbddc64fce39e03d00584b0528c..14bb2db5273c2b5c017a5d52e80538473fbc63ba 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -133,6 +133,15 @@ static bool __target_check_io_state(struct se_cmd *se_cmd, spin_unlock(&se_cmd->t_state_lock); return false; } + if (se_cmd->transport_state & CMD_T_PRE_EXECUTE) { + if (se_cmd->scsi_status) { + pr_debug("Attempted to abort io tag: %llu early failure" + " status: 0x%02x\n", se_cmd->tag, + se_cmd->scsi_status); + spin_unlock(&se_cmd->t_state_lock); + return false; + } + } if (sess->sess_tearing_down || se_cmd->cmd_wait_set) { pr_debug("Attempted to abort io tag: %llu already shutdown," " skipping\n", se_cmd->tag); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 4c0782cb1e94bc7ebe43ffc9af182fa9f8013f52..6f3eccf986c73aefd2ee6e1657902534899c72f2 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1939,6 +1939,7 @@ void target_execute_cmd(struct se_cmd *cmd) } cmd->t_state = TRANSPORT_PROCESSING; + cmd->transport_state &= ~CMD_T_PRE_EXECUTE; cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); @@ -2592,6 +2593,7 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) ret = -ESHUTDOWN; goto out; } + se_cmd->transport_state |= CMD_T_PRE_EXECUTE; list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list); out: spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); diff --git a/drivers/tee/Kconfig b/drivers/tee/Kconfig new file mode 100644 index 
0000000000000000000000000000000000000000..a6df12d88f90cd097ab88df8d151970243aea26b --- /dev/null +++ b/drivers/tee/Kconfig @@ -0,0 +1,19 @@ +# Generic Trusted Execution Environment Configuration +config TEE + tristate "Trusted Execution Environment support" + depends on HAVE_ARM_SMCCC || COMPILE_TEST + select DMA_SHARED_BUFFER + select GENERIC_ALLOCATOR + help + This implements a generic interface towards a Trusted Execution + Environment (TEE). + +if TEE + +menu "TEE drivers" + +source "drivers/tee/optee/Kconfig" + +endmenu + +endif diff --git a/drivers/tee/Makefile b/drivers/tee/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..7a4e4a1ac39c0f22ada94a4b4e19d081b2687744 --- /dev/null +++ b/drivers/tee/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_TEE) += tee.o +tee-objs += tee_core.o +tee-objs += tee_shm.o +tee-objs += tee_shm_pool.o +obj-$(CONFIG_OPTEE) += optee/ diff --git a/drivers/tee/optee/Kconfig b/drivers/tee/optee/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..0126de898036cdb6eacb51cc672232b7de9060d7 --- /dev/null +++ b/drivers/tee/optee/Kconfig @@ -0,0 +1,7 @@ +# OP-TEE Trusted Execution Environment Configuration +config OPTEE + tristate "OP-TEE" + depends on HAVE_ARM_SMCCC + help + This implements the OP-TEE Trusted Execution Environment (TEE) + driver. diff --git a/drivers/tee/optee/Makefile b/drivers/tee/optee/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..220cf4298f0d5a3136039526b4aad0918750171a --- /dev/null +++ b/drivers/tee/optee/Makefile @@ -0,0 +1,6 @@ +obj-$(CONFIG_OPTEE) += optee.o +optee-objs += core.o +optee-objs += call.o +optee-objs += rpc.o +optee-objs += supp.o +optee-objs += shm_pool.o diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c new file mode 100644 index 0000000000000000000000000000000000000000..a5afbe6dee6862aa2104a56cbae6458c31e253b8 --- /dev/null +++ b/drivers/tee/optee/call.c @@ -0,0 +1,662 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "optee_private.h" +#include "optee_smc.h" + +struct optee_call_waiter { + struct list_head list_node; + struct completion c; +}; + +static void optee_cq_wait_init(struct optee_call_queue *cq, + struct optee_call_waiter *w) +{ + /* + * We're preparing to make a call to secure world. In case we can't + * allocate a thread in secure world we'll end up waiting in + * optee_cq_wait_for_completion(). + * + * Normally if there's no contention in secure world the call will + * complete and we can cleanup directly with optee_cq_wait_final(). + */ + mutex_lock(&cq->mutex); + + /* + * We add ourselves to the queue, but we don't wait. This + * guarantees that we don't lose a completion if secure world + * returns busy and another thread just exited and try to complete + * someone. 
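+ *
+ * The intended calling pattern, sketched here for orientation (it is
+ * the same loop that optee_do_call_with_arg() and
+ * optee_enable_shm_cache() below implement):
+ *
+ *	optee_cq_wait_init(cq, &w);
+ *	while (true) {
+ *		invoke_fn(..., &res);
+ *		if (res.a0 == OPTEE_SMC_RETURN_ETHREAD_LIMIT)
+ *			optee_cq_wait_for_completion(cq, &w);
+ *		else
+ *			break;
+ *	}
+ *	optee_cq_wait_final(cq, &w);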
+ */ + init_completion(&w->c); + list_add_tail(&w->list_node, &cq->waiters); + + mutex_unlock(&cq->mutex); +} + +static void optee_cq_wait_for_completion(struct optee_call_queue *cq, + struct optee_call_waiter *w) +{ + wait_for_completion(&w->c); + + mutex_lock(&cq->mutex); + + /* Move to end of list to get out of the way for other waiters */ + list_del(&w->list_node); + reinit_completion(&w->c); + list_add_tail(&w->list_node, &cq->waiters); + + mutex_unlock(&cq->mutex); +} + +static void optee_cq_complete_one(struct optee_call_queue *cq) +{ + struct optee_call_waiter *w; + + list_for_each_entry(w, &cq->waiters, list_node) { + if (!completion_done(&w->c)) { + complete(&w->c); + break; + } + } +} + +static void optee_cq_wait_final(struct optee_call_queue *cq, + struct optee_call_waiter *w) +{ + /* + * We're done with the call to secure world. The thread in secure + * world that was used for this call is now available for some + * other task to use. + */ + mutex_lock(&cq->mutex); + + /* Get out of the list */ + list_del(&w->list_node); + + /* Wake up one eventual waiting task */ + optee_cq_complete_one(cq); + + /* + * If we're completed we've got a completion from another task that + * was just done with its call to secure world. Since yet another + * thread is now available in secure world, wake up another eventual + * waiting task. + */ + if (completion_done(&w->c)) + optee_cq_complete_one(cq); + + mutex_unlock(&cq->mutex); +} + +/* Requires the filpstate mutex to be held */ +static struct optee_session *find_session(struct optee_context_data *ctxdata, + u32 session_id) +{ + struct optee_session *sess; + + list_for_each_entry(sess, &ctxdata->sess_list, list_node) + if (sess->session_id == session_id) + return sess; + + return NULL; +} + +/** + * optee_do_call_with_arg() - Do an SMC to OP-TEE in secure world + * @ctx: calling context + * @parg: physical address of message to pass to secure world + * + * Does an SMC to OP-TEE in secure world and handles eventual resulting + * Remote Procedure Calls (RPC) from OP-TEE. + * + * Returns return code from secure world, 0 is OK + */ +u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg) +{ + struct optee *optee = tee_get_drvdata(ctx->teedev); + struct optee_call_waiter w; + struct optee_rpc_param param = { }; + struct optee_call_ctx call_ctx = { }; + u32 ret; + + param.a0 = OPTEE_SMC_CALL_WITH_ARG; + reg_pair_from_64(&param.a1, &param.a2, parg); + /* Initialize waiter */ + optee_cq_wait_init(&optee->call_queue, &w); + while (true) { + struct arm_smccc_res res; + + optee->invoke_fn(param.a0, param.a1, param.a2, param.a3, + param.a4, param.a5, param.a6, param.a7, + &res); + + if (res.a0 == OPTEE_SMC_RETURN_ETHREAD_LIMIT) { + /* + * Out of threads in secure world, wait for a thread + * to become available. + */ + optee_cq_wait_for_completion(&optee->call_queue, &w); + } else if (OPTEE_SMC_RETURN_IS_RPC(res.a0)) { + param.a0 = res.a0; + param.a1 = res.a1; + param.a2 = res.a2; + param.a3 = res.a3; + optee_handle_rpc(ctx, &param, &call_ctx); + } else { + ret = res.a0; + break; + } + } + + optee_rpc_finalize_call(&call_ctx); + /* + * We're done with our thread in secure world, if there are any + * thread waiters, wake up one.
+ */ + optee_cq_wait_final(&optee->call_queue, &w); + + return ret; +} + +static struct tee_shm *get_msg_arg(struct tee_context *ctx, size_t num_params, + struct optee_msg_arg **msg_arg, + phys_addr_t *msg_parg) +{ + int rc; + struct tee_shm *shm; + struct optee_msg_arg *ma; + + shm = tee_shm_alloc(ctx, OPTEE_MSG_GET_ARG_SIZE(num_params), + TEE_SHM_MAPPED); + if (IS_ERR(shm)) + return shm; + + ma = tee_shm_get_va(shm, 0); + if (IS_ERR(ma)) { + rc = PTR_ERR(ma); + goto out; + } + + rc = tee_shm_get_pa(shm, 0, msg_parg); + if (rc) + goto out; + + memset(ma, 0, OPTEE_MSG_GET_ARG_SIZE(num_params)); + ma->num_params = num_params; + *msg_arg = ma; +out: + if (rc) { + tee_shm_free(shm); + return ERR_PTR(rc); + } + + return shm; +} + +int optee_open_session(struct tee_context *ctx, + struct tee_ioctl_open_session_arg *arg, + struct tee_param *param) +{ + struct optee_context_data *ctxdata = ctx->data; + int rc; + struct tee_shm *shm; + struct optee_msg_arg *msg_arg; + phys_addr_t msg_parg; + struct optee_session *sess = NULL; + + /* +2 for the meta parameters added below */ + shm = get_msg_arg(ctx, arg->num_params + 2, &msg_arg, &msg_parg); + if (IS_ERR(shm)) + return PTR_ERR(shm); + + msg_arg->cmd = OPTEE_MSG_CMD_OPEN_SESSION; + msg_arg->cancel_id = arg->cancel_id; + + /* + * Initialize and add the meta parameters needed when opening a + * session. + */ + msg_arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT | + OPTEE_MSG_ATTR_META; + msg_arg->params[1].attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT | + OPTEE_MSG_ATTR_META; + memcpy(&msg_arg->params[0].u.value, arg->uuid, sizeof(arg->uuid)); + memcpy(&msg_arg->params[1].u.value, arg->clnt_uuid, sizeof(arg->clnt_uuid)); + msg_arg->params[1].u.value.c = arg->clnt_login; + + rc = optee_to_msg_param(msg_arg->params + 2, arg->num_params, param); + if (rc) + goto out; + + sess = kzalloc(sizeof(*sess), GFP_KERNEL); + if (!sess) { + rc = -ENOMEM; + goto out; + } + + if (optee_do_call_with_arg(ctx, msg_parg)) { + msg_arg->ret = TEEC_ERROR_COMMUNICATION; + msg_arg->ret_origin = TEEC_ORIGIN_COMMS; + } + + if (msg_arg->ret == TEEC_SUCCESS) { + /* A new session has been created, add it to the list.
*/ + sess->session_id = msg_arg->session; + mutex_lock(&ctxdata->mutex); + list_add(&sess->list_node, &ctxdata->sess_list); + mutex_unlock(&ctxdata->mutex); + } else { + kfree(sess); + } + + if (optee_from_msg_param(param, arg->num_params, msg_arg->params + 2)) { + arg->ret = TEEC_ERROR_COMMUNICATION; + arg->ret_origin = TEEC_ORIGIN_COMMS; + /* Close session again to avoid leakage */ + optee_close_session(ctx, msg_arg->session); + } else { + arg->session = msg_arg->session; + arg->ret = msg_arg->ret; + arg->ret_origin = msg_arg->ret_origin; + } +out: + tee_shm_free(shm); + + return rc; +} + +int optee_close_session(struct tee_context *ctx, u32 session) +{ + struct optee_context_data *ctxdata = ctx->data; + struct tee_shm *shm; + struct optee_msg_arg *msg_arg; + phys_addr_t msg_parg; + struct optee_session *sess; + + /* Check that the session is valid and remove it from the list */ + mutex_lock(&ctxdata->mutex); + sess = find_session(ctxdata, session); + if (sess) + list_del(&sess->list_node); + mutex_unlock(&ctxdata->mutex); + if (!sess) + return -EINVAL; + kfree(sess); + + shm = get_msg_arg(ctx, 0, &msg_arg, &msg_parg); + if (IS_ERR(shm)) + return PTR_ERR(shm); + + msg_arg->cmd = OPTEE_MSG_CMD_CLOSE_SESSION; + msg_arg->session = session; + optee_do_call_with_arg(ctx, msg_parg); + + tee_shm_free(shm); + return 0; +} + +int optee_invoke_func(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg, + struct tee_param *param) +{ + struct optee_context_data *ctxdata = ctx->data; + struct tee_shm *shm; + struct optee_msg_arg *msg_arg; + phys_addr_t msg_parg; + struct optee_session *sess; + int rc; + + /* Check that the session is valid */ + mutex_lock(&ctxdata->mutex); + sess = find_session(ctxdata, arg->session); + mutex_unlock(&ctxdata->mutex); + if (!sess) + return -EINVAL; + + shm = get_msg_arg(ctx, arg->num_params, &msg_arg, &msg_parg); + if (IS_ERR(shm)) + return PTR_ERR(shm); + msg_arg->cmd = OPTEE_MSG_CMD_INVOKE_COMMAND; + msg_arg->func = arg->func; + msg_arg->session = arg->session; + msg_arg->cancel_id = arg->cancel_id; + + rc = optee_to_msg_param(msg_arg->params, arg->num_params, param); + if (rc) + goto out; + + if (optee_do_call_with_arg(ctx, msg_parg)) { + msg_arg->ret = TEEC_ERROR_COMMUNICATION; + msg_arg->ret_origin = TEEC_ORIGIN_COMMS; + } + + if (optee_from_msg_param(param, arg->num_params, msg_arg->params)) { + msg_arg->ret = TEEC_ERROR_COMMUNICATION; + msg_arg->ret_origin = TEEC_ORIGIN_COMMS; + } + + arg->ret = msg_arg->ret; + arg->ret_origin = msg_arg->ret_origin; +out: + tee_shm_free(shm); + return rc; +} + +int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session) +{ + struct optee_context_data *ctxdata = ctx->data; + struct tee_shm *shm; + struct optee_msg_arg *msg_arg; + phys_addr_t msg_parg; + struct optee_session *sess; + + /* Check that the session is valid */ + mutex_lock(&ctxdata->mutex); + sess = find_session(ctxdata, session); + mutex_unlock(&ctxdata->mutex); + if (!sess) + return -EINVAL; + + shm = get_msg_arg(ctx, 0, &msg_arg, &msg_parg); + if (IS_ERR(shm)) + return PTR_ERR(shm); + + msg_arg->cmd = OPTEE_MSG_CMD_CANCEL; + msg_arg->session = session; + msg_arg->cancel_id = cancel_id; + optee_do_call_with_arg(ctx, msg_parg); + + tee_shm_free(shm); + return 0; +} + +/** + * optee_enable_shm_cache() - Enables caching of some shared memory allocation + * in OP-TEE + * @optee: main service struct + */ +void optee_enable_shm_cache(struct optee *optee) +{ + struct optee_call_waiter w; + + /* We need to retry until secure world isn't busy. 
*/ + optee_cq_wait_init(&optee->call_queue, &w); + while (true) { + struct arm_smccc_res res; + + optee->invoke_fn(OPTEE_SMC_ENABLE_SHM_CACHE, 0, 0, 0, 0, 0, 0, + 0, &res); + if (res.a0 == OPTEE_SMC_RETURN_OK) + break; + optee_cq_wait_for_completion(&optee->call_queue, &w); + } + optee_cq_wait_final(&optee->call_queue, &w); +} + +/** + * optee_disable_shm_cache() - Disables caching of some shared memory allocation + * in OP-TEE + * @optee: main service struct + */ +void optee_disable_shm_cache(struct optee *optee) +{ + struct optee_call_waiter w; + + /* We need to retry until secure world isn't busy. */ + optee_cq_wait_init(&optee->call_queue, &w); + while (true) { + union { + struct arm_smccc_res smccc; + struct optee_smc_disable_shm_cache_result result; + } res; + + optee->invoke_fn(OPTEE_SMC_DISABLE_SHM_CACHE, 0, 0, 0, 0, 0, 0, + 0, &res.smccc); + if (res.result.status == OPTEE_SMC_RETURN_ENOTAVAIL) + break; /* All shm's freed */ + if (res.result.status == OPTEE_SMC_RETURN_OK) { + struct tee_shm *shm; + + shm = reg_pair_to_ptr(res.result.shm_upper32, + res.result.shm_lower32); + tee_shm_free(shm); + } else { + optee_cq_wait_for_completion(&optee->call_queue, &w); + } + } + optee_cq_wait_final(&optee->call_queue, &w); +} + +#define PAGELIST_ENTRIES_PER_PAGE \ + ((OPTEE_MSG_NONCONTIG_PAGE_SIZE / sizeof(u64)) - 1) + +/** + * optee_fill_pages_list() - write list of user pages to given shared + * buffer. + * + * @dst: page-aligned buffer where list of pages will be stored + * @pages: array of pages that represents shared buffer + * @num_pages: number of entries in @pages + * @page_offset: offset of user buffer from page start + * + * @dst should be big enough to hold list of user page addresses and + * links to the next pages of buffer + */ +void optee_fill_pages_list(u64 *dst, struct page **pages, int num_pages, + size_t page_offset) +{ + int n = 0; + phys_addr_t optee_page; + /* + * Refer to OPTEE_MSG_ATTR_NONCONTIG description in optee_msg.h + * for details. + */ + struct { + u64 pages_list[PAGELIST_ENTRIES_PER_PAGE]; + u64 next_page_data; + } *pages_data; + + /* + * Currently OP-TEE uses 4k page size and it does not look like + * this will change in the future. On the other hand, there are + * no known ARM architectures with page size < 4k. + * Thus the next build assert looks redundant. But the following + * code heavily relies on this assumption, so it is better to be + * safe than sorry. + */ + BUILD_BUG_ON(PAGE_SIZE < OPTEE_MSG_NONCONTIG_PAGE_SIZE); + + pages_data = (void *)dst; + /* + * If a Linux page is bigger than 4k, and the user buffer offset is + * larger than 4k/8k/12k/etc, this will skip the first 4k chunks, + * because they bear no data of value for OP-TEE. + */ + optee_page = page_to_phys(*pages) + + round_down(page_offset, OPTEE_MSG_NONCONTIG_PAGE_SIZE); + + while (true) { + pages_data->pages_list[n++] = optee_page; + + if (n == PAGELIST_ENTRIES_PER_PAGE) { + pages_data->next_page_data = + virt_to_phys(pages_data + 1); + pages_data++; + n = 0; + } + + optee_page += OPTEE_MSG_NONCONTIG_PAGE_SIZE; + if (!(optee_page & ~PAGE_MASK)) { + if (!--num_pages) + break; + pages++; + optee_page = page_to_phys(*pages); + } + } +} + +/* + * The final entry in each pagelist page is a pointer to the next + * pagelist page.
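+ *
+ * As a worked example (4 KiB pagelist pages, 64-bit entries): each
+ * pagelist page holds 4096 / 8 - 1 = 511 page addresses plus that final
+ * link, so 511 entries fit in a single pagelist page while 512 entries
+ * already need a second one.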
+ */ +static size_t get_pages_list_size(size_t num_entries) +{ + int pages = DIV_ROUND_UP(num_entries, PAGELIST_ENTRIES_PER_PAGE); + + return pages * OPTEE_MSG_NONCONTIG_PAGE_SIZE; +} + +u64 *optee_allocate_pages_list(size_t num_entries) +{ + return alloc_pages_exact(get_pages_list_size(num_entries), GFP_KERNEL); +} + +void optee_free_pages_list(void *list, size_t num_entries) +{ + free_pages_exact(list, get_pages_list_size(num_entries)); +} + +static bool is_normal_memory(pgprot_t p) +{ +#if defined(CONFIG_ARM) + return (pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEALLOC; +#elif defined(CONFIG_ARM64) + return (pgprot_val(p) & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL); +#else +#error "Unsupported architecture" +#endif +} + +static int __check_mem_type(struct vm_area_struct *vma, unsigned long end) +{ + while (vma && is_normal_memory(vma->vm_page_prot)) { + if (vma->vm_end >= end) + return 0; + vma = vma->vm_next; + } + + return -EINVAL; +} + +static int check_mem_type(unsigned long start, size_t num_pages) +{ + struct mm_struct *mm = current->mm; + int rc; + + down_read(&mm->mmap_sem); + rc = __check_mem_type(find_vma(mm, start), + start + num_pages * PAGE_SIZE); + up_read(&mm->mmap_sem); + + return rc; +} + +int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm, + struct page **pages, size_t num_pages, + unsigned long start) +{ + struct tee_shm *shm_arg = NULL; + struct optee_msg_arg *msg_arg; + u64 *pages_list; + phys_addr_t msg_parg; + int rc; + + if (!num_pages) + return -EINVAL; + + rc = check_mem_type(start, num_pages); + if (rc) + return rc; + + pages_list = optee_allocate_pages_list(num_pages); + if (!pages_list) + return -ENOMEM; + + shm_arg = get_msg_arg(ctx, 1, &msg_arg, &msg_parg); + if (IS_ERR(shm_arg)) { + rc = PTR_ERR(shm_arg); + goto out; + } + + optee_fill_pages_list(pages_list, pages, num_pages, + tee_shm_get_page_offset(shm)); + + msg_arg->cmd = OPTEE_MSG_CMD_REGISTER_SHM; + msg_arg->params->attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT | + OPTEE_MSG_ATTR_NONCONTIG; + msg_arg->params->u.tmem.shm_ref = (unsigned long)shm; + msg_arg->params->u.tmem.size = tee_shm_get_size(shm); + /* + * In the least significant bits of msg_arg->params->u.tmem.buf_ptr + * we store the buffer offset from the 4k page, as described in the + * OP-TEE ABI. + */ + msg_arg->params->u.tmem.buf_ptr = virt_to_phys(pages_list) | + (tee_shm_get_page_offset(shm) & (OPTEE_MSG_NONCONTIG_PAGE_SIZE - 1)); + + if (optee_do_call_with_arg(ctx, msg_parg) || + msg_arg->ret != TEEC_SUCCESS) + rc = -EINVAL; + + tee_shm_free(shm_arg); +out: + optee_free_pages_list(pages_list, num_pages); + return rc; +} + +int optee_shm_unregister(struct tee_context *ctx, struct tee_shm *shm) +{ + struct tee_shm *shm_arg; + struct optee_msg_arg *msg_arg; + phys_addr_t msg_parg; + int rc = 0; + + shm_arg = get_msg_arg(ctx, 1, &msg_arg, &msg_parg); + if (IS_ERR(shm_arg)) + return PTR_ERR(shm_arg); + + msg_arg->cmd = OPTEE_MSG_CMD_UNREGISTER_SHM; + + msg_arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_RMEM_INPUT; + msg_arg->params[0].u.rmem.shm_ref = (unsigned long)shm; + + if (optee_do_call_with_arg(ctx, msg_parg) || + msg_arg->ret != TEEC_SUCCESS) + rc = -EINVAL; + tee_shm_free(shm_arg); + return rc; +} + +int optee_shm_register_supp(struct tee_context *ctx, struct tee_shm *shm, + struct page **pages, size_t num_pages, + unsigned long start) +{ + /* + * We don't want to register supplicant memory in OP-TEE. + * Instead, information about it will be passed in RPC code.
+ */ + return check_mem_type(start, num_pages); +} + +int optee_shm_unregister_supp(struct tee_context *ctx, struct tee_shm *shm) +{ + return 0; +} diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c new file mode 100644 index 0000000000000000000000000000000000000000..e9843c53fe3141e6d252bbcfbd21a239bbf08ca5 --- /dev/null +++ b/drivers/tee/optee/core.c @@ -0,0 +1,705 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "optee_private.h" +#include "optee_smc.h" +#include "shm_pool.h" + +#define DRIVER_NAME "optee" + +#define OPTEE_SHM_NUM_PRIV_PAGES 1 + +/** + * optee_from_msg_param() - convert from OPTEE_MSG parameters to + * struct tee_param + * @params: subsystem internal parameter representation + * @num_params: number of elements in the parameter arrays + * @msg_params: OPTEE_MSG parameters + * Returns 0 on success or <0 on failure + */ +int optee_from_msg_param(struct tee_param *params, size_t num_params, + const struct optee_msg_param *msg_params) +{ + int rc; + size_t n; + struct tee_shm *shm; + phys_addr_t pa; + + for (n = 0; n < num_params; n++) { + struct tee_param *p = params + n; + const struct optee_msg_param *mp = msg_params + n; + u32 attr = mp->attr & OPTEE_MSG_ATTR_TYPE_MASK; + + switch (attr) { + case OPTEE_MSG_ATTR_TYPE_NONE: + p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_NONE; + memset(&p->u, 0, sizeof(p->u)); + break; + case OPTEE_MSG_ATTR_TYPE_VALUE_INPUT: + case OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT: + case OPTEE_MSG_ATTR_TYPE_VALUE_INOUT: + p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT + + attr - OPTEE_MSG_ATTR_TYPE_VALUE_INPUT; + p->u.value.a = mp->u.value.a; + p->u.value.b = mp->u.value.b; + p->u.value.c = mp->u.value.c; + break; + case OPTEE_MSG_ATTR_TYPE_TMEM_INPUT: + case OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT: + case OPTEE_MSG_ATTR_TYPE_TMEM_INOUT: + p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT + + attr - OPTEE_MSG_ATTR_TYPE_TMEM_INPUT; + p->u.memref.size = mp->u.tmem.size; + shm = (struct tee_shm *)(unsigned long) + mp->u.tmem.shm_ref; + if (!shm) { + p->u.memref.shm_offs = 0; + p->u.memref.shm = NULL; + break; + } + rc = tee_shm_get_pa(shm, 0, &pa); + if (rc) + return rc; + p->u.memref.shm_offs = mp->u.tmem.buf_ptr - pa; + p->u.memref.shm = shm; + + /* Check that the memref is covered by the shm object */ + if (p->u.memref.size) { + size_t o = p->u.memref.shm_offs + + p->u.memref.size - 1; + + rc = tee_shm_get_pa(shm, o, NULL); + if (rc) + return rc; + } + break; + case OPTEE_MSG_ATTR_TYPE_RMEM_INPUT: + case OPTEE_MSG_ATTR_TYPE_RMEM_OUTPUT: + case OPTEE_MSG_ATTR_TYPE_RMEM_INOUT: + p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT + + attr - OPTEE_MSG_ATTR_TYPE_RMEM_INPUT; + p->u.memref.size = mp->u.rmem.size; + shm = (struct tee_shm *)(unsigned long) + mp->u.rmem.shm_ref; + + if (!shm) { + p->u.memref.shm_offs = 0; + p->u.memref.shm = NULL; + break; + } + p->u.memref.shm_offs = mp->u.rmem.offs; + p->u.memref.shm = shm; + + 
break; + + default: + return -EINVAL; + } + } + return 0; +} + +static int to_msg_param_tmp_mem(struct optee_msg_param *mp, + const struct tee_param *p) +{ + int rc; + phys_addr_t pa; + + mp->attr = OPTEE_MSG_ATTR_TYPE_TMEM_INPUT + p->attr - + TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT; + + mp->u.tmem.shm_ref = (unsigned long)p->u.memref.shm; + mp->u.tmem.size = p->u.memref.size; + + if (!p->u.memref.shm) { + mp->u.tmem.buf_ptr = 0; + return 0; + } + + rc = tee_shm_get_pa(p->u.memref.shm, p->u.memref.shm_offs, &pa); + if (rc) + return rc; + + mp->u.tmem.buf_ptr = pa; + mp->attr |= OPTEE_MSG_ATTR_CACHE_PREDEFINED << + OPTEE_MSG_ATTR_CACHE_SHIFT; + + return 0; +} + +static int to_msg_param_reg_mem(struct optee_msg_param *mp, + const struct tee_param *p) +{ + mp->attr = OPTEE_MSG_ATTR_TYPE_RMEM_INPUT + p->attr - + TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT; + + mp->u.rmem.shm_ref = (unsigned long)p->u.memref.shm; + mp->u.rmem.size = p->u.memref.size; + mp->u.rmem.offs = p->u.memref.shm_offs; + return 0; +} + +/** + * optee_to_msg_param() - convert from struct tee_param to OPTEE_MSG parameters + * @msg_params: OPTEE_MSG parameters + * @num_params: number of elements in the parameter arrays + * @params: subsystem internal parameter representation + * Returns 0 on success or <0 on failure + */ +int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params, + const struct tee_param *params) +{ + int rc; + size_t n; + + for (n = 0; n < num_params; n++) { + const struct tee_param *p = params + n; + struct optee_msg_param *mp = msg_params + n; + + switch (p->attr) { + case TEE_IOCTL_PARAM_ATTR_TYPE_NONE: + mp->attr = OPTEE_MSG_ATTR_TYPE_NONE; + memset(&mp->u, 0, sizeof(mp->u)); + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + mp->attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT + p->attr - + TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT; + mp->u.value.a = p->u.value.a; + mp->u.value.b = p->u.value.b; + mp->u.value.c = p->u.value.c; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + if (tee_shm_is_registered(p->u.memref.shm)) + rc = to_msg_param_reg_mem(mp, p); + else + rc = to_msg_param_tmp_mem(mp, p); + if (rc) + return rc; + break; + default: + return -EINVAL; + } + } + return 0; +} + +static void optee_get_version(struct tee_device *teedev, + struct tee_ioctl_version_data *vers) +{ + struct tee_ioctl_version_data v = { + .impl_id = TEE_IMPL_ID_OPTEE, + .impl_caps = TEE_OPTEE_CAP_TZ, + .gen_caps = TEE_GEN_CAP_GP, + }; + struct optee *optee = tee_get_drvdata(teedev); + + if (optee->sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM) + v.gen_caps |= TEE_GEN_CAP_REG_MEM; + *vers = v; +} + +static int optee_open(struct tee_context *ctx) +{ + struct optee_context_data *ctxdata; + struct tee_device *teedev = ctx->teedev; + struct optee *optee = tee_get_drvdata(teedev); + + ctxdata = kzalloc(sizeof(*ctxdata), GFP_KERNEL); + if (!ctxdata) + return -ENOMEM; + + if (teedev == optee->supp_teedev) { + bool busy = true; + + mutex_lock(&optee->supp.mutex); + if (!optee->supp.ctx) { + busy = false; + optee->supp.ctx = ctx; + } + mutex_unlock(&optee->supp.mutex); + if (busy) { + kfree(ctxdata); + return -EBUSY; + } + } + + mutex_init(&ctxdata->mutex); + INIT_LIST_HEAD(&ctxdata->sess_list); + + ctx->data = ctxdata; + return 0; +} + +static void optee_release(struct tee_context *ctx) +{ + struct optee_context_data *ctxdata =
ctx->data; + struct tee_device *teedev = ctx->teedev; + struct optee *optee = tee_get_drvdata(teedev); + struct tee_shm *shm; + struct optee_msg_arg *arg = NULL; + phys_addr_t parg; + struct optee_session *sess; + struct optee_session *sess_tmp; + + if (!ctxdata) + return; + + shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), TEE_SHM_MAPPED); + if (!IS_ERR(shm)) { + arg = tee_shm_get_va(shm, 0); + /* + * If va2pa fails for some reason, we can't call into + * secure world, only free the memory. Secure OS will leak + * sessions and finally refuse more sessions, but we will + * at least let normal world reclaim its memory. + */ + if (!IS_ERR(arg)) + if (tee_shm_va2pa(shm, arg, &parg)) + arg = NULL; /* prevent usage of parg below */ + } + + list_for_each_entry_safe(sess, sess_tmp, &ctxdata->sess_list, + list_node) { + list_del(&sess->list_node); + if (!IS_ERR_OR_NULL(arg)) { + memset(arg, 0, sizeof(*arg)); + arg->cmd = OPTEE_MSG_CMD_CLOSE_SESSION; + arg->session = sess->session_id; + optee_do_call_with_arg(ctx, parg); + } + kfree(sess); + } + kfree(ctxdata); + + if (!IS_ERR(shm)) + tee_shm_free(shm); + + ctx->data = NULL; + + if (teedev == optee->supp_teedev) + optee_supp_release(&optee->supp); +} + +static const struct tee_driver_ops optee_ops = { + .get_version = optee_get_version, + .open = optee_open, + .release = optee_release, + .open_session = optee_open_session, + .close_session = optee_close_session, + .invoke_func = optee_invoke_func, + .cancel_req = optee_cancel_req, + .shm_register = optee_shm_register, + .shm_unregister = optee_shm_unregister, +}; + +static const struct tee_desc optee_desc = { + .name = DRIVER_NAME "-clnt", + .ops = &optee_ops, + .owner = THIS_MODULE, +}; + +static const struct tee_driver_ops optee_supp_ops = { + .get_version = optee_get_version, + .open = optee_open, + .release = optee_release, + .supp_recv = optee_supp_recv, + .supp_send = optee_supp_send, + .shm_register = optee_shm_register_supp, + .shm_unregister = optee_shm_unregister_supp, +}; + +static const struct tee_desc optee_supp_desc = { + .name = DRIVER_NAME "-supp", + .ops = &optee_supp_ops, + .owner = THIS_MODULE, + .flags = TEE_DESC_PRIVILEGED, +}; + +static bool optee_msg_api_uid_is_optee_api(optee_invoke_fn *invoke_fn) +{ + struct arm_smccc_res res; + + invoke_fn(OPTEE_SMC_CALLS_UID, 0, 0, 0, 0, 0, 0, 0, &res); + + if (res.a0 == OPTEE_MSG_UID_0 && res.a1 == OPTEE_MSG_UID_1 && + res.a2 == OPTEE_MSG_UID_2 && res.a3 == OPTEE_MSG_UID_3) + return true; + return false; +} + +static bool optee_msg_api_revision_is_compatible(optee_invoke_fn *invoke_fn) +{ + union { + struct arm_smccc_res smccc; + struct optee_smc_calls_revision_result result; + } res; + + invoke_fn(OPTEE_SMC_CALLS_REVISION, 0, 0, 0, 0, 0, 0, 0, &res.smccc); + + if (res.result.major == OPTEE_MSG_REVISION_MAJOR && + (int)res.result.minor >= OPTEE_MSG_REVISION_MINOR) + return true; + return false; +} + +static bool optee_msg_exchange_capabilities(optee_invoke_fn *invoke_fn, + u32 *sec_caps) +{ + union { + struct arm_smccc_res smccc; + struct optee_smc_exchange_capabilities_result result; + } res; + u32 a1 = 0; + + /* + * TODO This isn't enough to tell if it's UP system (from kernel + * point of view) or not, is_smp() returns the the information + * needed, but can't be called directly from here. 
+ */ + if (!IS_ENABLED(CONFIG_SMP) || nr_cpu_ids == 1) + a1 |= OPTEE_SMC_NSEC_CAP_UNIPROCESSOR; + + invoke_fn(OPTEE_SMC_EXCHANGE_CAPABILITIES, a1, 0, 0, 0, 0, 0, 0, + &res.smccc); + + if (res.result.status != OPTEE_SMC_RETURN_OK) + return false; + + *sec_caps = res.result.capabilities; + return true; +} + +static struct tee_shm_pool * +optee_config_shm_memremap(optee_invoke_fn *invoke_fn, void **memremaped_shm, + u32 sec_caps) +{ + union { + struct arm_smccc_res smccc; + struct optee_smc_get_shm_config_result result; + } res; + unsigned long vaddr; + phys_addr_t paddr; + size_t size; + phys_addr_t begin; + phys_addr_t end; + void *va; + struct tee_shm_pool_mgr *priv_mgr; + struct tee_shm_pool_mgr *dmabuf_mgr; + void *rc; + + invoke_fn(OPTEE_SMC_GET_SHM_CONFIG, 0, 0, 0, 0, 0, 0, 0, &res.smccc); + if (res.result.status != OPTEE_SMC_RETURN_OK) { + pr_info("shm service not available\n"); + return ERR_PTR(-ENOENT); + } + + if (res.result.settings != OPTEE_SMC_SHM_CACHED) { + pr_err("only normal cached shared memory supported\n"); + return ERR_PTR(-EINVAL); + } + + begin = roundup(res.result.start, PAGE_SIZE); + end = rounddown(res.result.start + res.result.size, PAGE_SIZE); + paddr = begin; + size = end - begin; + + if (size < 2 * OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE) { + pr_err("too small shared memory area\n"); + return ERR_PTR(-EINVAL); + } + + va = memremap(paddr, size, MEMREMAP_WB); + if (!va) { + pr_err("shared memory ioremap failed\n"); + return ERR_PTR(-EINVAL); + } + vaddr = (unsigned long)va; + + /* + * If OP-TEE can work with unregistered SHM, we will use own pool + * for private shm + */ + if (sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM) { + rc = optee_shm_pool_alloc_pages(); + if (IS_ERR(rc)) + goto err_memunmap; + priv_mgr = rc; + } else { + const size_t sz = OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE; + + rc = tee_shm_pool_mgr_alloc_res_mem(vaddr, paddr, sz, + 3 /* 8 bytes aligned */); + if (IS_ERR(rc)) + goto err_memunmap; + priv_mgr = rc; + + vaddr += sz; + paddr += sz; + size -= sz; + } + + rc = tee_shm_pool_mgr_alloc_res_mem(vaddr, paddr, size, PAGE_SHIFT); + if (IS_ERR(rc)) + goto err_free_priv_mgr; + dmabuf_mgr = rc; + + rc = tee_shm_pool_alloc(priv_mgr, dmabuf_mgr); + if (IS_ERR(rc)) + goto err_free_dmabuf_mgr; + + *memremaped_shm = va; + + return rc; + +err_free_dmabuf_mgr: + tee_shm_pool_mgr_destroy(dmabuf_mgr); +err_free_priv_mgr: + tee_shm_pool_mgr_destroy(priv_mgr); +err_memunmap: + memunmap(va); + return rc; +} + +/* Simple wrapper functions to be able to use a function pointer */ +static void optee_smccc_smc(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + unsigned long a4, unsigned long a5, + unsigned long a6, unsigned long a7, + struct arm_smccc_res *res) +{ + arm_smccc_smc(a0, a1, a2, a3, a4, a5, a6, a7, res); +} + +static void optee_smccc_hvc(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + unsigned long a4, unsigned long a5, + unsigned long a6, unsigned long a7, + struct arm_smccc_res *res) +{ + arm_smccc_hvc(a0, a1, a2, a3, a4, a5, a6, a7, res); +} + +static optee_invoke_fn *get_invoke_func(struct device_node *np) +{ + const char *method; + + pr_info("probing for conduit method from DT.\n"); + + if (of_property_read_string(np, "method", &method)) { + pr_warn("missing \"method\" property\n"); + return ERR_PTR(-ENXIO); + } + + if (!strcmp("hvc", method)) + return optee_smccc_hvc; + else if (!strcmp("smc", method)) + return optee_smccc_smc; + + pr_warn("invalid \"method\" property: %s\n", method); + return 
ERR_PTR(-EINVAL); +} + +static struct optee *optee_probe(struct device_node *np) +{ + optee_invoke_fn *invoke_fn; + struct tee_shm_pool *pool; + struct optee *optee = NULL; + void *memremaped_shm = NULL; + struct tee_device *teedev; + u32 sec_caps; + int rc; + + invoke_fn = get_invoke_func(np); + if (IS_ERR(invoke_fn)) + return (void *)invoke_fn; + + if (!optee_msg_api_uid_is_optee_api(invoke_fn)) { + pr_warn("api uid mismatch\n"); + return ERR_PTR(-EINVAL); + } + + if (!optee_msg_api_revision_is_compatible(invoke_fn)) { + pr_warn("api revision mismatch\n"); + return ERR_PTR(-EINVAL); + } + + if (!optee_msg_exchange_capabilities(invoke_fn, &sec_caps)) { + pr_warn("capabilities mismatch\n"); + return ERR_PTR(-EINVAL); + } + + /* + * We have no other option for shared memory, if secure world + * doesn't have any reserved memory we can use we can't continue. + */ + if (!(sec_caps & OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM)) + return ERR_PTR(-EINVAL); + + pool = optee_config_shm_memremap(invoke_fn, &memremaped_shm, sec_caps); + if (IS_ERR(pool)) + return (void *)pool; + + optee = kzalloc(sizeof(*optee), GFP_KERNEL); + if (!optee) { + rc = -ENOMEM; + goto err; + } + + optee->invoke_fn = invoke_fn; + optee->sec_caps = sec_caps; + + teedev = tee_device_alloc(&optee_desc, NULL, pool, optee); + if (IS_ERR(teedev)) { + rc = PTR_ERR(teedev); + goto err; + } + optee->teedev = teedev; + + teedev = tee_device_alloc(&optee_supp_desc, NULL, pool, optee); + if (IS_ERR(teedev)) { + rc = PTR_ERR(teedev); + goto err; + } + optee->supp_teedev = teedev; + + rc = tee_device_register(optee->teedev); + if (rc) + goto err; + + rc = tee_device_register(optee->supp_teedev); + if (rc) + goto err; + + mutex_init(&optee->call_queue.mutex); + INIT_LIST_HEAD(&optee->call_queue.waiters); + optee_wait_queue_init(&optee->wait_queue); + optee_supp_init(&optee->supp); + optee->memremaped_shm = memremaped_shm; + optee->pool = pool; + + optee_enable_shm_cache(optee); + + pr_info("initialized driver\n"); + return optee; +err: + if (optee) { + /* + * tee_device_unregister() is safe to call even if the + * devices hasn't been registered with + * tee_device_register() yet. + */ + tee_device_unregister(optee->supp_teedev); + tee_device_unregister(optee->teedev); + kfree(optee); + } + if (pool) + tee_shm_pool_free(pool); + if (memremaped_shm) + memunmap(memremaped_shm); + return ERR_PTR(rc); +} + +static void optee_remove(struct optee *optee) +{ + /* + * Ask OP-TEE to free all cached shared memory objects to decrease + * reference counters and also avoid wild pointers in secure world + * into the old shared memory range. + */ + optee_disable_shm_cache(optee); + + /* + * The two devices has to be unregistered before we can free the + * other resources. 
+ */ + tee_device_unregister(optee->supp_teedev); + tee_device_unregister(optee->teedev); + + tee_shm_pool_free(optee->pool); + if (optee->memremaped_shm) + memunmap(optee->memremaped_shm); + optee_wait_queue_exit(&optee->wait_queue); + optee_supp_uninit(&optee->supp); + mutex_destroy(&optee->call_queue.mutex); + + kfree(optee); +} + +static const struct of_device_id optee_match[] = { + { .compatible = "linaro,optee-tz" }, + {}, +}; + +static struct optee *optee_svc; + +static int __init optee_driver_init(void) +{ + struct device_node *fw_np; + struct device_node *np; + struct optee *optee; + + /* Node is supposed to be below /firmware */ + fw_np = of_find_node_by_name(NULL, "firmware"); + if (!fw_np) + return -ENODEV; + + np = of_find_matching_node(fw_np, optee_match); + if (!np) + return -ENODEV; + + optee = optee_probe(np); + of_node_put(np); + + if (IS_ERR(optee)) + return PTR_ERR(optee); + + optee_svc = optee; + + return 0; +} +module_init(optee_driver_init); + +static void __exit optee_driver_exit(void) +{ + struct optee *optee = optee_svc; + + optee_svc = NULL; + if (optee) + optee_remove(optee); +} +module_exit(optee_driver_exit); + +MODULE_AUTHOR("Linaro"); +MODULE_DESCRIPTION("OP-TEE driver"); +MODULE_SUPPORTED_DEVICE(""); +MODULE_VERSION("1.0"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/tee/optee/optee_msg.h b/drivers/tee/optee/optee_msg.h new file mode 100644 index 0000000000000000000000000000000000000000..30504901be80e2f7ad10c3949e32135f39f3cc35 --- /dev/null +++ b/drivers/tee/optee/optee_msg.h @@ -0,0 +1,444 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _OPTEE_MSG_H +#define _OPTEE_MSG_H + +#include +#include + +/* + * This file defines the OP-TEE message protocol used to communicate + * with an instance of OP-TEE running in secure world. + * + * This file is divided into three sections. + * 1. Formatting of messages. + * 2. Requests from normal world + * 3. Requests from secure world, Remote Procedure Call (RPC), handled by + * tee-supplicant. 
+ */ + +/***************************************************************************** + * Part 1 - formatting of messages + *****************************************************************************/ + +#define OPTEE_MSG_ATTR_TYPE_NONE 0x0 +#define OPTEE_MSG_ATTR_TYPE_VALUE_INPUT 0x1 +#define OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT 0x2 +#define OPTEE_MSG_ATTR_TYPE_VALUE_INOUT 0x3 +#define OPTEE_MSG_ATTR_TYPE_RMEM_INPUT 0x5 +#define OPTEE_MSG_ATTR_TYPE_RMEM_OUTPUT 0x6 +#define OPTEE_MSG_ATTR_TYPE_RMEM_INOUT 0x7 +#define OPTEE_MSG_ATTR_TYPE_TMEM_INPUT 0x9 +#define OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT 0xa +#define OPTEE_MSG_ATTR_TYPE_TMEM_INOUT 0xb + +#define OPTEE_MSG_ATTR_TYPE_MASK GENMASK(7, 0) + +/* + * Meta parameter to be absorbed by the Secure OS and not passed + * to the Trusted Application. + * + * Currently only used with OPTEE_MSG_CMD_OPEN_SESSION. + */ +#define OPTEE_MSG_ATTR_META BIT(8) + +/* + * Pointer to a list of pages used to register a user-defined SHM buffer. + * Used with OPTEE_MSG_ATTR_TYPE_TMEM_*. + * buf_ptr should point to the beginning of the buffer. The buffer will + * contain a list of page addresses. OP-TEE core can reconstruct a + * contiguous buffer from that list of page addresses. Page addresses are + * stored as 64 bit values. The last entry on a page should point to the + * next page of the buffer. Every entry in the buffer should point to the + * beginning of a 4k page (the 12 least significant bits must be equal to + * zero). + * + * The 12 least significant bits of optee_msg_param.u.tmem.buf_ptr should + * hold the page offset of the user buffer. + * + * So, entries should be placed like members of this structure: + * + * struct page_data { + * uint64_t pages_array[OPTEE_MSG_NONCONTIG_PAGE_SIZE/sizeof(uint64_t) - 1]; + * uint64_t next_page_data; + * }; + * + * The structure is designed to exactly fit into the page size + * OPTEE_MSG_NONCONTIG_PAGE_SIZE, which is a standard 4KB page. + * + * The size of 4KB is chosen because this is the smallest page size for ARM + * architectures. If REE uses larger pages, it should divide them into 4KB + * ones. + */ +#define OPTEE_MSG_ATTR_NONCONTIG BIT(9) + +/* + * Memory attributes for caching passed with temp memrefs. The actual value + * used is defined outside the message protocol with the exception of + * OPTEE_MSG_ATTR_CACHE_PREDEFINED which means the attributes already + * defined for the memory range should be used. If optee_smc.h is used as + * bearer of this protocol OPTEE_SMC_SHM_* is used for values. + */ +#define OPTEE_MSG_ATTR_CACHE_SHIFT 16 +#define OPTEE_MSG_ATTR_CACHE_MASK GENMASK(2, 0) +#define OPTEE_MSG_ATTR_CACHE_PREDEFINED 0 + +/* + * Same values as TEE_LOGIN_* from TEE Internal API + */ +#define OPTEE_MSG_LOGIN_PUBLIC 0x00000000 +#define OPTEE_MSG_LOGIN_USER 0x00000001 +#define OPTEE_MSG_LOGIN_GROUP 0x00000002 +#define OPTEE_MSG_LOGIN_APPLICATION 0x00000004 +#define OPTEE_MSG_LOGIN_APPLICATION_USER 0x00000005 +#define OPTEE_MSG_LOGIN_APPLICATION_GROUP 0x00000006 + +/* + * Page size used in non-contiguous buffer entries + */ +#define OPTEE_MSG_NONCONTIG_PAGE_SIZE 4096 + +/** + * struct optee_msg_param_tmem - temporary memory reference parameter + * @buf_ptr: Address of the buffer + * @size: Size of the buffer + * @shm_ref: Temporary shared memory reference, pointer to a struct tee_shm + * + * Secure and normal world communicate pointers as physical addresses + * instead of virtual addresses. This is because secure and normal world + * have completely independent memory mapping.
Normal world can even have a + * hypervisor which needs to translate the guest physical address (AKA IPA + * in ARM documentation) to a real physical address before passing the + * structure to secure world. + */ +struct optee_msg_param_tmem { + u64 buf_ptr; + u64 size; + u64 shm_ref; +}; + +/** + * struct optee_msg_param_rmem - registered memory reference parameter + * @offs: Offset into shared memory reference + * @size: Size of the buffer + * @shm_ref: Shared memory reference, pointer to a struct tee_shm + */ +struct optee_msg_param_rmem { + u64 offs; + u64 size; + u64 shm_ref; +}; + +/** + * struct optee_msg_param_value - opaque value parameter + * + * Value parameters are passed unchecked between normal and secure world. + */ +struct optee_msg_param_value { + u64 a; + u64 b; + u64 c; +}; + +/** + * struct optee_msg_param - parameter used together with struct optee_msg_arg + * @attr: attributes + * @tmem: parameter by temporary memory reference + * @rmem: parameter by registered memory reference + * @value: parameter by opaque value + * + * @attr & OPTEE_MSG_ATTR_TYPE_MASK indicates if tmem, rmem or value is used in + * the union. OPTEE_MSG_ATTR_TYPE_VALUE_* indicates @value, + * OPTEE_MSG_ATTR_TYPE_TMEM_* indicates @tmem, and + * OPTEE_MSG_ATTR_TYPE_RMEM_* indicates @rmem. + * OPTEE_MSG_ATTR_TYPE_NONE indicates that none of the members are used. + */ +struct optee_msg_param { + u64 attr; + union { + struct optee_msg_param_tmem tmem; + struct optee_msg_param_rmem rmem; + struct optee_msg_param_value value; + } u; +}; + +/** + * struct optee_msg_arg - call argument + * @cmd: Command, one of OPTEE_MSG_CMD_* or OPTEE_MSG_RPC_CMD_* + * @func: Trusted Application function, specific to the Trusted Application, + * used if cmd == OPTEE_MSG_CMD_INVOKE_COMMAND + * @session: In parameter for all OPTEE_MSG_CMD_* except + * OPTEE_MSG_CMD_OPEN_SESSION where it's an output parameter instead + * @cancel_id: Cancellation id, a unique value to identify this request + * @ret: return value + * @ret_origin: origin of the return value + * @num_params: number of parameters supplied to the OS Command + * @params: the parameters supplied to the OS Command + * + * All normal calls to the Trusted OS use this struct. If cmd requires + * further information than these fields hold, it can be passed as a + * parameter tagged as meta (setting the OPTEE_MSG_ATTR_META bit in the + * corresponding attrs field). All parameters tagged as meta have to come + * first. + * + * Temp memref parameters can be fragmented if supported by the Trusted OS + * (when optee_smc.h is bearer of this protocol this is indicated with + * OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM). If a logical memref parameter is + * fragmented, all but the last fragment have the OPTEE_MSG_ATTR_FRAGMENT + * bit set in attrs. Even if a memref is fragmented it will still be + * presented as a single logical memref to the Trusted Application. + */ +struct optee_msg_arg { + u32 cmd; + u32 func; + u32 session; + u32 cancel_id; + u32 pad; + u32 ret; + u32 ret_origin; + u32 num_params; + + /* num_params tells the actual number of elements in params */ + struct optee_msg_param params[0]; +}; + +/** + * OPTEE_MSG_GET_ARG_SIZE - return size of struct optee_msg_arg + * + * @num_params: Number of parameters embedded in the struct optee_msg_arg + * + * Returns the size of the struct optee_msg_arg together with the number + * of embedded parameters.
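+ *
+ * As a worked example, assuming the layout above (eight 32-bit header
+ * fields, 32-byte parameters): OPTEE_MSG_GET_ARG_SIZE(4) =
+ * 32 + 4 * 32 = 160 bytes.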
+ */ +#define OPTEE_MSG_GET_ARG_SIZE(num_params) \ + (sizeof(struct optee_msg_arg) + \ + sizeof(struct optee_msg_param) * (num_params)) + +/***************************************************************************** + * Part 2 - requests from normal world + *****************************************************************************/ + +/* + * Return the following UID if using API specified in this file without + * further extensions: + * 384fb3e0-e7f8-11e3-af63-0002a5d5c51b. + * Represented in 4 32-bit words in OPTEE_MSG_UID_0, OPTEE_MSG_UID_1, + * OPTEE_MSG_UID_2, OPTEE_MSG_UID_3. + */ +#define OPTEE_MSG_UID_0 0x384fb3e0 +#define OPTEE_MSG_UID_1 0xe7f811e3 +#define OPTEE_MSG_UID_2 0xaf630002 +#define OPTEE_MSG_UID_3 0xa5d5c51b +#define OPTEE_MSG_FUNCID_CALLS_UID 0xFF01 + +/* + * Returns 2.0 if using API specified in this file without further + * extensions. Represented in 2 32-bit words in OPTEE_MSG_REVISION_MAJOR + * and OPTEE_MSG_REVISION_MINOR + */ +#define OPTEE_MSG_REVISION_MAJOR 2 +#define OPTEE_MSG_REVISION_MINOR 0 +#define OPTEE_MSG_FUNCID_CALLS_REVISION 0xFF03 + +/* + * Get UUID of Trusted OS. + * + * Used by non-secure world to figure out which Trusted OS is installed. + * Note that returned UUID is the UUID of the Trusted OS, not of the API. + * + * Returns UUID in 4 32-bit words in the same way as + * OPTEE_MSG_FUNCID_CALLS_UID described above. + */ +#define OPTEE_MSG_OS_OPTEE_UUID_0 0x486178e0 +#define OPTEE_MSG_OS_OPTEE_UUID_1 0xe7f811e3 +#define OPTEE_MSG_OS_OPTEE_UUID_2 0xbc5e0002 +#define OPTEE_MSG_OS_OPTEE_UUID_3 0xa5d5c51b +#define OPTEE_MSG_FUNCID_GET_OS_UUID 0x0000 + +/* + * Get revision of Trusted OS. + * + * Used by non-secure world to figure out which version of the Trusted OS + * is installed. Note that the returned revision is the revision of the + * Trusted OS, not of the API. + * + * Returns revision in 2 32-bit words in the same way as + * OPTEE_MSG_CALLS_REVISION described above. + */ +#define OPTEE_MSG_FUNCID_GET_OS_REVISION 0x0001 + +/* + * Do a secure call with struct optee_msg_arg as argument + * The OPTEE_MSG_CMD_* below defines what goes in struct optee_msg_arg::cmd + * + * OPTEE_MSG_CMD_OPEN_SESSION opens a session to a Trusted Application. + * The first two parameters are tagged as meta, holding two value + * parameters to pass the following information: + * param[0].u.value.a-b uuid of Trusted Application + * param[1].u.value.a-b uuid of Client + * param[1].u.value.c Login class of client OPTEE_MSG_LOGIN_* + * + * OPTEE_MSG_CMD_INVOKE_COMMAND invokes a command in a previously opened + * session to a Trusted Application. struct optee_msg_arg::func is the + * Trusted Application function, specific to the Trusted Application. + * + * OPTEE_MSG_CMD_CLOSE_SESSION closes a previously opened session to a + * Trusted Application. + * + * OPTEE_MSG_CMD_CANCEL cancels a currently invoked command. + * + * OPTEE_MSG_CMD_REGISTER_SHM registers a shared memory reference. The + * information is passed as: + * [in] param[0].attr OPTEE_MSG_ATTR_TYPE_TMEM_INPUT + * [| OPTEE_MSG_ATTR_FRAGMENT] + * [in] param[0].u.tmem.buf_ptr physical address (of first fragment) + * [in] param[0].u.tmem.size size (of first fragment) + * [in] param[0].u.tmem.shm_ref holds shared memory reference + * ... + * The shared memory can optionally be fragmented, temp memrefs can follow + * each other with all but the last with the OPTEE_MSG_ATTR_FRAGMENT bit set. + * + * OPTEE_MSG_CMD_UNREGISTER_SHM unregisters a previously registered shared + * memory reference.
The information is passed as: + * [in] param[0].attr OPTEE_MSG_ATTR_TYPE_RMEM_INPUT + * [in] param[0].u.rmem.shm_ref holds shared memory reference + * [in] param[0].u.rmem.offs 0 + * [in] param[0].u.rmem.size 0 + */ +#define OPTEE_MSG_CMD_OPEN_SESSION 0 +#define OPTEE_MSG_CMD_INVOKE_COMMAND 1 +#define OPTEE_MSG_CMD_CLOSE_SESSION 2 +#define OPTEE_MSG_CMD_CANCEL 3 +#define OPTEE_MSG_CMD_REGISTER_SHM 4 +#define OPTEE_MSG_CMD_UNREGISTER_SHM 5 +#define OPTEE_MSG_FUNCID_CALL_WITH_ARG 0x0004 + +/***************************************************************************** + * Part 3 - Requests from secure world, RPC + *****************************************************************************/ + +/* + * All RPC is done with a struct optee_msg_arg as bearer of information, + * struct optee_msg_arg::cmd holds values defined by OPTEE_MSG_RPC_CMD_* below + * + * RPC communication with tee-supplicant is reversed compared to normal + * client communication described above. The supplicant receives requests + * and sends responses. + */ + +/* + * Load a TA into memory, defined in tee-supplicant + */ +#define OPTEE_MSG_RPC_CMD_LOAD_TA 0 + +/* + * Reserved + */ +#define OPTEE_MSG_RPC_CMD_RPMB 1 + +/* + * File system access, defined in tee-supplicant + */ +#define OPTEE_MSG_RPC_CMD_FS 2 + +/* + * Get time + * + * Returns number of seconds and nano seconds since the Epoch, + * 1970-01-01 00:00:00 +0000 (UTC). + * + * [out] param[0].u.value.a Number of seconds + * [out] param[0].u.value.b Number of nano seconds. + */ +#define OPTEE_MSG_RPC_CMD_GET_TIME 3 + +/* + * Wait queue primitive, helper for secure world to implement a wait queue. + * + * If secure world needs to wait for a secure world mutex it issues a sleep + * request instead of spinning in secure world. Conversely, a wakeup request + * is issued when a secure world mutex with a waiting thread is unlocked. + * + * Waiting on a key + * [in] param[0].u.value.a OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP + * [in] param[0].u.value.b wait key + * + * Waking up a key + * [in] param[0].u.value.a OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP + * [in] param[0].u.value.b wakeup key + */ +#define OPTEE_MSG_RPC_CMD_WAIT_QUEUE 4 +#define OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP 0 +#define OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP 1 + +/* + * Suspend execution + * + * [in] param[0].u.value.a number of milliseconds to suspend + */ +#define OPTEE_MSG_RPC_CMD_SUSPEND 5 + +/* + * Allocate a piece of shared memory + * + * Shared memory can optionally be fragmented; to support that, additional + * spare param entries are allocated to make room for eventual fragments. + * The spare param entries have .attr = OPTEE_MSG_ATTR_TYPE_NONE when + * unused. All returned temp memrefs except the last should have the + * OPTEE_MSG_ATTR_FRAGMENT bit set in the attr field. + * + * [in] param[0].u.value.a type of memory one of + * OPTEE_MSG_RPC_SHM_TYPE_* below + * [in] param[0].u.value.b requested size + * [in] param[0].u.value.c required alignment + * + * [out] param[0].u.tmem.buf_ptr physical address (of first fragment) + * [out] param[0].u.tmem.size size (of first fragment) + * [out] param[0].u.tmem.shm_ref shared memory reference + * ...
+ * [out] param[n].u.tmem.buf_ptr physical address + * [out] param[n].u.tmem.size size + * [out] param[n].u.tmem.shm_ref shared memory reference (same value + * as in param[n-1].u.tmem.shm_ref) + */ +#define OPTEE_MSG_RPC_CMD_SHM_ALLOC 6 +/* Memory that can be shared with a non-secure user space application */ +#define OPTEE_MSG_RPC_SHM_TYPE_APPL 0 +/* Memory only shared with non-secure kernel */ +#define OPTEE_MSG_RPC_SHM_TYPE_KERNEL 1 + +/* + * Free shared memory previously allocated with OPTEE_MSG_RPC_CMD_SHM_ALLOC + * + * [in] param[0].u.value.a type of memory one of + * OPTEE_MSG_RPC_SHM_TYPE_* above + * [in] param[0].u.value.b value of shared memory reference + * returned in param[0].u.tmem.shm_ref + * above + */ +#define OPTEE_MSG_RPC_CMD_SHM_FREE 7 + +#endif /* _OPTEE_MSG_H */ diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h new file mode 100644 index 0000000000000000000000000000000000000000..35e79386c556bca1d2819c745d415cc50f63fa74 --- /dev/null +++ b/drivers/tee/optee/optee_private.h @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef OPTEE_PRIVATE_H +#define OPTEE_PRIVATE_H + +#include +#include +#include +#include +#include "optee_msg.h" + +#define OPTEE_MAX_ARG_SIZE 1024 + +/* Some Global Platform error codes used in this driver */ +#define TEEC_SUCCESS 0x00000000 +#define TEEC_ERROR_BAD_PARAMETERS 0xFFFF0006 +#define TEEC_ERROR_COMMUNICATION 0xFFFF000E +#define TEEC_ERROR_OUT_OF_MEMORY 0xFFFF000C + +#define TEEC_ORIGIN_COMMS 0x00000002 + +typedef void (optee_invoke_fn)(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, + struct arm_smccc_res *); + +struct optee_call_queue { + /* Serializes access to this struct */ + struct mutex mutex; + struct list_head waiters; +}; + +struct optee_wait_queue { + /* Serializes access to this struct */ + struct mutex mu; + struct list_head db; +}; + +/** + * struct optee_supp - supplicant synchronization struct + * @ctx the context of current connected supplicant. + * if !NULL the supplicant device is available for use, + * else busy + * @mutex: held while accessing content of this struct + * @req_id: current request id if supplicant is doing synchronous + * communication, else -1 + * @reqs: queued request not yet retrieved by supplicant + * @idr: IDR holding all requests currently being processed + * by supplicant + * @reqs_c: completion used by supplicant when waiting for a + * request to be queued. 
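+ *
+ * A sketch of the intended request flow (see optee_supp_thrd_req() and
+ * optee_supp_recv()/optee_supp_send() declared below): a thread doing
+ * RPC on behalf of secure world queues its request on @reqs, the
+ * supplicant fetches it with optee_supp_recv(), serves it in user space
+ * and posts the result with optee_supp_send(), which completes the
+ * waiting requester.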
+ */ +struct optee_supp { + /* Serializes access to this struct */ + struct mutex mutex; + struct tee_context *ctx; + + int req_id; + struct list_head reqs; + struct idr idr; + struct completion reqs_c; +}; + +/** + * struct optee - main service struct + * @supp_teedev: supplicant device + * @teedev: client device + * @invoke_fn: function to issue smc or hvc + * @call_queue: queue of threads waiting to call @invoke_fn + * @wait_queue: queue of threads from secure world waiting for a + * secure world sync object + * @supp: supplicant synchronization struct for RPC to supplicant + * @pool: shared memory pool + * @memremaped_shm virtual address of memory in shared memory pool + * @sec_caps: secure world capabilities defined by + * OPTEE_SMC_SEC_CAP_* in optee_smc.h + */ +struct optee { + struct tee_device *supp_teedev; + struct tee_device *teedev; + optee_invoke_fn *invoke_fn; + struct optee_call_queue call_queue; + struct optee_wait_queue wait_queue; + struct optee_supp supp; + struct tee_shm_pool *pool; + void *memremaped_shm; + u32 sec_caps; +}; + +struct optee_session { + struct list_head list_node; + u32 session_id; +}; + +struct optee_context_data { + /* Serializes access to this struct */ + struct mutex mutex; + struct list_head sess_list; +}; + +struct optee_rpc_param { + u32 a0; + u32 a1; + u32 a2; + u32 a3; + u32 a4; + u32 a5; + u32 a6; + u32 a7; +}; + +/* Holds context that is preserved during one STD call */ +struct optee_call_ctx { + /* information about pages list used in last allocation */ + void *pages_list; + size_t num_entries; +}; + +void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param, + struct optee_call_ctx *call_ctx); +void optee_rpc_finalize_call(struct optee_call_ctx *call_ctx); + +void optee_wait_queue_init(struct optee_wait_queue *wq); +void optee_wait_queue_exit(struct optee_wait_queue *wq); + +u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, + struct tee_param *param); + +int optee_supp_read(struct tee_context *ctx, void __user *buf, size_t len); +int optee_supp_write(struct tee_context *ctx, void __user *buf, size_t len); +void optee_supp_init(struct optee_supp *supp); +void optee_supp_uninit(struct optee_supp *supp); +void optee_supp_release(struct optee_supp *supp); + +int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params, + struct tee_param *param); +int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params, + struct tee_param *param); + +u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg); +int optee_open_session(struct tee_context *ctx, + struct tee_ioctl_open_session_arg *arg, + struct tee_param *param); +int optee_close_session(struct tee_context *ctx, u32 session); +int optee_invoke_func(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg, + struct tee_param *param); +int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session); + +void optee_enable_shm_cache(struct optee *optee); +void optee_disable_shm_cache(struct optee *optee); + +int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm, + struct page **pages, size_t num_pages, + unsigned long start); +int optee_shm_unregister(struct tee_context *ctx, struct tee_shm *shm); + +int optee_shm_register_supp(struct tee_context *ctx, struct tee_shm *shm, + struct page **pages, size_t num_pages, + unsigned long start); +int optee_shm_unregister_supp(struct tee_context *ctx, struct tee_shm *shm); + +int optee_from_msg_param(struct tee_param *params, size_t num_params, + const 
struct optee_msg_param *msg_params);
+int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params,
+ const struct tee_param *params);
+
+u64 *optee_allocate_pages_list(size_t num_entries);
+void optee_free_pages_list(void *array, size_t num_entries);
+void optee_fill_pages_list(u64 *dst, struct page **pages, int num_pages,
+ size_t page_offset);
+
+/*
+ * Small helpers
+ */
+
+static inline void *reg_pair_to_ptr(u32 reg0, u32 reg1)
+{
+ return (void *)(unsigned long)(((u64)reg0 << 32) | reg1);
+}
+
+static inline void reg_pair_from_64(u32 *reg0, u32 *reg1, u64 val)
+{
+ *reg0 = val >> 32;
+ *reg1 = val;
+}
+
+#endif /*OPTEE_PRIVATE_H*/
diff --git a/drivers/tee/optee/optee_smc.h b/drivers/tee/optee/optee_smc.h
new file mode 100644
index 0000000000000000000000000000000000000000..7cd327243ada916e0e8017300c06ef34e1b07ea4
--- /dev/null
+++ b/drivers/tee/optee/optee_smc.h
@@ -0,0 +1,457 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef OPTEE_SMC_H
+#define OPTEE_SMC_H
+
+#include <linux/arm-smccc.h>
+#include <linux/bitops.h>
+
+#define OPTEE_SMC_STD_CALL_VAL(func_num) \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS, (func_num))
+#define OPTEE_SMC_FAST_CALL_VAL(func_num) \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS, (func_num))
+
+/*
+ * Function specified by SMC Calling convention.
+ */
+#define OPTEE_SMC_FUNCID_CALLS_COUNT 0xFF00
+#define OPTEE_SMC_CALLS_COUNT \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS_END, \
+ OPTEE_SMC_FUNCID_CALLS_COUNT)
+
+/*
+ * Normal cached memory (write-back), shareable for SMP systems and not
+ * shareable for UP systems.
+ */
+#define OPTEE_SMC_SHM_CACHED 1
+
+/*
+ * a0..a7 are used as register names in the descriptions below; on arm32
+ * that translates to r0..r7 and on arm64 to w0..w7. In both cases they
+ * are 32-bit registers.
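+ *
+ * As a worked example of the macros above (editorial note; the values
+ * follow from ARM_SMCCC_CALL_VAL() in linux/arm-smccc.h):
+ *
+ * OPTEE_SMC_FAST_CALL_VAL(7) == 0xb2000007
+ *
+ * i.e. fast call (bit 31 set), SMC32 convention (bit 30 clear), owner
+ * ARM_SMCCC_OWNER_TRUSTED_OS (50) in bits 24-29 and function number 7
+ * in the low 16 bits. This is exactly OPTEE_SMC_GET_SHM_CONFIG below.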
+ */
+
+/*
+ * Function specified by SMC Calling convention
+ *
+ * Return one of the following UIDs if using API specified in this file
+ * without further extensions:
+ * 65cb6b93-af0c-4617-8ed6-644a8d1140f8
+ * see also OPTEE_SMC_UID_* in optee_msg.h
+ */
+#define OPTEE_SMC_FUNCID_CALLS_UID OPTEE_MSG_FUNCID_CALLS_UID
+#define OPTEE_SMC_CALLS_UID \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS_END, \
+ OPTEE_SMC_FUNCID_CALLS_UID)
+
+/*
+ * Function specified by SMC Calling convention
+ *
+ * Returns 2.0 if using API specified in this file without further extensions.
+ * see also OPTEE_MSG_REVISION_* in optee_msg.h
+ */
+#define OPTEE_SMC_FUNCID_CALLS_REVISION OPTEE_MSG_FUNCID_CALLS_REVISION
+#define OPTEE_SMC_CALLS_REVISION \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS_END, \
+ OPTEE_SMC_FUNCID_CALLS_REVISION)
+
+struct optee_smc_calls_revision_result {
+ unsigned long major;
+ unsigned long minor;
+ unsigned long reserved0;
+ unsigned long reserved1;
+};
+
+/*
+ * Get UUID of Trusted OS.
+ *
+ * Used by non-secure world to figure out which Trusted OS is installed.
+ * Note that returned UUID is the UUID of the Trusted OS, not of the API.
+ *
+ * Returns UUID in a0-4 in the same way as OPTEE_SMC_CALLS_UID
+ * described above.
+ */
+#define OPTEE_SMC_FUNCID_GET_OS_UUID OPTEE_MSG_FUNCID_GET_OS_UUID
+#define OPTEE_SMC_CALL_GET_OS_UUID \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_OS_UUID)
+
+/*
+ * Get revision of Trusted OS.
+ *
+ * Used by non-secure world to figure out which version of the Trusted OS
+ * is installed. Note that the returned revision is the revision of the
+ * Trusted OS, not of the API.
+ *
+ * Returns revision in a0-1 in the same way as OPTEE_SMC_CALLS_REVISION
+ * described above.
+ */
+#define OPTEE_SMC_FUNCID_GET_OS_REVISION OPTEE_MSG_FUNCID_GET_OS_REVISION
+#define OPTEE_SMC_CALL_GET_OS_REVISION \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_OS_REVISION)
+
+/*
+ * Call with struct optee_msg_arg as argument
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC*CALL_WITH_ARG
+ * a1 Upper 32bit of a 64bit physical pointer to a struct optee_msg_arg
+ * a2 Lower 32bit of a 64bit physical pointer to a struct optee_msg_arg
+ * a3 Cache settings, not used if physical pointer is in a predefined shared
+ * memory area else per OPTEE_SMC_SHM_*
+ * a4-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0 Return value, OPTEE_SMC_RETURN_*
+ * a1-3 Not used
+ * a4-7 Preserved
+ *
+ * OPTEE_SMC_RETURN_ETHREAD_LIMIT return register usage:
+ * a0 Return value, OPTEE_SMC_RETURN_ETHREAD_LIMIT
+ * a1-3 Preserved
+ * a4-7 Preserved
+ *
+ * RPC return register usage:
+ * a0 Return value, OPTEE_SMC_RETURN_IS_RPC(val)
+ * a1-2 RPC parameters
+ * a3-7 Resume information, must be preserved
+ *
+ * Possible return values:
+ * OPTEE_SMC_RETURN_UNKNOWN_FUNCTION Trusted OS does not recognize this
+ * function.
+ * OPTEE_SMC_RETURN_OK Call completed, result updated in
+ * the previously supplied struct
+ * optee_msg_arg.
+ * OPTEE_SMC_RETURN_ETHREAD_LIMIT Number of Trusted OS threads exceeded,
+ * try again later.
+ * OPTEE_SMC_RETURN_EBADADDR Bad physical pointer to struct
+ * optee_msg_arg.
+ * OPTEE_SMC_RETURN_EBADCMD Bad/unknown cmd in struct optee_msg_arg
+ * OPTEE_SMC_RETURN_IS_RPC() Call suspended by RPC call to normal
+ * world.
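+ *
+ * An illustrative call site (editorial sketch; the real call path is
+ * in drivers/tee/optee/call.c, which is not part of this hunk):
+ *
+ * param.a0 = OPTEE_SMC_CALL_WITH_ARG;
+ * reg_pair_from_64(&param.a1, &param.a2, parg);
+ * invoke_fn(param.a0, param.a1, param.a2, param.a3, 0, 0, 0, 0, &res);
+ * if (OPTEE_SMC_RETURN_IS_RPC(res.a0))
+ * ... service the RPC (see rpc.c) and resume with
+ * OPTEE_SMC_CALL_RETURN_FROM_RPC ...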
+ */
+#define OPTEE_SMC_FUNCID_CALL_WITH_ARG OPTEE_MSG_FUNCID_CALL_WITH_ARG
+#define OPTEE_SMC_CALL_WITH_ARG \
+ OPTEE_SMC_STD_CALL_VAL(OPTEE_SMC_FUNCID_CALL_WITH_ARG)
+
+/*
+ * Get Shared Memory Config
+ *
+ * Returns the Secure/Non-secure shared memory config.
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC_GET_SHM_CONFIG
+ * a1-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Have config return register usage:
+ * a0 OPTEE_SMC_RETURN_OK
+ * a1 Physical address of start of SHM
+ * a2 Size of SHM
+ * a3 Cache settings of memory, as defined by the
+ * OPTEE_SMC_SHM_* values above
+ * a4-7 Preserved
+ *
+ * Not available register usage:
+ * a0 OPTEE_SMC_RETURN_ENOTAVAIL
+ * a1-3 Not used
+ * a4-7 Preserved
+ */
+#define OPTEE_SMC_FUNCID_GET_SHM_CONFIG 7
+#define OPTEE_SMC_GET_SHM_CONFIG \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_SHM_CONFIG)
+
+struct optee_smc_get_shm_config_result {
+ unsigned long status;
+ unsigned long start;
+ unsigned long size;
+ unsigned long settings;
+};
+
+/*
+ * Exchanges capabilities between normal world and secure world
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC_EXCHANGE_CAPABILITIES
+ * a1 bitfield of normal world capabilities OPTEE_SMC_NSEC_CAP_*
+ * a2-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0 OPTEE_SMC_RETURN_OK
+ * a1 bitfield of secure world capabilities OPTEE_SMC_SEC_CAP_*
+ * a2-7 Preserved
+ *
+ * Error return register usage:
+ * a0 OPTEE_SMC_RETURN_ENOTAVAIL, can't use the capabilities from normal world
+ * a1 bitfield of secure world capabilities OPTEE_SMC_SEC_CAP_*
+ * a2-7 Preserved
+ */
+/* Normal world works as a uniprocessor system */
+#define OPTEE_SMC_NSEC_CAP_UNIPROCESSOR BIT(0)
+/* Secure world has reserved shared memory for normal world to use */
+#define OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM BIT(0)
+/* Secure world can communicate via previously unregistered shared memory */
+#define OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM BIT(1)
+
+/*
+ * Secure world supports commands "register/unregister shared memory",
+ * secure world accepts command buffers located in any parts of non-secure RAM
+ */
+#define OPTEE_SMC_SEC_CAP_DYNAMIC_SHM BIT(2)
+
+#define OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES 9
+#define OPTEE_SMC_EXCHANGE_CAPABILITIES \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES)
+
+struct optee_smc_exchange_capabilities_result {
+ unsigned long status;
+ unsigned long capabilities;
+ unsigned long reserved0;
+ unsigned long reserved1;
+};
+
+/*
+ * Disable and empty the cache of shared memory objects
+ *
+ * Secure world can cache frequently used shared memory objects, for
+ * example objects used as RPC arguments. When secure world is idle this
+ * function returns one shared memory reference to free. To disable the
+ * cache and free all cached objects this function has to be called until
+ * it returns OPTEE_SMC_RETURN_ENOTAVAIL.
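+ *
+ * A normal-world driver would thus drain the cache with a loop along
+ * these lines (editorial sketch, assuming an invoke_fn as declared in
+ * optee_private.h; a production loop would also handle
+ * OPTEE_SMC_RETURN_EBUSY):
+ *
+ * struct arm_smccc_res res;
+ *
+ * do {
+ * invoke_fn(OPTEE_SMC_DISABLE_SHM_CACHE,
+ * 0, 0, 0, 0, 0, 0, 0, &res);
+ * if (res.a0 == OPTEE_SMC_RETURN_OK)
+ * tee_shm_free(reg_pair_to_ptr(res.a1, res.a2));
+ * } while (res.a0 != OPTEE_SMC_RETURN_ENOTAVAIL);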
+ * + * Call register usage: + * a0 SMC Function ID, OPTEE_SMC_DISABLE_SHM_CACHE + * a1-6 Not used + * a7 Hypervisor Client ID register + * + * Normal return register usage: + * a0 OPTEE_SMC_RETURN_OK + * a1 Upper 32bit of a 64bit Shared memory cookie + * a2 Lower 32bit of a 64bit Shared memory cookie + * a3-7 Preserved + * + * Cache empty return register usage: + * a0 OPTEE_SMC_RETURN_ENOTAVAIL + * a1-7 Preserved + * + * Not idle return register usage: + * a0 OPTEE_SMC_RETURN_EBUSY + * a1-7 Preserved + */ +#define OPTEE_SMC_FUNCID_DISABLE_SHM_CACHE 10 +#define OPTEE_SMC_DISABLE_SHM_CACHE \ + OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_DISABLE_SHM_CACHE) + +struct optee_smc_disable_shm_cache_result { + unsigned long status; + unsigned long shm_upper32; + unsigned long shm_lower32; + unsigned long reserved0; +}; + +/* + * Enable cache of shared memory objects + * + * Secure world can cache frequently used shared memory objects, for + * example objects used as RPC arguments. When secure world is idle this + * function returns OPTEE_SMC_RETURN_OK and the cache is enabled. If + * secure world isn't idle OPTEE_SMC_RETURN_EBUSY is returned. + * + * Call register usage: + * a0 SMC Function ID, OPTEE_SMC_ENABLE_SHM_CACHE + * a1-6 Not used + * a7 Hypervisor Client ID register + * + * Normal return register usage: + * a0 OPTEE_SMC_RETURN_OK + * a1-7 Preserved + * + * Not idle return register usage: + * a0 OPTEE_SMC_RETURN_EBUSY + * a1-7 Preserved + */ +#define OPTEE_SMC_FUNCID_ENABLE_SHM_CACHE 11 +#define OPTEE_SMC_ENABLE_SHM_CACHE \ + OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_ENABLE_SHM_CACHE) + +/* + * Resume from RPC (for example after processing a foreign interrupt) + * + * Call register usage: + * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC + * a1-3 Value of a1-3 when OPTEE_SMC_CALL_WITH_ARG returned + * OPTEE_SMC_RETURN_RPC in a0 + * + * Return register usage is the same as for OPTEE_SMC_*CALL_WITH_ARG above. + * + * Possible return values + * OPTEE_SMC_RETURN_UNKNOWN_FUNCTION Trusted OS does not recognize this + * function. + * OPTEE_SMC_RETURN_OK Original call completed, result + * updated in the previously supplied. + * struct optee_msg_arg + * OPTEE_SMC_RETURN_RPC Call suspended by RPC call to normal + * world. + * OPTEE_SMC_RETURN_ERESUME Resume failed, the opaque resume + * information was corrupt. + */ +#define OPTEE_SMC_FUNCID_RETURN_FROM_RPC 3 +#define OPTEE_SMC_CALL_RETURN_FROM_RPC \ + OPTEE_SMC_STD_CALL_VAL(OPTEE_SMC_FUNCID_RETURN_FROM_RPC) + +#define OPTEE_SMC_RETURN_RPC_PREFIX_MASK 0xFFFF0000 +#define OPTEE_SMC_RETURN_RPC_PREFIX 0xFFFF0000 +#define OPTEE_SMC_RETURN_RPC_FUNC_MASK 0x0000FFFF + +#define OPTEE_SMC_RETURN_GET_RPC_FUNC(ret) \ + ((ret) & OPTEE_SMC_RETURN_RPC_FUNC_MASK) + +#define OPTEE_SMC_RPC_VAL(func) ((func) | OPTEE_SMC_RETURN_RPC_PREFIX) + +/* + * Allocate memory for RPC parameter passing. The memory is used to hold a + * struct optee_msg_arg. + * + * "Call" register usage: + * a0 This value, OPTEE_SMC_RETURN_RPC_ALLOC + * a1 Size in bytes of required argument memory + * a2 Not used + * a3 Resume information, must be preserved + * a4-5 Not used + * a6-7 Resume information, must be preserved + * + * "Return" register usage: + * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC. + * a1 Upper 32bits of 64bit physical pointer to allocated + * memory, (a1 == 0 && a2 == 0) if size was 0 or if memory can't + * be allocated. 
+ * a2 Lower 32bits of 64bit physical pointer to allocated + * memory, (a1 == 0 && a2 == 0) if size was 0 or if memory can't + * be allocated + * a3 Preserved + * a4 Upper 32bits of 64bit Shared memory cookie used when freeing + * the memory or doing an RPC + * a5 Lower 32bits of 64bit Shared memory cookie used when freeing + * the memory or doing an RPC + * a6-7 Preserved + */ +#define OPTEE_SMC_RPC_FUNC_ALLOC 0 +#define OPTEE_SMC_RETURN_RPC_ALLOC \ + OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_ALLOC) + +/* + * Free memory previously allocated by OPTEE_SMC_RETURN_RPC_ALLOC + * + * "Call" register usage: + * a0 This value, OPTEE_SMC_RETURN_RPC_FREE + * a1 Upper 32bits of 64bit shared memory cookie belonging to this + * argument memory + * a2 Lower 32bits of 64bit shared memory cookie belonging to this + * argument memory + * a3-7 Resume information, must be preserved + * + * "Return" register usage: + * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC. + * a1-2 Not used + * a3-7 Preserved + */ +#define OPTEE_SMC_RPC_FUNC_FREE 2 +#define OPTEE_SMC_RETURN_RPC_FREE \ + OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_FREE) + +/* + * Deliver foreign interrupt to normal world. + * + * "Call" register usage: + * a0 OPTEE_SMC_RETURN_RPC_FOREIGN_INTR + * a1-7 Resume information, must be preserved + * + * "Return" register usage: + * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC. + * a1-7 Preserved + */ +#define OPTEE_SMC_RPC_FUNC_FOREIGN_INTR 4 +#define OPTEE_SMC_RETURN_RPC_FOREIGN_INTR \ + OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_FOREIGN_INTR) + +/* + * Do an RPC request. The supplied struct optee_msg_arg tells which + * request to do and the parameters for the request. The following fields + * are used (the rest are unused): + * - cmd the Request ID + * - ret return value of the request, filled in by normal world + * - num_params number of parameters for the request + * - params the parameters + * - param_attrs attributes of the parameters + * + * "Call" register usage: + * a0 OPTEE_SMC_RETURN_RPC_CMD + * a1 Upper 32bit of a 64bit Shared memory cookie holding a + * struct optee_msg_arg, must be preserved, only the data should + * be updated + * a2 Lower 32bit of a 64bit Shared memory cookie holding a + * struct optee_msg_arg, must be preserved, only the data should + * be updated + * a3-7 Resume information, must be preserved + * + * "Return" register usage: + * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC. 
+ * a1-2 Not used
+ * a3-7 Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_CMD 5
+#define OPTEE_SMC_RETURN_RPC_CMD \
+ OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_CMD)
+
+/* Returned in a0 */
+#define OPTEE_SMC_RETURN_UNKNOWN_FUNCTION 0xFFFFFFFF
+
+/* Returned in a0 only from Trusted OS functions */
+#define OPTEE_SMC_RETURN_OK 0x0
+#define OPTEE_SMC_RETURN_ETHREAD_LIMIT 0x1
+#define OPTEE_SMC_RETURN_EBUSY 0x2
+#define OPTEE_SMC_RETURN_ERESUME 0x3
+#define OPTEE_SMC_RETURN_EBADADDR 0x4
+#define OPTEE_SMC_RETURN_EBADCMD 0x5
+#define OPTEE_SMC_RETURN_ENOMEM 0x6
+#define OPTEE_SMC_RETURN_ENOTAVAIL 0x7
+#define OPTEE_SMC_RETURN_IS_RPC(ret) __optee_smc_return_is_rpc((ret))
+
+static inline bool __optee_smc_return_is_rpc(u32 ret)
+{
+ return ret != OPTEE_SMC_RETURN_UNKNOWN_FUNCTION &&
+ (ret & OPTEE_SMC_RETURN_RPC_PREFIX_MASK) ==
+ OPTEE_SMC_RETURN_RPC_PREFIX;
+}
+
+#endif /* OPTEE_SMC_H */
diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c
new file mode 100644
index 0000000000000000000000000000000000000000..41aea12e2bccd9f4ab55b3f77706753330850667
--- /dev/null
+++ b/drivers/tee/optee/rpc.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include "optee_private.h"
+#include "optee_smc.h"
+
+struct wq_entry {
+ struct list_head link;
+ struct completion c;
+ u32 key;
+};
+
+void optee_wait_queue_init(struct optee_wait_queue *priv)
+{
+ mutex_init(&priv->mu);
+ INIT_LIST_HEAD(&priv->db);
+}
+
+void optee_wait_queue_exit(struct optee_wait_queue *priv)
+{
+ mutex_destroy(&priv->mu);
+}
+
+static void handle_rpc_func_cmd_get_time(struct optee_msg_arg *arg)
+{
+ struct timespec64 ts;
+
+ if (arg->num_params != 1)
+ goto bad;
+ if ((arg->params[0].attr & OPTEE_MSG_ATTR_TYPE_MASK) !=
+ OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT)
+ goto bad;
+
+ getnstimeofday64(&ts);
+ arg->params[0].u.value.a = ts.tv_sec;
+ arg->params[0].u.value.b = ts.tv_nsec;
+
+ arg->ret = TEEC_SUCCESS;
+ return;
+bad:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+}
+
+static struct wq_entry *wq_entry_get(struct optee_wait_queue *wq, u32 key)
+{
+ struct wq_entry *w;
+
+ mutex_lock(&wq->mu);
+
+ list_for_each_entry(w, &wq->db, link)
+ if (w->key == key)
+ goto out;
+
+ w = kmalloc(sizeof(*w), GFP_KERNEL);
+ if (w) {
+ init_completion(&w->c);
+ w->key = key;
+ list_add_tail(&w->link, &wq->db);
+ }
+out:
+ mutex_unlock(&wq->mu);
+ return w;
+}
+
+static void wq_sleep(struct optee_wait_queue *wq, u32 key)
+{
+ struct wq_entry *w = wq_entry_get(wq, key);
+
+ if (w) {
+ wait_for_completion(&w->c);
+ mutex_lock(&wq->mu);
+ list_del(&w->link);
+ mutex_unlock(&wq->mu);
+ kfree(w);
+ }
+}
+
+static void wq_wakeup(struct optee_wait_queue *wq, u32 key)
+{
+ struct wq_entry *w = wq_entry_get(wq, key);
+
+ if (w)
+ complete(&w->c);
+}
+
+static void handle_rpc_func_cmd_wq(struct optee *optee,
+ struct optee_msg_arg *arg)
+{
+ if (arg->num_params != 1)
+ goto bad;
+
+ if ((arg->params[0].attr & OPTEE_MSG_ATTR_TYPE_MASK) !=
+ OPTEE_MSG_ATTR_TYPE_VALUE_INPUT)
+ goto bad;
+
+ switch (arg->params[0].u.value.a) {
+ case OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP:
+ wq_sleep(&optee->wait_queue, arg->params[0].u.value.b);
+ break;
+ case OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP:
+ wq_wakeup(&optee->wait_queue, arg->params[0].u.value.b);
+ break;
+ default:
+ goto bad;
+ }
+
+ arg->ret = TEEC_SUCCESS;
+ return;
+bad:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+}
+
+static void handle_rpc_func_cmd_wait(struct optee_msg_arg *arg)
+{
+ u32 msec_to_wait;
+
+ if (arg->num_params != 1)
+ goto bad;
+
+ if ((arg->params[0].attr & OPTEE_MSG_ATTR_TYPE_MASK) !=
+ OPTEE_MSG_ATTR_TYPE_VALUE_INPUT)
+ goto bad;
+
+ msec_to_wait = arg->params[0].u.value.a;
+
+ /* Go to interruptible sleep */
+ msleep_interruptible(msec_to_wait);
+
+ arg->ret = TEEC_SUCCESS;
+ return;
+bad:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+}
+
+static void handle_rpc_supp_cmd(struct tee_context *ctx,
+ struct optee_msg_arg *arg)
+{
+ struct tee_param *params;
+
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+
+ params = kmalloc_array(arg->num_params, sizeof(struct tee_param),
+ GFP_KERNEL);
+ if (!params) {
+ arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+
+ if (optee_from_msg_param(params, arg->num_params, arg->params)) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ goto out;
+ }
+
+ arg->ret = optee_supp_thrd_req(ctx, arg->cmd, arg->num_params, params);
+
+ if (optee_to_msg_param(arg->params, arg->num_params, params))
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+out:
+ kfree(params);
+}
+
+static struct tee_shm *cmd_alloc_suppl(struct tee_context *ctx, size_t sz)
+{
+ u32 ret;
+ struct tee_param param;
+ struct optee *optee = tee_get_drvdata(ctx->teedev);
+ struct tee_shm *shm;
+
+ param.attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT;
+ param.u.value.a = OPTEE_MSG_RPC_SHM_TYPE_APPL;
+ param.u.value.b = sz;
+ param.u.value.c = 0;
+
+ ret = optee_supp_thrd_req(ctx, OPTEE_MSG_RPC_CMD_SHM_ALLOC, 1, &param);
+ if (ret)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&optee->supp.mutex);
+ /* Increases count as secure world doesn't have a reference */
+ shm = tee_shm_get_from_id(optee->supp.ctx, param.u.value.c);
+ mutex_unlock(&optee->supp.mutex);
+ return shm;
+}
+
+static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx,
+ struct optee_msg_arg *arg,
+ struct optee_call_ctx *call_ctx)
+{
+ phys_addr_t pa;
+ struct tee_shm *shm;
+ size_t sz;
+ size_t n;
+
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+
+ if (!arg->num_params ||
+ arg->params[0].attr != OPTEE_MSG_ATTR_TYPE_VALUE_INPUT) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+
+ for (n = 1; n < arg->num_params; n++) {
+ if (arg->params[n].attr != OPTEE_MSG_ATTR_TYPE_NONE) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+ }
+
+ sz = arg->params[0].u.value.b;
+ switch (arg->params[0].u.value.a) {
+ case OPTEE_MSG_RPC_SHM_TYPE_APPL:
+ shm = cmd_alloc_suppl(ctx, sz);
+ break;
+ case OPTEE_MSG_RPC_SHM_TYPE_KERNEL:
+ shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED);
+ break;
+ default:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+
+ if (IS_ERR(shm)) {
+ arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+
+ if (tee_shm_get_pa(shm, 0, &pa)) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ goto bad;
+ }
+
+ sz = tee_shm_get_size(shm);
+
+ if (tee_shm_is_registered(shm)) {
+ struct page **pages;
+ u64 *pages_list;
+ size_t page_num;
+
+ pages = tee_shm_get_pages(shm, &page_num);
+ if (!pages || !page_num) {
+ arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+ goto bad;
+ }
+
+ pages_list = optee_allocate_pages_list(page_num);
+ if (!pages_list) {
+ arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+ goto bad;
+ }
+
+ call_ctx->pages_list = pages_list;
+ call_ctx->num_entries = page_num;
+
+ arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT |
+ OPTEE_MSG_ATTR_NONCONTIG;
+ /*
+ * In the least significant bits of u.tmem.buf_ptr we store
+ * the buffer offset from the 4k page, as described in the
+ * OP-TEE ABI.
+ */
+ arg->params[0].u.tmem.buf_ptr = virt_to_phys(pages_list) |
+ (tee_shm_get_page_offset(shm) &
+ (OPTEE_MSG_NONCONTIG_PAGE_SIZE - 1));
+ arg->params[0].u.tmem.size = tee_shm_get_size(shm);
+ arg->params[0].u.tmem.shm_ref = (unsigned long)shm;
+
+ optee_fill_pages_list(pages_list, pages, page_num,
+ tee_shm_get_page_offset(shm));
+ } else {
+ arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT;
+ arg->params[0].u.tmem.buf_ptr = pa;
+ arg->params[0].u.tmem.size = sz;
+ arg->params[0].u.tmem.shm_ref = (unsigned long)shm;
+ }
+
+ arg->ret = TEEC_SUCCESS;
+ return;
+bad:
+ tee_shm_free(shm);
+}
+
+static void cmd_free_suppl(struct tee_context *ctx, struct tee_shm *shm)
+{
+ struct tee_param param;
+
+ param.attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT;
+ param.u.value.a = OPTEE_MSG_RPC_SHM_TYPE_APPL;
+ param.u.value.b = tee_shm_get_id(shm);
+ param.u.value.c = 0;
+
+ /*
+ * Match the tee_shm_get_from_id() in cmd_alloc_suppl() as secure
+ * world has released its reference.
+ *
+ * It's better to do this before sending the request to supplicant
+ * as we'd like to let the process that did the initial allocation
+ * release the last reference too, in order to avoid stacking many
+ * pending fput() on the client process. This could otherwise happen
+ * if secure world does many allocate and free in a single invoke.
+ */
+ tee_shm_put(shm);
+
+ optee_supp_thrd_req(ctx, OPTEE_MSG_RPC_CMD_SHM_FREE, 1, &param);
+}
+
+static void handle_rpc_func_cmd_shm_free(struct tee_context *ctx,
+ struct optee_msg_arg *arg)
+{
+ struct tee_shm *shm;
+
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+
+ if (arg->num_params != 1 ||
+ arg->params[0].attr != OPTEE_MSG_ATTR_TYPE_VALUE_INPUT) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+
+ shm = (struct tee_shm *)(unsigned long)arg->params[0].u.value.b;
+ switch (arg->params[0].u.value.a) {
+ case OPTEE_MSG_RPC_SHM_TYPE_APPL:
+ cmd_free_suppl(ctx, shm);
+ break;
+ case OPTEE_MSG_RPC_SHM_TYPE_KERNEL:
+ tee_shm_free(shm);
+ break;
+ default:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+ arg->ret = TEEC_SUCCESS;
+}
+
+static void free_pages_list(struct optee_call_ctx *call_ctx)
+{
+ if (call_ctx->pages_list) {
+ optee_free_pages_list(call_ctx->pages_list,
+ call_ctx->num_entries);
+ call_ctx->pages_list = NULL;
+ call_ctx->num_entries = 0;
+ }
+}
+
+void optee_rpc_finalize_call(struct optee_call_ctx *call_ctx)
+{
+ free_pages_list(call_ctx);
+}
+
+static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee,
+ struct tee_shm *shm,
+ struct optee_call_ctx *call_ctx)
+{
+ struct optee_msg_arg *arg;
+
+ arg = tee_shm_get_va(shm, 0);
+ if (IS_ERR(arg)) {
+ pr_err("%s: tee_shm_get_va %p failed\n", __func__, shm);
+ return;
+ }
+
+ switch (arg->cmd) {
+ case OPTEE_MSG_RPC_CMD_GET_TIME:
+ handle_rpc_func_cmd_get_time(arg);
+ break;
+ case OPTEE_MSG_RPC_CMD_WAIT_QUEUE:
+ handle_rpc_func_cmd_wq(optee, arg);
+ break;
+ case OPTEE_MSG_RPC_CMD_SUSPEND:
+ handle_rpc_func_cmd_wait(arg);
+ break;
+ case OPTEE_MSG_RPC_CMD_SHM_ALLOC:
+ free_pages_list(call_ctx);
+ handle_rpc_func_cmd_shm_alloc(ctx, arg, call_ctx);
+ break;
+ case OPTEE_MSG_RPC_CMD_SHM_FREE:
+ handle_rpc_func_cmd_shm_free(ctx, arg);
+ break;
+ default:
+ handle_rpc_supp_cmd(ctx, arg);
+ }
+}
+
+/**
+ * optee_handle_rpc() - handle RPC from secure world
+ * @ctx: context doing the RPC
+ * @param: value of registers for the RPC
+ * @call_ctx: call context. Preserved during one OP-TEE invocation
+ *
+ * Result of RPC is written back into @param.
+ */
+void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param,
+ struct optee_call_ctx *call_ctx)
+{
+ struct tee_device *teedev = ctx->teedev;
+ struct optee *optee = tee_get_drvdata(teedev);
+ struct tee_shm *shm;
+ phys_addr_t pa;
+
+ switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) {
+ case OPTEE_SMC_RPC_FUNC_ALLOC:
+ shm = tee_shm_alloc(ctx, param->a1, TEE_SHM_MAPPED);
+ if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) {
+ reg_pair_from_64(&param->a1, &param->a2, pa);
+ reg_pair_from_64(&param->a4, &param->a5,
+ (unsigned long)shm);
+ } else {
+ param->a1 = 0;
+ param->a2 = 0;
+ param->a4 = 0;
+ param->a5 = 0;
+ }
+ break;
+ case OPTEE_SMC_RPC_FUNC_FREE:
+ shm = reg_pair_to_ptr(param->a1, param->a2);
+ tee_shm_free(shm);
+ break;
+ case OPTEE_SMC_RPC_FUNC_FOREIGN_INTR:
+ /*
+ * A foreign interrupt was raised while secure world was
+ * executing. Since such interrupts are handled in Linux,
+ * a dummy RPC is performed to let Linux take the interrupt
+ * through the normal vector.
+ */
+ break;
+ case OPTEE_SMC_RPC_FUNC_CMD:
+ shm = reg_pair_to_ptr(param->a1, param->a2);
+ handle_rpc_func_cmd(ctx, optee, shm, call_ctx);
+ break;
+ default:
+ pr_warn("Unknown RPC func 0x%x\n",
+ (u32)OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0));
+ break;
+ }
+
+ param->a0 = OPTEE_SMC_CALL_RETURN_FROM_RPC;
+}
diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c
new file mode 100644
index 0000000000000000000000000000000000000000..49397813fff1e5e0f65098c27a29641c407d9ac6
--- /dev/null
+++ b/drivers/tee/optee/shm_pool.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ * Copyright (c) 2017, EPAM Systems
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include "optee_private.h"
+#include "optee_smc.h"
+#include "shm_pool.h"
+
+static int pool_op_alloc(struct tee_shm_pool_mgr *poolm,
+ struct tee_shm *shm, size_t size)
+{
+ unsigned int order = get_order(size);
+ struct page *page;
+
+ page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+ if (!page)
+ return -ENOMEM;
+
+ shm->kaddr = page_address(page);
+ shm->paddr = page_to_phys(page);
+ shm->size = PAGE_SIZE << order;
+
+ return 0;
+}
+
+static void pool_op_free(struct tee_shm_pool_mgr *poolm,
+ struct tee_shm *shm)
+{
+ free_pages((unsigned long)shm->kaddr, get_order(shm->size));
+ shm->kaddr = NULL;
+}
+
+static void pool_op_destroy_poolmgr(struct tee_shm_pool_mgr *poolm)
+{
+ kfree(poolm);
+}
+
+static const struct tee_shm_pool_mgr_ops pool_ops = {
+ .alloc = pool_op_alloc,
+ .free = pool_op_free,
+ .destroy_poolmgr = pool_op_destroy_poolmgr,
+};
+
+/**
+ * optee_shm_pool_alloc_pages() - create page-based allocator pool
+ *
+ * This pool is used when OP-TEE supports dynamic SHM. In this case
+ * command buffers and such are allocated from the kernel's own memory.
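+ *
+ * Illustrative wiring in the driver probe path (editorial sketch; the
+ * probe code and the tee_shm_pool_alloc() helper are outside this
+ * hunk, so treat these names as assumptions):
+ *
+ * priv_mgr = optee_shm_pool_alloc_pages();
+ * dmabuf_mgr = optee_shm_pool_alloc_pages();
+ * pool = tee_shm_pool_alloc(priv_mgr, dmabuf_mgr);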
+ */
+struct tee_shm_pool_mgr *optee_shm_pool_alloc_pages(void)
+{
+ struct tee_shm_pool_mgr *mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
+
+ if (!mgr)
+ return ERR_PTR(-ENOMEM);
+
+ mgr->ops = &pool_ops;
+
+ return mgr;
+}
diff --git a/drivers/tee/optee/shm_pool.h b/drivers/tee/optee/shm_pool.h
new file mode 100644
index 0000000000000000000000000000000000000000..4e753c3bf7ec29e28c8667b5e12ec10dab17eb9b
--- /dev/null
+++ b/drivers/tee/optee/shm_pool.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ * Copyright (c) 2016, EPAM Systems
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef SHM_POOL_H
+#define SHM_POOL_H
+
+#include <linux/tee_drv.h>
+
+struct tee_shm_pool_mgr *optee_shm_pool_alloc_pages(void);
+
+#endif
diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c
new file mode 100644
index 0000000000000000000000000000000000000000..df35fc01fd3e5eec43088112ac7976f72c69164f
--- /dev/null
+++ b/drivers/tee/optee/supp.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "optee_private.h"
+
+struct optee_supp_req {
+ struct list_head link;
+
+ bool busy;
+ u32 func;
+ u32 ret;
+ size_t num_params;
+ struct tee_param *param;
+
+ struct completion c;
+};
+
+void optee_supp_init(struct optee_supp *supp)
+{
+ memset(supp, 0, sizeof(*supp));
+ mutex_init(&supp->mutex);
+ init_completion(&supp->reqs_c);
+ idr_init(&supp->idr);
+ INIT_LIST_HEAD(&supp->reqs);
+ supp->req_id = -1;
+}
+
+void optee_supp_uninit(struct optee_supp *supp)
+{
+ mutex_destroy(&supp->mutex);
+ idr_destroy(&supp->idr);
+}
+
+void optee_supp_release(struct optee_supp *supp)
+{
+ int id;
+ struct optee_supp_req *req;
+ struct optee_supp_req *req_tmp;
+
+ mutex_lock(&supp->mutex);
+
+ /* Abort all requests retrieved by supplicant */
+ idr_for_each_entry(&supp->idr, req, id) {
+ req->busy = false;
+ idr_remove(&supp->idr, id);
+ req->ret = TEEC_ERROR_COMMUNICATION;
+ complete(&req->c);
+ }
+
+ /* Abort all queued requests */
+ list_for_each_entry_safe(req, req_tmp, &supp->reqs, link) {
+ list_del(&req->link);
+ req->ret = TEEC_ERROR_COMMUNICATION;
+ complete(&req->c);
+ }
+
+ supp->ctx = NULL;
+ supp->req_id = -1;
+
+ mutex_unlock(&supp->mutex);
+}
+
+/**
+ * optee_supp_thrd_req() - request service from supplicant
+ * @ctx: context doing the request
+ * @func: function requested
+ * @num_params: number of elements in @param array
+ * @param: parameters for function
+ *
+ * Returns result of operation to be passed to secure world
+ */
+u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params,
+ struct tee_param *param)
+{
+ struct optee *optee = tee_get_drvdata(ctx->teedev);
+ struct optee_supp *supp = &optee->supp;
+ struct optee_supp_req *req = kzalloc(sizeof(*req), GFP_KERNEL);
+ bool interruptable;
+ u32 ret;
+
+ if (!req)
+ return TEEC_ERROR_OUT_OF_MEMORY;
+
+ init_completion(&req->c);
+ req->func = func;
+ req->num_params = num_params;
+ req->param = param;
+
+ /* Insert the request in the request list */
+ mutex_lock(&supp->mutex);
+ list_add_tail(&req->link, &supp->reqs);
+ mutex_unlock(&supp->mutex);
+
+ /* Tell an eventual waiter there's a new request */
+ complete(&supp->reqs_c);
+
+ /*
+ * Wait for supplicant to process and return result, once we've
+ * returned from wait_for_completion(&req->c) successfully we have
+ * exclusive access again.
+ */
+ while (wait_for_completion_interruptible(&req->c)) {
+ mutex_lock(&supp->mutex);
+ interruptable = !supp->ctx;
+ if (interruptable) {
+ /*
+ * There's no supplicant available and since the
+ * supp->mutex currently is held none can
+ * become available until the mutex is released
+ * again.
+ *
+ * Interrupting an RPC to supplicant is only
+ * allowed as a way of slightly improving the user
+ * experience in case the supplicant hasn't been
+ * started yet. During normal operation the supplicant
+ * will serve all requests in a timely manner and
+ * interrupting one then wouldn't make sense.
+ */
+ interruptable = !req->busy;
+ if (!req->busy)
+ list_del(&req->link);
+ }
+ mutex_unlock(&supp->mutex);
+
+ if (interruptable) {
+ req->ret = TEEC_ERROR_COMMUNICATION;
+ break;
+ }
+ }
+
+ ret = req->ret;
+ kfree(req);
+
+ return ret;
+}
+
+static struct optee_supp_req *supp_pop_entry(struct optee_supp *supp,
+ int num_params, int *id)
+{
+ struct optee_supp_req *req;
+
+ if (supp->req_id != -1) {
+ /*
+ * Supplicant should not mix synchronous and asynchronous
+ * requests.
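+ *
+ * (supp->req_id is only set by the non-meta path in
+ * optee_supp_recv() below and cleared again by supp_pop_req(),
+ * so a supplicant using meta parameters never trips this check.)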
+ */
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (list_empty(&supp->reqs))
+ return NULL;
+
+ req = list_first_entry(&supp->reqs, struct optee_supp_req, link);
+
+ if (num_params < req->num_params) {
+ /* Not enough room for parameters */
+ return ERR_PTR(-EINVAL);
+ }
+
+ *id = idr_alloc(&supp->idr, req, 1, 0, GFP_KERNEL);
+ if (*id < 0)
+ return ERR_PTR(-ENOMEM);
+
+ list_del(&req->link);
+ req->busy = true;
+
+ return req;
+}
+
+static int supp_check_recv_params(size_t num_params, struct tee_param *params,
+ size_t *num_meta)
+{
+ size_t n;
+
+ if (!num_params)
+ return -EINVAL;
+
+ /*
+ * If there are memrefs we need to decrease those as they were
+ * increased earlier, and we'll even refuse to accept any below.
+ */
+ for (n = 0; n < num_params; n++)
+ if (tee_param_is_memref(params + n) && params[n].u.memref.shm)
+ tee_shm_put(params[n].u.memref.shm);
+
+ /*
+ * We only expect parameters as TEE_IOCTL_PARAM_ATTR_TYPE_NONE with
+ * or without the TEE_IOCTL_PARAM_ATTR_META bit set.
+ */
+ for (n = 0; n < num_params; n++)
+ if (params[n].attr &&
+ params[n].attr != TEE_IOCTL_PARAM_ATTR_META)
+ return -EINVAL;
+
+ /* At most we'll need one meta parameter so no need to check for more */
+ if (params->attr == TEE_IOCTL_PARAM_ATTR_META)
+ *num_meta = 1;
+ else
+ *num_meta = 0;
+
+ return 0;
+}
+
+/**
+ * optee_supp_recv() - receive request for supplicant
+ * @ctx: context receiving the request
+ * @func: requested function in supplicant
+ * @num_params: number of elements allocated in @param, updated with number
+ * used elements
+ * @param: space for parameters for @func
+ *
+ * Returns 0 on success or <0 on failure
+ */
+int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params,
+ struct tee_param *param)
+{
+ struct tee_device *teedev = ctx->teedev;
+ struct optee *optee = tee_get_drvdata(teedev);
+ struct optee_supp *supp = &optee->supp;
+ struct optee_supp_req *req = NULL;
+ int id;
+ size_t num_meta;
+ int rc;
+
+ rc = supp_check_recv_params(*num_params, param, &num_meta);
+ if (rc)
+ return rc;
+
+ while (true) {
+ mutex_lock(&supp->mutex);
+ req = supp_pop_entry(supp, *num_params - num_meta, &id);
+ mutex_unlock(&supp->mutex);
+
+ if (req) {
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ break;
+ }
+
+ /*
+ * If we didn't get a request we'll block in
+ * wait_for_completion() to avoid needless spinning.
+ *
+ * This is where supplicant will be hanging most of
+ * the time, let's make this interruptible so we
+ * can easily restart supplicant if needed.
+ */
+ if (wait_for_completion_interruptible(&supp->reqs_c))
+ return -ERESTARTSYS;
+ }
+
+ if (num_meta) {
+ /*
+ * tee-supplicant supports meta parameters -> requests can be
+ * processed asynchronously.
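+ *
+ * Concretely, the meta parameter handed to the supplicant is a
+ * TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT value whose .a field
+ * carries the request id allocated above; the supplicant must
+ * echo the same meta parameter back in TEE_IOC_SUPPL_SEND so
+ * that supp_pop_req() can look the request up in the IDR.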
+ */ + param->attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT | + TEE_IOCTL_PARAM_ATTR_META; + param->u.value.a = id; + param->u.value.b = 0; + param->u.value.c = 0; + } else { + mutex_lock(&supp->mutex); + supp->req_id = id; + mutex_unlock(&supp->mutex); + } + + *func = req->func; + *num_params = req->num_params + num_meta; + memcpy(param + num_meta, req->param, + sizeof(struct tee_param) * req->num_params); + + return 0; +} + +static struct optee_supp_req *supp_pop_req(struct optee_supp *supp, + size_t num_params, + struct tee_param *param, + size_t *num_meta) +{ + struct optee_supp_req *req; + int id; + size_t nm; + const u32 attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT | + TEE_IOCTL_PARAM_ATTR_META; + + if (!num_params) + return ERR_PTR(-EINVAL); + + if (supp->req_id == -1) { + if (param->attr != attr) + return ERR_PTR(-EINVAL); + id = param->u.value.a; + nm = 1; + } else { + id = supp->req_id; + nm = 0; + } + + req = idr_find(&supp->idr, id); + if (!req) + return ERR_PTR(-ENOENT); + + if ((num_params - nm) != req->num_params) + return ERR_PTR(-EINVAL); + + req->busy = false; + idr_remove(&supp->idr, id); + supp->req_id = -1; + *num_meta = nm; + + return req; +} + +/** + * optee_supp_send() - send result of request from supplicant + * @ctx: context sending result + * @ret: return value of request + * @num_params: number of parameters returned + * @param: returned parameters + * + * Returns 0 on success or <0 on failure. + */ +int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params, + struct tee_param *param) +{ + struct tee_device *teedev = ctx->teedev; + struct optee *optee = tee_get_drvdata(teedev); + struct optee_supp *supp = &optee->supp; + struct optee_supp_req *req; + size_t n; + size_t num_meta; + + mutex_lock(&supp->mutex); + req = supp_pop_req(supp, num_params, param, &num_meta); + mutex_unlock(&supp->mutex); + + if (IS_ERR(req)) { + /* Something is wrong, let supplicant restart. */ + return PTR_ERR(req); + } + + /* Update out and in/out parameters */ + for (n = 0; n < req->num_params; n++) { + struct tee_param *p = req->param + n; + + switch (p->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + p->u.value.a = param[n + num_meta].u.value.a; + p->u.value.b = param[n + num_meta].u.value.b; + p->u.value.c = param[n + num_meta].u.value.c; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + p->u.memref.size = param[n + num_meta].u.memref.size; + break; + default: + break; + } + } + req->ret = ret; + + /* Let the requesting thread continue */ + complete(&req->c); + + return 0; +} diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c new file mode 100644 index 0000000000000000000000000000000000000000..6c4b200a45604827517ff1b840ba642ab2beb165 --- /dev/null +++ b/drivers/tee/tee_core.c @@ -0,0 +1,949 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include <linux/uaccess.h>
+#include "tee_private.h"
+
+#define TEE_NUM_DEVICES 32
+
+#define TEE_IOCTL_PARAM_SIZE(x) (sizeof(struct tee_param) * (x))
+
+/*
+ * Unprivileged devices in the lower half range and privileged devices in
+ * the upper half range.
+ */
+static DECLARE_BITMAP(dev_mask, TEE_NUM_DEVICES);
+static DEFINE_SPINLOCK(driver_lock);
+
+static struct class *tee_class;
+static dev_t tee_devt;
+
+static int tee_open(struct inode *inode, struct file *filp)
+{
+ int rc;
+ struct tee_device *teedev;
+ struct tee_context *ctx;
+
+ teedev = container_of(inode->i_cdev, struct tee_device, cdev);
+ if (!tee_device_get(teedev))
+ return -EINVAL;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ kref_init(&ctx->refcount);
+ ctx->teedev = teedev;
+ INIT_LIST_HEAD(&ctx->list_shm);
+ filp->private_data = ctx;
+ rc = teedev->desc->ops->open(ctx);
+ if (rc)
+ goto err;
+
+ return 0;
+err:
+ kfree(ctx);
+ tee_device_put(teedev);
+ return rc;
+}
+
+void teedev_ctx_get(struct tee_context *ctx)
+{
+ if (ctx->releasing)
+ return;
+
+ kref_get(&ctx->refcount);
+}
+
+static void teedev_ctx_release(struct kref *ref)
+{
+ struct tee_context *ctx = container_of(ref, struct tee_context,
+ refcount);
+ ctx->releasing = true;
+ ctx->teedev->desc->ops->release(ctx);
+ kfree(ctx);
+}
+
+void teedev_ctx_put(struct tee_context *ctx)
+{
+ if (ctx->releasing)
+ return;
+
+ kref_put(&ctx->refcount, teedev_ctx_release);
+}
+
+static void teedev_close_context(struct tee_context *ctx)
+{
+ tee_device_put(ctx->teedev);
+ teedev_ctx_put(ctx);
+}
+
+static int tee_release(struct inode *inode, struct file *filp)
+{
+ teedev_close_context(filp->private_data);
+ return 0;
+}
+
+static int tee_ioctl_version(struct tee_context *ctx,
+ struct tee_ioctl_version_data __user *uvers)
+{
+ struct tee_ioctl_version_data vers;
+
+ ctx->teedev->desc->ops->get_version(ctx->teedev, &vers);
+
+ if (ctx->teedev->desc->flags & TEE_DESC_PRIVILEGED)
+ vers.gen_caps |= TEE_GEN_CAP_PRIVILEGED;
+
+ if (copy_to_user(uvers, &vers, sizeof(vers)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int tee_ioctl_shm_alloc(struct tee_context *ctx,
+ struct tee_ioctl_shm_alloc_data __user *udata)
+{
+ long ret;
+ struct tee_ioctl_shm_alloc_data data;
+ struct tee_shm *shm;
+
+ if (copy_from_user(&data, udata, sizeof(data)))
+ return -EFAULT;
+
+ /* Currently no input flags are supported */
+ if (data.flags)
+ return -EINVAL;
+
+ shm = tee_shm_alloc(ctx, data.size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ data.id = shm->id;
+ data.flags = shm->flags;
+ data.size = shm->size;
+
+ if (copy_to_user(udata, &data, sizeof(data)))
+ ret = -EFAULT;
+ else
+ ret = tee_shm_get_fd(shm);
+
+ /*
+ * When user space closes the file descriptor the shared memory
+ * will be freed; if tee_shm_get_fd() failed then it is freed
+ * immediately.
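+ *
+ * An illustrative user space sequence (editorial sketch, assuming
+ * the uapi in include/uapi/linux/tee.h from this series):
+ *
+ * struct tee_ioctl_shm_alloc_data data = { .size = 4096 };
+ * int fd = ioctl(tee_fd, TEE_IOC_SHM_ALLOC, &data);
+ * void *p = mmap(NULL, data.size, PROT_READ | PROT_WRITE,
+ * MAP_SHARED, fd, 0);
+ *
+ * where data.id afterwards identifies the shm object in
+ * TEE_IOC_INVOKE memref parameters.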
+ */
+ tee_shm_put(shm);
+ return ret;
+}
+
+static int
+tee_ioctl_shm_register(struct tee_context *ctx,
+ struct tee_ioctl_shm_register_data __user *udata)
+{
+ long ret;
+ struct tee_ioctl_shm_register_data data;
+ struct tee_shm *shm;
+
+ if (copy_from_user(&data, udata, sizeof(data)))
+ return -EFAULT;
+
+ /* Currently no input flags are supported */
+ if (data.flags)
+ return -EINVAL;
+
+ shm = tee_shm_register(ctx, data.addr, data.length,
+ TEE_SHM_DMA_BUF | TEE_SHM_USER_MAPPED);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ data.id = shm->id;
+ data.flags = shm->flags;
+ data.length = shm->size;
+
+ if (copy_to_user(udata, &data, sizeof(data)))
+ ret = -EFAULT;
+ else
+ ret = tee_shm_get_fd(shm);
+ /*
+ * When user space closes the file descriptor the shared memory
+ * will be freed; if tee_shm_get_fd() failed then it is freed
+ * immediately.
+ */
+ tee_shm_put(shm);
+ return ret;
+}
+
+static int params_from_user(struct tee_context *ctx, struct tee_param *params,
+ size_t num_params,
+ struct tee_ioctl_param __user *uparams)
+{
+ size_t n;
+
+ for (n = 0; n < num_params; n++) {
+ struct tee_shm *shm;
+ struct tee_ioctl_param ip;
+
+ if (copy_from_user(&ip, uparams + n, sizeof(ip)))
+ return -EFAULT;
+
+ /* All unused attribute bits have to be zero */
+ if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_MASK)
+ return -EINVAL;
+
+ params[n].attr = ip.attr;
+ switch (ip.attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_NONE:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ params[n].u.value.a = ip.a;
+ params[n].u.value.b = ip.b;
+ params[n].u.value.c = ip.c;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ /*
+ * If we fail to get a pointer to a shared memory
+ * object (and increase the ref count) from an
+ * identifier we return an error. All pointers that
+ * have been added to params have an increased ref
+ * count. It's the caller's responsibility to do
+ * tee_shm_put() on all resolved pointers.
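+ *
+ * (The out: paths of tee_ioctl_open_session() and
+ * tee_ioctl_invoke() below do exactly that.)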
+ */ + shm = tee_shm_get_from_id(ctx, ip.c); + if (IS_ERR(shm)) + return PTR_ERR(shm); + + params[n].u.memref.shm_offs = ip.a; + params[n].u.memref.size = ip.b; + params[n].u.memref.shm = shm; + break; + default: + /* Unknown attribute */ + return -EINVAL; + } + } + return 0; +} + +static int params_to_user(struct tee_ioctl_param __user *uparams, + size_t num_params, struct tee_param *params) +{ + size_t n; + + for (n = 0; n < num_params; n++) { + struct tee_ioctl_param __user *up = uparams + n; + struct tee_param *p = params + n; + + switch (p->attr) { + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + if (put_user(p->u.value.a, &up->a) || + put_user(p->u.value.b, &up->b) || + put_user(p->u.value.c, &up->c)) + return -EFAULT; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + if (put_user((u64)p->u.memref.size, &up->b)) + return -EFAULT; + default: + break; + } + } + return 0; +} + +static int tee_ioctl_open_session(struct tee_context *ctx, + struct tee_ioctl_buf_data __user *ubuf) +{ + int rc; + size_t n; + struct tee_ioctl_buf_data buf; + struct tee_ioctl_open_session_arg __user *uarg; + struct tee_ioctl_open_session_arg arg; + struct tee_ioctl_param __user *uparams = NULL; + struct tee_param *params = NULL; + bool have_session = false; + + if (!ctx->teedev->desc->ops->open_session) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + + if (buf.buf_len > TEE_MAX_ARG_SIZE || + buf.buf_len < sizeof(struct tee_ioctl_open_session_arg)) + return -EINVAL; + + uarg = u64_to_user_ptr(buf.buf_ptr); + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + if (sizeof(arg) + TEE_IOCTL_PARAM_SIZE(arg.num_params) != buf.buf_len) + return -EINVAL; + + if (arg.num_params) { + params = kcalloc(arg.num_params, sizeof(struct tee_param), + GFP_KERNEL); + if (!params) + return -ENOMEM; + uparams = uarg->params; + rc = params_from_user(ctx, params, arg.num_params, uparams); + if (rc) + goto out; + } + + rc = ctx->teedev->desc->ops->open_session(ctx, &arg, params); + if (rc) + goto out; + have_session = true; + + if (put_user(arg.session, &uarg->session) || + put_user(arg.ret, &uarg->ret) || + put_user(arg.ret_origin, &uarg->ret_origin)) { + rc = -EFAULT; + goto out; + } + rc = params_to_user(uparams, arg.num_params, params); +out: + /* + * If we've succeeded to open the session but failed to communicate + * it back to user space, close the session again to avoid leakage. 
+ */ + if (rc && have_session && ctx->teedev->desc->ops->close_session) + ctx->teedev->desc->ops->close_session(ctx, arg.session); + + if (params) { + /* Decrease ref count for all valid shared memory pointers */ + for (n = 0; n < arg.num_params; n++) + if (tee_param_is_memref(params + n) && + params[n].u.memref.shm) + tee_shm_put(params[n].u.memref.shm); + kfree(params); + } + + return rc; +} + +static int tee_ioctl_invoke(struct tee_context *ctx, + struct tee_ioctl_buf_data __user *ubuf) +{ + int rc; + size_t n; + struct tee_ioctl_buf_data buf; + struct tee_ioctl_invoke_arg __user *uarg; + struct tee_ioctl_invoke_arg arg; + struct tee_ioctl_param __user *uparams = NULL; + struct tee_param *params = NULL; + + if (!ctx->teedev->desc->ops->invoke_func) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + + if (buf.buf_len > TEE_MAX_ARG_SIZE || + buf.buf_len < sizeof(struct tee_ioctl_invoke_arg)) + return -EINVAL; + + uarg = u64_to_user_ptr(buf.buf_ptr); + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + if (sizeof(arg) + TEE_IOCTL_PARAM_SIZE(arg.num_params) != buf.buf_len) + return -EINVAL; + + if (arg.num_params) { + params = kcalloc(arg.num_params, sizeof(struct tee_param), + GFP_KERNEL); + if (!params) + return -ENOMEM; + uparams = uarg->params; + rc = params_from_user(ctx, params, arg.num_params, uparams); + if (rc) + goto out; + } + + rc = ctx->teedev->desc->ops->invoke_func(ctx, &arg, params); + if (rc) + goto out; + + if (put_user(arg.ret, &uarg->ret) || + put_user(arg.ret_origin, &uarg->ret_origin)) { + rc = -EFAULT; + goto out; + } + rc = params_to_user(uparams, arg.num_params, params); +out: + if (params) { + /* Decrease ref count for all valid shared memory pointers */ + for (n = 0; n < arg.num_params; n++) + if (tee_param_is_memref(params + n) && + params[n].u.memref.shm) + tee_shm_put(params[n].u.memref.shm); + kfree(params); + } + return rc; +} + +static int tee_ioctl_cancel(struct tee_context *ctx, + struct tee_ioctl_cancel_arg __user *uarg) +{ + struct tee_ioctl_cancel_arg arg; + + if (!ctx->teedev->desc->ops->cancel_req) + return -EINVAL; + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + return ctx->teedev->desc->ops->cancel_req(ctx, arg.cancel_id, + arg.session); +} + +static int +tee_ioctl_close_session(struct tee_context *ctx, + struct tee_ioctl_close_session_arg __user *uarg) +{ + struct tee_ioctl_close_session_arg arg; + + if (!ctx->teedev->desc->ops->close_session) + return -EINVAL; + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + return ctx->teedev->desc->ops->close_session(ctx, arg.session); +} + +static int params_to_supp(struct tee_context *ctx, + struct tee_ioctl_param __user *uparams, + size_t num_params, struct tee_param *params) +{ + size_t n; + + for (n = 0; n < num_params; n++) { + struct tee_ioctl_param ip; + struct tee_param *p = params + n; + + ip.attr = p->attr; + switch (p->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + ip.a = p->u.value.a; + ip.b = p->u.value.b; + ip.c = p->u.value.c; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + ip.b = p->u.memref.size; + if (!p->u.memref.shm) { + ip.a = 0; + ip.c = (u64)-1; /* invalid shm id */ + break; + } + ip.a = p->u.memref.shm_offs; + ip.c = p->u.memref.shm->id; + break; + default: + ip.a = 0; + ip.b = 0; + ip.c = 0; + break; + 
} + + if (copy_to_user(uparams + n, &ip, sizeof(ip))) + return -EFAULT; + } + + return 0; +} + +static int tee_ioctl_supp_recv(struct tee_context *ctx, + struct tee_ioctl_buf_data __user *ubuf) +{ + int rc; + struct tee_ioctl_buf_data buf; + struct tee_iocl_supp_recv_arg __user *uarg; + struct tee_param *params; + u32 num_params; + u32 func; + + if (!ctx->teedev->desc->ops->supp_recv) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + + if (buf.buf_len > TEE_MAX_ARG_SIZE || + buf.buf_len < sizeof(struct tee_iocl_supp_recv_arg)) + return -EINVAL; + + uarg = u64_to_user_ptr(buf.buf_ptr); + if (get_user(num_params, &uarg->num_params)) + return -EFAULT; + + if (sizeof(*uarg) + TEE_IOCTL_PARAM_SIZE(num_params) != buf.buf_len) + return -EINVAL; + + params = kcalloc(num_params, sizeof(struct tee_param), GFP_KERNEL); + if (!params) + return -ENOMEM; + + rc = params_from_user(ctx, params, num_params, uarg->params); + if (rc) + goto out; + + rc = ctx->teedev->desc->ops->supp_recv(ctx, &func, &num_params, params); + if (rc) + goto out; + + if (put_user(func, &uarg->func) || + put_user(num_params, &uarg->num_params)) { + rc = -EFAULT; + goto out; + } + + rc = params_to_supp(ctx, uarg->params, num_params, params); +out: + kfree(params); + return rc; +} + +static int params_from_supp(struct tee_param *params, size_t num_params, + struct tee_ioctl_param __user *uparams) +{ + size_t n; + + for (n = 0; n < num_params; n++) { + struct tee_param *p = params + n; + struct tee_ioctl_param ip; + + if (copy_from_user(&ip, uparams + n, sizeof(ip))) + return -EFAULT; + + /* All unused attribute bits has to be zero */ + if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_MASK) + return -EINVAL; + + p->attr = ip.attr; + switch (ip.attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT: + /* Only out and in/out values can be updated */ + p->u.value.a = ip.a; + p->u.value.b = ip.b; + p->u.value.c = ip.c; + break; + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + /* + * Only the size of the memref can be updated. + * Since we don't have access to the original + * parameters here, only store the supplied size. + * The driver will copy the updated size into the + * original parameters. 
+ */ + p->u.memref.shm = NULL; + p->u.memref.shm_offs = 0; + p->u.memref.size = ip.b; + break; + default: + memset(&p->u, 0, sizeof(p->u)); + break; + } + } + return 0; +} + +static int tee_ioctl_supp_send(struct tee_context *ctx, + struct tee_ioctl_buf_data __user *ubuf) +{ + long rc; + struct tee_ioctl_buf_data buf; + struct tee_iocl_supp_send_arg __user *uarg; + struct tee_param *params; + u32 num_params; + u32 ret; + + /* Not valid for this driver */ + if (!ctx->teedev->desc->ops->supp_send) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + + if (buf.buf_len > TEE_MAX_ARG_SIZE || + buf.buf_len < sizeof(struct tee_iocl_supp_send_arg)) + return -EINVAL; + + uarg = u64_to_user_ptr(buf.buf_ptr); + if (get_user(ret, &uarg->ret) || + get_user(num_params, &uarg->num_params)) + return -EFAULT; + + if (sizeof(*uarg) + TEE_IOCTL_PARAM_SIZE(num_params) > buf.buf_len) + return -EINVAL; + + params = kcalloc(num_params, sizeof(struct tee_param), GFP_KERNEL); + if (!params) + return -ENOMEM; + + rc = params_from_supp(params, num_params, uarg->params); + if (rc) + goto out; + + rc = ctx->teedev->desc->ops->supp_send(ctx, ret, num_params, params); +out: + kfree(params); + return rc; +} + +static long tee_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct tee_context *ctx = filp->private_data; + void __user *uarg = (void __user *)arg; + + switch (cmd) { + case TEE_IOC_VERSION: + return tee_ioctl_version(ctx, uarg); + case TEE_IOC_SHM_ALLOC: + return tee_ioctl_shm_alloc(ctx, uarg); + case TEE_IOC_SHM_REGISTER: + return tee_ioctl_shm_register(ctx, uarg); + case TEE_IOC_OPEN_SESSION: + return tee_ioctl_open_session(ctx, uarg); + case TEE_IOC_INVOKE: + return tee_ioctl_invoke(ctx, uarg); + case TEE_IOC_CANCEL: + return tee_ioctl_cancel(ctx, uarg); + case TEE_IOC_CLOSE_SESSION: + return tee_ioctl_close_session(ctx, uarg); + case TEE_IOC_SUPPL_RECV: + return tee_ioctl_supp_recv(ctx, uarg); + case TEE_IOC_SUPPL_SEND: + return tee_ioctl_supp_send(ctx, uarg); + default: + return -EINVAL; + } +} + +static const struct file_operations tee_fops = { + .owner = THIS_MODULE, + .open = tee_open, + .release = tee_release, + .unlocked_ioctl = tee_ioctl, + .compat_ioctl = tee_ioctl, +}; + +static void tee_release_device(struct device *dev) +{ + struct tee_device *teedev = container_of(dev, struct tee_device, dev); + + spin_lock(&driver_lock); + clear_bit(teedev->id, dev_mask); + spin_unlock(&driver_lock); + mutex_destroy(&teedev->mutex); + idr_destroy(&teedev->idr); + kfree(teedev); +} + +/** + * tee_device_alloc() - Allocate a new struct tee_device instance + * @teedesc: Descriptor for this driver + * @dev: Parent device for this device + * @pool: Shared memory pool, NULL if not used + * @driver_data: Private driver data for this device + * + * Allocates a new struct tee_device instance. The device is + * removed by tee_device_unregister(). 
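+ *
+ * Illustrative driver-side usage (editorial sketch; the OP-TEE probe
+ * code is not part of this hunk and details may differ):
+ *
+ * teedev = tee_device_alloc(&tee_desc, NULL, pool, optee);
+ * if (IS_ERR(teedev))
+ * return PTR_ERR(teedev);
+ * rc = tee_device_register(teedev);
+ * if (rc)
+ * tee_device_unregister(teedev);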
+ * + * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure + */ +struct tee_device *tee_device_alloc(const struct tee_desc *teedesc, + struct device *dev, + struct tee_shm_pool *pool, + void *driver_data) +{ + struct tee_device *teedev; + void *ret; + int rc; + int offs = 0; + + if (!teedesc || !teedesc->name || !teedesc->ops || + !teedesc->ops->get_version || !teedesc->ops->open || + !teedesc->ops->release || !pool) + return ERR_PTR(-EINVAL); + + teedev = kzalloc(sizeof(*teedev), GFP_KERNEL); + if (!teedev) { + ret = ERR_PTR(-ENOMEM); + goto err; + } + + if (teedesc->flags & TEE_DESC_PRIVILEGED) + offs = TEE_NUM_DEVICES / 2; + + spin_lock(&driver_lock); + teedev->id = find_next_zero_bit(dev_mask, TEE_NUM_DEVICES, offs); + if (teedev->id < TEE_NUM_DEVICES) + set_bit(teedev->id, dev_mask); + spin_unlock(&driver_lock); + + if (teedev->id >= TEE_NUM_DEVICES) { + ret = ERR_PTR(-ENOMEM); + goto err; + } + + snprintf(teedev->name, sizeof(teedev->name), "tee%s%d", + teedesc->flags & TEE_DESC_PRIVILEGED ? "priv" : "", + teedev->id - offs); + + teedev->dev.class = tee_class; + teedev->dev.release = tee_release_device; + teedev->dev.parent = dev; + + teedev->dev.devt = MKDEV(MAJOR(tee_devt), teedev->id); + + rc = dev_set_name(&teedev->dev, "%s", teedev->name); + if (rc) { + ret = ERR_PTR(rc); + goto err_devt; + } + + cdev_init(&teedev->cdev, &tee_fops); + teedev->cdev.owner = teedesc->owner; + teedev->cdev.kobj.parent = &teedev->dev.kobj; + + dev_set_drvdata(&teedev->dev, driver_data); + device_initialize(&teedev->dev); + + /* 1 as tee_device_unregister() does one final tee_device_put() */ + teedev->num_users = 1; + init_completion(&teedev->c_no_users); + mutex_init(&teedev->mutex); + idr_init(&teedev->idr); + + teedev->desc = teedesc; + teedev->pool = pool; + + return teedev; +err_devt: + unregister_chrdev_region(teedev->dev.devt, 1); +err: + pr_err("could not register %s driver\n", + teedesc->flags & TEE_DESC_PRIVILEGED ? "privileged" : "client"); + if (teedev && teedev->id < TEE_NUM_DEVICES) { + spin_lock(&driver_lock); + clear_bit(teedev->id, dev_mask); + spin_unlock(&driver_lock); + } + kfree(teedev); + return ret; +} +EXPORT_SYMBOL_GPL(tee_device_alloc); + +static ssize_t implementation_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tee_device *teedev = container_of(dev, struct tee_device, dev); + struct tee_ioctl_version_data vers; + + teedev->desc->ops->get_version(teedev, &vers); + return scnprintf(buf, PAGE_SIZE, "%d\n", vers.impl_id); +} +static DEVICE_ATTR_RO(implementation_id); + +static struct attribute *tee_dev_attrs[] = { + &dev_attr_implementation_id.attr, + NULL +}; + +static const struct attribute_group tee_dev_group = { + .attrs = tee_dev_attrs, +}; + +/** + * tee_device_register() - Registers a TEE device + * @teedev: Device to register + * + * tee_device_unregister() need to be called to remove the @teedev if + * this function fails. 
+ * + * @returns < 0 on failure + */ +int tee_device_register(struct tee_device *teedev) +{ + int rc; + + if (teedev->flags & TEE_DEVICE_FLAG_REGISTERED) { + dev_err(&teedev->dev, "attempt to register twice\n"); + return -EINVAL; + } + + rc = cdev_add(&teedev->cdev, teedev->dev.devt, 1); + if (rc) { + dev_err(&teedev->dev, + "unable to cdev_add() %s, major %d, minor %d, err=%d\n", + teedev->name, MAJOR(teedev->dev.devt), + MINOR(teedev->dev.devt), rc); + return rc; + } + + rc = device_add(&teedev->dev); + if (rc) { + dev_err(&teedev->dev, + "unable to device_add() %s, major %d, minor %d, err=%d\n", + teedev->name, MAJOR(teedev->dev.devt), + MINOR(teedev->dev.devt), rc); + goto err_device_add; + } + + rc = sysfs_create_group(&teedev->dev.kobj, &tee_dev_group); + if (rc) { + dev_err(&teedev->dev, + "failed to create sysfs attributes, err=%d\n", rc); + goto err_sysfs_create_group; + } + + teedev->flags |= TEE_DEVICE_FLAG_REGISTERED; + return 0; + +err_sysfs_create_group: + device_del(&teedev->dev); +err_device_add: + cdev_del(&teedev->cdev); + return rc; +} +EXPORT_SYMBOL_GPL(tee_device_register); + +void tee_device_put(struct tee_device *teedev) +{ + mutex_lock(&teedev->mutex); + /* Shouldn't put in this state */ + if (!WARN_ON(!teedev->desc)) { + teedev->num_users--; + if (!teedev->num_users) { + teedev->desc = NULL; + complete(&teedev->c_no_users); + } + } + mutex_unlock(&teedev->mutex); +} + +bool tee_device_get(struct tee_device *teedev) +{ + mutex_lock(&teedev->mutex); + if (!teedev->desc) { + mutex_unlock(&teedev->mutex); + return false; + } + teedev->num_users++; + mutex_unlock(&teedev->mutex); + return true; +} + +/** + * tee_device_unregister() - Removes a TEE device + * @teedev: Device to unregister + * + * This function should be called to remove the @teedev even if + * tee_device_register() hasn't been called yet. Does nothing if + * @teedev is NULL. + */ +void tee_device_unregister(struct tee_device *teedev) +{ + if (!teedev) + return; + + if (teedev->flags & TEE_DEVICE_FLAG_REGISTERED) { + sysfs_remove_group(&teedev->dev.kobj, &tee_dev_group); + cdev_del(&teedev->cdev); + device_del(&teedev->dev); + } + + tee_device_put(teedev); + wait_for_completion(&teedev->c_no_users); + + /* + * No need to take a mutex any longer now since teedev->desc was + * set to NULL before teedev->c_no_users was completed. + */ + + teedev->pool = NULL; + + put_device(&teedev->dev); +} +EXPORT_SYMBOL_GPL(tee_device_unregister); + +/** + * tee_get_drvdata() - Return driver_data pointer + * @teedev: Device containing the driver_data pointer + * @returns the driver_data pointer supplied to tee_register(). 
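+ *
+ * For instance (illustrative; "struct my_drv" is a made-up type), a driver
+ * that passed its state as @driver_data to tee_device_alloc() can fetch it
+ * back in one of its callbacks:
+ *
+ *	struct my_drv *drv = tee_get_drvdata(teedev);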
+ */
+void *tee_get_drvdata(struct tee_device *teedev)
+{
+	return dev_get_drvdata(&teedev->dev);
+}
+EXPORT_SYMBOL_GPL(tee_get_drvdata);
+
+static int __init tee_init(void)
+{
+	int rc;
+
+	tee_class = class_create(THIS_MODULE, "tee");
+	if (IS_ERR(tee_class)) {
+		pr_err("couldn't create class\n");
+		return PTR_ERR(tee_class);
+	}
+
+	rc = alloc_chrdev_region(&tee_devt, 0, TEE_NUM_DEVICES, "tee");
+	if (rc) {
+		pr_err("failed to allocate char dev region\n");
+		class_destroy(tee_class);
+		tee_class = NULL;
+	}
+
+	return rc;
+}
+
+static void __exit tee_exit(void)
+{
+	class_destroy(tee_class);
+	tee_class = NULL;
+	unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES);
+}
+
+subsys_initcall(tee_init);
+module_exit(tee_exit);
+
+MODULE_AUTHOR("Linaro");
+MODULE_DESCRIPTION("TEE Driver");
+MODULE_VERSION("1.0");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/tee/tee_private.h b/drivers/tee/tee_private.h
new file mode 100644
index 0000000000000000000000000000000000000000..85d99d621603d7f2dd111c2549c312c01069df85
--- /dev/null
+++ b/drivers/tee/tee_private.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef TEE_PRIVATE_H
+#define TEE_PRIVATE_H
+
+#include <linux/cdev.h>
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/**
+ * struct tee_shm_pool - shared memory pool
+ * @private_mgr:	pool manager for shared memory only between kernel
+ *			and secure world
+ * @dma_buf_mgr:	pool manager for shared memory exported to user space
+ */
+struct tee_shm_pool {
+	struct tee_shm_pool_mgr *private_mgr;
+	struct tee_shm_pool_mgr *dma_buf_mgr;
+};
+
+#define TEE_DEVICE_FLAG_REGISTERED	0x1
+#define TEE_MAX_DEV_NAME_LEN		32
+
+/**
+ * struct tee_device - TEE Device representation
+ * @name:	name of device
+ * @desc:	description of device
+ * @id:		unique id of device
+ * @flags:	represented by TEE_DEVICE_FLAG_REGISTERED above
+ * @dev:	embedded basic device structure
+ * @cdev:	embedded cdev
+ * @num_users:	number of active users of this device
+ * @c_no_users:	completion used when unregistering the device
+ * @mutex:	mutex protecting @num_users and @idr
+ * @idr:	register of shared memory objects allocated on this device
+ * @pool:	shared memory pool
+ */
+struct tee_device {
+	char name[TEE_MAX_DEV_NAME_LEN];
+	const struct tee_desc *desc;
+	int id;
+	unsigned int flags;
+
+	struct device dev;
+	struct cdev cdev;
+
+	size_t num_users;
+	struct completion c_no_users;
+	struct mutex mutex;	/* protects num_users and idr */
+
+	struct idr idr;
+	struct tee_shm_pool *pool;
+};
+
+int tee_shm_init(void);
+
+int tee_shm_get_fd(struct tee_shm *shm);
+
+bool tee_device_get(struct tee_device *teedev);
+void tee_device_put(struct tee_device *teedev);
+
+void teedev_ctx_get(struct tee_context *ctx);
+void teedev_ctx_put(struct tee_context *ctx);
+
+#endif /*TEE_PRIVATE_H*/
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
new file mode 100644
index 0000000000000000000000000000000000000000..ed2d71c3337dad74ce75738f7498f81a1f747424
--- /dev/null
+++ b/drivers/tee/tee_shm.c
@@ -0,0 +1,510 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/fdtable.h>
+#include <linux/idr.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include "tee_private.h"
+
+static void tee_shm_release(struct tee_shm *shm)
+{
+	struct tee_device *teedev = shm->teedev;
+
+	mutex_lock(&teedev->mutex);
+	idr_remove(&teedev->idr, shm->id);
+	if (shm->ctx)
+		list_del(&shm->link);
+	mutex_unlock(&teedev->mutex);
+
+	if (shm->flags & TEE_SHM_POOL) {
+		struct tee_shm_pool_mgr *poolm;
+
+		if (shm->flags & TEE_SHM_DMA_BUF)
+			poolm = teedev->pool->dma_buf_mgr;
+		else
+			poolm = teedev->pool->private_mgr;
+
+		poolm->ops->free(poolm, shm);
+	} else if (shm->flags & TEE_SHM_REGISTER) {
+		size_t n;
+		int rc = teedev->desc->ops->shm_unregister(shm->ctx, shm);
+
+		if (rc)
+			dev_err(teedev->dev.parent,
+				"unregister shm %p failed: %d", shm, rc);
+
+		for (n = 0; n < shm->num_pages; n++)
+			put_page(shm->pages[n]);
+
+		kfree(shm->pages);
+	}
+
+	if (shm->ctx)
+		teedev_ctx_put(shm->ctx);
+
+	kfree(shm);
+
+	tee_device_put(teedev);
+}
+
+static struct sg_table *tee_shm_op_map_dma_buf(struct dma_buf_attachment
+			*attach, enum dma_data_direction dir)
+{
+	return NULL;
+}
+
+static void tee_shm_op_unmap_dma_buf(struct dma_buf_attachment *attach,
+				     struct sg_table *table,
+				     enum dma_data_direction dir)
+{
+}
+
+static void tee_shm_op_release(struct dma_buf *dmabuf)
+{
+	struct tee_shm *shm = dmabuf->priv;
+
+	tee_shm_release(shm);
+}
+
+static void *tee_shm_op_kmap_atomic(struct dma_buf *dmabuf, unsigned long pgnum)
+{
+	return NULL;
+}
+
+static void *tee_shm_op_kmap(struct dma_buf *dmabuf, unsigned long pgnum)
+{
+	return NULL;
+}
+
+static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct tee_shm *shm = dmabuf->priv;
+	size_t size = vma->vm_end - vma->vm_start;
+
+	/* Refuse sharing shared memory provided by application */
+	if (shm->flags & TEE_SHM_REGISTER)
+		return -EINVAL;
+
+	return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT,
+			       size, vma->vm_page_prot);
+}
+
+static const struct dma_buf_ops tee_shm_dma_buf_ops = {
+	.map_dma_buf = tee_shm_op_map_dma_buf,
+	.unmap_dma_buf = tee_shm_op_unmap_dma_buf,
+	.release = tee_shm_op_release,
+	.kmap_atomic = tee_shm_op_kmap_atomic,
+	.kmap = tee_shm_op_kmap,
+	.mmap = tee_shm_op_mmap,
+};
+
+static struct tee_shm *__tee_shm_alloc(struct tee_context *ctx,
+				       struct tee_device *teedev,
+				       size_t size, u32 flags)
+{
+	struct tee_shm_pool_mgr *poolm = NULL;
+	struct tee_shm *shm;
+	void *ret;
+	int rc;
+
+	if (ctx && ctx->teedev != teedev) {
+		dev_err(teedev->dev.parent, "ctx and teedev mismatch\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!(flags & TEE_SHM_MAPPED)) {
+		dev_err(teedev->dev.parent,
+			"only mapped allocations supported\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF))) {
+		dev_err(teedev->dev.parent, "invalid shm flags 0x%x", flags);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!tee_device_get(teedev))
+		return ERR_PTR(-EINVAL);
+
+	if (!teedev->pool) {
+		/* teedev has been detached from driver */
+		ret = ERR_PTR(-EINVAL);
+		goto err_dev_put;
+	}
+
+	shm = kzalloc(sizeof(*shm), GFP_KERNEL);
+	if (!shm) {
+		ret = ERR_PTR(-ENOMEM);
+		goto err_dev_put;
+	}
+
+	shm->flags = flags | TEE_SHM_POOL;
+	shm->teedev = teedev;
+	shm->ctx = ctx;
+	if (flags & TEE_SHM_DMA_BUF)
+		poolm = teedev->pool->dma_buf_mgr;
+	else
+		poolm = teedev->pool->private_mgr;
+
+	rc = poolm->ops->alloc(poolm, shm, size);
+	if (rc) {
+		ret = ERR_PTR(rc);
+		goto err_kfree;
+	}
+
+	mutex_lock(&teedev->mutex);
+	shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL);
+	mutex_unlock(&teedev->mutex);
+	if (shm->id < 0) {
+		ret = ERR_PTR(shm->id);
+		goto err_pool_free;
+	}
+
+	if (flags & TEE_SHM_DMA_BUF) {
+		DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+		exp_info.ops = &tee_shm_dma_buf_ops;
+		exp_info.size = shm->size;
+		exp_info.flags = O_RDWR;
+		exp_info.priv = shm;
+
+		shm->dmabuf = dma_buf_export(&exp_info);
+		if (IS_ERR(shm->dmabuf)) {
+			ret = ERR_CAST(shm->dmabuf);
+			goto err_rem;
+		}
+	}
+
+	if (ctx) {
+		teedev_ctx_get(ctx);
+		mutex_lock(&teedev->mutex);
+		list_add_tail(&shm->link, &ctx->list_shm);
+		mutex_unlock(&teedev->mutex);
+	}
+
+	return shm;
+err_rem:
+	mutex_lock(&teedev->mutex);
+	idr_remove(&teedev->idr, shm->id);
+	mutex_unlock(&teedev->mutex);
+err_pool_free:
+	poolm->ops->free(poolm, shm);
+err_kfree:
+	kfree(shm);
+err_dev_put:
+	tee_device_put(teedev);
+	return ret;
+}
+
+/**
+ * tee_shm_alloc() - Allocate shared memory
+ * @ctx:	Context that allocates the shared memory
+ * @size:	Requested size of shared memory
+ * @flags:	Flags setting properties for the requested shared memory.
+ *
+ * Memory allocated as global shared memory is automatically freed when the
+ * TEE file pointer is closed. The @flags field uses the bits defined by
+ * TEE_SHM_* in <linux/tee_drv.h>. TEE_SHM_MAPPED must currently always be
+ * set. If TEE_SHM_DMA_BUF is set, global shared memory is allocated and
+ * associated with a dma-buf handle, else driver private memory.
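+ *
+ * Example (illustrative):
+ *
+ *	shm = tee_shm_alloc(ctx, 4096, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+ *	if (IS_ERR(shm))
+ *		return PTR_ERR(shm);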
+ */ +struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) +{ + return __tee_shm_alloc(ctx, ctx->teedev, size, flags); +} +EXPORT_SYMBOL_GPL(tee_shm_alloc); + +struct tee_shm *tee_shm_priv_alloc(struct tee_device *teedev, size_t size) +{ + return __tee_shm_alloc(NULL, teedev, size, TEE_SHM_MAPPED); +} +EXPORT_SYMBOL_GPL(tee_shm_priv_alloc); + +struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, + size_t length, u32 flags) +{ + struct tee_device *teedev = ctx->teedev; + const u32 req_flags = TEE_SHM_DMA_BUF | TEE_SHM_USER_MAPPED; + struct tee_shm *shm; + void *ret; + int rc; + int num_pages; + unsigned long start; + + if (flags != req_flags) + return ERR_PTR(-ENOTSUPP); + + if (!tee_device_get(teedev)) + return ERR_PTR(-EINVAL); + + if (!teedev->desc->ops->shm_register || + !teedev->desc->ops->shm_unregister) { + tee_device_put(teedev); + return ERR_PTR(-ENOTSUPP); + } + + teedev_ctx_get(ctx); + + shm = kzalloc(sizeof(*shm), GFP_KERNEL); + if (!shm) { + ret = ERR_PTR(-ENOMEM); + goto err; + } + + shm->flags = flags | TEE_SHM_REGISTER; + shm->teedev = teedev; + shm->ctx = ctx; + shm->id = -1; + start = rounddown(addr, PAGE_SIZE); + shm->offset = addr - start; + shm->size = length; + num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE; + shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL); + if (!shm->pages) { + ret = ERR_PTR(-ENOMEM); + goto err; + } + + rc = get_user_pages_fast(start, num_pages, 1, shm->pages); + if (rc > 0) + shm->num_pages = rc; + if (rc != num_pages) { + if (rc >= 0) + rc = -ENOMEM; + ret = ERR_PTR(rc); + goto err; + } + + mutex_lock(&teedev->mutex); + shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL); + mutex_unlock(&teedev->mutex); + + if (shm->id < 0) { + ret = ERR_PTR(shm->id); + goto err; + } + + rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages, + shm->num_pages, start); + if (rc) { + ret = ERR_PTR(rc); + goto err; + } + + if (flags & TEE_SHM_DMA_BUF) { + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + exp_info.ops = &tee_shm_dma_buf_ops; + exp_info.size = shm->size; + exp_info.flags = O_RDWR; + exp_info.priv = shm; + + shm->dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(shm->dmabuf)) { + ret = ERR_CAST(shm->dmabuf); + teedev->desc->ops->shm_unregister(ctx, shm); + goto err; + } + } + + mutex_lock(&teedev->mutex); + list_add_tail(&shm->link, &ctx->list_shm); + mutex_unlock(&teedev->mutex); + + return shm; +err: + if (shm) { + size_t n; + + if (shm->id >= 0) { + mutex_lock(&teedev->mutex); + idr_remove(&teedev->idr, shm->id); + mutex_unlock(&teedev->mutex); + } + if (shm->pages) { + for (n = 0; n < shm->num_pages; n++) + put_page(shm->pages[n]); + kfree(shm->pages); + } + } + kfree(shm); + teedev_ctx_put(ctx); + tee_device_put(teedev); + return ret; +} +EXPORT_SYMBOL_GPL(tee_shm_register); + +/** + * tee_shm_get_fd() - Increase reference count and return file descriptor + * @shm: Shared memory handle + * @returns user space file descriptor to shared memory + */ +int tee_shm_get_fd(struct tee_shm *shm) +{ + int fd; + + if (!(shm->flags & TEE_SHM_DMA_BUF)) + return -EINVAL; + + fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC); + if (fd >= 0) + get_dma_buf(shm->dmabuf); + return fd; +} + +/** + * tee_shm_free() - Free shared memory + * @shm: Handle to shared memory to free + */ +void tee_shm_free(struct tee_shm *shm) +{ + /* + * dma_buf_put() decreases the dmabuf reference counter and will + * call tee_shm_release() when the last reference is gone. 
+	 *
+	 * In the case of driver private memory we call tee_shm_release()
+	 * directly instead, as it doesn't have a reference counter.
+	 */
+	if (shm->flags & TEE_SHM_DMA_BUF)
+		dma_buf_put(shm->dmabuf);
+	else
+		tee_shm_release(shm);
+}
+EXPORT_SYMBOL_GPL(tee_shm_free);
+
+/**
+ * tee_shm_va2pa() - Get physical address of a virtual address
+ * @shm:	Shared memory handle
+ * @va:		Virtual address to translate
+ * @pa:		Returned physical address
+ * @returns 0 on success and < 0 on failure
+ */
+int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa)
+{
+	if (!(shm->flags & TEE_SHM_MAPPED))
+		return -EINVAL;
+	/* Check that we're in the range of the shm */
+	if ((char *)va < (char *)shm->kaddr)
+		return -EINVAL;
+	if ((char *)va >= ((char *)shm->kaddr + shm->size))
+		return -EINVAL;
+
+	return tee_shm_get_pa(
+			shm, (unsigned long)va - (unsigned long)shm->kaddr, pa);
+}
+EXPORT_SYMBOL_GPL(tee_shm_va2pa);
+
+/**
+ * tee_shm_pa2va() - Get virtual address of a physical address
+ * @shm:	Shared memory handle
+ * @pa:		Physical address to translate
+ * @va:		Returned virtual address
+ * @returns 0 on success and < 0 on failure
+ */
+int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va)
+{
+	if (!(shm->flags & TEE_SHM_MAPPED))
+		return -EINVAL;
+	/* Check that we're in the range of the shm */
+	if (pa < shm->paddr)
+		return -EINVAL;
+	if (pa >= (shm->paddr + shm->size))
+		return -EINVAL;
+
+	if (va) {
+		void *v = tee_shm_get_va(shm, pa - shm->paddr);
+
+		if (IS_ERR(v))
+			return PTR_ERR(v);
+		*va = v;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tee_shm_pa2va);
+
+/**
+ * tee_shm_get_va() - Get virtual address of a shared memory plus an offset
+ * @shm:	Shared memory handle
+ * @offs:	Offset from start of this shared memory
+ * @returns virtual address of the shared memory + offs if offs is within
+ *	the bounds of this shared memory, else an ERR_PTR
+ */
+void *tee_shm_get_va(struct tee_shm *shm, size_t offs)
+{
+	if (!(shm->flags & TEE_SHM_MAPPED))
+		return ERR_PTR(-EINVAL);
+	if (offs >= shm->size)
+		return ERR_PTR(-EINVAL);
+	return (char *)shm->kaddr + offs;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_va);
+
+/**
+ * tee_shm_get_pa() - Get physical address of a shared memory plus an offset
+ * @shm:	Shared memory handle
+ * @offs:	Offset from start of this shared memory
+ * @pa:		Physical address to return
+ * @returns 0 if offs is within the bounds of this shared memory, else an
+ *	error code.
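+ *
+ * For example (illustrative): for a 4 KiB buffer at physical address
+ * 0x40000000, tee_shm_get_pa(shm, 0x10, &pa) stores 0x40000010 in pa and
+ * returns 0, while an offset of 4096 or more returns -EINVAL.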
+ */
+int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa)
+{
+	if (offs >= shm->size)
+		return -EINVAL;
+	if (pa)
+		*pa = shm->paddr + offs;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_pa);
+
+/**
+ * tee_shm_get_from_id() - Find shared memory object and increase reference
+ * count
+ * @ctx:	Context owning the shared memory
+ * @id:		Id of shared memory object
+ * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure
+ */
+struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id)
+{
+	struct tee_device *teedev;
+	struct tee_shm *shm;
+
+	if (!ctx)
+		return ERR_PTR(-EINVAL);
+
+	teedev = ctx->teedev;
+	mutex_lock(&teedev->mutex);
+	shm = idr_find(&teedev->idr, id);
+	if (!shm || shm->ctx != ctx)
+		shm = ERR_PTR(-EINVAL);
+	else if (shm->flags & TEE_SHM_DMA_BUF)
+		get_dma_buf(shm->dmabuf);
+	mutex_unlock(&teedev->mutex);
+	return shm;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_from_id);
+
+/**
+ * tee_shm_put() - Decrease reference count on a shared memory handle
+ * @shm:	Shared memory handle
+ */
+void tee_shm_put(struct tee_shm *shm)
+{
+	if (shm->flags & TEE_SHM_DMA_BUF)
+		dma_buf_put(shm->dmabuf);
+}
+EXPORT_SYMBOL_GPL(tee_shm_put);
diff --git a/drivers/tee/tee_shm_pool.c b/drivers/tee/tee_shm_pool.c
new file mode 100644
index 0000000000000000000000000000000000000000..e6d4b9e4a86494638eb2f85fe22078d5a4f91e53
--- /dev/null
+++ b/drivers/tee/tee_shm_pool.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include "tee_private.h"
+
+static int pool_op_gen_alloc(struct tee_shm_pool_mgr *poolm,
+			     struct tee_shm *shm, size_t size)
+{
+	unsigned long va;
+	struct gen_pool *genpool = poolm->private_data;
+	size_t s = roundup(size, 1 << genpool->min_alloc_order);
+
+	va = gen_pool_alloc(genpool, s);
+	if (!va)
+		return -ENOMEM;
+
+	memset((void *)va, 0, s);
+	shm->kaddr = (void *)va;
+	shm->paddr = gen_pool_virt_to_phys(genpool, va);
+	shm->size = s;
+	return 0;
+}
+
+static void pool_op_gen_free(struct tee_shm_pool_mgr *poolm,
+			     struct tee_shm *shm)
+{
+	gen_pool_free(poolm->private_data, (unsigned long)shm->kaddr,
+		      shm->size);
+	shm->kaddr = NULL;
+}
+
+static void pool_op_gen_destroy_poolmgr(struct tee_shm_pool_mgr *poolm)
+{
+	gen_pool_destroy(poolm->private_data);
+	kfree(poolm);
+}
+
+static const struct tee_shm_pool_mgr_ops pool_ops_generic = {
+	.alloc = pool_op_gen_alloc,
+	.free = pool_op_gen_free,
+	.destroy_poolmgr = pool_op_gen_destroy_poolmgr,
+};
+
+/**
+ * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved
+ * memory range
+ * @priv_info:	 Information for driver private shared memory pool
+ * @dmabuf_info: Information for dma-buf shared memory pool
+ *
+ * Start and end of pools must be page aligned.
+ *
+ * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied
+ * in @dmabuf_info, others will use the range provided by @priv_info.
+ *
+ * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure.
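+ *
+ * A typical call could look like (illustrative; the regions, their sizes
+ * and the tee_shm_pool_mem_info layout from <linux/tee_drv.h> are assumed):
+ *
+ *	struct tee_shm_pool_mem_info priv_info = {
+ *		.vaddr = va, .paddr = pa, .size = SZ_1M,
+ *	};
+ *	struct tee_shm_pool_mem_info dmabuf_info = {
+ *		.vaddr = va + SZ_1M, .paddr = pa + SZ_1M, .size = SZ_1M,
+ *	};
+ *	pool = tee_shm_pool_alloc_res_mem(&priv_info, &dmabuf_info);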
+ */ +struct tee_shm_pool * +tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info, + struct tee_shm_pool_mem_info *dmabuf_info) +{ + struct tee_shm_pool_mgr *priv_mgr; + struct tee_shm_pool_mgr *dmabuf_mgr; + void *rc; + + /* + * Create the pool for driver private shared memory + */ + rc = tee_shm_pool_mgr_alloc_res_mem(priv_info->vaddr, priv_info->paddr, + priv_info->size, + 3 /* 8 byte aligned */); + if (IS_ERR(rc)) + return rc; + priv_mgr = rc; + + /* + * Create the pool for dma_buf shared memory + */ + rc = tee_shm_pool_mgr_alloc_res_mem(dmabuf_info->vaddr, + dmabuf_info->paddr, + dmabuf_info->size, PAGE_SHIFT); + if (IS_ERR(rc)) + goto err_free_priv_mgr; + dmabuf_mgr = rc; + + rc = tee_shm_pool_alloc(priv_mgr, dmabuf_mgr); + if (IS_ERR(rc)) + goto err_free_dmabuf_mgr; + + return rc; + +err_free_dmabuf_mgr: + tee_shm_pool_mgr_destroy(dmabuf_mgr); +err_free_priv_mgr: + tee_shm_pool_mgr_destroy(priv_mgr); + + return rc; +} +EXPORT_SYMBOL_GPL(tee_shm_pool_alloc_res_mem); + +struct tee_shm_pool_mgr *tee_shm_pool_mgr_alloc_res_mem(unsigned long vaddr, + phys_addr_t paddr, + size_t size, + int min_alloc_order) +{ + const size_t page_mask = PAGE_SIZE - 1; + struct tee_shm_pool_mgr *mgr; + int rc; + + /* Start and end must be page aligned */ + if (vaddr & page_mask || paddr & page_mask || size & page_mask) + return ERR_PTR(-EINVAL); + + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return ERR_PTR(-ENOMEM); + + mgr->private_data = gen_pool_create(min_alloc_order, -1); + if (!mgr->private_data) { + rc = -ENOMEM; + goto err; + } + + gen_pool_set_algo(mgr->private_data, gen_pool_best_fit, NULL); + rc = gen_pool_add_virt(mgr->private_data, vaddr, paddr, size, -1); + if (rc) { + gen_pool_destroy(mgr->private_data); + goto err; + } + + mgr->ops = &pool_ops_generic; + + return mgr; +err: + kfree(mgr); + + return ERR_PTR(rc); +} +EXPORT_SYMBOL_GPL(tee_shm_pool_mgr_alloc_res_mem); + +static bool check_mgr_ops(struct tee_shm_pool_mgr *mgr) +{ + return mgr && mgr->ops && mgr->ops->alloc && mgr->ops->free && + mgr->ops->destroy_poolmgr; +} + +struct tee_shm_pool *tee_shm_pool_alloc(struct tee_shm_pool_mgr *priv_mgr, + struct tee_shm_pool_mgr *dmabuf_mgr) +{ + struct tee_shm_pool *pool; + + if (!check_mgr_ops(priv_mgr) || !check_mgr_ops(dmabuf_mgr)) + return ERR_PTR(-EINVAL); + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + + pool->private_mgr = priv_mgr; + pool->dma_buf_mgr = dmabuf_mgr; + + return pool; +} +EXPORT_SYMBOL_GPL(tee_shm_pool_alloc); + +/** + * tee_shm_pool_free() - Free a shared memory pool + * @pool: The shared memory pool to free + * + * There must be no remaining shared memory allocated from this pool when + * this function is called. 
+ */ +void tee_shm_pool_free(struct tee_shm_pool *pool) +{ + if (pool->private_mgr) + tee_shm_pool_mgr_destroy(pool->private_mgr); + if (pool->dma_buf_mgr) + tee_shm_pool_mgr_destroy(pool->dma_buf_mgr); + kfree(pool); +} +EXPORT_SYMBOL_GPL(tee_shm_pool_free); diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index f6429666a1cfe03d1f122266938a5343cf744054..2d855a96cdd9886c5f3abbcfb4e1d8a5c1ea8666 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -23,222 +23,450 @@ #include #include #include +#include #include "thermal_core.h" -#define TEMP0_TH (0x4) -#define TEMP0_RST_TH (0x8) -#define TEMP0_CFG (0xC) -#define TEMP0_EN (0x10) -#define TEMP0_INT_EN (0x14) -#define TEMP0_INT_CLR (0x18) -#define TEMP0_RST_MSK (0x1C) -#define TEMP0_VALUE (0x28) - -#define HISI_TEMP_BASE (-60) -#define HISI_TEMP_RESET (100000) - -#define HISI_MAX_SENSORS 4 +#define HI6220_TEMP0_LAG (0x0) +#define HI6220_TEMP0_TH (0x4) +#define HI6220_TEMP0_RST_TH (0x8) +#define HI6220_TEMP0_CFG (0xC) +#define HI6220_TEMP0_CFG_SS_MSK (0xF000) +#define HI6220_TEMP0_CFG_HDAK_MSK (0x30) +#define HI6220_TEMP0_EN (0x10) +#define HI6220_TEMP0_INT_EN (0x14) +#define HI6220_TEMP0_INT_CLR (0x18) +#define HI6220_TEMP0_RST_MSK (0x1C) +#define HI6220_TEMP0_VALUE (0x28) + +#define HI3660_OFFSET(chan) ((chan) * 0x40) +#define HI3660_TEMP(chan) (HI3660_OFFSET(chan) + 0x1C) +#define HI3660_TH(chan) (HI3660_OFFSET(chan) + 0x20) +#define HI3660_LAG(chan) (HI3660_OFFSET(chan) + 0x28) +#define HI3660_INT_EN(chan) (HI3660_OFFSET(chan) + 0x2C) +#define HI3660_INT_CLR(chan) (HI3660_OFFSET(chan) + 0x30) + +#define HI6220_TEMP_BASE (-60000) +#define HI6220_TEMP_RESET (100000) +#define HI6220_TEMP_STEP (785) +#define HI6220_TEMP_LAG (3500) + +#define HI3660_TEMP_BASE (-63780) +#define HI3660_TEMP_STEP (205) +#define HI3660_TEMP_LAG (4000) + +#define HI6220_DEFAULT_SENSOR 2 +#define HI3660_DEFAULT_SENSOR 1 struct hisi_thermal_sensor { - struct hisi_thermal_data *thermal; struct thermal_zone_device *tzd; - - long sensor_temp; uint32_t id; uint32_t thres_temp; }; struct hisi_thermal_data { - struct mutex thermal_lock; /* protects register data */ + int (*get_temp)(struct hisi_thermal_data *data); + int (*enable_sensor)(struct hisi_thermal_data *data); + int (*disable_sensor)(struct hisi_thermal_data *data); + int (*irq_handler)(struct hisi_thermal_data *data); struct platform_device *pdev; struct clk *clk; - struct hisi_thermal_sensor sensors[HISI_MAX_SENSORS]; - - int irq, irq_bind_sensor; - bool irq_enabled; - + struct hisi_thermal_sensor sensor; void __iomem *regs; + int irq; }; -/* in millicelsius */ -static inline int _step_to_temp(int step) +/* + * The temperature computation on the tsensor is as follow: + * Unit: millidegree Celsius + * Step: 200/255 (0.7843) + * Temperature base: -60°C + * + * The register is programmed in temperature steps, every step is 785 + * millidegree and begins at -60 000 m°C + * + * The temperature from the steps: + * + * Temp = TempBase + (steps x 785) + * + * and the steps from the temperature: + * + * steps = (Temp - TempBase) / 785 + * + */ +static inline int hi6220_thermal_step_to_temp(int step) { - /* - * Every step equals (1 * 200) / 255 celsius, and finally - * need convert to millicelsius. 
- */
-	return (HISI_TEMP_BASE * 1000 + (step * 200000 / 255));
+	return HI6220_TEMP_BASE + (step * HI6220_TEMP_STEP);
 }
 
-static inline long _temp_to_step(long temp)
+static inline int hi6220_thermal_temp_to_step(int temp)
 {
-	return ((temp - HISI_TEMP_BASE * 1000) * 255) / 200000;
+	return DIV_ROUND_UP(temp - HI6220_TEMP_BASE, HI6220_TEMP_STEP);
 }
 
-static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data,
-					 struct hisi_thermal_sensor *sensor)
+/*
+ * for Hi3660,
+ * Step: 189/922 (0.205)
+ * Temperature base: -63.780°C
+ *
+ * The register is programmed in temperature steps, every step is 205
+ * millidegree and begins at -63 780 m°C
+ */
+static inline int hi3660_thermal_step_to_temp(int step)
 {
-	long val;
+	return HI3660_TEMP_BASE + step * HI3660_TEMP_STEP;
+}
 
-	mutex_lock(&data->thermal_lock);
+static inline int hi3660_thermal_temp_to_step(int temp)
+{
+	return DIV_ROUND_UP(temp - HI3660_TEMP_BASE, HI3660_TEMP_STEP);
+}
 
-	/* disable interrupt */
-	writel(0x0, data->regs + TEMP0_INT_EN);
-	writel(0x1, data->regs + TEMP0_INT_CLR);
+/*
+ * The lag register contains 5 bits encoding the temperature in steps.
+ *
+ * Each time the temperature crosses the threshold boundary, an
+ * interrupt is raised. It could be when the temperature is going
+ * above the threshold or below. However, if the temperature is
+ * fluctuating around this value due to the load, we can receive
+ * several interrupts which may not be desired.
+ *
+ * We can set up a temperature representing the delta between the
+ * threshold and the current temperature when the temperature is
+ * decreasing.
+ *
+ * For instance: the lag register is 5°C, the threshold is 65°C, when
+ * the temperature reaches 65°C an interrupt is raised and when the
+ * temperature decreases to 65°C - 5°C another interrupt is raised.
+ *
+ * A very short lag can lead to an interrupt storm, a long lag
+ * increases the latency to react to the temperature changes. In our
+ * case, that is not really a problem as we are polling the
+ * temperature.
+ *
+ * [0:4] : lag register
+ *
+ * The temperature is coded in steps, cf. HI6220_TEMP_STEP.
+ *
+ * Min : 0x00 :  0.0 °C
+ * Max : 0x1F : 24.3 °C
+ *
+ * The 'value' parameter is in milliCelsius.
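+ *
+ * For example, the default HI6220_TEMP_LAG of 3500 m°C is programmed
+ * as DIV_ROUND_UP(3500, 785) = 5 steps, i.e. about 3.9 °C of hysteresis.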
+ */ +static inline void hi6220_thermal_set_lag(void __iomem *addr, int value) +{ + writel(DIV_ROUND_UP(value, HI6220_TEMP_STEP) & 0x1F, + addr + HI6220_TEMP0_LAG); +} - /* disable module firstly */ - writel(0x0, data->regs + TEMP0_EN); +static inline void hi6220_thermal_alarm_clear(void __iomem *addr, int value) +{ + writel(value, addr + HI6220_TEMP0_INT_CLR); +} - /* select sensor id */ - writel((sensor->id << 12), data->regs + TEMP0_CFG); +static inline void hi6220_thermal_alarm_enable(void __iomem *addr, int value) +{ + writel(value, addr + HI6220_TEMP0_INT_EN); +} - /* enable module */ - writel(0x1, data->regs + TEMP0_EN); +static inline void hi6220_thermal_alarm_set(void __iomem *addr, int temp) +{ + writel(hi6220_thermal_temp_to_step(temp) | 0x0FFFFFF00, + addr + HI6220_TEMP0_TH); +} - usleep_range(3000, 5000); +static inline void hi6220_thermal_reset_set(void __iomem *addr, int temp) +{ + writel(hi6220_thermal_temp_to_step(temp), addr + HI6220_TEMP0_RST_TH); +} - val = readl(data->regs + TEMP0_VALUE); - val = _step_to_temp(val); +static inline void hi6220_thermal_reset_enable(void __iomem *addr, int value) +{ + writel(value, addr + HI6220_TEMP0_RST_MSK); +} - mutex_unlock(&data->thermal_lock); +static inline void hi6220_thermal_enable(void __iomem *addr, int value) +{ + writel(value, addr + HI6220_TEMP0_EN); +} - return val; +static inline int hi6220_thermal_get_temperature(void __iomem *addr) +{ + return hi6220_thermal_step_to_temp(readl(addr + HI6220_TEMP0_VALUE)); } -static void hisi_thermal_enable_bind_irq_sensor - (struct hisi_thermal_data *data) +/* + * [0:6] lag register + * + * The temperature is coded in steps, cf. HI3660_TEMP_STEP. + * + * Min : 0x00 : 0.0 °C + * Max : 0x7F : 26.0 °C + * + */ +static inline void hi3660_thermal_set_lag(void __iomem *addr, + int id, int value) { - struct hisi_thermal_sensor *sensor; + writel(DIV_ROUND_UP(value, HI3660_TEMP_STEP) & 0x7F, + addr + HI3660_LAG(id)); +} - mutex_lock(&data->thermal_lock); +static inline void hi3660_thermal_alarm_clear(void __iomem *addr, + int id, int value) +{ + writel(value, addr + HI3660_INT_CLR(id)); +} - sensor = &data->sensors[data->irq_bind_sensor]; +static inline void hi3660_thermal_alarm_enable(void __iomem *addr, + int id, int value) +{ + writel(value, addr + HI3660_INT_EN(id)); +} - /* setting the hdak time */ - writel(0x0, data->regs + TEMP0_CFG); +static inline void hi3660_thermal_alarm_set(void __iomem *addr, + int id, int value) +{ + writel(value, addr + HI3660_TH(id)); +} + +static inline int hi3660_thermal_get_temperature(void __iomem *addr, int id) +{ + return hi3660_thermal_step_to_temp(readl(addr + HI3660_TEMP(id))); +} + +/* + * Temperature configuration register - Sensor selection + * + * Bits [19:12] + * + * 0x0: local sensor (default) + * 0x1: remote sensor 1 (ACPU cluster 1) + * 0x2: remote sensor 2 (ACPU cluster 0) + * 0x3: remote sensor 3 (G3D) + */ +static inline void hi6220_thermal_sensor_select(void __iomem *addr, int sensor) +{ + writel((readl(addr + HI6220_TEMP0_CFG) & ~HI6220_TEMP0_CFG_SS_MSK) | + (sensor << 12), addr + HI6220_TEMP0_CFG); +} + +/* + * Temperature configuration register - Hdak conversion polling interval + * + * Bits [5:4] + * + * 0x0 : 0.768 ms + * 0x1 : 6.144 ms + * 0x2 : 49.152 ms + * 0x3 : 393.216 ms + */ +static inline void hi6220_thermal_hdak_set(void __iomem *addr, int value) +{ + writel((readl(addr + HI6220_TEMP0_CFG) & ~HI6220_TEMP0_CFG_HDAK_MSK) | + (value << 4), addr + HI6220_TEMP0_CFG); +} + +static int hi6220_thermal_irq_handler(struct 
hisi_thermal_data *data) +{ + hi6220_thermal_alarm_clear(data->regs, 1); + return 0; +} + +static int hi3660_thermal_irq_handler(struct hisi_thermal_data *data) +{ + hi3660_thermal_alarm_clear(data->regs, data->sensor.id, 1); + return 0; +} + +static int hi6220_thermal_get_temp(struct hisi_thermal_data *data) +{ + return hi6220_thermal_get_temperature(data->regs); +} + +static int hi3660_thermal_get_temp(struct hisi_thermal_data *data) +{ + return hi3660_thermal_get_temperature(data->regs, data->sensor.id); +} + +static int hi6220_thermal_disable_sensor(struct hisi_thermal_data *data) +{ + /* disable sensor module */ + hi6220_thermal_enable(data->regs, 0); + hi6220_thermal_alarm_enable(data->regs, 0); + hi6220_thermal_reset_enable(data->regs, 0); + + clk_disable_unprepare(data->clk); + + return 0; +} + +static int hi3660_thermal_disable_sensor(struct hisi_thermal_data *data) +{ + /* disable sensor module */ + hi3660_thermal_alarm_enable(data->regs, data->sensor.id, 0); + return 0; +} + +static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data) +{ + struct hisi_thermal_sensor *sensor = &data->sensor; + int ret; + + /* enable clock for tsensor */ + ret = clk_prepare_enable(data->clk); + if (ret) + return ret; /* disable module firstly */ - writel(0x0, data->regs + TEMP0_RST_MSK); - writel(0x0, data->regs + TEMP0_EN); + hi6220_thermal_reset_enable(data->regs, 0); + hi6220_thermal_enable(data->regs, 0); /* select sensor id */ - writel((sensor->id << 12), data->regs + TEMP0_CFG); + hi6220_thermal_sensor_select(data->regs, sensor->id); + + /* setting the hdak time */ + hi6220_thermal_hdak_set(data->regs, 0); + + /* setting lag value between current temp and the threshold */ + hi6220_thermal_set_lag(data->regs, HI6220_TEMP_LAG); /* enable for interrupt */ - writel(_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00, - data->regs + TEMP0_TH); + hi6220_thermal_alarm_set(data->regs, sensor->thres_temp); - writel(_temp_to_step(HISI_TEMP_RESET), data->regs + TEMP0_RST_TH); + hi6220_thermal_reset_set(data->regs, HI6220_TEMP_RESET); /* enable module */ - writel(0x1, data->regs + TEMP0_RST_MSK); - writel(0x1, data->regs + TEMP0_EN); - - writel(0x0, data->regs + TEMP0_INT_CLR); - writel(0x1, data->regs + TEMP0_INT_EN); + hi6220_thermal_reset_enable(data->regs, 1); + hi6220_thermal_enable(data->regs, 1); - usleep_range(3000, 5000); + hi6220_thermal_alarm_clear(data->regs, 0); + hi6220_thermal_alarm_enable(data->regs, 1); - mutex_unlock(&data->thermal_lock); + return 0; } -static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) +static int hi3660_thermal_enable_sensor(struct hisi_thermal_data *data) { - mutex_lock(&data->thermal_lock); + unsigned int value; + struct hisi_thermal_sensor *sensor = &data->sensor; - /* disable sensor module */ - writel(0x0, data->regs + TEMP0_INT_EN); - writel(0x0, data->regs + TEMP0_RST_MSK); - writel(0x0, data->regs + TEMP0_EN); + /* disable interrupt */ + hi3660_thermal_alarm_enable(data->regs, sensor->id, 0); - mutex_unlock(&data->thermal_lock); -} + /* setting lag value between current temp and the threshold */ + hi3660_thermal_set_lag(data->regs, sensor->id, HI3660_TEMP_LAG); -static int hisi_thermal_get_temp(void *_sensor, int *temp) -{ - struct hisi_thermal_sensor *sensor = _sensor; - struct hisi_thermal_data *data = sensor->thermal; + /* set interrupt threshold */ + value = hi3660_thermal_temp_to_step(sensor->thres_temp); + hi3660_thermal_alarm_set(data->regs, sensor->id, value); - int sensor_id = -1, i; - long max_temp = 0; + /* enable 
interrupt */ + hi3660_thermal_alarm_clear(data->regs, sensor->id, 1); + hi3660_thermal_alarm_enable(data->regs, sensor->id, 1); - *temp = hisi_thermal_get_sensor_temp(data, sensor); + return 0; +} - sensor->sensor_temp = *temp; +static int hi6220_thermal_probe(struct hisi_thermal_data *data) +{ + struct platform_device *pdev = data->pdev; + struct device *dev = &pdev->dev; + struct resource *res; + int ret; - for (i = 0; i < HISI_MAX_SENSORS; i++) { - if (!data->sensors[i].tzd) - continue; + data->get_temp = hi6220_thermal_get_temp; + data->enable_sensor = hi6220_thermal_enable_sensor; + data->disable_sensor = hi6220_thermal_disable_sensor; + data->irq_handler = hi6220_thermal_irq_handler; - if (data->sensors[i].sensor_temp >= max_temp) { - max_temp = data->sensors[i].sensor_temp; - sensor_id = i; - } + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->regs = devm_ioremap_resource(dev, res); + if (IS_ERR(data->regs)) { + dev_err(dev, "failed to get io address\n"); + return PTR_ERR(data->regs); } - /* If no sensor has been enabled, then skip to enable irq */ - if (sensor_id == -1) - return 0; - - mutex_lock(&data->thermal_lock); - data->irq_bind_sensor = sensor_id; - mutex_unlock(&data->thermal_lock); - - dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%d, thres=%d\n", - sensor->id, data->irq_enabled, *temp, sensor->thres_temp); - /* - * Bind irq to sensor for two cases: - * Reenable alarm IRQ if temperature below threshold; - * if irq has been enabled, always set it; - */ - if (data->irq_enabled) { - hisi_thermal_enable_bind_irq_sensor(data); - return 0; + data->clk = devm_clk_get(dev, "thermal_clk"); + if (IS_ERR(data->clk)) { + ret = PTR_ERR(data->clk); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to get thermal clk: %d\n", ret); + return ret; } - if (max_temp < sensor->thres_temp) { - data->irq_enabled = true; - hisi_thermal_enable_bind_irq_sensor(data); - enable_irq(data->irq); - } + data->irq = platform_get_irq(pdev, 0); + if (data->irq < 0) + return data->irq; + + data->sensor.id = HI6220_DEFAULT_SENSOR; return 0; } -static struct thermal_zone_of_device_ops hisi_of_thermal_ops = { - .get_temp = hisi_thermal_get_temp, -}; +static int hi3660_thermal_probe(struct hisi_thermal_data *data) +{ + struct platform_device *pdev = data->pdev; + struct device *dev = &pdev->dev; + struct resource *res; + + data->get_temp = hi3660_thermal_get_temp; + data->enable_sensor = hi3660_thermal_enable_sensor; + data->disable_sensor = hi3660_thermal_disable_sensor; + data->irq_handler = hi3660_thermal_irq_handler; -static irqreturn_t hisi_thermal_alarm_irq(int irq, void *dev) + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->regs = devm_ioremap_resource(dev, res); + if (IS_ERR(data->regs)) { + dev_err(dev, "failed to get io address\n"); + return PTR_ERR(data->regs); + } + + data->irq = platform_get_irq(pdev, 0); + if (data->irq < 0) + return data->irq; + + data->sensor.id = HI3660_DEFAULT_SENSOR; + + return 0; +} + +static int hisi_thermal_get_temp(void *__data, int *temp) { - struct hisi_thermal_data *data = dev; + struct hisi_thermal_data *data = __data; + struct hisi_thermal_sensor *sensor = &data->sensor; + + *temp = data->get_temp(data); - disable_irq_nosync(irq); - data->irq_enabled = false; + dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n", + sensor->id, *temp, sensor->thres_temp); - return IRQ_WAKE_THREAD; + return 0; } +static const struct thermal_zone_of_device_ops hisi_of_thermal_ops = { + .get_temp = hisi_thermal_get_temp, +}; + static irqreturn_t 
hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; - struct hisi_thermal_sensor *sensor; - int i; + struct hisi_thermal_sensor *sensor = &data->sensor; + int temp = 0; - mutex_lock(&data->thermal_lock); - sensor = &data->sensors[data->irq_bind_sensor]; + data->irq_handler(data); - dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", - sensor->thres_temp / 1000); - mutex_unlock(&data->thermal_lock); + hisi_thermal_get_temp(data, &temp); - for (i = 0; i < HISI_MAX_SENSORS; i++) { - if (!data->sensors[i].tzd) - continue; + if (temp >= sensor->thres_temp) { + dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n", + temp, sensor->thres_temp); - thermal_zone_device_update(data->sensors[i].tzd, + thermal_zone_device_update(data->sensor.tzd, THERMAL_EVENT_UNSPECIFIED); + + } else { + dev_crit(&data->pdev->dev, "THERMAL ALARM stopped: %d < %d\n", + temp, sensor->thres_temp); } return IRQ_HANDLED; @@ -246,17 +474,14 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) static int hisi_thermal_register_sensor(struct platform_device *pdev, struct hisi_thermal_data *data, - struct hisi_thermal_sensor *sensor, - int index) + struct hisi_thermal_sensor *sensor) { int ret, i; const struct thermal_trip *trip; - sensor->id = index; - sensor->thermal = data; - sensor->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev, - sensor->id, sensor, &hisi_of_thermal_ops); + sensor->id, data, + &hisi_of_thermal_ops); if (IS_ERR(sensor->tzd)) { ret = PTR_ERR(sensor->tzd); sensor->tzd = NULL; @@ -278,7 +503,14 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev, } static const struct of_device_id of_hisi_thermal_match[] = { - { .compatible = "hisilicon,tsensor" }, + { + .compatible = "hisilicon,tsensor", + .data = hi6220_thermal_probe + }, + { + .compatible = "hisilicon,hi3660-tsensor", + .data = hi3660_thermal_probe + }, { /* end */ } }; MODULE_DEVICE_TABLE(of, of_hisi_thermal_match); @@ -295,88 +527,63 @@ static void hisi_thermal_toggle_sensor(struct hisi_thermal_sensor *sensor, static int hisi_thermal_probe(struct platform_device *pdev) { struct hisi_thermal_data *data; - struct resource *res; - int i; + int const (*platform_probe)(struct hisi_thermal_data *); + struct device *dev = &pdev->dev; int ret; - data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; - mutex_init(&data->thermal_lock); data->pdev = pdev; + platform_set_drvdata(pdev, data); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - data->regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(data->regs)) { - dev_err(&pdev->dev, "failed to get io address\n"); - return PTR_ERR(data->regs); + platform_probe = of_device_get_match_data(dev); + if (!platform_probe) { + dev_err(dev, "failed to get probe func\n"); + return -EINVAL; } - data->irq = platform_get_irq(pdev, 0); - if (data->irq < 0) - return data->irq; - - ret = devm_request_threaded_irq(&pdev->dev, data->irq, - hisi_thermal_alarm_irq, - hisi_thermal_alarm_irq_thread, - 0, "hisi_thermal", data); - if (ret < 0) { - dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); + ret = platform_probe(data); + if (ret) return ret; - } - platform_set_drvdata(pdev, data); - - data->clk = devm_clk_get(&pdev->dev, "thermal_clk"); - if (IS_ERR(data->clk)) { - ret = PTR_ERR(data->clk); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "failed to get thermal clk: %d\n", ret); + ret = hisi_thermal_register_sensor(pdev, data, + 
&data->sensor); + if (ret) { + dev_err(dev, "failed to register thermal sensor: %d\n", ret); return ret; } - /* enable clock for thermal */ - ret = clk_prepare_enable(data->clk); + ret = data->enable_sensor(data); if (ret) { - dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret); + dev_err(dev, "Failed to setup the sensor: %d\n", ret); return ret; } - hisi_thermal_enable_bind_irq_sensor(data); - irq_get_irqchip_state(data->irq, IRQCHIP_STATE_MASKED, - &data->irq_enabled); - - for (i = 0; i < HISI_MAX_SENSORS; ++i) { - ret = hisi_thermal_register_sensor(pdev, data, - &data->sensors[i], i); - if (ret) - dev_err(&pdev->dev, - "failed to register thermal sensor: %d\n", ret); - else - hisi_thermal_toggle_sensor(&data->sensors[i], true); + if (data->irq) { + ret = devm_request_threaded_irq(dev, data->irq, NULL, + hisi_thermal_alarm_irq_thread, + IRQF_ONESHOT, "hisi_thermal", data); + if (ret < 0) { + dev_err(dev, "failed to request alarm irq: %d\n", ret); + return ret; + } } + hisi_thermal_toggle_sensor(&data->sensor, true); + return 0; } static int hisi_thermal_remove(struct platform_device *pdev) { struct hisi_thermal_data *data = platform_get_drvdata(pdev); - int i; + struct hisi_thermal_sensor *sensor = &data->sensor; - for (i = 0; i < HISI_MAX_SENSORS; i++) { - struct hisi_thermal_sensor *sensor = &data->sensors[i]; - - if (!sensor->tzd) - continue; - - hisi_thermal_toggle_sensor(sensor, false); - } + hisi_thermal_toggle_sensor(sensor, false); - hisi_thermal_disable_sensor(data); - clk_disable_unprepare(data->clk); + data->disable_sensor(data); return 0; } @@ -386,10 +593,7 @@ static int hisi_thermal_suspend(struct device *dev) { struct hisi_thermal_data *data = dev_get_drvdata(dev); - hisi_thermal_disable_sensor(data); - data->irq_enabled = false; - - clk_disable_unprepare(data->clk); + data->disable_sensor(data); return 0; } @@ -398,12 +602,7 @@ static int hisi_thermal_resume(struct device *dev) { struct hisi_thermal_data *data = dev_get_drvdata(dev); - clk_prepare_enable(data->clk); - - data->irq_enabled = true; - hisi_thermal_enable_bind_irq_sensor(data); - - return 0; + return data->enable_sensor(data); } #endif diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index bcef2e7c4ec96f1cfc662019ccf0440d52d26328..1eea63caa451b0c4958b44069c0c781413240547 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -31,8 +31,7 @@ * If the temperature is higher than a trip point, * a. if the trend is THERMAL_TREND_RAISING, use higher cooling * state for this trip point - * b. if the trend is THERMAL_TREND_DROPPING, use lower cooling - * state for this trip point + * b. if the trend is THERMAL_TREND_DROPPING, do nothing * c. if the trend is THERMAL_TREND_RAISE_FULL, use upper limit * for this trip point * d. 
if the trend is THERMAL_TREND_DROP_FULL, use lower limit @@ -94,9 +93,11 @@ static unsigned long get_target_state(struct thermal_instance *instance, if (!throttle) next_target = THERMAL_NO_TARGET; } else { - next_target = cur_state - 1; - if (next_target > instance->upper) - next_target = instance->upper; + if (!throttle) { + next_target = cur_state - 1; + if (next_target > instance->upper) + next_target = instance->upper; + } } break; case THERMAL_TREND_DROP_FULL: diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index bdf0e6e899914803e710d81a764e46c6061718ed..faf50df816224feb643d75f79c75aa2800297474 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1764,7 +1764,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) { struct n_tty_data *ldata = tty->disc_data; - if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) { + if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) { bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE); ldata->line_start = ldata->read_tail; if (!L_ICANON(tty) || !read_cnt(ldata)) { @@ -2427,7 +2427,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file, return put_user(tty_chars_in_buffer(tty), (int __user *) arg); case TIOCINQ: down_write(&tty->termios_rwsem); - if (L_ICANON(tty)) + if (L_ICANON(tty) && !L_EXTPROC(tty)) retval = inq_canon(ldata); else retval = read_cnt(ldata); diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c index f8c31070a3371ebb313b86a4912afc0a06ec96ab..2ffebb7e5ff8ad1e0adcc4c9bc04698228441a02 100644 --- a/drivers/tty/serial/8250/8250_fintek.c +++ b/drivers/tty/serial/8250/8250_fintek.c @@ -121,7 +121,7 @@ static int fintek_8250_rs485_config(struct uart_port *port, if ((!!(rs485->flags & SER_RS485_RTS_ON_SEND)) == (!!(rs485->flags & SER_RS485_RTS_AFTER_SEND))) - rs485->flags &= SER_RS485_ENABLED; + rs485->flags &= ~SER_RS485_ENABLED; else config |= RS485_URA; diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 22d32d295c5b0f2fdb00aca232292b1f665a3792..b80ea872b039e77cac8b56e10763ef2cf0adea1b 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -5568,6 +5568,9 @@ static struct pci_device_id serial_pci_tbl[] = { { PCI_DEVICE(0x1601, 0x0800), .driver_data = pbn_b0_4_1250000 }, { PCI_DEVICE(0x1601, 0xa801), .driver_data = pbn_b0_4_1250000 }, + /* Amazon PCI serial device */ + { PCI_DEVICE(0x1d0f, 0x8250), .driver_data = pbn_b0_1_115200 }, + /* * These entries match devices with class COMMUNICATION_SERIAL, * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 1ef31e3ee4a1f37886e829deb8668883f8e21044..f6e4373a88504b2684dfe86b4c929eb57e47a41f 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2526,8 +2526,11 @@ static void serial8250_set_divisor(struct uart_port *port, unsigned int baud, serial_dl_write(up, quot); /* XR17V35x UARTs have an extra fractional divisor register (DLD) */ - if (up->port.type == PORT_XR17V35X) + if (up->port.type == PORT_XR17V35X) { + /* Preserve bits not related to baudrate; DLD[7:4]. 
*/ + quot_frac |= serial_port_in(port, 0x2) & 0xf0; serial_port_out(port, 0x2, quot_frac); + } } static unsigned int serial8250_get_baud_rate(struct uart_port *port, diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index a70356dad1b75a4a1e4a5d68344aaf51f6f96ef7..521a6e4507559461ef9de2ab651e7576146c7acb 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2239,12 +2239,14 @@ static void serial_imx_enable_wakeup(struct imx_port *sport, bool on) val &= ~UCR3_AWAKEN; writel(val, sport->port.membase + UCR3); - val = readl(sport->port.membase + UCR1); - if (on) - val |= UCR1_RTSDEN; - else - val &= ~UCR1_RTSDEN; - writel(val, sport->port.membase + UCR1); + if (sport->have_rtscts) { + val = readl(sport->port.membase + UCR1); + if (on) + val |= UCR1_RTSDEN; + else + val &= ~UCR1_RTSDEN; + writel(val, sport->port.membase + UCR1); + } } static int imx_serial_port_suspend_noirq(struct device *dev) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 2e2b88aa3004b27502601d24d54910ce20fd7019..dce39de6e3d5742f68953fc9fd3f8d7f6402ef2d 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -968,6 +968,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, } } else { retval = uart_startup(tty, state, 1); + if (retval == 0) + tty_port_set_initialized(port, true); if (retval > 0) retval = 0; } diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 701c085bb19b8e03f67316133fa5a4be7e0b05fe..53cbf4ebef10a7e0777d46754911a1ef79e2a401 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -243,8 +243,10 @@ static void sysrq_handle_showallcpus(int key) * architecture has no support for it: */ if (!trigger_all_cpu_backtrace()) { - struct pt_regs *regs = get_irq_regs(); + struct pt_regs *regs = NULL; + if (in_irq()) + regs = get_irq_regs(); if (regs) { pr_info("CPU%d:\n", smp_processor_id()); show_regs(regs); @@ -263,7 +265,10 @@ static struct sysrq_key_op sysrq_showallcpus_op = { static void sysrq_handle_showregs(int key) { - struct pt_regs *regs = get_irq_regs(); + struct pt_regs *regs = NULL; + + if (in_irq()) + regs = get_irq_regs(); if (regs) show_regs(regs); perf_event_print_debug(); diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index c220c2c0893fbc31aaf373802449d4dba6b0e4dd..e99f1c5b1df665d9b351fdfb87ce9aee69c938ce 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -446,7 +446,7 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string); * Callers other than flush_to_ldisc() need to exclude the kworker * from concurrent use of the line discipline, see paste_selection(). * - * Returns the number of bytes not processed + * Returns the number of bytes processed */ int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p, char *f, int count) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 734a635e73632910d3d418a1eb980b3430a0b8b5..8d9f9a803b42987f3f4cdd994ede69f7bba129a6 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1543,6 +1543,9 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) "%s: %s driver does not set tty->port. This will crash the kernel later. 
Fix the driver!\n", __func__, tty->driver->name); + retval = tty_ldisc_lock(tty, 5 * HZ); + if (retval) + goto err_release_lock; tty->port->itty = tty; /* @@ -1553,6 +1556,7 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) retval = tty_ldisc_setup(tty, tty->link); if (retval) goto err_release_tty; + tty_ldisc_unlock(tty); /* Return the tty locked so that it cannot vanish under the caller */ return tty; @@ -1565,9 +1569,11 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) /* call the tty release_tty routine to clean out this slot */ err_release_tty: - tty_unlock(tty); + tty_ldisc_unlock(tty); tty_info_ratelimited(tty, "ldisc open failed (%d), clearing slot %d\n", retval, idx); +err_release_lock: + tty_unlock(tty); release_tty(tty, idx); return ERR_PTR(retval); } diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 68947f6de5ad6339adea804182597229c3eb1d38..3a9e2a2fd4c64c79cad7707300c46c15eefec9a7 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -271,10 +271,13 @@ const struct file_operations tty_ldiscs_proc_fops = { struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { + struct tty_ldisc *ld; + ldsem_down_read(&tty->ldisc_sem, MAX_SCHEDULE_TIMEOUT); - if (!tty->ldisc) + ld = tty->ldisc; + if (!ld) ldsem_up_read(&tty->ldisc_sem); - return tty->ldisc; + return ld; } EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); @@ -333,7 +336,7 @@ static inline void __tty_ldisc_unlock(struct tty_struct *tty) ldsem_up_write(&tty->ldisc_sem); } -static int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) +int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { int ret; @@ -344,7 +347,7 @@ static int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) return 0; } -static void tty_ldisc_unlock(struct tty_struct *tty) +void tty_ldisc_unlock(struct tty_struct *tty) { clear_bit(TTY_LDISC_HALTED, &tty->flags); __tty_ldisc_unlock(tty); @@ -488,41 +491,6 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) tty_ldisc_debug(tty, "%p: closed\n", ld); } -/** - * tty_ldisc_restore - helper for tty ldisc change - * @tty: tty to recover - * @old: previous ldisc - * - * Restore the previous line discipline or N_TTY when a line discipline - * change fails due to an open error - */ - -static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) -{ - struct tty_ldisc *new_ldisc; - int r; - - /* There is an outstanding reference here so this is safe */ - old = tty_ldisc_get(tty, old->ops->num); - WARN_ON(IS_ERR(old)); - tty->ldisc = old; - tty_set_termios_ldisc(tty, old->ops->num); - if (tty_ldisc_open(tty, old) < 0) { - tty_ldisc_put(old); - /* This driver is always present */ - new_ldisc = tty_ldisc_get(tty, N_TTY); - if (IS_ERR(new_ldisc)) - panic("n_tty: get"); - tty->ldisc = new_ldisc; - tty_set_termios_ldisc(tty, N_TTY); - r = tty_ldisc_open(tty, new_ldisc); - if (r < 0) - panic("Couldn't open N_TTY ldisc for " - "%s --- error %d.", - tty_name(tty), r); - } -} - /** * tty_set_ldisc - set line discipline * @tty: the terminal to set @@ -536,12 +504,7 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) int tty_set_ldisc(struct tty_struct *tty, int disc) { - int retval; - struct tty_ldisc *old_ldisc, *new_ldisc; - - new_ldisc = tty_ldisc_get(tty, disc); - if (IS_ERR(new_ldisc)) - return PTR_ERR(new_ldisc); + int retval, old_disc; tty_lock(tty); retval = tty_ldisc_lock(tty, 5 * HZ); @@ -554,7 +517,8 @@ int tty_set_ldisc(struct tty_struct *tty, int 
disc) } /* Check the no-op case */ - if (tty->ldisc->ops->num == disc) + old_disc = tty->ldisc->ops->num; + if (old_disc == disc) goto out; if (test_bit(TTY_HUPPED, &tty->flags)) { @@ -563,34 +527,25 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) goto out; } - old_ldisc = tty->ldisc; - - /* Shutdown the old discipline. */ - tty_ldisc_close(tty, old_ldisc); - - /* Now set up the new line discipline. */ - tty->ldisc = new_ldisc; - tty_set_termios_ldisc(tty, disc); - - retval = tty_ldisc_open(tty, new_ldisc); + retval = tty_ldisc_reinit(tty, disc); if (retval < 0) { /* Back to the old one or N_TTY if we can't */ - tty_ldisc_put(new_ldisc); - tty_ldisc_restore(tty, old_ldisc); + if (tty_ldisc_reinit(tty, old_disc) < 0) { + pr_err("tty: TIOCSETD failed, reinitializing N_TTY\n"); + if (tty_ldisc_reinit(tty, N_TTY) < 0) { + /* At this point we have tty->ldisc == NULL. */ + pr_err("tty: reinitializing N_TTY failed\n"); + } + } } - if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) { + if (tty->ldisc && tty->ldisc->ops->num != old_disc && + tty->ops->set_ldisc) { down_read(&tty->termios_rwsem); tty->ops->set_ldisc(tty); up_read(&tty->termios_rwsem); } - /* At this point we hold a reference to the new ldisc and a - reference to the old ldisc, or we hold two references to - the old ldisc (if it was restored as part of error cleanup - above). In either case, releasing a single reference from - the old ldisc is correct. */ - new_ldisc = old_ldisc; out: tty_ldisc_unlock(tty); @@ -598,7 +553,6 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) already running */ tty_buffer_restart_work(tty->port); err: - tty_ldisc_put(new_ldisc); /* drop the extra reference */ tty_unlock(tty); return retval; } @@ -659,10 +613,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) int retval; ld = tty_ldisc_get(tty, disc); - if (IS_ERR(ld)) { - BUG_ON(disc == N_TTY); + if (IS_ERR(ld)) return PTR_ERR(ld); - } if (tty->ldisc) { tty_ldisc_close(tty, tty->ldisc); @@ -674,10 +626,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, tty->ldisc); if (retval) { - if (!WARN_ON(disc == N_TTY)) { - tty_ldisc_put(tty->ldisc); - tty->ldisc = NULL; - } + tty_ldisc_put(tty->ldisc); + tty->ldisc = NULL; } return retval; } diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index ea20b2cc189fe880ddeab1c102d793316778f334..34d23cc99fbd6c5f3735dcd4d5baf2a8fc45178b 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -375,7 +375,7 @@ static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags) res = usb_submit_urb(acm->read_urbs[index], mem_flags); if (res) { - if (res != -EPERM) { + if (res != -EPERM && res != -ENODEV) { dev_err(&acm->data->dev, "urb %d failed submission with %d\n", index, res); @@ -1706,6 +1706,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0ace, 0x1611), /* ZyDAS 56K USB MODEM - new version */ .driver_info = SINGLE_RX_URB, /* firmware bug */ }, + { USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */ + .driver_info = SINGLE_RX_URB, + }, { USB_DEVICE(0x22b8, 0x7000), /* Motorola Q Phone */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 5008f71fb08d3b132445b72764b93d61059abf54..7c54a19b20e07b58f6aae39f40c3736c84d67d23 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -550,6 +550,9 @@ static int 
usb_parse_configuration(struct usb_device *dev, int cfgidx, unsigned iad_num = 0; memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE); + nintf = nintf_orig = config->desc.bNumInterfaces; + config->desc.bNumInterfaces = 0; // Adjusted later + if (config->desc.bDescriptorType != USB_DT_CONFIG || config->desc.bLength < USB_DT_CONFIG_SIZE || config->desc.bLength > size) { @@ -563,7 +566,6 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, buffer += config->desc.bLength; size -= config->desc.bLength; - nintf = nintf_orig = config->desc.bNumInterfaces; if (nintf > USB_MAXINTERFACES) { dev_warn(ddev, "config %d has too many interfaces: %d, " "using maximum allowed: %d\n", @@ -900,14 +902,25 @@ void usb_release_bos_descriptor(struct usb_device *dev) } } +static const __u8 bos_desc_len[256] = { + [USB_CAP_TYPE_WIRELESS_USB] = USB_DT_USB_WIRELESS_CAP_SIZE, + [USB_CAP_TYPE_EXT] = USB_DT_USB_EXT_CAP_SIZE, + [USB_SS_CAP_TYPE] = USB_DT_USB_SS_CAP_SIZE, + [USB_SSP_CAP_TYPE] = USB_DT_USB_SSP_CAP_SIZE(1), + [CONTAINER_ID_TYPE] = USB_DT_USB_SS_CONTN_ID_SIZE, + [USB_PTM_CAP_TYPE] = USB_DT_USB_PTM_ID_SIZE, +}; + /* Get BOS descriptor set */ int usb_get_bos_descriptor(struct usb_device *dev) { struct device *ddev = &dev->dev; struct usb_bos_descriptor *bos; struct usb_dev_cap_header *cap; + struct usb_ssp_cap_descriptor *ssp_cap; unsigned char *buffer; - int length, total_len, num, i; + int length, total_len, num, i, ssac; + __u8 cap_type; int ret; bos = kzalloc(sizeof(struct usb_bos_descriptor), GFP_KERNEL); @@ -960,7 +973,13 @@ int usb_get_bos_descriptor(struct usb_device *dev) dev->bos->desc->bNumDeviceCaps = i; break; } + cap_type = cap->bDevCapabilityType; length = cap->bLength; + if (bos_desc_len[cap_type] && length < bos_desc_len[cap_type]) { + dev->bos->desc->bNumDeviceCaps = i; + break; + } + total_len -= length; if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { @@ -968,7 +987,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) continue; } - switch (cap->bDevCapabilityType) { + switch (cap_type) { case USB_CAP_TYPE_WIRELESS_USB: /* Wireless USB cap descriptor is handled by wusb */ break; @@ -981,8 +1000,11 @@ int usb_get_bos_descriptor(struct usb_device *dev) (struct usb_ss_cap_descriptor *)buffer; break; case USB_SSP_CAP_TYPE: - dev->bos->ssp_cap = - (struct usb_ssp_cap_descriptor *)buffer; + ssp_cap = (struct usb_ssp_cap_descriptor *)buffer; + ssac = (le32_to_cpu(ssp_cap->bmAttributes) & + USB_SSP_SUBLINK_SPEED_ATTRIBS); + if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac)) + dev->bos->ssp_cap = ssp_cap; break; case CONTAINER_ID_TYPE: dev->bos->ss_id = diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index fa619354c5c562438dc414efac0d5556731dd8d6..893ebae5102924ef778f381d76286a947fedc77b 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -134,42 +134,38 @@ enum snoop_when { #define USB_DEVICE_DEV MKDEV(USB_DEVICE_MAJOR, 0) /* Limit on the total amount of memory we can allocate for transfers */ -static unsigned usbfs_memory_mb = 16; +static u32 usbfs_memory_mb = 16; module_param(usbfs_memory_mb, uint, 0644); MODULE_PARM_DESC(usbfs_memory_mb, "maximum MB allowed for usbfs buffers (0 = no limit)"); /* Hard limit, necessary to avoid arithmetic overflow */ -#define USBFS_XFER_MAX (UINT_MAX / 2 - 1000000) +#define USBFS_XFER_MAX (UINT_MAX / 2 - 1000000) -static atomic_t usbfs_memory_usage; /* Total memory currently allocated */ +static atomic64_t usbfs_memory_usage; /* Total memory currently allocated */ /* Check whether it's okay to allocate 
more memory for a transfer */ -static int usbfs_increase_memory_usage(unsigned amount) +static int usbfs_increase_memory_usage(u64 amount) { - unsigned lim; + u64 lim; - /* - * Convert usbfs_memory_mb to bytes, avoiding overflows. - * 0 means use the hard limit (effectively unlimited). - */ lim = ACCESS_ONCE(usbfs_memory_mb); - if (lim == 0 || lim > (USBFS_XFER_MAX >> 20)) - lim = USBFS_XFER_MAX; - else - lim <<= 20; + lim <<= 20; - atomic_add(amount, &usbfs_memory_usage); - if (atomic_read(&usbfs_memory_usage) <= lim) - return 0; - atomic_sub(amount, &usbfs_memory_usage); - return -ENOMEM; + atomic64_add(amount, &usbfs_memory_usage); + + if (lim > 0 && atomic64_read(&usbfs_memory_usage) > lim) { + atomic64_sub(amount, &usbfs_memory_usage); + return -ENOMEM; + } + + return 0; } /* Memory for a transfer is being deallocated */ -static void usbfs_decrease_memory_usage(unsigned amount) +static void usbfs_decrease_memory_usage(u64 amount) { - atomic_sub(amount, &usbfs_memory_usage); + atomic64_sub(amount, &usbfs_memory_usage); } static int connected(struct usb_dev_state *ps) @@ -1191,7 +1187,7 @@ static int proc_bulk(struct usb_dev_state *ps, void __user *arg) if (!usb_maxpacket(dev, pipe, !(bulk.ep & USB_DIR_IN))) return -EINVAL; len1 = bulk.len; - if (len1 >= USBFS_XFER_MAX) + if (len1 >= (INT_MAX - sizeof(struct urb))) return -EINVAL; ret = usbfs_increase_memory_usage(len1 + sizeof(struct urb)); if (ret) @@ -1458,13 +1454,19 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb int number_of_packets = 0; unsigned int stream_id = 0; void *buf; - - if (uurb->flags & ~(USBDEVFS_URB_ISO_ASAP | - USBDEVFS_URB_SHORT_NOT_OK | + unsigned long mask = USBDEVFS_URB_SHORT_NOT_OK | USBDEVFS_URB_BULK_CONTINUATION | USBDEVFS_URB_NO_FSBR | USBDEVFS_URB_ZERO_PACKET | - USBDEVFS_URB_NO_INTERRUPT)) + USBDEVFS_URB_NO_INTERRUPT; + /* USBDEVFS_URB_ISO_ASAP is a special case */ + if (uurb->type == USBDEVFS_URB_TYPE_ISO) + mask |= USBDEVFS_URB_ISO_ASAP; + + if (uurb->flags & ~mask) + return -EINVAL; + + if ((unsigned int)uurb->buffer_length >= USBFS_XFER_MAX) return -EINVAL; if (uurb->buffer_length > 0 && !uurb->buffer) return -EINVAL; @@ -1584,10 +1586,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb return -EINVAL; } - if (uurb->buffer_length >= USBFS_XFER_MAX) { - ret = -EINVAL; - goto error; - } if (uurb->buffer_length > 0 && !access_ok(is_in ? 
VERIFY_WRITE : VERIFY_READ, uurb->buffer, uurb->buffer_length)) { diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 706b3d6a7614063995a39d51210712365f5ef6e5..d0d3f9ef9f102c4a4c4b3dc146f809606fa9fe08 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4925,6 +4925,15 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, usb_put_dev(udev); if ((status == -ENOTCONN) || (status == -ENOTSUPP)) break; + + /* When halfway through our retry count, power-cycle the port */ + if (i == (SET_CONFIG_TRIES / 2) - 1) { + dev_info(&port_dev->dev, "attempt power cycle\n"); + usb_hub_set_port_power(hdev, hub, port1, false); + msleep(2 * hub_power_on_good_delay(hub)); + usb_hub_set_port_power(hdev, hub, port1, true); + msleep(hub_power_on_good_delay(hub)); + } } if (hub->hdev->parent || !hcd->driver->port_handed_over || diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 37c418e581fbe33036334344cf589ccb4cb746be..c05c4f87775043e271574458019759b92b74d1df 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -57,10 +57,11 @@ static const struct usb_device_id usb_quirk_list[] = { /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, - /* Logitech HD Pro Webcams C920, C920-C and C930e */ + /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT }, /* Logitech ConferenceCam CC3000e */ { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT }, @@ -151,6 +152,12 @@ static const struct usb_device_id usb_quirk_list[] = { /* appletouch */ { USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */ + { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM }, + + /* ELSA MicroLink 56K */ + { USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */ { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM }, diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index c61ddbf94bc740fc38b0ec130aeeb5f66cda38db..16c67120d72b24ccaeae16bb8acdd7d9ec28646d 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3092,15 +3092,10 @@ void dwc3_gadget_exit(struct dwc3 *dwc) int dwc3_gadget_suspend(struct dwc3 *dwc) { - int ret; - if (!dwc->gadget_driver) return 0; - ret = dwc3_gadget_run_stop(dwc, false, false); - if (ret < 0) - return ret; - + dwc3_gadget_run_stop(dwc, false, false); dwc3_disconnect_gadget(dwc); __dwc3_gadget_stop(dwc); diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index afc581a1aa3f474f1d8d24085759b9b2b704ec2d..b47ef9e3f93996a9314dd9b148c513d1d10004f0 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -150,7 +150,6 @@ int config_ep_by_speed(struct usb_gadget *g, struct usb_function *f, struct usb_ep *_ep) { - struct usb_composite_dev *cdev = get_gadget_data(g); struct usb_endpoint_descriptor *chosen_desc = NULL; struct usb_descriptor_header **speed_desc = NULL; @@ -229,8 +228,12 @@ int config_ep_by_speed(struct usb_gadget *g, _ep->maxburst = comp_desc->bMaxBurst + 1; break; default: - if (comp_desc->bMaxBurst 
!= 0) + if (comp_desc->bMaxBurst != 0) { + struct usb_composite_dev *cdev; + + cdev = get_gadget_data(g); ERROR(cdev, "ep0 bMaxBurst must be 0\n"); + } _ep->maxburst = 1; break; } diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 2ef4377005dbd963075a78341e3107ba94119576..b1d22d8c9f7ef296283a98986741639a53800b2e 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -300,6 +300,7 @@ static ssize_t gadget_dev_desc_UDC_store(struct config_item *item, ret = unregister_gadget(gi); if (ret) goto err; + kfree(name); } else { if (gi->composite.gadget_driver.udc_name) { ret = -EBUSY; diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 273320fa30ae2ebb01b6652b93ce2ecea62d5f2b..d90bf57ba30ee0e34d120f964e61ec5b4290aa71 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1015,7 +1015,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) else ret = ep->status; goto error_mutex; - } else if (!(req = usb_ep_alloc_request(ep->ep, GFP_KERNEL))) { + } else if (!(req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC))) { ret = -ENOMEM; } else { req->buf = data; @@ -2262,9 +2262,18 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, int i; if (len < sizeof(*d) || - d->bFirstInterfaceNumber >= ffs->interfaces_count || - !d->Reserved1) + d->bFirstInterfaceNumber >= ffs->interfaces_count) return -EINVAL; + if (d->Reserved1 != 1) { + /* + * According to the spec, Reserved1 must be set to 1 + * but older kernels incorrectly rejected non-zero + * values. We fix it here to avoid returning EINVAL + * in response to values we used to accept. + */ + pr_debug("usb_ext_compat_desc::Reserved1 forced to 1\n"); + d->Reserved1 = 1; + } for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i) if (d->Reserved2[i]) return -EINVAL; @@ -3716,7 +3725,8 @@ static void ffs_closed(struct ffs_data *ffs) ci = opts->func_inst.group.cg_item.ci_parent->ci_parent; ffs_dev_unlock(); - unregister_gadget_item(ci); + if (test_bit(FFS_FL_BOUND, &ffs->flags)) + unregister_gadget_item(ci); return; done: ffs_dev_unlock(); diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index c7689d05356c4f57f4c7f37559a0c5d1a7fc2ec6..f8a1881609a2c808f690f6d31ea6bcaf118b8bb4 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -594,6 +594,14 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) opts->streaming_maxpacket = clamp(opts->streaming_maxpacket, 1U, 3072U); opts->streaming_maxburst = min(opts->streaming_maxburst, 15U); + /* For SS, wMaxPacketSize has to be 1024 if bMaxBurst is not 0 */ + if (opts->streaming_maxburst && + (opts->streaming_maxpacket % 1024) != 0) { + opts->streaming_maxpacket = roundup(opts->streaming_maxpacket, 1024); + INFO(cdev, "overriding streaming_maxpacket to %d\n", + opts->streaming_maxpacket); + } + /* Fill in the FS/HS/SS Video Streaming specific descriptors from the * module parameters. 
* diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 31125a4a2658938cdc67ef4b24ee9bfbfeeadf44..d7dcd39fe12cb76d7e5275b66646c79e0413c985 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -2140,7 +2140,7 @@ static struct configfs_item_operations uvc_item_ops = { .release = uvc_attr_release, }; -#define UVCG_OPTS_ATTR(cname, conv, str2u, uxx, vnoc, limit) \ +#define UVCG_OPTS_ATTR(cname, aname, conv, str2u, uxx, vnoc, limit) \ static ssize_t f_uvc_opts_##cname##_show( \ struct config_item *item, char *page) \ { \ @@ -2183,16 +2183,16 @@ end: \ return ret; \ } \ \ -UVC_ATTR(f_uvc_opts_, cname, aname) +UVC_ATTR(f_uvc_opts_, cname, cname) #define identity_conv(x) (x) -UVCG_OPTS_ATTR(streaming_interval, identity_conv, kstrtou8, u8, identity_conv, - 16); -UVCG_OPTS_ATTR(streaming_maxpacket, le16_to_cpu, kstrtou16, u16, le16_to_cpu, - 3072); -UVCG_OPTS_ATTR(streaming_maxburst, identity_conv, kstrtou8, u8, identity_conv, - 15); +UVCG_OPTS_ATTR(streaming_interval, streaming_interval, identity_conv, + kstrtou8, u8, identity_conv, 16); +UVCG_OPTS_ATTR(streaming_maxpacket, streaming_maxpacket, le16_to_cpu, + kstrtou16, u16, le16_to_cpu, 3072); +UVCG_OPTS_ATTR(streaming_maxburst, streaming_maxburst, identity_conv, + kstrtou8, u8, identity_conv, 15); #undef identity_conv diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index f69dbd4bcd18dd54d37effe7a03e7b73e7372cd5..b8534d3f8bb0ee5bacb9a5f7493cb794eace3806 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1819,8 +1819,10 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) spin_lock_irq (&dev->lock); value = -EINVAL; - if (dev->buf) + if (dev->buf) { + kfree(kbuf); goto fail; + } dev->buf = kbuf; /* full or low speed config */ diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index d685d82dcf487c9f53bb3cd821424159d2e445d1..e97539fc127e6575977d77c17537aefe8cf0bdfc 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -913,7 +913,7 @@ int usb_gadget_ep_match_desc(struct usb_gadget *gadget, return 0; /* "high bandwidth" works only at high speed */ - if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp(desc) & (3<<11)) + if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp_mult(desc) > 1) return 0; switch (type) { diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index 33f3987218f7d2f69bf4668db4152a81d9cdc8b4..d133252ef2c3995b7ef0070fafe045550dcb4fbe 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -1146,15 +1146,15 @@ static int scan_dma_completions(struct net2280_ep *ep) */ while (!list_empty(&ep->queue)) { struct net2280_request *req; - u32 tmp; + u32 req_dma_count; req = list_entry(ep->queue.next, struct net2280_request, queue); if (!req->valid) break; rmb(); - tmp = le32_to_cpup(&req->td->dmacount); - if ((tmp & BIT(VALID_BIT)) != 0) + req_dma_count = le32_to_cpup(&req->td->dmacount); + if ((req_dma_count & BIT(VALID_BIT)) != 0) break; /* SHORT_PACKET_TRANSFERRED_INTERRUPT handles "usb-short" @@ -1163,40 +1163,41 @@ static int scan_dma_completions(struct net2280_ep *ep) */ if (unlikely(req->td->dmadesc == 0)) { /* paranoia */ - tmp = readl(&ep->dma->dmacount); - if (tmp & DMA_BYTE_COUNT_MASK) + u32 const ep_dmacount = readl(&ep->dma->dmacount); + + if (ep_dmacount & DMA_BYTE_COUNT_MASK) break; /* single transfer mode 
*/ - dma_done(ep, req, tmp, 0); + dma_done(ep, req, req_dma_count, 0); num_completed++; break; } else if (!ep->is_in && (req->req.length % ep->ep.maxpacket) && !(ep->dev->quirks & PLX_PCIE)) { - tmp = readl(&ep->regs->ep_stat); + u32 const ep_stat = readl(&ep->regs->ep_stat); /* AVOID TROUBLE HERE by not issuing short reads from * your gadget driver. That helps avoids errata 0121, * 0122, and 0124; not all cases trigger the warning. */ - if ((tmp & BIT(NAK_OUT_PACKETS)) == 0) { + if ((ep_stat & BIT(NAK_OUT_PACKETS)) == 0) { ep_warn(ep->dev, "%s lost packet sync!\n", ep->ep.name); req->req.status = -EOVERFLOW; } else { - tmp = readl(&ep->regs->ep_avail); - if (tmp) { + u32 const ep_avail = readl(&ep->regs->ep_avail); + if (ep_avail) { /* fifo gets flushed later */ ep->out_overflow = 1; ep_dbg(ep->dev, "%s dma, discard %d len %d\n", - ep->ep.name, tmp, + ep->ep.name, ep_avail, req->req.length); req->req.status = -EOVERFLOW; } } } - dma_done(ep, req, tmp, 0); + dma_done(ep, req, req_dma_count, 0); num_completed++; } diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c index a97da645c1b9eaecc5e5bb0bcb5ad25857c43ec8..8a365aad66fe2e38eaf0a869ae0f774349f694f2 100644 --- a/drivers/usb/gadget/udc/pch_udc.c +++ b/drivers/usb/gadget/udc/pch_udc.c @@ -1523,7 +1523,6 @@ static void pch_udc_free_dma_chain(struct pch_udc_dev *dev, td = phys_to_virt(addr); addr2 = (dma_addr_t)td->next; pci_pool_free(dev->data_requests, td, addr); - td->next = 0x00; addr = addr2; } req->chain_len = 1; diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c index 7fa60f5b7ae408b39461107954d561bc303926af..afd6b86458c530b2e18377c5d0c3ca030208fb05 100644 --- a/drivers/usb/gadget/udc/pxa27x_udc.c +++ b/drivers/usb/gadget/udc/pxa27x_udc.c @@ -2534,9 +2534,10 @@ static int pxa_udc_remove(struct platform_device *_dev) usb_del_gadget_udc(&udc->gadget); pxa_cleanup_debugfs(udc); - if (!IS_ERR_OR_NULL(udc->transceiver)) + if (!IS_ERR_OR_NULL(udc->transceiver)) { usb_unregister_notifier(udc->transceiver, &pxa27x_udc_phy); - usb_put_phy(udc->transceiver); + usb_put_phy(udc->transceiver); + } udc->transceiver = NULL; the_controller = NULL; diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index bb89e24c48b4c65f8d6d41c6278f1982c40d2895..2197a50ed2ab814967df57acc73af438f760dda9 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -222,7 +222,7 @@ #define USB3_EP0_SS_MAX_PACKET_SIZE 512 #define USB3_EP0_HSFS_MAX_PACKET_SIZE 64 #define USB3_EP0_BUF_SIZE 8 -#define USB3_MAX_NUM_PIPES 30 +#define USB3_MAX_NUM_PIPES 6 /* This includes PIPE 0 */ #define USB3_WAIT_US 3 struct renesas_usb3; diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c index 1a2614aae42cabd8859f1ab327034d18c5f46c12..3ff6468a1f5f60fee5a789d7c182d75d06ffbdf3 100644 --- a/drivers/usb/host/ehci-dbg.c +++ b/drivers/usb/host/ehci-dbg.c @@ -837,7 +837,7 @@ static ssize_t fill_registers_buffer(struct debug_buffer *buf) default: /* unknown */ break; } - temp = (cap >> 8) & 0xff; + offset = (cap >> 8) & 0xff; } } #endif diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index b7114c3f52aae7b49e6de7879a61901b97d80085..3b7d69ca83be2a790c38aecaac208f13f390e6de 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -996,6 +996,12 @@ void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id) if (!vdev) return; + if (vdev->real_port == 0 || + vdev->real_port > 
HCS_MAX_PORTS(xhci->hcs_params1)) { + xhci_dbg(xhci, "Bad vdev->real_port.\n"); + goto out; + } + tt_list_head = &(xhci->rh_bw[vdev->real_port - 1].tts); list_for_each_entry_safe(tt_info, next, tt_list_head, tt_list) { /* is this a hub device that added a tt_info to the tts list */ @@ -1009,6 +1015,7 @@ void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id) } } } +out: /* we are now at a leaf device */ xhci_free_virt_device(xhci, slot_id); } @@ -1025,10 +1032,9 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, return 0; } - xhci->devs[slot_id] = kzalloc(sizeof(*xhci->devs[slot_id]), flags); - if (!xhci->devs[slot_id]) + dev = kzalloc(sizeof(*dev), flags); + if (!dev) return 0; - dev = xhci->devs[slot_id]; /* Allocate the (output) device context that will be used in the HC. */ dev->out_ctx = xhci_alloc_container_ctx(xhci, XHCI_CTX_TYPE_DEVICE, flags); @@ -1076,9 +1082,18 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, &xhci->dcbaa->dev_context_ptrs[slot_id], le64_to_cpu(xhci->dcbaa->dev_context_ptrs[slot_id])); + xhci->devs[slot_id] = dev; + return 1; fail: - xhci_free_virt_device(xhci, slot_id); + if (dev->eps[0].ring) + xhci_ring_free(xhci, dev->eps[0].ring); + if (dev->in_ctx) + xhci_free_container_ctx(xhci, dev->in_ctx); + if (dev->out_ctx) + xhci_free_container_ctx(xhci, dev->out_ctx); + kfree(dev); + return 0; } diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index f2365a47fa4a42d593b7644ddbe574cdc103f2c0..ce9e457e60c3327d2658217d740b717e803c94bb 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -632,13 +632,13 @@ static int xhci_mtk_probe(struct platform_device *pdev) goto power_off_phys; } - if (HCC_MAX_PSA(xhci->hcc_params) >= 4) - xhci->shared_hcd->can_do_streams = 1; - ret = usb_add_hcd(hcd, irq, IRQF_SHARED); if (ret) goto put_usb3_hcd; + if (HCC_MAX_PSA(xhci->hcc_params) >= 4) + xhci->shared_hcd->can_do_streams = 1; + ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED); if (ret) goto dealloc_usb2_hcd; diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index c87ef38e7416cb5062b45e455d05d4f34012de40..f6782a347cde8f48e7640e50abc5133b561c66e2 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -189,6 +189,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_TRUST_TX_LENGTH; xhci->quirks |= XHCI_BROKEN_STREAMS; } + if (pdev->vendor == PCI_VENDOR_ID_RENESAS && + pdev->device == 0x0014) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; if (pdev->vendor == PCI_VENDOR_ID_RENESAS && pdev->device == 0x0015) xhci->quirks |= XHCI_RESET_ON_RESUME; diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index ca8b0b1ae37dd05ff5230e8400d04bd1c0dac819..dec100811946b08a17f7d9a0e4b3133297bd2a74 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -335,6 +335,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match); static struct platform_driver usb_xhci_driver = { .probe = xhci_plat_probe, .remove = xhci_plat_remove, + .shutdown = usb_hcd_platform_shutdown, .driver = { .name = "xhci-hcd", .pm = DEV_PM_OPS, diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 63735b5310bb17cfc2cf8f1099694bfe16fd2bb3..89a14d5f6ad885c9f19bf5613b131c29d4a1a4e5 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3132,7 +3132,7 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred, { u32 maxp, total_packet_count; - /* MTK 
xHCI is mostly 0.97 but contains some features from 1.0 */
+	/* MTK xHCI 0.96 contains some features from 1.0 */
 	if (xhci->hci_version < 0x100 && !(xhci->quirks & XHCI_MTK_HOST))
 		return ((td_total_len - transferred) >> 10);
@@ -3141,8 +3141,8 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred,
 	    trb_buff_len == td_total_len)
 		return 0;
 
-	/* for MTK xHCI, TD size doesn't include this TRB */
-	if (xhci->quirks & XHCI_MTK_HOST)
+	/* for MTK xHCI 0.96, TD size includes this TRB, but not in 1.x */
+	if ((xhci->quirks & XHCI_MTK_HOST) && (xhci->hci_version < 0x100))
 		trb_buff_len = 0;
 
 	maxp = GET_MAX_PACKET(usb_endpoint_maxp(&urb->ep->desc));
diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c
index 8e7737d7ac0a3e0bacdc6b2a2b67e18f9385a51e..03be5d574f23426a2257562790c9178e242e5358 100644
--- a/drivers/usb/misc/usb3503.c
+++ b/drivers/usb/misc/usb3503.c
@@ -292,6 +292,8 @@ static int usb3503_probe(struct usb3503 *hub)
 	if (gpio_is_valid(hub->gpio_reset)) {
 		err = devm_gpio_request_one(dev, hub->gpio_reset,
 				GPIOF_OUT_INIT_LOW, "usb3503 reset");
+		/* Datasheet defines a hardware reset to be at least 100us */
+		usleep_range(100, 10000);
 		if (err) {
 			dev_err(dev,
 				"unable to request GPIO %d as reset pin (%d)\n",
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 1a874a1f3890eb6d385e767589df6d5ea9d65e5c..80b37d214bebf0ba896ad2de9ca2c4d86b4a3132 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1002,7 +1002,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 		break;
 
 	case MON_IOCQ_RING_SIZE:
+		mutex_lock(&rp->fetch_lock);
 		ret = rp->b_size;
+		mutex_unlock(&rp->fetch_lock);
 		break;
 
 	case MON_IOCT_RING_SIZE:
@@ -1229,12 +1231,16 @@ static int mon_bin_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	unsigned long offset, chunk_idx;
 	struct page *pageptr;
 
+	mutex_lock(&rp->fetch_lock);
 	offset = vmf->pgoff << PAGE_SHIFT;
-	if (offset >= rp->b_size)
+	if (offset >= rp->b_size) {
+		mutex_unlock(&rp->fetch_lock);
 		return VM_FAULT_SIGBUS;
+	}
 	chunk_idx = offset / CHUNK_SIZE;
 	pageptr = rp->b_vec[chunk_idx].pg;
 	get_page(pageptr);
+	mutex_unlock(&rp->fetch_lock);
 	vmf->page = pageptr;
 	return 0;
 }
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index bacee0fd4dd38b70d13d49396e139e57397bb2e6..ea5bad49394beb50495cb8afbcfde3c1e7543638 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -302,7 +302,15 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci)
 			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
 			portstate(musb->port1_status |= USB_PORT_STAT_POWER);
 			del_timer(&otg_workaround);
-		} else {
+		} else if (!(musb->int_usb & MUSB_INTR_BABBLE)) {
+			/*
+			 * When a babble condition happens, a drvvbus interrupt
+			 * is also generated. Ignore this drvvbus interrupt
+			 * and let the babble interrupt handler recover the
+			 * controller; otherwise, the host-mode flag is lost
+			 * due to the MUSB_DEV_MODE() call below and the
+			 * babble recovery logic will not be called.
+ */ musb->is_active = 0; MUSB_DEV_MODE(musb); otg->default_a = 0; diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c index db68156568e6e7bc209eaf942eaa9c51bd4e15ad..b3b33cf7ddf608f24b0bca12f1c241261837f791 100644 --- a/drivers/usb/phy/phy-isp1301.c +++ b/drivers/usb/phy/phy-isp1301.c @@ -33,6 +33,12 @@ static const struct i2c_device_id isp1301_id[] = { }; MODULE_DEVICE_TABLE(i2c, isp1301_id); +static const struct of_device_id isp1301_of_match[] = { + {.compatible = "nxp,isp1301" }, + { }, +}; +MODULE_DEVICE_TABLE(of, isp1301_of_match); + static struct i2c_client *isp1301_i2c_client; static int __isp1301_write(struct isp1301 *isp, u8 reg, u8 value, u8 clear) @@ -130,6 +136,7 @@ static int isp1301_remove(struct i2c_client *client) static struct i2c_driver isp1301_driver = { .driver = { .name = DRV_NAME, + .of_match_table = of_match_ptr(isp1301_of_match), }, .probe = isp1301_probe, .remove = isp1301_remove, diff --git a/drivers/usb/phy/phy-tahvo.c b/drivers/usb/phy/phy-tahvo.c index ab5d364f6e8c7964e004c01c78b50baf3f32832c..335a1ef352242f5be250ea451710272ca4b49b2d 100644 --- a/drivers/usb/phy/phy-tahvo.c +++ b/drivers/usb/phy/phy-tahvo.c @@ -368,7 +368,8 @@ static int tahvo_usb_probe(struct platform_device *pdev) tu->extcon = devm_extcon_dev_allocate(&pdev->dev, tahvo_cable); if (IS_ERR(tu->extcon)) { dev_err(&pdev->dev, "failed to allocate memory for extcon\n"); - return -ENOMEM; + ret = PTR_ERR(tu->extcon); + goto err_disable_clk; } ret = devm_extcon_dev_register(&pdev->dev, tu->extcon); diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index 56ecb8b5115dd66b8d0db52849e9738e127a2afa..584ae8cbaf1ce348024f4a42404cdf8b1267c370 100644 --- a/drivers/usb/serial/Kconfig +++ b/drivers/usb/serial/Kconfig @@ -63,6 +63,7 @@ config USB_SERIAL_SIMPLE - Google USB serial devices - HP4x calculators - a number of Motorola phones + - Motorola Tetra devices - Novatel Wireless GPS receivers - Siemens USB/MPI adapter. - ViVOtech ViVOpay USB device. 
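The phy-isp1301 hunk above shows the usual way an I2C driver advertises a device-tree compatible string so the I2C core can bind it from a DT node. For readers unfamiliar with the pattern, a minimal self-contained sketch follows; it is illustrative only, and the example_* names and the "vendor,example-phy" compatible are invented, not part of this patch:

/*
 * Sketch of the of_device_id / of_match_ptr() binding pattern used by
 * the phy-isp1301 change (kernel 4.9-era i2c_driver API assumed).
 */
#include <linux/i2c.h>
#include <linux/module.h>
#include <linux/of.h>

static const struct i2c_device_id example_id[] = {
	{ "example-phy", 0 },
	{ }
};
MODULE_DEVICE_TABLE(i2c, example_id);

static const struct of_device_id example_of_match[] = {
	{ .compatible = "vendor,example-phy" },	/* invented compatible */
	{ }
};
MODULE_DEVICE_TABLE(of, example_of_match);

static int example_probe(struct i2c_client *client,
			 const struct i2c_device_id *id)
{
	/* a real driver would map registers and register a PHY here */
	return 0;
}

static int example_remove(struct i2c_client *client)
{
	return 0;
}

static struct i2c_driver example_driver = {
	.driver = {
		.name = "example-phy",
		/* of_match_ptr() compiles to NULL when CONFIG_OF is off */
		.of_match_table = of_match_ptr(example_of_match),
	},
	.probe = example_probe,
	.remove = example_remove,
	.id_table = example_id,
};
module_i2c_driver(example_driver);

MODULE_LICENSE("GPL");

A matching device-tree node would simply declare compatible = "vendor,example-phy"; the I2C core then matches it against example_of_match at probe time.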
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 11ee55e080e51738b6c3c9dfc84ff9ab44e77fcb..3178d8afb3e6798533e8bb6260b98ebfe1c826fe 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -121,6 +121,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */ { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */ { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */ + { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */ { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ @@ -171,6 +172,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ + { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */ { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */ { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 3249f42b4b93496f5b9d65a1075479af3abaed97..0c743e4cca1eedee378c4f824e24e692eb5d6c2f 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1017,6 +1017,7 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, + { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index f9d15bd627855440c48bb01f224a852d12591425..543d2801632b4907daab18b8aed682c338b729f3 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -913,6 +913,12 @@ #define ICPDAS_I7561U_PID 0x0104 #define ICPDAS_I7563U_PID 0x0105 +/* + * Airbus Defence and Space + */ +#define AIRBUS_DS_VID 0x1e8e /* Vendor ID */ +#define AIRBUS_DS_P8GR 0x6001 /* Tetra P8GR */ + /* * RT Systems programming cables for various ham radios */ diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 464db17b53285b26ce42d15fe14eb281a4276665..de61271f2ba3a16279cca85251cfbce61ccd398e 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -2215,7 +2215,6 @@ static int write_cmd_usb(struct edgeport_port *edge_port, /* something went wrong */ dev_err(dev, "%s - usb_submit_urb(write command) failed, status = %d\n", __func__, status); - usb_kill_urb(urb); usb_free_urb(urb); atomic_dec(&CmdUrbs); return status; diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index db3d34c2c82e4b128af2102c5fa140e84e3ee360..1799aa058a5b56cc49c37e7530998d99b266e97c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -236,11 +236,14 @@ static void option_instat_callback(struct urb *urb); /* These Quectel products use Qualcomm's vendor ID */ #define QUECTEL_PRODUCT_UC20 0x9003 #define QUECTEL_PRODUCT_UC15 0x9090 +/* These Yuga products use Qualcomm's vendor ID */ +#define YUGA_PRODUCT_CLM920_NC5 0x9625 #define QUECTEL_VENDOR_ID 0x2c7c /* These Quectel products use Quectel's vendor ID */ #define QUECTEL_PRODUCT_EC21 0x0121 #define 
QUECTEL_PRODUCT_EC25 0x0125 +#define QUECTEL_PRODUCT_BG96 0x0296 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -282,6 +285,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 #define TELIT_PRODUCT_ME910 0x1100 +#define TELIT_PRODUCT_ME910_DUAL_MODEM 0x1101 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 #define TELIT_PRODUCT_LE910_USBCFG4 0x1206 @@ -379,6 +383,9 @@ static void option_instat_callback(struct urb *urb); #define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 #define FOUR_G_SYSTEMS_PRODUCT_W100 0x9b01 +/* Fujisoft products */ +#define FUJISOFT_PRODUCT_FS040U 0x9b02 + /* iBall 3.5G connect wireless modem */ #define IBALL_3_5G_CONNECT 0x9605 @@ -647,6 +654,11 @@ static const struct option_blacklist_info telit_me910_blacklist = { .reserved = BIT(1) | BIT(3), }; +static const struct option_blacklist_info telit_me910_dual_modem_blacklist = { + .sendsetup = BIT(0), + .reserved = BIT(3), +}; + static const struct option_blacklist_info telit_le910_blacklist = { .sendsetup = BIT(0), .reserved = BIT(1) | BIT(2), @@ -676,6 +688,10 @@ static const struct option_blacklist_info cinterion_rmnet2_blacklist = { .reserved = BIT(4) | BIT(5), }; +static const struct option_blacklist_info yuga_clm920_nc5_blacklist = { + .reserved = BIT(1) | BIT(4), +}; + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -1180,11 +1196,16 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + /* Yuga products use Qualcomm vendor ID */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5), + .driver_info = (kernel_ulong_t)&yuga_clm920_nc5_blacklist }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), @@ -1244,6 +1265,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = (kernel_ulong_t)&telit_me910_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), + .driver_info = (kernel_ulong_t)&telit_me910_dual_modem_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), @@ -1877,6 +1900,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100), .driver_info = (kernel_ulong_t)&four_g_w100_blacklist }, + {USB_DEVICE(LONGCHEER_VENDOR_ID, FUJISOFT_PRODUCT_FS040U), + .driver_info = (kernel_ulong_t)&net_intf3_blacklist}, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9801, 0xff), .driver_info = 
(kernel_ulong_t)&net_intf3_blacklist }, diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index a51b2837985000281f51bc822742b1e6eb33f260..3da25ad267a2631cfe782302866ac39071065d37 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -39,6 +39,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ2) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_DCU11) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ3) }, + { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_CHILITAG) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_PHAROS) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ALDIGA) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MMX) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 3b5a15d1dc0dd50a0ebf9d925e33ef4cf8db0090..123289085ee25974c1045adaa7fa56dd11198169 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -17,6 +17,7 @@ #define PL2303_PRODUCT_ID_DCU11 0x1234 #define PL2303_PRODUCT_ID_PHAROS 0xaaa0 #define PL2303_PRODUCT_ID_RSAQ3 0xaaa2 +#define PL2303_PRODUCT_ID_CHILITAG 0xaaa8 #define PL2303_PRODUCT_ID_ALDIGA 0x0611 #define PL2303_PRODUCT_ID_MMX 0x0612 #define PL2303_PRODUCT_ID_GPRS 0x0609 diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 4516291df1b854eb98044c41fd3edd2260e6383a..fb6dc16c754aec5e85fd13d70e8eee021519b00f 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -166,6 +166,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x1199, 0x907a)}, /* Sierra Wireless EM74xx QDL */ {DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */ + {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */ + {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ @@ -346,6 +348,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) break; case 2: dev_dbg(dev, "NMEA GPS interface found\n"); + sendsetup = true; break; case 3: dev_dbg(dev, "Modem port found\n"); diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index e98b6e57b703dbd84c3b618fe56dc50eff3fd7ec..6aa7ff2c1cf74b953dc5669e08500b43ccb9e115 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -80,6 +80,11 @@ DEVICE(vivopay, VIVOPAY_IDS); { USB_DEVICE(0x22b8, 0x2c64) } /* Motorola V950 phone */ DEVICE(moto_modem, MOTO_IDS); +/* Motorola Tetra driver */ +#define MOTOROLA_TETRA_IDS() \ + { USB_DEVICE(0x0cad, 0x9011) } /* Motorola Solutions TETRA PEI */ +DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); + /* Novatel Wireless GPS driver */ #define NOVATEL_IDS() \ { USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */ @@ -110,6 +115,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &google_device, &vivopay_device, &moto_modem_device, + &motorola_tetra_device, &novatel_gps_device, &hp4x_device, &suunto_device, @@ -125,6 +131,7 @@ static const struct usb_device_id id_table[] = { GOOGLE_IDS(), VIVOPAY_IDS(), MOTO_IDS(), + MOTOROLA_TETRA_IDS(), NOVATEL_IDS(), HP4X_IDS(), SUUNTO_IDS(), diff --git a/drivers/usb/storage/uas-detect.h 
b/drivers/usb/storage/uas-detect.h
index a155cd02bce240db83742f0299ab8ea12a56e7a8..ecc83c405a8b0968cdd26efa9b715263183a2e1b 100644
--- a/drivers/usb/storage/uas-detect.h
+++ b/drivers/usb/storage/uas-detect.h
@@ -111,6 +111,10 @@ static int uas_use_uas_driver(struct usb_interface *intf,
 		}
 	}
 
+	/* All Seagate disk enclosures have broken ATA pass-through support */
+	if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bc2)
+		flags |= US_FL_NO_ATA_1X;
+
 	usb_stor_adjust_quirks(udev, &flags);
 
 	if (flags & US_FL_IGNORE_UAS) {
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 9876af4ab64e004c82a4c9505fb1a70bdbe0a5f1..6891e909277573603dd993ecf13bc71cd301a859 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -1076,20 +1076,19 @@ static int uas_post_reset(struct usb_interface *intf)
 		return 0;
 
 	err = uas_configure_endpoints(devinfo);
-	if (err) {
+	if (err && err != -ENODEV)
 		shost_printk(KERN_ERR, shost,
 			     "%s: alloc streams error %d after reset",
 			     __func__, err);
-		return 1;
-	}
 
+	/* we must unblock the host in every case lest we deadlock */
 	spin_lock_irqsave(shost->host_lock, flags);
 	scsi_report_bus_reset(shost, 0);
 	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	scsi_unblock_requests(shost);
 
-	return 0;
+	return err ? 1 : 0;
 }
 
 static int uas_suspend(struct usb_interface *intf, pm_message_t message)
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 2572fd5cd2bb94978b2beb3421db20f32b716bda..b605115eb47a600c105f77248fb9d5c713335d07 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2113,6 +2113,13 @@ UNUSUAL_DEV( 0x152d, 0x0567, 0x0114, 0x0116,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_BROKEN_FUA ),
 
+/* Reported by David Kozub */
+UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
+		"JMicron",
+		"JMS567",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_BROKEN_FUA),
+
 /*
  * Reported by Alexandre Oliva
  * JMicron responds to USN and several other SCSI ioctls with a
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index cde115359793001dcf18b8884c3c9e22eec0fa73..719ec68ae3099e78b1c11554884f87e43b5c1cde 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -142,6 +142,13 @@ UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES),
 
+/* Reported-by: David Kozub */
+UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
+		"JMicron",
+		"JMS567",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_BROKEN_FUA),
+
 /* Reported-by: Hans de Goede */
 UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
 		"VIA",
@@ -149,6 +156,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_NO_ATA_1X),
 
+/* Reported-by: Icenowy Zheng */
+UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
+		"Norelsys",
+		"NS1068X",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_IGNORE_UAS),
+
 /* Reported-by: Takeo Nakayama */
 UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
 		"JMicron",
diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
index c653ce533430acfe39b193458fab74664ea3067c..1886d8e4f14ea0d6d53fb9ab19bb75ddf39691ea 100644
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -163,8 +163,7 @@ static void stub_shutdown_connection(struct usbip_device *ud)
	 * step 1?
*/ if (ud->tcp_socket) { - dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n", - ud->tcp_socket); + dev_dbg(&sdev->udev->dev, "shutdown sockfd\n"); kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR); } diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index af10f7b131a4920ec96190b954d43996a7af9e4d..325b4c05acdd3d1b548808c198793e61e562a9b3 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -252,11 +252,12 @@ void stub_device_cleanup_urbs(struct stub_device *sdev) struct stub_priv *priv; struct urb *urb; - dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev); + dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n"); while ((priv = stub_priv_pop(sdev))) { urb = priv->urb; - dev_dbg(&sdev->udev->dev, "free urb %p\n", urb); + dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n", + priv->seqnum); usb_kill_urb(urb); kmem_cache_free(stub_priv_cache, priv); diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 191b176ffedfdaa119df7f4d31b8941e388b2299..5b807185f79ec93ddfb48a54adbdec92ea4143ef 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -225,9 +225,6 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev, if (priv->seqnum != pdu->u.cmd_unlink.seqnum) continue; - dev_info(&priv->urb->dev->dev, "unlink urb %p\n", - priv->urb); - /* * This matched urb is not completed yet (i.e., be in * flight in usb hcd hardware/driver). Now we are @@ -266,8 +263,8 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev, ret = usb_unlink_urb(priv->urb); if (ret != -EINPROGRESS) dev_err(&priv->urb->dev->dev, - "failed to unlink a urb %p, ret %d\n", - priv->urb, ret); + "failed to unlink a urb # %lu, ret %d\n", + priv->seqnum, ret); return 0; } @@ -336,23 +333,34 @@ static struct stub_priv *stub_priv_alloc(struct stub_device *sdev, return priv; } -static int get_pipe(struct stub_device *sdev, int epnum, int dir) +static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu) { struct usb_device *udev = sdev->udev; struct usb_host_endpoint *ep; struct usb_endpoint_descriptor *epd = NULL; + int epnum = pdu->base.ep; + int dir = pdu->base.direction; + + if (epnum < 0 || epnum > 15) + goto err_ret; if (dir == USBIP_DIR_IN) ep = udev->ep_in[epnum & 0x7f]; else ep = udev->ep_out[epnum & 0x7f]; - if (!ep) { - dev_err(&sdev->udev->dev, "no such endpoint?, %d\n", - epnum); - BUG(); - } + if (!ep) + goto err_ret; epd = &ep->desc; + + /* validate transfer_buffer_length */ + if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) { + dev_err(&sdev->udev->dev, + "CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n", + pdu->u.cmd_submit.transfer_buffer_length); + return -1; + } + if (usb_endpoint_xfer_control(epd)) { if (dir == USBIP_DIR_OUT) return usb_sndctrlpipe(udev, epnum); @@ -375,15 +383,31 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir) } if (usb_endpoint_xfer_isoc(epd)) { + /* validate packet size and number of packets */ + unsigned int maxp, packets, bytes; + + maxp = usb_endpoint_maxp(epd); + maxp *= usb_endpoint_maxp_mult(epd); + bytes = pdu->u.cmd_submit.transfer_buffer_length; + packets = DIV_ROUND_UP(bytes, maxp); + + if (pdu->u.cmd_submit.number_of_packets < 0 || + pdu->u.cmd_submit.number_of_packets > packets) { + dev_err(&sdev->udev->dev, + "CMD_SUBMIT: isoc invalid num packets %d\n", + pdu->u.cmd_submit.number_of_packets); + return -1; + } if (dir == USBIP_DIR_OUT) return usb_sndisocpipe(udev, epnum); else return usb_rcvisocpipe(udev, epnum); } +err_ret: /* NOT REACHED 
*/ - dev_err(&sdev->udev->dev, "get pipe, epnum %d\n", epnum); - return 0; + dev_err(&sdev->udev->dev, "CMD_SUBMIT: invalid epnum %d\n", epnum); + return -1; } static void masking_bogus_flags(struct urb *urb) @@ -447,7 +471,10 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, struct stub_priv *priv; struct usbip_device *ud = &sdev->ud; struct usb_device *udev = sdev->udev; - int pipe = get_pipe(sdev, pdu->base.ep, pdu->base.direction); + int pipe = get_pipe(sdev, pdu); + + if (pipe == -1) + return; priv = stub_priv_alloc(sdev, pdu); if (!priv) @@ -466,7 +493,8 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, } /* allocate urb transfer buffer, if needed */ - if (pdu->u.cmd_submit.transfer_buffer_length > 0) { + if (pdu->u.cmd_submit.transfer_buffer_length > 0 && + pdu->u.cmd_submit.transfer_buffer_length <= INT_MAX) { priv->urb->transfer_buffer = kzalloc(pdu->u.cmd_submit.transfer_buffer_length, GFP_KERNEL); diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c index be50cef645d8aadb04d49314d86ed7947c182572..96aa375b80d9cc5d33b9f8780eb17bc3fc126280 100644 --- a/drivers/usb/usbip/stub_tx.c +++ b/drivers/usb/usbip/stub_tx.c @@ -102,7 +102,7 @@ void stub_complete(struct urb *urb) /* link a urb to the queue of tx. */ spin_lock_irqsave(&sdev->priv_lock, flags); if (sdev->ud.tcp_socket == NULL) { - usbip_dbg_stub_tx("ignore urb for closed connection %p", urb); + usbip_dbg_stub_tx("ignore urb for closed connection\n"); /* It will be freed in stub_device_cleanup_urbs(). */ } else if (priv->unlinking) { stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status); @@ -181,6 +181,13 @@ static int stub_send_ret_submit(struct stub_device *sdev) memset(&pdu_header, 0, sizeof(pdu_header)); memset(&msg, 0, sizeof(msg)); + if (urb->actual_length > 0 && !urb->transfer_buffer) { + dev_err(&sdev->udev->dev, + "urb: actual_length %d transfer_buffer null\n", + urb->actual_length); + return -1; + } + if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) iovnum = 2 + urb->number_of_packets; else @@ -197,8 +204,8 @@ static int stub_send_ret_submit(struct stub_device *sdev) /* 1. 
setup usbip_header */ setup_ret_submit_pdu(&pdu_header, urb); - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n", - pdu_header.base.seqnum, urb); + usbip_dbg_stub_tx("setup txdata seqnum: %d\n", + pdu_header.base.seqnum); usbip_header_correct_endian(&pdu_header, 1); iov[iovnum].iov_base = &pdu_header; diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index 8b232290be6b97a04be362582e4a4d87db8d9185..2a5d3180777d9aa714162097aad36c1f3d4708d0 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -105,7 +105,7 @@ static void usbip_dump_usb_device(struct usb_device *udev) dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)", udev->devnum, udev->devpath, usb_speed_string(udev->speed)); - pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport); + pr_debug("tt hub ttport %d\n", udev->ttport); dev_dbg(dev, " "); for (i = 0; i < 16; i++) @@ -138,12 +138,8 @@ static void usbip_dump_usb_device(struct usb_device *udev) } pr_debug("\n"); - dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus); - - dev_dbg(dev, - "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n", - &udev->descriptor, udev->config, - udev->actconfig, udev->rawdescriptors); + dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev), + udev->bus->bus_name); dev_dbg(dev, "have_langid %d, string_langid %d\n", udev->have_langid, udev->string_langid); @@ -251,9 +247,6 @@ void usbip_dump_urb(struct urb *urb) dev = &urb->dev->dev; - dev_dbg(dev, " urb :%p\n", urb); - dev_dbg(dev, " dev :%p\n", urb->dev); - usbip_dump_usb_device(urb->dev); dev_dbg(dev, " pipe :%08x ", urb->pipe); @@ -262,11 +255,9 @@ void usbip_dump_urb(struct urb *urb) dev_dbg(dev, " status :%d\n", urb->status); dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags); - dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer); dev_dbg(dev, " transfer_buffer_length:%d\n", urb->transfer_buffer_length); dev_dbg(dev, " actual_length :%d\n", urb->actual_length); - dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet); if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL) usbip_dump_usb_ctrlrequest( @@ -276,8 +267,6 @@ void usbip_dump_urb(struct urb *urb) dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets); dev_dbg(dev, " interval :%d\n", urb->interval); dev_dbg(dev, " error_count :%d\n", urb->error_count); - dev_dbg(dev, " context :%p\n", urb->context); - dev_dbg(dev, " complete :%p\n", urb->complete); } EXPORT_SYMBOL_GPL(usbip_dump_urb); @@ -335,13 +324,10 @@ int usbip_recv(struct socket *sock, void *buf, int size) char *bp = buf; int osize = size; - usbip_dbg_xmit("enter\n"); - - if (!sock || !buf || !size) { - pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf, - size); + if (!sock || !buf || !size) return -EINVAL; - } + + usbip_dbg_xmit("enter\n"); do { sock->sk->sk_allocation = GFP_NOIO; @@ -354,11 +340,8 @@ int usbip_recv(struct socket *sock, void *buf, int size) msg.msg_flags = MSG_NOSIGNAL; result = kernel_recvmsg(sock, &msg, &iov, 1, size, MSG_WAITALL); - if (result <= 0) { - pr_debug("receive sock %p buf %p size %u ret %d total %d\n", - sock, buf, size, result, total); + if (result <= 0) goto err; - } size -= result; buf += result; diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h index 9f490375ac92362dce2174fb246a2e1425dd25b7..f0b955f8504eba470e0007aa929352218082ffcd 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -271,6 +271,7 @@ struct usbip_device { /* lock for 
status */ spinlock_t lock; + int sockfd; struct socket *tcp_socket; struct task_struct *tcp_rx; diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index d6dc165e924b6e04bd50294590d19c37d302e80f..dbe615ba07c9557c8117ddcd92bb5774727961d1 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -300,7 +300,7 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_POWER: usbip_dbg_vhci_rh( " ClearPortFeature: USB_PORT_FEAT_POWER\n"); - dum->port_status[rhport] = 0; + dum->port_status[rhport] &= ~USB_PORT_STAT_POWER; dum->resuming = 0; break; case USB_PORT_FEAT_C_RESET: @@ -506,9 +506,6 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, struct vhci_device *vdev; unsigned long flags; - usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n", - hcd, urb, mem_flags); - if (portnum > VHCI_HC_PORTS) { pr_err("invalid port number %d\n", portnum); return -ENODEV; @@ -671,8 +668,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) struct vhci_device *vdev; unsigned long flags; - pr_info("dequeue a urb %p\n", urb); - spin_lock_irqsave(&vhci->lock, flags); priv = urb->hcpriv; @@ -700,7 +695,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) /* tcp connection is closed */ spin_lock(&vdev->priv_lock); - pr_info("device %p seems to be disconnected\n", vdev); list_del(&priv->list); kfree(priv); urb->hcpriv = NULL; @@ -712,8 +706,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) * vhci_rx will receive RET_UNLINK and give back the URB. * Otherwise, we give back it here. */ - pr_info("gives back urb %p\n", urb); - usb_hcd_unlink_urb_from_ep(hcd, urb); spin_unlock_irqrestore(&vhci->lock, flags); @@ -741,8 +733,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) unlink->unlink_seqnum = priv->seqnum; - pr_info("device %p seems to be still connected\n", vdev); - /* send cmd_unlink and try to cancel the pending URB in the * peer */ list_add_tail(&unlink->list, &vdev->unlink_tx); @@ -823,7 +813,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud) /* need this? see stub_dev.c */ if (ud->tcp_socket) { - pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket); + pr_debug("shutdown tcp_socket\n"); kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR); } diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c index fc2d319e2360b87c8d05c0b1a4f0e68a2895fa52..5943deeec115492b6e48840dadcba093b2ab140d 100644 --- a/drivers/usb/usbip/vhci_rx.c +++ b/drivers/usb/usbip/vhci_rx.c @@ -37,24 +37,23 @@ struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum) urb = priv->urb; status = urb->status; - usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n", - urb, priv, seqnum); + usbip_dbg_vhci_rx("find urb seqnum %u\n", seqnum); switch (status) { case -ENOENT: /* fall through */ case -ECONNRESET: - dev_info(&urb->dev->dev, - "urb %p was unlinked %ssynchronuously.\n", urb, - status == -ENOENT ? "" : "a"); + dev_dbg(&urb->dev->dev, + "urb seq# %u was unlinked %ssynchronuously\n", + seqnum, status == -ENOENT ? 
"" : "a"); break; case -EINPROGRESS: /* no info output */ break; default: - dev_info(&urb->dev->dev, - "urb %p may be in a error, status %d\n", urb, - status); + dev_dbg(&urb->dev->dev, + "urb seq# %u may be in a error, status %d\n", + seqnum, status); } list_del(&priv->list); @@ -80,8 +79,8 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, spin_unlock_irqrestore(&vdev->priv_lock, flags); if (!urb) { - pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum); - pr_info("max seqnum %d\n", + pr_err("cannot find a urb of seqnum %u max seqnum %d\n", + pdu->base.seqnum, atomic_read(&vhci->seqnum)); usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); return; @@ -104,7 +103,7 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, if (usbip_dbg_flag_vhci_rx) usbip_dump_urb(urb); - usbip_dbg_vhci_rx("now giveback urb %p\n", urb); + usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum); spin_lock_irqsave(&vhci->lock, flags); usb_hcd_unlink_urb_from_ep(vhci_to_hcd(vhci), urb); @@ -170,7 +169,7 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev, pr_info("the urb (seqnum %d) was already given back\n", pdu->base.seqnum); } else { - usbip_dbg_vhci_rx("now giveback urb %p\n", urb); + usbip_dbg_vhci_rx("now giveback urb %d\n", pdu->base.seqnum); /* If unlink is successful, status is -ECONNRESET */ urb->status = pdu->u.ret_unlink.status; diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c index c404017c1b5ae26aa1df115c0ac1652284ed21b4..c287ccc78fde811c26a2bea60ade1fe2c39c507f 100644 --- a/drivers/usb/usbip/vhci_sysfs.c +++ b/drivers/usb/usbip/vhci_sysfs.c @@ -49,13 +49,17 @@ static ssize_t status_show_vhci(int pdev_nr, char *out) /* * output example: - * port sta spd dev socket local_busid - * 0000 004 000 00000000 c5a7bb80 1-2.3 - * 0001 004 000 00000000 d8cee980 2-3.4 + * port sta spd dev sockfd local_busid + * 0000 004 000 00000000 000003 1-2.3 + * 0001 004 000 00000000 000004 2-3.4 * - * IP address can be retrieved from a socket pointer address by looking - * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a - * port number and its peer IP address. + * Output includes socket fd instead of socket pointer address to + * avoid leaking kernel memory address in: + * /sys/devices/platform/vhci_hcd.0/status and in debug output. + * The socket pointer address is not used at the moment and it was + * made visible as a convenient way to find IP address from socket + * pointer address by looking up /proc/net/{tcp,tcp6}. As this opens + * a security hole, the change is made to use sockfd instead. 
*/ for (i = 0; i < VHCI_HC_PORTS; i++) { struct vhci_device *vdev = &vhci->vdev[i]; @@ -68,13 +72,13 @@ static ssize_t status_show_vhci(int pdev_nr, char *out) if (vdev->ud.status == VDEV_ST_USED) { out += sprintf(out, "%03u %08x ", vdev->speed, vdev->devid); - out += sprintf(out, "%16p %s", - vdev->ud.tcp_socket, + out += sprintf(out, "%06u %s", + vdev->ud.sockfd, dev_name(&vdev->udev->dev)); } else { out += sprintf(out, "000 00000000 "); - out += sprintf(out, "0000000000000000 0-0"); + out += sprintf(out, "000000 0-0"); } out += sprintf(out, "\n"); @@ -125,7 +129,7 @@ static ssize_t status_show(struct device *dev, int pdev_nr; out += sprintf(out, - "port sta spd dev socket local_busid\n"); + "port sta spd dev sockfd local_busid\n"); pdev_nr = status_name_to_id(attr->attr.name); if (pdev_nr < 0) @@ -324,6 +328,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr, vdev->devid = devid; vdev->speed = speed; + vdev->ud.sockfd = sockfd; vdev->ud.tcp_socket = socket; vdev->ud.status = VDEV_ST_NOTASSIGNED; @@ -361,6 +366,7 @@ static void set_status_attr(int id) status->attr.attr.name = status->name; status->attr.attr.mode = S_IRUGO; status->attr.show = status_show; + sysfs_attr_init(&status->attr.attr); } static int init_status_attrs(void) diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c index 3e7878fe2fd463f5051b228b364a8ca4be6f4761..a9a663a578b6235d18bbf716d918dd6d2e4f4908 100644 --- a/drivers/usb/usbip/vhci_tx.c +++ b/drivers/usb/usbip/vhci_tx.c @@ -83,7 +83,8 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev) memset(&msg, 0, sizeof(msg)); memset(&iov, 0, sizeof(iov)); - usbip_dbg_vhci_tx("setup txdata urb %p\n", urb); + usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n", + priv->seqnum); /* 1. 
setup usbip_header */ setup_cmd_submit_pdu(&pdu_header, urb); diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c index e429b59f6f8a634f66900f367feeb782bec10de5..d020e72b312259d08fd19de03bf23052fedb910e 100644 --- a/drivers/usb/usbip/vudc_rx.c +++ b/drivers/usb/usbip/vudc_rx.c @@ -132,6 +132,25 @@ static int v_recv_cmd_submit(struct vudc *udc, urb_p->new = 1; urb_p->seqnum = pdu->base.seqnum; + if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) { + /* validate packet size and number of packets */ + unsigned int maxp, packets, bytes; + + maxp = usb_endpoint_maxp(urb_p->ep->desc); + maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc); + bytes = pdu->u.cmd_submit.transfer_buffer_length; + packets = DIV_ROUND_UP(bytes, maxp); + + if (pdu->u.cmd_submit.number_of_packets < 0 || + pdu->u.cmd_submit.number_of_packets > packets) { + dev_err(&udc->gadget.dev, + "CMD_SUBMIT: isoc invalid num packets %d\n", + pdu->u.cmd_submit.number_of_packets); + ret = -EMSGSIZE; + goto free_urbp; + } + } + ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type); if (ret) { usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC); diff --git a/drivers/usb/usbip/vudc_tx.c b/drivers/usb/usbip/vudc_tx.c index 234661782fa07ea8fb98356a3bf453ed2dd49f56..3ab4c86486a740344fba10029e654c06e54ff215 100644 --- a/drivers/usb/usbip/vudc_tx.c +++ b/drivers/usb/usbip/vudc_tx.c @@ -97,6 +97,13 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p) memset(&pdu_header, 0, sizeof(pdu_header)); memset(&msg, 0, sizeof(msg)); + if (urb->actual_length > 0 && !urb->transfer_buffer) { + dev_err(&udc->gadget.dev, + "urb: actual_length %d transfer_buffer null\n", + urb->actual_length); + return -1; + } + if (urb_p->type == USB_ENDPOINT_XFER_ISOC) iovnum = 2 + urb->number_of_packets; else @@ -112,8 +119,8 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p) /* 1. setup usbip_header */ setup_ret_submit_pdu(&pdu_header, urb_p); - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n", - pdu_header.base.seqnum, urb); + usbip_dbg_stub_tx("setup txdata seqnum: %d\n", + pdu_header.base.seqnum); usbip_header_correct_endian(&pdu_header, 1); iov[iovnum].iov_base = &pdu_header; diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 65d4a3015542874e46a299954645705656aeb98d..9f1ec43922094927b979682f6dca01405f52f253 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -851,11 +851,13 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) /* * Allow writes to device control fields, except devctl_phantom, - * which could confuse IOMMU, and the ARI bit in devctl2, which + * which could confuse IOMMU, MPS, which can break communication + * with other physical devices, and the ARI bit in devctl2, which * is set at probe time. FLR gets virtualized via our writefn. 
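The isochronous check added to vudc_rx above caps the peer-supplied number_of_packets at the count implied by the transfer length and the endpoint's (possibly high-bandwidth) packet size. The same arithmetic in a stand-alone sketch, with DIV_ROUND_UP open-coded and the sample values invented:

#include <stdio.h>

static unsigned int div_round_up(unsigned int n, unsigned int d)
{
	return (n + d - 1) / d;
}

/* mirrors the bound in v_recv_cmd_submit(): reject negative counts and
 * counts larger than the transfer length can possibly need */
static int isoc_packets_valid(int number_of_packets,
			      unsigned int transfer_len,
			      unsigned int maxp, unsigned int mult)
{
	unsigned int limit = div_round_up(transfer_len, maxp * mult);

	return number_of_packets >= 0 &&
	       (unsigned int)number_of_packets <= limit;
}

int main(void)
{
	/* 1024-byte maxp, no high-bandwidth mult, 3000-byte transfer:
	 * at most DIV_ROUND_UP(3000, 1024) = 3 packets are plausible */
	printf("%d\n", isoc_packets_valid(3, 3000, 1024, 1));	/* 1 */
	printf("%d\n", isoc_packets_valid(64, 3000, 1024, 1));	/* 0 */
	return 0;
}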
*/ p_setw(perm, PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM); + PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD, + ~PCI_EXP_DEVCTL_PHANTOM); p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); return 0; } diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 85d3e648bdea80ae65d331cf6b42912b99dab1f0..59b3f62a2d64ebd85be990630e09a48c7c7f2f1c 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -1123,12 +1123,11 @@ static long tce_iommu_ioctl(void *iommu_data, mutex_lock(&container->lock); ret = tce_iommu_create_default_window(container); - if (ret) - return ret; - - ret = tce_iommu_create_window(container, create.page_shift, - create.window_size, create.levels, - &create.start_addr); + if (!ret) + ret = tce_iommu_create_window(container, + create.page_shift, + create.window_size, create.levels, + &create.start_addr); mutex_unlock(&container->lock); diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 96a0661011fdf1e3d12005f5f21b542b7aed9a71..e5b7652234fcf7efa89e9595383470df9da28166 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1078,6 +1078,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); + vhost_dev_stop(&n->dev); vhost_dev_reset_owner(&n->dev, umem); vhost_net_vq_reset(n); done: diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index e3fad302b4fb2ea2ffc70a769b4f35f1a122e2b5..0ec970ca64ce14022e5755cee7b4216b2238e929 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -218,6 +218,46 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) return len; } +static int +vhost_transport_cancel_pkt(struct vsock_sock *vsk) +{ + struct vhost_vsock *vsock; + struct virtio_vsock_pkt *pkt, *n; + int cnt = 0; + LIST_HEAD(freeme); + + /* Find the vhost_vsock according to guest context id */ + vsock = vhost_vsock_get(vsk->remote_addr.svm_cid); + if (!vsock) + return -ENODEV; + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { + if (pkt->vsk != vsk) + continue; + list_move(&pkt->list, &freeme); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + list_for_each_entry_safe(pkt, n, &freeme, list) { + if (pkt->reply) + cnt++; + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + if (cnt) { + struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; + int new_cnt; + + new_cnt = atomic_sub_return(cnt, &vsock->queued_replies); + if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num) + vhost_poll_queue(&tx_vq->poll); + } + + return 0; +} + static struct virtio_vsock_pkt * vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, unsigned int out, unsigned int in) @@ -669,6 +709,7 @@ static struct virtio_transport vhost_transport = { .release = virtio_transport_release, .connect = virtio_transport_connect, .shutdown = virtio_transport_shutdown, + .cancel_pkt = vhost_transport_cancel_pkt, .dgram_enqueue = virtio_transport_dgram_enqueue, .dgram_dequeue = virtio_transport_dgram_dequeue, diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index 12614006211ecb89c527172af675ebcd97b34bcd..d95ae092f154494f37bffeae75801b64252d06a6 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -79,14 +79,17 @@ static void pwm_backlight_power_off(struct pwm_bl_data *pb) static int compute_duty_cycle(struct pwm_bl_data *pb, int brightness) { unsigned int lth = pb->lth_brightness; - 
int duty_cycle; + u64 duty_cycle; if (pb->levels) duty_cycle = pb->levels[brightness]; else duty_cycle = brightness; - return (duty_cycle * (pb->period - lth) / pb->scale) + lth; + duty_cycle *= pb->period - lth; + do_div(duty_cycle, pb->scale); + + return duty_cycle + lth; } static int pwm_backlight_update_status(struct backlight_device *bl) diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c index 6c2b2ca4a909d8d7b2df6d05195085d09a78962e..44c2be15a08b906c70bdc4da6c07401d5d2e4dfd 100644 --- a/drivers/video/fbdev/au1200fb.c +++ b/drivers/video/fbdev/au1200fb.c @@ -1681,8 +1681,10 @@ static int au1200fb_drv_probe(struct platform_device *dev) fbi = framebuffer_alloc(sizeof(struct au1200fb_device), &dev->dev); - if (!fbi) + if (!fbi) { + ret = -ENOMEM; goto failed; + } _au1200fb_infos[plane] = fbi; fbdev = fbi->par; @@ -1700,7 +1702,8 @@ static int au1200fb_drv_probe(struct platform_device *dev) if (!fbdev->fb_mem) { print_err("fail to allocate frambuffer (size: %dK))", fbdev->fb_len / 1024); - return -ENOMEM; + ret = -ENOMEM; + goto failed; } /* diff --git a/drivers/video/fbdev/controlfb.h b/drivers/video/fbdev/controlfb.h index 6026c60fc1007e007ec568d23ad26fa3910795e6..261522fabdac89ae2644089aba612dee880b4d24 100644 --- a/drivers/video/fbdev/controlfb.h +++ b/drivers/video/fbdev/controlfb.h @@ -141,5 +141,7 @@ static struct max_cmodes control_mac_modes[] = { {{ 1, 2}}, /* 1152x870, 75Hz */ {{ 0, 1}}, /* 1280x960, 75Hz */ {{ 0, 1}}, /* 1280x1024, 75Hz */ + {{ 1, 2}}, /* 1152x768, 60Hz */ + {{ 0, 1}}, /* 1600x1024, 60Hz */ }; diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index e9c2f7ba3c8e6c34382b6b0b58a7d65e41e8292d..53326badfb615cd6310a8e1d8310d3e59c5cbec1 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -769,11 +769,11 @@ static int dlfb_get_edid(struct dlfb_data *dev, char *edid, int len) for (i = 0; i < len; i++) { ret = usb_control_msg(dev->udev, - usb_rcvctrlpipe(dev->udev, 0), (0x02), - (0x80 | (0x02 << 5)), i << 8, 0xA1, rbuf, 2, - HZ); - if (ret < 1) { - pr_err("Read EDID byte %d failed err %x\n", i, ret); + usb_rcvctrlpipe(dev->udev, 0), 0x02, + (0x80 | (0x02 << 5)), i << 8, 0xA1, + rbuf, 2, USB_CTRL_GET_TIMEOUT); + if (ret < 2) { + pr_err("Read EDID byte %d failed: %d\n", i, ret); i--; break; } diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 7062bb0975a521f1a28125c092c2f0d0bbd35787..462e183609b6b797ba9878c77a0bd8ceea097ac4 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -323,6 +323,8 @@ int register_virtio_device(struct virtio_device *dev) /* device_register() causes the bus infrastructure to look for a * matching driver. 
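The pwm_bl hunk above widens duty_cycle to 64 bits because the old int intermediate could wrap: a brightness level multiplied by a nanosecond-scale PWM period easily exceeds 2^32 before the division by pb->scale brings the value back into range. A user-space illustration, with made-up level, period, and scale values (the lth offset is left out for brevity):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t level = 4096, period_ns = 5000000, scale = 4096;

	/* 32-bit intermediate: 4096 * 5000000 wraps around 2^32 */
	uint32_t bad = level * period_ns / scale;

	/* 64-bit intermediate, mirroring the patched compute_duty_cycle() */
	uint64_t good = (uint64_t)level * period_ns / scale;

	printf("32-bit: %u, 64-bit: %llu\n", bad, (unsigned long long)good);
	return 0;
}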
*/ err = device_register(&dev->dev); + if (err) + ida_simple_remove(&virtio_index_ida, dev->index); out: if (err) add_status(dev, VIRTIO_CONFIG_S_FAILED); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 2c2e6792f7e0a3141a7a505c360c94f1fda9d510..a7c08cc4c1b7017702188fa7df8f7401fa364998 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -241,11 +241,11 @@ static inline void update_stat(struct virtio_balloon *vb, int idx, #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT) -static void update_balloon_stats(struct virtio_balloon *vb) +static unsigned int update_balloon_stats(struct virtio_balloon *vb) { unsigned long events[NR_VM_EVENT_ITEMS]; struct sysinfo i; - int idx = 0; + unsigned int idx = 0; long available; all_vm_events(events); @@ -253,18 +253,22 @@ static void update_balloon_stats(struct virtio_balloon *vb) available = si_mem_available(); +#ifdef CONFIG_VM_EVENT_COUNTERS update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, pages_to_bytes(events[PSWPIN])); update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, pages_to_bytes(events[PSWPOUT])); update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); +#endif update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, pages_to_bytes(i.freeram)); update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, pages_to_bytes(i.totalram)); update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL, pages_to_bytes(available)); + + return idx; } /* @@ -290,14 +294,14 @@ static void stats_handle_request(struct virtio_balloon *vb) { struct virtqueue *vq; struct scatterlist sg; - unsigned int len; + unsigned int len, num_stats; - update_balloon_stats(vb); + num_stats = update_balloon_stats(vb); vq = vb->stats_vq; if (!virtqueue_get_buf(vq, &len)) return; - sg_init_one(&sg, vb->stats, sizeof(vb->stats)); + sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); virtqueue_kick(vq); } @@ -421,15 +425,16 @@ static int init_vqs(struct virtio_balloon *vb) vb->deflate_vq = vqs[1]; if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { struct scatterlist sg; + unsigned int num_stats; vb->stats_vq = vqs[2]; /* * Prime this virtqueue with one buffer so the hypervisor can * use it to signal us later (it can't be broken yet!). 
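The balloon change above makes update_balloon_stats() report how many entries it actually filled, so the host is no longer handed sizeof(vb->stats) worth of data when some counters (the VM-event ones) are compiled out. The same pattern in a self-contained sketch; the stat tags and values here are invented:

#include <stdio.h>

struct stat_entry { unsigned short tag; unsigned long long val; };

static unsigned int fill_stats(struct stat_entry *s)
{
	unsigned int idx = 0;

#ifdef HAVE_EVENT_COUNTERS
	s[idx].tag = 0; s[idx].val = 111; idx++;	/* swap-in, maybe absent */
#endif
	s[idx].tag = 4; s[idx].val = 222; idx++;	/* free memory */
	s[idx].tag = 5; s[idx].val = 333; idx++;	/* total memory */
	return idx;
}

int main(void)
{
	struct stat_entry stats[8];
	unsigned int n = fill_stats(stats);

	/* size the outgoing buffer by the filled count, not sizeof(stats) */
	printf("sending %zu bytes (%u entries)\n", sizeof(stats[0]) * n, n);
	return 0;
}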
*/ - update_balloon_stats(vb); + num_stats = update_balloon_stats(vb); - sg_init_one(&sg, vb->stats, sizeof vb->stats); + sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) < 0) BUG(); diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c index 4874b0f18650f7bae4380f4a34b9ad5203ae49d5..518dfa1047cbd584d692930da7467d643c109a42 100644 --- a/drivers/watchdog/imx2_wdt.c +++ b/drivers/watchdog/imx2_wdt.c @@ -169,15 +169,21 @@ static int imx2_wdt_ping(struct watchdog_device *wdog) return 0; } -static int imx2_wdt_set_timeout(struct watchdog_device *wdog, - unsigned int new_timeout) +static void __imx2_wdt_set_timeout(struct watchdog_device *wdog, + unsigned int new_timeout) { struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog); - wdog->timeout = new_timeout; - regmap_update_bits(wdev->regmap, IMX2_WDT_WCR, IMX2_WDT_WCR_WT, WDOG_SEC_TO_COUNT(new_timeout)); +} + +static int imx2_wdt_set_timeout(struct watchdog_device *wdog, + unsigned int new_timeout) +{ + __imx2_wdt_set_timeout(wdog, new_timeout); + + wdog->timeout = new_timeout; return 0; } @@ -371,7 +377,11 @@ static int imx2_wdt_suspend(struct device *dev) /* The watchdog IP block is running */ if (imx2_wdt_is_running(wdev)) { - imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); + /* + * Don't update wdog->timeout, we'll restore the current value + * during resume. + */ + __imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); imx2_wdt_ping(wdog); } diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 1e9d2f84e5b53cf3a97f7863e3260adc4f8312eb..1592dc61320013bcbad9147ec6692509bc2daab9 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work) { struct afs_server *server; struct afs_vnode *vnode, *xvnode; - time_t now; + time64_t now; long timeout; int ret; @@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work) _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); /* find the first vnode to update */ spin_lock(&server->cb_lock); @@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work) /* and then reschedule */ _debug("reschedule"); - vnode->update_at = get_seconds() + afs_vnode_update_timeout; + vnode->update_at = ktime_get_real_seconds() + + afs_vnode_update_timeout; spin_lock(&server->cb_lock); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index d764236072b192d33a0b8eedb3821d7391991067..168f2a4d1180dff1a835121cc54f869bb2130fb2 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -106,6 +106,9 @@ bool afs_cm_incoming_call(struct afs_call *call) case CBProbe: call->type = &afs_SRXCBProbe; return true; + case CBProbeUuid: + call->type = &afs_SRXCBProbeUuid; + return true; case CBTellMeAboutYourself: call->type = &afs_SRXCBTellMeAboutYourself; return true; @@ -165,7 +168,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) struct afs_callback *cb; struct afs_server *server; __be32 *bp; - u32 tmp; int ret, loop; _enter("{%u}", call->unmarshall); @@ -227,9 +229,9 @@ static int afs_deliver_cb_callback(struct afs_call *call) if (ret < 0) return ret; - tmp = ntohl(call->tmp); - _debug("CB count: %u", tmp); - if (tmp != call->count && tmp != 0) + call->count2 = ntohl(call->tmp); + _debug("CB count: %u", call->count2); + if (call->count2 != call->count && call->count2 != 0) return -EBADMSG; call->offset = 0; call->unmarshall++; @@ -237,14 +239,14 @@ static int afs_deliver_cb_callback(struct afs_call *call) 
case 4: _debug("extract CB array"); ret = afs_extract_data(call, call->buffer, - call->count * 3 * 4, false); + call->count2 * 3 * 4, false); if (ret < 0) return ret; _debug("unmarshall CB array"); cb = call->request; bp = call->buffer; - for (loop = call->count; loop > 0; loop--, cb++) { + for (loop = call->count2; loop > 0; loop--, cb++) { cb->version = ntohl(*bp++); cb->expiry = ntohl(*bp++); cb->type = ntohl(*bp++); diff --git a/fs/afs/file.c b/fs/afs/file.c index 6344aee4ac4bff8e768fc7c344ec7ccea3670b98..72372970725b56d35d76a0c840bfb3a04f4e7f3c 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -29,6 +29,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping, const struct file_operations afs_file_operations = { .open = afs_open, + .flush = afs_flush, .release = afs_release, .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 31c616ab9b400a66dfbcd39c12dd87665f88acf9..88e440607ed71ad6da305ed38186f5558ba45a54 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -105,7 +105,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, vnode->vfs_inode.i_mode = mode; } - vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server; + vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client; vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_version = data_version; @@ -139,7 +139,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode) vnode->cb_version = ntohl(*bp++); vnode->cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); - vnode->cb_expires = vnode->cb_expiry + get_seconds(); + vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds(); *_bp = bp; } @@ -676,8 +676,8 @@ int afs_fs_create(struct afs_server *server, memset(bp, 0, padsz); bp = (void *) bp + padsz; } - *bp++ = htonl(AFS_SET_MODE); - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ @@ -945,8 +945,8 @@ int afs_fs_symlink(struct afs_server *server, memset(bp, 0, c_padsz); bp = (void *) bp + c_padsz; } - *bp++ = htonl(AFS_SET_MODE); - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(S_IRWXUGO); /* unix mode */ @@ -1145,8 +1145,8 @@ static int afs_fs_store_data64(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - *bp++ = 0; /* mask */ - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ @@ -1178,7 +1178,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, _enter(",%x,{%x:%u},,", key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); - size = to - offset; + size = (loff_t)to - (loff_t)offset; if (first != last) size += (loff_t)(last - first) << PAGE_SHIFT; pos = (loff_t)first << PAGE_SHIFT; @@ -1222,8 +1222,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - *bp++ = 0; /* mask */ - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* 
mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 86cc7264c21cda727a3bb71e73af9a7ddc35eec5..42582e41948fd06fe387bf0766f44d62aabc238b 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -70,9 +70,9 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) set_nlink(inode, vnode->status.nlink); inode->i_uid = vnode->status.owner; - inode->i_gid = GLOBAL_ROOT_GID; + inode->i_gid = vnode->status.group; inode->i_size = vnode->status.size; - inode->i_ctime.tv_sec = vnode->status.mtime_server; + inode->i_ctime.tv_sec = vnode->status.mtime_client; inode->i_ctime.tv_nsec = 0; inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; @@ -245,12 +245,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, vnode->cb_version = 0; vnode->cb_expiry = 0; vnode->cb_type = 0; - vnode->cb_expires = get_seconds(); + vnode->cb_expires = ktime_get_real_seconds(); } else { vnode->cb_version = cb->version; vnode->cb_expiry = cb->expiry; vnode->cb_type = cb->type; - vnode->cb_expires = vnode->cb_expiry + get_seconds(); + vnode->cb_expires = vnode->cb_expiry + + ktime_get_real_seconds(); } } @@ -323,7 +324,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) && !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - if (vnode->cb_expires < get_seconds() + 10) { + if (vnode->cb_expires < ktime_get_real_seconds() + 10) { _debug("callback expired"); set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); } else { diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 535a38d2c1d06f752cd6beae54766c1a9d899527..dd98dcda6a3fb3666eec84f04eef1b75a71ed9df 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -105,7 +106,10 @@ struct afs_call { unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned first_offset; /* offset into mapping[first] */ - unsigned last_to; /* amount of mapping[last] */ + union { + unsigned last_to; /* amount of mapping[last] */ + unsigned count2; /* count used in unmarshalling */ + }; unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ @@ -242,7 +246,7 @@ struct afs_cache_vhash { */ struct afs_vlocation { atomic_t usage; - time_t time_of_death; /* time at which put reduced usage to 0 */ + time64_t time_of_death; /* time at which put reduced usage to 0 */ struct list_head link; /* link in cell volume location list */ struct list_head grave; /* link in master graveyard list */ struct list_head update; /* link in master update list */ @@ -253,7 +257,7 @@ struct afs_vlocation { struct afs_cache_vlocation vldb; /* volume information DB record */ struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ wait_queue_head_t waitq; /* status change waitqueue */ - time_t update_at; /* time at which record should be updated */ + time64_t update_at; /* time at which record should be updated */ spinlock_t lock; /* access lock */ afs_vlocation_state_t state; /* volume location state */ unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */ @@ -266,7 +270,7 @@ struct afs_vlocation { */ struct afs_server { atomic_t usage; - time_t time_of_death; /* time at which put reduced usage to 0 */ + time64_t time_of_death; /* time at which put reduced usage 
to 0 */ struct in_addr addr; /* server address */ struct afs_cell *cell; /* cell in which server resides */ struct list_head link; /* link in cell's server list */ @@ -369,8 +373,8 @@ struct afs_vnode { struct rb_node server_rb; /* link in server->fs_vnodes */ struct rb_node cb_promise; /* link in server->cb_promises */ struct work_struct cb_broken_work; /* work to be done on callback break */ - time_t cb_expires; /* time at which callback expires */ - time_t cb_expires_at; /* time used to order cb_promise */ + time64_t cb_expires; /* time at which callback expires */ + time64_t cb_expires_at; /* time used to order cb_promise */ unsigned cb_version; /* callback version */ unsigned cb_expiry; /* callback expiry time */ afs_callback_type_t cb_type; /* type of callback */ @@ -749,6 +753,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); extern int afs_writeback_all(struct afs_vnode *); +extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 91ea1aa0d8b3ab0a817b525e9f9b3deec98f775f..100b207efc9eaddff4ed9f7e0e4415ed62ba2880 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -84,6 +84,8 @@ int afs_abort_to_error(u32 abort_code) case RXKADDATALEN: return -EKEYREJECTED; case RXKADILLEGALLEVEL: return -EKEYREJECTED; + case RXGEN_OPCODE: return -ENOTSUPP; + default: return -EREMOTEIO; } } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 25f05a8d21b195fffb10f89cff990888fefd1ab2..523b1d3ca2c68437566d920ea5332735774be43c 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -321,6 +321,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; + size_t offset; + u32 abort_code; int ret; _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); @@ -368,9 +370,11 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, msg.msg_controllen = 0; msg.msg_flags = (call->send_pages ? MSG_MORE : 0); - /* have to change the state *before* sending the last packet as RxRPC - * might give us the reply before it returns from sending the - * request */ + /* We have to change the state *before* sending the last packet as + * rxrpc might give us the reply before it returns from sending the + * request. Further, if the send fails, we may already have been given + * a notification and may have collected it. 
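The time_t to time64_t conversions running through the afs hunks above are y2038 hardening: a signed 32-bit seconds counter overflows in January 2038. A small user-space demonstration; the wrap is produced with an unsigned addition and a cast, since overflowing a signed int directly would be undefined behaviour:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int32_t t32 = INT32_MAX;	/* 2038-01-19T03:14:07Z as UNIX time */
	int64_t t64 = t32;

	/* one more second: the 32-bit counter wraps negative */
	t32 = (int32_t)((uint32_t)t32 + 1u);
	t64 += 1;

	printf("32-bit: %d, 64-bit: %lld\n", (int)t32, (long long)t64);
	return 0;
}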
+ */ if (!call->send_pages) call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(afs_socket, rxcall, @@ -389,7 +393,17 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return wait_mode->wait(call); error_do_abort: - rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD"); + call->state = AFS_CALL_COMPLETE; + if (ret != -ECONNABORTED) { + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, + -ret, "KSD"); + } else { + abort_code = 0; + offset = 0; + rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset, + false, &abort_code); + ret = call->type->abort_to_error(abort_code); + } error_kill_call: afs_end_call(call); _leave(" = %d", ret); @@ -434,16 +448,18 @@ static void afs_deliver_to_call(struct afs_call *call) case -EINPROGRESS: case -EAGAIN: goto out; + case -ECONNABORTED: + goto call_complete; case -ENOTCONN: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KNC"); - goto do_abort; + goto save_error; case -ENOTSUPP: - abort_code = RX_INVALID_OPERATION; + abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KIV"); - goto do_abort; + goto save_error; case -ENODATA: case -EBADMSG: case -EMSGSIZE: @@ -453,7 +469,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, EBADMSG, "KUM"); - goto do_abort; + goto save_error; } } @@ -464,8 +480,9 @@ static void afs_deliver_to_call(struct afs_call *call) _leave(""); return; -do_abort: +save_error: call->error = ret; +call_complete: call->state = AFS_CALL_COMPLETE; goto done; } @@ -475,7 +492,6 @@ static void afs_deliver_to_call(struct afs_call *call) */ static int afs_wait_for_call_to_complete(struct afs_call *call) { - const char *abort_why; int ret; DECLARE_WAITQUEUE(myself, current); @@ -494,13 +510,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) continue; } - abort_why = "KWC"; - ret = call->error; - if (call->state == AFS_CALL_COMPLETE) - break; - abort_why = "KWI"; - ret = -EINTR; - if (signal_pending(current)) + if (call->state == AFS_CALL_COMPLETE || + signal_pending(current)) break; schedule(); } @@ -508,13 +519,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); - /* kill the call */ + /* Kill off the call if it's still live. 
*/ if (call->state < AFS_CALL_COMPLETE) { - _debug("call incomplete"); + _debug("call interrupted"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_CALL_DEAD, -ret, abort_why); + RX_USER_ABORT, -EINTR, "KWI"); } + ret = call->error; _debug("call complete"); afs_end_call(call); _leave(" = %d", ret); diff --git a/fs/afs/security.c b/fs/afs/security.c index 8d010422dc8962b72fb3af64f75fdedb8e892cc0..bfa9d342838355b817703fe3a3e4b6e692fdedb0 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask) } else { if (!(access & AFS_ACE_LOOKUP)) goto permission_denied; + if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR)) + goto permission_denied; if (mask & (MAY_EXEC | MAY_READ)) { if (!(access & AFS_ACE_READ)) goto permission_denied; + if (!(inode->i_mode & S_IRUSR)) + goto permission_denied; } else if (mask & MAY_WRITE) { if (!(access & AFS_ACE_WRITE)) goto permission_denied; + if (!(inode->i_mode & S_IWUSR)) + goto permission_denied; } } key_put(key); - ret = generic_permission(inode, mask); _leave(" = %d", ret); return ret; diff --git a/fs/afs/server.c b/fs/afs/server.c index d4066ab7dd5505b364a6506a1a2d932274bb5d9d..c001b1f2455fbf6dee4c9635c95590ada3890483 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -242,7 +242,7 @@ void afs_put_server(struct afs_server *server) spin_lock(&afs_server_graveyard_lock); if (atomic_read(&server->usage) == 0) { list_move_tail(&server->grave, &afs_server_graveyard); - server->time_of_death = get_seconds(); + server->time_of_death = ktime_get_real_seconds(); queue_delayed_work(afs_wq, &afs_server_reaper, afs_server_timeout * HZ); } @@ -277,9 +277,9 @@ static void afs_reap_server(struct work_struct *work) LIST_HEAD(corpses); struct afs_server *server; unsigned long delay, expiry; - time_t now; + time64_t now; - now = get_seconds(); + now = ktime_get_real_seconds(); spin_lock(&afs_server_graveyard_lock); while (!list_empty(&afs_server_graveyard)) { diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 45a86396fd2db350f097bc291b1bbffee70b621e..92bd5553b8c91afa434a5d5c5db8dbb431e53e25 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) struct afs_vlocation *xvl; /* wait at least 10 minutes before updating... 
*/ - vl->update_at = get_seconds() + afs_vlocation_update_timeout; + vl->update_at = ktime_get_real_seconds() + + afs_vlocation_update_timeout; spin_lock(&afs_vlocation_updates_lock); @@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl) if (atomic_read(&vl->usage) == 0) { _debug("buried"); list_move_tail(&vl->grave, &afs_vlocation_graveyard); - vl->time_of_death = get_seconds(); + vl->time_of_death = ktime_get_real_seconds(); queue_delayed_work(afs_wq, &afs_vlocation_reap, afs_vlocation_timeout * HZ); @@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work) LIST_HEAD(corpses); struct afs_vlocation *vl; unsigned long delay, expiry; - time_t now; + time64_t now; _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); spin_lock(&afs_vlocation_graveyard_lock); while (!list_empty(&afs_vlocation_graveyard)) { @@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work) { struct afs_cache_vlocation vldb; struct afs_vlocation *vl, *xvl; - time_t now; + time64_t now; long timeout; int ret; _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); /* find a record to update */ spin_lock(&afs_vlocation_updates_lock); @@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work) /* and then reschedule */ _debug("reschedule"); - vl->update_at = get_seconds() + afs_vlocation_update_timeout; + vl->update_at = ktime_get_real_seconds() + + afs_vlocation_update_timeout; spin_lock(&afs_vlocation_updates_lock); diff --git a/fs/afs/write.c b/fs/afs/write.c index f865c3f05bea5074a91a547e4682523e8aab88be..3fba2b573c86a72e66df4b4878015fc53000a238 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -148,12 +148,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping, kfree(candidate); return -ENOMEM; } - *pagep = page; - /* page won't leak in error case: it eventually gets cleaned off LRU */ if (!PageUptodate(page) && len != PAGE_SIZE) { ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page); if (ret < 0) { + unlock_page(page); + put_page(page); kfree(candidate); _leave(" = %d [prep]", ret); return ret; @@ -161,6 +161,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping, SetPageUptodate(page); } + /* page won't leak in error case: it eventually gets cleaned off LRU */ + *pagep = page; + try_again: spin_lock(&vnode->writeback_lock); @@ -296,10 +299,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, ASSERTCMP(pv.nr, ==, count); for (loop = 0; loop < count; loop++) { - ClearPageUptodate(pv.pages[loop]); + struct page *page = pv.pages[loop]; + ClearPageUptodate(page); if (error) - SetPageError(pv.pages[loop]); - end_page_writeback(pv.pages[loop]); + SetPageError(page); + if (PageWriteback(page)) + end_page_writeback(page); + if (page->index >= first) + first = page->index + 1; } __pagevec_release(&pv); @@ -502,6 +509,7 @@ static int afs_writepages_region(struct address_space *mapping, if (PageWriteback(page) || !PageDirty(page)) { unlock_page(page); + put_page(page); continue; } @@ -734,6 +742,20 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) return ret; } +/* + * Flush out all outstanding writes on a file opened for writing when it is + * closed. 
+ */ +int afs_flush(struct file *file, fl_owner_t id) +{ + _enter(""); + + if ((file->f_mode & FMODE_WRITE) == 0) + return 0; + + return vfs_fsync(file, 0); +} + /* * notification that a previously read-only page is about to become writable * - if it returns an error, the caller will deliver a bus error signal diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 4c71dba90120a52648045171fae8b1e025b49997..0ea31a53fd5bb8dbf762727343de8baa872a06c9 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -176,7 +176,6 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, mutex_unlock(&sbi->wq_mutex); - if (autofs4_write(sbi, pipe, &pkt, pktsz)) switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) { case 0: break; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 705bb5f5a87f2ce67066705262f696175c82d46d..a29730c448506f59ef41ac366ac21f11466bdc7c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3397,13 +3397,6 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, goto again; } - /* We've already setup this transaction, go ahead and exit */ - if (block_group->cache_generation == trans->transid && - i_size_read(inode)) { - dcs = BTRFS_DC_SETUP; - goto out_put; - } - /* * We want to set the generation to 0, that way if anything goes wrong * from here on out we know not to trust this cache when we load up next @@ -3427,6 +3420,13 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, } WARN_ON(ret); + /* We've already setup this transaction, go ahead and exit */ + if (block_group->cache_generation == trans->transid && + i_size_read(inode)) { + dcs = BTRFS_DC_SETUP; + goto out_put; + } + if (i_size_read(inode) > 0) { ret = btrfs_check_trunc_cache_free_space(root, &root->fs_info->global_block_rsv); @@ -9362,6 +9362,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, ret = btrfs_del_root(trans, tree_root, &root->root_key); if (ret) { btrfs_abort_transaction(trans, ret); + err = ret; goto out_end_trans; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e4b48f377d3a9110542872c56ddbb3933e6d4a75..c56253a1e5b4972f93b07c01356ec34c684c16d7 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1253,7 +1253,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, /* Lock all pages first so we can lock the extent safely. 
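afs_flush() above makes the final stage of close(2) flush outstanding writes for files opened for writing. A user-space sketch of the guarantee it provides implicitly, i.e. what a program would otherwise have to request by hand; the file name is hypothetical:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("demo.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "hello\n", 6) != 6)
		perror("write");
	/* afs_flush() arranges the equivalent of this fsync() when the
	 * last writer closes the file */
	if (fsync(fd))
		perror("fsync");
	close(fd);
	return 0;
}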
*/ ret = io_ctl_prepare_pages(io_ctl, inode, 0); if (ret) - goto out; + goto out_unlock; lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, &cached_state); @@ -1346,6 +1346,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, out_nospc: cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list); +out_unlock: if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) up_write(&block_group->data_rwsem); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f089d7d8afe76abd1398f2c2dcd165836e17610a..a8a1fb40e25819f9d962491b39c40de4c1886ce1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2063,8 +2063,15 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) goto out; } - btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state, - 0); + ret = btrfs_set_extent_delalloc(inode, page_start, page_end, + &cached_state, 0); + if (ret) { + mapping_set_error(page->mapping, ret); + end_extent_writepage(page, ret, page_start, page_end); + ClearPageChecked(page); + goto out; + } + ClearPageChecked(page); set_page_dirty(page); out: @@ -6812,6 +6819,20 @@ static noinline int uncompress_inline(struct btrfs_path *path, max_size = min_t(unsigned long, PAGE_SIZE, max_size); ret = btrfs_decompress(compress_type, tmp, page, extent_offset, inline_size, max_size); + + /* + * decompression code contains a memset to fill in any space between the end + * of the uncompressed data and the end of max_size in case the decompressed + * data ends up shorter than ram_bytes. That doesn't cover the hole between + * the end of an inline extent and the beginning of the next block, so we + * cover that region here. + */ + + if (max_size + pg_offset < PAGE_SIZE) { + char *map = kmap(page); + memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset); + kunmap(page); + } kfree(tmp); return ret; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 77f9efc1f7aa02f420d2e806783fab70be2d91a1..9a47b5598df764f862ac0b7e726d48959b48e208 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6196,8 +6196,13 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) goto out; } + /* + * Check that we don't overflow at later allocations, we request + * clone_sources_count + 1 items, and compare to unsigned long inside + * access_ok. 
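The comment above spells out why the btrfs send bound changed: the kernel later allocates clone_sources_count + 1 elements, and access_ok() compares against an unsigned long, so the check must use ULONG_MAX and leave room for the extra slot. The guard reduced to a stand-alone sketch; the stub struct only stands in for struct clone_root's size:

#include <limits.h>
#include <stdio.h>

/* stand-in for struct clone_root: only its size matters here */
struct clone_root_stub { void *root; unsigned long long ino, offset; };

static int count_is_safe(unsigned long long count)
{
	/* room for count entries plus the one extra the kernel allocates */
	return count <= ULONG_MAX / sizeof(struct clone_root_stub) - 1;
}

int main(void)
{
	printf("%d\n", count_is_safe(16));		/* 1: fine */
	printf("%d\n", count_is_safe(~0ULL / 8));	/* 0: would overflow */
	return 0;
}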
+ */ if (arg->clone_sources_count > - ULLONG_MAX / sizeof(*arg->clone_sources)) { + ULONG_MAX / sizeof(struct clone_root) - 1) { ret = -EINVAL; goto out; } diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 6e144048a72eedb206e902bf71fe19d6c6555c0e..a724d9a79bd20b63f6f4a589803637fc31a0f2be 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -501,7 +501,8 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, path = btrfs_alloc_path(); if (!path) { test_msg("Couldn't allocate path\n"); - return -ENOMEM; + ret = -ENOMEM; + goto out; } ret = add_block_group_free_space(&trans, root->fs_info, cache); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c0f52c443c343a1c4811098c39176e33b1e81804..3d2639c30018a5956314fe00cdd5fe8f8c81afb9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1396,6 +1396,29 @@ static int __close_session(struct ceph_mds_client *mdsc, return request_close_session(mdsc, session); } +static bool drop_negative_children(struct dentry *dentry) +{ + struct dentry *child; + bool all_negative = true; + + if (!d_is_dir(dentry)) + goto out; + + spin_lock(&dentry->d_lock); + list_for_each_entry(child, &dentry->d_subdirs, d_child) { + if (d_really_is_positive(child)) { + all_negative = false; + break; + } + } + spin_unlock(&dentry->d_lock); + + if (all_negative) + shrink_dcache_parent(dentry); +out: + return all_negative; +} + /* * Trim old(er) caps. * @@ -1441,16 +1464,27 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) if ((used | wanted) & ~oissued & mine) goto out; /* we need these caps */ - session->s_trim_caps--; if (oissued) { /* we aren't the only cap.. just remove us */ __ceph_remove_cap(cap, true); + session->s_trim_caps--; } else { + struct dentry *dentry; /* try dropping referring dentries */ spin_unlock(&ci->i_ceph_lock); - d_prune_aliases(inode); - dout("trim_caps_cb %p cap %p pruned, count now %d\n", - inode, cap, atomic_read(&inode->i_count)); + dentry = d_find_any_alias(inode); + if (dentry && drop_negative_children(dentry)) { + int count; + dput(dentry); + d_prune_aliases(inode); + count = atomic_read(&inode->i_count); + if (count == 1) + session->s_trim_caps--; + dout("trim_caps_cb %p cap %p pruned, count now %d\n", + inode, cap, count); + } else { + dput(dentry); + } return 0; } diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 5eb04129f93849c2727a517119bc484761160332..73360df52ce943e5f9236170034fce1eecf5d115 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -318,9 +318,8 @@ int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, { int i; int rc; - char password_with_pad[CIFS_ENCPWD_SIZE]; + char password_with_pad[CIFS_ENCPWD_SIZE] = {0}; - memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); if (password) strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7710dd90ca6623be36a31fe801a0e17c31e62be7..c4a27f6b6fa82c3364c0deff3229aeb990314ee0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1667,7 +1667,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, tmp_end++; if (!(tmp_end < end && tmp_end[1] == delim)) { /* No it is not. 
Set the password to NULL */ - kfree(vol->password); + kzfree(vol->password); vol->password = NULL; break; } @@ -1705,7 +1705,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, options = end; } - kfree(vol->password); + kzfree(vol->password); /* Now build new password string */ temp_len = strlen(value); vol->password = kzalloc(temp_len+1, GFP_KERNEL); @@ -4159,7 +4159,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) reset_cifs_unix_caps(0, tcon, NULL, vol_info); out: kfree(vol_info->username); - kfree(vol_info->password); + kzfree(vol_info->password); kfree(vol_info); return tcon; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cf192f9ce25465950bb7c2069ae3bb117cdefd4f..02e403af9518f353623cc5f9dbf56cf3add32798 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3285,20 +3285,18 @@ static const struct vm_operations_struct cifs_file_vm_ops = { int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) { - int rc, xid; + int xid, rc = 0; struct inode *inode = file_inode(file); xid = get_xid(); - if (!CIFS_CACHE_READ(CIFS_I(inode))) { + if (!CIFS_CACHE_READ(CIFS_I(inode))) rc = cifs_zap_mapping(inode); - if (rc) - return rc; - } - - rc = generic_file_mmap(file, vma); - if (rc == 0) + if (!rc) + rc = generic_file_mmap(file, vma); + if (!rc) vma->vm_ops = &cifs_file_vm_ops; + free_xid(xid); return rc; } @@ -3308,16 +3306,16 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) int rc, xid; xid = get_xid(); + rc = cifs_revalidate_file(file); - if (rc) { + if (rc) cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", rc); - free_xid(xid); - return rc; - } - rc = generic_file_mmap(file, vma); - if (rc == 0) + if (!rc) + rc = generic_file_mmap(file, vma); + if (!rc) vma->vm_ops = &cifs_file_vm_ops; + free_xid(xid); return rc; } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 5419afea0a36c40a32ef5944dc6b151f094d0b9a..323d8e34abde6429e4be159a4c7f4e18b7000e50 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -99,14 +99,11 @@ sesInfoFree(struct cifs_ses *buf_to_free) kfree(buf_to_free->serverOS); kfree(buf_to_free->serverDomain); kfree(buf_to_free->serverNOS); - if (buf_to_free->password) { - memset(buf_to_free->password, 0, strlen(buf_to_free->password)); - kfree(buf_to_free->password); - } + kzfree(buf_to_free->password); kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); - kfree(buf_to_free->auth_key.response); - kfree(buf_to_free); + kzfree(buf_to_free->auth_key.response); + kzfree(buf_to_free); } struct cifs_tcon * @@ -137,10 +134,7 @@ tconInfoFree(struct cifs_tcon *buf_to_free) } atomic_dec(&tconInfoAllocCount); kfree(buf_to_free->nativeFileSystem); - if (buf_to_free->password) { - memset(buf_to_free->password, 0, strlen(buf_to_free->password)); - kfree(buf_to_free->password); - } + kzfree(buf_to_free->password); kfree(buf_to_free); } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 69b610ad3fdc536c5f8befff05b01e9228850822..94c4c19012226d97916d84057215761b83518586 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -585,8 +585,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) } /* check validate negotiate info response matches what we got earlier */ - if (pneg_rsp->Dialect != - cpu_to_le16(tcon->ses->server->vals->protocol_id)) + if (pneg_rsp->Dialect != cpu_to_le16(tcon->ses->server->dialect)) goto vneg_out; if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode)) diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile index 
9f6607f17b53bfeba9d7940380928439f4e5b466..cb496989a6b693fe0b13cb2c737a7643288d5bc9 100644 --- a/fs/crypto/Makefile +++ b/fs/crypto/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o -fscrypto-y := crypto.o fname.o policy.o keyinfo.o +fscrypto-y := crypto.o fname.o hooks.o keyinfo.o policy.o fscrypto-$(CONFIG_BLOCK) += bio.o diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 151d4893382d8fae348cd7aa2f03b1c3b3c39656..732a786cce9deabe490410ee6dfb15c72fc8f048 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -126,21 +126,6 @@ struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) } EXPORT_SYMBOL(fscrypt_get_ctx); -/** - * page_crypt_complete() - completion callback for page crypto - * @req: The asynchronous cipher request context - * @res: The result of the cipher operation - */ -static void page_crypt_complete(struct crypto_async_request *req, int res) -{ - struct fscrypt_completion_result *ecr = req->data; - - if (res == -EINPROGRESS) - return; - ecr->res = res; - complete(&ecr->completion); -} - int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, u64 lblk_num, struct page *src_page, struct page *dest_page, unsigned int len, @@ -151,7 +136,7 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, u8 padding[FS_IV_SIZE - sizeof(__le64)]; } iv; struct skcipher_request *req = NULL; - DECLARE_FS_COMPLETION_RESULT(ecr); + DECLARE_CRYPTO_WAIT(wait); struct scatterlist dst, src; struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; @@ -179,7 +164,7 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, skcipher_request_set_callback( req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - page_crypt_complete, &ecr); + crypto_req_done, &wait); sg_init_table(&dst, 1); sg_set_page(&dst, dest_page, len, offs); @@ -187,14 +172,9 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, sg_set_page(&src, src_page, len, offs); skcipher_request_set_crypt(req, &src, &dst, len, &iv); if (rw == FS_DECRYPT) - res = crypto_skcipher_decrypt(req); + res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait); else - res = crypto_skcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); - wait_for_completion(&ecr.completion); - res = ecr.res; - } + res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); skcipher_request_free(req); if (res) { printk_ratelimited(KERN_ERR @@ -340,7 +320,7 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) return -ECHILD; dir = dget_parent(dentry); - if (!d_inode(dir)->i_sb->s_cop->is_encrypted(d_inode(dir))) { + if (!IS_ENCRYPTED(d_inode(dir))) { dput(dir); return 0; } @@ -410,10 +390,7 @@ int fscrypt_initialize(unsigned int cop_flags) { int i, res = -ENOMEM; - /* - * No need to allocate a bounce page pool if there already is one or - * this FS won't use it. - */ + /* No need to allocate a bounce page pool if this FS won't use it. 
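The fscrypt conversions above drop a hand-rolled completion callback in favour of the crypto API's DECLARE_CRYPTO_WAIT/crypto_wait_req helpers. What those helpers encapsulate is an "issue the async call, then block until the callback fires" pattern; a user-space analogue using pthreads, with -EINPROGRESS hard-coded as -115 and the cipher replaced by a dummy thread (illustrative only; build with -lpthread):

#include <pthread.h>
#include <stdio.h>

struct waiter {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done, res;
};

static struct waiter w = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0
};

static void complete_cb(struct waiter *wt, int res)	/* the callback */
{
	pthread_mutex_lock(&wt->lock);
	wt->res = res;
	wt->done = 1;
	pthread_cond_signal(&wt->cond);
	pthread_mutex_unlock(&wt->lock);
}

static int wait_req(int ret, struct waiter *wt)	/* crypto_wait_req() shape */
{
	if (ret != -115)	/* not -EINPROGRESS: finished synchronously */
		return ret;
	pthread_mutex_lock(&wt->lock);
	while (!wt->done)
		pthread_cond_wait(&wt->cond, &wt->lock);
	pthread_mutex_unlock(&wt->lock);
	return wt->res;
}

static void *async_op(void *arg)
{
	complete_cb(arg, 0);	/* pretend the cipher finished OK */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, async_op, &w);
	printf("result: %d\n", wait_req(-115, &w));	/* 0 */
	pthread_join(t, NULL);
	return 0;
}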
*/ if (cop_flags & FS_CFLG_OWN_PAGES) return 0; diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index ad9f814fdead370eaa420a423139938687c3047b..6eb434363ff2b6b2b7c61d44e6f8dd45feaf8833 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -14,21 +14,6 @@ #include #include "fscrypt_private.h" -/** - * fname_crypt_complete() - completion callback for filename crypto - * @req: The asynchronous cipher request context - * @res: The result of the cipher operation - */ -static void fname_crypt_complete(struct crypto_async_request *req, int res) -{ - struct fscrypt_completion_result *ecr = req->data; - - if (res == -EINPROGRESS) - return; - ecr->res = res; - complete(&ecr->completion); -} - /** * fname_encrypt() - encrypt a filename * @@ -40,7 +25,7 @@ static int fname_encrypt(struct inode *inode, const struct qstr *iname, struct fscrypt_str *oname) { struct skcipher_request *req = NULL; - DECLARE_FS_COMPLETION_RESULT(ecr); + DECLARE_CRYPTO_WAIT(wait); struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; @@ -76,17 +61,12 @@ static int fname_encrypt(struct inode *inode, } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - fname_crypt_complete, &ecr); + crypto_req_done, &wait); sg_init_one(&sg, oname->name, cryptlen); skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv); /* Do the encryption */ - res = crypto_skcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - /* Request is being completed asynchronously; wait for it */ - wait_for_completion(&ecr.completion); - res = ecr.res; - } + res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); skcipher_request_free(req); if (res < 0) { printk_ratelimited(KERN_ERR @@ -110,7 +90,7 @@ static int fname_decrypt(struct inode *inode, struct fscrypt_str *oname) { struct skcipher_request *req = NULL; - DECLARE_FS_COMPLETION_RESULT(ecr); + DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; @@ -131,7 +111,7 @@ static int fname_decrypt(struct inode *inode, } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - fname_crypt_complete, &ecr); + crypto_req_done, &wait); /* Initialize IV */ memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); @@ -140,11 +120,7 @@ static int fname_decrypt(struct inode *inode, sg_init_one(&src_sg, iname->name, iname->len); sg_init_one(&dst_sg, oname->name, oname->len); skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); - res = crypto_skcipher_decrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } + res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait); skcipher_request_free(req); if (res < 0) { printk_ratelimited(KERN_ERR @@ -382,8 +358,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, memset(fname, 0, sizeof(struct fscrypt_name)); fname->usr_fname = iname; - if (!dir->i_sb->s_cop->is_encrypted(dir) || - fscrypt_is_dot_dotdot(iname)) { + if (!IS_ENCRYPTED(dir) || fscrypt_is_dot_dotdot(iname)) { fname->disk_name.name = (unsigned char *)iname->name; fname->disk_name.len = iname->len; return 0; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index a1d5021c31ef8f494b5ae96cd4e2129976e43ef3..4688a645cd6311476c37a93f9ac8c7f815314f77 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -11,7 +11,8 @@ #ifndef _FSCRYPT_PRIVATE_H #define 
_FSCRYPT_PRIVATE_H -#include +#define __FS_HAS_ENCRYPTION 1 +#include #include /* Encryption parameters */ @@ -69,16 +70,6 @@ typedef enum { #define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 #define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002 -struct fscrypt_completion_result { - struct completion completion; - int res; -}; - -#define DECLARE_FS_COMPLETION_RESULT(ecr) \ - struct fscrypt_completion_result ecr = { \ - COMPLETION_INITIALIZER_ONSTACK((ecr).completion), 0 } - - /* crypto.c */ extern int fscrypt_initialize(unsigned int cop_flags); extern struct workqueue_struct *fscrypt_read_workqueue; diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c new file mode 100644 index 0000000000000000000000000000000000000000..9f5fb2eb9cf7dcd19f47c96295f9a33ac3021192 --- /dev/null +++ b/fs/crypto/hooks.c @@ -0,0 +1,112 @@ +/* + * fs/crypto/hooks.c + * + * Encryption hooks for higher-level filesystem operations. + */ + +#include +#include "fscrypt_private.h" + +/** + * fscrypt_file_open - prepare to open a possibly-encrypted regular file + * @inode: the inode being opened + * @filp: the struct file being set up + * + * Currently, an encrypted regular file can only be opened if its encryption key + * is available; access to the raw encrypted contents is not supported. + * Therefore, we first set up the inode's encryption key (if not already done) + * and return an error if it's unavailable. + * + * We also verify that if the parent directory (from the path via which the file + * is being opened) is encrypted, then the inode being opened uses the same + * encryption policy. This is needed as part of the enforcement that all files + * in an encrypted directory tree use the same encryption policy, as a + * protection against certain types of offline attacks. Note that this check is + * needed even when opening an *unencrypted* file, since it's forbidden to have + * an unencrypted file in an encrypted directory. 
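The consistency rule described in the fscrypt_file_open() documentation above reduces to a small predicate: the open may proceed only if the parent directory is unencrypted, or parent and file carry the same encryption context. A sketch of that predicate; the context representation (a boolean plus a raw master-key identifier) is invented for illustration:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct ctx { bool encrypted; char master_key[16]; };

static bool open_permitted(const struct ctx *dir, const struct ctx *file)
{
	if (!dir->encrypted)
		return true;	/* no restriction under a plain directory */
	return file->encrypted &&
	       !memcmp(dir->master_key, file->master_key, 16);
}

int main(void)
{
	struct ctx dir  = { true,  "key-A" };
	struct ctx same = { true,  "key-A" };
	struct ctx none = { false, "" };

	printf("%d %d\n", open_permitted(&dir, &same),	/* 1 */
	       open_permitted(&dir, &none));		/* 0 */
	return 0;
}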
+ * + * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code + */ +int fscrypt_file_open(struct inode *inode, struct file *filp) +{ + int err; + struct dentry *dir; + + err = fscrypt_require_key(inode); + if (err) + return err; + + dir = dget_parent(file_dentry(filp)); + if (IS_ENCRYPTED(d_inode(dir)) && + !fscrypt_has_permitted_context(d_inode(dir), inode)) { + pr_warn_ratelimited("fscrypt: inconsistent encryption contexts: %lu/%lu", + d_inode(dir)->i_ino, inode->i_ino); + err = -EPERM; + } + dput(dir); + return err; +} +EXPORT_SYMBOL_GPL(fscrypt_file_open); + +int __fscrypt_prepare_link(struct inode *inode, struct inode *dir) +{ + int err; + + err = fscrypt_require_key(dir); + if (err) + return err; + + if (!fscrypt_has_permitted_context(dir, inode)) + return -EPERM; + + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_link); + +int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + int err; + + err = fscrypt_require_key(old_dir); + if (err) + return err; + + err = fscrypt_require_key(new_dir); + if (err) + return err; + + if (old_dir != new_dir) { + if (IS_ENCRYPTED(new_dir) && + !fscrypt_has_permitted_context(new_dir, + d_inode(old_dentry))) + return -EPERM; + + if ((flags & RENAME_EXCHANGE) && + IS_ENCRYPTED(old_dir) && + !fscrypt_has_permitted_context(old_dir, + d_inode(new_dentry))) + return -EPERM; + } + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_rename); + +int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry) +{ + int err = fscrypt_get_encryption_info(dir); + + if (err) + return err; + + if (fscrypt_has_encryption_key(dir)) { + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY; + spin_unlock(&dentry->d_lock); + } + + d_set_d_op(dentry, &fscrypt_d_ops); + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 8e704d12a1cf2781087ec14d6ef6561ea9dd3236..c891d2ea9d2484d242c87a5d775bd1c6467c5276 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -17,17 +17,6 @@ static struct crypto_shash *essiv_hash_tfm; -static void derive_crypt_complete(struct crypto_async_request *req, int rc) -{ - struct fscrypt_completion_result *ecr = req->data; - - if (rc == -EINPROGRESS) - return; - - ecr->res = rc; - complete(&ecr->completion); -} - /** * derive_key_aes() - Derive a key using AES-128-ECB * @deriving_key: Encryption key used for derivation. 
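/*
 * Editor's note (not part of the patch): the fname.c hunks above and the
 * keyinfo.c hunk below are the same mechanical conversion: the private
 * fscrypt_completion_result / DECLARE_FS_COMPLETION_RESULT helpers are
 * dropped in favour of the generic crypto_wait_req() API from
 * <linux/crypto.h>. A minimal sketch of the resulting idiom, assuming
 * req is an allocated, keyed skcipher request whose scatterlists and IV
 * are already set up:
 *
 *	DECLARE_CRYPTO_WAIT(wait);
 *
 *	skcipher_request_set_callback(req,
 *				      CRYPTO_TFM_REQ_MAY_BACKLOG |
 *				      CRYPTO_TFM_REQ_MAY_SLEEP,
 *				      crypto_req_done, &wait);
 *	res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
 *
 * crypto_wait_req() returns the cipher result directly for synchronous
 * drivers, and sleeps on the completion embedded in the wait object when
 * the driver returns -EINPROGRESS or -EBUSY, so each caller no longer
 * open-codes the wait_for_completion() dance that the removed callbacks
 * implemented.
 */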
@@ -42,7 +31,7 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE], { int res = 0; struct skcipher_request *req = NULL; - DECLARE_FS_COMPLETION_RESULT(ecr); + DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0); @@ -59,7 +48,7 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE], } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - derive_crypt_complete, &ecr); + crypto_req_done, &wait); res = crypto_skcipher_setkey(tfm, deriving_key, FS_AES_128_ECB_KEY_SIZE); if (res < 0) @@ -69,11 +58,7 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE], sg_init_one(&dst_sg, derived_raw_key, source_key->size); skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size, NULL); - res = crypto_skcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } + res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); out: skcipher_request_free(req); crypto_free_skcipher(tfm); @@ -273,7 +258,7 @@ int fscrypt_get_encryption_info(struct inode *inode) res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); if (res < 0) { if (!fscrypt_dummy_context_enabled(inode) || - inode->i_sb->s_cop->is_encrypted(inode)) + IS_ENCRYPTED(inode)) return res; /* Fake up a context for an unencrypted directory */ memset(&ctx, 0, sizeof(ctx)); diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 9914d51dff863d27bb0a1aa93ffef21269f84abb..2f2c53f2e1366d3dd1793094f7ffa59634041add 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -109,7 +109,7 @@ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) struct fscrypt_policy policy; int res; - if (!inode->i_sb->s_cop->is_encrypted(inode)) + if (!IS_ENCRYPTED(inode)) return -ENODATA; res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); @@ -166,11 +166,11 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) return 1; /* No restrictions if the parent directory is unencrypted */ - if (!cops->is_encrypted(parent)) + if (!IS_ENCRYPTED(parent)) return 1; /* Encrypted directories must not contain unencrypted files */ - if (!cops->is_encrypted(child)) + if (!IS_ENCRYPTED(child)) return 0; /* diff --git a/fs/dax.c b/fs/dax.c index bf6218da79287e0b0c3f416a00ba6340cbaf44f7..800748f10b3d7068073b82f56616584c53dfdc1b 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1265,6 +1265,17 @@ iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED)) return -EIO; + /* + * Write can allocate block for an area which has a hole page mapped + * into page tables. We have to tear down these mappings so that data + * written by write(2) is visible in mmap. + */ + if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) { + invalidate_inode_pages2_range(inode->i_mapping, + pos >> PAGE_SHIFT, + (end - 1) >> PAGE_SHIFT); + } + while (pos < end) { unsigned offset = pos & (PAGE_SIZE - 1); struct blk_dax_ctl dax = { 0 }; @@ -1329,23 +1340,6 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == WRITE) flags |= IOMAP_WRITE; - /* - * Yes, even DAX files can have page cache attached to them: A zeroed - * page is inserted into the pagecache when we have to serve a write - * fault on a hole. It should never be dirtied and can simply be - * dropped from the pagecache once we get real data for the page. 
- * - * XXX: This is racy against mmap, and there's nothing we can do about - * it. We'll eventually need to shift this down even further so that - * we can check if we allocated blocks over a hole first. - */ - if (mapping->nrpages) { - ret = invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, - (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } - while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, iomap_dax_actor); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index be2d9ae0a749e1ed074e539feb4ec8661a7f2ae7..c17539153099b090385412acaaaa10b6103590ba 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -32,17 +32,15 @@ #include #include #include -#ifdef CONFIG_EXT4_FS_ENCRYPTION -#include -#else -#include -#endif #include #include #ifdef __KERNEL__ #include #endif +#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_EXT4_FS_ENCRYPTION) +#include + /* * The fourth extended filesystem constants/structures */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a77cbc5b657b5e391ae3f746b2c8d4f24f45aa1c..1a0c57100f28e988c2aea44628d010b62132bbe5 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4731,6 +4731,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, EXT4_INODE_EOFBLOCKS); } ext4_mark_inode_dirty(handle, inode); + ext4_update_inode_fsync_trans(handle, inode, 1); ret2 = ext4_journal_stop(handle); if (ret2) break; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d887e32e88a063f41fd874a93e245d9301070db3..74c49b5f378475d6fae5625d086429a7137e9e08 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4443,8 +4443,11 @@ void ext4_set_inode_flags(struct inode *inode) new_fl |= S_DIRSYNC; if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) new_fl |= S_DAX; + if (flags & EXT4_ENCRYPT_FL) + new_fl |= S_ENCRYPTED; inode_set_flags(inode, new_fl, - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX| + S_ENCRYPTED); } /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index c30889b6084864f932d53da6bb0275093ff7d011..8338cd4f19c3b7940e29450c305b8a4445445ed9 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1387,6 +1387,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, "falling back\n")); } nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); + if (!nblocks) { + ret = NULL; + goto cleanup_and_exit; + } start = EXT4_I(dir)->i_dir_start_lookup; if (start >= nblocks) start = 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6c8f2bdef79fb57afd8708ce36d588134b5db8e0..0440befe4a6a8597114741a9abbfecd870571110 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1130,7 +1130,8 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); /* - * Update inode->i_flags - e.g. S_DAX may get disabled + * Update inode->i_flags - S_ENCRYPTED will be enabled, + * S_DAX may be disabled */ ext4_set_inode_flags(inode); } @@ -1151,7 +1152,10 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, ctx, len, 0); if (!res) { ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); - /* Update inode->i_flags - e.g. 
S_DAX may get disabled */ + /* + * Update inode->i_flags - S_ENCRYPTED will be enabled, + * S_DAX may be disabled + */ ext4_set_inode_flags(inode); res = ext4_mark_inode_dirty(handle, inode); if (res) @@ -1166,7 +1170,7 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, return res; } -static int ext4_dummy_context(struct inode *inode) +static bool ext4_dummy_context(struct inode *inode) { return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb)); } @@ -1182,14 +1186,9 @@ static const struct fscrypt_operations ext4_cryptops = { .get_context = ext4_get_context, .set_context = ext4_set_context, .dummy_context = ext4_dummy_context, - .is_encrypted = ext4_encrypted_inode, .empty_dir = ext4_empty_dir, .max_namelen = ext4_max_namelen, }; -#else -static const struct fscrypt_operations ext4_cryptops = { - .is_encrypted = ext4_encrypted_inode, -}; #endif #ifdef CONFIG_QUOTA @@ -3919,7 +3918,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ext4_sops; sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; +#ifdef CONFIG_EXT4_FS_ENCRYPTION sb->s_cop = &ext4_cryptops; +#endif #ifdef CONFIG_QUOTA sb->dq_op = &ext4_quota_operations; if (ext4_has_feature_quota(sb)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 341dbe5632244ee8b7cc8c3d3e2962f17acdb12d..d156dee60fc822c893a9ac3ac4eda3c2f1915770 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -24,13 +24,11 @@ #include #include #include -#ifdef CONFIG_F2FS_FS_ENCRYPTION -#include -#else -#include -#endif #include +#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION) +#include + #ifdef CONFIG_F2FS_CHECK_FS #define f2fs_bug_on(sbi, condition) BUG_ON(condition) #else @@ -3096,6 +3094,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) { #ifdef CONFIG_F2FS_FS_ENCRYPTION file_set_encrypt(inode); + inode->i_flags |= S_ENCRYPTED; #endif } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9684d53563f13b1654c12ae4027d9366480d23b7..b4c4f2b2530404d5dc5b60bafab86a910c1a7879 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -43,8 +43,11 @@ void f2fs_set_inode_flags(struct inode *inode) new_fl |= S_NOATIME; if (flags & FS_DIRSYNC_FL) new_fl |= S_DIRSYNC; + if (f2fs_encrypted_inode(inode)) + new_fl |= S_ENCRYPTED; inode_set_flags(inode, new_fl, - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC| + S_ENCRYPTED); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f95ffde1cff74902232804534de93441c8c2ae4a..8f364c5870e5d0d9cf2f8a48a1b8d36145ffa38a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1738,14 +1738,9 @@ static const struct fscrypt_operations f2fs_cryptops = { .key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, - .is_encrypted = f2fs_encrypted_inode, .empty_dir = f2fs_empty_dir, .max_namelen = f2fs_max_namelen, }; -#else -static const struct fscrypt_operations f2fs_cryptops = { - .is_encrypted = f2fs_encrypted_inode, -}; #endif static struct inode *f2fs_nfs_get_inode(struct super_block *sb, @@ -2474,7 +2469,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) #endif sb->s_op = &f2fs_sops; +#ifdef CONFIG_F2FS_FS_ENCRYPTION sb->s_cop = &f2fs_cryptops; +#endif sb->s_xattr = f2fs_xattr_handlers; sb->s_export_op = &f2fs_export_ops; sb->s_magic = F2FS_SUPER_MAGIC; diff --git a/fs/fcntl.c b/fs/fcntl.c index 
1493ceb0477ddce1e61055e7b6e1d83aaefcdbb4..ec03cf620fd7a4a2253633573e4f08bc05a3a478 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -114,6 +114,10 @@ void f_setown(struct file *filp, unsigned long arg, int force) int who = arg; type = PIDTYPE_PID; if (who < 0) { + /* avoid overflow below */ + if (who == INT_MIN) + return; + type = PIDTYPE_PGID; who = -who; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ffec69dfb80d36e19f214306a7082ec29f92f451..ad2e55d3ed787f1fa5784fd2aebc5e2c9847e6d8 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb) spin_unlock_bh(&wb->work_lock); } +static void finish_writeback_work(struct bdi_writeback *wb, + struct wb_writeback_work *work) +{ + struct wb_completion *done = work->done; + + if (work->auto_free) + kfree(work); + if (done && atomic_dec_and_test(&done->cnt)) + wake_up_all(&wb->bdi->wb_waitq); +} + static void wb_queue_work(struct bdi_writeback *wb, struct wb_writeback_work *work) { trace_writeback_queue(wb, work); - spin_lock_bh(&wb->work_lock); - if (!test_bit(WB_registered, &wb->state)) - goto out_unlock; if (work->done) atomic_inc(&work->done->cnt); - list_add_tail(&work->list, &wb->work_list); - mod_delayed_work(bdi_wq, &wb->dwork, 0); -out_unlock: + + spin_lock_bh(&wb->work_lock); + + if (test_bit(WB_registered, &wb->state)) { + list_add_tail(&work->list, &wb->work_list); + mod_delayed_work(bdi_wq, &wb->dwork, 0); + } else + finish_writeback_work(wb, work); + spin_unlock_bh(&wb->work_lock); } @@ -1875,16 +1889,9 @@ static long wb_do_writeback(struct bdi_writeback *wb) set_bit(WB_writeback_running, &wb->state); while ((work = get_next_work_item(wb)) != NULL) { - struct wb_completion *done = work->done; - trace_writeback_exec(wb, work); - wrote += wb_writeback(wb, work); - - if (work->auto_free) - kfree(work); - if (done && atomic_dec_and_test(&done->cnt)) - wake_up_all(&wb->bdi->wb_waitq); + finish_writeback_work(wb, work); } /* diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index e23ff70b34357b12f9990b7d92a1355f1de62a8e..39c382f1627200334c197756b284244818e7be89 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -256,7 +256,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) goto out; } if ((flags ^ new_flags) & GFS2_DIF_JDATA) { - if (flags & GFS2_DIF_JDATA) + if (new_flags & GFS2_DIF_JDATA) gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); error = filemap_fdatawrite(inode->i_mapping); if (error) @@ -264,6 +264,8 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) error = filemap_fdatawait(inode->i_mapping); if (error) goto out; + if (new_flags & GFS2_DIF_JDATA) + gfs2_ordered_del_inode(ip); } error = gfs2_trans_begin(sdp, RES_DINODE, 0); if (error) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 78219d5644e90aacaf3aeb9fdfe2a234f4e31b84..d6512cd9ba023d69866cd380cc5cd7d122ccd901 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -275,7 +275,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; - size_t len; + ssize_t len; char *buf; if (of->atomic_write_len) { diff --git a/fs/libfs.c b/fs/libfs.c index 48826d4da189ec0373f24c16a89e4f1066815200..9588780ad43e175c795580275f6ddc62be4f670a 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -245,7 +245,8 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, struct inode *root; struct qstr d_name = QSTR_INIT(name, strlen(name)); - s = sget(fs_type, 
NULL, set_anon_super, MS_NOUSER, NULL); + s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER, + &init_user_ns, NULL); if (IS_ERR(s)) return ERR_CAST(s); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d04ec38147790048556ff168e38c5792dbce819e..1e5321d1ed2265f3d78b4e76192d6305ef4ba6ff 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1292,7 +1292,7 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) return 0; } - error = nfs_revalidate_inode(NFS_SERVER(inode), inode); + error = nfs_lookup_verify_inode(inode, flags); dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", __func__, inode->i_ino, error ? "invalid" : "valid"); return !error; @@ -1443,6 +1443,7 @@ static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, + .d_weak_revalidate = nfs_weak_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, @@ -2097,7 +2098,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode != NULL) nfs_drop_nlink(new_inode); d_move(old_dentry, new_dentry); - nfs_set_verifier(new_dentry, + nfs_set_verifier(old_dentry, nfs_save_change_attribute(new_dir)); } else if (error == -ENOENT) nfs_dentry_handle_enoent(old_dentry); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bd81bcf3ffcf3cabeed98274df0f839d33088d1d..1ac1593aded30fc978eb31e29a42d6656c6ecdf8 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -787,10 +787,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) spin_lock(&dreq->lock); - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { - dreq->flags = 0; + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) dreq->error = hdr->error; - } if (dreq->error == 0) { nfs_direct_good_bytes(dreq, hdr); if (nfs_write_need_commit(hdr)) { diff --git a/fs/nfs/io.c b/fs/nfs/io.c index 1fc5d1ce327e272c0878e8a53aa45b2dc96e97ad..d18ccc1ce0b579965e23ff01dce9f4b291d4eea6 100644 --- a/fs/nfs/io.c +++ b/fs/nfs/io.c @@ -98,7 +98,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode) { if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { set_bit(NFS_INO_ODIRECT, &nfsi->flags); - nfs_wb_all(inode); + nfs_sync_mapping(inode->i_mapping); } } diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 074ac7131459ebc378ef3a1cf50244ff758f2a88..f6b0848cc8317b8f105d2cad4c9e40b9cc5132e5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1004,9 +1004,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - if (server->rsize > server_resp_sz) + if (!server->rsize || server->rsize > server_resp_sz) server->rsize = server_resp_sz; - if (server->wsize > server_rqst_sz) + if (!server->wsize || server->wsize > server_rqst_sz) server->wsize = server_rqst_sz; #endif /* CONFIG_NFS_V4_1 */ } diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 835c163f61af5324c988d1c38e51bfeb47983e76..eaac878c1463f5a222a091a094d17f232ffc2407 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -567,9 +567,13 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, struct idmap_msg *im; struct idmap *idmap = (struct idmap *)aux; struct key *key = cons->key; - int ret = -ENOMEM; + int ret = -ENOKEY; + + if (!aux) + goto out1; /* msg and im are freed in idmap_pipe_destroy_msg */ + ret = -ENOMEM; data = kzalloc(sizeof(*data), 
GFP_KERNEL); if (!data) goto out1; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 67845220fc272d9d497d7e48f86221f8b25952d0..4638654e26f3f99c8b0ab205c43ee2f598ab2ef3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -6006,7 +6005,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->server = server; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); - get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); return p; out_free_seqid: @@ -6119,7 +6117,6 @@ static void nfs4_lock_release(void *calldata) nfs_free_seqid(data->arg.lock_seqid); nfs4_put_lock_state(data->lsp); put_nfs_open_context(data->ctx); - fput(data->fl.fl_file); kfree(data); dprintk("%s: done!\n", __func__); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 92671914067fef922e8dc6a4034793ab53253bde..71deeae6eefdc4de2a3160b4507f41b9adfb7c1b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1718,7 +1718,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) break; case -NFS4ERR_STALE_CLIENTID: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_clear_reclaim_reboot(clp); nfs4_state_start_reclaim_reboot(clp); break; case -NFS4ERR_EXPIRED: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b7a07ba8783aa229a2dd124ac76ac4051253eaf1..b8e44746f761b82cc33a1003a548c340bc1c557e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2145,7 +2145,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_write_mds(desc); mirror->pg_recoalesce = 1; } - hdr->release(hdr); + hdr->completion_ops->completion(hdr); } static enum pnfs_try_status @@ -2256,7 +2256,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_read_mds(desc); mirror->pg_recoalesce = 1; } - hdr->release(hdr); + hdr->completion_ops->completion(hdr); } /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e4772a8340f831260e68906a48bdaa24aca5afba..9a3b3820306d4d6aedc94423be148a52fc908eb2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1806,6 +1806,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); next: nfs_unlock_and_release_request(req); + /* Latency breaker */ + cond_resched(); } nfss = NFS_SERVER(data->inode); if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) @@ -1859,6 +1861,8 @@ int nfs_commit_inode(struct inode *inode, int how) if (res) error = nfs_generic_commit_list(inode, &head, how, &cinfo); nfs_commit_end(cinfo.mds); + if (res == 0) + return res; if (error < 0) goto out_error; if (!may_wait) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index fd8c9a5bcac44c74101d1b2ba4001c8785875254..77d136ac890990358ac1a82ff855ff25d176851d 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -30,7 +30,11 @@ locks_start_grace(struct net *net, struct lock_manager *lm) struct list_head *grace_list = net_generic(net, grace_net_id); spin_lock(&grace_lock); - list_add(&lm->list, grace_list); + if (list_empty(&lm->list)) + list_add(&lm->list, grace_list); + else + WARN(1, "double list_add attempt detected in net %x %s\n", + net->ns.inum, (net == &init_net) ? 
"(init_net)" : ""); spin_unlock(&grace_lock); } EXPORT_SYMBOL_GPL(locks_start_grace); @@ -104,7 +108,9 @@ grace_exit_net(struct net *net) { struct list_head *grace_list = net_generic(net, grace_net_id); - BUG_ON(!list_empty(grace_list)); + WARN_ONCE(!list_empty(grace_list), + "net %x %s: grace_list is not empty\n", + net->ns.inum, __func__); } static struct pernet_operations grace_net_ops = { diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 62469c60be23263f21903fadc2217488c7160ed6..81c018e5c31e52246237b5003f284612dd0b5636 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -60,6 +60,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) else gi->gid[i] = rqgi->gid[i]; } + + /* Each thread allocates its own gi, no race */ + groups_sort(gi); } else { gi = get_group_info(rqgi); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ec2a69dac5368c6d0097cde306d75b3e06e0172c..f463c4e0b2ea597de61522a78d2628639adb0158 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -63,12 +63,16 @@ static const stateid_t zero_stateid = { static const stateid_t currentstateid = { .si_generation = 1, }; +static const stateid_t close_stateid = { + .si_generation = 0xffffffffU, +}; static u64 current_sessionid = 1; #define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) #define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) #define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t))) +#define CLOSE_STATEID(stateid) (!memcmp((stateid), &close_stateid, sizeof(stateid_t))) /* forward declarations */ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); @@ -3513,7 +3517,9 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) continue; - if (local->st_stateowner == &oo->oo_owner) { + if (local->st_stateowner != &oo->oo_owner) + continue; + if (local->st_stid.sc_type == NFS4_OPEN_STID) { ret = local; atomic_inc(&ret->st_stid.sc_count); break; @@ -3522,6 +3528,52 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) return ret; } +static __be32 +nfsd4_verify_open_stid(struct nfs4_stid *s) +{ + __be32 ret = nfs_ok; + + switch (s->sc_type) { + default: + break; + case NFS4_CLOSED_STID: + case NFS4_CLOSED_DELEG_STID: + ret = nfserr_bad_stateid; + break; + case NFS4_REVOKED_DELEG_STID: + ret = nfserr_deleg_revoked; + } + return ret; +} + +/* Lock the stateid st_mutex, and deal with races with CLOSE */ +static __be32 +nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp) +{ + __be32 ret; + + mutex_lock(&stp->st_mutex); + ret = nfsd4_verify_open_stid(&stp->st_stid); + if (ret != nfs_ok) + mutex_unlock(&stp->st_mutex); + return ret; +} + +static struct nfs4_ol_stateid * +nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) +{ + struct nfs4_ol_stateid *stp; + for (;;) { + spin_lock(&fp->fi_lock); + stp = nfsd4_find_existing_open(fp, open); + spin_unlock(&fp->fi_lock); + if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok) + break; + nfs4_put_stid(&stp->st_stid); + } + return stp; +} + static struct nfs4_openowner * alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) @@ -3566,6 +3618,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) mutex_init(&stp->st_mutex); mutex_lock(&stp->st_mutex); +retry: spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); @@ 
-3590,7 +3643,11 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); if (retstp) { - mutex_lock(&retstp->st_mutex); + /* Handle races with CLOSE */ + if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { + nfs4_put_stid(&retstp->st_stid); + goto retry; + } /* To keep mutex tracking happy */ mutex_unlock(&stp->st_mutex); stp = retstp; @@ -4400,6 +4457,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; + bool new_stp = false; /* * Lookup file; if found, lookup stateid and check open request, @@ -4411,9 +4469,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; - spin_lock(&fp->fi_lock); - stp = nfsd4_find_existing_open(fp, open); - spin_unlock(&fp->fi_lock); + stp = nfsd4_find_and_lock_existing_open(fp, open); } else { open->op_file = NULL; status = nfserr_bad_stateid; @@ -4421,35 +4477,31 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } + if (!stp) { + stp = init_open_stateid(fp, open); + if (!open->op_stp) + new_stp = true; + } + /* * OPEN the file, or upgrade an existing OPEN. * If truncate fails, the OPEN fails. + * + * stp is already locked. */ - if (stp) { + if (!new_stp) { /* Stateid was found, this is an OPEN upgrade */ - mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { mutex_unlock(&stp->st_mutex); goto out; } } else { - /* stp is returned locked. */ - stp = init_open_stateid(fp, open); - /* See if we lost the race to some other thread */ - if (stp->st_access_bmap != 0) { - status = nfs4_upgrade_open(rqstp, fp, current_fh, - stp, open); - if (status) { - mutex_unlock(&stp->st_mutex); - goto out; - } - goto upgrade_out; - } status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { - mutex_unlock(&stp->st_mutex); + stp->st_stid.sc_type = NFS4_CLOSED_STID; release_open_stateid(stp); + mutex_unlock(&stp->st_mutex); goto out; } @@ -4458,7 +4510,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (stp->st_clnt_odstate == open->op_odstate) open->op_odstate = NULL; } -upgrade_out: + nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); mutex_unlock(&stp->st_mutex); @@ -4684,7 +4736,7 @@ nfs4_laundromat(struct nfsd_net *nn) spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { - nbl = list_first_entry(&nn->blocked_locks_lru, + nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); posix_unblock_lock(&nbl->nbl_lock); @@ -4818,7 +4870,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) struct nfs4_stid *s; __be32 status = nfserr_bad_stateid; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || + CLOSE_STATEID(stateid)) return status; /* Client debugging aid. 
*/ if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { @@ -4876,7 +4929,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, else if (typemask & NFS4_DELEG_STID) typemask |= NFS4_REVOKED_DELEG_STID; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || + CLOSE_STATEID(stateid)) return nfserr_bad_stateid; status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn); if (status == nfserr_stale_clientid) { @@ -5127,15 +5181,9 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ status = nfsd4_check_seqid(cstate, sop, seqid); if (status) return status; - if (stp->st_stid.sc_type == NFS4_CLOSED_STID - || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID) - /* - * "Closed" stateid's exist *only* to return - * nfserr_replay_me from the previous step, and - * revoked delegations are kept only for free_stateid. - */ - return nfserr_bad_stateid; - mutex_lock(&stp->st_mutex); + status = nfsd4_lock_ol_stateid(stp); + if (status != nfs_ok) + return status; status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok) status = nfs4_check_fh(current_fh, &stp->st_stid); @@ -5314,7 +5362,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) bool unhashed; LIST_HEAD(reaplist); - s->st_stid.sc_type = NFS4_CLOSED_STID; spin_lock(&clp->cl_lock); unhashed = unhash_open_stateid(s, &reaplist); @@ -5353,10 +5400,17 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; + + stp->st_stid.sc_type = NFS4_CLOSED_STID; nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); - mutex_unlock(&stp->st_mutex); nfsd4_close_open_stateid(stp); + mutex_unlock(&stp->st_mutex); + + /* See RFC5661 section 18.2.4 */ + if (stp->st_stid.sc_client->cl_minorversion) + memcpy(&close->cl_stateid, &close_stateid, + sizeof(close->cl_stateid)); /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); @@ -6958,6 +7012,10 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; nn->unconf_name_tree = RB_ROOT; + nn->boot_time = get_seconds(); + nn->grace_ended = false; + nn->nfsd4_manager.block_opens = true; + INIT_LIST_HEAD(&nn->nfsd4_manager.list); INIT_LIST_HEAD(&nn->client_lru); INIT_LIST_HEAD(&nn->close_lru); INIT_LIST_HEAD(&nn->del_recall_lru); @@ -7015,9 +7073,6 @@ nfs4_state_start_net(struct net *net) ret = nfs4_state_create_net(net); if (ret) return ret; - nn->boot_time = get_seconds(); - nn->grace_ended = false; - nn->nfsd4_manager.block_opens = true; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", @@ -7094,7 +7149,7 @@ nfs4_state_shutdown_net(struct net *net) spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { - nbl = list_first_entry(&nn->blocked_locks_lru, + nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); posix_unblock_lock(&nbl->nbl_lock); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 1645b977c9c6ae7c6d24c072ec90a785a0ad3452..5c4800626f130acc9574733547fc46c613768e02 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -155,7 +155,8 @@ int nfsd_vers(int vers, enum vers_op change) int nfsd_minorversion(u32 minorversion, enum vers_op change) { - if
(minorversion > NFSD_SUPPORTED_MINOR_VERSION && + change != NFSD_AVAIL) return -1; switch(change) { case NFSD_SET: @@ -399,23 +400,20 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) void nfsd_reset_versions(void) { - int found_one = 0; int i; - for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) { - if (nfsd_program.pg_vers[i]) - found_one = 1; - } - - if (!found_one) { - for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) - nfsd_program.pg_vers[i] = nfsd_version[i]; -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) - for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) - nfsd_acl_program.pg_vers[i] = - nfsd_acl_version[i]; -#endif - } + for (i = 0; i < NFSD_NRVERS; i++) + if (nfsd_vers(i, NFSD_TEST)) + return; + + for (i = 0; i < NFSD_NRVERS; i++) + if (i != 4) + nfsd_vers(i, NFSD_SET); + else { + int minor = 0; + while (nfsd_minorversion(minor, NFSD_SET) >= 0) + minor++; + } } /* diff --git a/fs/nsfs.c b/fs/nsfs.c index 8718af895eabf791451e1b607d7b0a2b2d33cef2..80fdfad7c215c1f60693557625bcc3d852ad8c2f 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -90,6 +90,7 @@ static void *__ns_get_path(struct path *path, struct ns_common *ns) return ERR_PTR(-ENOMEM); } d_instantiate(dentry, inode); + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_fsdata = (void *)ns->ops; d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); if (d) { diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index fe2cbeb90772aabf399aea2809b34af552efac98..939aa066e1ca36d44a3f0a79bcb7efb944a2338f 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -161,7 +161,7 @@ static ssize_t orangefs_devreq_read(struct file *file, struct orangefs_kernel_op_s *op, *temp; __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION; static __s32 magic = ORANGEFS_DEVREQ_MAGIC; - struct orangefs_kernel_op_s *cur_op = NULL; + struct orangefs_kernel_op_s *cur_op; unsigned long ret; /* We do not support blocking IO. */ @@ -181,6 +181,7 @@ static ssize_t orangefs_devreq_read(struct file *file, } restart: + cur_op = NULL; /* Get next op (if any) from top of list. */ spin_lock(&orangefs_request_list_lock); list_for_each_entry_safe(op, temp, &orangefs_request_list, list) { diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 02cc6139ec90798900f6c626934c9f6d2baee266..5b2cbe567365c639da3b520ae4e8c32546b4579c 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -446,7 +446,7 @@ ssize_t orangefs_inode_read(struct inode *inode, static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; - loff_t pos = *(&iocb->ki_pos); + loff_t pos = iocb->ki_pos; ssize_t rc = 0; BUG_ON(iocb->private); @@ -485,9 +485,6 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite } } - if (file->f_pos > i_size_read(file->f_mapping->host)) - orangefs_i_size_write(file->f_mapping->host, file->f_pos); - rc = generic_write_checks(iocb, iter); if (rc <= 0) { @@ -501,7 +498,7 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite * pos to the end of the file, so we will wait till now to set * pos... 
*/ - pos = *(&iocb->ki_pos); + pos = iocb->ki_pos; rc = do_readv_writev(ORANGEFS_IO_WRITE, file, diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 45dd8f27b2ac34e94f1e3fbed645ff703170f6d5..f28381a7cd12cc18cf1dd97db2b8c6dc73d8991b 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -570,17 +570,6 @@ do { \ sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE; \ } while (0) -static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size) -{ -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - inode_lock(inode); -#endif - i_size_write(inode, i_size); -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - inode_unlock(inode); -#endif -} - static inline void orangefs_set_timeout(struct dentry *dentry) { unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000; diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index abcfa3fa9992957006461c202c54380018a17787..f61b00887481f7e0417b528133e614b405d8f9e9 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -28,10 +28,10 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s */ void purge_waiting_ops(void) { - struct orangefs_kernel_op_s *op; + struct orangefs_kernel_op_s *op, *tmp; spin_lock(&orangefs_request_list_lock); - list_for_each_entry(op, &orangefs_request_list, list) { + list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) { gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2-client-core: purging op tag %llu %s\n", llu(op->tag), diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index f241b4ee3d8a50b1c03ae4b1e1848b772fd26af1..a1be6baabca35a2a68469ed735886236aeaa3169 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -434,10 +434,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, struct dentry *dentry = file->f_path.dentry; struct file *realfile = od->realfile; + /* Nothing to sync for lower */ + if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) + return 0; + /* * Need to check if we started out being a lower dir, but got copied up */ - if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { + if (!od->is_upper) { struct inode *inode = file_inode(file); realfile = lockless_dereference(od->upperfile); diff --git a/fs/pipe.c b/fs/pipe.c index 8e0d9f26dfadc4b5849a9f46a933a89ea2aa49ca..34345535f63d60d6b9bb72ad662ec2c75242aa0b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -609,12 +609,17 @@ static unsigned long account_pipe_buffers(struct user_struct *user, static bool too_many_pipe_buffers_soft(unsigned long user_bufs) { - return pipe_user_pages_soft && user_bufs >= pipe_user_pages_soft; + return pipe_user_pages_soft && user_bufs > pipe_user_pages_soft; } static bool too_many_pipe_buffers_hard(unsigned long user_bufs) { - return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard; + return pipe_user_pages_hard && user_bufs > pipe_user_pages_hard; +} + +static bool is_unprivileged_user(void) +{ + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); } struct pipe_inode_info *alloc_pipe_info(void) @@ -633,12 +638,12 @@ struct pipe_inode_info *alloc_pipe_info(void) user_bufs = account_pipe_buffers(user, 0, pipe_bufs); - if (too_many_pipe_buffers_soft(user_bufs)) { + if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) { user_bufs = account_pipe_buffers(user, pipe_bufs, 1); pipe_bufs = 1; } - if (too_many_pipe_buffers_hard(user_bufs)) + if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user()) goto out_revert_acct; pipe->bufs = 
kcalloc(pipe_bufs, sizeof(struct pipe_buffer), @@ -1018,13 +1023,19 @@ const struct file_operations pipefifo_fops = { /* * Currently we rely on the pipe array holding a power-of-2 number - * of pages. + * of pages. Returns 0 on error. */ static inline unsigned int round_pipe_size(unsigned int size) { unsigned long nr_pages; + if (size < pipe_min_size) + size = pipe_min_size; + nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (nr_pages == 0) + return 0; + return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; } @@ -1040,6 +1051,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) long ret = 0; size = round_pipe_size(arg); + if (size == 0) + return -EINVAL; nr_pages = size >> PAGE_SHIFT; if (!nr_pages) @@ -1061,7 +1074,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) if (nr_pages > pipe->buffers && (too_many_pipe_buffers_hard(user_bufs) || too_many_pipe_buffers_soft(user_bufs)) && - !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { + is_unprivileged_user()) { ret = -EPERM; goto out_revert_acct; } @@ -1123,13 +1136,18 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, size_t *lenp, loff_t *ppos) { + unsigned int rounded_pipe_max_size; int ret; ret = proc_dointvec_minmax(table, write, buf, lenp, ppos); if (ret < 0 || !write) return ret; - pipe_max_size = round_pipe_size(pipe_max_size); + rounded_pipe_max_size = round_pipe_size(pipe_max_size); + if (rounded_pipe_max_size == 0) + return -EINVAL; + + pipe_max_size = rounded_pipe_max_size; return ret; } diff --git a/fs/proc/array.c b/fs/proc/array.c index c932ec454625f45f2ab844239771ea212a5516ec..794b52a6c20dcb8545dcdb6b24f517d3c7561d07 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -423,8 +423,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * safe because the task has stopped executing permanently. */ if (permitted && (task->flags & PF_DUMPCORE)) { - eip = KSTK_EIP(task); - esp = KSTK_ESP(task); + if (try_get_task_stack(task)) { + eip = KSTK_EIP(task); + esp = KSTK_ESP(task); + put_task_stack(task); + } } } diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 5c89a07e3d7f540b48757b907557e5f5df300ff2..df7e07986ead5eae73d37ea980eecfc4a7a8f46c 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -507,23 +507,15 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) return -EFAULT; } else { if (kern_addr_valid(start)) { - unsigned long n; - /* * Using bounce buffer to bypass the * hardened user copy kernel text checks. */ - memcpy(buf, (char *) start, tsz); - n = copy_to_user(buffer, buf, tsz); - /* - * We cannot distinguish between fault on source - * and fault on destination. When this happens - * we clear too and hope it will trigger the - * EFAULT again. - */ - if (n) { - if (clear_user(buffer + tsz - n, - n)) + if (probe_kernel_read(buf, (void *) start, tsz)) { + if (clear_user(buffer, tsz)) + return -EFAULT; + } else { + if (copy_to_user(buffer, buf, tsz)) return -EFAULT; } } else { diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 15f327bed8c6fea9ccdba944b3540524b6c6d065..7340c36978a3bca20a04ff55d59db0dfd5e4149e 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -14,6 +14,7 @@ #include #include #include +#include "internal.h" /* * The /proc/tty directory inodes... 
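/*
 * Editor's note (not part of the patch): the round_pipe_size() guard in
 * the pipe.c hunks above appears to matter only on 32-bit kernels, where
 * size + PAGE_SIZE - 1 is computed in 32-bit unsigned arithmetic (size is
 * an unsigned int, PAGE_SIZE an unsigned long). Worked example with
 * PAGE_SIZE = 4096 and PAGE_SHIFT = 12:
 *
 *	size = 0xFFFFFFFF
 *	size + PAGE_SIZE - 1 = 0x1_00000FFE, truncated to 0x00000FFE
 *	nr_pages = 0x00000FFE >> 12 = 0
 *
 * roundup_pow_of_two(0) is undefined, so the old code could hand back a
 * garbage "rounded" size for a huge fcntl(F_SETPIPE_SZ) argument. With
 * the guard, nr_pages == 0 makes round_pipe_size() return 0 and both
 * callers bail out, as in the pipe_set_size() hunk:
 *
 *	size = round_pipe_size(arg);
 *	if (size == 0)
 *		return -EINVAL;
 */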
@@ -164,7 +165,7 @@ void proc_tty_unregister_driver(struct tty_driver *driver) if (!ent) return; - remove_proc_entry(driver->driver_name, proc_tty_driver); + remove_proc_entry(ent->name, proc_tty_driver); driver->proc_entry = NULL; } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 1bfac28b7e7df1febe08012abf2ad51d3189cc54..f9246ac4eef81493ddbd87237158f6aa423b339f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2985,7 +2985,8 @@ static int __init dquot_init(void) pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); - register_shrinker(&dqcache_shrinker); + if (register_shrinker(&dqcache_shrinker)) + panic("Cannot register dquot shrinker"); return 0; } diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index dc198bc64c61ddae9db7a936365a820d71abf2d0..edc8ef78b63fc204275725bf9c9d6b9a5b788a81 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -513,9 +513,17 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th, "inode has negative prealloc blocks count."); #endif while (ei->i_prealloc_count > 0) { - reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); - ei->i_prealloc_block++; + b_blocknr_t block_to_free; + + /* + * reiserfs_free_prealloc_block can drop the write lock, + * which could allow another caller to free the same block. + * We can protect against it by modifying the prealloc + * state before calling it. + */ + block_to_free = ei->i_prealloc_block++; ei->i_prealloc_count--; + reiserfs_free_prealloc_block(th, inode, block_to_free); dirty = 1; } if (dirty) @@ -1128,7 +1136,7 @@ static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint) hint->prealloc_size = 0; if (!hint->formatted_node && hint->preallocate) { - if (S_ISREG(hint->inode->i_mode) + if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode) && hint->inode->i_size >= REISERFS_SB(hint->th->t_super)->s_alloc_options. 
preallocmin * hint->inode->i_sb->s_blocksize) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0a6ad4e71e88dd0c2a7a3b3c4eaaa1671ba6dcad..e101d70d2327bc822fb44a5632697f2b2d10c90e 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2521,7 +2521,6 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, return err; if (inode->i_size < off + len - towrite) i_size_write(inode, off + len - towrite); - inode->i_version++; inode->i_mtime = inode->i_ctime = current_time(inode); mark_inode_dirty(inode); return len - towrite; diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index fffaad4701cf160bc6c90ba658676e4a1c7dd749..0b3b22334e54a27c09eb4f3e6e3008c16701202a 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -32,23 +32,20 @@ static void inherit_derived_state(struct inode *parent, struct inode *child) ci->data->under_android = pi->data->under_android; ci->data->under_cache = pi->data->under_cache; ci->data->under_obb = pi->data->under_obb; - set_top(ci, pi->top_data); } /* helper function for derived state */ void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, - uid_t uid, bool under_android, - struct sdcardfs_inode_data *top) + uid_t uid) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); info->data->perm = perm; info->data->userid = userid; info->data->d_uid = uid; - info->data->under_android = under_android; + info->data->under_android = false; info->data->under_cache = false; info->data->under_obb = false; - set_top(info, top); } /* While renaming, there is a point where we want the path from dentry, @@ -58,8 +55,8 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name) { struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry)); - struct sdcardfs_inode_data *parent_data = - SDCARDFS_I(d_inode(parent))->data; + struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent)); + struct sdcardfs_inode_data *parent_data = parent_info->data; appid_t appid; unsigned long user_num; int err; @@ -80,13 +77,15 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, inherit_derived_state(d_inode(parent), d_inode(dentry)); /* Files don't get special labels */ - if (!S_ISDIR(d_inode(dentry)->i_mode)) + if (!S_ISDIR(d_inode(dentry)->i_mode)) { + set_top(info, parent_info); return; + } /* Derive custom permissions based on parent and current node */ switch (parent_data->perm) { case PERM_INHERIT: case PERM_ANDROID_PACKAGE_CACHE: - /* Already inherited above */ + set_top(info, parent_info); break; case PERM_PRE_ROOT: /* Legacy internal layout places users at top level */ @@ -96,7 +95,6 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, info->data->userid = 0; else info->data->userid = user_num; - set_top(info, info->data); break; case PERM_ROOT: /* Assume masked off by default. 
*/ @@ -104,24 +102,24 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, /* App-specific directories inside; let anyone traverse */ info->data->perm = PERM_ANDROID; info->data->under_android = true; - set_top(info, info->data); + } else { + set_top(info, parent_info); } break; case PERM_ANDROID: if (qstr_case_eq(name, &q_data)) { /* App-specific directories inside; let anyone traverse */ info->data->perm = PERM_ANDROID_DATA; - set_top(info, info->data); } else if (qstr_case_eq(name, &q_obb)) { /* App-specific directories inside; let anyone traverse */ info->data->perm = PERM_ANDROID_OBB; info->data->under_obb = true; - set_top(info, info->data); /* Single OBB directory is always shared */ } else if (qstr_case_eq(name, &q_media)) { /* App-specific directories inside; let anyone traverse */ info->data->perm = PERM_ANDROID_MEDIA; - set_top(info, info->data); + } else { + set_top(info, parent_info); } break; case PERM_ANDROID_OBB: @@ -132,13 +130,13 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, if (appid != 0 && !is_excluded(name->name, parent_data->userid)) info->data->d_uid = multiuser_get_uid(parent_data->userid, appid); - set_top(info, info->data); break; case PERM_ANDROID_PACKAGE: if (qstr_case_eq(name, &q_cache)) { info->data->perm = PERM_ANDROID_PACKAGE_CACHE; info->data->under_cache = true; } + set_top(info, parent_info); break; } } diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 4a971e2ba406861e204525546fd15f2b75bdcff1..9cdb396d9d375206dbe0391318c798d1b025a439 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -629,6 +629,8 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma struct inode tmp; struct sdcardfs_inode_data *top = top_data_get(SDCARDFS_I(inode)); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); if (!top) return -EINVAL; @@ -645,7 +647,7 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma */ copy_attrs(&tmp, inode); tmp.i_uid = make_kuid(&init_user_ns, top->d_uid); - tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, top)); + tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, inode->i_sb, top)); tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(inode), top); data_put(top); @@ -724,7 +726,7 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct */ copy_attrs(&tmp, inode); tmp.i_uid = make_kuid(&init_user_ns, top->d_uid); - tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, top)); + tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, dentry->d_sb, top)); tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(inode), top); tmp.i_size = i_size_read(inode); @@ -821,11 +823,12 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct return err; } -static int sdcardfs_fillattr(struct vfsmount *mnt, - struct inode *inode, struct kstat *stat) +static int sdcardfs_fillattr(struct vfsmount *mnt, struct inode *inode, + struct kstat *lower_stat, struct kstat *stat) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); struct sdcardfs_inode_data *top = top_data_get(info); + struct super_block *sb = inode->i_sb; if (!top) return -EINVAL; @@ -835,14 +838,14 @@ static int sdcardfs_fillattr(struct vfsmount *mnt, stat->mode = (inode->i_mode & S_IFMT) | get_mode(mnt, info, top); stat->nlink = inode->i_nlink; stat->uid = make_kuid(&init_user_ns, top->d_uid); - stat->gid = make_kgid(&init_user_ns, get_gid(mnt, top)); + stat->gid = make_kgid(&init_user_ns, get_gid(mnt, sb, 
top)); stat->rdev = inode->i_rdev; - stat->size = i_size_read(inode); - stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; - stat->ctime = inode->i_ctime; - stat->blksize = (1 << inode->i_blkbits); - stat->blocks = inode->i_blocks; + stat->size = lower_stat->size; + stat->atime = lower_stat->atime; + stat->mtime = lower_stat->mtime; + stat->ctime = lower_stat->ctime; + stat->blksize = lower_stat->blksize; + stat->blocks = lower_stat->blocks; data_put(top); return 0; } @@ -868,8 +871,7 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, goto out; sdcardfs_copy_and_fix_attrs(d_inode(dentry), d_inode(lower_path.dentry)); - err = sdcardfs_fillattr(mnt, d_inode(dentry), stat); - stat->blocks = lower_stat.blocks; + err = sdcardfs_fillattr(mnt, d_inode(dentry), &lower_stat, stat); out: sdcardfs_put_lower_path(dentry, &lower_path); return err; diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 0a2b5167e9a2e5562b7af6e3642140c20816609b..e4fd3fbb05e69c765bf5f53173a4a662642462f5 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -33,6 +33,7 @@ enum { Opt_userid, Opt_reserved_mb, Opt_gid_derivation, + Opt_default_normal, Opt_err, }; @@ -45,6 +46,7 @@ static const match_table_t sdcardfs_tokens = { {Opt_userid, "userid=%d"}, {Opt_multiuser, "multiuser"}, {Opt_gid_derivation, "derive_gid"}, + {Opt_default_normal, "default_normal"}, {Opt_reserved_mb, "reserved_mb=%u"}, {Opt_err, NULL} }; @@ -68,6 +70,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, opts->reserved_mb = 0; /* by default, gid derivation is off */ opts->gid_derivation = false; + opts->default_normal = false; *debug = 0; @@ -122,6 +125,9 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_gid_derivation: opts->gid_derivation = true; break; + case Opt_default_normal: + opts->default_normal = true; + break; /* unknown option */ default: if (!silent) @@ -175,6 +181,7 @@ int parse_options_remount(struct super_block *sb, char *options, int silent, return 0; vfsopts->mask = option; break; + case Opt_default_normal: case Opt_multiuser: case Opt_userid: case Opt_fsuid: @@ -334,13 +341,11 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, mutex_lock(&sdcardfs_super_list_lock); if (sb_info->options.multiuser) { setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT, - sb_info->options.fs_user_id, AID_ROOT, - false, SDCARDFS_I(d_inode(sb->s_root))->data); + sb_info->options.fs_user_id, AID_ROOT); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); } else { setup_derived_state(d_inode(sb->s_root), PERM_ROOT, - sb_info->options.fs_user_id, AID_ROOT, - false, SDCARDFS_I(d_inode(sb->s_root))->data); + sb_info->options.fs_user_id, AID_ROOT); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); } fixup_tmp_permissions(d_inode(sb->s_root)); diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index d1d8bab00fe51282a847427ac76321bcc94b05e3..610466ad153d451198d66e82995bf3e5fe5b1bed 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -201,6 +201,7 @@ struct sdcardfs_inode_info { struct sdcardfs_inode_data *data; /* top folder for ownership */ + spinlock_t top_lock; struct sdcardfs_inode_data *top_data; struct inode vfs_inode; @@ -220,6 +221,7 @@ struct sdcardfs_mount_options { userid_t fs_user_id; bool multiuser; bool gid_derivation; + bool default_normal; unsigned int reserved_mb; }; @@ -379,7 +381,12 @@ static inline struct sdcardfs_inode_data *data_get( static inline 
struct sdcardfs_inode_data *top_data_get( struct sdcardfs_inode_info *info) { - return data_get(info->top_data); + struct sdcardfs_inode_data *top_data; + + spin_lock(&info->top_lock); + top_data = data_get(info->top_data); + spin_unlock(&info->top_lock); + return top_data; } extern void data_release(struct kref *ref); @@ -401,23 +408,30 @@ static inline void release_own_data(struct sdcardfs_inode_info *info) } static inline void set_top(struct sdcardfs_inode_info *info, - struct sdcardfs_inode_data *top) + struct sdcardfs_inode_info *top_owner) { - struct sdcardfs_inode_data *old_top = info->top_data; + struct sdcardfs_inode_data *old_top; + struct sdcardfs_inode_data *new_top = NULL; - if (top) - data_get(top); - info->top_data = top; + if (top_owner) + new_top = top_data_get(top_owner); + + spin_lock(&info->top_lock); + old_top = info->top_data; + info->top_data = new_top; if (old_top) data_put(old_top); + spin_unlock(&info->top_lock); } static inline int get_gid(struct vfsmount *mnt, + struct super_block *sb, struct sdcardfs_inode_data *data) { - struct sdcardfs_vfsmount_options *opts = mnt->data; + struct sdcardfs_vfsmount_options *vfsopts = mnt->data; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(sb); - if (opts->gid == AID_SDCARD_RW) + if (vfsopts->gid == AID_SDCARD_RW && !sbi->options.default_normal) /* As an optimization, certain trusted system components only run * as owner but operate across all users. Since we're now handing * out the sdcard_rw GID only to trusted apps, we're okay relaxing @@ -426,7 +440,7 @@ static inline int get_gid(struct vfsmount *mnt, */ return AID_SDCARD_RW; else - return multiuser_get_uid(data->userid, opts->gid); + return multiuser_get_uid(data->userid, vfsopts->gid); } static inline int get_mode(struct vfsmount *mnt, @@ -513,8 +527,7 @@ struct limit_search { }; extern void setup_derived_state(struct inode *inode, perm_t perm, - userid_t userid, uid_t uid, bool under_android, - struct sdcardfs_inode_data *top); + userid_t userid, uid_t uid); extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name); extern void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit); diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index b89947d878e33d50645502b60d27b9d9968d81fb..cffcdb11cb8ac8516e246524216f634bffe1e903 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -215,6 +215,9 @@ static struct inode *sdcardfs_alloc_inode(struct super_block *sb) i->data = d; kref_init(&d->refcount); + i->top_data = d; + spin_lock_init(&i->top_lock); + kref_get(&d->refcount); i->vfs_inode.i_version = 1; return &i->vfs_inode; @@ -304,6 +307,8 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, seq_printf(m, ",userid=%u", opts->fs_user_id); if (opts->gid_derivation) seq_puts(m, ",derive_gid"); + if (opts->default_normal) + seq_puts(m, ",default_normal"); if (opts->reserved_mb != 0) seq_printf(m, ",reserved=%uMB", opts->reserved_mb); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index d9f9615bfd71a24c4235795cef11f7c5b1c4c6b6..3979d767a0cbecb9b083cd64716441f423aa9f0a 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -270,7 +270,8 @@ static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum) } static int __ubifs_setxattr(struct inode *host, const char *name, - const void *value, size_t size, int flags) + const void *value, size_t size, int flags, + bool check_lock) { struct inode 
*inode; struct ubifs_info *c = host->i_sb->s_fs_info; @@ -279,7 +280,8 @@ static int __ubifs_setxattr(struct inode *host, const char *name, union ubifs_key key; int err; - ubifs_assert(inode_is_locked(host)); + if (check_lock) + ubifs_assert(inode_is_locked(host)); if (size > UBIFS_MAX_INO_DATA) return -ERANGE; @@ -548,7 +550,8 @@ static int init_xattrs(struct inode *inode, const struct xattr *xattr_array, } strcpy(name, XATTR_SECURITY_PREFIX); strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); - err = __ubifs_setxattr(inode, name, xattr->value, xattr->value_len, 0); + err = __ubifs_setxattr(inode, name, xattr->value, + xattr->value_len, 0, false); kfree(name); if (err < 0) break; @@ -594,7 +597,8 @@ static int ubifs_xattr_set(const struct xattr_handler *handler, name = xattr_full_name(handler, name); if (value) - return __ubifs_setxattr(inode, name, value, size, flags); + return __ubifs_setxattr(inode, name, value, size, flags, + true); else return __ubifs_removexattr(inode, name); } diff --git a/fs/udf/super.c b/fs/udf/super.c index 4942549e7dc8b3758dd8c8fadb99e8fba4265c00..4b1f6d5372c38833aaf84e14349252dded28b149 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -710,7 +710,7 @@ static loff_t udf_check_vsd(struct super_block *sb) else sectorsize = sb->s_blocksize; - sector += (sbi->s_session << sb->s_blocksize_bits); + sector += (((loff_t)sbi->s_session) << sb->s_blocksize_bits); udf_debug("Starting at sector %u (%ld byte sectors)\n", (unsigned int)(sector >> sb->s_blocksize_bits), diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 2cde073e9d850938d75ac46b9e3c88aa296c56af..9d9c032d5fe5c41c280bf5302b3922684cefe23b 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -419,7 +419,7 @@ int handle_userfault(struct fault_env *fe, unsigned long reason) * in such case. 
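[Editor's note on the udf_check_vsd hunk above: the change widens s_session to loff_t before shifting. Without the cast, the shift is evaluated in 32-bit arithmetic and the high bits of the byte offset are silently discarded before the result ever reaches the 64-bit sector variable. A minimal stand-alone sketch of the failure mode; the values are invented for illustration:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t session = 0x400000;	/* hypothetical session start */
		int bits = 11;			/* 2048-byte sectors */

		/* Shift done in 32-bit unsigned arithmetic: wraps to 0. */
		int64_t wrong = (int64_t)(session << bits);
		/* Widen first, then shift: the full offset survives. */
		int64_t right = ((int64_t)session) << bits;

		printf("wrong=%lld right=%lld\n",
		       (long long)wrong, (long long)right);
		return 0;
	}
]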
*/ down_read(&mm->mmap_sem); - ret = 0; + ret = VM_FAULT_NOPAGE; } } diff --git a/fs/xattr.c b/fs/xattr.c index 932b9061a3a25dbd9154d0d50ffe2fdbab4b909a..1b00babeaa67f4f4094d066b69387429c3c4bba2 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -130,7 +130,7 @@ xattr_permission(struct inode *inode, const char *name, int mask) return -EPERM; } - return inode_permission(inode, mask); + return inode_permission2(ERR_PTR(-EOPNOTSUPP), inode, mask); } int diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 7eb99701054fb9b5cd157cc4925c6ff9af9cc382..8ad65d43b65d892da966b678c4e2f1e653e2022f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -2713,7 +2713,7 @@ xfs_bmap_add_extent_unwritten_real( &i))) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); - cur->bc_rec.b.br_state = XFS_EXT_NORM; + cur->bc_rec.b.br_state = new->br_state; if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index d31cd1ebd8e9888633e3e8c3f1780f3abe47ac6f..f3acecf3869dd6cd85f1e67bbe8397901085de52 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -391,7 +391,7 @@ xfs_map_blocks( (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + count > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -1295,7 +1295,7 @@ xfs_map_trim_size( if (mapping_size > size) mapping_size = size; if (offset < i_size_read(inode) && - offset + mapping_size >= i_size_read(inode)) { + (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) { /* limit mapping to block that spans EOF */ mapping_size = roundup_64(i_size_read(inode) - offset, i_blocksize(inode)); @@ -1347,7 +1347,7 @@ __xfs_get_blocks( lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + size > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index eca7baecc9f08cebd3303c87d11286c539da27bb..3f45d9867e10a75cae0563c8281f4d691514e3ff 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1785,22 +1785,27 @@ xfs_alloc_buftarg( btp->bt_bdi = blk_get_backing_dev_info(bdev); if (xfs_setsize_buftarg_early(btp, bdev)) - goto error; + goto error_free; if (list_lru_init(&btp->bt_lru)) - goto error; + goto error_free; if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) - goto error; + goto error_lru; btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; - register_shrinker(&btp->bt_shrinker); + if (register_shrinker(&btp->bt_shrinker)) + goto error_pcpu; return btp; -error: +error_pcpu: + percpu_counter_destroy(&btp->bt_io_count); +error_lru: + list_lru_destroy(&btp->bt_lru); +error_free: kmem_free(btp); return NULL; } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 9d06cc30e875e147a5560bad24e5a55aedb65cf0..7a7b3ccf22731d3edc1e265fb7599d3a8e5c2c12 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1004,14 +1004,22 @@ xfs_qm_dqflush_done( * holding the lock before removing the dquot from the AIL. 
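[Editor's note on the xfs_alloc_buftarg() rework above: it is a textbook example of layered error unwinding. Each setup step gets its own label, and a failure jumps to the label that tears down exactly the resources initialized so far, in reverse order. A generic sketch of the pattern, with all names hypothetical:

	struct target *target_alloc(void)
	{
		struct target *t = kzalloc(sizeof(*t), GFP_KERNEL);

		if (!t)
			return NULL;
		if (init_lru(t))
			goto error_free;	/* only the allocation to undo */
		if (init_counter(t))
			goto error_lru;		/* undo lru, then free */
		if (register_shrinker(&t->shrinker))
			goto error_counter;	/* undo counter, lru, free */
		return t;

	error_counter:
		destroy_counter(t);
	error_lru:
		destroy_lru(t);
	error_free:
		kfree(t);
		return NULL;
	}

The fall-through between labels is deliberate; it is what keeps the teardown order the exact reverse of the setup order.]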
*/ if ((lip->li_flags & XFS_LI_IN_AIL) && - lip->li_lsn == qip->qli_flush_lsn) { + ((lip->li_lsn == qip->qli_flush_lsn) || + (lip->li_flags & XFS_LI_FAILED))) { /* xfs_trans_ail_delete() drops the AIL lock. */ spin_lock(&ailp->xa_lock); - if (lip->li_lsn == qip->qli_flush_lsn) + if (lip->li_lsn == qip->qli_flush_lsn) { xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); - else + } else { + /* + * Clear the failed state since we are about to drop the + * flush lock + */ + if (lip->li_flags & XFS_LI_FAILED) + xfs_clear_li_failed(lip); spin_unlock(&ailp->xa_lock); + } } /* diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 2c7a1629e064b4fd1f647affc3432797d058ae26..664dea105e76fee564a1feeb16a05387fe6b9000 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -137,6 +137,26 @@ xfs_qm_dqunpin_wait( wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); } +/* + * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer + * have been failed during writeback + * + * this informs the AIL that the dquot is already flush locked on the next push, + * and acquires a hold on the buffer to ensure that it isn't reclaimed before + * dirty data makes it to disk. + */ +STATIC void +xfs_dquot_item_error( + struct xfs_log_item *lip, + struct xfs_buf *bp) +{ + struct xfs_dquot *dqp; + + dqp = DQUOT_ITEM(lip)->qli_dquot; + ASSERT(!completion_done(&dqp->q_flush)); + xfs_set_li_failed(lip, bp); +} + STATIC uint xfs_qm_dquot_logitem_push( struct xfs_log_item *lip, @@ -144,13 +164,28 @@ xfs_qm_dquot_logitem_push( __acquires(&lip->li_ailp->xa_lock) { struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - struct xfs_buf *bp = NULL; + struct xfs_buf *bp = lip->li_buf; uint rval = XFS_ITEM_SUCCESS; int error; if (atomic_read(&dqp->q_pincount) > 0) return XFS_ITEM_PINNED; + /* + * The buffer containing this item failed to be written back + * previously. Resubmit the buffer for IO + */ + if (lip->li_flags & XFS_LI_FAILED) { + if (!xfs_buf_trylock(bp)) + return XFS_ITEM_LOCKED; + + if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + rval = XFS_ITEM_FLUSHING; + + xfs_buf_unlock(bp); + return rval; + } + if (!xfs_dqlock_nowait(dqp)) return XFS_ITEM_LOCKED; @@ -242,7 +277,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_unlock = xfs_qm_dquot_logitem_unlock, .iop_committed = xfs_qm_dquot_logitem_committed, .iop_push = xfs_qm_dquot_logitem_push, - .iop_committing = xfs_qm_dquot_logitem_committing + .iop_committing = xfs_qm_dquot_logitem_committing, + .iop_error = xfs_dquot_item_error }; /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index fe9a9a183b2d054068399d250a024d930c084354..c5f2f1e3cc4bdf3202abcbfcf66f253fee2b1b4a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2386,6 +2386,7 @@ xfs_ifree_cluster( */ if (ip->i_ino != inum + i) { xfs_iunlock(ip, XFS_ILOCK_EXCL); + rcu_read_unlock(); continue; } } @@ -2428,6 +2429,24 @@ xfs_ifree_cluster( return 0; } +/* + * Free any local-format buffers sitting around before we reset to + * extents format. + */ +static inline void +xfs_ifree_local_data( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + return; + + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); +} + /* * This is called to return an inode to the inode free list. 
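[Editor's note on the one-line rcu_read_unlock() added to xfs_ifree_cluster() above: it fixes a classic leak where a loop takes a lock at the top of each iteration and an early continue skips the unlock at the bottom. A generic sketch of the rule, with names invented:

	for (i = 0; i < n; i++) {
		rcu_read_lock();
		obj = rcu_dereference(table[i]);
		if (!obj || obj->id != wanted) {
			/* Every skip path must drop what the iteration
			 * already acquired. */
			rcu_read_unlock();
			continue;
		}
		use(obj);
		rcu_read_unlock();
	}
]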
* The inode should already be truncated to 0 length and have @@ -2465,6 +2484,9 @@ xfs_ifree( if (error) return error; + xfs_ifree_local_data(ip, XFS_DATA_FORK); + xfs_ifree_local_data(ip, XFS_ATTR_FORK); + VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; ip->i_d.di_dmevmask = 0; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 5b81f7f41b805d43a8f7c033a1200e2bede3bd83..33c3899342389a3060536a780136bbb8c31aea91 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -870,22 +870,6 @@ xfs_setattr_size( if (error) return error; - /* - * We are going to log the inode size change in this transaction so - * any previous writes that are beyond the on disk EOF and the new - * EOF that have not been written out need to be written here. If we - * do not write the data out, we expose ourselves to the null files - * problem. Note that this includes any block zeroing we did above; - * otherwise those blocks may not be zeroed after a crash. - */ - if (did_zeroing || - (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) { - error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - ip->i_d.di_size, newsize); - if (error) - return error; - } - /* * We've already locked out new page faults, so now we can safely remove * pages from the page cache knowing they won't get refaulted until we @@ -902,9 +886,29 @@ xfs_setattr_size( * user visible changes). There's not much we can do about this, except * to hope that the caller sees ENOMEM and retries the truncate * operation. + * + * And we update in-core i_size and truncate page cache beyond newsize + * before writeback the [di_size, newsize] range, so we're guaranteed + * not to write stale data past the new EOF on truncate down. */ truncate_setsize(inode, newsize); + /* + * We are going to log the inode size change in this transaction so + * any previous writes that are beyond the on disk EOF and the new + * EOF that have not been written out need to be written here. If we + * do not write the data out, we expose ourselves to the null files + * problem. Note that this includes any block zeroing we did above; + * otherwise those blocks may not be zeroed after a crash. + */ + if (did_zeroing || + (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) { + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + ip->i_d.di_size, newsize - 1); + if (error) + return error; + } + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) return error; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 05909269f9735d1900abd93e229ccfc6b0efd686..1e26f4504eeda77eee1b68c58ac0cabd1cf4cef5 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -753,7 +753,7 @@ xlog_find_head( * in the in-core log. The following number can be made tighter if * we actually look at the block size of the filesystem. 
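[Editor's note on the xfs_setattr_size() hunk above: two details are easy to miss. First, the writeback now happens after truncate_setsize(), so stale data beyond the new EOF cannot be written back. Second, the end offset changes from newsize to newsize - 1 because filemap_write_and_wait_range() takes the last byte of the range inclusively, not one past it. A fragment-only sketch of the call, with di_size and newsize as in the hunk:

	/* Flush the byte range [di_size, newsize): lend is inclusive. */
	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
					     ip->i_d.di_size,	/* first byte */
					     newsize - 1);	/* last byte */
]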
*/ - num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); + num_scan_bblks = min_t(int, log_bbnum, XLOG_TOTAL_REC_SHIFT(log)); if (head_blk >= num_scan_bblks) { /* * We are guaranteed that the entire check can be performed diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 4df64a1fc09e7aab7f88cd4afe73928228930147..e02a3d968f123111261cc50d5d81c700221de44c 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -27,6 +27,8 @@ * __kprobes_text_start, __kprobes_text_end * __entry_text_start, __entry_text_end * __ctors_start, __ctors_end + * __irqentry_text_start, __irqentry_text_end + * __softirqentry_text_start, __softirqentry_text_end */ extern char _text[], _stext[], _etext[]; extern char _data[], _sdata[], _edata[]; @@ -39,6 +41,8 @@ extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __entry_text_start[], __entry_text_end[]; extern char __start_rodata[], __end_rodata[]; +extern char __irqentry_text_start[], __irqentry_text_end[]; +extern char __softirqentry_text_start[], __softirqentry_text_end[]; /* Start and end of .ctors section - used for constructor calls. */ extern char __ctors_start[], __ctors_end[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index dc81e5287ebfb22af52148c9dcc7a9194d5d023e..3a396a9cb1768dd7c219fca5ae3ec0dc3bd8ab9f 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -499,25 +499,17 @@ *(.entry.text) \ VMLINUX_SYMBOL(__entry_text_end) = .; -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) #define IRQENTRY_TEXT \ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__irqentry_text_start) = .; \ *(.irqentry.text) \ VMLINUX_SYMBOL(__irqentry_text_end) = .; -#else -#define IRQENTRY_TEXT -#endif -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) #define SOFTIRQENTRY_TEXT \ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__softirqentry_text_start) = .; \ *(.softirqentry.text) \ VMLINUX_SYMBOL(__softirqentry_text_end) = .; -#else -#define SOFTIRQENTRY_TEXT -#endif /* Section used for early init (in .S files) */ #define HEAD_TEXT *(.head.text) @@ -778,7 +770,14 @@ */ #define PERCPU_INPUT(cacheline) \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ + VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \ *(.data..percpu..first) \ + . = ALIGN(cacheline); \ + *(.data..percpu..user_mapped) \ + *(.data..percpu..user_mapped..shared_aligned) \ + . = ALIGN(PAGE_SIZE); \ + *(.data..percpu..user_mapped..page_aligned) \ + VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \ . = ALIGN(PAGE_SIZE); \ *(.data..percpu..page_aligned) \ . 
= ALIGN(cacheline); \ diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index f6d9af3efa45a6cc8eb448a71f5d43d72d8fcbd1..5203560f992e40c4433b9d0fb31e25bdff3b7a91 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -80,6 +80,16 @@ int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); void ahash_free_instance(struct crypto_instance *inst); +int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen); + +static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg) +{ + return alg->setkey != shash_no_setkey; +} + +bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg); + int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn, struct hash_alg_common *alg, struct crypto_instance *inst); diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h index 4a53c0d38cd2c48e105085966bf362fa1b54d93c..e045722a69aa9c681c954158ee205e60519f1e01 100644 --- a/include/crypto/mcryptd.h +++ b/include/crypto/mcryptd.h @@ -26,6 +26,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast( struct mcryptd_cpu_queue { struct crypto_queue queue; + spinlock_t q_lock; struct work_struct work; }; diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 894df59b74e44a403947ca75e669688f5165640f..d586f741cab57b0cc6b8644e3e91a2cba076f6c0 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -30,8 +30,6 @@ struct poly1305_desc_ctx { }; int crypto_poly1305_init(struct shash_desc *desc); -int crypto_poly1305_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen); unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen); int crypto_poly1305_update(struct shash_desc *desc, diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 61a3d90f32b338a030c3a064b50c403a48464293..ca2b4c4aec42c2dadd6b6ffca6e1c4dc8603ee52 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -276,11 +276,8 @@ bool acpi_processor_validate_proc_id(int proc_id); /* Arch dependent functions for cpu hotplug support */ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu); int acpi_unmap_cpu(int cpu); -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -void acpi_set_processor_mapping(void); - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e47a7f7025a0900ac7e01d76d50fb0d74b58f2c8..fe8dd2797b1f092abf37fc81fe26ef04746d6dca 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1738,43 +1738,26 @@ static const u_int64_t latency_x_axis_us[] = { #define BLK_IO_LAT_HIST_ZERO 2 struct io_latency_state { - u_int64_t latency_y_axis_read[ARRAY_SIZE(latency_x_axis_us) + 1]; - u_int64_t latency_reads_elems; - u_int64_t latency_y_axis_write[ARRAY_SIZE(latency_x_axis_us) + 1]; - u_int64_t latency_writes_elems; + u_int64_t latency_y_axis[ARRAY_SIZE(latency_x_axis_us) + 1]; + u_int64_t latency_elems; + u_int64_t latency_sum; }; static inline void -blk_update_latency_hist(struct io_latency_state *s, - int read, - u_int64_t delta_us) +blk_update_latency_hist(struct io_latency_state *s, u_int64_t delta_us) { int i; - for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++) { - if (delta_us < (u_int64_t)latency_x_axis_us[i]) { - if (read) - s->latency_y_axis_read[i]++; - else - s->latency_y_axis_write[i]++; + for (i 
= 0; i < ARRAY_SIZE(latency_x_axis_us); i++) + if (delta_us < (u_int64_t)latency_x_axis_us[i]) break; - } - } - if (i == ARRAY_SIZE(latency_x_axis_us)) { - /* Overflowed the histogram */ - if (read) - s->latency_y_axis_read[i]++; - else - s->latency_y_axis_write[i]++; - } - if (read) - s->latency_reads_elems++; - else - s->latency_writes_elems++; + s->latency_y_axis[i]++; + s->latency_elems++; + s->latency_sum += delta_us; } -void blk_zero_latency_hist(struct io_latency_state *s); -ssize_t blk_latency_hist_show(struct io_latency_state *s, char *buf); +ssize_t blk_latency_hist_show(char* name, struct io_latency_state *s, + char *buf, int buf_size); #else /* CONFIG_BLOCK */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 97b3c2a928125eb80538681bb01e12fd20c70b2f..193c40e296fcd9c1459f417491206d427fec4bfd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,17 +36,26 @@ struct bpf_map_ops { }; struct bpf_map { - atomic_t refcnt; + /* 1st cacheline with read-mostly members of which some + * are also accessed in fast-path (e.g. ops, max_entries). + */ + const struct bpf_map_ops *ops ____cacheline_aligned; enum bpf_map_type map_type; u32 key_size; u32 value_size; u32 max_entries; u32 map_flags; u32 pages; - struct user_struct *user; - const struct bpf_map_ops *ops; - struct work_struct work; + bool unpriv_array; + /* 7 bytes hole */ + + /* 2nd cacheline with misc members to avoid false sharing + * particularly with refcounting. + */ + struct user_struct *user ____cacheline_aligned; + atomic_t refcnt; atomic_t usercnt; + struct work_struct work; #ifdef CONFIG_SECURITY void *security; #endif @@ -195,6 +204,7 @@ struct bpf_prog_aux { struct bpf_array { struct bpf_map map; u32 elem_size; + u32 index_mask; /* 'ownership' of prog_array is claimed by the first program that * is going to use this map or by the first program which FD is stored * in the map to make sure that all callers and callees have the same diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3101141661a19498c0131ef0e34a72cc918d2f71..070fc49e39e2c25568e92498c2b2dc1a3c3c06a7 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -67,7 +67,11 @@ struct bpf_verifier_state_list { }; struct bpf_insn_aux_data { - enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + union { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + }; + bool seen; /* this insn was processed by the verifier */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 447a915db25d744716c9a11de53955f2f30ff470..4431ea2c88022ca0cba42fb2c707e9ecc8f05242 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -239,12 +239,10 @@ static inline int block_page_mkwrite_return(int err) { if (err == 0) return VM_FAULT_LOCKED; - if (err == -EFAULT) + if (err == -EFAULT || err == -EAGAIN) return VM_FAULT_NOPAGE; if (err == -ENOMEM) return VM_FAULT_OOM; - if (err == -EAGAIN) - return VM_FAULT_RETRY; /* -ENOSPC, -EDQUOT, -EIO ... 
*/ return VM_FAULT_SIGBUS; } diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2189935075b48e3d5b06012f9279f6eea4973fa7..a951fd10aaaad07cf7ab897615a4e3d84caf6a42 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -71,6 +71,7 @@ struct cpu_cacheinfo { struct cacheinfo *info_list; unsigned int num_levels; unsigned int num_leaves; + bool cpu_map_populated; }; /* diff --git a/include/linux/cpu.h b/include/linux/cpu.h index dfd2b7c14ef70a0336000b608e04b986b4d2ddb8..b86c3fbb666888b9369a4f713a10efc810f5c442 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -44,6 +44,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr); extern int cpu_add_dev_attr_group(struct attribute_group *attrs); extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); +extern ssize_t cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf); + extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 965cc5693a46d91d68c5d1f6edcc9adaae3f809e..c9447a689522a6aaa763520bf64bd9daf5f31820 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -48,7 +48,7 @@ enum cpuhp_state { CPUHP_ARM_SHMOBILE_SCU_PREPARE, CPUHP_SH_SH3X_PREPARE, CPUHP_BLK_MQ_PREPARE, - CPUHP_TIMERS_DEAD, + CPUHP_TIMERS_PREPARE, CPUHP_NOTF_ERR_INJ_PREPARE, CPUHP_MIPS_SOC_PREPARE, CPUHP_BRINGUP_CPU, diff --git a/include/linux/cred.h b/include/linux/cred.h index f0e70a1bb3acfe784148ba468a3577d23065a9a7..cf1a5d0c4eb411994641f40bb1d002ffae3d49b7 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -82,6 +82,7 @@ extern int set_current_groups(struct group_info *); extern void set_groups(struct cred *, struct group_info *); extern int groups_search(const struct group_info *, kgid_t); extern bool may_setgroups(void); +extern void groups_sort(struct group_info *); /* * The security context of a task diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7cee5551625b63f854a533e383850032c41a3579..0a726257f74ac0730d8408b4c7c098797190ea5f 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -24,6 +24,7 @@ #include #include #include +#include /* * Autoloaded crypto modules should only use a prefixed name to avoid allowing @@ -464,6 +465,45 @@ struct crypto_alg { struct module *cra_module; } CRYPTO_MINALIGN_ATTR; +/* + * A helper struct for waiting for completion of async crypto ops + */ +struct crypto_wait { + struct completion completion; + int err; +}; + +/* + * Macro for declaring a crypto op async wait object on stack + */ +#define DECLARE_CRYPTO_WAIT(_wait) \ + struct crypto_wait _wait = { \ + COMPLETION_INITIALIZER_ONSTACK((_wait).completion), 0 } + +/* + * Async ops completion helper functions + */ +void crypto_req_done(struct crypto_async_request *req, int err); + +static inline int crypto_wait_req(int err, struct crypto_wait *wait) +{ + switch (err) { + case -EINPROGRESS: + case -EBUSY: + wait_for_completion(&wait->completion); + reinit_completion(&wait->completion); + err = wait->err; + break; + }; + + return err; +} + +static inline void crypto_init_wait(struct crypto_wait *wait) +{ + init_completion(&wait->completion); +} + /* * Algorithm registration
interface. */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 08528afdf58b31ee0e0d584161f4a685da69b174..704caae69c42750ae6920ea100603879861d7d72 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -659,7 +659,6 @@ static inline void *dma_zalloc_coherent(struct device *dev, size_t size, return ret; } -#ifdef CONFIG_HAS_DMA static inline int dma_get_cache_alignment(void) { #ifdef ARCH_DMA_MINALIGN @@ -667,7 +666,6 @@ static inline int dma_get_cache_alignment(void) #endif return 1; } -#endif /* flags for the coherent memory api */ #define DMA_MEMORY_MAP 0x01 diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 6e84b2cae6ad62b529298b662856dc857c3091b6..442b54a14cbc0f9b9100a240d70b052ee13cfc22 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -81,8 +82,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i { struct fdtable *fdt = rcu_dereference_raw(files->fdt); - if (fd < fdt->max_fds) + if (fd < fdt->max_fds) { + fd = array_index_nospec(fd, fdt->max_fds); return rcu_dereference_raw(fdt->fd[fd]); + } return NULL; } diff --git a/include/linux/fence.h b/include/linux/fence.h index fd9b89f96ba39a7b69eb0c16766c806a8c9dee34..7c9b78cf2a7ed2fa0a2e92adf07747251494ed1c 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -47,7 +47,7 @@ struct fence_cb; * can be compared to decide which fence would be signaled later. * @flags: A mask of FENCE_FLAG_* defined below * @timestamp: Timestamp when the fence was signaled. - * @status: Optional, only valid if < 0, must be set before calling + * @error: Optional, only valid if < 0, must be set before calling * fence_signal, indicates that the fence has completed with an error. * * the flags member must be manipulated and read using the appropriate @@ -79,7 +79,7 @@ struct fence { unsigned seqno; unsigned long flags; ktime_t timestamp; - int status; + int error; }; enum fence_flag_bits { @@ -133,7 +133,7 @@ struct fence_cb { * or some failure occurred that made it impossible to enable * signaling. True indicates successful enabling. * - * fence->status may be set in enable_signaling, but only when false is + * fence->error may be set in enable_signaling, but only when false is * returned. * * Calling fence_signal before enable_signaling is called allows @@ -145,7 +145,7 @@ struct fence_cb { * the second time will be a noop since it was already signaled. * * Notes on signaled: - * May set fence->status if returning true. + * May set fence->error if returning true. * * Notes on wait: * Must not be NULL, set to fence_default_wait for default implementation. @@ -328,6 +328,19 @@ fence_is_signaled(struct fence *fence) return false; } +/** + * __fence_is_later - return if f1 is chronologically later than f2 + * @f1: [in] the first fence's seqno + * @f2: [in] the second fence's seqno from the same context + * + * Returns true if f1 is chronologically later than f2. Both fences must be + * from the same context, since a seqno is not common across contexts. 
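[Editor's note: __fence_is_later() above compares sequence numbers through a signed 32-bit difference, which stays correct even when the counter wraps around. A stand-alone demonstration of the idiom:

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* "a was issued after b", wraparound-safe, as in __fence_is_later(). */
	static bool seqno_later(uint32_t a, uint32_t b)
	{
		return (int32_t)(a - b) > 0;
	}

	int main(void)
	{
		assert(seqno_later(2, 1));
		assert(!seqno_later(1, 2));
		/* Across the 32-bit wrap: 0 is "later" than 0xffffffff. */
		assert(seqno_later(0, 0xffffffffu));
		return 0;
	}

The trade-off is that two sequence numbers more than 2^31 apart compare the wrong way, which is acceptable for monotonically issued fences.]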
+ */ +static inline bool __fence_is_later(u32 f1, u32 f2) +{ + return (int)(f1 - f2) > 0; +} + /** * fence_is_later - return if f1 is chronologically later than f2 * @f1: [in] the first fence from the same context @@ -341,7 +354,7 @@ static inline bool fence_is_later(struct fence *f1, struct fence *f2) if (WARN_ON(f1->context != f2->context)) return false; - return (int)(f1->seqno - f2->seqno) > 0; + return __fence_is_later(f1->seqno, f2->seqno); } /** @@ -369,6 +382,50 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2) return fence_is_signaled(f2) ? NULL : f2; } +/** + * fence_get_status_locked - returns the status upon completion + * @fence: [in] the fence to query + * + * Drivers can supply an optional error status condition before they signal + * the fence (to indicate whether the fence was completed due to an error + * rather than success). The value of the status condition is only valid + * if the fence has been signaled, fence_get_status_locked() first checks + * the signal state before reporting the error status. + * + * Returns 0 if the fence has not yet been signaled, 1 if the fence has + * been signaled without an error condition, or a negative error code + * if the fence has been completed in err. + */ +static inline int fence_get_status_locked(struct fence *fence) +{ + if (fence_is_signaled_locked(fence)) + return fence->error ?: 1; + else + return 0; +} + +int fence_get_status(struct fence *fence); + +/** + * fence_set_error - flag an error condition on the fence + * @fence: [in] the fence + * @error: [in] the error to store + * + * Drivers can supply an optional error status condition before they signal + * the fence, to indicate that the fence was completed due to an error + * rather than success. This must be set before signaling (so that the value + * is visible before any waiters on the signal callback are woken). This + * helper exists to help catching erroneous setting of #fence.error. + */ +static inline void fence_set_error(struct fence *fence, + int error) +{ + BUG_ON(test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)); + BUG_ON(error >= 0 || error < -MAX_ERRNO); + + fence->error = error; +} + signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); signed long fence_wait_any_timeout(struct fence **fences, uint32_t count, bool intr, signed long timeout); diff --git a/include/linux/frame.h b/include/linux/frame.h index e6baaba3f1aea91c21168ce756de7a1015fa1dfe..d772c61c31da3cca3ba99274ce355704e1eb0df7 100644 --- a/include/linux/frame.h +++ b/include/linux/frame.h @@ -11,7 +11,7 @@ * For more information, see tools/objtool/Documentation/stack-validation.txt. 
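[Editor's note: fence_set_error() and fence_get_status() above are meant to be used as a pair around signaling. A hedged sketch of a driver completion path; the driver structure is hypothetical, and fence_signal() is assumed from the pre-existing fence API rather than from this hunk:

	static void my_hw_irq_complete(struct fence *fence, int hw_err)
	{
		/* Must be a negative errno and must precede signaling,
		 * per the fence_set_error() rules above. */
		if (hw_err < 0)
			fence_set_error(fence, hw_err);

		fence_signal(fence);

		/* Waiters now see 1 for success or the negative errno from
		 * fence_get_status(); 0 would mean "not signaled yet". */
	}
]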
*/ #define STACK_FRAME_NON_STANDARD(func) \ - static void __used __section(__func_stack_frame_non_standard) \ + static void __used __section(.discard.func_stack_frame_non_standard) \ *__func_stack_frame_non_standard_##func = func #else /* !CONFIG_STACK_VALIDATION */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 18bd249d380bbeb6eb087f48f71a83207b41b790..cbcdcb209204d9fcac1f1acc67e426435cfef3c9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1857,6 +1857,7 @@ struct super_operations { #else #define S_DAX 0 /* Make all the DAX code disappear */ #endif +#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -1895,6 +1896,7 @@ struct super_operations { #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) #define IS_DAX(inode) ((inode)->i_flags & S_DAX) +#define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED) #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 115bb81912ccc8759a54d206487cc8936e2a90c7..94a8aae8f9e2be702c15e7b081f9d1b4197421e1 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -764,7 +764,7 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie, { if (fscache_cookie_valid(cookie) && PageFsCache(page)) return __fscache_maybe_release_page(cookie, page, gfp); - return false; + return true; } /** diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h new file mode 100644 index 0000000000000000000000000000000000000000..8641e56b8f8a9107a3cb595b130fe70b12839f79 --- /dev/null +++ b/include/linux/fscrypt.h @@ -0,0 +1,290 @@ +/* + * fscrypt.h: declarations for per-file encryption + * + * Filesystems that implement per-file encryption include this header + * file with the __FS_HAS_ENCRYPTION set according to whether that filesystem + * is being built with encryption support or not. + * + * Copyright (C) 2015, Google, Inc. + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. + */ +#ifndef _LINUX_FSCRYPT_H +#define _LINUX_FSCRYPT_H + +#include +#include +#include +#include +#include +#include +#include + +#define FS_CRYPTO_BLOCK_SIZE 16 + +struct fscrypt_info; + +struct fscrypt_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + u8 flags; /* Flags */ +}; + +/** + * For encrypted symlinks, the ciphertext length is stored at the beginning + * of the string in little-endian format. 
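[Editor's note: because the length above is stored as __le16, on-disk readers must byte-swap on big-endian hosts. A minimal accessor sketch; the helper is hypothetical, not part of the patch:

	/* Length of the encrypted target, in CPU byte order. */
	static inline u16 fscrypt_symlink_data_len(
				const struct fscrypt_symlink_data *sd)
	{
		return le16_to_cpu(sd->len);
	}
]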
+ */ +struct fscrypt_symlink_data { + __le16 len; + char encrypted_path[1]; +} __packed; + +struct fscrypt_str { + unsigned char *name; + u32 len; +}; + +struct fscrypt_name { + const struct qstr *usr_fname; + struct fscrypt_str disk_name; + u32 hash; + u32 minor_hash; + struct fscrypt_str crypto_buf; +}; + +#define FSTR_INIT(n, l) { .name = n, .len = l } +#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) +#define fname_name(p) ((p)->disk_name.name) +#define fname_len(p) ((p)->disk_name.len) + +/* + * fscrypt superblock flags + */ +#define FS_CFLG_OWN_PAGES (1U << 1) + +/* + * crypto operations for filesystems + */ +struct fscrypt_operations { + unsigned int flags; + const char *key_prefix; + int (*get_context)(struct inode *, void *, size_t); + int (*set_context)(struct inode *, const void *, size_t, void *); + bool (*dummy_context)(struct inode *); + bool (*empty_dir)(struct inode *); + unsigned (*max_namelen)(struct inode *); +}; + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + if (inode->i_sb->s_cop->dummy_context && + inode->i_sb->s_cop->dummy_context(inode)) + return true; + return false; +} + +static inline bool fscrypt_valid_enc_modes(u32 contents_mode, + u32 filenames_mode) +{ + if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && + filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) + return true; + + if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && + filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) + return true; + + return false; +} + +static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) +{ + if (str->len == 1 && str->name[0] == '.') + return true; + + if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + return true; + + return false; +} + +#if __FS_HAS_ENCRYPTION + +static inline struct page *fscrypt_control_page(struct page *page) +{ + return ((struct fscrypt_ctx *)page_private(page))->w.control_page; +} + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return (inode->i_crypt_info != NULL); +} + +#include + +#else /* !__FS_HAS_ENCRYPTION */ + +static inline struct page *fscrypt_control_page(struct page *page) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return 0; +} + +#include +#endif /* __FS_HAS_ENCRYPTION */ + +/** + * fscrypt_require_key - require an inode's encryption key + * @inode: the inode we need the key for + * + * If the inode is encrypted, set up its encryption key if not already done. + * Then require that the key be present and return -ENOKEY otherwise. + * + * No locks are needed, and the key will live as long as the struct inode --- so + * it won't go away from under you. + * + * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code + * if a problem occurred while setting up the encryption key. + */ +static inline int fscrypt_require_key(struct inode *inode) +{ + if (IS_ENCRYPTED(inode)) { + int err = fscrypt_get_encryption_info(inode); + + if (err) + return err; + if (!fscrypt_has_encryption_key(inode)) + return -ENOKEY; + } + return 0; +} + +/** + * fscrypt_prepare_link - prepare to link an inode into a possibly-encrypted directory + * @old_dentry: an existing dentry for the inode being linked + * @dir: the target directory + * @dentry: negative dentry for the target filename + * + * A new link can only be added to an encrypted directory if the directory's + * encryption key is available --- since otherwise we'd have no way to encrypt + * the filename.
Therefore, we first set up the directory's encryption key (if + * not already done) and return an error if it's unavailable. + * + * We also verify that the link will not violate the constraint that all files + * in an encrypted directory tree use the same encryption policy. + * + * Return: 0 on success, -ENOKEY if the directory's encryption key is missing, + * -EPERM if the link would result in an inconsistent encryption policy, or + * another -errno code. + */ +static inline int fscrypt_prepare_link(struct dentry *old_dentry, + struct inode *dir, + struct dentry *dentry) +{ + if (IS_ENCRYPTED(dir)) + return __fscrypt_prepare_link(d_inode(old_dentry), dir); + return 0; +} + +/** + * fscrypt_prepare_rename - prepare for a rename between possibly-encrypted directories + * @old_dir: source directory + * @old_dentry: dentry for source file + * @new_dir: target directory + * @new_dentry: dentry for target location (may be negative unless exchanging) + * @flags: rename flags (we care at least about %RENAME_EXCHANGE) + * + * Prepare for ->rename() where the source and/or target directories may be + * encrypted. A new link can only be added to an encrypted directory if the + * directory's encryption key is available --- since otherwise we'd have no way + * to encrypt the filename. A rename to an existing name, on the other hand, + * *is* cryptographically possible without the key. However, we take the more + * conservative approach and just forbid all no-key renames. + * + * We also verify that the rename will not violate the constraint that all files + * in an encrypted directory tree use the same encryption policy. + * + * Return: 0 on success, -ENOKEY if an encryption key is missing, -EPERM if the + * rename would cause inconsistent encryption policies, or another -errno code. + */ +static inline int fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags) +{ + if (IS_ENCRYPTED(old_dir) || IS_ENCRYPTED(new_dir)) + return __fscrypt_prepare_rename(old_dir, old_dentry, + new_dir, new_dentry, flags); + return 0; +} + +/** + * fscrypt_prepare_lookup - prepare to lookup a name in a possibly-encrypted directory + * @dir: directory being searched + * @dentry: filename being looked up + * @flags: lookup flags + * + * Prepare for ->lookup() in a directory which may be encrypted. Lookups can be + * done with or without the directory's encryption key; without the key, + * filenames are presented in encrypted form. Therefore, we'll try to set up + * the directory's encryption key, but even without it the lookup can continue. + * + * To allow invalidating stale dentries if the directory's encryption key is + * added later, we also install a custom ->d_revalidate() method and use the + * DCACHE_ENCRYPTED_WITH_KEY flag to indicate whether a given dentry is a + * plaintext name (flag set) or a ciphertext name (flag cleared). + * + * Return: 0 on success, -errno if a problem occurred while setting up the + * encryption key + */ +static inline int fscrypt_prepare_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags) +{ + if (IS_ENCRYPTED(dir)) + return __fscrypt_prepare_lookup(dir, dentry); + return 0; +} + +/** + * fscrypt_prepare_setattr - prepare to change a possibly-encrypted inode's attributes + * @dentry: dentry through which the inode is being changed + * @attr: attributes to change + * + * Prepare for ->setattr() on a possibly-encrypted inode. 
On an encrypted file, + * most attribute changes are allowed even without the encryption key. However, + * without the encryption key we do have to forbid truncates. This is needed + * because the size being truncated to may not be a multiple of the filesystem + * block size, and in that case we'd have to decrypt the final block, zero the + * portion past i_size, and re-encrypt it. (We *could* allow truncating to a + * filesystem block boundary, but it's simpler to just forbid all truncates --- + * and we already forbid all other contents modifications without the key.) + * + * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code + * if a problem occurred while setting up the encryption key. + */ +static inline int fscrypt_prepare_setattr(struct dentry *dentry, + struct iattr *attr) +{ + if (attr->ia_valid & ATTR_SIZE) + return fscrypt_require_key(d_inode(dentry)); + return 0; +} + +#endif /* _LINUX_FSCRYPT_H */ diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h deleted file mode 100644 index 4022c61f7e9b4d6186b0e7bfd9faf364c708ee17..0000000000000000000000000000000000000000 --- a/include/linux/fscrypt_common.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * fscrypt_common.h: common declarations for per-file encryption - * - * Copyright (C) 2015, Google, Inc. - * - * Written by Michael Halcrow, 2015. - * Modified by Jaegeuk Kim, 2015. - */ - -#ifndef _LINUX_FSCRYPT_COMMON_H -#define _LINUX_FSCRYPT_COMMON_H - -#include -#include -#include -#include -#include -#include -#include - -#define FS_CRYPTO_BLOCK_SIZE 16 - -struct fscrypt_info; - -struct fscrypt_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ -}; - -/** - * For encrypted symlinks, the ciphertext length is stored at the beginning - * of the string in little-endian format. 
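[Editor's note: the fscrypt_prepare_*() hooks above are designed to be one-liners at the top of the corresponding VFS methods. A hedged sketch of how a filesystem might wire in the link hook; the filesystem and its internals are hypothetical:

	static int myfs_link(struct dentry *old_dentry, struct inode *dir,
			     struct dentry *dentry)
	{
		int err;

		/* Fails with -ENOKEY if dir's key is absent, or -EPERM if
		 * the link would mix encryption policies. */
		err = fscrypt_prepare_link(old_dentry, dir, dentry);
		if (err)
			return err;

		/* ... the filesystem's normal link path ... */
		return myfs_add_link(dentry, d_inode(old_dentry));
	}
]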
- */ -struct fscrypt_symlink_data { - __le16 len; - char encrypted_path[1]; -} __packed; - -struct fscrypt_str { - unsigned char *name; - u32 len; -}; - -struct fscrypt_name { - const struct qstr *usr_fname; - struct fscrypt_str disk_name; - u32 hash; - u32 minor_hash; - struct fscrypt_str crypto_buf; -}; - -#define FSTR_INIT(n, l) { .name = n, .len = l } -#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) -#define fname_name(p) ((p)->disk_name.name) -#define fname_len(p) ((p)->disk_name.len) - -/* - * fscrypt superblock flags - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - -/* - * crypto opertions for filesystems - */ -struct fscrypt_operations { - unsigned int flags; - const char *key_prefix; - int (*get_context)(struct inode *, void *, size_t); - int (*set_context)(struct inode *, const void *, size_t, void *); - int (*dummy_context)(struct inode *); - bool (*is_encrypted)(struct inode *); - bool (*empty_dir)(struct inode *); - unsigned (*max_namelen)(struct inode *); -}; - -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - if (inode->i_sb->s_cop->dummy_context && - inode->i_sb->s_cop->dummy_context(inode)) - return true; - return false; -} - -static inline bool fscrypt_valid_enc_modes(u32 contents_mode, - u32 filenames_mode) -{ - if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && - filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) - return true; - - if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && - filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) - return true; - - return false; -} - -static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) -{ - if (str->len == 1 && str->name[0] == '.') - return true; - - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') - return true; - - return false; -} - -static inline struct page *fscrypt_control_page(struct page *page) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - return ((struct fscrypt_ctx *)page_private(page))->w.control_page; -#else - WARN_ON_ONCE(1); - return ERR_PTR(-EINVAL); -#endif -} - -static inline int fscrypt_has_encryption_key(const struct inode *inode) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - return (inode->i_crypt_info != NULL); -#else - return 0; -#endif -} - -#endif /* _LINUX_FSCRYPT_COMMON_H */ diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index ec406aed2f2f8c3331d0002fba8a83036e75c08b..c4c6bf2c390e0550198e58040ae6c9918571adb0 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -3,13 +3,16 @@ * * This stubs out the fscrypt functions for filesystems configured without * encryption support. + * + * Do not include this file directly. Use fscrypt.h instead! */ +#ifndef _LINUX_FSCRYPT_H +#error "Incorrect include of linux/fscrypt_notsupp.h!" 
+#endif #ifndef _LINUX_FSCRYPT_NOTSUPP_H #define _LINUX_FSCRYPT_NOTSUPP_H -#include - /* crypto.c */ static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) @@ -97,7 +100,7 @@ static inline int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { - if (dir->i_sb->s_cop->is_encrypted(dir)) + if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; memset(fname, 0, sizeof(struct fscrypt_name)); @@ -174,4 +177,34 @@ static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, return -EOPNOTSUPP; } +/* hooks.c */ + +static inline int fscrypt_file_open(struct inode *inode, struct file *filp) +{ + if (IS_ENCRYPTED(inode)) + return -EOPNOTSUPP; + return 0; +} + +static inline int __fscrypt_prepare_link(struct inode *inode, + struct inode *dir) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_lookup(struct inode *dir, + struct dentry *dentry) +{ + return -EOPNOTSUPP; +} + #endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 32e2fcf13b015ed198381c06c750b84ada99cafd..2db5e9706f60d32f8dcf0dffecf72bc7b39e7247 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -1,14 +1,15 @@ /* * fscrypt_supp.h * - * This is included by filesystems configured with encryption support. + * Do not include this file directly. Use fscrypt.h instead! */ +#ifndef _LINUX_FSCRYPT_H +#error "Incorrect include of linux/fscrypt_supp.h!" +#endif #ifndef _LINUX_FSCRYPT_SUPP_H #define _LINUX_FSCRYPT_SUPP_H -#include - /* crypto.c */ extern struct kmem_cache *fscrypt_info_cachep; extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); @@ -142,4 +143,14 @@ extern void fscrypt_pullback_bio_page(struct page **, bool); extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, unsigned int); +/* hooks.c */ +extern int fscrypt_file_open(struct inode *inode, struct file *filp); +extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir); +extern int __fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags); +extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); + #endif /* _LINUX_FSCRYPT_SUPP_H */ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index e5f03a4d8430818787ec4255929f51aa23aeeb79..ff51592c4792b45e9beb56fa369f1e7a40c319d4 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -213,12 +213,20 @@ static inline void fsnotify_modify(struct file *file) static inline void fsnotify_open(struct file *file) { struct path *path = &file->f_path; + struct path lower_path; struct inode *inode = path->dentry->d_inode; + __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; + if (path->dentry->d_op && path->dentry->d_op->d_canonical_path) { + path->dentry->d_op->d_canonical_path(path, &lower_path); + fsnotify_parent(&lower_path, NULL, mask); + fsnotify(lower_path.dentry->d_inode, mask, &lower_path, FSNOTIFY_EVENT_PATH, NULL, 0); + path_put(&lower_path); + } fsnotify_parent(path, NULL, mask); fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 
29d4385903d456c274e56d258741fa51002881ce..206fe3bcccccd929e0f37ff98e70078c0fad3d77 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -32,6 +32,7 @@ #include #include +#include struct device; struct device_node; @@ -70,7 +71,7 @@ struct gen_pool { */ struct gen_pool_chunk { struct list_head next_chunk; /* next chunk in pool */ - atomic_t avail; + atomic_long_t avail; phys_addr_t phys_addr; /* physical starting address of memory chunk */ unsigned long start_addr; /* start address of memory chunk */ unsigned long end_addr; /* end address of memory chunk (inclusive) */ diff --git a/include/linux/init.h b/include/linux/init.h index e30104ceb86dcb3076f7950d34d2776735b132ff..8e346d1bd83757519e0cc661c3728e1f7e4d50a9 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -4,6 +4,13 @@ #include #include +/* Built-in __init functions needn't be compiled with retpoline */ +#if defined(RETPOLINE) && !defined(MODULE) +#define __noretpoline __attribute__((indirect_branch("keep"))) +#else +#define __noretpoline +#endif + /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) * as `initialization' functions. The kernel can take this @@ -39,7 +46,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold notrace __latent_entropy +#define __init __section(.init.text) __cold notrace __latent_entropy __noretpoline #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 72f0721f75e734ae88d4b3f84272a7a552fb0a4f..999b7c374645d0abdbdd99bcd1ef0107306451c9 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -699,7 +700,6 @@ extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) /* * We want to know which function is an entrypoint of a hardirq or a softirq. */ @@ -707,16 +707,4 @@ extern int arch_early_irq_init(void); #define __softirq_entry \ __attribute__((__section__(".softirqentry.text"))) -/* Limits of hardirq entrypoints */ -extern char __irqentry_text_start[]; -extern char __irqentry_text_end[]; -/* Limits of softirq entrypoints */ -extern char __softirqentry_text_start[]; -extern char __softirqentry_text_end[]; - -#else -#define __irq_entry -#define __softirq_entry -#endif - #endif diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 3b944002c54dd2ab10ae074ccce98be9d0214eda..11ff751403617c4a6fd2468df9ac80d7a5c89081 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -248,7 +248,8 @@ struct ipv6_pinfo { * 100: prefer care-of address */ dontfrag:1, - autoflowlabel:1; + autoflowlabel:1, + autoflowlabel_set:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h new file mode 100644 index 0000000000000000000000000000000000000000..58c55b1589d061113da66a54de77a75f8f021330 --- /dev/null +++ b/include/linux/kaiser.h @@ -0,0 +1,52 @@ +#ifndef _LINUX_KAISER_H +#define _LINUX_KAISER_H + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +#include + +static inline int kaiser_map_thread_stack(void *stack) +{ + /* + * Map that page of kernel stack on which we enter from user context. 
+ */ + return kaiser_add_mapping((unsigned long)stack + + THREAD_SIZE - PAGE_SIZE, PAGE_SIZE, __PAGE_KERNEL); +} + +static inline void kaiser_unmap_thread_stack(void *stack) +{ + /* + * Note: may be called even when kaiser_map_thread_stack() failed. + */ + kaiser_remove_mapping((unsigned long)stack + + THREAD_SIZE - PAGE_SIZE, PAGE_SIZE); +} +#else + +/* + * These stubs are used whenever CONFIG_PAGE_TABLE_ISOLATION is off, which + * includes architectures that support KAISER, but have it disabled. + */ + +static inline void kaiser_init(void) +{ +} +static inline int kaiser_add_mapping(unsigned long addr, + unsigned long size, unsigned long flags) +{ + return 0; +} +static inline void kaiser_remove_mapping(unsigned long start, + unsigned long size) +{ +} +static inline int kaiser_map_thread_stack(void *stack) +{ + return 0; +} +static inline void kaiser_unmap_thread_stack(void *stack) +{ +} + +#endif /* !CONFIG_PAGE_TABLE_ISOLATION */ +#endif /* _LINUX_KAISER_H */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 820c0ad54a0117596e63bae6845b1ebd771c3412..b37afd197eedafb8588907f18f1c6b5ef143c292 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -30,16 +30,10 @@ static inline void *kasan_mem_to_shadow(const void *addr) } /* Enable reporting bugs after kasan_disable_current() */ -static inline void kasan_enable_current(void) -{ - current->kasan_depth++; -} +extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ -static inline void kasan_disable_current(void) -{ - current->kasan_depth--; -} +extern void kasan_disable_current(void); void kasan_unpoison_shadow(const void *address, size_t size); @@ -52,7 +46,7 @@ void kasan_free_pages(struct page *page, unsigned int order); void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags); void kasan_cache_shrink(struct kmem_cache *cache); -void kasan_cache_destroy(struct kmem_cache *cache); +void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); @@ -81,6 +75,9 @@ size_t ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } size_t kasan_metadata_size(struct kmem_cache *cache); +bool kasan_save_enable_multi_shot(void); +void kasan_restore_multi_shot(bool enabled); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -98,7 +95,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags) {} static inline void kasan_cache_shrink(struct kmem_cache *cache) {} -static inline void kasan_cache_destroy(struct kmem_cache *cache) {} +static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 2883ac98c280ca429f25f5571805d02fa09be96b..87e2a44f1bab1a257bdb214ae8a33e21a0ad9e2f 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -7,19 +7,23 @@ struct task_struct; #ifdef CONFIG_KCOV -void kcov_task_init(struct task_struct *t); -void kcov_task_exit(struct task_struct *t); - enum kcov_mode { /* Coverage collection is not enabled yet. */ KCOV_MODE_DISABLED = 0, + /* KCOV was initialized, but tracing mode hasn't been chosen yet. */ + KCOV_MODE_INIT = 1, /* * Tracing coverage collection mode. 
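[Editor's note: the kasan_save_enable_multi_shot()/kasan_restore_multi_shot() pair added above lets test code temporarily report every bad access instead of only the first one. A hedged sketch of a self-test wrapper; the wrapper itself is hypothetical:

	/* Report every bad access the test triggers, then restore
	 * whatever reporting mode was in effect before. */
	static void run_kasan_selftest(void (*test)(void))
	{
		bool multishot = kasan_save_enable_multi_shot();

		test();
		kasan_restore_multi_shot(multishot);
	}
]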
* Covered PCs are collected in a per-task buffer. */ - KCOV_MODE_TRACE = 1, + KCOV_MODE_TRACE_PC = 2, + /* Collecting comparison operands mode. */ + KCOV_MODE_TRACE_CMP = 3, }; +void kcov_task_init(struct task_struct *t); +void kcov_task_exit(struct task_struct *t); + #else static inline void kcov_task_init(struct task_struct *t) {} diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 80faf44b88876fdecd5d119858eccd01f97bbf05..dd1b009106a5cf32a3173fe74bc518d12e84eb0c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -476,6 +476,7 @@ enum { enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6045d4d580656b95a69a5f773db84945f39f9a23..20ee90c47cd50972c5b69e9a9aac82d6658bb4e4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -143,7 +143,7 @@ enum { MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, - MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, + MLX5_CMD_OP_SET_PP_RATE_LIMIT = 0x780, MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, MLX5_CMD_OP_ALLOC_PD = 0x800, MLX5_CMD_OP_DEALLOC_PD = 0x801, @@ -737,6 +737,12 @@ enum { MLX5_CAP_PORT_TYPE_ETH = 0x1, }; +enum { + MLX5_CAP_UMR_FENCE_STRONG = 0x0, + MLX5_CAP_UMR_FENCE_SMALL = 0x1, + MLX5_CAP_UMR_FENCE_NONE = 0x2, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x80]; @@ -838,7 +844,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 striding_rq[0x1]; u8 reserved_at_201[0x2]; u8 ipoib_basic_offloads[0x1]; - u8 reserved_at_205[0xa]; + u8 reserved_at_205[0x5]; + u8 umr_fence[0x2]; + u8 reserved_at_20c[0x3]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; @@ -6689,7 +6697,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits { u8 vxlan_udp_port[0x10]; }; -struct mlx5_ifc_set_rate_limit_out_bits { +struct mlx5_ifc_set_pp_rate_limit_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6698,7 +6706,7 @@ struct mlx5_ifc_set_rate_limit_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_set_rate_limit_in_bits { +struct mlx5_ifc_set_pp_rate_limit_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -6711,6 +6719,8 @@ struct mlx5_ifc_set_rate_limit_in_bits { u8 reserved_at_60[0x20]; u8 rate_limit[0x20]; + + u8 reserved_at_a0[0x160]; }; struct mlx5_ifc_access_register_out_bits { diff --git a/include/linux/mm.h b/include/linux/mm.h index cae2445ae8a21960adcaaf1ab3338d9af2eb84dc..67dffb8aaafcd050f9957998bc43e65b90ac384c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -351,6 +351,7 @@ struct fault_env { struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); + int (*split)(struct vm_area_struct * area, unsigned long addr); int (*mremap)(struct vm_area_struct * area); int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); int (*pmd_fault)(struct vm_area_struct *, unsigned long address, diff --git a/include/linux/mman.h b/include/linux/mman.h index 634c4c51fe3adaee4b65d9a977e954f42ecf6131..c540001ca8618b5f8e2d1c0d7072bbcc1d2a1f03 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -63,8 +63,9 @@ static inline bool arch_validate_prot(unsigned long prot) * ("bit1" and "bit2" must be single bits) */ #define _calc_vm_trans(x, bit1, bit2) \ + ((!(bit1) || !(bit2)) ? 0 : \ ((bit1) <= (bit2) ? 
((x) & (bit1)) * ((bit2) / (bit1)) \ - : ((x) & (bit1)) / ((bit1) / (bit2))) + : ((x) & (bit1)) / ((bit1) / (bit2)))) /* * Combine the mmap "prot" argument into "vm_flags" used internally. diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index fac3b5c27f4ff80c55068b37225b1999090c5b6b..1808b7895daca070c7d7dd94c05fe7eb84a4c7e6 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -409,7 +409,8 @@ struct mmc_host { #ifdef CONFIG_BLOCK int latency_hist_enabled; - struct io_latency_state io_lat_s; + struct io_latency_state io_lat_read; + struct io_latency_state io_lat_write; #endif unsigned long private[0] ____cacheline_aligned; diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 25c0dc31f08496a64d783d0eb197f675bb0d8e60..854dfa6fa6e3fed984a591a2cb99af0fefabac92 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -381,18 +381,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) ___pmd; \ }) -#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \ -({ \ - unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ - pmd_t ___pmd; \ - \ - ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \ - mmu_notifier_invalidate_range(__mm, ___haddr, \ - ___haddr + HPAGE_PMD_SIZE); \ - \ - ___pmd; \ -}) - /* * set_pte_at_notify() sets the pte _after_ running the notifier. * This is safe to start by updating the secondary MMUs, because the primary MMU @@ -480,7 +468,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define pmdp_clear_young_notify pmdp_test_and_clear_young #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush -#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear #define set_pte_at_notify set_pte_at #endif /* CONFIG_MMU_NOTIFIER */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fff21a82780cf9e77991b655d04633ca5151c70d..e3d7754f25f000fef65fee3e62bb498cb7c59b50 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -124,8 +124,9 @@ enum zone_stat_item { NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ NR_KERNEL_STACK_KB, /* measured in KiB */ - /* Second 128 byte cacheline */ + NR_KAISERTABLE, NR_BOUNCE, + /* Second 128 byte cacheline */ #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif @@ -632,6 +633,8 @@ typedef struct pglist_data { int kswapd_order; enum zone_type kswapd_classzone_idx; + int kswapd_failures; /* Number of 'reclaimed == 0' runs */ + #ifdef CONFIG_COMPACTION int kcompactd_max_order; enum zone_type kcompactd_classzone_idx; diff --git a/include/linux/module.h b/include/linux/module.h index 1226280be63bc17c92e95775499a703960c2ebda..fd9e121c7b3f3bbd1bfd66e78e0b324be894d1c4 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -791,6 +791,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ +#ifdef RETPOLINE +extern bool retpoline_module_ok(bool has_retpoline); +#else +static inline bool retpoline_module_ok(bool has_retpoline) +{ + return true; +} +#endif + #ifdef CONFIG_MODULE_SIG static inline bool module_sig_ok(struct module *module) { diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 3aa56e3104bb747da8fedd70da2d3570cfa54103..b5b43f94f311626ee364157515c4342aba976e6f 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -270,75 +270,67 @@ void map_destroy(struct 
mtd_info *mtd); #define INVALIDATE_CACHED_RANGE(map, from, size) \ do { if (map->inval_cache) map->inval_cache(map, from, size); } while (0) - -static inline int map_word_equal(struct map_info *map, map_word val1, map_word val2) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if (val1.x[i] != val2.x[i]) - return 0; - } - - return 1; -} - -static inline map_word map_word_and(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] & val2.x[i]; - - return r; -} - -static inline map_word map_word_clr(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] & ~val2.x[i]; - - return r; -} - -static inline map_word map_word_or(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] | val2.x[i]; - - return r; -} - -static inline int map_word_andequal(struct map_info *map, map_word val1, map_word val2, map_word val3) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if ((val1.x[i] & val2.x[i]) != val3.x[i]) - return 0; - } - - return 1; -} - -static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word val2) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if (val1.x[i] & val2.x[i]) - return 1; - } - - return 0; -} +#define map_word_equal(map, val1, val2) \ +({ \ + int i, ret = 1; \ + for (i = 0; i < map_words(map); i++) \ + if ((val1).x[i] != (val2).x[i]) { \ + ret = 0; \ + break; \ + } \ + ret; \ +}) + +#define map_word_and(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] & (val2).x[i]; \ + r; \ +}) + +#define map_word_clr(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] & ~(val2).x[i]; \ + r; \ +}) + +#define map_word_or(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] | (val2).x[i]; \ + r; \ +}) + +#define map_word_andequal(map, val1, val2, val3) \ +({ \ + int i, ret = 1; \ + for (i = 0; i < map_words(map); i++) { \ + if (((val1).x[i] & (val2).x[i]) != (val3).x[i]) { \ + ret = 0; \ + break; \ + } \ + } \ + ret; \ +}) + +#define map_word_bitsset(map, val1, val2) \ +({ \ + int i, ret = 0; \ + for (i = 0; i < map_words(map); i++) { \ + if ((val1).x[i] & (val2).x[i]) { \ + ret = 1; \ + break; \ + } \ + } \ + ret; \ +}) static inline map_word map_word_load(struct map_info *map, const void *ptr) { diff --git a/include/linux/nospec.h b/include/linux/nospec.h new file mode 100644 index 0000000000000000000000000000000000000000..b99bced39ac2fa1b91cea50a8301336c3cbff133 --- /dev/null +++ b/include/linux/nospec.h @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Linus Torvalds. All rights reserved. +// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#ifndef _LINUX_NOSPEC_H +#define _LINUX_NOSPEC_H + +/** + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * When @index is out of bounds (@index >= @size), the sign bit will be + * set. Extend the sign bit to all bits and invert, giving a result of + * zero for an out of bounds index, or ~0 if within bounds [0, @size).
+ */ +#ifndef array_index_mask_nospec +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + /* + * Warn developers about inappropriate array_index_nospec() usage. + * + * Even if the CPU speculates past the WARN_ONCE branch, the + * sign bit of @index is taken into account when generating the + * mask. + * + * This warning is compiled out when the compiler can infer that + * @index and @size are less than LONG_MAX. + */ + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) + return 0; + + /* + * Always calculate and emit the mask even if the compiler + * thinks the mask is not needed. The compiler does not take + * into account the value of @index under speculation. + */ + OPTIMIZER_HIDE_VAR(index); + return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1); +} +#endif + +/* + * array_index_nospec - sanitize an array index after a bounds check + * + * For a code sequence like: + * + * if (index < size) { + * index = array_index_nospec(index, size); + * val = array[index]; + * } + * + * ...if the CPU speculates past the bounds check then + * array_index_nospec() will clamp the index within the range of [0, + * size). + */ +#define array_index_nospec(index, size) \ +({ \ + typeof(index) _i = (index); \ + typeof(size) _s = (size); \ + unsigned long _mask = array_index_mask_nospec(_i, _s); \ + \ + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ + \ + _i &= _mask; \ + _i; \ +}) +#endif /* _LINUX_NOSPEC_H */ diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 35d0fd7a4948e6e49eb7e75512461d935cccd161..e821a3132a3e1b0401023442002958974f673bb0 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -88,10 +88,11 @@ static inline int gpmc_nand_init(struct omap_nand_platform_data *d, #endif #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) -extern void gpmc_onenand_init(struct omap_onenand_platform_data *d); +extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); #else #define board_onenand_data NULL -static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d) +static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d) { + return 0; } #endif diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8f16299ca0683f426e9a1dfb21ab68b244655166..8902f23bb7702c30d00d2ce3ab66caf8e73599b7 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -35,6 +35,12 @@ #endif +#ifdef CONFIG_PAGE_TABLE_ISOLATION +#define USER_MAPPED_SECTION "..user_mapped" +#else +#define USER_MAPPED_SECTION "" +#endif + /* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the @@ -115,6 +121,12 @@ #define DEFINE_PER_CPU(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "") +#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) + +#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) + /* * Declaration/definition used for per-CPU variables that must come first in * the set of variables. 
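As an aside on the nospec.h API above, a minimal usage sketch of array_index_nospec() in a bounds-checked table lookup (the lookup helper and table are hypothetical, not part of the patch):

	static int lookup_entry(const int *table, unsigned long nr_entries,
				unsigned long idx)
	{
		if (idx >= nr_entries)
			return -EINVAL;
		/*
		 * Clamp idx so that even if the CPU speculates past the
		 * bounds check above, the load below stays in [0, nr_entries).
		 */
		idx = array_index_nospec(idx, nr_entries);
		return table[idx];
	}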
@@ -144,6 +156,14 @@ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ ____cacheline_aligned_in_smp +#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + #define DECLARE_PER_CPU_ALIGNED(type, name) \ DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \ ____cacheline_aligned @@ -162,11 +182,21 @@ #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ __aligned(PAGE_SIZE) +/* + * Declaration/definition used for per-CPU variables that must be page aligned + * and need to be mapped in user mode. + */ +#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ + __aligned(PAGE_SIZE) + +#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ + __aligned(PAGE_SIZE) /* * Declaration/definition used for per-CPU variables that must be read mostly. */ -#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ +#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ DECLARE_PER_CPU_SECTION(type, name, "..read_mostly") #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 531b8b10fcdafffd0ab5d098790ea07c1e63e9b1..61ab566856e6f0612c8ae63f1a09d68f0b573b18 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1264,6 +1264,7 @@ extern void perf_event_disable(struct perf_event *event); extern void perf_event_disable_local(struct perf_event *event); extern void perf_event_disable_inatomic(struct perf_event *event); extern void perf_event_task_tick(void); +extern int perf_event_account_interrupt(struct perf_event *event); #else /* !CONFIG_PERF_EVENTS: */ static inline void * perf_aux_output_begin(struct perf_output_handle *handle, diff --git a/include/linux/phy.h b/include/linux/phy.h index a04d69ab7c34074b0af65857619f8f513dde0f39..867110c9d707934c183590ca2469aae594940518 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -683,6 +683,17 @@ static inline bool phy_is_internal(struct phy_device *phydev) return phydev->is_internal; } +/** + * phy_interface_mode_is_rgmii - Convenience function for testing if a + * PHY interface mode is RGMII (all variants) + * @mode: the phy_interface_t enum + */ +static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode) +{ + return mode >= PHY_INTERFACE_MODE_RGMII && + mode <= PHY_INTERFACE_MODE_RGMII_TXID; +} + /** * phy_interface_is_rgmii - Convenience function for testing if a PHY interface * is RGMII (all variants) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index b83507c0640c14b2646b682b2d0f62c021e790be..e38f471a5402d45a071aab264118915d6add3eac 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -99,12 +99,18 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r) /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). Callers must hold producer_lock. + * Callers are responsible for making sure that the pointer being queued + * points to valid data.
*/ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { if (unlikely(!r->size) || r->queue[r->producer]) return -ENOSPC; + /* Make sure the pointer we are storing points to valid data. */ + /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */ + smp_wmb(); + r->queue[r->producer++] = ptr; if (unlikely(r->producer >= r->size)) r->producer = 0; @@ -244,6 +250,9 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r) if (ptr) __ptr_ring_discard_one(r); + /* Make sure anyone accessing data through the pointer gets up-to-date data. */ + /* Pairs with smp_wmb in __ptr_ring_produce. */ + smp_read_barrier_depends(); return ptr; } diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 4ae95f7e8597b0b43575d04aaf524cf252761e6e..6224a0ab0b1e88eff3c06a5becdcf9b916c48974 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -99,44 +99,6 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, first->pprev = &n->next; } -/** - * hlist_nulls_add_tail_rcu - * @n: the element to add to the hash list. - * @h: the list to add to. - * - * Description: - * Adds the specified element to the end of the specified hlist_nulls, - * while permitting racing traversals. NOTE: tail insertion requires - * list traversal. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() - * or hlist_nulls_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. Regardless of the type of CPU, the - * list-traversal primitive must be guarded by rcu_read_lock(). - */ -static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, - struct hlist_nulls_head *h) -{ - struct hlist_nulls_node *i, *last = NULL; - - for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i); - i = hlist_nulls_next_rcu(i)) - last = i; - - if (last) { - n->next = last->next; - n->pprev = &last->next; - rcu_assign_pointer(hlist_nulls_next_rcu(last), n); - } else { - hlist_nulls_add_head_rcu(n, h); - } -} - /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor.
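Stepping back to the ptr_ring hunks above: the smp_wmb() added to __ptr_ring_produce() pairs with the smp_read_barrier_depends() added to __ptr_ring_consume(), so a consumer that observes the queued pointer also observes the producer's earlier stores to the pointed-to object. A sketch of the pairing, assuming a hypothetical struct item with a payload field and a process() consumer:

	/* Producer, holding r->producer_lock. */
	item->payload = val;			/* ordered before publication */
	if (__ptr_ring_produce(r, item))
		return -ENOSPC;			/* ring full, item not queued */

	/* Consumer, holding r->consumer_lock. */
	item = __ptr_ring_consume(r);
	if (item)
		process(item->payload);		/* guaranteed to see the store above */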
diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index f2e27e07836231d9b640e4408287b5b274047b56..01b3778ba6dab46ae47936f38a50ab4d3355c812 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -16,7 +16,6 @@ struct sh_eth_plat_data { unsigned char mac_addr[ETH_ALEN]; unsigned no_ether_link:1; unsigned ether_link_active_low:1; - unsigned needs_init:1; }; #endif diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c6f0f0d0e17e07609a8f6ae493bd71bdef093676..00a1f330f93afa8cc13cc7302c934698bd673db8 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -116,6 +116,12 @@ struct attribute_group { .show = _name##_show, \ } +#define __ATTR_RO_MODE(_name, _mode) { \ + .attr = { .name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ + .show = _name##_show, \ +} + #define __ATTR_WO(_name) { \ .attr = { .name = __stringify(_name), .mode = S_IWUSR }, \ .store = _name##_store, \ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e3fe7744ed7a911ce655457ed1be2b11dd33c704..fc1164181d69f116fcc1eab7d117e2fdd3034c74 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -220,7 +220,8 @@ struct tcp_sock { u16 advmss; /* Advertised MSS */ u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ - unused:6; + is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ + unused:5; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h new file mode 100644 index 0000000000000000000000000000000000000000..a2b3dfcee0b55d231d457ea24baa5e2080585569 --- /dev/null +++ b/include/linux/tee_drv.h @@ -0,0 +1,468 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __TEE_DRV_H +#define __TEE_DRV_H + +#include <linux/types.h> +#include <linux/idr.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/tee.h> + +/* + * This file describes the API provided by the generic TEE driver to the + * specific TEE driver. + */ + +#define TEE_SHM_MAPPED BIT(0) /* Memory mapped by the kernel */ +#define TEE_SHM_DMA_BUF BIT(1) /* Memory with dma-buf handle */ +#define TEE_SHM_EXT_DMA_BUF BIT(2) /* Memory with dma-buf handle */ +#define TEE_SHM_REGISTER BIT(3) /* Memory registered in secure world */ +#define TEE_SHM_USER_MAPPED BIT(4) /* Memory mapped in user space */ +#define TEE_SHM_POOL BIT(5) /* Memory allocated from pool */ + +struct device; +struct tee_device; +struct tee_shm; +struct tee_shm_pool; + +/** + * struct tee_context - driver specific context on file pointer data + * @teedev: pointer to this driver's struct tee_device + * @list_shm: List of shared memory objects owned by this context + * @data: driver specific context data, managed by the driver + * @refcount: reference counter for this structure + * @releasing: flag that indicates if context is being released right now. + * It is needed to break circular dependency on context during + * shared memory release.
+ */ +struct tee_context { + struct tee_device *teedev; + struct list_head list_shm; + void *data; + struct kref refcount; + bool releasing; +}; + +struct tee_param_memref { + size_t shm_offs; + size_t size; + struct tee_shm *shm; +}; + +struct tee_param_value { + u64 a; + u64 b; + u64 c; +}; + +struct tee_param { + u64 attr; + union { + struct tee_param_memref memref; + struct tee_param_value value; + } u; +}; + +/** + * struct tee_driver_ops - driver operations vtable + * @get_version: returns version of driver + * @open: called when the device file is opened + * @release: release this open file + * @open_session: open a new session + * @close_session: close a session + * @invoke_func: invoke a trusted function + * @cancel_req: request cancel of an ongoing invoke or open + * @supp_recv: called for supplicant to get a command + * @supp_send: called for supplicant to send a response + * @shm_register: register shared memory buffer in TEE + * @shm_unregister: unregister shared memory buffer in TEE + */ +struct tee_driver_ops { + void (*get_version)(struct tee_device *teedev, + struct tee_ioctl_version_data *vers); + int (*open)(struct tee_context *ctx); + void (*release)(struct tee_context *ctx); + int (*open_session)(struct tee_context *ctx, + struct tee_ioctl_open_session_arg *arg, + struct tee_param *param); + int (*close_session)(struct tee_context *ctx, u32 session); + int (*invoke_func)(struct tee_context *ctx, + struct tee_ioctl_invoke_arg *arg, + struct tee_param *param); + int (*cancel_req)(struct tee_context *ctx, u32 cancel_id, u32 session); + int (*supp_recv)(struct tee_context *ctx, u32 *func, u32 *num_params, + struct tee_param *param); + int (*supp_send)(struct tee_context *ctx, u32 ret, u32 num_params, + struct tee_param *param); + int (*shm_register)(struct tee_context *ctx, struct tee_shm *shm, + struct page **pages, size_t num_pages, + unsigned long start); + int (*shm_unregister)(struct tee_context *ctx, struct tee_shm *shm); +}; + +/** + * struct tee_desc - Describes the TEE driver to the subsystem + * @name: name of driver + * @ops: driver operations vtable + * @owner: module providing the driver + * @flags: Extra properties of driver, defined by TEE_DESC_* below + */ +#define TEE_DESC_PRIVILEGED 0x1 +struct tee_desc { + const char *name; + const struct tee_driver_ops *ops; + struct module *owner; + u32 flags; +}; + +/** + * tee_device_alloc() - Allocate a new struct tee_device instance + * @teedesc: Descriptor for this driver + * @dev: Parent device for this device + * @pool: Shared memory pool, NULL if not used + * @driver_data: Private driver data for this device + * + * Allocates a new struct tee_device instance. The device is + * removed by tee_device_unregister(). + * + * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure + */ +struct tee_device *tee_device_alloc(const struct tee_desc *teedesc, + struct device *dev, + struct tee_shm_pool *pool, + void *driver_data); + +/** + * tee_device_register() - Registers a TEE device + * @teedev: Device to register + * + * tee_device_unregister() needs to be called to remove the @teedev if + * this function fails. + * + * @returns < 0 on failure + */ +int tee_device_register(struct tee_device *teedev); + +/** + * tee_device_unregister() - Removes a TEE device + * @teedev: Device to unregister + * + * This function should be called to remove the @teedev even if + * tee_device_register() hasn't been called yet. Does nothing if + * @teedev is NULL. + */ +void tee_device_unregister(struct tee_device *teedev);
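To make the lifecycle above concrete, a sketch of how a specific TEE driver's probe path might use these calls (the my_* ops, pool and drv_data are placeholders, not part of this API):

	static const struct tee_driver_ops my_ops = {
		.get_version = my_get_version,
		.open = my_open,
		.release = my_release,
		.open_session = my_open_session,
		.close_session = my_close_session,
		.invoke_func = my_invoke_func,
		.cancel_req = my_cancel_req,
	};

	static const struct tee_desc my_desc = {
		.name = "my-tee",
		.ops = &my_ops,
		.owner = THIS_MODULE,
	};

	teedev = tee_device_alloc(&my_desc, dev, pool, drv_data);
	if (IS_ERR(teedev))
		return PTR_ERR(teedev);
	rc = tee_device_register(teedev);
	if (rc) {
		tee_device_unregister(teedev);	/* safe after a failed register */
		return rc;
	}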
+ +/** + * struct tee_shm - shared memory object + * @teedev: device used to allocate the object + * @ctx: context using the object, if NULL the context is gone + * @link: link element + * @paddr: physical address of the shared memory + * @kaddr: virtual address of the shared memory + * @size: size of shared memory + * @offset: offset of buffer in user space + * @pages: locked pages from userspace + * @num_pages: number of locked pages + * @dmabuf: dmabuf used for exporting to user space + * @flags: defined by TEE_SHM_* in tee_drv.h + * @id: unique id of a shared memory object on this device + * + * This structure is only supposed to be accessed directly from the TEE + * subsystem and from drivers that implement their own shm pool manager. + */ +struct tee_shm { + struct tee_device *teedev; + struct tee_context *ctx; + struct list_head link; + phys_addr_t paddr; + void *kaddr; + size_t size; + unsigned int offset; + struct page **pages; + size_t num_pages; + struct dma_buf *dmabuf; + u32 flags; + int id; +}; + +/** + * struct tee_shm_pool_mgr - shared memory manager + * @ops: operations + * @private_data: private data for the shared memory manager + */ +struct tee_shm_pool_mgr { + const struct tee_shm_pool_mgr_ops *ops; + void *private_data; +}; + +/** + * struct tee_shm_pool_mgr_ops - shared memory pool manager operations + * @alloc: called when allocating shared memory + * @free: called when freeing shared memory + * @destroy_poolmgr: called when destroying the pool manager + */ +struct tee_shm_pool_mgr_ops { + int (*alloc)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm, + size_t size); + void (*free)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm); + void (*destroy_poolmgr)(struct tee_shm_pool_mgr *poolmgr); +}; + +/** + * tee_shm_pool_alloc() - Create a shared memory pool from shm managers + * @priv_mgr: manager for driver private shared memory allocations + * @dmabuf_mgr: manager for dma-buf shared memory allocations + * + * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied + * in @dmabuf_mgr, others will use the range provided by @priv_mgr. + * + * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. + */ +struct tee_shm_pool *tee_shm_pool_alloc(struct tee_shm_pool_mgr *priv_mgr, + struct tee_shm_pool_mgr *dmabuf_mgr); + +/** + * tee_shm_pool_mgr_alloc_res_mem() - Create a shm manager for reserved + * memory + * @vaddr: Virtual address of start of pool + * @paddr: Physical address of start of pool + * @size: Size in bytes of the pool + * @min_alloc_order: Log2 of the minimum allocation size + * + * @returns pointer to a 'struct tee_shm_pool_mgr' or an ERR_PTR on failure. + */ +struct tee_shm_pool_mgr *tee_shm_pool_mgr_alloc_res_mem(unsigned long vaddr, + phys_addr_t paddr, + size_t size, + int min_alloc_order);
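A sketch of how a driver might create the two managers from reserved memory and combine them (the addresses, sizes and minimum allocation order are placeholders; tee_shm_pool_mgr_destroy() is defined just below):

	priv_mgr = tee_shm_pool_mgr_alloc_res_mem(priv_va, priv_pa, priv_sz,
						  3 /* 8-byte min alloc */);
	if (IS_ERR(priv_mgr))
		return PTR_ERR(priv_mgr);
	dmabuf_mgr = tee_shm_pool_mgr_alloc_res_mem(dma_va, dma_pa, dma_sz,
						    PAGE_SHIFT);
	if (IS_ERR(dmabuf_mgr)) {
		tee_shm_pool_mgr_destroy(priv_mgr);
		return PTR_ERR(dmabuf_mgr);
	}
	pool = tee_shm_pool_alloc(priv_mgr, dmabuf_mgr);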
+ +/** + * tee_shm_pool_mgr_destroy() - Free a shared memory manager + */ +static inline void tee_shm_pool_mgr_destroy(struct tee_shm_pool_mgr *poolm) +{ + poolm->ops->destroy_poolmgr(poolm); +} + +/** + * struct tee_shm_pool_mem_info - holds information needed to create a shared + * memory pool + * @vaddr: Virtual address of start of pool + * @paddr: Physical address of start of pool + * @size: Size in bytes of the pool + */ +struct tee_shm_pool_mem_info { + unsigned long vaddr; + phys_addr_t paddr; + size_t size; +}; + +/** + * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved + * memory range + * @priv_info: Information for driver private shared memory pool + * @dmabuf_info: Information for dma-buf shared memory pool + * + * Start and end of pools must be page aligned. + * + * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied + * in @dmabuf_info, others will use the range provided by @priv_info. + * + * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. + */ +struct tee_shm_pool * +tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info, + struct tee_shm_pool_mem_info *dmabuf_info); + +/** + * tee_shm_pool_free() - Free a shared memory pool + * @pool: The shared memory pool to free + * + * There must be no remaining shared memory allocated from this pool when + * this function is called. + */ +void tee_shm_pool_free(struct tee_shm_pool *pool); + +/** + * tee_get_drvdata() - Return driver_data pointer + * @returns the driver_data pointer supplied to tee_device_alloc(). + */ +void *tee_get_drvdata(struct tee_device *teedev); + +/** + * tee_shm_alloc() - Allocate shared memory + * @ctx: Context that allocates the shared memory + * @size: Requested size of shared memory + * @flags: Flags setting properties for the requested shared memory. + * + * Memory allocated as global shared memory is automatically freed when the + * TEE file pointer is closed. The @flags field uses the bits defined by + * TEE_SHM_* above. TEE_SHM_MAPPED must currently always be set. If + * TEE_SHM_DMA_BUF is set, global shared memory will be allocated and + * associated with a dma-buf handle, else driver private memory. + * + * @returns a pointer to 'struct tee_shm' + */ +struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); + +/** + * tee_shm_priv_alloc() - Allocate shared memory privately + * @dev: Device that allocates the shared memory + * @size: Requested size of shared memory + * + * Allocates shared memory buffer that is not associated with any client + * context. Such buffers are owned by TEE driver and used for internal calls. + * + * @returns a pointer to 'struct tee_shm' + */ +struct tee_shm *tee_shm_priv_alloc(struct tee_device *teedev, size_t size); + +/** + * tee_shm_register() - Register shared memory buffer + * @ctx: Context that registers the shared memory + * @addr: Address in userspace of the shared buffer + * @length: Length of the shared buffer + * @flags: Flags setting properties for the requested shared memory. + * + * @returns a pointer to 'struct tee_shm' + */ +struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, + size_t length, u32 flags);
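Putting the allocators together, a sketch of driver-internal use (the 4 KiB size is arbitrary; tee_shm_get_va() is documented further below):

	shm = tee_shm_alloc(ctx, 4096, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
	if (IS_ERR(shm))
		return PTR_ERR(shm);
	va = tee_shm_get_va(shm, 0);	/* kernel mapping of the buffer */
	if (IS_ERR(va)) {
		tee_shm_free(shm);
		return PTR_ERR(va);
	}
	memset(va, 0, 4096);		/* use the buffer, then release it */
	tee_shm_free(shm);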
+ +/** + * tee_shm_is_registered() - Check if shared memory object is registered in TEE + * @shm: Shared memory handle + * @returns true if object is registered in TEE + */ +static inline bool tee_shm_is_registered(struct tee_shm *shm) +{ + return shm && (shm->flags & TEE_SHM_REGISTER); +} + +/** + * tee_shm_free() - Free shared memory + * @shm: Handle to shared memory to free + */ +void tee_shm_free(struct tee_shm *shm); + +/** + * tee_shm_put() - Decrease reference count on a shared memory handle + * @shm: Shared memory handle + */ +void tee_shm_put(struct tee_shm *shm); + +/** + * tee_shm_va2pa() - Get physical address of a virtual address + * @shm: Shared memory handle + * @va: Virtual address to translate + * @pa: Returned physical address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa); + +/** + * tee_shm_pa2va() - Get virtual address of a physical address + * @shm: Shared memory handle + * @pa: Physical address to translate + * @va: Returned virtual address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va); + +/** + * tee_shm_get_va() - Get virtual address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @returns virtual address of the shared memory + offs if offs is within + * the bounds of this shared memory, else an ERR_PTR + */ +void *tee_shm_get_va(struct tee_shm *shm, size_t offs); + +/** + * tee_shm_get_pa() - Get physical address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @pa: Physical address to return + * @returns 0 if offs is within the bounds of this shared memory, else an + * error code.
+ */ +int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa); + +/** + * tee_shm_get_size() - Get size of shared memory buffer + * @shm: Shared memory handle + * @returns size of shared memory + */ +static inline size_t tee_shm_get_size(struct tee_shm *shm) +{ + return shm->size; +} + +/** + * tee_shm_get_pages() - Get list of pages that hold shared buffer + * @shm: Shared memory handle + * @num_pages: Number of pages will be stored there + * @returns pointer to pages array + */ +static inline struct page **tee_shm_get_pages(struct tee_shm *shm, + size_t *num_pages) +{ + *num_pages = shm->num_pages; + return shm->pages; +} + +/** + * tee_shm_get_page_offset() - Get shared buffer offset from page start + * @shm: Shared memory handle + * @returns page offset of shared buffer + */ +static inline size_t tee_shm_get_page_offset(struct tee_shm *shm) +{ + return shm->offset; +} + +/** + * tee_shm_get_id() - Get id of a shared memory object + * @shm: Shared memory handle + * @returns id + */ +static inline int tee_shm_get_id(struct tee_shm *shm) +{ + return shm->id; +} + +/** + * tee_shm_get_from_id() - Find shared memory object and increase reference + * count + * @ctx: Context owning the shared memory + * @id: Id of shared memory object + * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure + */ +struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id); + +static inline bool tee_param_is_memref(struct tee_param *param) +{ + switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + return true; + default: + return false; + } +} + +#endif /*__TEE_DRV_H*/ diff --git a/include/linux/timer.h b/include/linux/timer.h index 51d601f192d421c6e3721c6621d7c3e08dc1a54d..ec86e4e55ea38cac96826952bd76833bf32a4e41 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -274,9 +274,11 @@ unsigned long round_jiffies_up(unsigned long j); unsigned long round_jiffies_up_relative(unsigned long j); #ifdef CONFIG_HOTPLUG_CPU +int timers_prepare_cpu(unsigned int cpu); int timers_dead_cpu(unsigned int cpu); #else -#define timers_dead_cpu NULL +#define timers_prepare_cpu NULL +#define timers_dead_cpu NULL #endif #endif diff --git a/include/linux/tty.h b/include/linux/tty.h index 40144f38251631dea097f0e7e03cba5928e31254..a41244fe58d022655c0726c7a9db25afa4643d0f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -394,6 +394,8 @@ extern struct tty_struct *get_current_tty(void); /* tty_io.c */ extern int __init tty_init(void); extern const char *tty_name(const struct tty_struct *tty); +extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); +extern void tty_ldisc_unlock(struct tty_struct *tty); #else static inline void console_init(void) { } diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 6e0ce8c7b8cb5a9fcb985a5a5078f82267d03092..fde7550754df60b39dfb3d8bbffb85bec787fd1c 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -79,6 +79,7 @@ struct usbnet { # define EVENT_RX_KILL 10 # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 +# define EVENT_NO_IP_ALIGN 13 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 9638bfeb0d1f639ae310d1586b4e2fca567ba2f7..584f9a647ad4acca191ff6116a47c14da1385fa3 100644 --- a/include/linux/virtio_vsock.h +++ 
b/include/linux/virtio_vsock.h @@ -48,6 +48,8 @@ struct virtio_vsock_pkt { struct virtio_vsock_hdr hdr; struct work_struct work; struct list_head list; + /* socket refcnt not held, only use for cancellation */ + struct vsock_sock *vsk; void *buf; u32 len; u32 off; @@ -56,6 +58,7 @@ struct virtio_vsock_pkt { struct virtio_vsock_pkt_info { u32 remote_cid, remote_port; + struct vsock_sock *vsk; struct msghdr *msg; u32 pkt_len; u16 type; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 4d6ec58a8d45b04aa6bae51585e49d44319dd155..2edb150f1a4d1c8f8f9e5e5f15d0b174a5e2a204 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -89,10 +89,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #endif #endif #ifdef CONFIG_DEBUG_TLBFLUSH -#ifdef CONFIG_SMP NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */ NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */ -#endif /* CONFIG_SMP */ NR_TLB_LOCAL_FLUSH_ALL, NR_TLB_LOCAL_FLUSH_ONE, #endif /* CONFIG_DEBUG_TLBFLUSH */ diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f2758964ce6f890e3b11df5ba5bf2eefe636abd1..f32ed9ac181a47c00757596fc3b8c5733426c468 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -100,6 +100,9 @@ struct vsock_transport { void (*destruct)(struct vsock_sock *); void (*release)(struct vsock_sock *); + /* Cancel all pending packets sent on vsock. */ + int (*cancel_pkt)(struct vsock_sock *vsk); + /* Connections. */ int (*connect)(struct vsock_sock *); diff --git a/include/net/arp.h b/include/net/arp.h index 5e0f891d476c299d91fde14ce9cdeedc9a7e26c6..1b3f8698175785f1818c891a85af16a102da981f 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -19,6 +19,9 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32 static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + key = INADDR_ANY; + return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev); } diff --git a/include/net/ip.h b/include/net/ip.h index 3591ddc3d4f24a3d653659641c2b39d934ad2bfb..573fd7f09d6fa858a8282f6013d5a026feea2e03 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -33,6 +33,8 @@ #include #include +#define IPV4_MIN_MTU 68 /* RFC 791 */ + struct sock; struct inet_skb_parm { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 615ce0abba9cf91bd0bf4daf7305d9ea64ee0c9f..e64210c98c2b540281230606915dcdc086e12a39 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -290,6 +290,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2c7d876e2a1ae522e8df3c2b9b59291790b33087..8fd61bc50383c6f074d493266afe35b3feca787d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1007,7 +1007,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_DECRYPTED: This frame was decrypted in hardware. * @RX_FLAG_MMIC_STRIPPED: the Michael MIC is stripped off this frame, * verification has been done by the hardware. - * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame. + * @RX_FLAG_IV_STRIPPED: The IV and ICV are stripped from this frame. 
* If this flag is set, the stack cannot do any replay detection * hence the driver or hardware will have to do that. * @RX_FLAG_PN_VALIDATED: Currently only valid for CCMP/GCMP frames, this @@ -1078,6 +1078,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_ALLOW_SAME_PN: Allow the same PN as same packet before. * This is used for AMSDU subframes which can have the same PN as * the first subframe. + * @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must + * be done in the hardware. */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1113,6 +1115,7 @@ enum mac80211_rx_flags { RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(31), RX_FLAG_MIC_STRIPPED = BIT_ULL(32), RX_FLAG_ALLOW_SAME_PN = BIT_ULL(33), + RX_FLAG_ICV_STRIPPED = BIT_ULL(34), }; #define RX_FLAG_STBC_SHIFT 26 diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 0940598c002ff7fee832f9cf670b8006d0202ddc..23102da24dd937157c84667df0243e3a6e02f99d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -213,6 +213,11 @@ int net_eq(const struct net *net1, const struct net *net2) return net1 == net2; } +static inline int check_net(const struct net *net) +{ + return atomic_read(&net->count) != 0; +} + void net_drop_ns(void *); #else @@ -237,6 +242,11 @@ int net_eq(const struct net *net1, const struct net *net2) return 1; } +static inline int check_net(const struct net *net) +{ + return 1; +} + #define net_drop_ns NULL #endif diff --git a/include/net/sock.h b/include/net/sock.h index 97f8ed2202bff6849f6b826b9a597d95546c7644..badd144fca1c36ca9d5cf2aedac4ca625e8bbf41 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -649,11 +649,7 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { - if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && - sk->sk_family == AF_INET6) - hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); - else - hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) diff --git a/include/net/tcp.h b/include/net/tcp.h index eaf8144272fa964fbe67787d0576606f593a33d4..d082f1179d0734c93d5e33c7e9463893ec1d4ad2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1002,7 +1002,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct skb_mstamp *now, struct rate_sample *rs); + bool is_sack_reneg, struct skb_mstamp *now, struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 1beab5532035dc2126405384d44457f183de2a90..818a38f9922122167cb11deebe6e87b6643fc522 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -243,10 +243,11 @@ static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_g static inline enum ib_mtu iboe_get_mtu(int mtu) { /* - * reduce IB headers from effective IBoE MTU. 28 stands for - * atomic header which is the biggest possible header after BTH + * Reduce IB headers from effective IBoE MTU. 
*/ - mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28; + mtu = mtu - (IB_GRH_BYTES + IB_UDP_BYTES + IB_BTH_BYTES + + IB_EXT_XRC_BYTES + IB_EXT_ATOMICETH_BYTES + + IB_ICRC_BYTES); if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) return IB_MTU_4096; diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index b13419ce99ff5b52d666c2af7a91f58457f58100..e02b78a38ebacfd3c407e01cab590a1d6917e6c9 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -37,14 +37,17 @@ #include enum { - IB_LRH_BYTES = 8, - IB_ETH_BYTES = 14, - IB_VLAN_BYTES = 4, - IB_GRH_BYTES = 40, - IB_IP4_BYTES = 20, - IB_UDP_BYTES = 8, - IB_BTH_BYTES = 12, - IB_DETH_BYTES = 8 + IB_LRH_BYTES = 8, + IB_ETH_BYTES = 14, + IB_VLAN_BYTES = 4, + IB_GRH_BYTES = 40, + IB_IP4_BYTES = 20, + IB_UDP_BYTES = 8, + IB_BTH_BYTES = 12, + IB_DETH_BYTES = 8, + IB_EXT_ATOMICETH_BYTES = 28, + IB_EXT_XRC_BYTES = 4, + IB_ICRC_BYTES = 4 }; struct ib_field { diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index dae99d7d2bc05a265faddb5a4f953d34f99e4ef1..706a7017885c20c0fdfa9ee6520f00d1e8278c53 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -165,11 +165,11 @@ struct expander_device { struct sata_device { unsigned int class; - struct smp_resp rps_resp; /* report_phy_sata_resp */ u8 port_no; /* port number, if this is a PM (Port) */ struct ata_port *ap; struct ata_host ata_host; + struct smp_resp rps_resp ____cacheline_aligned; /* report_phy_sata_resp */ u8 fis[ATA_RESP_FIS_SIZE]; }; diff --git a/include/scsi/sg.h b/include/scsi/sg.h index 3afec7032448c8927d2cd134111606ee0206ca3d..20bc71c3e0b81acf07af84fea30d6b530427bb70 100644 --- a/include/scsi/sg.h +++ b/include/scsi/sg.h @@ -197,7 +197,6 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ #define SG_DEFAULT_RETRIES 0 /* Defaults, commented if they differ from original sg driver */ -#define SG_DEF_FORCE_LOW_DMA 0 /* was 1 -> memory below 16MB on i386 */ #define SG_DEF_FORCE_PACK_ID 0 #define SG_DEF_KEEP_ORPHAN 0 #define SG_DEF_RESERVED_SIZE SG_SCATTER_SZ /* load time option */ diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index a87e8940fe57f4d0f0dcd65137601f8cbdc47fb3..30f99ce4c6cea05b4d6d77225b23c93998325794 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -297,7 +297,7 @@ struct t10_alua_tg_pt_gp { struct list_head tg_pt_gp_lun_list; struct se_lun *tg_pt_gp_alua_lun; struct se_node_acl *tg_pt_gp_alua_nacl; - struct delayed_work tg_pt_gp_transition_work; + struct work_struct tg_pt_gp_transition_work; struct completion *tg_pt_gp_transition_complete; }; @@ -493,6 +493,7 @@ struct se_cmd { #define CMD_T_BUSY (1 << 9) #define CMD_T_TAS (1 << 10) #define CMD_T_FABRIC_STOP (1 << 11) +#define CMD_T_PRE_EXECUTE (1 << 12) spinlock_t t_state_lock; struct kref cmd_kref; struct completion t_transport_stop_comp; diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 8ade3eb6c640e0725e2c1fe0d930e6309cd0801f..90fce4d6956ad863c9dde8d25a31443762615580 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -208,7 +208,7 @@ TRACE_EVENT(kvm_ack_irq, { KVM_TRACE_MMIO_WRITE, "write" } TRACE_EVENT(kvm_mmio, - TP_PROTO(int type, int len, u64 gpa, u64 val), + TP_PROTO(int type, int len, u64 gpa, void *val), TP_ARGS(type, len, gpa, val), TP_STRUCT__entry( @@ -222,7 +222,10 @@ TRACE_EVENT(kvm_mmio, __entry->type = type; __entry->len = len; __entry->gpa = gpa; - __entry->val = val; + __entry->val = 0; + if (val) + memcpy(&__entry->val, val, + min_t(u32, 
sizeof(__entry->val), len)); ), TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 22b6ad31c706dae59544286faea5808d7b303342..8562b1cb776b7d69ae6c5c46d0867302a55f658f 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -90,7 +90,7 @@ PTR_FIELD(PTR_GEN, 0, 8) #define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) -#define PTR(gen, offset, dev) \ +#define MAKE_PTR(gen, offset, dev) \ ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) /* Bkey utility code */ diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 1c3154913a391fb0f67ade68150fd8af939d791d..bc96b14dfb2c554df6f36d6a8540a297aee0f61f 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -26,6 +26,19 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* Epoll event masks */ +#define EPOLLIN 0x00000001 +#define EPOLLPRI 0x00000002 +#define EPOLLOUT 0x00000004 +#define EPOLLERR 0x00000008 +#define EPOLLHUP 0x00000010 +#define EPOLLRDNORM 0x00000040 +#define EPOLLRDBAND 0x00000080 +#define EPOLLWRNORM 0x00000100 +#define EPOLLWRBAND 0x00000200 +#define EPOLLMSG 0x00000400 +#define EPOLLRDHUP 0x00002000 + /* Set exclusive wakeup mode for the target file descriptor */ #define EPOLLEXCLUSIVE (1 << 28) diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h index 574e22ec640dab24b7c182bc93f616c99b7f2cf2..33b826b9946ed3a1b7acd8e7d86632222a6c9358 100644 --- a/include/uapi/linux/kcov.h +++ b/include/uapi/linux/kcov.h @@ -7,4 +7,28 @@ #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) +enum { + /* + * Tracing coverage collection mode. + * Covered PCs are collected in a per-task buffer. + * In the new KCOV version the mode is chosen by calling + * ioctl(fd, KCOV_ENABLE, mode). In older versions the mode argument + * was supposed to be 0 in such a call. So, for reasons of backward + * compatibility, we have chosen the value KCOV_TRACE_PC to be 0. + */ + KCOV_TRACE_PC = 0, + /* Collecting comparison operands mode. */ + KCOV_TRACE_CMP = 1, +}; + +/* + * The format for the types of collected comparisons. + * + * Bit 0 shows whether one of the arguments is a compile-time constant. + * Bits 1 & 2 contain log2 of the argument size, up to 8 bytes. + */ +#define KCOV_CMP_CONST (1 << 0) +#define KCOV_CMP_SIZE(n) ((n) << 1) +#define KCOV_CMP_MASK KCOV_CMP_SIZE(3) + #endif /* _LINUX_KCOV_IOCTLS_H */ diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h new file mode 100644 index 0000000000000000000000000000000000000000..4b9eb064d7e709298a011dbd54dae7940a42c010 --- /dev/null +++ b/include/uapi/linux/tee.h @@ -0,0 +1,384 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __TEE_H +#define __TEE_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +/* + * This file describes the API provided by a TEE driver to user space. + * + * Each TEE driver defines a TEE specific protocol which is used for the + * data passed back and forth using TEE_IOC_CMD. + */ + +/* Helpers to make the ioctl defines */ +#define TEE_IOC_MAGIC 0xa4 +#define TEE_IOC_BASE 0 + +/* Flags relating to shared memory */ +#define TEE_IOCTL_SHM_MAPPED 0x1 /* memory mapped in normal world */ +#define TEE_IOCTL_SHM_DMA_BUF 0x2 /* dma-buf handle on shared memory */ + +#define TEE_MAX_ARG_SIZE 1024 + +#define TEE_GEN_CAP_GP (1 << 0)/* GlobalPlatform compliant TEE */ +#define TEE_GEN_CAP_PRIVILEGED (1 << 1)/* Privileged device (for supplicant) */ +#define TEE_GEN_CAP_REG_MEM (1 << 2)/* Supports registering shared memory */ + +/* + * TEE Implementation ID + */ +#define TEE_IMPL_ID_OPTEE 1 + +/* + * OP-TEE specific capabilities + */ +#define TEE_OPTEE_CAP_TZ (1 << 0) + +/** + * struct tee_ioctl_version_data - TEE version + * @impl_id: [out] TEE implementation id + * @impl_caps: [out] Implementation specific capabilities + * @gen_caps: [out] Generic capabilities, defined by TEE_GEN_CAP_* above + * + * Identifies the TEE implementation, @impl_id is one of TEE_IMPL_ID_* above. + * @impl_caps is implementation specific, for example TEE_OPTEE_CAP_* + * is valid when @impl_id == TEE_IMPL_ID_OPTEE. + */ +struct tee_ioctl_version_data { + __u32 impl_id; + __u32 impl_caps; + __u32 gen_caps; +}; + +/** + * TEE_IOC_VERSION - query version of TEE + * + * Takes a tee_ioctl_version_data struct and returns with the TEE version + * data filled in. + */ +#define TEE_IOC_VERSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 0, \ + struct tee_ioctl_version_data) + +/** + * struct tee_ioctl_shm_alloc_data - Shared memory allocate argument + * @size: [in/out] Size of shared memory to allocate + * @flags: [in/out] Flags to/from allocation. + * @id: [out] Identifier of the shared memory + * + * The flags field should currently be zero as input. Updated by the call + * with actual flags as defined by TEE_IOCTL_SHM_* above. + * This structure is used as argument for TEE_IOC_SHM_ALLOC below. + */ +struct tee_ioctl_shm_alloc_data { + __u64 size; + __u32 flags; + __s32 id; +}; + +/** + * TEE_IOC_SHM_ALLOC - allocate shared memory + * + * Allocates shared memory between the user space process and secure OS. + * + * Returns a file descriptor on success or < 0 on failure + * + * The returned file descriptor is used to map the shared memory into user + * space. The shared memory is freed when the descriptor is closed and the + * memory is unmapped. + */ +#define TEE_IOC_SHM_ALLOC _IOWR(TEE_IOC_MAGIC, TEE_IOC_BASE + 1, \ + struct tee_ioctl_shm_alloc_data)
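From user space the allocate-and-map flow described above looks roughly like this (a minimal sketch; the /dev/tee0 path and the lack of error handling are illustrative):

	int fd = open("/dev/tee0", O_RDWR);
	struct tee_ioctl_shm_alloc_data data = { .size = 4096 };
	int shm_fd = ioctl(fd, TEE_IOC_SHM_ALLOC, &data);
	void *p = mmap(NULL, data.size, PROT_READ | PROT_WRITE,
		       MAP_SHARED, shm_fd, 0);
	/* data.id now identifies this object in memref parameters */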
+ +/** + * struct tee_ioctl_buf_data - Variable sized buffer + * @buf_ptr: [in] A __user pointer to a buffer + * @buf_len: [in] Length of the buffer above + * + * Used as argument for TEE_IOC_OPEN_SESSION, TEE_IOC_INVOKE, + * TEE_IOC_SUPPL_RECV, and TEE_IOC_SUPPL_SEND below. + */ +struct tee_ioctl_buf_data { + __u64 buf_ptr; + __u64 buf_len; +}; + +/* + * Attributes for struct tee_ioctl_param, selects field in the union + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_NONE 0 /* parameter not used */ + +/* + * These define value parameters (struct tee_ioctl_param_value) + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT 1 +#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT 2 +#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT 3 /* input and output */ + +/* + * These define shared memory reference parameters (struct + * tee_ioctl_param_memref) + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT 5 +#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT 6 +#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT 7 /* input and output */ + +/* + * Mask for the type part of the attribute, leaves room for more types + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_MASK 0xff + +/* Meta parameter carrying extra information about the message. */ +#define TEE_IOCTL_PARAM_ATTR_META 0x100 + +/* Mask of all known attr bits */ +#define TEE_IOCTL_PARAM_ATTR_MASK \ + (TEE_IOCTL_PARAM_ATTR_TYPE_MASK | TEE_IOCTL_PARAM_ATTR_META) + +/* + * Matches TEEC_LOGIN_* in GP TEE Client API + * Are only defined for GP compliant TEEs + */ +#define TEE_IOCTL_LOGIN_PUBLIC 0 +#define TEE_IOCTL_LOGIN_USER 1 +#define TEE_IOCTL_LOGIN_GROUP 2 +#define TEE_IOCTL_LOGIN_APPLICATION 4 +#define TEE_IOCTL_LOGIN_USER_APPLICATION 5 +#define TEE_IOCTL_LOGIN_GROUP_APPLICATION 6 + +/** + * struct tee_ioctl_param - parameter + * @attr: attributes + * @a: if a memref, offset into the shared memory object, else a value parameter + * @b: if a memref, size of the buffer, else a value parameter + * @c: if a memref, shared memory identifier, else a value parameter + * + * @attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK indicates if memref or value is + * used. TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_* indicates value and + * TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_* indicates memref. + * TEE_IOCTL_PARAM_ATTR_TYPE_NONE indicates that none of the members are used. + * + * Shared memory is allocated with TEE_IOC_SHM_ALLOC which returns an + * identifier representing the shared memory object. A memref can reference + * a part of a shared memory by specifying an offset (@a) and size (@b) of + * the object. To supply the entire shared memory object set the offset + * (@a) to 0 and size (@b) to the previously returned size of the object. + */ +struct tee_ioctl_param { + __u64 attr; + __u64 a; + __u64 b; + __u64 c; +}; + +#define TEE_IOCTL_UUID_LEN 16
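For example, a memref input parameter covering a whole shared memory object would be filled in as below (shm_size and shm_id come from an earlier TEE_IOC_SHM_ALLOC call):

	struct tee_ioctl_param param = {
		.attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT,
		.a = 0,		/* offset into the shm object */
		.b = shm_size,	/* size of the referenced range */
		.c = shm_id,	/* id returned by TEE_IOC_SHM_ALLOC */
	};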
+ +/** + * struct tee_ioctl_open_session_arg - Open session argument + * @uuid: [in] UUID of the Trusted Application + * @clnt_uuid: [in] UUID of client + * @clnt_login: [in] Login class of client, TEE_IOCTL_LOGIN_* above + * @cancel_id: [in] Cancellation id, a unique value to identify this request + * @session: [out] Session id + * @ret: [out] return value + * @ret_origin: [out] origin of the return value + * @num_params: [in] number of parameters following this struct + */ +struct tee_ioctl_open_session_arg { + __u8 uuid[TEE_IOCTL_UUID_LEN]; + __u8 clnt_uuid[TEE_IOCTL_UUID_LEN]; + __u32 clnt_login; + __u32 cancel_id; + __u32 session; + __u32 ret; + __u32 ret_origin; + __u32 num_params; + /* num_params tells the actual number of elements in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_OPEN_SESSION - opens a session to a Trusted Application + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_ioctl_open_session_arg followed by an array of struct + * tee_ioctl_param + */ +#define TEE_IOC_OPEN_SESSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 2, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_ioctl_invoke_arg - Invokes a function in a Trusted + * Application + * @func: [in] Trusted Application function, specific to the TA + * @session: [in] Session id + * @cancel_id: [in] Cancellation id, a unique value to identify this request + * @ret: [out] return value + * @ret_origin: [out] origin of the return value + * @num_params: [in] number of parameters following this struct + */ +struct tee_ioctl_invoke_arg { + __u32 func; + __u32 session; + __u32 cancel_id; + __u32 ret; + __u32 ret_origin; + __u32 num_params; + /* num_params tells the actual number of elements in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_INVOKE - Invokes a function in a Trusted Application + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_ioctl_invoke_arg followed by an array of struct tee_ioctl_param + */ +#define TEE_IOC_INVOKE _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 3, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_ioctl_cancel_arg - Cancels an open session or invoke ioctl + * @cancel_id: [in] Cancellation id, a unique value to identify this request + * @session: [in] Session id, if the session is opened, else set to 0 + */ +struct tee_ioctl_cancel_arg { + __u32 cancel_id; + __u32 session; +}; + +/** + * TEE_IOC_CANCEL - Cancels an open session or invoke + */ +#define TEE_IOC_CANCEL _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 4, \ + struct tee_ioctl_cancel_arg) + +/** + * struct tee_ioctl_close_session_arg - Closes an open session + * @session: [in] Session id + */ +struct tee_ioctl_close_session_arg { + __u32 session; +}; + +/** + * TEE_IOC_CLOSE_SESSION - Closes a session + */ +#define TEE_IOC_CLOSE_SESSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 5, \ + struct tee_ioctl_close_session_arg) + +/** + * struct tee_iocl_supp_recv_arg - Receive a request for a supplicant function + * @func: [in] supplicant function + * @num_params: [in/out] number of parameters following this struct + * + * On input, @num_params is the number of params that tee-supplicant has + * room to receive; on output, it is the number of params actually + * received. + */ +struct tee_iocl_supp_recv_arg { + __u32 func; + __u32 num_params; + /* num_params tells the actual number of elements in params */ + struct tee_ioctl_param params[]; +};
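Tying the argument structs above together: TEE_IOC_INVOKE passes its variable-sized argument through struct tee_ioctl_buf_data, roughly as in this sketch (single value parameter; session comes from an earlier TEE_IOC_OPEN_SESSION, and the function id 1 is TA-specific and purely illustrative):

	char buf[sizeof(struct tee_ioctl_invoke_arg) +
		 sizeof(struct tee_ioctl_param)] = { 0 };
	struct tee_ioctl_invoke_arg *arg = (void *)buf;
	struct tee_ioctl_buf_data bd = {
		.buf_ptr = (uintptr_t)buf,
		.buf_len = sizeof(buf),
	};

	arg->func = 1;
	arg->session = session;
	arg->num_params = 1;
	arg->params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
	arg->params[0].a = 42;
	rc = ioctl(fd, TEE_IOC_INVOKE, &bd);
	/* on success, arg->ret holds the TA's return code */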
 */ +struct tee_iocl_supp_recv_arg { + __u32 func; + __u32 num_params; + /* num_params tells the actual number of elements in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_SUPPL_RECV - Receive a request for a supplicant function + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_iocl_supp_recv_arg followed by an array of struct tee_ioctl_param + */ +#define TEE_IOC_SUPPL_RECV _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 6, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_iocl_supp_send_arg - Send a response to a received request + * @ret: [out] return value + * @num_params: [in] number of parameters following this struct + */ +struct tee_iocl_supp_send_arg { + __u32 ret; + __u32 num_params; + /* num_params tells the actual number of elements in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_SUPPL_SEND - Send a response to a received request + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_iocl_supp_send_arg followed by an array of struct tee_ioctl_param + */ +#define TEE_IOC_SUPPL_SEND _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 7, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_ioctl_shm_register_data - Shared memory register argument + * @addr: [in] Start address of shared memory to register + * @length: [in/out] Length of shared memory to register + * @flags: [in/out] Flags to/from registration. + * @id: [out] Identifier of the shared memory + * + * The flags field should currently be zero as input. Updated by the call + * with actual flags as defined by TEE_IOCTL_SHM_* above. + * This structure is used as argument for TEE_IOC_SHM_REGISTER below. + */ +struct tee_ioctl_shm_register_data { + __u64 addr; + __u64 length; + __u32 flags; + __s32 id; +}; + +/** + * TEE_IOC_SHM_REGISTER - Register shared memory argument + * + * Registers shared memory between the user space process and secure OS. + * + * Returns a file descriptor on success or < 0 on failure + * + * The shared memory is unregistered when the descriptor is closed. + */ +#define TEE_IOC_SHM_REGISTER _IOWR(TEE_IOC_MAGIC, TEE_IOC_BASE + 9, \ + struct tee_ioctl_shm_register_data) +/* + * Five syscalls are used when communicating with the TEE driver. + * open(): opens the device associated with the driver + * ioctl(): as described above operating on the file descriptor from open() + * close(): two cases + * - closes the device file descriptor + * - closes a file descriptor connected to allocated shared memory + * mmap(): maps shared memory into user space using information from struct + * tee_ioctl_shm_alloc_data + * munmap(): unmaps previously shared memory + */ + +#endif /*__TEE_H*/
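For reference, here is a minimal user-space sketch of the session-open flow this header describes (hypothetical client code, not part of the patch; it assumes a TEE driver has registered /dev/tee0 and that the header above is installed as <linux/tee.h>):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/tee.h>

/* Open a session to the TA identified by uuid; returns the session id
 * or -1 on error. The caller keeps fd open for TEE_IOC_INVOKE and
 * TEE_IOC_CLOSE_SESSION later on. */
static int open_ta_session(int fd, const uint8_t uuid[TEE_IOCTL_UUID_LEN])
{
	/* arg struct followed directly by its params[] array, as the
	 * ioctl expects */
	uint8_t buf[sizeof(struct tee_ioctl_open_session_arg) +
		    sizeof(struct tee_ioctl_param)] = { 0 };
	struct tee_ioctl_open_session_arg *arg = (void *)buf;
	struct tee_ioctl_buf_data data = {
		.buf_ptr = (uintptr_t)buf,
		.buf_len = sizeof(buf),
	};

	memcpy(arg->uuid, uuid, TEE_IOCTL_UUID_LEN);
	arg->clnt_login = TEE_IOCTL_LOGIN_PUBLIC;
	arg->num_params = 1;
	arg->params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_NONE;
	if (ioctl(fd, TEE_IOC_OPEN_SESSION, &data) < 0 || arg->ret)
		return -1;
	return (int)arg->session;
}

The descriptor would come from open("/dev/tee0", O_RDWR); memref parameters would additionally go through TEE_IOC_SHM_ALLOC and mmap() as described in the five-syscalls comment above.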
diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 5e64a86989a576bf566aed67d7af57b3b75c3311..33c603dd7cd3052ae23162284d89af1d7ca7219c 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -423,6 +423,11 @@ struct usb_endpoint_descriptor { #define USB_ENDPOINT_XFER_INT 3 #define USB_ENDPOINT_MAX_ADJUSTABLE 0x80 +#define USB_EP_MAXP_MULT_SHIFT 11 +#define USB_EP_MAXP_MULT_MASK (3 << USB_EP_MAXP_MULT_SHIFT) +#define USB_EP_MAXP_MULT(m) \ + (((m) & USB_EP_MAXP_MULT_MASK) >> USB_EP_MAXP_MULT_SHIFT) + +/* The USB 3.0 spec redefines bits 5:4 of bmAttributes as interrupt ep type. */ #define USB_ENDPOINT_INTRTYPE 0x30 #define USB_ENDPOINT_INTR_PERIODIC (0 << 4) @@ -630,6 +635,20 @@ static inline int usb_endpoint_maxp(const struct usb_endpoint_descriptor *epd) return __le16_to_cpu(epd->wMaxPacketSize); } +/** + * usb_endpoint_maxp_mult - get endpoint's transactional opportunities + * @epd: endpoint to be checked + * + * Return @epd's wMaxPacketSize[12:11] + 1 + */ +static inline int +usb_endpoint_maxp_mult(const struct usb_endpoint_descriptor *epd) +{ + int maxp = __le16_to_cpu(epd->wMaxPacketSize); + + return USB_EP_MAXP_MULT(maxp) + 1; +} + static inline int usb_endpoint_interrupt_type( const struct usb_endpoint_descriptor *epd) { @@ -854,6 +873,8 @@ struct usb_wireless_cap_descriptor { /* Ultra Wide Band */ __u8 bReserved; } __attribute__((packed)); +#define USB_DT_USB_WIRELESS_CAP_SIZE 11 + /* USB 2.0 Extension descriptor */ #define USB_CAP_TYPE_EXT 2 @@ -1046,6 +1067,7 @@ struct usb_ptm_cap_descriptor { __u8 bDevCapabilityType; } __attribute__((packed)); +#define USB_DT_USB_PTM_ID_SIZE 3 /* * The size of the descriptor for the Sublink Speed Attribute Count * (SSAC) specified in bmAttributes[4:0]. diff --git a/init/Kconfig b/init/Kconfig index 3bb582498f39c30359928b05e9cf450527ff5c9b..544d7910d89bf218f5a04cf5522e1ad70f53ae94 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1748,6 +1748,13 @@ config BPF_SYSCALL Enable the bpf() system call that allows to manipulate eBPF programs and maps via file descriptors. +config BPF_JIT_ALWAYS_ON + bool "Permanently enable BPF JIT and remove BPF interpreter" + depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT + help + Enables BPF JIT and removes BPF interpreter to avoid + speculative execution of BPF instructions by the interpreter. + config SHMEM bool "Use full shmem filesystem" if EXPERT default y diff --git a/init/initramfs.c b/init/initramfs.c index d0b53f49c98aaedead58c8b48422270b935f7b45..bf3af10c500a519d6232912eab63ff7372071ebf 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -622,8 +622,11 @@ static int __init populate_rootfs(void) { char *err; - if (do_skip_initramfs) + if (do_skip_initramfs) { + if (initrd_start) + free_initrd(); return default_rootfs(); + } err = unpack_to_rootfs(__initramfs_start, __initramfs_size); if (err) diff --git a/init/main.c b/init/main.c index 25bac88bc66e8d9a2e1d96b6891f061b552f0609..99f026565608f7a25e846aacf7cb35c4514bb9fb 100644 --- a/init/main.c +++ b/init/main.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include @@ -473,6 +474,7 @@ static void __init mm_init(void) pgtable_init(); vmalloc_init(); ioremap_huge_init(); + kaiser_init(); } asmlinkage __visible void __init start_kernel(void) diff --git a/ipc/msg.c b/ipc/msg.c index e12307d0c920ccd9e50168ed890c5b6057206a52..ff10d43b51848e70eccedf3bd6a6db22bc2e9837 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -763,7 +763,10 @@ static inline int convert_mode(long *msgtyp, int msgflg) if (*msgtyp == 0) return SEARCH_ANY; if (*msgtyp < 0) { - *msgtyp = -*msgtyp; + if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */ + *msgtyp = LONG_MAX; + else + *msgtyp = -*msgtyp; return SEARCH_LESSEQUAL; } if (msgflg & MSG_EXCEPT) diff --git a/kernel/acct.c b/kernel/acct.c index 74963d192c5d96ce3674f0adbf554c95af4867ec..37f1dc696fbd4f514796e84bd60a33e1d3882358 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -99,7 +99,7 @@ static int check_free_space(struct bsd_acct_struct *acct) { struct kstatfs sbuf; - if (time_is_before_jiffies(acct->needcheck)) + if (time_is_after_jiffies(acct->needcheck)) goto out; /* May block */
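The ipc/msg.c hunk above exists because negating the most negative long is undefined behavior in C: -LONG_MIN is not representable in a long. A stand-alone user-space illustration of the guard (hypothetical, not part of the patch):

#include <limits.h>
#include <stdio.h>

int main(void)
{
	long msgtyp = LONG_MIN;

	/* Mirror the kernel's fix: clamp instead of negating LONG_MIN,
	 * since -LONG_MIN overflows a two's-complement long. */
	if (msgtyp == LONG_MIN)
		msgtyp = LONG_MAX;
	else
		msgtyp = -msgtyp;
	printf("search bound: %ld\n", msgtyp);
	return 0;
}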
diff --git a/kernel/async.c b/kernel/async.c index d2edd6efec56295d3dcfefa5587266915657d9d3..d84d4860992ea1f3e9f37199914a0b122fedbcc7 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -84,20 +84,24 @@ static atomic_t entry_count; static async_cookie_t lowest_in_progress(struct async_domain *domain) { - struct list_head *pending; + struct async_entry *first = NULL; async_cookie_t ret = ASYNC_COOKIE_MAX; unsigned long flags; spin_lock_irqsave(&async_lock, flags); - if (domain) - pending = &domain->pending; - else - pending = &async_global_pending; + if (domain) { + if (!list_empty(&domain->pending)) + first = list_first_entry(&domain->pending, + struct async_entry, domain_list); + } else { + if (!list_empty(&async_global_pending)) + first = list_first_entry(&async_global_pending, + struct async_entry, global_list); + } - if (!list_empty(pending)) - ret = list_first_entry(pending, struct async_entry, - domain_list)->cookie; + if (first) + ret = first->cookie; spin_unlock_irqrestore(&async_lock, flags); return ret; diff --git a/kernel/audit.c b/kernel/audit.c index f1ca11613379fa2950aa7e33e732d85462f1fb57..da4e7c0e36f7aac4940a4ef37e86c569a9ed5f32 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -79,13 +79,13 @@ static int audit_initialized; #define AUDIT_OFF 0 #define AUDIT_ON 1 #define AUDIT_LOCKED 2 -u32 audit_enabled; -u32 audit_ever_enabled; +u32 audit_enabled = AUDIT_OFF; +u32 audit_ever_enabled = !!AUDIT_OFF; EXPORT_SYMBOL_GPL(audit_enabled); /* Default state when kernel boots without any parameters. */ -static u32 audit_default; +static u32 audit_default = AUDIT_OFF; /* If auditing cannot proceed, audit_failure selects what happens. */ static u32 audit_failure = AUDIT_FAIL_PRINTK; @@ -1199,8 +1199,6 @@ static int __init audit_init(void) skb_queue_head_init(&audit_skb_queue); skb_queue_head_init(&audit_skb_hold_queue); audit_initialized = AUDIT_INITIALIZED; - audit_enabled = audit_default; - audit_ever_enabled |= !!audit_default; audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); @@ -1217,6 +1215,8 @@ static int __init audit_enable(char *str) audit_default = !!simple_strtol(str, NULL, 0); if (!audit_default) audit_initialized = AUDIT_DISABLED; + audit_enabled = audit_default; + audit_ever_enabled = !!audit_enabled; pr_info("%s\n", audit_default ? "enabled (after initialization)" : "disabled (until reboot)");
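The kernel/bpf/arraymap.c change that follows is part of the Spectre variant 1 hardening: for unprivileged users the array is sized up to a power of two and every index is masked, so even a mispredicted bounds check cannot speculatively read outside the map. A stand-alone sketch of the mask computation (a user-space model; __builtin_clzll stands in for the kernel's fls_long()):

#include <stdint.h>
#include <stdio.h>

/* Highest set bit position (1-based), 0 for x == 0. */
static unsigned int fls64_model(uint64_t x)
{
	return x ? 64 - (unsigned int)__builtin_clzll(x) : 0;
}

/* Computed in u64 so max_entries with bit 31 set does not overflow. */
static uint32_t index_mask(uint32_t max_entries)
{
	uint64_t mask64 = (1ULL << fls64_model((uint64_t)max_entries - 1)) - 1;

	return (uint32_t)mask64;
}

int main(void)
{
	/* 6 entries round up to 8 slots, mask 0x7; 8 entries keep mask 0x7. */
	printf("%#x %#x\n", index_mask(6), index_mask(8));
	return 0;
}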
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index a1f8b54863b25bc9d9f2ab633785725ea5af8669..2eb9b7f3c8041f066e155cb9e5f5bed31dcf467f 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -49,9 +49,10 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) static struct bpf_map *array_map_alloc(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; + u32 elem_size, index_mask, max_entries; + bool unpriv = !capable(CAP_SYS_ADMIN); struct bpf_array *array; - u64 array_size; - u32 elem_size; + u64 array_size, mask64; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || @@ -67,11 +68,32 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) elem_size = round_up(attr->value_size, 8); + max_entries = attr->max_entries; + + /* On 32 bit archs roundup_pow_of_two() with max_entries that has + * the uppermost bit set in u32 space is undefined behavior due to + * resulting 1U << 32, so do it manually here in u64 space. + */ + mask64 = fls_long(max_entries - 1); + mask64 = 1ULL << mask64; + mask64 -= 1; + + index_mask = mask64; + if (unpriv) { + /* round up array size to nearest power of 2, + * since the CPU will speculate within index_mask limits + */ + max_entries = index_mask + 1; + /* Check for overflows. */ + if (max_entries < attr->max_entries) + return ERR_PTR(-E2BIG); + } + array_size = sizeof(*array); if (percpu) - array_size += (u64) attr->max_entries * sizeof(void *); + array_size += (u64) max_entries * sizeof(void *); else - array_size += (u64) attr->max_entries * elem_size; + array_size += (u64) max_entries * elem_size; /* make sure there is no u32 overflow later in round_up() */ if (array_size >= U32_MAX - PAGE_SIZE) @@ -81,6 +103,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array = bpf_map_area_alloc(array_size); if (!array) return ERR_PTR(-ENOMEM); + array->index_mask = index_mask; + array->map.unpriv_array = unpriv; /* copy mandatory map attributes */ array->map.map_type = attr->map_type; @@ -114,7 +138,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; - return array->value + array->elem_size * index; + return array->value + array->elem_size * (index & array->index_mask); } /* Called from eBPF program */ @@ -126,7 +150,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; - return this_cpu_ptr(array->pptrs[index]); + return this_cpu_ptr(array->pptrs[index & array->index_mask]); } int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) @@ -146,7 +170,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) */ size = round_up(map->value_size, 8); rcu_read_lock(); - pptr = array->pptrs[index]; + pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); off += size; @@ -194,10 +218,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, return -EEXIST; if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) - memcpy(this_cpu_ptr(array->pptrs[index]), + memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), value, map->value_size); else - memcpy(array->value + array->elem_size * index, + memcpy(array->value + + array->elem_size * (index & array->index_mask), value, map->value_size); return 0; } @@ -231,7 +256,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, */ size = round_up(map->value_size, 8); rcu_read_lock(); - pptr = array->pptrs[index]; + pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); off += size; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index aa6d981541067d20f7dbe147ccd92532bfd97ee6..879ca844ba1d3371617ef4a16bf103dad61d69ee 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -458,6 +458,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) } EXPORT_SYMBOL_GPL(__bpf_call_base); +#ifndef CONFIG_BPF_JIT_ALWAYS_ON /** * __bpf_prog_run - run eBPF program on a given context * @ctx: is the data we are operating on @@ -641,7 +642,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) DST = tmp; CONT; ALU_MOD_X: - if (unlikely(SRC == 0)) + if (unlikely((u32)SRC == 0)) return 0; tmp = (u32) DST; DST = do_div(tmp, (u32) SRC); @@ -660,7 +661,7 @@ static unsigned int __bpf_prog_run(void *ctx, const 
struct bpf_insn *insn) DST = div64_u64(DST, SRC); CONT; ALU_DIV_X: - if (unlikely(SRC == 0)) + if (unlikely((u32)SRC == 0)) return 0; tmp = (u32) DST; do_div(tmp, (u32) SRC); @@ -715,7 +716,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_prog *prog; - u64 index = BPF_R3; + u32 index = BPF_R3; if (unlikely(index >= array->map.max_entries)) goto out; @@ -923,6 +924,13 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) } STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */ +#else +static unsigned int __bpf_prog_ret0(void *ctx, const struct bpf_insn *insn) +{ + return 0; +} +#endif + bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { @@ -970,7 +978,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { +#ifndef CONFIG_BPF_JIT_ALWAYS_ON fp->bpf_func = (void *) __bpf_prog_run; +#else + fp->bpf_func = (void *) __bpf_prog_ret0; +#endif /* eBPF JITs can rewrite the program in case constant * blinding is active. However, in case of error during @@ -979,6 +991,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) * be JITed, but falls back to the interpreter. */ fp = bpf_int_jit_compile(fp); +#ifdef CONFIG_BPF_JIT_ALWAYS_ON + if (!fp->jited) { + *err = -ENOTSUPP; + return fp; + } +#endif bpf_prog_lock_ro(fp); /* The tail call compatibility check can only be done at diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 5c51d1985b5102fbba64929b5626d41fb4b88b66..673fa6fe2d73cf815daf8a0c2eb0ce6eab15b41e 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -78,8 +78,10 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) { struct pcpu_freelist_head *head; struct pcpu_freelist_node *node; + unsigned long flags; int orig_cpu, cpu; + local_irq_save(flags); orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); @@ -87,14 +89,16 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) node = head->first; if (node) { head->first = node->next; - raw_spin_unlock(&head->lock); + raw_spin_unlock_irqrestore(&head->lock, flags); return node; } raw_spin_unlock(&head->lock); cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; - if (cpu == orig_cpu) + if (cpu == orig_cpu) { + local_irq_restore(flags); return NULL; + } } } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index adadb88c4fec278f7f6a2f37dfa36b7eec4a9193..590ce0acdf179ddb5f23e00b47bf4b7fbc93bede 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -636,57 +636,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl) list_add(&tl->list_node, &bpf_prog_types); } -/* fixup insn->imm field of bpf_call instructions: - * if (insn->imm == BPF_FUNC_map_lookup_elem) - * insn->imm = bpf_map_lookup_elem - __bpf_call_base; - * else if (insn->imm == BPF_FUNC_map_update_elem) - * insn->imm = bpf_map_update_elem - __bpf_call_base; - * else ... 
- * - * this function is called after eBPF program passed verification - */ -static void fixup_bpf_calls(struct bpf_prog *prog) -{ - const struct bpf_func_proto *fn; - int i; - - for (i = 0; i < prog->len; i++) { - struct bpf_insn *insn = &prog->insnsi[i]; - - if (insn->code == (BPF_JMP | BPF_CALL)) { - /* we reach here when program has bpf_call instructions - * and it passed bpf_check(), means that - * ops->get_func_proto must have been supplied, check it - */ - BUG_ON(!prog->aux->ops->get_func_proto); - - if (insn->imm == BPF_FUNC_get_route_realm) - prog->dst_needed = 1; - if (insn->imm == BPF_FUNC_get_prandom_u32) - bpf_user_rnd_init_once(); - if (insn->imm == BPF_FUNC_tail_call) { - /* mark bpf_tail_call as different opcode - * to avoid conditional branch in - * interpeter for every normal call - * and to prevent accidental JITing by - * JIT compiler that doesn't support - * bpf_tail_call yet - */ - insn->imm = 0; - insn->code |= BPF_X; - continue; - } - - fn = prog->aux->ops->get_func_proto(insn->imm); - /* all functions that have prototype and verifier allowed - * programs to call them, must be real in-kernel functions - */ - BUG_ON(!fn->func); - insn->imm = fn->func - __bpf_call_base; - } - } -} - /* drop refcnt on maps used by eBPF program and free auxilary data */ static void free_used_maps(struct bpf_prog_aux *aux) { @@ -894,9 +843,6 @@ static int bpf_prog_load(union bpf_attr *attr) if (err < 0) goto free_used_maps; - /* fixup BPF_CALL->imm field */ - fixup_bpf_calls(prog); - /* eBPF program is ready to be JITed */ prog = bpf_prog_select_runtime(prog, &err); if (err < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 372454aa7f37222e0f78b99f2c75e40a695eb80f..076e4a0ff95e7b4d3b41a2e9f040da8e20b6feaf 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -702,6 +702,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); } +static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = &env->cur_state.regs[regno]; + + return reg->type == PTR_TO_CTX; +} + static int check_ptr_alignment(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int off, int size) { @@ -896,6 +903,12 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EACCES; } + if (is_ctx_reg(env, insn->dst_reg)) { + verbose("BPF_XADD stores into R%d context is not allowed\n", + insn->dst_reg); + return -EACCES; + } + /* check whether atomic_add can read the memory */ err = check_mem_access(env, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); @@ -1187,7 +1200,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) } } -static int check_call(struct bpf_verifier_env *env, int func_id) +static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) { struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; @@ -1238,6 +1251,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id) err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); if (err) return err; + if (func_id == BPF_FUNC_tail_call) { + if (meta.map_ptr == NULL) { + verbose("verifier bug\n"); + return -EINVAL; + } + env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr; + } err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); if (err) return err; @@ -1790,10 +1810,17 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) /* case: R = imm * 
remember the value we stored into this reg */ + u64 imm; + + if (BPF_CLASS(insn->code) == BPF_ALU64) + imm = insn->imm; + else + imm = (u32)insn->imm; + regs[insn->dst_reg].type = CONST_IMM; - regs[insn->dst_reg].imm = insn->imm; - regs[insn->dst_reg].max_value = insn->imm; - regs[insn->dst_reg].min_value = insn->imm; + regs[insn->dst_reg].imm = imm; + regs[insn->dst_reg].max_value = imm; + regs[insn->dst_reg].min_value = imm; } } else if (opcode > BPF_END) { @@ -1829,6 +1856,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EINVAL; } + if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) { + verbose("BPF_ARSH not supported for 32 bit ALU\n"); + return -EINVAL; + } + if ((opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; @@ -1861,10 +1893,28 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) ((BPF_SRC(insn->code) == BPF_X && regs[insn->src_reg].type == CONST_IMM) || BPF_SRC(insn->code) == BPF_K)) { - if (BPF_SRC(insn->code) == BPF_X) + if (BPF_SRC(insn->code) == BPF_X) { + /* check in case the register contains a big + * 64-bit value + */ + if (regs[insn->src_reg].imm < -MAX_BPF_STACK || + regs[insn->src_reg].imm > MAX_BPF_STACK) { + verbose("R%d value too big in R%d pointer arithmetic\n", + insn->src_reg, insn->dst_reg); + return -EACCES; + } dst_reg->imm += regs[insn->src_reg].imm; - else + } else { + /* safe against overflow: addition of 32-bit + * numbers in 64-bit representation + */ dst_reg->imm += insn->imm; + } + if (dst_reg->imm > 0 || dst_reg->imm < -MAX_BPF_STACK) { + verbose("R%d out-of-bounds pointer arithmetic\n", + insn->dst_reg); + return -EACCES; + } return 0; } else if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && @@ -2697,11 +2747,12 @@ static bool states_equal(struct bpf_verifier_env *env, /* If we didn't map access then again we don't care about the * mismatched range values and it's ok if our old type was - * UNKNOWN and we didn't go to a NOT_INIT'ed reg. + * UNKNOWN and we didn't go to a NOT_INIT'ed or pointer reg. */ if (rold->type == NOT_INIT || (!varlen_map_access && rold->type == UNKNOWN_VALUE && - rcur->type != NOT_INIT)) + rcur->type != NOT_INIT && + !__is_pointer_value(env->allow_ptr_leaks, rcur))) continue; /* Don't care about the reg->id in this case. 
 */ @@ -2862,6 +2913,7 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + env->insn_aux_data[insn_idx].seen = true; if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) @@ -2973,6 +3025,12 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + if (is_ctx_reg(env, insn->dst_reg)) { + verbose("BPF_ST stores into R%d context is not allowed\n", + insn->dst_reg); + return -EACCES; + } + /* check that memory (dst_reg + off) is writeable */ err = check_mem_access(env, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, @@ -2992,7 +3050,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - err = check_call(env, insn->imm); + err = check_call(env, insn->imm, insn_idx); if (err) return err; @@ -3059,6 +3117,7 @@ static int do_check(struct bpf_verifier_env *env) return err; insn_idx++; + env->insn_aux_data[insn_idx].seen = true; } else { verbose("invalid BPF_LD mode\n"); return -EINVAL; @@ -3210,6 +3269,63 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) insn->src_reg = 0; } +/* single env->prog->insnsi[off] instruction was replaced with the range + * insnsi[off, off + cnt). Adjust corresponding insn_aux_data by copying + * [0, off) and [off, end) to new locations, so the patched range stays zero + */ +static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, + u32 off, u32 cnt) +{ + struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; + int i; + + if (cnt == 1) + return 0; + new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len); + if (!new_data) + return -ENOMEM; + memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); + memcpy(new_data + off + cnt - 1, old_data + off, + sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); + for (i = off; i < off + cnt - 1; i++) + new_data[i].seen = true; + env->insn_aux_data = new_data; + vfree(old_data); + return 0; +} + +static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, + const struct bpf_insn *patch, u32 len) +{ + struct bpf_prog *new_prog; + + new_prog = bpf_patch_insn_single(env->prog, off, patch, len); + if (!new_prog) + return NULL; + if (adjust_insn_aux_data(env, new_prog->len, off, len)) + return NULL; + return new_prog; +} +
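To make the aux-data bookkeeping above concrete, here is a small user-space model of adjust_insn_aux_data() (hypothetical types, not part of the patch): patching one instruction at off with cnt new ones keeps the entries below off in place, shifts the old entry at off up to off + cnt - 1, and marks the cnt - 1 freshly inserted slots as seen.

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

struct aux_model { bool seen; };

static struct aux_model *adjust_aux_model(const struct aux_model *old,
					  unsigned int old_len,
					  unsigned int off, unsigned int cnt)
{
	unsigned int new_len = old_len + cnt - 1;
	struct aux_model *new = calloc(new_len, sizeof(*new));

	if (!new)
		return NULL;
	/* entries before the patch site keep their slots */
	memcpy(new, old, off * sizeof(*new));
	/* the old entry at off, and everything after it, shifts up */
	memcpy(new + off + cnt - 1, old + off, (old_len - off) * sizeof(*new));
	/* instructions inserted by the verifier itself count as seen */
	for (unsigned int i = off; i < off + cnt - 1; i++)
		new[i].seen = true;
	return new;
}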
+/* The verifier does more data flow analysis than llvm and will not explore + * branches that are dead at run time. Malicious programs can have dead code + * too. Therefore replace all dead at-run-time code with nops. + */ +static void sanitize_dead_code(struct bpf_verifier_env *env) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0); + struct bpf_insn *insn = env->prog->insnsi; + const int insn_cnt = env->prog->len; + int i; + + for (i = 0; i < insn_cnt; i++) { + if (aux_data[i].seen) + continue; + memcpy(insn + i, &nop, sizeof(nop)); + } +} + /* convert load instructions that access fields of 'struct __sk_buff' * into sequence of instructions that access fields of 'struct sk_buff' */ @@ -3229,10 +3345,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) verbose("bpf verifier is misconfigured\n"); return -EINVAL; } else if (cnt) { - new_prog = bpf_patch_insn_single(env->prog, 0, - insn_buf, cnt); + new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); if (!new_prog) return -ENOMEM; + env->prog = new_prog; delta += cnt - 1; } @@ -3253,7 +3369,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) else continue; - if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX) + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg, @@ -3263,8 +3379,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return -EINVAL; } - new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf, - cnt); + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); if (!new_prog) return -ENOMEM; @@ -3278,6 +3393,99 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return 0; } +/* fixup insn->imm field of bpf_call instructions + * + * this function is called after eBPF program passed verification + */ +static int fixup_bpf_calls(struct bpf_verifier_env *env) +{ + struct bpf_prog *prog = env->prog; + struct bpf_insn *insn = prog->insnsi; + const struct bpf_func_proto *fn; + const int insn_cnt = prog->len; + struct bpf_insn insn_buf[16]; + struct bpf_prog *new_prog; + struct bpf_map *map_ptr; + int i, cnt, delta = 0; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) || + insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { + /* due to JIT bugs clear the upper 32 bits of the src register + * before div/mod operation + */ + insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg); + insn_buf[1] = *insn; + cnt = 2; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + + if (insn->code != (BPF_JMP | BPF_CALL)) + continue; + + if (insn->imm == BPF_FUNC_get_route_realm) + prog->dst_needed = 1; + if (insn->imm == BPF_FUNC_get_prandom_u32) + bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_tail_call) { + /* mark bpf_tail_call as different opcode to avoid + * conditional branch in the interpreter for every normal + * call and to prevent accidental JITing by JIT compiler + * that doesn't support bpf_tail_call yet + */ + insn->imm = 0; + insn->code |= BPF_X; + + /* instead of changing every JIT dealing with tail_call + * emit two extra insns: + * if (index >= max_entries) goto out; + * index &= array->index_mask; + * to avoid out-of-bounds cpu speculation + */ + map_ptr = env->insn_aux_data[i + delta].map_ptr; + if (!map_ptr->unpriv_array) + continue; + insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, + map_ptr->max_entries, 2); + insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, + container_of(map_ptr, + struct bpf_array, + map)->index_mask); + 
insn_buf[2] = *insn; + cnt = 3; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + + fn = prog->aux->ops->get_func_proto(insn->imm); + /* all functions that have prototype and verifier allowed + * programs to call them, must be real in-kernel functions + */ + if (!fn->func) { + verbose("kernel subsystem misconfigured func %d\n", + insn->imm); + return -EFAULT; + } + insn->imm = fn->func - __bpf_call_base; + } + + return 0; +} + static void free_states(struct bpf_verifier_env *env) { struct bpf_verifier_state_list *sl, *sln; @@ -3372,10 +3580,16 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) while (pop_stack(env, NULL) >= 0); free_states(env); + if (ret == 0) + sanitize_dead_code(env); + if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env); + if (ret == 0) + ret = fixup_bpf_calls(env); + if (log_level && log_len >= log_size - 1) { BUG_ON(log_len >= log_size); /* verifier log exceeded user supplied buffer */ diff --git a/kernel/cpu.c b/kernel/cpu.c index ed48455d9d1cc41e76b42bd467314265bacca861..9c707694f1dc807c72cd662a26fcc0dac4161d8a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1316,9 +1316,9 @@ static struct cpuhp_step cpuhp_bp_states[] = { * before blk_mq_queue_reinit_notify() from notify_dead(), * otherwise a RCU stall occurs. */ - [CPUHP_TIMERS_DEAD] = { + [CPUHP_TIMERS_PREPARE] = { .name = "timers:dead", - .startup.single = NULL, + .startup.single = timers_prepare_cpu, .teardown.single = timers_dead_cpu, }, /* Kicks the plugged cpu into life */ @@ -1328,11 +1328,6 @@ static struct cpuhp_step cpuhp_bp_states[] = { .teardown.single = NULL, .cant_stop = true, }, - [CPUHP_AP_SMPCFD_DYING] = { - .name = "smpcfd:dying", - .startup.single = NULL, - .teardown.single = smpcfd_dying_cpu, - }, /* * Handled on controll processor until the plugged processor manages * this itself. @@ -1374,6 +1369,11 @@ static struct cpuhp_step cpuhp_ap_states[] = { .startup.single = NULL, .teardown.single = rcutree_dying_cpu, }, + [CPUHP_AP_SMPCFD_DYING] = { + .name = "smpcfd:dying", + .startup.single = NULL, + .teardown.single = smpcfd_dying_cpu, + }, /* Entry state on starting. Interrupts enabled from here on. Transient * state for synchronsization */ [CPUHP_AP_ONLINE] = { diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 0b891286a150c8535454a0be774180016c7a5536..3990c1f73e451bd2f5ba7860d594ccc8fac924b6 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -357,7 +357,7 @@ static char *kdb_read(char *buffer, size_t bufsize) } kdb_printf("\n"); for (i = 0; i < count; i++) { - if (kallsyms_symbol_next(p_tmp, i) < 0) + if (WARN_ON(!kallsyms_symbol_next(p_tmp, i))) break; kdb_printf("%s ", p_tmp); *(p_tmp + len) = '\0'; diff --git a/kernel/events/core.c b/kernel/events/core.c index 60a8da7cdffffe127a819afa5d7c49205597a4d8..2cc5ed1adac0b87061ab474cf515a7e7aca8c1bc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7093,25 +7093,12 @@ static void perf_log_itrace_start(struct perf_event *event) perf_output_end(&handle); } -/* - * Generic event overflow handling, sampling. 
- */ - -static int __perf_event_overflow(struct perf_event *event, - int throttle, struct perf_sample_data *data, - struct pt_regs *regs) +static int +__perf_event_account_interrupt(struct perf_event *event, int throttle) { - int events = atomic_read(&event->event_limit); struct hw_perf_event *hwc = &event->hw; - u64 seq; int ret = 0; - - /* - * Non-sampling counters might still use the PMI to fold short - * hardware counters, ignore those. - */ - if (unlikely(!is_sampling_event(event))) - return 0; + u64 seq; seq = __this_cpu_read(perf_throttled_seq); if (seq != hwc->interrupts_seq) { @@ -7139,6 +7126,34 @@ static int __perf_event_overflow(struct perf_event *event, perf_adjust_period(event, delta, hwc->last_period, true); } + return ret; +} + +int perf_event_account_interrupt(struct perf_event *event) +{ + return __perf_event_account_interrupt(event, 1); +} + +/* + * Generic event overflow handling, sampling. + */ + +static int __perf_event_overflow(struct perf_event *event, + int throttle, struct perf_sample_data *data, + struct pt_regs *regs) +{ + int events = atomic_read(&event->event_limit); + int ret = 0; + + /* + * Non-sampling counters might still use the PMI to fold short + * hardware counters, ignore those. + */ + if (unlikely(!is_sampling_event(event))) + return 0; + + ret = __perf_event_account_interrupt(event, throttle); + /* * XXX event_limit might not quite work as expected on inherited * events diff --git a/kernel/fork.c b/kernel/fork.c index 9321b1ad3335ade560477040df46b745c78a028d..70e10cb49be0ff861bd379ac6cfec3561764e156 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -213,6 +214,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) static inline void free_thread_stack(struct task_struct *tsk) { + kaiser_unmap_thread_stack(tsk->stack); #ifdef CONFIG_VMAP_STACK if (task_stack_vm_area(tsk)) { unsigned long flags; @@ -495,6 +497,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) * functions again. */ tsk->stack = stack; + + err = kaiser_map_thread_stack(tsk->stack); + if (err) + goto free_stack; #ifdef CONFIG_VMAP_STACK tsk->stack_vm_area = stack_vm_area; #endif diff --git a/kernel/futex.c b/kernel/futex.c index 88bad86180ac9192d61fd3e09a0adb68864a51fe..bb2265ae5cbcad9ce2a1c7d5c423677c22a32249 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1711,6 +1711,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, struct futex_q *this, *next; WAKE_Q(wake_q); + if (nr_wake < 0 || nr_requeue < 0) + return -EINVAL; + if (requeue_pi) { /* * Requeue PI only works on two distinct uaddrs. 
This diff --git a/kernel/groups.c b/kernel/groups.c index 2fcadd66a8fd7cba19e264c6542ec8940a248a73..94bde5210e3d5ec0dfaa893f735ed9613e1a127b 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -77,7 +77,7 @@ static int groups_from_user(struct group_info *group_info, } /* a simple Shell sort */ -static void groups_sort(struct group_info *group_info) +void groups_sort(struct group_info *group_info) { int base, max, stride; int gidsetsize = group_info->ngroups; @@ -103,6 +103,7 @@ static void groups_sort(struct group_info *group_info) stride /= 3; } } +EXPORT_SYMBOL(groups_sort); /* a simple bsearch */ int groups_search(const struct group_info *group_info, kgid_t grp) @@ -134,7 +135,6 @@ int groups_search(const struct group_info *group_info, kgid_t grp) void set_groups(struct cred *new, struct group_info *group_info) { put_group_info(new->group_info); - groups_sort(group_info); get_group_info(group_info); new->group_info = group_info; } @@ -218,6 +218,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) return retval; } + groups_sort(group_info); retval = set_current_groups(group_info); put_group_info(group_info); diff --git a/kernel/jump_label.c b/kernel/jump_label.c index a9b8cf50059151c17f63d35cf4c622ae8b72f131..def4548ea40cbfc2dbf334928a4196772c60dcdc 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -612,7 +612,7 @@ static __init int jump_label_test(void) return 0; } -late_initcall(jump_label_test); +early_initcall(jump_label_test); #endif /* STATIC_KEYS_SELFTEST */ #endif /* HAVE_JUMP_LABEL */ diff --git a/kernel/kcov.c b/kernel/kcov.c index 3cbb0c879705f7a29f1d5bfad58551c098e69432..f4988f5a72d239e79d35ac9733b72f8e80a251eb 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -1,11 +1,16 @@ #define pr_fmt(fmt) "kcov: " fmt #define DISABLE_BRANCH_PROFILING +#include #include +#include +#include #include #include #include +#include #include +#include #include #include #include @@ -14,6 +19,10 @@ #include #include #include +#include + +/* Number of 64-bit words written per one comparison: */ +#define KCOV_WORDS_PER_CMP 4 /* * kcov descriptor (one per opened debugfs file). @@ -21,7 +30,12 @@ * - initial state after open() * - then there must be a single ioctl(KCOV_INIT_TRACE) call * - then, mmap() call (several calls are allowed but not useful) - * - then, repeated enable/disable for a task (only one task a time allowed) + * - then, ioctl(KCOV_ENABLE, arg), where arg is + * KCOV_TRACE_PC - to trace only the PCs + * or + * KCOV_TRACE_CMP - to trace only the comparison operands + * - then, ioctl(KCOV_DISABLE) to disable the task. + * Enabling/disabling ioctls can be repeated (only one task at a time allowed). */ struct kcov { /* @@ -41,6 +55,36 @@ struct kcov { struct task_struct *t; }; +static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) +{ + enum kcov_mode mode; + + /* + * We are interested in code coverage as a function of a syscall's inputs, + * so we ignore code executed in interrupts. + */ + if (!in_task()) + return false; + mode = READ_ONCE(t->kcov_mode); + /* + * There is some code that runs in interrupts but for which + * in_interrupt() returns false (e.g. preempt_schedule_irq()). + * READ_ONCE()/barrier() effectively provides load-acquire wrt + * interrupts, there are paired barrier()/WRITE_ONCE() in + * kcov_ioctl_locked(). 
+ */ + barrier(); + return mode == needed_mode; +} + +static unsigned long canonicalize_ip(unsigned long ip) +{ +#ifdef CONFIG_RANDOMIZE_BASE + ip -= kaslr_offset(); +#endif + return ip; +} + /* * Entry point from instrumented code. * This is called once per basic-block/edge. @@ -48,46 +92,139 @@ struct kcov { void notrace __sanitizer_cov_trace_pc(void) { struct task_struct *t; - enum kcov_mode mode; + unsigned long *area; + unsigned long ip = canonicalize_ip(_RET_IP_); + unsigned long pos; t = current; - /* - * We are interested in code coverage as a function of a syscall inputs, - * so we ignore code executed in interrupts. - * The checks for whether we are in an interrupt are open-coded, because - * 1. We can't use in_interrupt() here, since it also returns true - * when we are inside local_bh_disable() section. - * 2. We don't want to use (in_irq() | in_serving_softirq() | in_nmi()), - * since that leads to slower generated code (three separate tests, - * one for each of the flags). - */ - if (!t || (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET - | NMI_MASK))) + if (!check_kcov_mode(KCOV_MODE_TRACE_PC, t)) return; - mode = READ_ONCE(t->kcov_mode); - if (mode == KCOV_MODE_TRACE) { - unsigned long *area; - unsigned long pos; - /* - * There is some code that runs in interrupts but for which - * in_interrupt() returns false (e.g. preempt_schedule_irq()). - * READ_ONCE()/barrier() effectively provides load-acquire wrt - * interrupts, there are paired barrier()/WRITE_ONCE() in - * kcov_ioctl_locked(). - */ - barrier(); - area = t->kcov_area; - /* The first word is number of subsequent PCs. */ - pos = READ_ONCE(area[0]) + 1; - if (likely(pos < t->kcov_size)) { - area[pos] = _RET_IP_; - WRITE_ONCE(area[0], pos); - } + area = t->kcov_area; + /* The first 64-bit word is the number of subsequent PCs. */ + pos = READ_ONCE(area[0]) + 1; + if (likely(pos < t->kcov_size)) { + area[pos] = ip; + WRITE_ONCE(area[0], pos); } } EXPORT_SYMBOL(__sanitizer_cov_trace_pc); +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS +static void write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip) +{ + struct task_struct *t; + u64 *area; + u64 count, start_index, end_pos, max_pos; + + t = current; + if (!check_kcov_mode(KCOV_MODE_TRACE_CMP, t)) + return; + + ip = canonicalize_ip(ip); + + /* + * We write all comparison arguments and types as u64. + * The buffer was allocated for t->kcov_size unsigned longs. + */ + area = (u64 *)t->kcov_area; + max_pos = t->kcov_size * sizeof(unsigned long); + + count = READ_ONCE(area[0]); + + /* Every record is KCOV_WORDS_PER_CMP 64-bit words. 
*/ + start_index = 1 + count * KCOV_WORDS_PER_CMP; + end_pos = (start_index + KCOV_WORDS_PER_CMP) * sizeof(u64); + if (likely(end_pos <= max_pos)) { + area[start_index] = type; + area[start_index + 1] = arg1; + area[start_index + 2] = arg2; + area[start_index + 3] = ip; + WRITE_ONCE(area[0], count + 1); + } +} + +void notrace __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(0), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp1); + +void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(1), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2); + +void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp4); + +void notrace __sanitizer_cov_trace_cmp8(u64 arg1, u64 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(3), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp8); + +void notrace __sanitizer_cov_trace_const_cmp1(u8 arg1, u8 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(0) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp1); + +void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(1) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2); + +void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp4); + +void notrace __sanitizer_cov_trace_const_cmp8(u64 arg1, u64 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(3) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp8); + +void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) +{ + u64 i; + u64 count = cases[0]; + u64 size = cases[1]; + u64 type = KCOV_CMP_CONST; + + switch (size) { + case 8: + type |= KCOV_CMP_SIZE(0); + break; + case 16: + type |= KCOV_CMP_SIZE(1); + break; + case 32: + type |= KCOV_CMP_SIZE(2); + break; + case 64: + type |= KCOV_CMP_SIZE(3); + break; + default: + return; + } + for (i = 0; i < count; i++) + write_comp_data(type, cases[i + 2], val, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_switch); +#endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ +
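For orientation, here is a user-space sketch of how a fuzzer would consume the comparison records written above (a hypothetical example, not part of the patch; ioctl constants as defined in the kcov UAPI header, with each record being four 64-bit words: type, arg1, arg2, PC):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kcov.h>

#define COVER_SIZE (64 << 10)

int main(void)
{
	int fd = open("/sys/kernel/debug/kcov", O_RDWR);
	uint64_t *cover, i;

	if (fd < 0)
		return 1;
	if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))
		return 1;
	cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long),
		     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (cover == MAP_FAILED || ioctl(fd, KCOV_ENABLE, KCOV_TRACE_CMP))
		return 1;
	cover[0] = 0;			/* reset the record count */
	read(-1, NULL, 0);		/* the syscall under test */
	for (i = 0; i < cover[0]; i++)	/* record: type, arg1, arg2, PC */
		printf("cmp type %#llx: %#llx vs %#llx\n",
		       (unsigned long long)cover[1 + i * 4],
		       (unsigned long long)cover[2 + i * 4],
		       (unsigned long long)cover[3 + i * 4]);
	ioctl(fd, KCOV_DISABLE, 0);
	close(fd);
	return 0;
}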
 static void kcov_get(struct kcov *kcov) { atomic_inc(&kcov->refcount); @@ -124,6 +261,7 @@ void kcov_task_exit(struct task_struct *t) /* Just to not leave dangling references behind. */ kcov_task_init(t); kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; spin_unlock(&kcov->lock); kcov_put(kcov); } @@ -142,7 +280,7 @@ static int kcov_mmap(struct file *filep, struct vm_area_struct *vma) spin_lock(&kcov->lock); size = kcov->size * sizeof(unsigned long); - if (kcov->mode == KCOV_MODE_DISABLED || vma->vm_pgoff != 0 || + if (kcov->mode != KCOV_MODE_INIT || vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != size) { res = -EINVAL; goto exit; @@ -171,6 +309,7 @@ static int kcov_open(struct inode *inode, struct file *filep) kcov = kzalloc(sizeof(*kcov), GFP_KERNEL); if (!kcov) return -ENOMEM; + kcov->mode = KCOV_MODE_DISABLED; atomic_set(&kcov->refcount, 1); spin_lock_init(&kcov->lock); filep->private_data = kcov; @@ -206,7 +345,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, if (size < 2 || size > INT_MAX / sizeof(unsigned long)) return -EINVAL; kcov->size = size; - kcov->mode = KCOV_MODE_TRACE; + kcov->mode = KCOV_MODE_INIT; return 0; case KCOV_ENABLE: /* @@ -216,17 +355,25 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, * at task exit or voluntary by KCOV_DISABLE. After that it can * be enabled for another task. */ - unused = arg; - if (unused != 0 || kcov->mode == KCOV_MODE_DISABLED || - kcov->area == NULL) + if (kcov->mode != KCOV_MODE_INIT || !kcov->area) return -EINVAL; if (kcov->t != NULL) return -EBUSY; + if (arg == KCOV_TRACE_PC) + kcov->mode = KCOV_MODE_TRACE_PC; + else if (arg == KCOV_TRACE_CMP) +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS + kcov->mode = KCOV_MODE_TRACE_CMP; +#else + return -ENOTSUPP; +#endif + else + return -EINVAL; t = current; /* Cache in task struct for performance. */ t->kcov_size = kcov->size; t->kcov_area = kcov->area; - /* See comment in __sanitizer_cov_trace_pc(). */ + /* See comment in check_kcov_mode(). */ barrier(); WRITE_ONCE(t->kcov_mode, kcov->mode); t->kcov = kcov; @@ -244,6 +391,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, return -EINVAL; kcov_task_init(t); kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; kcov_put(kcov); return 0; default: @@ -266,6 +414,7 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) static const struct file_operations kcov_fops = { .open = kcov_open, .unlocked_ioctl = kcov_ioctl, + .compat_ioctl = kcov_ioctl, .mmap = kcov_mmap, .release = kcov_close, }; diff --git a/kernel/module.c b/kernel/module.c index 0e54d5bf0097f57954d4848db6a830f5a0bc7072..07bfb9971f2fb7b55f52bbe677e5f9afd8b84d53 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2817,6 +2817,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) } #endif /* CONFIG_LIVEPATCH */ +static void check_modinfo_retpoline(struct module *mod, struct load_info *info) +{ + if (retpoline_module_ok(get_modinfo(info, "retpoline"))) + return; + + pr_warn("%s: loading module not compiled with retpoline compiler.\n", + mod->name); +} + /* Sets info->hdr and info->len. 
*/ static int copy_module_from_user(const void __user *umod, unsigned long len, struct load_info *info) @@ -2969,6 +2978,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); } + check_modinfo_retpoline(mod, info); + if (get_modinfo(info, "staging")) { add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); pr_warn("%s: module is from the staging directory, the quality " diff --git a/kernel/relay.c b/kernel/relay.c index 8f18d314a96a49a1e9fc6c4cf1c5bead5c96a787..2603e04f55f9a4542a209d7304687585f883f122 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -611,7 +611,6 @@ struct rchan *relay_open(const char *base_filename, kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); - kfree(chan); return NULL; } EXPORT_SYMBOL_GPL(relay_open); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3af7c66698bb229f740b988c953a04f4e065acf6..b9d5c0dd3c6c9070ff2e57a3a94f0561f760ada3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5968,6 +5968,19 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) call_rcu_sched(&old_rd->rcu, free_rootdomain); } +void sched_get_rd(struct root_domain *rd) +{ + atomic_inc(&rd->refcount); +} + +void sched_put_rd(struct root_domain *rd) +{ + if (!atomic_dec_and_test(&rd->refcount)) + return; + + call_rcu_sched(&rd->rcu, free_rootdomain); +} + static int init_rootdomain(struct root_domain *rd) { memset(rd, 0, sizeof(*rd)); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 6c1f4443a150c10076f575b61c2d8c85200d612f..49815cc154e4a368413088de2112cd6f23e268e5 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -447,13 +447,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, * * This function returns true if: * - * runtime / (deadline - t) > dl_runtime / dl_period , + * runtime / (deadline - t) > dl_runtime / dl_deadline , * * IOW we can't recycle current parameters. * - * Notice that the bandwidth check is done against the period. For + * Notice that the bandwidth check is done against the deadline. For * task with deadline equal to period this is the same of using - * dl_deadline instead of dl_period in the equation above. + * dl_period instead of dl_deadline in the equation above. */ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, struct sched_dl_entity *pi_se, u64 t) @@ -478,7 +478,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, * of anything below microseconds resolution is actually fiction * (but still we want to give the user that illusion >;). */ - left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); + left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); right = ((dl_se->deadline - t) >> DL_SCALE) * (pi_se->dl_runtime >> DL_SCALE); @@ -507,10 +507,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se, } } +static inline u64 dl_next_period(struct sched_dl_entity *dl_se) +{ + return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period; +} + /* * If the entity depleted all its runtime, and if we want it to sleep * while waiting for some new execution time to become available, we - * set the bandwidth enforcement timer to the replenishment instant + * set the bandwidth replenishment timer to the replenishment instant * and try to activate it. 
* * Notice that it is important for the caller to know if the timer @@ -532,7 +537,7 @@ static int start_dl_timer(struct task_struct *p) * that it is actually coming from rq->clock and not from * hrtimer's time base reading. */ - act = ns_to_ktime(dl_se->deadline); + act = ns_to_ktime(dl_next_period(dl_se)); now = hrtimer_cb_get_time(timer); delta = ktime_to_ns(now) - rq_clock(rq); act = ktime_add_ns(act, delta); @@ -640,6 +645,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) lockdep_unpin_lock(&rq->lock, rf.cookie); rq = dl_task_offline_migration(rq, p); rf.cookie = lockdep_pin_lock(&rq->lock); + update_rq_clock(rq); /* * Now that the task has been migrated to the new RQ and we @@ -691,6 +697,39 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) timer->function = dl_task_timer; } +/* + * During the activation, CBS checks if it can reuse the current task's + * runtime and period. If the deadline of the task is in the past, CBS + * cannot use the runtime, and so it replenishes the task. This rule + * works fine for implicit deadline tasks (deadline == period), and the + * CBS was designed for implicit deadline tasks. However, a task with + * constrained deadline (deadine < period) might be awakened after the + * deadline, but before the next period. In this case, replenishing the + * task would allow it to run for runtime / deadline. As in this case + * deadline < period, CBS enables a task to run for more than the + * runtime / period. In a very loaded system, this can cause a domino + * effect, making other tasks miss their deadlines. + * + * To avoid this problem, in the activation of a constrained deadline + * task after the deadline but before the next period, throttle the + * task and set the replenishing timer to the begin of the next period, + * unless it is boosted. + */ +static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se) +{ + struct task_struct *p = dl_task_of(dl_se); + struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se)); + + if (dl_time_before(dl_se->deadline, rq_clock(rq)) && + dl_time_before(rq_clock(rq), dl_next_period(dl_se))) { + if (unlikely(dl_se->dl_boosted || !start_dl_timer(p))) + return; + dl_se->dl_throttled = 1; + if (dl_se->runtime > 0) + dl_se->runtime = 0; + } +} + static int dl_runtime_exceeded(struct sched_dl_entity *dl_se) { @@ -926,6 +965,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se) __dequeue_dl_entity(dl_se); } +static inline bool dl_is_constrained(struct sched_dl_entity *dl_se) +{ + return dl_se->dl_deadline < dl_se->dl_period; +} + static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) { struct task_struct *pi_task = rt_mutex_get_top_task(p); @@ -951,6 +995,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) return; } + /* + * Check if a constrained deadline task was activated + * after the deadline but before the next period. + * If that is the case, the task will be throttled and + * the replenishment timer will be set to the next period. + */ + if (!p->dl.dl_throttled && dl_is_constrained(&p->dl)) + dl_check_constrained_dl(&p->dl); + /* * If p is throttled, we do nothing. 
In fact, if it exhausted * its budget it needs a replenishment and, since it now is on diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 33b8142758bdbceb158fc889078e1ad821923ed5..1f1a73c8b7c9b17972f1d80102b6fd83cd0fac84 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5745,10 +5745,22 @@ static void calc_sg_energy(struct energy_env *eenv) static int compute_energy(struct energy_env *eenv) { struct cpumask visit_cpus; + int cpu_count; WARN_ON(!eenv->sg_top->sge); cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top)); + /* If a cpu is hotplugged in while we are in this function, + * it does not appear in the existing visit_cpus mask + * which came from the sched_group pointer of the + * sched_domain pointed at by sd_ea for either the prev + * or next cpu and was dereferenced in __energy_diff. + * Since we will dereference sd_scs later as we iterate + * through the CPUs we expect to visit, new CPUs can + * be present which are not in the visit_cpus mask. + * Guard this with cpu_count. + */ + cpu_count = cpumask_weight(&visit_cpus); while (!cpumask_empty(&visit_cpus)) { struct sched_group *sg_shared_cap = NULL; @@ -5758,6 +5770,8 @@ static int compute_energy(struct energy_env *eenv) /* * Is the group utilization affected by cpus outside this * sched_group? + * This sd may have groups with cpus which were not present + * when we took visit_cpus. */ sd = rcu_dereference(per_cpu(sd_scs, cpu)); if (sd && sd->parent) @@ -5782,9 +5796,24 @@ static int compute_energy(struct energy_env *eenv) eenv->sg = sg; calc_sg_energy(eenv); - /* remove CPUs we have just visited */ - if (!sd->child) + if (!sd->child) { + /* + * cpu_count here is the number of + * cpus we expect to visit in this + * calculation. If we race against + * hotplug, we can have extra cpus + * added to the groups we are + * iterating which do not appear in + * the visit_cpus mask. In that case + * we are not able to calculate energy + * without restarting so we will bail + * out and use prev_cpu this time. + */ + if (!cpu_count) + return -EINVAL; cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg)); + cpu_count--; + } if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top))) goto next_cpu; @@ -5796,6 +5825,9 @@ static int compute_energy(struct energy_env *eenv) * If we raced with hotplug and got an sd NULL-pointer; * returning a wrong energy estimation is better than * entering an infinite loop. + * Specifically: If a cpu is unplugged after we took + * the visit_cpus mask, it no longer has an sd_scs + * pointer, so when we dereference it, we get NULL. */ if (cpumask_test_cpu(cpu, &visit_cpus)) return -EINVAL; @@ -6532,7 +6564,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t * Due to large variance we need a large fuzz factor; hackbench in * particularly is sensitive here. 
*/ - if ((avg_idle / 512) < avg_cost) + if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost) return -1; time = local_clock(); @@ -10356,7 +10388,7 @@ void check_for_migration(struct rq *rq, struct task_struct *p) int active_balance; int cpu = task_cpu(p); - if (rq->misfit_task) { + if (energy_aware() && rq->misfit_task) { if (rq->curr->state != TASK_RUNNING || rq->curr->nr_cpus_allowed == 1) return; diff --git a/kernel/sched/features.h b/kernel/sched/features.h index a8fe53bd51df780a512eb5e315f49c03505f37be..c7f1bdd4dcc8aa5a538f09e67c8e6f818271fc3c 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true) */ SCHED_FEAT(TTWU_QUEUE, true) +/* + * When doing wakeups, attempt to limit superfluous scans of the LLC domain. + */ +SCHED_FEAT(SIS_AVG_CPU, false) + #ifdef HAVE_RT_PUSH_IPI /* * In order to avoid a thundering herd attack of CPUs that are diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 925c6c9484e214fbf0570122ccab4dbd7e87822c..cedc7c7be76a46d7174aa17f2bba10ad1c48a634 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1952,9 +1952,8 @@ static void push_rt_tasks(struct rq *rq) * the rt_loop_next will cause the iterator to perform another scan. * */ -static int rto_next_cpu(struct rq *rq) +static int rto_next_cpu(struct root_domain *rd) { - struct root_domain *rd = rq->rd; int next; int cpu; @@ -2030,19 +2029,24 @@ static void tell_cpu_to_push(struct rq *rq) * Otherwise it is finishing up and an ipi needs to be sent. */ if (rq->rd->rto_cpu < 0) - cpu = rto_next_cpu(rq); + cpu = rto_next_cpu(rq->rd); raw_spin_unlock(&rq->rd->rto_lock); rto_start_unlock(&rq->rd->rto_loop_start); - if (cpu >= 0) + if (cpu >= 0) { + /* Make sure the rd does not get freed while pushing */ + sched_get_rd(rq->rd); irq_work_queue_on(&rq->rd->rto_push_work, cpu); + } } /* Called from hardirq context */ void rto_push_irq_work_func(struct irq_work *work) { + struct root_domain *rd = + container_of(work, struct root_domain, rto_push_work); struct rq *rq; int cpu; @@ -2058,18 +2062,20 @@ void rto_push_irq_work_func(struct irq_work *work) raw_spin_unlock(&rq->lock); } - raw_spin_lock(&rq->rd->rto_lock); + raw_spin_lock(&rd->rto_lock); /* Pass the IPI to the next rt overloaded queue */ - cpu = rto_next_cpu(rq); + cpu = rto_next_cpu(rd); - raw_spin_unlock(&rq->rd->rto_lock); + raw_spin_unlock(&rd->rto_lock); - if (cpu < 0) + if (cpu < 0) { + sched_put_rd(rd); return; + } /* Try the next RT overloaded CPU */ - irq_work_queue_on(&rq->rd->rto_push_work, cpu); + irq_work_queue_on(&rd->rto_push_work, cpu); } #endif /* HAVE_RT_PUSH_IPI */ @@ -2079,8 +2085,9 @@ static void pull_rt_task(struct rq *this_rq) bool resched = false; struct task_struct *p; struct rq *src_rq; + int rt_overload_count = rt_overloaded(this_rq); - if (likely(!rt_overloaded(this_rq))) + if (likely(!rt_overload_count)) return; /* @@ -2089,6 +2096,11 @@ static void pull_rt_task(struct rq *this_rq) */ smp_rmb(); + /* If we are the only overloaded CPU do nothing */ + if (rt_overload_count == 1 && + cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask)) + return; + #ifdef HAVE_RT_PUSH_IPI if (sched_feat(RT_PUSH_IPI)) { tell_cpu_to_push(this_rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 646ff3c94e7dc4e6d960d937da74c95b287c1782..fc77c453de28bfe5ff791351a67ba2b08be429d5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -610,6 +610,8 @@ struct root_domain { }; extern struct root_domain def_root_domain; +extern void sched_get_rd(struct 
root_domain *rd); +extern void sched_put_rd(struct root_domain *rd); #ifdef HAVE_RT_PUSH_IPI extern void rto_push_irq_work_func(struct irq_work *work); diff --git a/kernel/signal.c b/kernel/signal.c index e48668c3c972fda7e0c606917b325a94765dc351..7ebe236a5364f0b407842537ee1b207e323c3200 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -72,7 +72,7 @@ static int sig_task_ignored(struct task_struct *t, int sig, bool force) handler = sig_handler(t, sig); if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && - handler == SIG_DFL && !force) + handler == SIG_DFL && !(force && sig_kernel_only(sig))) return 1; return sig_handler_ignored(handler, sig); @@ -88,13 +88,15 @@ static int sig_ignored(struct task_struct *t, int sig, bool force) if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) return 0; - if (!sig_task_ignored(t, sig, force)) - return 0; - /* - * Tracers may want to know about even ignored signals. + * Tracers may want to know about even an ignored signal, unless it + * is SIGKILL, which can't be reported anyway but can be ignored + * by a SIGNAL_UNKILLABLE task. */ - return !t->ptrace; + if (t->ptrace && sig != SIGKILL) + return 0; + + return sig_task_ignored(t, sig, force); } /* @@ -917,9 +919,9 @@ static void complete_signal(int sig, struct task_struct *p, int group) * then start taking the whole group down immediately. */ if (sig_fatal(p, sig) && - !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && + !(signal->flags & SIGNAL_GROUP_EXIT) && !sigismember(&t->real_blocked, sig) && - (sig == SIGKILL || !t->ptrace)) { + (sig == SIGKILL || !p->ptrace)) { /* * This signal will be fatal to the whole group. */ diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index eeb7f2f5698d5cd6dda88c8e794299447e4cc243..54fd2fed36e9c47c29dbfbe71924dbed03a095ba 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -652,7 +652,9 @@ static void hrtimer_reprogram(struct hrtimer *timer, static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { base->expires_next.tv64 = KTIME_MAX; + base->hang_detected = 0; base->hres_active = 0; + base->next_timer = NULL; } /* @@ -1610,6 +1612,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) timerqueue_init_head(&cpu_base->clock_base[i].active); } + cpu_base->active_bases = 0; cpu_base->cpu = cpu; hrtimer_init_hres(cpu_base); return 0; } diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index f2826c35e9185f60586f06dff53f51ca3324c659..fc7c37ad90a0054100c96a2d588495ce34413a18 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -507,17 +507,22 @@ static struct pid *good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; - if ((event->sigev_notify & SIGEV_THREAD_ID ) && - (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || - !same_thread_group(rtn, current) || - (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) + switch (event->sigev_notify) { + case SIGEV_SIGNAL | SIGEV_THREAD_ID: + rtn = find_task_by_vpid(event->sigev_notify_thread_id); + if (!rtn || !same_thread_group(rtn, current)) + return NULL; + /* FALLTHRU */ + case SIGEV_SIGNAL: + case SIGEV_THREAD: + if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX) + return NULL; + /* FALLTHRU */ + case SIGEV_NONE: + return task_pid(rtn); + default: return NULL; - - if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)) - return NULL; - - return task_pid(rtn); + } } void posix_timers_register_clock(const clockid_t clock_id,
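The good_sigevent() rewrite above turns the accepted notification layouts into explicit switch cases: SIGEV_SIGNAL | SIGEV_THREAD_ID must name a thread in the caller's thread group, plain SIGEV_SIGNAL and SIGEV_THREAD need a valid signal number, and SIGEV_NONE needs neither. A minimal user-space caller exercising the thread-directed case (the sigev_notify_thread_id accessor is not exported by all libcs, so it is defined here if missing; link with -lrt on older glibc):

    #define _GNU_SOURCE
    #include <signal.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <time.h>
    #include <unistd.h>

    #ifndef sigev_notify_thread_id
    #define sigev_notify_thread_id _sigev_un._tid
    #endif

    int main(void)
    {
            struct sigevent sev = { 0 };
            timer_t id;

            /* Only SIGEV_SIGNAL may be combined with SIGEV_THREAD_ID. */
            sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID;
            sev.sigev_signo = SIGRTMIN;
            sev.sigev_notify_thread_id = syscall(SYS_gettid);

            if (timer_create(CLOCK_MONOTONIC, &sev, &id) == -1)
                    perror("timer_create"); /* EINVAL for rejected layouts */
            else
                    timer_delete(id);
            return 0;
    }

Combinations that fall through to the default label, e.g. SIGEV_THREAD | SIGEV_THREAD_ID, now fail timer_create() with EINVAL instead of relying on the old mask arithmetic.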
@@ -745,8 +750,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) /* interval timer ? */ if (iv.tv64) cur_setting->it_interval = ktime_to_timespec(iv); - else if (!hrtimer_active(timer) && - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) + else if (!hrtimer_active(timer) && timr->it_sigev_notify != SIGEV_NONE) return; now = timer->base->get_time(); @@ -757,7 +761,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) * expiry is > now. */ if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) + timr->it_sigev_notify == SIGEV_NONE)) timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); remaining = __hrtimer_expires_remaining_adjusted(timer, now); @@ -767,7 +771,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) * A single shot SIGEV_NONE timer must return 0, when * it is expired ! */ - if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) + if (timr->it_sigev_notify != SIGEV_NONE) cur_setting->it_value.tv_nsec = 1; } else cur_setting->it_value = ktime_to_timespec(remaining); @@ -865,7 +869,7 @@ common_timer_set(struct k_itimer *timr, int flags, timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); /* SIGEV_NONE timers are not queued ! See common_timer_get */ - if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { + if (timr->it_sigev_notify == SIGEV_NONE) { /* Setup correct expiry time for relative timers */ if (mode == HRTIMER_MODE_REL) { hrtimer_add_expires(timer, timer->base->get_time()); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 380159479aeb114107c6066643b49f43271eb98c..6a93da84fee94fba7468319df3055448bb7a5c53 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -663,6 +663,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); } +static inline bool local_timer_softirq_pending(void) +{ + return local_softirq_pending() & BIT(TIMER_SOFTIRQ); +} + static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now, int cpu) { @@ -679,8 +684,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, } while (read_seqretry(&jiffies_lock, seq)); ts->last_jiffies = basejiff; - if (rcu_needs_cpu(basemono, &next_rcu) || - arch_needs_cpu() || irq_work_needs_cpu()) { + /* + * Keep the periodic tick, when RCU, architecture or irq_work + * requests it. + * Apart from that, check whether the local timer softirq is + * pending. If so, it's a bad idea to call get_next_timer_interrupt() + * because there is an already expired timer, so it will request + * immediate expiry, which rearms the hardware timer with a + * minimal delta which brings us back to this place + * immediately. Lather, rinse and repeat... + */
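local_softirq_pending() returns a bitmask indexed by softirq number, so the helper above has to test BIT(TIMER_SOFTIRQ), i.e. bit 1; masking with the raw enum value TIMER_SOFTIRQ (== 1) would silently test bit 0, which is HI_SOFTIRQ. A user-space sketch of the difference (the three-entry enum mirrors the start of the kernel's softirq numbering; the rest is invented):

    #include <stdio.h>

    enum { HI_SOFTIRQ = 0, TIMER_SOFTIRQ, NET_TX_SOFTIRQ };
    #define BIT(n) (1UL << (n))

    int main(void)
    {
            /* Only the timer softirq is pending. */
            unsigned long pending = BIT(TIMER_SOFTIRQ);

            /* Wrong: "& TIMER_SOFTIRQ" is "& 1", a HI_SOFTIRQ test. */
            printf("raw enum: %lu\n", pending & TIMER_SOFTIRQ);
            /* Right: BIT(TIMER_SOFTIRQ) tests bit 1. */
            printf("BIT():    %lu\n", pending & BIT(TIMER_SOFTIRQ));
            return 0;
    }

The raw-enum test prints 0 here: it would miss the pending timer softirq and let the tick be stopped with an expired timer queued, re-creating exactly the storm the comment describes.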
+ if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() || + irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 17c0d149075424e7861b8d96b4b797a8aa1a4a8b..427e33dbb98d75463b21cdb1f292f5755fd25ae4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -636,9 +636,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) tk->ktime_sec = seconds; /* Update the monotonic raw base */ - seconds = tk->raw_sec; - nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift); - tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); + tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC); } /* must hold timekeeper_lock */ diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 7d670362891a9c563d1b68e699eb0a9ef01ebf7e..2d5cc7dfee14c2d0a36bbe915da3cd7d878cb5e2 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -849,11 +849,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); /* - * If the timer is deferrable and nohz is active then we need to use - * the deferrable base. + * If the timer is deferrable and NO_HZ_COMMON is set then we need + * to use the deferrable base. */ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && - (tflags & TIMER_DEFERRABLE)) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); return base; } @@ -863,11 +862,10 @@ static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); /* - * If the timer is deferrable and nohz is active then we need to use - * the deferrable base. + * If the timer is deferrable and NO_HZ_COMMON is set then we need + * to use the deferrable base. */ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && - (tflags & TIMER_DEFERRABLE)) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = this_cpu_ptr(&timer_bases[BASE_DEF]); return base; } @@ -1021,8 +1019,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) if (!ret && pending_only) goto out_unlock; - debug_activate(timer, expires); - new_base = get_target_base(base, timer->flags); if (base != new_base) { @@ -1046,6 +1042,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } } + debug_activate(timer, expires); + timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance @@ -1684,7 +1682,7 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) base->must_forward_clk = false; __run_timers(base); - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); } @@ -1698,7 +1696,7 @@ void run_local_timers(void) hrtimer_run_queues(); /* Raise the softirq only if required. */ if (time_before(jiffies, base->clk)) { - if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) + if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) return; /* CPU is awake, so check the deferrable base.
*/ base++; @@ -1853,6 +1851,21 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h } } +int timers_prepare_cpu(unsigned int cpu) +{ + struct timer_base *base; + int b; + + for (b = 0; b < NR_BASES; b++) { + base = per_cpu_ptr(&timer_bases[b], cpu); + base->clk = jiffies; + base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; + base->is_idle = false; + base->must_forward_clk = true; + } + return 0; +} + int timers_dead_cpu(unsigned int cpu) { struct timer_base *old_base; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5b8d7189e1479d5eb3cf06cc57a82ffa693903af..2884fe01cb5435be69dc122248220436cb2b137d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3911,7 +3911,6 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, func_g.type = filter_parse_regex(glob, strlen(glob), &func_g.search, ¬); func_g.len = strlen(func_g.search); - func_g.search = glob; /* we do not support '!' for function probes */ if (WARN_ON(not)) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f5c016e8fc884e2374b369bac9de2dd79798f332..3e1d11f4fe445abbb01329b5476c81e13581a522 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); /* Missed count stored at end */ #define RB_MISSED_STORED (1 << 30) +#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED) + struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ @@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage) */ size_t ring_buffer_page_len(void *page) { - return local_read(&((struct buffer_data_page *)page)->commit) + struct buffer_data_page *bpage = page; + + return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS) + BUF_PAGE_HDR_SIZE; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b782c8487c11cfe7ae5c7941f9f23a7b856bca35..7590d6acc82b456dcf4618955cd946fd679809c6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3773,37 +3773,30 @@ static const struct file_operations show_traces_fops = { .llseek = seq_lseek, }; -/* - * The tracer itself will not take this lock, but still we want - * to provide a consistent cpumask to user-space: - */ -static DEFINE_MUTEX(tracing_cpumask_update_lock); - -/* - * Temporary storage for the character representation of the - * CPU bitmask (and one more byte for the newline): - */ -static char mask_str[NR_CPUS + 1]; - static ssize_t tracing_cpumask_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { struct trace_array *tr = file_inode(filp)->i_private; + char *mask_str; int len; - mutex_lock(&tracing_cpumask_update_lock); + len = snprintf(NULL, 0, "%*pb\n", + cpumask_pr_args(tr->tracing_cpumask)) + 1; + mask_str = kmalloc(len, GFP_KERNEL); + if (!mask_str) + return -ENOMEM; - len = snprintf(mask_str, count, "%*pb\n", + len = snprintf(mask_str, len, "%*pb\n", cpumask_pr_args(tr->tracing_cpumask)); if (len >= count) { count = -EINVAL; goto out_err; } - count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); + count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); out_err: - mutex_unlock(&tracing_cpumask_update_lock); + kfree(mask_str); return count; } @@ -3823,8 +3816,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, if (err) goto err_unlock; - mutex_lock(&tracing_cpumask_update_lock); - local_irq_disable(); arch_spin_lock(&tr->max_lock); for_each_tracing_cpu(cpu) { @@ 
-3847,8 +3838,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, local_irq_enable(); cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); - - mutex_unlock(&tracing_cpumask_update_lock); free_cpumask_var(tracing_cpumask_new); return count; @@ -6282,7 +6271,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, .spd_release = buffer_spd_release, }; struct buffer_ref *ref; - int entries, size, i; + int entries, i; ssize_t ret = 0; #ifdef CONFIG_TRACER_MAX_TRACE @@ -6333,14 +6322,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; } - /* - * zero out any left over data, this is going to - * user land. - */ - size = ring_buffer_page_len(ref->page); - if (size < PAGE_SIZE) - memset(ref->page + size, 0, PAGE_SIZE - size); - page = virt_to_page(ref->page); spd.pages[i] = page; @@ -7064,6 +7045,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size buf->data = alloc_percpu(struct trace_array_cpu); if (!buf->data) { ring_buffer_free(buf->buffer); + buf->buffer = NULL; return -ENOMEM; } @@ -7087,7 +7069,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) allocate_snapshot ? size : 1); if (WARN_ON(ret)) { ring_buffer_free(tr->trace_buffer.buffer); + tr->trace_buffer.buffer = NULL; free_percpu(tr->trace_buffer.data); + tr->trace_buffer.data = NULL; return -ENOMEM; } tr->allocated_snapshot = allocate_snapshot; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 03c0a48c3ac4f3daca47a46e401a75f02a826316..9549ed120556ee75f7305e4f2dc379d470c36e37 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2200,6 +2200,7 @@ void trace_event_enum_update(struct trace_enum_map **map, int len) { struct trace_event_call *call, *p; const char *last_system = NULL; + bool first = false; int last_i; int i; @@ -2207,15 +2208,28 @@ void trace_event_enum_update(struct trace_enum_map **map, int len) list_for_each_entry_safe(call, p, &ftrace_events, list) { /* events are usually grouped together with systems */ if (!last_system || call->class->system != last_system) { + first = true; last_i = 0; last_system = call->class->system; } + /* + * Since calls are grouped by systems, the likelihood that the + * next call in the iteration belongs to the same system as the + * previous call is high. As an optimization, we skip searching + * for a map[] that matches the call's system if the last call + * was from the same system. That's what last_i is for. If the + * call has the same system as the previous call, then last_i + * will be the index of the first map[] that has a matching + * system. + */
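The comment above describes a simple memoization: because both the enum maps and the event calls are grouped by system, the scan for the first matching map[] entry only needs to be repeated when the system changes, and later calls from the same system can resume at last_i. A stand-alone model of the patched loop (struct call and struct map are invented stand-ins for trace_event_call and trace_enum_map):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    struct call { const char *system; };
    struct map  { const char *system; const char *name; };

    int main(void)
    {
            /* Both arrays are grouped by system, as in the kernel. */
            struct map maps[] = { {"sched", "A"}, {"sched", "B"}, {"irq", "C"} };
            struct call calls[] = { {"sched"}, {"irq"}, {"irq"} };
            const char *last_system = NULL;
            bool first = false;
            int last_i = 0, len = 3, i, c;

            for (c = 0; c < 3; c++) {
                    if (!last_system || strcmp(calls[c].system, last_system)) {
                            first = true;
                            last_i = 0;
                            last_system = calls[c].system;
                    }
                    /* Resume at last_i instead of rescanning from 0. */
                    for (i = last_i; i < len; i++) {
                            if (strcmp(calls[c].system, maps[i].system))
                                    continue;
                            if (first) {    /* remember the first match */
                                    last_i = i;
                                    first = false;
                            }
                            printf("update %s/%s\n", calls[c].system, maps[i].name);
                    }
            }
            return 0;
    }

The first flag replaces the old !last_i test, which mis-tracked groups whose first match sits at index 0: any later match in such a group would overwrite last_i, so subsequent calls from that system started scanning past its first map entries.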
for (i = last_i; i < len; i++) { if (call->class->system == map[i]->system) { /* Save the first system if need be */ - if (!last_i) + if (first) { last_i = i; + first = false; + } update_event_printk(call, map[i]); } } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f3a960ed75a197ffd0e519501b74bade3f227ce3..0664044ade06856f891a292b8134d1387d94669b 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -449,7 +449,7 @@ static int create_val_field(struct hist_trigger_data *hist_data, } field = trace_find_event_field(file->event_call, field_name); - if (!field) { + if (!field || !field->size) { ret = -EINVAL; goto out; } @@ -547,7 +547,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, } field = trace_find_event_field(file->event_call, field_name); - if (!field) { + if (!field || !field->size) { ret = -EINVAL; goto out; } diff --git a/kernel/uid16.c b/kernel/uid16.c index cc40793464e3ca1959c9960a3cdbc3180d4c468b..dcffcce9d75e91ee5cd45095d585ff714886c253 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -190,6 +190,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) return retval; } + groups_sort(group_info); retval = set_current_groups(group_info); put_group_info(group_info); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 296dcca77f33f2eaa2b2f05d35d5d57770a305c9..ebfea5f94b66a37bade9d3cb5f72c3cdf1c362a8 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -1506,6 +1507,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; + WARN_ON_ONCE(!wq); WARN_ON_ONCE(timer->function != delayed_work_timer_fn || timer->data != (unsigned long)dwork); WARN_ON_ONCE(timer_pending(timer)); @@ -4423,6 +4425,12 @@ void show_workqueue_state(void) if (pwq->nr_active || !list_empty(&pwq->delayed_works)) show_pwq(pwq); spin_unlock_irqrestore(&pwq->pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering + * hard lockup. + */ + touch_nmi_watchdog(); } } @@ -4450,6 +4458,12 @@ void show_workqueue_state(void) pr_cont("\n"); next_pool: spin_unlock_irqrestore(&pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering + * hard lockup.
+ */ + touch_nmi_watchdog(); } rcu_read_unlock_sched(); diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index 1ef0cec38d7879332966f8095d0bfdc0c7580230..dc14beae2c9aac7df010cdc36e861e33d34a1103 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -313,42 +313,47 @@ int asn1_ber_decoder(const struct asn1_decoder *decoder, /* Decide how to handle the operation */ switch (op) { - case ASN1_OP_MATCH_ANY_ACT: - case ASN1_OP_MATCH_ANY_ACT_OR_SKIP: - case ASN1_OP_COND_MATCH_ANY_ACT: - case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP: - ret = actions[machine[pc + 1]](context, hdr, tag, data + dp, len); - if (ret < 0) - return ret; - goto skip_data; - - case ASN1_OP_MATCH_ACT: - case ASN1_OP_MATCH_ACT_OR_SKIP: - case ASN1_OP_COND_MATCH_ACT_OR_SKIP: - ret = actions[machine[pc + 2]](context, hdr, tag, data + dp, len); - if (ret < 0) - return ret; - goto skip_data; - case ASN1_OP_MATCH: case ASN1_OP_MATCH_OR_SKIP: + case ASN1_OP_MATCH_ACT: + case ASN1_OP_MATCH_ACT_OR_SKIP: case ASN1_OP_MATCH_ANY: case ASN1_OP_MATCH_ANY_OR_SKIP: + case ASN1_OP_MATCH_ANY_ACT: + case ASN1_OP_MATCH_ANY_ACT_OR_SKIP: case ASN1_OP_COND_MATCH_OR_SKIP: + case ASN1_OP_COND_MATCH_ACT_OR_SKIP: case ASN1_OP_COND_MATCH_ANY: case ASN1_OP_COND_MATCH_ANY_OR_SKIP: - skip_data: + case ASN1_OP_COND_MATCH_ANY_ACT: + case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP: + if (!(flags & FLAG_CONS)) { if (flags & FLAG_INDEFINITE_LENGTH) { + size_t tmp = dp; + ret = asn1_find_indefinite_length( - data, datalen, &dp, &len, &errmsg); + data, datalen, &tmp, &len, &errmsg); if (ret < 0) goto error; - } else { - dp += len; } pr_debug("- LEAF: %zu\n", len); } + + if (op & ASN1_OP_MATCH__ACT) { + unsigned char act; + + if (op & ASN1_OP_MATCH__ANY) + act = machine[pc + 1]; + else + act = machine[pc + 2]; + ret = actions[act](context, hdr, tag, data + dp, len); + if (ret < 0) + return ret; + } + + if (!(flags & FLAG_CONS)) + dp += len; pc += asn1_op_lengths[op]; goto next_op; @@ -434,6 +439,8 @@ int asn1_ber_decoder(const struct asn1_decoder *decoder, else act = machine[pc + 1]; ret = actions[act](context, hdr, 0, data + tdp, len); + if (ret < 0) + return ret; } pc += asn1_op_lengths[op]; goto next_op; diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index da796e2dc4f506dc63db68b132180acff00c291f..c7c96bc7654af7c4d5f3c8bbf595989f9b3d0ebf 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -360,6 +360,10 @@ static int ddebug_parse_query(char *words[], int nwords, if (parse_lineno(last, &query->last_lineno) < 0) return -EINVAL; + /* special case for last lineno not specified */ + if (query->last_lineno == 0) + query->last_lineno = UINT_MAX; + if (query->last_lineno < query->first_lineno) { pr_err("last-line:%d < 1st-line:%d\n", query->last_lineno, diff --git a/lib/genalloc.c b/lib/genalloc.c index 144fe6b1a03ea536893f6e35d153a895a238b67d..ca06adc4f44510d816ddc9911a1ff38f04cd497b 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -194,7 +194,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy chunk->phys_addr = phys; chunk->start_addr = virt; chunk->end_addr = virt + size - 1; - atomic_set(&chunk->avail, size); + atomic_long_set(&chunk->avail, size); spin_lock(&pool->lock); list_add_rcu(&chunk->next_chunk, &pool->chunks); @@ -304,7 +304,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, nbits = (size + (1UL << order) - 1) >> order; rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { - if (size > atomic_read(&chunk->avail)) + if (size > 
atomic_long_read(&chunk->avail)) continue; start_bit = 0; @@ -324,7 +324,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, addr = chunk->start_addr + ((unsigned long)start_bit << order); size = nbits << order; - atomic_sub(size, &chunk->avail); + atomic_long_sub(size, &chunk->avail); break; } rcu_read_unlock(); @@ -390,7 +390,7 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); BUG_ON(remain); size = nbits << order; - atomic_add(size, &chunk->avail); + atomic_long_add(size, &chunk->avail); rcu_read_unlock(); return; } @@ -464,7 +464,7 @@ size_t gen_pool_avail(struct gen_pool *pool) rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) - avail += atomic_read(&chunk->avail); + avail += atomic_long_read(&chunk->avail); rcu_read_unlock(); return avail; } diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 2e385026915c152f0821629527c8378087e235af..98da7520a6aaef6608c6157806dace8d06780c3d 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5646,9 +5646,8 @@ static struct bpf_prog *generate_filter(int which, int *err) return NULL; } } - /* We don't expect to fail. */ if (*err) { - pr_cont("FAIL to attach err=%d len=%d\n", + pr_cont("FAIL to prog_create err=%d len=%d\n", *err, fprog.len); return NULL; } @@ -5671,6 +5670,10 @@ static struct bpf_prog *generate_filter(int which, int *err) * checks. */ fp = bpf_prog_select_runtime(fp, err); + if (*err) { + pr_cont("FAIL to select_runtime err=%d\n", *err); + return NULL; + } break; } @@ -5856,8 +5859,8 @@ static __init int test_bpf(void) pass_cnt++; continue; } - - return err; + err_cnt++; + continue; } pr_cont("jited:%u ", fp->jited); diff --git a/lib/test_kasan.c b/lib/test_kasan.c index fbdf87920093b9b1dbd94e4b370a24e43d6bb8f7..0e70ecc12fe20655b5aa0c4b4d7704706c55ec69 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -19,6 +19,7 @@ #include #include #include +#include /* * Note: test functions are marked noinline so that their names appear in @@ -441,6 +442,12 @@ static noinline void __init use_after_scope_test(void) static int __init kmalloc_tests_init(void) { + /* + * Temporarily enable multi-shot mode. Otherwise, we'd only get a + * report for the first case. 
+ */ + bool multishot = kasan_save_enable_multi_shot(); + kmalloc_oob_right(); kmalloc_oob_left(); kmalloc_node_oob_right(); @@ -465,6 +472,9 @@ static int __init kmalloc_tests_init(void) ksize_unpoisons_memory(); copy_user_test(); use_after_scope_test(); + + kasan_restore_multi_shot(multishot); + return -EAGAIN; } diff --git a/lib/ubsan.c b/lib/ubsan.c index fb0409df1bcf8474969ab5bf28f2c9623a77e041..50d1d5c25deb89501d967e3aed1d900870fd8299 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -265,14 +265,14 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data, } EXPORT_SYMBOL(__ubsan_handle_divrem_overflow); -static void handle_null_ptr_deref(struct type_mismatch_data *data) +static void handle_null_ptr_deref(struct type_mismatch_data_common *data) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s null pointer of type %s\n", type_check_kinds[data->type_check_kind], @@ -281,15 +281,15 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_missaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s misaligned address %p for type %s\n", type_check_kinds[data->type_check_kind], @@ -299,15 +299,15 @@ static void handle_missaligned_access(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -static void handle_object_size_mismatch(struct type_mismatch_data *data, +static void handle_object_size_mismatch(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s address %p with insufficient space\n", type_check_kinds[data->type_check_kind], (void *) ptr); @@ -315,19 +315,47 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, +static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, unsigned long ptr) { if (!ptr) handle_null_ptr_deref(data); else if (data->alignment && !IS_ALIGNED(ptr, data->alignment)) - handle_missaligned_access(data, ptr); + handle_misaligned_access(data, ptr); else handle_object_size_mismatch(data, ptr); } + +void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, + unsigned long ptr) +{ + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = data->alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} EXPORT_SYMBOL(__ubsan_handle_type_mismatch); +void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, + unsigned long ptr) +{ + + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = 1UL << data->log_alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} +EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); + void __ubsan_handle_nonnull_return(struct 
nonnull_return_data *data) { unsigned long flags; diff --git a/lib/ubsan.h b/lib/ubsan.h index b2d18d4a53f5a274d73d31bda8cae876ba7ddc34..d8b8085e5dacb002f1db704ab4c20a359da7cc8a 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -36,6 +36,20 @@ struct type_mismatch_data { unsigned char type_check_kind; }; +struct type_mismatch_data_v1 { + struct source_location location; + struct type_descriptor *type; + unsigned char log_alignment; + unsigned char type_check_kind; +}; + +struct type_mismatch_data_common { + struct source_location *location; + struct type_descriptor *type; + unsigned long alignment; + unsigned char type_check_kind; +}; + struct nonnull_arg_data { struct source_location location; struct source_location attr_location; diff --git a/mm/cma.c b/mm/cma.c index c960459eda7e640ea55be1d4ed80c6a9125a8877..397687fc51f97467901a807b08a5cc5ecc1addea 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -54,7 +54,7 @@ unsigned long cma_get_size(const struct cma *cma) } static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, - int align_order) + unsigned int align_order) { if (align_order <= cma->order_per_bit) return 0; @@ -62,17 +62,14 @@ static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, } /* - * Find a PFN aligned to the specified order and return an offset represented in - * order_per_bits. + * Find the offset of the base PFN from the specified align_order. + * The value returned is represented in order_per_bits. */ static unsigned long cma_bitmap_aligned_offset(const struct cma *cma, - int align_order) + unsigned int align_order) { - if (align_order <= cma->order_per_bit) - return 0; - - return (ALIGN(cma->base_pfn, (1UL << align_order)) - - cma->base_pfn) >> cma->order_per_bit; + return (cma->base_pfn & ((1UL << align_order) - 1)) + >> cma->order_per_bit; } static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8258e9eee80640024e28c41292a79148723c578e..c234c078693c2fd5e41442eb8bd2fc04b080f271 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -745,20 +745,15 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd) + pmd_t *pmd, int flags) { pmd_t _pmd; - /* - * We should set the dirty bit only for FOLL_WRITE but for now - * the dirty bit in the pmd is meaningless. And if the dirty - * bit will become meaningful and we'll only set it with - * FOLL_WRITE, an atomic set_bit will be required on the pmd to - * set the young bit, instead of the current set_pmd_at. - */ - _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); + _pmd = pmd_mkyoung(*pmd); + if (flags & FOLL_WRITE) + _pmd = pmd_mkdirty(_pmd); if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, - pmd, _pmd, 1)) + pmd, _pmd, flags & FOLL_WRITE)) update_mmu_cache_pmd(vma, addr, pmd); } @@ -787,7 +782,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, return NULL; if (flags & FOLL_TOUCH) - touch_pmd(vma, addr, pmd); + touch_pmd(vma, addr, pmd, flags); /* * device mapped pages can only be returned if the @@ -1158,7 +1153,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, page = pmd_page(*pmd); VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); if (flags & FOLL_TOUCH) - touch_pmd(vma, addr, pmd); + touch_pmd(vma, addr, pmd, flags); if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { /* * We don't mlock() pte-mapped THPs. 
This way we can avoid @@ -1514,37 +1509,69 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, { struct mm_struct *mm = vma->vm_mm; spinlock_t *ptl; - int ret = 0; + pmd_t entry; + bool preserve_write; + int ret; ptl = __pmd_trans_huge_lock(pmd, vma); - if (ptl) { - pmd_t entry; - bool preserve_write = prot_numa && pmd_write(*pmd); - ret = 1; + if (!ptl) + return 0; - /* - * Avoid trapping faults against the zero page. The read-only - * data is likely to be read-cached on the local CPU and - * local/remote hits to the zero page are not interesting. - */ - if (prot_numa && is_huge_zero_pmd(*pmd)) { - spin_unlock(ptl); - return ret; - } + preserve_write = prot_numa && pmd_write(*pmd); + ret = 1; - if (!prot_numa || !pmd_protnone(*pmd)) { - entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); - entry = pmd_modify(entry, newprot); - if (preserve_write) - entry = pmd_mkwrite(entry); - ret = HPAGE_PMD_NR; - set_pmd_at(mm, addr, pmd, entry); - BUG_ON(vma_is_anonymous(vma) && !preserve_write && - pmd_write(entry)); - } - spin_unlock(ptl); - } + /* + * Avoid trapping faults against the zero page. The read-only + * data is likely to be read-cached on the local CPU and + * local/remote hits to the zero page are not interesting. + */ + if (prot_numa && is_huge_zero_pmd(*pmd)) + goto unlock; + + if (prot_numa && pmd_protnone(*pmd)) + goto unlock; + + /* + * In the prot_numa case, we are under down_read(mmap_sem). It's critical + * to not clear pmd intermittently to avoid race with MADV_DONTNEED + * which is also under down_read(mmap_sem): + * + * CPU0: CPU1: + * change_huge_pmd(prot_numa=1) + * pmdp_huge_get_and_clear_notify() + * madvise_dontneed() + * zap_pmd_range() + * pmd_trans_huge(*pmd) == 0 (without ptl) + * // skip the pmd + * set_pmd_at(); + * // pmd is re-established + * + * The race makes MADV_DONTNEED miss the huge pmd and fail to clear it, + * which may break userspace. + * + * pmdp_invalidate() is required to make sure we don't miss + * dirty/young flags set by hardware. + */ + entry = *pmd; + pmdp_invalidate(vma, addr, pmd); + + /* + * Recover dirty/young flags. It relies on pmdp_invalidate to not + * corrupt them. + */ + if (pmd_dirty(*pmd)) + entry = pmd_mkdirty(entry); + if (pmd_young(*pmd)) + entry = pmd_mkyoung(entry); + entry = pmd_modify(entry, newprot); + if (preserve_write) + entry = pmd_mkwrite(entry); + ret = HPAGE_PMD_NR; + set_pmd_at(mm, addr, pmd, entry); + BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry)); +unlock: + spin_unlock(ptl); return ret; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 65c36acf8a6b5839ac7d74956dad63f73a523cd2..6ff65c40524326890bd088fa5ae052061ed16252 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3135,6 +3135,13 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) } } +static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) +{ + if (addr & ~(huge_page_mask(hstate_vma(vma)))) + return -EINVAL; + return 0; +} + /* * We cannot handle pagefaults against hugetlb pages at all.
They cause * handle_mm_fault() to try to instantiate regular-sized pages in the @@ -3151,6 +3158,7 @@ const struct vm_operations_struct hugetlb_vm_ops = { .fault = hugetlb_vm_op_fault, .open = hugetlb_vm_op_open, .close = hugetlb_vm_op_close, + .split = hugetlb_vm_op_split, }; static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, diff --git a/mm/internal.h b/mm/internal.h index 34a5459e5989e48fc523c37079a0919b62b35a60..3e2d016947479f5d403734031f2abf66dd985532 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -73,6 +73,12 @@ static inline void set_page_refcounted(struct page *page) extern unsigned long highest_memmap_pfn; +/* + * Maximum number of reclaim retries without progress before the OOM + * killer is considered the only way forward. + */ +#define MAX_RECLAIM_RETRIES 16 + /* * in mm/vmscan.c: */ diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 0e9505f66ec133bd401d6ec9208d1a6f17b95fb7..7a6d1a1f9894cd020d7bd90374719c73f1658446 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -39,6 +39,16 @@ #include "kasan.h" #include "../slab.h" +void kasan_enable_current(void) +{ + current->kasan_depth++; +} + +void kasan_disable_current(void) +{ + current->kasan_depth--; +} + /* * Poisons the shadow memory for 'size' bytes starting from 'addr'. * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE. @@ -428,7 +438,7 @@ void kasan_cache_shrink(struct kmem_cache *cache) quarantine_remove_cache(cache); } -void kasan_cache_destroy(struct kmem_cache *cache) +void kasan_cache_shutdown(struct kmem_cache *cache) { quarantine_remove_cache(cache); } @@ -559,7 +569,8 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object) shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object)); if (shadow_byte < 0 || shadow_byte >= KASAN_SHADOW_SCALE_SIZE) { - kasan_report_double_free(cache, object, shadow_byte); + kasan_report_double_free(cache, object, + __builtin_return_address(1)); return true; } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 1c260e6b3b3c6a1f26fc1e13a0fdb39099bbbf68..1229298cce646ddc725c4f1ea9d823a0c71e3cd1 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -96,15 +96,10 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) << KASAN_SHADOW_SCALE_SHIFT); } -static inline bool kasan_report_enabled(void) -{ - return !current->kasan_depth; -} - void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); void kasan_report_double_free(struct kmem_cache *cache, void *object, - s8 shadow); + void *ip); #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB) void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache); diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index baabaad4a4aaa89bb13fc691cf5df58af46c8b3b..3a8ddf8baf7dc3d52597bf0e53753c0cc17503cd 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -86,24 +87,9 @@ static void qlist_move_all(struct qlist_head *from, struct qlist_head *to) qlist_init(from); } -static void qlist_move(struct qlist_head *from, struct qlist_node *last, - struct qlist_head *to, size_t size) -{ - if (unlikely(last == from->tail)) { - qlist_move_all(from, to); - return; - } - if (qlist_empty(to)) - to->head = from->head; - else - to->tail->next = from->head; - to->tail = last; - from->head = last->next; - last->next = NULL; - from->bytes -= size; - to->bytes += size; -} - +#define QUARANTINE_PERCPU_SIZE (1 << 20) +#define QUARANTINE_BATCHES \ + (1024 > 4 * CONFIG_NR_CPUS ?
1024 : 4 * CONFIG_NR_CPUS) /* * The object quarantine consists of per-cpu queues and a global queue, @@ -111,11 +97,23 @@ static void qlist_move(struct qlist_head *from, struct qlist_node *last, */ static DEFINE_PER_CPU(struct qlist_head, cpu_quarantine); -static struct qlist_head global_quarantine; +/* Round-robin FIFO array of batches. */ +static struct qlist_head global_quarantine[QUARANTINE_BATCHES]; +static int quarantine_head; +static int quarantine_tail; +/* Total size of all objects in global_quarantine across all batches. */ +static unsigned long quarantine_size; static DEFINE_SPINLOCK(quarantine_lock); +DEFINE_STATIC_SRCU(remove_cache_srcu); /* Maximum size of the global queue. */ -static unsigned long quarantine_size; +static unsigned long quarantine_max_size; + +/* + * Target size of a batch in global_quarantine. + * Usually equal to QUARANTINE_PERCPU_SIZE unless we have too much RAM. + */ +static unsigned long quarantine_batch_size; /* * The fraction of physical memory the quarantine is allowed to occupy. @@ -124,9 +122,6 @@ static unsigned long quarantine_size; */ #define QUARANTINE_FRACTION 32 -#define QUARANTINE_LOW_SIZE (READ_ONCE(quarantine_size) * 3 / 4) -#define QUARANTINE_PERCPU_SIZE (1 << 20) - static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink) { return virt_to_head_page(qlink)->slab_cache; @@ -180,62 +175,89 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) struct qlist_head *q; struct qlist_head temp = QLIST_INIT; + /* + * Note: irq must be disabled until after we move the batch to the + * global quarantine. Otherwise quarantine_remove_cache() can miss + * some objects belonging to the cache if they are in our local temp + * list. quarantine_remove_cache() executes on_each_cpu() at the + * beginning which ensures that it either sees the objects in per-cpu + * lists or in the global quarantine. + */ local_irq_save(flags); q = this_cpu_ptr(&cpu_quarantine); qlist_put(q, &info->quarantine_link, cache->size); - if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) + if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) { qlist_move_all(q, &temp); - local_irq_restore(flags); - - if (unlikely(!qlist_empty(&temp))) { - spin_lock_irqsave(&quarantine_lock, flags); - qlist_move_all(&temp, &global_quarantine); - spin_unlock_irqrestore(&quarantine_lock, flags); + spin_lock(&quarantine_lock); + WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes); + qlist_move_all(&temp, &global_quarantine[quarantine_tail]); + if (global_quarantine[quarantine_tail].bytes >= + READ_ONCE(quarantine_batch_size)) { + int new_tail; + + new_tail = quarantine_tail + 1; + if (new_tail == QUARANTINE_BATCHES) + new_tail = 0; + if (new_tail != quarantine_head) + quarantine_tail = new_tail; + } + spin_unlock(&quarantine_lock); } + + local_irq_restore(flags); } void quarantine_reduce(void) { - size_t new_quarantine_size, percpu_quarantines; + size_t total_size, new_quarantine_size, percpu_quarantines; unsigned long flags; + int srcu_idx; struct qlist_head to_free = QLIST_INIT; - size_t size_to_free = 0; - struct qlist_node *last; - if (likely(READ_ONCE(global_quarantine.bytes) <= - READ_ONCE(quarantine_size))) + if (likely(READ_ONCE(quarantine_size) <= + READ_ONCE(quarantine_max_size))) return; + /* + * srcu critical section ensures that quarantine_remove_cache() + * will not miss objects belonging to the cache while they are in our + * local to_free list. 
srcu is chosen because (1) it gives us a private + * grace period domain that does not interfere with anything else, + * and (2) it allows synchronize_srcu() to return without waiting + * if there are no pending read critical sections (which is the + * expected case). + */ + srcu_idx = srcu_read_lock(&remove_cache_srcu); spin_lock_irqsave(&quarantine_lock, flags); /* * Update quarantine size in case of hotplug. Allocate a fraction of * the installed memory to quarantine minus per-cpu queue limits. */ - new_quarantine_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) / + total_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) / QUARANTINE_FRACTION; percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus(); - new_quarantine_size = (new_quarantine_size < percpu_quarantines) ? - 0 : new_quarantine_size - percpu_quarantines; - WRITE_ONCE(quarantine_size, new_quarantine_size); - - last = global_quarantine.head; - while (last) { - struct kmem_cache *cache = qlink_to_cache(last); - - size_to_free += cache->size; - if (!last->next || size_to_free > - global_quarantine.bytes - QUARANTINE_LOW_SIZE) - break; - last = last->next; + new_quarantine_size = (total_size < percpu_quarantines) ? + 0 : total_size - percpu_quarantines; + WRITE_ONCE(quarantine_max_size, new_quarantine_size); + /* Aim at consuming at most 1/2 of slots in quarantine. */ + WRITE_ONCE(quarantine_batch_size, max((size_t)QUARANTINE_PERCPU_SIZE, + 2 * total_size / QUARANTINE_BATCHES)); + + if (likely(quarantine_size > quarantine_max_size)) { + qlist_move_all(&global_quarantine[quarantine_head], &to_free); + WRITE_ONCE(quarantine_size, quarantine_size - to_free.bytes); + quarantine_head++; + if (quarantine_head == QUARANTINE_BATCHES) + quarantine_head = 0; } - qlist_move(&global_quarantine, last, &to_free, size_to_free); spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, NULL); + srcu_read_unlock(&remove_cache_srcu, srcu_idx); } static void qlist_move_cache(struct qlist_head *from, @@ -273,16 +295,34 @@ static void per_cpu_remove_cache(void *arg) qlist_free_all(&to_free, cache); } +/* Free all quarantined objects belonging to cache. */ void quarantine_remove_cache(struct kmem_cache *cache) { - unsigned long flags; + unsigned long flags, i; struct qlist_head to_free = QLIST_INIT; + /* + * Must be careful to not miss any objects that are being moved from + * per-cpu list to the global quarantine in quarantine_put(), + * nor objects being freed in quarantine_reduce(). on_each_cpu() + * achieves the first goal, while synchronize_srcu() achieves the + * second. + */ on_each_cpu(per_cpu_remove_cache, cache, 1); spin_lock_irqsave(&quarantine_lock, flags); - qlist_move_cache(&global_quarantine, &to_free, cache); + for (i = 0; i < QUARANTINE_BATCHES; i++) { + if (qlist_empty(&global_quarantine[i])) + continue; + qlist_move_cache(&global_quarantine[i], &to_free, cache); + /* Scanning whole quarantine can take a while. */
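Taken together, the global quarantine now behaves as a fixed ring of batches: quarantine_put() fills the batch at quarantine_tail and advances the tail once the batch reaches quarantine_batch_size (unless that would collide with the head), while quarantine_reduce() frees exactly one batch from quarantine_head, so each reduction touches a bounded amount of memory instead of walking one huge list. A toy model of the ring arithmetic (the four-slot ring and all sizes are invented):

    #include <stdio.h>

    #define QUARANTINE_BATCHES 4

    static unsigned long batch_bytes[QUARANTINE_BATCHES];
    static int quarantine_head, quarantine_tail;

    /* quarantine_put(): fill the tail batch, then try to advance it. */
    static void put_bytes(unsigned long bytes, unsigned long batch_size)
    {
            batch_bytes[quarantine_tail] += bytes;
            if (batch_bytes[quarantine_tail] >= batch_size) {
                    int new_tail = (quarantine_tail + 1) % QUARANTINE_BATCHES;

                    if (new_tail != quarantine_head) /* ring not full */
                            quarantine_tail = new_tail;
            }
    }

    /* quarantine_reduce(): free exactly one batch from the head. */
    static unsigned long reduce(void)
    {
            unsigned long freed = batch_bytes[quarantine_head];

            batch_bytes[quarantine_head] = 0;
            quarantine_head = (quarantine_head + 1) % QUARANTINE_BATCHES;
            return freed;
    }

    int main(void)
    {
            int i;

            for (i = 0; i < 6; i++)
                    put_bytes(100, 200);
            printf("head=%d tail=%d freed=%lu\n",
                   quarantine_head, quarantine_tail, reduce());
            return 0;
    }

Bounding the per-batch work is also what lets the quarantine_remove_cache() loop around this point drop quarantine_lock and cond_resched() between batches, instead of holding other CPUs off the quarantine for the whole scan.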
spin_unlock_irqrestore(&quarantine_lock, flags); + cond_resched(); + spin_lock_irqsave(&quarantine_lock, flags); + } spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, cache); + + synchronize_srcu(&remove_cache_srcu); } diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 8ca412aebcf1f39c65de1e09b89d8fdcf071efca..04bb1d3eb9ece0480182f2c738ed9d827fea3c62 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -13,7 +13,9 @@ * */ +#include #include +#include #include #include #include @@ -49,7 +51,13 @@ static const void *find_first_bad_addr(const void *addr, size_t size) return first_bad_addr; } -static void print_error_description(struct kasan_access_info *info) +static bool addr_has_shadow(struct kasan_access_info *info) +{ + return (info->access_addr >= + kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); +} + +static const char *get_shadow_bug_type(struct kasan_access_info *info) { const char *bug_type = "unknown-crash"; u8 *shadow_addr; @@ -96,12 +104,39 @@ static void print_error_description(struct kasan_access_info *info) break; } - pr_err("BUG: KASAN: %s in %pS at addr %p\n", - bug_type, (void *)info->ip, - info->access_addr); - pr_err("%s of size %zu by task %s/%d\n", - info->is_write ? "Write" : "Read", - info->access_size, current->comm, task_pid_nr(current)); + return bug_type; +} + +static const char *get_wild_bug_type(struct kasan_access_info *info) +{ + const char *bug_type = "unknown-crash"; + + if ((unsigned long)info->access_addr < PAGE_SIZE) + bug_type = "null-ptr-deref"; + else if ((unsigned long)info->access_addr < TASK_SIZE) + bug_type = "user-memory-access"; + else + bug_type = "wild-memory-access"; + + return bug_type; +} + +static const char *get_bug_type(struct kasan_access_info *info) +{ + if (addr_has_shadow(info)) + return get_shadow_bug_type(info); + return get_wild_bug_type(info); +} + +static void print_error_description(struct kasan_access_info *info) +{ + const char *bug_type = get_bug_type(info); + + pr_err("BUG: KASAN: %s in %pS\n", + bug_type, (void *)info->ip); + pr_err("%s of size %zu at addr %p by task %s/%d\n", + info->is_write ?
"Write" : "Read", info->access_size, + info->access_addr, current->comm, task_pid_nr(current)); } static inline bool kernel_or_module_addr(const void *addr) @@ -137,12 +172,14 @@ static void kasan_end_report(unsigned long *flags) pr_err("==================================================================\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irqrestore(&report_lock, *flags); + if (panic_on_warn) + panic("panic_on_warn set ...\n"); kasan_enable_current(); } -static void print_track(struct kasan_track *track) +static void print_track(struct kasan_track *track, const char *prefix) { - pr_err("PID = %u\n", track->pid); + pr_err("%s by task %u:\n", prefix, track->pid); if (track->stack) { struct stack_trace trace; @@ -153,59 +190,84 @@ static void print_track(struct kasan_track *track) } } -static void kasan_object_err(struct kmem_cache *cache, void *object) +static struct page *addr_to_page(const void *addr) { - struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object); + if ((addr >= (void *)PAGE_OFFSET) && + (addr < high_memory)) + return virt_to_head_page(addr); + return NULL; +} - dump_stack(); - pr_err("Object at %p, in cache %s size: %d\n", object, cache->name, - cache->object_size); +static void describe_object_addr(struct kmem_cache *cache, void *object, + const void *addr) +{ + unsigned long access_addr = (unsigned long)addr; + unsigned long object_addr = (unsigned long)object; + const char *rel_type; + int rel_bytes; + + pr_err("The buggy address belongs to the object at %p\n" + " which belongs to the cache %s of size %d\n", + object, cache->name, cache->object_size); - if (!(cache->flags & SLAB_KASAN)) + if (!addr) return; - pr_err("Allocated:\n"); - print_track(&alloc_info->alloc_track); - pr_err("Freed:\n"); - print_track(&alloc_info->free_track); + if (access_addr < object_addr) { + rel_type = "to the left"; + rel_bytes = object_addr - access_addr; + } else if (access_addr >= object_addr + cache->object_size) { + rel_type = "to the right"; + rel_bytes = access_addr - (object_addr + cache->object_size); + } else { + rel_type = "inside"; + rel_bytes = access_addr - object_addr; + } + + pr_err("The buggy address is located %d bytes %s of\n" + " %d-byte region [%p, %p)\n", + rel_bytes, rel_type, cache->object_size, (void *)object_addr, + (void *)(object_addr + cache->object_size)); } -void kasan_report_double_free(struct kmem_cache *cache, void *object, - s8 shadow) +static void describe_object(struct kmem_cache *cache, void *object, + const void *addr) { - unsigned long flags; + struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object); - kasan_start_report(&flags); - pr_err("BUG: Double free or freeing an invalid pointer\n"); - pr_err("Unexpected shadow byte: 0x%hhX\n", shadow); - kasan_object_err(cache, object); - kasan_end_report(&flags); + if (cache->flags & SLAB_KASAN) { + print_track(&alloc_info->alloc_track, "Allocated"); + pr_err("\n"); + print_track(&alloc_info->free_track, "Freed"); + pr_err("\n"); + } + + describe_object_addr(cache, object, addr); } -static void print_address_description(struct kasan_access_info *info) +static void print_address_description(void *addr) { - const void *addr = info->access_addr; + struct page *page = addr_to_page(addr); - if ((addr >= (void *)PAGE_OFFSET) && - (addr < high_memory)) { - struct page *page = virt_to_head_page(addr); - - if (PageSlab(page)) { - void *object; - struct kmem_cache *cache = page->slab_cache; - object = nearest_obj(cache, page, - (void *)info->access_addr); - 
kasan_object_err(cache, object); - return; - } - dump_page(page, "kasan: bad access detected"); + dump_stack(); + pr_err("\n"); + + if (page && PageSlab(page)) { + struct kmem_cache *cache = page->slab_cache; + void *object = nearest_obj(cache, page, addr); + + describe_object(cache, object, addr); } - if (kernel_or_module_addr(addr)) { - if (!init_task_stack_addr(addr)) - pr_err("Address belongs to variable %pS\n", addr); + if (kernel_or_module_addr(addr) && !init_task_stack_addr(addr)) { + pr_err("The buggy address belongs to the variable:\n"); + pr_err(" %pS\n", addr); + } + + if (page) { + pr_err("The buggy address belongs to the page:\n"); + dump_page(page, "kasan: bad access detected"); } - dump_stack(); } static bool row_is_guilty(const void *row, const void *guilty) @@ -260,37 +322,74 @@ static void print_shadow_for_address(const void *addr) } } +void kasan_report_double_free(struct kmem_cache *cache, void *object, + void *ip) +{ + unsigned long flags; + + kasan_start_report(&flags); + pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", ip); + pr_err("\n"); + print_address_description(object); + pr_err("\n"); + print_shadow_for_address(object); + kasan_end_report(&flags); +} + static void kasan_report_error(struct kasan_access_info *info) { unsigned long flags; - const char *bug_type; kasan_start_report(&flags); - if (info->access_addr < - kasan_shadow_to_mem((void *)KASAN_SHADOW_START)) { - if ((unsigned long)info->access_addr < PAGE_SIZE) - bug_type = "null-ptr-deref"; - else if ((unsigned long)info->access_addr < TASK_SIZE) - bug_type = "user-memory-access"; - else - bug_type = "wild-memory-access"; - pr_err("BUG: KASAN: %s on address %p\n", - bug_type, info->access_addr); - pr_err("%s of size %zu by task %s/%d\n", - info->is_write ? 
"Write" : "Read", - info->access_size, current->comm, - task_pid_nr(current)); + print_error_description(info); + pr_err("\n"); + + if (!addr_has_shadow(info)) { dump_stack(); } else { - print_error_description(info); - print_address_description(info); + print_address_description((void *)info->access_addr); + pr_err("\n"); print_shadow_for_address(info->first_bad_addr); } kasan_end_report(&flags); } +static unsigned long kasan_flags; + +#define KASAN_BIT_REPORTED 0 +#define KASAN_BIT_MULTI_SHOT 1 + +bool kasan_save_enable_multi_shot(void) +{ + return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); +} +EXPORT_SYMBOL_GPL(kasan_save_enable_multi_shot); + +void kasan_restore_multi_shot(bool enabled) +{ + if (!enabled) + clear_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); +} +EXPORT_SYMBOL_GPL(kasan_restore_multi_shot); + +static int __init kasan_set_multi_shot(char *str) +{ + set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); + return 1; +} +__setup("kasan_multi_shot", kasan_set_multi_shot); + +static inline bool kasan_report_enabled(void) +{ + if (current->kasan_depth) + return false; + if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) + return true; + return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags); +} + void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip) { diff --git a/mm/kmemleak.c b/mm/kmemleak.c index d1380ed93fdf084d5043a2fb1f8bb3e476cb7bf9..20cf3be9a5e8289e355fe49aa7653ab410a4c4a4 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1442,6 +1442,8 @@ static void kmemleak_scan(void) if (page_count(page) == 0) continue; scan_block(page, page + 1, NULL); + if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page)))) + cond_resched(); } } put_online_mems(); diff --git a/mm/madvise.c b/mm/madvise.c index 088a5b22f3975e9b551df4e5a92cf88354713571..bb58a44c848ab2f8f9c2d47505634589e71f17c8 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -228,15 +228,14 @@ static long madvise_willneed(struct vm_area_struct *vma, { struct file *file = vma->vm_file; + *prev = vma; #ifdef CONFIG_SWAP if (!file) { - *prev = vma; force_swapin_readahead(vma, start, end); return 0; } if (shmem_mapping(file->f_mapping)) { - *prev = vma; force_shm_swapin_readahead(vma, start, end, file->f_mapping); return 0; @@ -251,7 +250,6 @@ static long madvise_willneed(struct vm_area_struct *vma, return 0; } - *prev = vma; start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; if (end > vma->vm_end) end = vma->vm_end; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2a800c4a39bdb6581a162b1df9b4ff60d8ed211f..50088150fc1739a9ef2caabd4976115d6c4ea749 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5531,7 +5531,7 @@ static void uncharge_list(struct list_head *page_list) next = page->lru.next; VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON_PAGE(page_count(page), page); + VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page); if (!page->mem_cgroup) continue; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ce7d416edab7f16b72e4aacbca41540f91ed72b9..5aa71a82ca7332b2de2b3d532ad69f595618a43a 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -535,6 +535,13 @@ static int delete_from_lru_cache(struct page *p) */ ClearPageActive(p); ClearPageUnevictable(p); + + /* + * Poisoned page might never drop its ref count to 0 so we have + * to uncharge it manually from its memcg. 
+ */ + mem_cgroup_uncharge(p); + /* * drop the page count elevated by isolate_lru_page() */ diff --git a/mm/mmap.c b/mm/mmap.c index 6f90f07f12b8f623d21b0bf282e2ed0c5c5a2084..4b412e271bc0cd95d4eaf136898857d90c8456c8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2249,7 +2249,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) gap_addr = TASK_SIZE; next = vma->vm_next; - if (next && next->vm_start < gap_addr) { + if (next && next->vm_start < gap_addr && + (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) { if (!(next->vm_flags & VM_GROWSUP)) return -ENOMEM; /* Check that both stack segments have the same anon_vma? */ @@ -2333,7 +2334,8 @@ int expand_downwards(struct vm_area_struct *vma, if (gap_addr > address) return -ENOMEM; prev = vma->vm_prev; - if (prev && prev->vm_end > gap_addr) { + if (prev && prev->vm_end > gap_addr && + (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) { if (!(prev->vm_flags & VM_GROWSDOWN)) return -ENOMEM; /* Check that both stack segments have the same anon_vma? */ @@ -2547,9 +2549,11 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *new; int err; - if (is_vm_hugetlb_page(vma) && (addr & - ~(huge_page_mask(hstate_vma(vma))))) - return -EINVAL; + if (vma->vm_ops && vma->vm_ops->split) { + err = vma->vm_ops->split(vma, addr); + if (err) + return err; + } new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!new) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d631d251c150041ca2cb25dbbf37d67524516dda..4a184157cc3dbdc8cb1f422561ffdef9d47272df 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -524,7 +524,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) */ set_bit(MMF_UNSTABLE, &mm->flags); - tlb_gather_mmu(&tlb, mm, 0, -1); for (vma = mm->mmap ; vma; vma = vma->vm_next) { if (is_vm_hugetlb_page(vma)) continue; @@ -546,11 +545,13 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) * we do not want to block exit_mmap by keeping mm ref * count elevated without a good reason. */ - if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) + if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { + tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end); unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, &details); + tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end); + } } - tlb_finish_mmu(&tlb, 0, -1); pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", task_pid_nr(tsk), tsk->comm, K(get_mm_counter(mm, MM_ANONPAGES)), diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4a044134ce843d98fed2dc2c8cc47f4646f7d7a7..94018ea5f93523b367f27286c6497907bbebb2c8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2592,30 +2592,23 @@ int __isolate_free_page(struct page *page, unsigned int order) * Update NUMA hit/miss statistics * * Must be called with interrupts disabled. - * - * When __GFP_OTHER_NODE is set assume the node of the preferred - * zone is the local node. This is useful for daemons who allocate - * memory on behalf of other processes. 
*/ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags) { #ifdef CONFIG_NUMA - int local_nid = numa_node_id(); enum zone_stat_item local_stat = NUMA_LOCAL; - if (unlikely(flags & __GFP_OTHER_NODE)) { + if (z->node != numa_node_id()) local_stat = NUMA_OTHER; - local_nid = preferred_zone->node; - } - if (z->node == local_nid) { + if (z->node == preferred_zone->node) __inc_zone_state(z, NUMA_HIT); - __inc_zone_state(z, local_stat); - } else { + else { __inc_zone_state(z, NUMA_MISS); __inc_zone_state(preferred_zone, NUMA_FOREIGN); } + __inc_zone_state(z, local_stat); #endif } @@ -2828,9 +2821,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, if (!area->nr_free) continue; - if (alloc_harder) - return true; - for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) { if (!list_empty(&area->free_list[mt])) return true; @@ -2842,6 +2832,9 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, return true; } #endif + if (alloc_harder && + !list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) + return true; } return false; } @@ -3428,12 +3421,6 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask) return false; } -/* - * Maximum number of reclaim retries without any progress before OOM killer - * is consider as the only way to move forward. - */ -#define MAX_RECLAIM_RETRIES 16 - /* * Checks whether it makes sense to retry the reclaim to make a forward progress * for the given allocation request. @@ -4392,7 +4379,8 @@ void show_free_areas(unsigned int filter) K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), K(node_page_state(pgdat, NR_UNSTABLE_NFS)), node_page_state(pgdat, NR_PAGES_SCANNED), - !pgdat_reclaimable(pgdat) ? "yes" : "no"); + pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? + "yes" : "no"); } for_each_populated_zone(zone) { @@ -7309,11 +7297,18 @@ int alloc_contig_range(unsigned long start, unsigned long end, /* * In case of -EBUSY, we'd like to know which page causes problem. - * So, just fall through. We will check it in test_pages_isolated(). + * So, just fall through. test_pages_isolated() has a tracepoint + * which will report the busy page. + * + * It is possible that busy pages could become available before + * the call to test_pages_isolated, and the range will actually be + * allocated. So, if we fall through be sure to clear ret so that + * -EBUSY is not accidentally used or returned to caller. 
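
With the zone_statistics() rewrite above, NUMA_HIT/NUMA_MISS/NUMA_FOREIGN are decided purely against the preferred zone, while NUMA_LOCAL/NUMA_OTHER compare the allocating zone against the running CPU's node and are bumped on every allocation. A self-contained sketch of the resulting bookkeeping, with plain arrays standing in for the per-zone counters:

enum { MAX_NODE = 8 };
static unsigned long hit[MAX_NODE], miss[MAX_NODE], foreign[MAX_NODE];
static unsigned long local[MAX_NODE], other[MAX_NODE];

/* page came from node `alloc`; caller preferred `pref`; CPU on `cpu` */
static void tally(int alloc, int pref, int cpu)
{
	if (alloc == pref) {
		hit[alloc]++;
	} else {
		miss[alloc]++;		/* a non-preferred node served it */
		foreign[pref]++;	/* the preferred node lost this one */
	}
	if (alloc == cpu)
		local[alloc]++;
	else
		other[alloc]++;
}
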
	 */
	ret = __alloc_contig_migrate_range(&cc, start, end);
	if (ret && ret != -EBUSY)
		goto done;
+	ret = 0;

	/*
	 * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 622f6b6ae84422a80e9c9821f387f9dfdf3f200f..0dc614d1d6f99d589657e32f84727c045bbcabc6 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -458,6 +458,9 @@ EXPORT_SYMBOL(kmem_cache_create);
 static int shutdown_cache(struct kmem_cache *s,
 		struct list_head *release, bool *need_rcu_barrier)
 {
+	/* free asan quarantined objects */
+	kasan_cache_shutdown(s);
+
 	if (__kmem_cache_shutdown(s) != 0)
 		return -EBUSY;

@@ -741,7 +744,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
 	get_online_cpus();
 	get_online_mems();

-	kasan_cache_destroy(s);
 	mutex_lock(&slab_mutex);
 	s->refcount--;

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 30a88b945a44f58e812a5eaf824a087b51a5dbec..f118dc23f6624ac8b5ad9fb0f719805adf8662ed 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2606,6 +2606,15 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 	} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
 					 sc->nr_scanned - nr_scanned, sc));

+	/*
+	 * Kswapd gives up on balancing particular nodes after too
+	 * many failures to reclaim anything from them and goes to
+	 * sleep. On reclaim progress, reset the failure counter. A
+	 * successful direct reclaim run will revive a dormant kswapd.
+	 */
+	if (reclaimable)
+		pgdat->kswapd_failures = 0;
+
 	return reclaimable;
 }

@@ -2680,10 +2689,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 						 GFP_KERNEL | __GFP_HARDWALL))
 				continue;

-			if (sc->priority != DEF_PRIORITY &&
-			    !pgdat_reclaimable(zone->zone_pgdat))
-				continue;	/* Let kswapd poll it */
-
 			/*
 			 * If we already have plenty of memory free for
 			 * compaction in this zone, don't free any more.
@@ -2820,7 +2825,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	return 0;
 }

-static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
+static bool allow_direct_reclaim(pg_data_t *pgdat)
 {
 	struct zone *zone;
 	unsigned long pfmemalloc_reserve = 0;
@@ -2828,6 +2833,9 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
 	int i;
 	bool wmark_ok;

+	if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+		return true;
+
 	for (i = 0; i <= ZONE_NORMAL; i++) {
 		zone = &pgdat->node_zones[i];
 		if (!managed_zone(zone) ||
@@ -2908,7 +2916,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,

 		/* Throttle based on the first usable node */
 		pgdat = zone->zone_pgdat;
-		if (pfmemalloc_watermark_ok(pgdat))
+		if (allow_direct_reclaim(pgdat))
 			goto out;
 		break;
 	}
@@ -2930,14 +2938,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 	 */
 	if (!(gfp_mask & __GFP_FS)) {
 		wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
-			pfmemalloc_watermark_ok(pgdat), HZ);
+			allow_direct_reclaim(pgdat), HZ);

 		goto check_pending;
 	}

 	/* Throttle until kswapd wakes the process */
 	wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
-		pfmemalloc_watermark_ok(pgdat));
+		allow_direct_reclaim(pgdat));

 check_pending:
 	if (fatal_signal_pending(current))
@@ -3116,7 +3124,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)

 	/*
 	 * The throttled processes are normally woken up in balance_pgdat() as
-	 * soon as pfmemalloc_watermark_ok() is true. But there is a potential
+	 * soon as allow_direct_reclaim() is true. But there is a potential
 	 * race between when kswapd checks the watermarks and a process gets
 	 * throttled.
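
Taken together, the vmscan.c hunks above and below give the new kswapd_failures counter a simple life cycle; condensed here into one sketch (field and function names are the real ones, surrounding logic elided, not compilable as a single function):

/* balance_pgdat(): a full priority loop that reclaimed nothing */
if (!sc.nr_reclaimed)
	pgdat->kswapd_failures++;

/* shrink_node(): any progress revives the node */
if (reclaimable)
	pgdat->kswapd_failures = 0;

/* wakeup_kswapd() / prepare_kswapd_sleep() / allow_direct_reclaim():
 * past the limit, stop waking kswapd and stop throttling direct
 * reclaim on this node */
if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
	return;
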
There is also a potential race if processes get * throttled, kswapd wakes, a large process exits thereby balancing the @@ -3130,6 +3138,10 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx) if (waitqueue_active(&pgdat->pfmemalloc_wait)) wake_up_all(&pgdat->pfmemalloc_wait); + /* Hopeless node, leave it to direct reclaim */ + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return true; + for (i = 0; i <= classzone_idx; i++) { struct zone *zone = pgdat->node_zones + i; @@ -3216,9 +3228,9 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) count_vm_event(PAGEOUTRUN); do { + unsigned long nr_reclaimed = sc.nr_reclaimed; bool raise_priority = true; - sc.nr_reclaimed = 0; sc.reclaim_idx = classzone_idx; /* @@ -3297,7 +3309,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * able to safely make forward progress. Wake them */ if (waitqueue_active(&pgdat->pfmemalloc_wait) && - pfmemalloc_watermark_ok(pgdat)) + allow_direct_reclaim(pgdat)) wake_up_all(&pgdat->pfmemalloc_wait); /* Check if kswapd should be suspending */ @@ -3308,10 +3320,14 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * Raise priority if scanning rate is too low or there was no * progress in reclaiming pages */ - if (raise_priority || !sc.nr_reclaimed) + nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; + if (raise_priority || !nr_reclaimed) sc.priority--; } while (sc.priority >= 1); + if (!sc.nr_reclaimed) + pgdat->kswapd_failures++; + out: /* * Return the order kswapd stopped reclaiming at as @@ -3511,6 +3527,10 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) if (!waitqueue_active(&pgdat->kswapd_wait)) return; + /* Hopeless node, leave it to direct reclaim */ + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return; + /* Only wake kswapd if all zones are unbalanced */ for (z = 0; z <= classzone_idx; z++) { zone = pgdat->node_zones + z; @@ -3781,9 +3801,6 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages) return NODE_RECLAIM_FULL; - if (!pgdat_reclaimable(pgdat)) - return NODE_RECLAIM_FULL; - /* * Do not scan if the allocation should not be delayed. */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 604f26a4f69678220f8cf41b88694d8355ab01b7..3863b5d6d598f72df2a6a74af4a0218c7bd6dadd 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -932,6 +932,7 @@ const char * const vmstat_text[] = { "nr_slab_unreclaimable", "nr_page_table_pages", "nr_kernel_stack", + "nr_overhead", "nr_bounce", #if IS_ENABLED(CONFIG_ZSMALLOC) "nr_zspages", @@ -1420,7 +1421,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n node_unreclaimable: %u" "\n start_pfn: %lu" "\n node_inactive_ratio: %u", - !pgdat_reclaimable(zone->zone_pgdat), + pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES, zone->zone_start_pfn, zone->zone_pgdat->inactive_ratio); seq_putc(m, '\n'); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 1689bb58e0d170053cc00a8b41c24404cbf0c3f4..d3548c48369f04aaa66a8b517980422a059e767c 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1407,7 +1407,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, * pools/users, we can't allow mapping in interrupt context * because it can corrupt another users mappings. 
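
For context, this is the calling convention that the zsmalloc comment here (and the stricter BUG_ON below) protects; a minimal sketch of a writer, with pool, handle, src and len assumed to come from zs_create_pool()/zs_malloc() and the caller:

void *dst;

/* must run in process context; the mapping is CPU-local */
dst = zs_map_object(pool, handle, ZS_MM_WO);
memcpy(dst, src, len);
zs_unmap_object(pool, handle);
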
*/ - WARN_ON_ONCE(in_interrupt()); + BUG_ON(in_interrupt()); /* From now on, migration cannot move the object */ pin_tag(handle); diff --git a/mm/zswap.c b/mm/zswap.c index dbef27822a987fde3b90de71313f6e479cb44d46..ded051e3433dc77d1924096b3df8bf8c3a39214c 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -752,18 +752,22 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp, pool = zswap_pool_find_get(type, compressor); if (pool) { zswap_pool_debug("using existing", pool); + WARN_ON(pool == zswap_pool_current()); list_del_rcu(&pool->list); - } else { - spin_unlock(&zswap_pools_lock); - pool = zswap_pool_create(type, compressor); - spin_lock(&zswap_pools_lock); } + spin_unlock(&zswap_pools_lock); + + if (!pool) + pool = zswap_pool_create(type, compressor); + if (pool) ret = param_set_charp(s, kp); else ret = -EINVAL; + spin_lock(&zswap_pools_lock); + if (!ret) { put_pool = zswap_pool_current(); list_add_rcu(&pool->list, &zswap_pools); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 4a47074d1d7fa37490ca7e3a2aff5e894da0b64d..c8ea3cf9db8563de9d3d1ff8433c9d8554b4abb6 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_uninit_applicant(real_dev); } - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). - */ - if (vlan_id) - vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ffd09c1675d4bc099c1cb91101bd19bdb527b6dc..2bbca23a9d050434743b8385b164115ebea2bde3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3353,9 +3353,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data break; case L2CAP_CONF_EFS: - remote_efs = 1; - if (olen == sizeof(efs)) + if (olen == sizeof(efs)) { + remote_efs = 1; memcpy(&efs, (void *) val, olen); + } break; case L2CAP_CONF_EWS: @@ -3574,16 +3575,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, break; case L2CAP_CONF_EFS: - if (olen == sizeof(efs)) + if (olen == sizeof(efs)) { memcpy(&efs, (void *)val, olen); - if (chan->local_stype != L2CAP_SERV_NOTRAFIC && - efs.stype != L2CAP_SERV_NOTRAFIC && - efs.stype != chan->local_stype) - return -ECONNREFUSED; + if (chan->local_stype != L2CAP_SERV_NOTRAFIC && + efs.stype != L2CAP_SERV_NOTRAFIC && + efs.stype != chan->local_stype) + return -ECONNREFUSED; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), + (unsigned long) &efs, endptr - ptr); + } break; case L2CAP_CONF_FCS: diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index aa1df1a10dd712d9b47e98d4a69c207fba651399..82ce5713f744b4648d5c58b25c295aef4df8a043 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -706,18 +706,20 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge; - unsigned int mtu_reserved; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + unsigned int mtu, mtu_reserved; mtu_reserved = 
nf_bridge_mtu_reduction(skb); + mtu = skb->dev->mtu; + + if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) + mtu = nf_bridge->frag_max_size; - if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { + if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); } - nf_bridge = nf_bridge_info_get(skb); - /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5d4006e589cbaf0d1ace8e10a6a44b4b1c811a1b..4f831225d34f4c52ad65882a8878723894a20585 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1092,19 +1092,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev, struct net_bridge *br = netdev_priv(dev); int err; + err = register_netdevice(dev); + if (err) + return err; + if (tb[IFLA_ADDRESS]) { spin_lock_bh(&br->lock); br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); spin_unlock_bh(&br->lock); } - err = register_netdevice(dev); - if (err) - return err; - err = br_changelink(dev, tb, data); if (err) - unregister_netdevice(dev); + br_dev_delete(dev, NULL); + return err; } diff --git a/net/can/af_can.c b/net/can/af_can.c index 5488e4a6ccd062e6f6e7e2b841dde5ef055d4337..ac1552d8b4ad11ac5544aaa8c0ff947a02b3d0d4 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -722,13 +722,12 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, if (unlikely(!net_eq(dev_net(dev), &init_net))) goto drop; - if (WARN_ONCE(dev->type != ARPHRD_CAN || - skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN, - "PF_CAN: dropped non conform CAN skbuf: " - "dev type %d, len %d, datalen %d\n", - dev->type, skb->len, cfd->len)) + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU || + cfd->len > CAN_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n", + dev->type, skb->len, cfd->len); goto drop; + } can_receive(skb, dev); return NET_RX_SUCCESS; @@ -746,13 +745,12 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, if (unlikely(!net_eq(dev_net(dev), &init_net))) goto drop; - if (WARN_ONCE(dev->type != ARPHRD_CAN || - skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN, - "PF_CAN: dropped non conform CAN FD skbuf: " - "dev type %d, len %d, datalen %d\n", - dev->type, skb->len, cfd->len)) + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU || + cfd->len > CANFD_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n", + dev->type, skb->len, cfd->len); goto drop; + } can_receive(skb, dev); return NET_RX_SUCCESS; diff --git a/net/core/dev.c b/net/core/dev.c index c37891828e4edfcc90363be43b1ee1dc8a858a82..67b5d4d8acb1f842203e229700a882df389bbb47 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1304,6 +1304,7 @@ void netdev_notify_peers(struct net_device *dev) { rtnl_lock(); call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); + call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev); rtnl_unlock(); } EXPORT_SYMBOL(netdev_notify_peers); @@ -3082,10 +3083,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) hdr_len = skb_transport_header(skb) - skb_mac_header(skb); /* + transport layer */ - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len += tcp_hdrlen(skb); - else - hdr_len += sizeof(struct udphdr); + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + const struct 
tcphdr *th; + struct tcphdr _tcphdr; + + th = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_tcphdr), &_tcphdr); + if (likely(th)) + hdr_len += __tcp_hdrlen(th); + } else { + struct udphdr _udphdr; + + if (skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_udphdr), &_udphdr)) + hdr_len += sizeof(struct udphdr); + } if (shinfo->gso_type & SKB_GSO_DODGY) gso_segs = DIV_ROUND_UP(skb->len - hdr_len, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index e9989b835a662dcf270c86783e378b09ffab980e..7913771ec4744a6f8b5c7089dca1166b9f8bf6bf 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -742,15 +742,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev, return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); } -static void -warn_incomplete_ethtool_legacy_settings_conversion(const char *details) -{ - char name[sizeof(current->comm)]; - - pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n", - get_task_comm(name, current), details); -} - /* Query device for its ethtool_cmd settings. * * Backward compatibility note: for compatibility with legacy ethtool, @@ -777,10 +768,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) &link_ksettings); if (err < 0) return err; - if (!convert_link_ksettings_to_legacy_settings(&cmd, - &link_ksettings)) - warn_incomplete_ethtool_legacy_settings_conversion( - "link modes are only partially reported"); + convert_link_ksettings_to_legacy_settings(&cmd, + &link_ksettings); /* send a sensible cmd tag back to user */ cmd.cmd = ETHTOOL_GSET; diff --git a/net/core/filter.c b/net/core/filter.c index 5e42e0e54f6f44cc56ae827f68b8f459d48cce06..c385c55af653b569651f28f31b92de9d181c5b1c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -451,6 +451,10 @@ static int bpf_convert_filter(struct sock_filter *prog, int len, convert_bpf_extensions(fp, &insn)) break; + if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || + fp->code == (BPF_ALU | BPF_MOD | BPF_X)) + *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); + *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); break; @@ -1015,11 +1019,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) */ goto out_err_free; - /* We are guaranteed to never error here with cBPF to eBPF - * transitions, since there's no issue with type compatibility - * checks on program arrays. - */ fp = bpf_prog_select_runtime(fp, &err); + if (err) + goto out_err_free; kfree(old_prog); return fp; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 32e4e0158846b18bdbd7f482847d987a105ef1cf..862d63ec56e4058c0d01e68912d558f8be0e1d7f 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -550,8 +550,8 @@ bool __skb_flow_dissect(const struct sk_buff *skb, out_good: ret = true; - key_control->thoff = (u16)nhoff; out: + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; @@ -559,7 +559,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb, out_bad: ret = false; - key_control->thoff = min_t(u16, nhoff, skb ? 
skb->len : hlen); goto out; } EXPORT_SYMBOL(__skb_flow_dissect); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f45f6198851faf4e04bed08f37a691f54692c7d4..7b315663f840816e4849adf6a1c8c1e88ab8db5e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -496,7 +496,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) nht = neigh_hash_grow(tbl, nht->hash_shift + 1); - hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); + hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift); if (n->parms->dead) { rc = ERR_PTR(-EINVAL); @@ -508,7 +508,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, n1 != NULL; n1 = rcu_dereference_protected(n1->next, lockdep_is_held(&tbl->lock))) { - if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { + if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { if (want_ref) neigh_hold(n1); rc = n1; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7001da910c6b26cb2300c38147305a459895cc49..b7efe2f19f8375cc90f2f2f71a16bcbb9bde8b07 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -263,7 +263,7 @@ struct net *get_net_ns_by_id(struct net *net, int id) spin_lock_irqsave(&net->nsid_lock, flags); peer = idr_find(&net->netns_ids, id); if (peer) - get_net(peer); + peer = maybe_get_net(peer); spin_unlock_irqrestore(&net->nsid_lock, flags); rcu_read_unlock(); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index aec5605944d3c7977025da6e0689f7a5823b905b..a64515583bc161638e9d62985fab5543f3ee8be0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3823,7 +3823,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk = skb->sk; if (!skb_may_tx_timestamp(sk, false)) - return; + goto err; /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. @@ -3832,7 +3832,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, *skb_hwtstamps(skb) = *hwtstamps; __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); sock_put(sk); + return; } + +err: + kfree_skb(skb); } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index acd2a6caba8565a7e8750e9d87d605f9ace7783d..fb467db2344aa825490b5bcbdaf238302b2f8d7f 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -295,7 +295,7 @@ static int sock_diag_bind(struct net *net, int group) case SKNLGRP_INET6_UDP_DESTROY: if (!sock_diag_handlers[AF_INET6]) request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET); + NETLINK_SOCK_DIAG, AF_INET6); break; } return 0; diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 77f396b679ce74304ab6ef6a178900230ca87780..5dce4291f0ed019500b0b68add876de09735a4a5 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -93,6 +93,16 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) return more_reuse; } +static void reuseport_free_rcu(struct rcu_head *head) +{ + struct sock_reuseport *reuse; + + reuse = container_of(head, struct sock_reuseport, rcu); + if (reuse->prog) + bpf_prog_destroy(reuse->prog); + kfree(reuse); +} + /** * reuseport_add_sock - Add a socket to the reuseport group of another. * @sk: New socket to add to the group. 
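
reuseport_add_sock() is what the second and later bind() calls on a SO_REUSEPORT group reach; the rework below makes re-adding a socket that still belongs to a live group fail with -EBUSY instead of tripping a WARN, and defers freeing the old group through RCU. A minimal userspace sketch that drives this path (port number arbitrary, error handling elided):

#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

static int reuseport_member(void)
{
	int one = 1;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in addr;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(7777);
	addr.sin_addr.s_addr = htonl(INADDR_ANY);

	setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	return fd;	/* call twice: the second bind joins the group */
}
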
@@ -101,7 +111,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) */ int reuseport_add_sock(struct sock *sk, struct sock *sk2) { - struct sock_reuseport *reuse; + struct sock_reuseport *old_reuse, *reuse; if (!rcu_access_pointer(sk2->sk_reuseport_cb)) { int err = reuseport_alloc(sk2); @@ -112,10 +122,13 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) spin_lock_bh(&reuseport_lock); reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, - lockdep_is_held(&reuseport_lock)), - WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, - lockdep_is_held(&reuseport_lock)), - "socket already in reuseport group"); + lockdep_is_held(&reuseport_lock)); + old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + if (old_reuse && old_reuse->num_socks != 1) { + spin_unlock_bh(&reuseport_lock); + return -EBUSY; + } if (reuse->num_socks == reuse->max_socks) { reuse = reuseport_grow(reuse); @@ -133,19 +146,11 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) spin_unlock_bh(&reuseport_lock); + if (old_reuse) + call_rcu(&old_reuse->rcu, reuseport_free_rcu); return 0; } -static void reuseport_free_rcu(struct rcu_head *head) -{ - struct sock_reuseport *reuse; - - reuse = container_of(head, struct sock_reuseport, rcu); - if (reuse->prog) - bpf_prog_destroy(reuse->prog); - kfree(reuse); -} - void reuseport_detach_sock(struct sock *sk) { struct sock_reuseport *reuse; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0df2aa6525308a365d89f57f6da76d57a24238f0..1b4619008c4ee734bb79433ef832680c96580efc 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -292,7 +292,13 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_enable, .maxlen = sizeof(int), .mode = 0644, +#ifndef CONFIG_BPF_JIT_ALWAYS_ON .proc_handler = proc_dointvec +#else + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &one, +#endif }, # ifdef CONFIG_HAVE_EBPF_JIT { @@ -369,14 +375,16 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_net_busy_poll, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "busy_read", .data = &sysctl_net_busy_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, #endif #ifdef CONFIG_NET_SCHED diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 5e3a7302f7747e4c4f3134eacab2f2c65b13402f..7753681195c15d89746bc42078eb36acf3126232 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) ccid2_pr_debug("RTO_EXPIRE\n"); + if (sk->sk_state == DCCP_CLOSED) + goto out; + /* back-off timer */ hc->tx_rto <<= 1; if (hc->tx_rto > DCCP_RTO_MAX) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 39e7e2bca8dba2664cc626245337e6472f572856..62522b8d2f9715d28ce019aaeaf901875eaceed9 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) if (state == DCCP_TIME_WAIT) timeo = DCCP_TIMEWAIT_LEN; + /* tw_timer is pinned, so we need to make sure BH are disabled + * in following section, otherwise timer handler could run before + * we complete the initialization. + */ + local_bh_disable(); inet_twsk_schedule(tw, timeo); /* Linkage updates. 
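
Back in net/core/sysctl_net_core.c above, CONFIG_BPF_JIT_ALWAYS_ON switches bpf_jit_enable to proc_dointvec_minmax with both bounds pointing at one, so the JIT can no longer be disabled at run time. A userspace sketch of the observable effect (the sysctl path is real, error handling elided):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/sys/net/core/bpf_jit_enable", O_WRONLY);

	/* on ALWAYS_ON kernels this write now fails with EINVAL;
	 * "1" remains the only accepted value */
	write(fd, "0", 1);
	close(fd);
	return 0;
}
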
*/ __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); inet_twsk_put(tw); + local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b68168fcc06aa1981258eca4857511329af62f9a..9d43c1f4027408f3a2176767da0dd425938ba652 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -259,6 +259,7 @@ int dccp_disconnect(struct sock *sk, int flags) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); int err = 0; const int old_state = sk->sk_state; @@ -278,6 +279,10 @@ int dccp_disconnect(struct sock *sk, int flags) sk->sk_err = ECONNRESET; dccp_clear_xmit_timers(sk); + ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_rx_ccid = NULL; + dp->dccps_hc_tx_ccid = NULL; __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_write_queue); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 51b27ae09fbd725bcd8030982e5850215ac4ce5c..e60517eb1c3a55afc5c7832a7b1f1b3d69e0d7ad 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey) static int arp_constructor(struct neighbour *neigh) { - __be32 addr = *(__be32 *)neigh->primary_key; + __be32 addr; struct net_device *dev = neigh->dev; struct in_device *in_dev; struct neigh_parms *parms; + u32 inaddr_any = INADDR_ANY; + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len); + + addr = *(__be32 *)neigh->primary_key; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (!in_dev) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 062a67ca9a212f6e8a966e5419c4a11867e601d9..f08f984ebc566aa6e7b7f5b1f0e33b50623d239a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1380,7 +1380,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) static bool inetdev_valid_mtu(unsigned int mtu) { - return mtu >= 68; + return mtu >= IPV4_MIN_MTU; } static void inetdev_send_gratuitous_arp(struct net_device *dev, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 2ec005c6217c02df14f0f73ec92933f51dc696d3..08b7260447b6e90287f65fdbf049a3598e74fa58 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1254,14 +1254,19 @@ static int __net_init ip_fib_net_init(struct net *net) static void ip_fib_net_exit(struct net *net) { - unsigned int i; + int i; rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif - for (i = 0; i < FIB_TABLE_HASHSZ; i++) { + /* Destroy the tables in reverse order to guarantee that the + * local table, ID 255, is destroyed before the main table, ID + * 254. This is necessary as the local table may contain + * references to data contained in the main table. + */ + for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) { struct hlist_head *head = &net->ipv4.fib_table_hash[i]; struct hlist_node *tmp; struct fib_table *tb; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 691146abde2df65345165516b4aad819d51968f2..42a19fb4ae5fcb54c039bdd624c629318dff2599 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -768,7 +768,7 @@ static bool icmp_tag_validation(int proto) } /* - * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and + * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and * ICMP_PARAMETERPROB. 
*/ @@ -796,7 +796,8 @@ static bool icmp_unreach(struct sk_buff *skb) if (iph->ihl < 5) /* Mangled header, drop. */ goto out_err; - if (icmph->type == ICMP_DEST_UNREACH) { + switch (icmph->type) { + case ICMP_DEST_UNREACH: switch (icmph->code & 15) { case ICMP_NET_UNREACH: case ICMP_HOST_UNREACH: @@ -832,8 +833,16 @@ static bool icmp_unreach(struct sk_buff *skb) } if (icmph->code > NR_ICMP_UNREACH) goto out; - } else if (icmph->type == ICMP_PARAMETERPROB) + break; + case ICMP_PARAMETERPROB: info = ntohl(icmph->un.gateway) >> 24; + break; + case ICMP_TIME_EXCEEDED: + __ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS); + if (icmph->code == ICMP_EXC_FRAGTIME) + goto out; + break; + } /* * Throw it at our lower layers diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 08575e3bd13532303fa43e939f1433dff12f87bc..7f5fe07d0b13afe762c587ac49294e0a8ccb2894 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include @@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) return scount; } +/* source address selection per RFC 3376 section 4.2.13 */ +static __be32 igmpv3_get_srcaddr(struct net_device *dev, + const struct flowi4 *fl4) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (!in_dev) + return htonl(INADDR_ANY); + + for_ifa(in_dev) { + if (fl4->saddr == ifa->ifa_local) + return fl4->saddr; + } endfor_ifa(in_dev); + + return htonl(INADDR_ANY); +} + static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) { struct sk_buff *skb; @@ -368,7 +386,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) pip->frag_off = htons(IP_DF); pip->ttl = 1; pip->daddr = fl4.daddr; - pip->saddr = fl4.saddr; + + rcu_read_lock(); + pip->saddr = igmpv3_get_srcaddr(dev, &fl4); + rcu_read_unlock(); + pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ ip_select_ident(net, skb, NULL); @@ -404,16 +426,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, - int type, struct igmpv3_grec **ppgr) + int type, struct igmpv3_grec **ppgr, unsigned int mtu) { struct net_device *dev = pmc->interface->dev; struct igmpv3_report *pih; struct igmpv3_grec *pgr; - if (!skb) - skb = igmpv3_newpack(dev, dev->mtu); - if (!skb) - return NULL; + if (!skb) { + skb = igmpv3_newpack(dev, mtu); + if (!skb) + return NULL; + } pgr = (struct igmpv3_grec *)skb_put(skb, sizeof(struct igmpv3_grec)); pgr->grec_type = type; pgr->grec_auxwords = 0; @@ -436,12 +459,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, struct igmpv3_grec *pgr = NULL; struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; int scount, stotal, first, isquery, truncate; + unsigned int mtu; if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) return skb; + mtu = READ_ONCE(dev->mtu); + if (mtu < IPV4_MIN_MTU) + return skb; + isquery = type == IGMPV3_MODE_IS_INCLUDE || type == IGMPV3_MODE_IS_EXCLUDE; truncate = type == IGMPV3_MODE_IS_EXCLUDE || @@ -462,7 +490,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) igmpv3_sendpack(skb); - skb = igmpv3_newpack(dev, dev->mtu); + skb = igmpv3_newpack(dev, mtu); } } first = 1; @@ -498,12 +526,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, 
struct ip_mc_list *pmc, pgr->grec_nsrcs = htons(scount); if (skb) igmpv3_sendpack(skb); - skb = igmpv3_newpack(dev, dev->mtu); + skb = igmpv3_newpack(dev, mtu); first = 1; scount = 0; } if (first) { - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); first = 0; } if (!skb) @@ -538,7 +566,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, igmpv3_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); } } if (pgr) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 453db950dc9ff9776b717118560839aa7d2ab05e..4bf3b8af02571a6f39da06dabd816c8381cc9d6a 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -198,6 +198,7 @@ static void ip_expire(unsigned long arg) qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); + rcu_read_lock(); spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) @@ -207,7 +208,7 @@ static void ip_expire(unsigned long arg) __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); if (!inet_frag_evicting(&qp->q)) { - struct sk_buff *head = qp->q.fragments; + struct sk_buff *clone, *head = qp->q.fragments; const struct iphdr *iph; int err; @@ -216,32 +217,40 @@ static void ip_expire(unsigned long arg) if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) goto out; - rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) - goto out_rcu_unlock; + goto out; + /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); if (err) - goto out_rcu_unlock; + goto out; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ if (frag_expire_skip_icmp(qp->user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) - goto out_rcu_unlock; + goto out; + + clone = skb_clone(head, GFP_ATOMIC); /* Send an ICMP "Fragment Reassembly Timeout" message. 
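
The igmp.c changes above make add_grec()/add_grhead() size every packet of one report from a single snapshot of the device MTU, so a concurrent MTU change can no longer mix packet sizes mid-report. The pattern in isolation (names as in the patch):

unsigned int mtu = READ_ONCE(dev->mtu);	/* racy field, read exactly once */

if (mtu < IPV4_MIN_MTU)
	return skb;			/* implausibly small, bail out */

skb = igmpv3_newpack(dev, mtu);		/* every pack uses the snapshot */
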
*/ - icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); -out_rcu_unlock: - rcu_read_unlock(); + if (clone) { + spin_unlock(&qp->q.lock); + icmp_send(clone, ICMP_TIME_EXCEEDED, + ICMP_EXC_FRAGTIME, 0); + consume_skb(clone); + goto out_rcu_unlock; + } } out: spin_unlock(&qp->q.lock); +out_rcu_unlock: + rcu_read_unlock(); ipq_put(qp); } diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index bd7f1836bb703c7c7dff315d86381863b5328527..96536a0d6e2db193083e3a07647e88b4c18466e7 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -346,8 +346,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev) dev->needed_headroom = t_hlen + hlen; mtu -= (dev->hard_header_len + t_hlen); - if (mtu < 68) - mtu = 68; + if (mtu < IPV4_MIN_MTU) + mtu = IPV4_MIN_MTU; return mtu; } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 071a785c65eb7ab5eef6294888e253ef39f45c45..b23464d9c538524e527c11c8013f70400407ce3e 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -306,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) { + if (d != ic_dev && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 713c09a74b9009cd3132bd388638f25590b5bdc6..0c9ded247ebba0d78ce34f49315adae7d4c235d8 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -158,6 +158,10 @@ static unsigned int ipv4_conntrack_local(void *priv, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; + + if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */ + return NF_ACCEPT; + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index f8aad03d674b05008edb5b9883b3a26b2fa7461f..6f5e8d01b876933a68e5f6cf8b2a48f8c4e17262 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -255,11 +255,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, /* maniptype == SRC for postrouting. */ enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); - /* We never see fragments: conntrack defrags on pre-routing - * and local-out, and nf_nat_out protects post-routing. - */ - NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); - ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would * have dropped it. 
Hence it's the user's responsibilty to diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5a8f7c360887a73b1558022c83e8433190f0f97d..53e49f5011d3ce482c5180edc47da7928db0f224 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1260,16 +1260,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = { .timeout = 180, }; -static struct nf_conntrack_helper snmp_helper __read_mostly = { - .me = THIS_MODULE, - .help = help, - .expect_policy = &snmp_exp_policy, - .name = "snmp", - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), - .tuple.dst.protonum = IPPROTO_UDP, -}; - static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { .me = THIS_MODULE, .help = help, @@ -1288,17 +1278,10 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { static int __init nf_nat_snmp_basic_init(void) { - int ret = 0; - BUG_ON(nf_nat_snmp_hook != NULL); RCU_INIT_POINTER(nf_nat_snmp_hook, help); - ret = nf_conntrack_helper_register(&snmp_trap_helper); - if (ret < 0) { - nf_conntrack_helper_unregister(&snmp_helper); - return ret; - } - return ret; + return nf_conntrack_helper_register(&snmp_trap_helper); } static void __exit nf_nat_snmp_basic_fini(void) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ab0bbcbef3b397d690078ebd786a6dab41eabd8b..a860df28300d9fa8b34e9eb35623483efe334cbe 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -502,11 +502,16 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int err; struct ip_options_data opt_copy; struct raw_frag_vec rfv; + int hdrincl; err = -EMSGSIZE; if (len > 0xFFFF) goto out; + /* hdrincl should be READ_ONCE(inet->hdrincl) + * but READ_ONCE() doesn't work with bit fields + */ + hdrincl = inet->hdrincl; /* * Check the flags. */ @@ -582,7 +587,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. */ - if (inet->hdrincl) + if (hdrincl) goto done; if (ipc.opt->opt.srr) { if (!daddr) @@ -604,12 +609,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | - (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), + (hdrincl ? 
FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0, sk->sk_uid); - if (!inet->hdrincl) { + if (!hdrincl) { rfv.msg = msg; rfv.hlen = 0; @@ -634,7 +639,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto do_confirm; back_from_confirm: - if (inet->hdrincl) + if (hdrincl) err = raw_send_hdrinc(sk, &fl4, msg, len, &rt, msg->msg_flags, &ipc.sockc); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b21bf597d39ee79e4496f95f9c37c768b1432231..1a6b2e18b399f24e27801b3f1268245fc870e79d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -633,9 +633,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; struct rtable *rt; + u32 genid, hval; unsigned int i; int depth; - u32 hval = fnhe_hashfun(daddr); + + genid = fnhe_genid(dev_net(nh->nh_dev)); + hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); @@ -658,12 +661,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, } if (fnhe) { + if (fnhe->fnhe_genid != genid) + fnhe->fnhe_genid = genid; if (gw) fnhe->fnhe_gw = gw; - if (pmtu) { + if (pmtu) fnhe->fnhe_pmtu = pmtu; - fnhe->fnhe_expires = max(1UL, expires); - } + fnhe->fnhe_expires = max(1UL, expires); /* Update all cached dsts too */ rt = rcu_dereference(fnhe->fnhe_rth_input); if (rt) @@ -682,7 +686,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_next = hash->chain; rcu_assign_pointer(hash->chain, fnhe); } - fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); + fnhe->fnhe_genid = genid; fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9b2d1b8de8159e988ec27d37cf9b0a4284fd8c55..7ea802775a98f494eb6d84015abeddc5a9deca94 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -538,7 +538,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; - } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { + } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* Active TCP fastopen socket with defer_connect * Return POLLOUT so application can call write() * in order for kernel to generate SYN+data @@ -1115,9 +1115,14 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, flags = (msg->msg_flags & MSG_DONTWAIT) ? 
O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, uaddr, msg->msg_namelen, flags); - inet->defer_connect = 0; - *copied = tp->fastopen_req->copied; - tcp_free_fastopen_req(tp); + /* fastopen_req could already be freed in __inet_stream_connect + * if the connection times out or gets rst + */ + if (tp->fastopen_req) { + *copied = tp->fastopen_req->copied; + tcp_free_fastopen_req(tp); + inet->defer_connect = 0; + } return err; } @@ -2232,6 +2237,9 @@ void tcp_close(struct sock *sk, long timeout) tcp_send_active_reset(sk, GFP_ATOMIC); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); + } else if (!check_net(sock_net(sk))) { + /* Not possible to send reset; just close */ + tcp_set_state(sk, TCP_CLOSE); } } @@ -2314,6 +2322,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; tcp_clear_retrans(tp); inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 @@ -2327,8 +2336,18 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); + /* Clean up fastopen related fields */ + tcp_free_fastopen_req(tp); + inet->defer_connect = 0; + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + sk->sk_frag.offset = 0; + } + sk->sk_error_report(sk); return err; } diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index cb8db347c680c3eeff2d709667b2b45e2c1db9d7..8ec60532be2b60a7c6df1d038be55043ac4fd0bd 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -81,7 +81,8 @@ struct bbr { u32 lt_last_lost; /* LT intvl start: tp->lost */ u32 pacing_gain:10, /* current gain for setting pacing rate */ cwnd_gain:10, /* current gain for setting cwnd */ - full_bw_cnt:3, /* number of rounds without large bw gains */ + full_bw_reached:1, /* reached full bw in Startup? */ + full_bw_cnt:2, /* number of rounds without large bw gains */ cycle_idx:3, /* current index in pacing_gain cycle array */ has_seen_rtt:1, /* have we seen an RTT sample yet? */ unused_b:5; @@ -151,7 +152,7 @@ static bool bbr_full_bw_reached(const struct sock *sk) { const struct bbr *bbr = inet_csk_ca(sk); - return bbr->full_bw_cnt >= bbr_full_bw_cnt; + return bbr->full_bw_reached; } /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ @@ -451,7 +452,8 @@ static void bbr_advance_cycle_phase(struct sock *sk) bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); bbr->cycle_mstamp = tp->delivered_mstamp; - bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; + bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT : + bbr_pacing_gain[bbr->cycle_idx]; } /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ @@ -460,8 +462,7 @@ static void bbr_update_cycle_phase(struct sock *sk, { struct bbr *bbr = inet_csk_ca(sk); - if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw && - bbr_is_next_cycle_phase(sk, rs)) + if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) bbr_advance_cycle_phase(sk); } @@ -688,6 +689,7 @@ static void bbr_check_full_bw_reached(struct sock *sk, return; } ++bbr->full_bw_cnt; + bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt; } /* If pipe is probably full, drain the queue and then enter steady-state. 
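
The tcp_sendmsg_fastopen() fix above matters for deferred-connect users: fastopen_req can already be gone if the handshake timed out or was reset inside __inet_stream_connect(). A userspace sketch of the flow that exercises it, assuming a tree that provides the TCP_FASTOPEN_CONNECT socket option and with fd and srv set up elsewhere:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

int on = 1;

setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &on, sizeof(on));
connect(fd, (struct sockaddr *)&srv, sizeof(srv));	/* no SYN yet */
send(fd, "hello", 5, 0);	/* SYN carries the data; an RST or timeout
				 * in here lands in the patched path */
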
*/ @@ -821,6 +823,7 @@ static void bbr_init(struct sock *sk) bbr->restore_cwnd = 0; bbr->round_start = 0; bbr->idle_restart = 0; + bbr->full_bw_reached = 0; bbr->full_bw = 0; bbr->full_bw_cnt = 0; bbr->cycle_mstamp.v64 = 0; @@ -840,6 +843,11 @@ static u32 bbr_sndbuf_expand(struct sock *sk) */ static u32 bbr_undo_cwnd(struct sock *sk) { + struct bbr *bbr = inet_csk_ca(sk); + + bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ + bbr->full_bw_cnt = 0; + bbr_reset_lt_bw_sampling(sk); return tcp_sk(sk)->snd_cwnd; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 649d611e92f96533738dcc48bacf2d7971c32dad..759d83e8756c38c2d6d498597b63b308105d885f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1967,6 +1967,8 @@ void tcp_enter_loss(struct sock *sk) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; tp->fackets_out = 0; + /* Mark SACK reneging until we recover from this loss event. */ + tp->is_sack_reneg = 1; } tcp_clear_all_retrans_hints(tp); @@ -2464,6 +2466,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) return true; } tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; return false; } @@ -2495,8 +2498,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; - if (frto_undo || tcp_is_sack(tp)) + if (frto_undo || tcp_is_sack(tp)) { tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; + } return true; } return false; @@ -3590,6 +3595,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) struct tcp_sacktag_state sack_state; struct rate_sample rs = { .prior_delivered = 0 }; u32 prior_snd_una = tp->snd_una; + bool is_sack_reneg = tp->is_sack_reneg; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; @@ -3712,7 +3718,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_schedule_loss_probe(sk); delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ - tcp_rate_gen(sk, delivered, lost, &now, &rs); + tcp_rate_gen(sk, delivered, lost, is_sack_reneg, &now, &rs); tcp_cong_control(sk, ack, delivered, flag, &rs); tcp_xmit_recovery(sk, rexmit); return 1; @@ -5082,7 +5088,7 @@ static void tcp_check_space(struct sock *sk) if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); /* pairs with tcp_poll() */ - smp_mb__after_atomic(); + smp_mb(); if (sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) tcp_new_space(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4b8cfda4bf7909de7e67e59be4c45d2053b4beed..710d1af93efa948f749dbaf2c14098f94462f58e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -836,7 +836,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_time_stamp, req->ts_recent, 0, - tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, + tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr, AF_INET), inet_rsk(req)->no_srccheck ? 
IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos); @@ -1678,7 +1678,9 @@ int tcp_v4_rcv(struct sk_buff *skb) */ sock_hold(sk); refcounted = true; - nsk = tcp_check_req(sk, skb, req, false); + nsk = NULL; + if (!tcp_filter(sk, skb)) + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); goto discard_and_relse; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 64e1ba49c3e2c4e3be94389a2a6e068be8f0d564..830a5645d8c1d3d4b5455dcd5e3552f1855103ae 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -328,10 +328,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) timeo = TCP_TIMEWAIT_LEN; } + /* tw_timer is pinned, so we need to make sure BH are disabled + * in following section, otherwise timer handler could run before + * we complete the initialization. + */ + local_bh_disable(); inet_twsk_schedule(tw, timeo); /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); inet_twsk_put(tw); + local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index bc68da38ea869ccf8100fd130b33f9d10f185255..366b1becff9d4d6e791c608268b7478a3e9e9d60 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 9be1581a5a08c36f4544fbdabedd9741fb266a1e..18309f58ab8d8056fec9d732006a6f1af281a60f 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, /* Update the connection delivery information and generate a rate sample. */ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct skb_mstamp *now, struct rate_sample *rs) + bool is_sack_reneg, struct skb_mstamp *now, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; @@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->losses = lost; /* freshly marked lost */ - /* Return an invalid sample if no timing information is available. */ - if (!rs->prior_mstamp.v64) { + /* Return an invalid sample if no timing information is available or + * in recovery from loss with SACK reneging. Rate samples taken during + * a SACK reneging event may overestimate bw by including packets that + * were SACKed before the reneg. + */ + if (!rs->prior_mstamp.v64 || is_sack_reneg) { rs->delivered = -1; rs->interval_us = -1; return; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 74db43b4791701ab736711a4c013a06960a5731d..69523389f067a91f172e986491274c2a3adfae52 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -50,11 +50,19 @@ static void tcp_write_err(struct sock *sk) * to prevent DoS attacks. It is called when a retransmission timeout * or zero probe timeout occurs on orphaned socket. 
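
The tcp_v4_rcv() change above runs tcp_filter() before tcp_check_req(), so a filter attached to a listening socket now also vets the packets that complete a handshake for its request sockets. Minimal userspace sketch; the single-instruction classic BPF program below accepts every packet (up to 64KiB), and listen_fd is assumed to be a bound, listening socket:

#include <linux/filter.h>
#include <sys/socket.h>

struct sock_filter code[] = {
	{ BPF_RET | BPF_K, 0, 0, 0xffff },	/* accept up to 0xffff bytes */
};
struct sock_fprog prog = {
	.len	= 1,
	.filter	= code,
};

setsockopt(listen_fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
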
* + * Also close if our net namespace is exiting; in that case there is no + * hope of ever communicating again since all netns interfaces are already + * down (or about to be down), and we need to release our dst references, + * which have been moved to the netns loopback interface, so the namespace + * can finish exiting. This condition is only possible if we are a kernel + * socket, as those do not hold references to the namespace. + * * Criteria is still not confirmed experimentally and may change. * We kill the socket, if: * 1. If number of orphaned sockets exceeds an administratively configured * limit. * 2. If we have strong memory pressure. + * 3. If our net namespace is exiting. */ static int tcp_out_of_resources(struct sock *sk, bool do_reset) { @@ -83,6 +91,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; } + + if (!check_net(sock_net(sk))) { + /* Not possible to send reset; just close */ + tcp_done(sk); + return 1; + } + return 0; } diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 4c4bac1b5eab221928c569592c833e1bfcba748d..3ecb61ee42fb33762e28ea80548f4cb3ffb5104e 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) { - return min(tp->snd_ssthresh, tp->snd_cwnd-1); + return min(tp->snd_ssthresh, tp->snd_cwnd); } static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6401574cd638a0c739d9c0898ab81b717eee4bd4..f4f616eaaeb81758e23ce397d88b255bdceb27b9 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -205,6 +205,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 497c79e2958ec0469a39a9f553d3620b29345813..b74fd8c11f711914167f32d18df31a64516fb247 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -290,10 +290,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .keep_addr_on_down = 0, }; -/* Check if a valid qdisc is available */ -static inline bool addrconf_qdisc_ok(const struct net_device *dev) +/* Check if link is ready: is it up and is a valid qdisc available */ +static inline bool addrconf_link_ready(const struct net_device *dev) { - return !qdisc_tx_is_noop(dev); + return netif_oper_up(dev) && !qdisc_tx_is_noop(dev); } static void addrconf_del_rs_timer(struct inet6_dev *idev) @@ -438,7 +438,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->token = in6addr_any; - if (netif_running(dev) && addrconf_qdisc_ok(dev)) + if (netif_running(dev) && addrconf_link_ready(dev)) ndev->if_flags |= IF_READY; ipv6_mc_init_dev(ndev); @@ -3397,7 +3397,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, /* restore routes for permanent addresses */ addrconf_permanent_addr(dev); - if (!addrconf_qdisc_ok(dev)) { + if (!addrconf_link_ready(dev)) { /* device is not ready yet. */ pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", dev->name); @@ -3412,7 +3412,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } } else if (event == NETDEV_CHANGE) { - if (!addrconf_qdisc_ok(dev)) { + if (!addrconf_link_ready(dev)) { /* device is still not ready. 
*/ break; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 02816456a53ecd79e8e933fd7adec4a49b5c93e0..11116840b5bcd97f4721df0b06cd1ddba52a2370 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -225,7 +225,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk)); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets @@ -291,6 +290,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net *net = sock_net(sk); __be32 v4addr = 0; unsigned short snum; + bool saved_ipv6only; int addr_type = 0; int err = 0; @@ -395,19 +395,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; + saved_ipv6only = sk->sk_ipv6only; + if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED) + sk->sk_ipv6only = 1; + /* Make sure we are allowed to bind here. */ if ((snum || !inet->bind_address_no_port) && sk->sk_prot->get_port(sk, snum)) { + sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); err = -EADDRINUSE; goto out; } - if (addr_type != IPV6_ADDR_ANY) { + if (addr_type != IPV6_ADDR_ANY) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; - if (addr_type != IPV6_ADDR_MAPPED) - sk->sk_ipv6only = 1; - } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; inet->inet_sport = htons(inet->inet_num); @@ -926,12 +928,12 @@ static int __init inet6_init(void) err = register_pernet_subsys(&inet6_net_ops); if (err) goto register_pernet_fail; - err = icmpv6_init(); - if (err) - goto icmp_fail; err = ip6_mr_init(); if (err) goto ipmr_fail; + err = icmpv6_init(); + if (err) + goto icmp_fail; err = ndisc_init(); if (err) goto ndisc_fail; @@ -1061,10 +1063,10 @@ static int __init inet6_init(void) ndisc_cleanup(); ndisc_fail: ip6_mr_cleanup(); -ipmr_fail: - icmpv6_cleanup(); icmp_fail: unregister_pernet_subsys(&inet6_net_ops); +ipmr_fail: + icmpv6_cleanup(); register_pernet_fail: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 65a58feb9bc383924a44cde86ec9f0bbc04da813..21bd54fd9ba7fe372540ea47aa7f8edd460f98d6 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -337,11 +337,12 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, nt->dev = dev; nt->net = dev_net(dev); - ip6gre_tnl_link_config(nt, 1); if (register_netdevice(dev) < 0) goto failed_free; + ip6gre_tnl_link_config(nt, 1); + /* Can use a lockless transmit, unless we generate output sequences */ if (!(nt->parms.o_flags & TUNNEL_SEQ)) dev->features |= NETIF_F_LLTX; @@ -461,7 +462,7 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) &ipv6h->saddr, &ipv6h->daddr, tpi->key, tpi->proto); if (tunnel) { - ip6_tnl_rcv(tunnel, skb, tpi, NULL, false); + ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); return PACKET_RCVD; } @@ -1267,7 +1268,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { - struct ip6_tnl *tunnel; int ret; ret = ip6gre_tunnel_init_common(dev); @@ -1276,10 +1276,6 @@ static int ip6gre_tap_init(struct net_device *dev) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - tunnel = netdev_priv(dev); - - ip6gre_tnl_link_config(tunnel, 1); - return 0; } @@ -1374,7 +1370,6 @@ static int ip6gre_newlink(struct net *src_net, struct 
@@ -1374,7 +1370,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
 
 	nt->dev = dev;
 	nt->net = dev_net(dev);
-	ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
 
 	dev->features		|= GRE6_FEATURES;
 	dev->hw_features	|= GRE6_FEATURES;
@@ -1400,6 +1395,11 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
 	if (err)
 		goto out;
 
+	ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+
+	if (tb[IFLA_MTU])
+		ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+
 	dev_hold(dev);
 	ip6gre_tunnel_link(ign, nt);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4e6c4394a96a78ead468044c0e70d993b15c9b0e..55ca65c7d5e5b3f01eab67b32cffb9c0595c35c5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -165,6 +165,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+	if (!np->autoflowlabel_set)
+		return ip6_default_np_autolabel(net);
+	else
+		return np->autoflowlabel;
+}
+
 /*
  * xmit an sk_buff (used by TCP, SCTP and DCCP)
  * Note : socket lock is not held for SYNACK packets, but might be modified
@@ -228,7 +236,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 		hlimit = ip6_dst_hoplimit(dst);
 
 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
-						     np->autoflowlabel, fl6));
+						     ip6_autoflowlabel(net, np), fl6));
 
 	hdr->payload_len = htons(seg_len);
 	hdr->nexthdr = proto;
@@ -1260,14 +1268,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 	v6_cork->tclass = ipc6->tclass;
 	if (rt->dst.flags & DST_XFRM_TUNNEL)
 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
-		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
+		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
 	else
 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
-		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
 	if (np->frag_size < mtu) {
 		if (np->frag_size)
 			mtu = np->frag_size;
 	}
+	if (mtu < IPV6_MIN_MTU)
+		return -EINVAL;
 	cork->base.fragsize = mtu;
 	if (dst_allfrag(rt->dst.path))
 		cork->base.flags |= IPCORK_ALLFRAG;
@@ -1699,7 +1709,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
 	ip6_flow_hdr(hdr, v6_cork->tclass,
 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
-					np->autoflowlabel, fl6));
+					ip6_autoflowlabel(net, np), fl6));
 	hdr->hop_limit = v6_cork->hop_limit;
 	hdr->nexthdr = proto;
 	hdr->saddr = fl6->saddr;
@@ -1798,11 +1808,13 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 	cork.base.flags = 0;
 	cork.base.addr = 0;
 	cork.base.opt = NULL;
+	cork.base.dst = NULL;
 	v6_cork.opt = NULL;
 	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
-	if (err)
+	if (err) {
+		ip6_cork_release(&cork, &v6_cork);
 		return ERR_PTR(err);
-
+	}
 	if (ipc6->dontfrag < 0)
 		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 64aefc2d13fa2d11062dcd6fd6600ee4e605f2ce..4c522367895c3d7b545e9213c121210dfae30bbd 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -911,7 +911,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
 		if (t->parms.collect_md) {
 			tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
 			if (!tun_dst)
-				return 0;
+				goto drop;
 		}
 		ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
 				    log_ecn_error);
@@ -1080,10 +1080,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 			memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
 			neigh_release(neigh);
 		}
-	} else if (!(t->parms.flags &
-		     (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
-		/* enable the cache only only if the routing decision does
-		 * not depend on the current inner header value
+	} else if (t->parms.proto != 0 && !(t->parms.flags &
+		   (IP6_TNL_F_USE_ORIG_TCLASS |
+		    IP6_TNL_F_USE_ORIG_FWMARK))) {
+		/* enable the cache only if neither the outer protocol nor the
+		 * routing decision depends on the current inner header value
 		 */
 		use_cache = true;
 	}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index da64b20ded6b90a7c1dc6ad7938a204156926268..afc30a01cc19f4d0afe639dd4de41948171afe9b 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -189,12 +189,12 @@ static int vti6_tnl_create2(struct net_device *dev)
 	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
 	int err;
 
+	dev->rtnl_link_ops = &vti6_link_ops;
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto out;
 
 	strcpy(t->parms.name, dev->name);
-	dev->rtnl_link_ops = &vti6_link_ops;
 
 	dev_hold(dev);
 	vti6_tnl_link(ip6n, t);
@@ -485,11 +485,15 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 	if (!skb->ignore_df && skb->len > mtu) {
 		skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
 
-		if (skb->protocol == htons(ETH_P_IPV6))
+		if (skb->protocol == htons(ETH_P_IPV6)) {
+			if (mtu < IPV6_MIN_MTU)
+				mtu = IPV6_MIN_MTU;
+
 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		else
+		} else {
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 				  htonl(mtu));
+		}
 
 		return -EMSGSIZE;
 	}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 117405dd07a3ce9819e2f7bcd741d965b3de8e16..a30e7e925c9b76eb06e5d992f67fcce0bd88d4a7 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -495,6 +495,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 		return ERR_PTR(-ENOENT);
 
 	it->mrt = mrt;
+	it->cache = NULL;
 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
 		: SEQ_START_TOKEN;
 }
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 38bee173dc2b347eeb2529bdf75b4d84c6011e90..bcea985dd76b1106b2c64bf3744de4e1e10aeadd 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -874,6 +874,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		break;
 	case IPV6_AUTOFLOWLABEL:
 		np->autoflowlabel = valbool;
+		np->autoflowlabel_set = 1;
 		retv = 0;
 		break;
 	}
@@ -1315,7 +1316,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		break;
 
 	case IPV6_AUTOFLOWLABEL:
-		val = np->autoflowlabel;
+		val = ip6_autoflowlabel(sock_net(sk), np);
 		break;
 
 	default:
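Taken together, the ip6_output.c and ipv6_sockglue.c hunks turn the auto-flowlabel setting into a tri-state: sockets that never issued IPV6_AUTOFLOWLABEL keep tracking the per-namespace sysctl, while an explicit setsockopt pins the value via the new autoflowlabel_set flag. A sketch of the resulting lookup, with field names taken from the hunks (the helper name here is illustrative):

/* Illustrative only: effective auto-flowlabel setting for one socket. */
static bool effective_autoflowlabel(struct net *net,
				    const struct ipv6_pinfo *np)
{
	if (np->autoflowlabel_set)		/* user set it explicitly */
		return np->autoflowlabel;
	return ip6_default_np_autolabel(net);	/* else: follow the sysctl */
}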
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 1bdc703cb9668bd77690c3d8f1ec0062d7b88c43..ca8fac6e5a09d326c7f0044066e3d86893f71fd0 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1682,16 +1682,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
 }
 
 static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
-	int type, struct mld2_grec **ppgr)
+	int type, struct mld2_grec **ppgr, unsigned int mtu)
 {
-	struct net_device *dev = pmc->idev->dev;
 	struct mld2_report *pmr;
 	struct mld2_grec *pgr;
 
-	if (!skb)
-		skb = mld_newpack(pmc->idev, dev->mtu);
-	if (!skb)
-		return NULL;
+	if (!skb) {
+		skb = mld_newpack(pmc->idev, mtu);
+		if (!skb)
+			return NULL;
+	}
 	pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
 	pgr->grec_type = type;
 	pgr->grec_auxwords = 0;
@@ -1714,10 +1714,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	struct mld2_grec *pgr = NULL;
 	struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
 	int scount, stotal, first, isquery, truncate;
+	unsigned int mtu;
 
 	if (pmc->mca_flags & MAF_NOREPORT)
 		return skb;
 
+	mtu = READ_ONCE(dev->mtu);
+	if (mtu < IPV6_MIN_MTU)
+		return skb;
+
 	isquery = type == MLD2_MODE_IS_INCLUDE ||
 		  type == MLD2_MODE_IS_EXCLUDE;
 	truncate = type == MLD2_MODE_IS_EXCLUDE ||
@@ -1738,7 +1743,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 		    AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
 			if (skb)
 				mld_sendpack(skb);
-			skb = mld_newpack(idev, dev->mtu);
+			skb = mld_newpack(idev, mtu);
 		}
 	}
 	first = 1;
@@ -1774,12 +1779,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 				pgr->grec_nsrcs = htons(scount);
 			if (skb)
 				mld_sendpack(skb);
-			skb = mld_newpack(idev, dev->mtu);
+			skb = mld_newpack(idev, mtu);
 			first = 1;
 			scount = 0;
 		}
 		if (first) {
-			skb = add_grhead(skb, pmc, type, &pgr);
+			skb = add_grhead(skb, pmc, type, &pgr, mtu);
 			first = 0;
 		}
 		if (!skb)
@@ -1814,7 +1819,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 				mld_sendpack(skb);
 				skb = NULL; /* add_grhead will get a new one */
 			}
-			skb = add_grhead(skb, pmc, type, &pgr);
+			skb = add_grhead(skb, pmc, type, &pgr, mtu);
 		}
 	}
 	if (pgr)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 40d740572354251a0313616b2113f301955308d1..db6d437002a6733c93a60e91eb52658c80d612c3 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1085,6 +1085,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
 	ipip6_tunnel_link(sitn, t);
 	t->parms.iph.ttl = p->iph.ttl;
 	t->parms.iph.tos = p->iph.tos;
+	t->parms.iph.frag_off = p->iph.frag_off;
 	if (t->parms.link != p->link) {
 		t->parms.link = p->link;
 		ipip6_tunnel_bind_dev(t->dev);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d372342dc39d2dac1fa6a5d4d90202aecd9de6ac..82b7a8e0d7987494a85cca824f1cc19821a29f5b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -968,7 +968,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 			tcp_rsk(req)->rcv_nxt,
 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
 			tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
-			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
 			0, 0);
 }
 
@@ -1442,7 +1442,9 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 		}
 		sock_hold(sk);
 		refcounted = true;
-		nsk = tcp_check_req(sk, skb, req, false);
+		nsk = NULL;
+		if (!tcp_filter(sk, skb))
+			nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
 			goto discard_and_relse;
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index d883c9204c01d525fcd51012c34afbf5f8af11ed..278e49cd67d4e2c7b0ab9138fabe84753d628b5a 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
 {
 	struct tcphdr *th;
 
+	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
+		return ERR_PTR(-EINVAL);
+
 	if (!pskb_may_pull(skb, sizeof(*th)))
 		return ERR_PTR(-EINVAL);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index e7d378c032cb6ebe80323db987ca201e5ae2d845..2bd2087bd10511f585b21054acf0a098d8d3d721 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -55,6 +55,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 	const struct ipv6hdr *ipv6h;
 	struct udphdr *uh;
 
+	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+		goto out;
+
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 		goto out;
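The mcast.c hunks above snapshot dev->mtu once with READ_ONCE() and thread that value through add_grec()/add_grhead(), so a concurrent MTU change cannot yield two different packet sizes within one MLDv2 report, and an MTU below IPV6_MIN_MTU is rejected up front. The general shape of the idiom, as a sketch:

/* Illustrative only: sample a racy field once, then use the copy. */
unsigned int mtu = READ_ONCE(dev->mtu);	/* single racy read */

if (mtu < IPV6_MIN_MTU)
	return skb;			/* device cannot carry MLDv2 now */

/* Every later size computation uses 'mtu', never dev->mtu again. */
skb = mld_newpack(idev, mtu);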
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 7eb0e8fe3ca8a86a56b584398d568155969a3653..22785dc030517ae72e868f55288aa5097a588e10 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1624,60 +1624,35 @@ static struct proto kcm_proto = {
 };
 
 /* Clone a kcm socket. */
-static int kcm_clone(struct socket *osock, struct kcm_clone *info,
-		     struct socket **newsockp)
+static struct file *kcm_clone(struct socket *osock)
 {
 	struct socket *newsock;
 	struct sock *newsk;
-	struct file *newfile;
-	int err, newfd;
+	struct file *file;
 
-	err = -ENFILE;
 	newsock = sock_alloc();
 	if (!newsock)
-		goto out;
+		return ERR_PTR(-ENFILE);
 
 	newsock->type = osock->type;
 	newsock->ops = osock->ops;
 
 	__module_get(newsock->ops->owner);
 
-	newfd = get_unused_fd_flags(0);
-	if (unlikely(newfd < 0)) {
-		err = newfd;
-		goto out_fd_fail;
-	}
-
-	newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
-	if (unlikely(IS_ERR(newfile))) {
-		err = PTR_ERR(newfile);
-		goto out_sock_alloc_fail;
-	}
-
 	newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
 			 &kcm_proto, true);
 	if (!newsk) {
-		err = -ENOMEM;
-		goto out_sk_alloc_fail;
+		sock_release(newsock);
+		return ERR_PTR(-ENOMEM);
 	}
-
 	sock_init_data(newsock, newsk);
 	init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
 
-	fd_install(newfd, newfile);
-	*newsockp = newsock;
-	info->fd = newfd;
-
-	return 0;
+	file = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
+	if (IS_ERR(file))
+		sock_release(newsock);
 
-out_sk_alloc_fail:
-	fput(newfile);
-out_sock_alloc_fail:
-	put_unused_fd(newfd);
-out_fd_fail:
-	sock_release(newsock);
-out:
-	return err;
+	return file;
 }
 
 static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
@@ -1707,21 +1682,25 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	}
 	case SIOCKCMCLONE: {
 		struct kcm_clone info;
-		struct socket *newsock = NULL;
+		struct file *file;
 
-		if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
-			return -EFAULT;
+		info.fd = get_unused_fd_flags(0);
+		if (unlikely(info.fd < 0))
+			return info.fd;
 
-		err = kcm_clone(sock, &info, &newsock);
-
-		if (!err) {
-			if (copy_to_user((void __user *)arg, &info,
-					 sizeof(info))) {
-				err = -EFAULT;
-				sys_close(info.fd);
-			}
+		file = kcm_clone(sock);
+		if (IS_ERR(file)) {
+			put_unused_fd(info.fd);
+			return PTR_ERR(file);
 		}
-
+		if (copy_to_user((void __user *)arg, &info,
+				 sizeof(info))) {
+			put_unused_fd(info.fd);
+			fput(file);
+			return -EFAULT;
+		}
+		fd_install(info.fd, file);
+		err = 0;
 		break;
 	}
 	default:
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 94bf810ad24266deaa0fe2fa6f7498b4cc233554..6482b001f19a25355eac827fd6d8ddeae7617d29 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -401,6 +401,11 @@ static int verify_address_len(const void *p)
 #endif
 	int len;
 
+	if (sp->sadb_address_len <
+	    DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family),
+			 sizeof(uint64_t)))
+		return -EINVAL;
+
 	switch (addr->sa_family) {
 	case AF_INET:
 		len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t));
@@ -511,6 +516,9 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void *
 		uint16_t ext_type;
 		int ext_len;
 
+		if (len < sizeof(*ehdr))
+			return -EINVAL;
+
 		ext_len  = ehdr->sadb_ext_len;
 		ext_len *= sizeof(uint64_t);
 		ext_type = ehdr->sadb_ext_type;
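The kcmsock.c rework above follows the usual "publish the fd last" discipline: reserve a descriptor number, build the struct file, copy the number out to userspace, and only then fd_install() it, so a failing copy_to_user() can still be unwound without the file ever having been visible to the process. Schematically (a sketch of the ordering, not verbatim kernel code):

/* Illustrative only: reserve / build / copy / install, in that order. */
int fd = get_unused_fd_flags(0);		/* 1. reserve a number */
if (fd < 0)
	return fd;

file = kcm_clone(sock);			/* 2. build the file */
if (IS_ERR(file)) {
	put_unused_fd(fd);			/*    nothing published yet */
	return PTR_ERR(file);
}

info.fd = fd;
if (copy_to_user(uarg, &info, sizeof(info))) {	/* 3. tell userspace */
	put_unused_fd(fd);
	fput(file);				/*    still fully undoable */
	return -EFAULT;
}

fd_install(fd, file);			/* 4. publish; no failure past here */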
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index b06acd0f400d7dd6fc01cfc504992b5c91b1d361..cfc4dd8997e54c898ae71ca99030eb507403ad1c 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1944,7 +1944,7 @@ static __net_exit void l2tp_exit_net(struct net *net)
 
 	rcu_read_lock_bh();
 	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
-		(void)l2tp_tunnel_delete(tunnel);
+		l2tp_tunnel_delete(tunnel);
 	}
 	rcu_read_unlock_bh();
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 3468d5635d0a0e78cde03792382cc4d792486209..9d77a54e8854ea15184a811c20bc0aff5b29c51d 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -48,7 +48,8 @@ static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
 	return (struct l2tp_ip_sock *)sk;
 }
 
-static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
+static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr,
+					  __be32 raddr, int dif, u32 tunnel_id)
 {
 	struct sock *sk;
 
@@ -62,6 +63,7 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif
 		if ((l2tp->conn_id == tunnel_id) &&
 		    net_eq(sock_net(sk), net) &&
 		    !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
+		    (!inet->inet_daddr || !raddr || inet->inet_daddr == raddr) &&
 		    (!sk->sk_bound_dev_if || !dif ||
 		     sk->sk_bound_dev_if == dif))
 			goto found;
@@ -72,15 +74,6 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif
 	return sk;
 }
 
-static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
-{
-	struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id);
-	if (sk)
-		sock_hold(sk);
-
-	return sk;
-}
-
 /* When processing receive frames, there are two cases to
  * consider. Data frames consist of a non-zero session-id and an
  * optional cookie. Control frames consist of a regular L2TP header
@@ -186,8 +179,8 @@ static int l2tp_ip_recv(struct sk_buff *skb)
 		struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
 
 		read_lock_bh(&l2tp_ip_lock);
-		sk = __l2tp_ip_bind_lookup(net, iph->daddr, inet_iif(skb),
-					   tunnel_id);
+		sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr,
+					   inet_iif(skb), tunnel_id);
 		if (!sk) {
 			read_unlock_bh(&l2tp_ip_lock);
 			goto discard;
@@ -289,7 +282,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		inet->inet_saddr = 0;  /* Use device */
 
 	write_lock_bh(&l2tp_ip_lock);
-	if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr,
+	if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0,
 				  sk->sk_bound_dev_if, addr->l2tp_conn_id)) {
 		write_unlock_bh(&l2tp_ip_lock);
 		ret = -EADDRINUSE;
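The bind-lookup hunks above (and the IPv6 counterpart that follows) add the sender's address to the socket match, so a connected L2TP/IP socket only receives frames from its peer while wildcard sockets still match anything. The predicate, restated as a sketch with the field names used in the hunk:

/* Illustrative only: does a bound socket accept this (laddr, raddr)? */
static bool l2tp_sk_matches(const struct inet_sock *inet,
			    __be32 laddr, __be32 raddr)
{
	if (inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr)
		return false;		/* bound to a different local addr */
	if (inet->inet_daddr && raddr && inet->inet_daddr != raddr)
		return false;		/* connected to a different peer */
	return true;			/* wildcards (0) match anything */
}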
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 96efe47ec8c20ee7db2dc6dd0ef2af91e8d10ca2..86ad51a039c4806bd27a12c6898ed95427cc8f74 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -59,12 +59,14 @@ static inline struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk)
 
 static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
 					   struct in6_addr *laddr,
+					   const struct in6_addr *raddr,
 					   int dif, u32 tunnel_id)
 {
 	struct sock *sk;
 
 	sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
 		const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk);
+		const struct in6_addr *sk_raddr = &sk->sk_v6_daddr;
 		struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
 
 		if (l2tp == NULL)
@@ -73,6 +75,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
 		if ((l2tp->conn_id == tunnel_id) &&
 		    net_eq(sock_net(sk), net) &&
 		    (!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) &&
+		    (!raddr || ipv6_addr_any(sk_raddr) || ipv6_addr_equal(sk_raddr, raddr)) &&
 		    (!sk->sk_bound_dev_if || !dif ||
 		     sk->sk_bound_dev_if == dif))
 			goto found;
@@ -83,17 +86,6 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
 	return sk;
 }
 
-static inline struct sock *l2tp_ip6_bind_lookup(struct net *net,
-						struct in6_addr *laddr,
-						int dif, u32 tunnel_id)
-{
-	struct sock *sk = __l2tp_ip6_bind_lookup(net, laddr, dif, tunnel_id);
-	if (sk)
-		sock_hold(sk);
-
-	return sk;
-}
-
 /* When processing receive frames, there are two cases to
  * consider. Data frames consist of a non-zero session-id and an
  * optional cookie. Control frames consist of a regular L2TP header
@@ -200,8 +192,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		read_lock_bh(&l2tp_ip6_lock);
-		sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, inet6_iif(skb),
-					    tunnel_id);
+		sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
+					    inet6_iif(skb), tunnel_id);
 		if (!sk) {
 			read_unlock_bh(&l2tp_ip6_lock);
 			goto discard;
@@ -339,7 +331,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	rcu_read_unlock();
 
 	write_lock_bh(&l2tp_ip6_lock);
-	if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, bound_dev_if,
+	if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, NULL, bound_dev_if,
 				   addr->l2tp_conn_id)) {
 		write_unlock_bh(&l2tp_ip6_lock);
 		err = -EADDRINUSE;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 1ccd310d01a5ff303a958042dcd6d0d9a3c042e5..ee03bc866d1b7141425453dd928e32be18b32527 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -287,7 +287,7 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
 	l2tp_tunnel_notify(&l2tp_nl_family, info,
 			   tunnel, L2TP_CMD_TUNNEL_DELETE);
 
-	(void) l2tp_tunnel_delete(tunnel);
+	l2tp_tunnel_delete(tunnel);
 
 out:
 	return ret;
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index e75cbf6ecc26e4ec7bde777c385e001c0e49e371..a0d901d8992ea892bbb44890e08d6f782c68b4e3 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -231,9 +231,6 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata)
 		    !(sta->sdata->bss && sta->sdata->bss == sdata->bss))
 			continue;
 
-		if (!sta->uploaded || !test_sta_flag(sta, WLAN_STA_ASSOC))
-			continue;
-
 		max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta));
 	}
 	rcu_read_unlock();
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 5c67a696e04689696a7ab22ccfa6af983e49e960..b4b3fe07886891618d96bc997775554a5621b68c 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -279,8 +279,6 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
 	/* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */
 	*pos |= ifmsh->ps_peers_deep_sleep ?
 			IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00;
-	*pos++ = 0x00;
-
 	return 0;
 }
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index b747c9645e432bffe5b9d266a51e7ffca3b75ec3..fed598a202c8996a6ef22a909318c0ae70ea5939 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -788,7 +788,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
 	struct mesh_path *mpath;
 	u8 ttl, flags, hopcount;
 	const u8 *orig_addr;
-	u32 orig_sn, metric, metric_txsta, interval;
+	u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval;
 	bool root_is_gate;
 
 	ttl = rann->rann_ttl;
@@ -799,7 +799,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
 	interval = le32_to_cpu(rann->rann_interval);
 	hopcount = rann->rann_hopcount;
 	hopcount++;
-	metric = le32_to_cpu(rann->rann_metric);
+	orig_metric = le32_to_cpu(rann->rann_metric);
 
 	/*  Ignore our own RANNs */
 	if (ether_addr_equal(orig_addr, sdata->vif.addr))
@@ -816,7 +816,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
 		return;
 	}
 
-	metric_txsta = airtime_link_metric_get(local, sta);
+	last_hop_metric = airtime_link_metric_get(local, sta);
+	new_metric = orig_metric + last_hop_metric;
+	if (new_metric < orig_metric)
+		new_metric = MAX_METRIC;
 
 	mpath = mesh_path_lookup(sdata, orig_addr);
 	if (!mpath) {
@@ -829,7 +832,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
 	}
 
 	if (!(SN_LT(mpath->sn, orig_sn)) &&
-	    !(mpath->sn == orig_sn && metric < mpath->rann_metric)) {
+	    !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) {
 		rcu_read_unlock();
 		return;
 	}
@@ -847,7 +850,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
 	}
 
 	mpath->sn = orig_sn;
-	mpath->rann_metric = metric + metric_txsta;
+	mpath->rann_metric = new_metric;
 	mpath->is_root = true;
 	/* Recording RANNs sender address to send individually
 	 * addressed PREQs destined for root mesh STA */
@@ -867,7 +870,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
 		mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr,
 				       orig_sn, 0, NULL, 0, broadcast_addr,
 				       hopcount, ttl, interval,
-				       metric + metric_txsta, 0, sdata);
+				       new_metric, 0, sdata);
 	}
 
 	rcu_read_unlock();
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 274c564bd9af86185f53676a67c99f7a300d9390..1ffd1e145c13aa0e227390fd4687f578ccbd6723 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1244,7 +1244,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 
 static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
 					  struct ieee80211_vif *vif,
-					  struct ieee80211_sta *pubsta,
+					  struct sta_info *sta,
 					  struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -1258,10 +1258,13 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
 	if (!ieee80211_is_data(hdr->frame_control))
 		return NULL;
 
-	if (pubsta) {
+	if (sta) {
 		u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
 
-		txq = pubsta->txq[tid];
+		if (!sta->uploaded)
+			return NULL;
+
+		txq = sta->sta.txq[tid];
 	} else if (vif) {
 		txq = vif->txq;
 	}
@@ -1499,23 +1502,17 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local,
 	struct fq *fq = &local->fq;
 	struct ieee80211_vif *vif;
 	struct txq_info *txqi;
-	struct ieee80211_sta *pubsta;
 
 	if (!local->ops->wake_tx_queue ||
 	    sdata->vif.type == NL80211_IFTYPE_MONITOR)
 		return false;
 
-	if (sta && sta->uploaded)
-		pubsta = &sta->sta;
-	else
-		pubsta = NULL;
-
 	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		sdata = container_of(sdata->bss,
 				     struct ieee80211_sub_if_data, u.ap);
 
 	vif = &sdata->vif;
-	txqi = ieee80211_get_txq(local, vif, pubsta, skb);
+	txqi = ieee80211_get_txq(local, vif, sta, skb);
 
 	if (!txqi)
 		return false;
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index efa3f48f1ec5d51ea7191c8ae90dda77c0d330e1..73e8f347802ecadf5efb11fffa7b13d32a5705c1 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -293,7 +293,8 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx)
 			return RX_DROP_UNUSABLE;
 		ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key);
 		/* remove ICV */
-		if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN))
+		if (!(status->flag & RX_FLAG_ICV_STRIPPED) &&
+		    pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN))
 			return RX_DROP_UNUSABLE;
 	}
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 5c71d60f3a64379946f52175837e6c2f743f56ac..caa5986cb2e47fdcfb4c7be8690ba97d92d69322 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -295,7 +295,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 		return RX_DROP_UNUSABLE;
 
 	/* Trim ICV */
-	skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN);
+	if (!(status->flag & RX_FLAG_ICV_STRIPPED))
+		skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN);
 
 	/* Remove IV */
 	memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 1309e2c34764de98b7bf6bb1fe964b0f4b07162f..c5a5a6959c1b878cc3935a0bdd470ab71b4f68a8 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -937,6 +937,8 @@ static void mpls_ifdown(struct net_device *dev, int event)
 {
 	struct mpls_route __rcu **platform_label;
 	struct net *net = dev_net(dev);
+	unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN;
+	unsigned int alive;
 	unsigned index;
 
 	platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -946,9 +948,11 @@ static void mpls_ifdown(struct net_device *dev, int event)
 		if (!rt)
 			continue;
 
+		alive = 0;
 		change_nexthops(rt) {
 			if (rtnl_dereference(nh->nh_dev) != dev)
-				continue;
+				goto next;
+
 			switch (event) {
 			case NETDEV_DOWN:
 			case NETDEV_UNREGISTER:
@@ -956,13 +960,16 @@ static void mpls_ifdown(struct net_device *dev, int event)
 				/* fall through */
 			case NETDEV_CHANGE:
 				nh->nh_flags |= RTNH_F_LINKDOWN;
-				if (event != NETDEV_UNREGISTER)
-					ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
 				break;
 			}
 			if (event == NETDEV_UNREGISTER)
 				RCU_INIT_POINTER(nh->nh_dev, NULL);
+next:
+			if (!(nh->nh_flags & nh_flags))
+				alive++;
 		} endfor_nexthops(rt);
+
+		WRITE_ONCE(rt->rt_nhn_alive, alive);
 	}
 }
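Rather than decrementing rt_nhn_alive in place, the mpls_ifdown() hunk above recounts the live next hops on every event and publishes the result once, a count that cannot drift even if the same flag is set twice. The shape of the idiom, as a sketch using the names from the hunk:

/* Illustrative only: recompute-and-publish instead of read-modify-write. */
unsigned int alive = 0;

change_nexthops(rt) {
	/* ...update nh->nh_flags for this event as the hunk does... */
	if (!(nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)))
		alive++;		/* count what is really usable */
} endfor_nexthops(rt);

WRITE_ONCE(rt->rt_nhn_alive, alive);	/* one coherent store for readers */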
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 004af030ef1abcdf554467f60a350649e205d80e..d869ea50623e095e30154c8d684106361b05a39f 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -364,6 +364,11 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
 			ret = nf_queue(skb, state, &entry, verdict);
 			if (ret == 1 && entry)
 				goto next_hook;
+		} else {
+			/* Implicit handling for NF_STOLEN, as well as any other
+			 * non conventional verdicts.
+			 */
+			ret = 0;
 		}
 	}
 	return ret;
 }
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a6e44ef2ec9a97dfd23ced452f2ec260e57f884c..2155c2498aed637c893da6cd3218db2e01e6fb66 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2040,12 +2040,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 		seq_puts(seq,
 			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
 	} else {
+		struct net *net = seq_file_net(seq);
+		struct netns_ipvs *ipvs = net_ipvs(net);
 		const struct ip_vs_service *svc = v;
 		const struct ip_vs_iter *iter = seq->private;
 		const struct ip_vs_dest *dest;
 		struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
 		char *sched_name = sched ? sched->name : "none";
 
+		if (svc->ipvs != ipvs)
+			return 0;
 		if (iter->table == ip_vs_svc_table) {
 #ifdef CONFIG_IP_VS_IPV6
 			if (svc->af == AF_INET6)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 750b8bf13e6056a2fcfcd191eeb2c1721d5d86eb..8da893c397da146fbdf0dfc5eb06ffafeceb1356 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1023,7 +1023,7 @@ static void gc_worker(struct work_struct *work)
 	next_run = gc_work->next_gc_run;
 	gc_work->last_bucket = i;
-	queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
+	queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
 }
 
 static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
@@ -1922,7 +1922,7 @@ int nf_conntrack_init_start(void)
 	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
 
 	conntrack_gc_work_init(&conntrack_gc_work);
-	queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
+	queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
 
 	return 0;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index b1fcfa08f0b485cd8b90ecf32b9a5aeb6c622d0f..3f499126727c8e9b4b9016a62938c4fea50b8903 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -32,6 +33,13 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pablo Neira Ayuso ");
 MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers");
 
+struct nfnl_cthelper {
+	struct list_head list;
+	struct nf_conntrack_helper helper;
+};
+
+static LIST_HEAD(nfnl_cthelper_list);
+
 static int
 nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff,
 			struct nf_conn *ct, enum ip_conntrack_info ctinfo)
@@ -205,18 +213,20 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
 		     struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_helper *helper;
+	struct nfnl_cthelper *nfcth;
 	int ret;
 
 	if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN])
 		return -EINVAL;
 
-	helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL);
-	if (helper == NULL)
+	nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL);
+	if (nfcth == NULL)
 		return -ENOMEM;
+	helper = &nfcth->helper;
 
 	ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]);
 	if (ret < 0)
-		goto err;
+		goto err1;
 
 	strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
 	helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
@@ -247,14 +257,100 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
 
 	ret = nf_conntrack_helper_register(helper);
 	if (ret < 0)
-		goto err;
+		goto err2;
 
+	list_add_tail(&nfcth->list, &nfnl_cthelper_list);
 	return 0;
-err:
-	kfree(helper);
+err2:
+	kfree(helper->expect_policy);
+err1:
+	kfree(nfcth);
 	return ret;
 }
 
+static int
+nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
+				struct nf_conntrack_expect_policy *new_policy,
+				const struct nlattr *attr)
+{
+	struct nlattr *tb[NFCTH_POLICY_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
+			       nfnl_cthelper_expect_pol);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFCTH_POLICY_NAME] ||
+	    !tb[NFCTH_POLICY_EXPECT_MAX] ||
+	    !tb[NFCTH_POLICY_EXPECT_TIMEOUT])
+		return -EINVAL;
+
+	if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name))
+		return -EBUSY;
+
+	new_policy->max_expected =
+		ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
+	new_policy->timeout =
+		ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
+
+	return 0;
+}
+
+static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
+					   struct nf_conntrack_helper *helper)
+{
+	struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1];
+	struct nf_conntrack_expect_policy *policy;
+	int i, err;
+
+	/* Check first that all policy attributes are well-formed, so we don't
+	 * leave things in inconsistent state on errors.
+	 */
+	for (i = 0; i < helper->expect_class_max + 1; i++) {
+
+		if (!tb[NFCTH_POLICY_SET + i])
+			return -EINVAL;
+
+		err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
+						      &new_policy[i],
+						      tb[NFCTH_POLICY_SET + i]);
+		if (err < 0)
+			return err;
+	}
+	/* Now we can safely update them. */
+	for (i = 0; i < helper->expect_class_max + 1; i++) {
+		policy = (struct nf_conntrack_expect_policy *)
+				&helper->expect_policy[i];
+		policy->max_expected = new_policy->max_expected;
+		policy->timeout	= new_policy->timeout;
+	}
+
+	return 0;
+}
+
+static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,
+				       const struct nlattr *attr)
+{
+	struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1];
+	unsigned int class_max;
+	int err;
+
+	err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
+			       nfnl_cthelper_expect_policy_set);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFCTH_POLICY_SET_NUM])
+		return -EINVAL;
+
+	class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
+	if (helper->expect_class_max + 1 != class_max)
+		return -EBUSY;
+
+	return nfnl_cthelper_update_policy_all(tb, helper);
+}
+
 static int
 nfnl_cthelper_update(const struct nlattr * const tb[],
 		     struct nf_conntrack_helper *helper)
@@ -265,8 +361,7 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
 		return -EBUSY;
 
 	if (tb[NFCTH_POLICY]) {
-		ret = nfnl_cthelper_parse_expect_policy(helper,
-							tb[NFCTH_POLICY]);
+		ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
 		if (ret < 0)
 			return ret;
 	}
@@ -295,7 +390,11 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
 	const char *helper_name;
 	struct nf_conntrack_helper *cur, *helper = NULL;
 	struct nf_conntrack_tuple tuple;
-	int ret = 0, i;
+	struct nfnl_cthelper *nlcth;
+	int ret = 0;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
 
 	if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
 		return -EINVAL;
@@ -306,31 +405,22 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
 	if (ret < 0)
 		return ret;
 
-	rcu_read_lock();
-	for (i = 0; i < nf_ct_helper_hsize && !helper; i++) {
-		hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+	list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+		cur = &nlcth->helper;
 
-			/* skip non-userspace conntrack helpers. */
-			if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-				continue;
+		if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+			continue;
 
-			if (strncmp(cur->name, helper_name,
-				    NF_CT_HELPER_NAME_LEN) != 0)
-				continue;
+		if ((tuple.src.l3num != cur->tuple.src.l3num ||
+		     tuple.dst.protonum != cur->tuple.dst.protonum))
+			continue;
 
-			if ((tuple.src.l3num != cur->tuple.src.l3num ||
-			     tuple.dst.protonum != cur->tuple.dst.protonum))
-				continue;
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
 
-			if (nlh->nlmsg_flags & NLM_F_EXCL) {
-				ret = -EEXIST;
-				goto err;
-			}
-			helper = cur;
-			break;
-		}
+		helper = cur;
+		break;
 	}
-	rcu_read_unlock();
 
 	if (helper == NULL)
 		ret = nfnl_cthelper_create(tb, &tuple);
@@ -338,9 +428,6 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
 		ret = nfnl_cthelper_update(tb, helper);
 
 	return ret;
-err:
-	rcu_read_unlock();
-	return ret;
 }
 
 static int
@@ -504,13 +591,17 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
 			     const struct nlattr * const tb[])
 {
-	int ret = -ENOENT, i;
+	int ret = -ENOENT;
 	struct nf_conntrack_helper *cur;
 	struct sk_buff *skb2;
 	char *helper_name = NULL;
 	struct nf_conntrack_tuple tuple;
+	struct nfnl_cthelper *nlcth;
 	bool tuple_set = false;
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = nfnl_cthelper_dump_table,
@@ -529,45 +620,39 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
 		tuple_set = true;
 	}
 
-	for (i = 0; i < nf_ct_helper_hsize; i++) {
-		hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
-
-			/* skip non-userspace conntrack helpers. */
-			if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-				continue;
+	list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+		cur = &nlcth->helper;
+		if (helper_name &&
+		    strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+			continue;
 
-			if (helper_name && strncmp(cur->name, helper_name,
-						   NF_CT_HELPER_NAME_LEN) != 0) {
-				continue;
-			}
-			if (tuple_set &&
-			    (tuple.src.l3num != cur->tuple.src.l3num ||
-			     tuple.dst.protonum != cur->tuple.dst.protonum))
-				continue;
+		if (tuple_set &&
+		    (tuple.src.l3num != cur->tuple.src.l3num ||
+		     tuple.dst.protonum != cur->tuple.dst.protonum))
+			continue;
 
-			skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-			if (skb2 == NULL) {
-				ret = -ENOMEM;
-				break;
-			}
+		skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+		if (skb2 == NULL) {
+			ret = -ENOMEM;
+			break;
+		}
 
-			ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
-						      nlh->nlmsg_seq,
-						      NFNL_MSG_TYPE(nlh->nlmsg_type),
-						      NFNL_MSG_CTHELPER_NEW, cur);
-			if (ret <= 0) {
-				kfree_skb(skb2);
-				break;
-			}
+		ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
+					      nlh->nlmsg_seq,
+					      NFNL_MSG_TYPE(nlh->nlmsg_type),
+					      NFNL_MSG_CTHELPER_NEW, cur);
+		if (ret <= 0) {
+			kfree_skb(skb2);
+			break;
+		}
 
-			ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
-					      MSG_DONTWAIT);
-			if (ret > 0)
-				ret = 0;
+		ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
+				      MSG_DONTWAIT);
+		if (ret > 0)
+			ret = 0;
 
-			/* this avoids a loop in nfnetlink. */
-			return ret == -EAGAIN ? -ENOBUFS : ret;
-		}
+		/* this avoids a loop in nfnetlink. */
+		return ret == -EAGAIN ? -ENOBUFS : ret;
 	}
 	return ret;
 }
@@ -578,10 +663,13 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 {
 	char *helper_name = NULL;
 	struct nf_conntrack_helper *cur;
-	struct hlist_node *tmp;
 	struct nf_conntrack_tuple tuple;
 	bool tuple_set = false, found = false;
-	int i, j = 0, ret;
+	struct nfnl_cthelper *nlcth, *n;
+	int j = 0, ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
 
 	if (tb[NFCTH_NAME])
 		helper_name = nla_data(tb[NFCTH_NAME]);
@@ -594,28 +682,27 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 		tuple_set = true;
 	}
 
-	for (i = 0; i < nf_ct_helper_hsize; i++) {
-		hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
-					  hnode) {
-			/* skip non-userspace conntrack helpers. */
-			if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-				continue;
+	list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+		cur = &nlcth->helper;
+		j++;
 
-			j++;
+		if (helper_name &&
+		    strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+			continue;
 
-			if (helper_name && strncmp(cur->name, helper_name,
-						   NF_CT_HELPER_NAME_LEN) != 0) {
-				continue;
-			}
-			if (tuple_set &&
-			    (tuple.src.l3num != cur->tuple.src.l3num ||
-			     tuple.dst.protonum != cur->tuple.dst.protonum))
-				continue;
+		if (tuple_set &&
+		    (tuple.src.l3num != cur->tuple.src.l3num ||
+		     tuple.dst.protonum != cur->tuple.dst.protonum))
+			continue;
 
-			found = true;
-			nf_conntrack_helper_unregister(cur);
-		}
+		found = true;
+		nf_conntrack_helper_unregister(cur);
+		kfree(cur->expect_policy);
+
+		list_del(&nlcth->list);
+		kfree(nlcth);
 	}
+
 	/* Make sure we return success if we flush and there is no helpers */
 	return (found || j == 0) ? 0 : -ENOENT;
 }
@@ -664,20 +751,16 @@ static int __init nfnl_cthelper_init(void)
 static void __exit nfnl_cthelper_exit(void)
 {
 	struct nf_conntrack_helper *cur;
-	struct hlist_node *tmp;
-	int i;
+	struct nfnl_cthelper *nlcth, *n;
 
 	nfnetlink_subsys_unregister(&nfnl_cthelper_subsys);
 
-	for (i=0; i<nf_ct_helper_hsize; i++) {
-		hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
-					  hnode) {
-			/* skip non-userspace conntrack helpers. */
-			if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-				continue;
+	list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+		cur = &nlcth->helper;
 
-		nf_conntrack_helper_unregister(cur);
-	}
+		nf_conntrack_helper_unregister(cur);
+		kfree(cur->expect_policy);
+		kfree(nlcth);
 	}
 }
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index af832c5260485f3f99a3dae8fdf0019f896350ef..5efb40291ac319b575f321199809def8202f60d8 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	skb = alloc_skb(size, GFP_ATOMIC);
 	if (!skb) {
 		skb_tx_error(entskb);
-		return NULL;
+		goto nlmsg_failure;
 	}
 
 	nlh = nlmsg_put(skb, 0, 0,
@@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	if (!nlh) {
 		skb_tx_error(entskb);
 		kfree_skb(skb);
-		return NULL;
+		goto nlmsg_failure;
 	}
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family = entry->state.pf;
@@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	}
 
 	nlh->nlmsg_len = skb->len;
+	if (seclen)
+		security_release_secctx(secdata, seclen);
 	return skb;
 
nla_put_failure:
 	skb_tx_error(entskb);
 	kfree_skb(skb);
 	net_err_ratelimited("nf_queue: error creating packet message\n");
+nlmsg_failure:
+	if (seclen)
+		security_release_secctx(secdata, seclen);
 	return NULL;
 }
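The nfnetlink_queue.c hunks above route every exit of nfqnl_build_packet_message() through a label that frees the security context, so the secctx obtained earlier in the function is released on the success path and on both allocation-failure paths alike. The idiom, sketched with the identifiers from the hunk:

/* Illustrative only: one release point covering every return path. */
	nlh->nlmsg_len = skb->len;
	if (seclen)
		security_release_secctx(secdata, seclen);	/* success */
	return skb;

nla_put_failure:
	kfree_skb(skb);
nlmsg_failure:
	if (seclen)
		security_release_secctx(secdata, seclen);	/* errors */
	return NULL;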
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 2455b69b58104366e4670967f7a169994af3ea97..b589a62e68a29a56e842e8b1bb01df885f6154a4 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -19,6 +19,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -69,6 +70,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
 	struct xt_osf_finger *kf = NULL, *sf;
 	int err = 0;
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (!osf_attrs[OSF_ATTR_FINGER])
 		return -EINVAL;
 
@@ -113,6 +117,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
 	struct xt_osf_finger *sf;
 	int err = -ENOENT;
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (!osf_attrs[OSF_ATTR_FINGER])
 		return -EINVAL;
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
index cd8309c0936e8814e95db82837a7d5757f521195..3dfa5a000853d402635b73abefc35f57f795dd7c 100644
--- a/net/netfilter/xt_qtaguid.c
+++ b/net/netfilter/xt_qtaguid.c
@@ -519,13 +519,11 @@ static struct tag_ref *get_tag_ref(tag_t full_tag,
 
 	DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n", full_tag);
-	spin_lock_bh(&uid_tag_data_tree_lock);
 	tr_entry = lookup_tag_ref(full_tag, &utd_entry);
 	BUG_ON(IS_ERR_OR_NULL(utd_entry));
 	if (!tr_entry)
 		tr_entry = new_tag_ref(full_tag, utd_entry);
 
-	spin_unlock_bh(&uid_tag_data_tree_lock);
 	if (utd_res)
 		*utd_res = utd_entry;
 	DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
@@ -2027,6 +2025,7 @@ static int ctrl_cmd_delete(const char *input)
 
 	/* Delete socket tags */
 	spin_lock_bh(&sock_tag_list_lock);
+	spin_lock_bh(&uid_tag_data_tree_lock);
 	node = rb_first(&sock_tag_tree);
 	while (node) {
 		st_entry = rb_entry(node, struct sock_tag, sock_node);
@@ -2056,6 +2055,7 @@ static int ctrl_cmd_delete(const char *input)
 			list_del(&st_entry->list);
 		}
 	}
+	spin_unlock_bh(&uid_tag_data_tree_lock);
 	spin_unlock_bh(&sock_tag_list_lock);
 
 	sock_tag_tree_erase(&st_to_free_tree);
@@ -2265,10 +2265,12 @@ static int ctrl_cmd_tag(const char *input)
 	full_tag = combine_atag_with_uid(acct_tag, uid_int);
 
 	spin_lock_bh(&sock_tag_list_lock);
+	spin_lock_bh(&uid_tag_data_tree_lock);
 	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
 	tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
 	if (IS_ERR(tag_ref_entry)) {
 		res = PTR_ERR(tag_ref_entry);
+		spin_unlock_bh(&uid_tag_data_tree_lock);
 		spin_unlock_bh(&sock_tag_list_lock);
 		goto err_put;
 	}
@@ -2295,9 +2297,14 @@ static int ctrl_cmd_tag(const char *input)
 			pr_err("qtaguid: ctrl_tag(%s): "
 			       "socket tag alloc failed\n",
 			       input);
+			BUG_ON(tag_ref_entry->num_sock_tags <= 0);
+			tag_ref_entry->num_sock_tags--;
+			free_tag_ref_from_utd_entry(tag_ref_entry,
+						    uid_tag_data_entry);
+			spin_unlock_bh(&uid_tag_data_tree_lock);
 			spin_unlock_bh(&sock_tag_list_lock);
 			res = -ENOMEM;
-			goto err_tag_unref_put;
+			goto err_put;
 		}
 		/*
 		 * Hold the sk refcount here to make sure the sk pointer cannot
@@ -2307,7 +2314,6 @@ static int ctrl_cmd_tag(const char *input)
 		sock_tag_entry->sk = el_socket->sk;
 		sock_tag_entry->pid = current->tgid;
 		sock_tag_entry->tag = combine_atag_with_uid(acct_tag, uid_int);
-		spin_lock_bh(&uid_tag_data_tree_lock);
 		pqd_entry = proc_qtu_data_tree_search(
 			&proc_qtu_data_tree, current->tgid);
 		/*
@@ -2325,11 +2331,11 @@ static int ctrl_cmd_tag(const char *input)
 		else
 			list_add(&sock_tag_entry->list,
 				 &pqd_entry->sock_tag_list);
-		spin_unlock_bh(&uid_tag_data_tree_lock);
 
 		sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
 		atomic64_inc(&qtu_events.sockets_tagged);
 	}
+	spin_unlock_bh(&uid_tag_data_tree_lock);
 	spin_unlock_bh(&sock_tag_list_lock);
 	/* We keep the ref to the sk until it is untagged */
 	CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->sk_refcnt=%d\n",
@@ -2338,10 +2344,6 @@ static int ctrl_cmd_tag(const char *input)
 	sockfd_put(el_socket);
 	return 0;
-err_tag_unref_put:
-	BUG_ON(tag_ref_entry->num_sock_tags <= 0);
-	tag_ref_entry->num_sock_tags--;
-	free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
 err_put:
 	CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->sk_refcnt=%d\n",
 		 input, atomic_read(&el_socket->sk->sk_refcnt) - 1);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c9fac08a53b14e367efd952cccbdfc5ccb84bc54..e1c123d4cddaa86086d5ce7ff27679543e9b663b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -96,6 +96,44 @@ EXPORT_SYMBOL_GPL(nl_table);
 
 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
 
+static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];
+
+static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
+	"nlk_cb_mutex-ROUTE",
+	"nlk_cb_mutex-1",
+	"nlk_cb_mutex-USERSOCK",
+	"nlk_cb_mutex-FIREWALL",
+	"nlk_cb_mutex-SOCK_DIAG",
+	"nlk_cb_mutex-NFLOG",
+	"nlk_cb_mutex-XFRM",
+	"nlk_cb_mutex-SELINUX",
+	"nlk_cb_mutex-ISCSI",
+	"nlk_cb_mutex-AUDIT",
+	"nlk_cb_mutex-FIB_LOOKUP",
+	"nlk_cb_mutex-CONNECTOR",
+	"nlk_cb_mutex-NETFILTER",
+	"nlk_cb_mutex-IP6_FW",
+	"nlk_cb_mutex-DNRTMSG",
+	"nlk_cb_mutex-KOBJECT_UEVENT",
+	"nlk_cb_mutex-GENERIC",
+	"nlk_cb_mutex-17",
+	"nlk_cb_mutex-SCSITRANSPORT",
+	"nlk_cb_mutex-ECRYPTFS",
+	"nlk_cb_mutex-RDMA",
+	"nlk_cb_mutex-CRYPTO",
+	"nlk_cb_mutex-SMC",
+	"nlk_cb_mutex-23",
+	"nlk_cb_mutex-24",
+	"nlk_cb_mutex-25",
+	"nlk_cb_mutex-26",
+	"nlk_cb_mutex-27",
+	"nlk_cb_mutex-28",
+	"nlk_cb_mutex-29",
+	"nlk_cb_mutex-30",
+	"nlk_cb_mutex-31",
+	"nlk_cb_mutex-MAX_LINKS"
+};
+
 static int netlink_dump(struct sock *sk);
 static void netlink_skb_destructor(struct sk_buff *skb);
 
@@ -223,6 +261,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
 	struct sock *sk = skb->sk;
 	int ret = -ENOMEM;
 
+	if (!net_eq(dev_net(dev), sock_net(sk)))
+		return 0;
+
 	dev_hold(dev);
 
 	if (is_vmalloc_addr(skb->head))
@@ -585,6 +626,9 @@ static int __netlink_create(struct net *net, struct socket *sock,
 	} else {
 		nlk->cb_mutex = &nlk->cb_def_mutex;
 		mutex_init(nlk->cb_mutex);
+		lockdep_set_class_and_name(nlk->cb_mutex,
+					   nlk_cb_mutex_keys + protocol,
+					   nlk_cb_mutex_key_strings[protocol]);
 	}
 	init_waitqueue_head(&nlk->wait);
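Giving each netlink protocol its own lock_class_key and name, as the af_netlink.c hunk above does, stops lockdep from folding every cb_mutex instance into one class, so a harmless nesting of two different protocols' mutexes no longer reports as a self-deadlock. The general recipe for splitting a lock class, as a sketch (MAX_KINDS, my_mutex_* and obj/kind are hypothetical names, not kernel API):

/* Illustrative only: per-instance lockdep classes for one mutex type. */
static struct lock_class_key my_mutex_keys[MAX_KINDS];
static const char *const my_mutex_names[MAX_KINDS] = {
	"my_mutex-A", "my_mutex-B",
};

mutex_init(&obj->mutex);
lockdep_set_class_and_name(&obj->mutex,
			   &my_mutex_keys[kind],	/* distinct key ... */
			   my_mutex_names[kind]);	/* ... and report name */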
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 07925418c2a5ae6afa366f2b1fb577e9d7150dd8..1668916bdbde1fa02e4b265357dfddfe04efb904 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1789,14 +1789,11 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
 
 #define MAX_ACTIONS_BUFSIZE	(32 * 1024)
 
-static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
+static struct sw_flow_actions *nla_alloc_flow_actions(int size)
 {
 	struct sw_flow_actions *sfa;
 
-	if (size > MAX_ACTIONS_BUFSIZE) {
-		OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
-		return ERR_PTR(-EINVAL);
-	}
+	WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
 
 	sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
 	if (!sfa)
@@ -1869,12 +1866,15 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
 
 	new_acts_size = ksize(*sfa) * 2;
 
 	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
-		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
+		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
+			OVS_NLERR(log, "Flow action size exceeds max %u",
+				  MAX_ACTIONS_BUFSIZE);
 			return ERR_PTR(-EMSGSIZE);
+		}
 		new_acts_size = MAX_ACTIONS_BUFSIZE;
 	}
 
-	acts = nla_alloc_flow_actions(new_acts_size, log);
+	acts = nla_alloc_flow_actions(new_acts_size);
 	if (IS_ERR(acts))
 		return (void *)acts;
 
@@ -2500,7 +2500,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 {
 	int err;
 
-	*sfa = nla_alloc_flow_actions(nla_len(attr), log);
+	*sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
 	if (IS_ERR(*sfa))
 		return PTR_ERR(*sfa);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e7f6657269e06cf09a72235299741642a443a52a..267db0d603bcc1621bba0750e2adc7c291249632 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1661,7 +1661,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		atomic_long_set(&rollover->num, 0);
 		atomic_long_set(&rollover->num_huge, 0);
 		atomic_long_set(&rollover->num_failed, 0);
-		po->rollover = rollover;
 	}
 
 	match = NULL;
@@ -1706,6 +1705,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
 			__dev_remove_pack(&po->prot_hook);
 			po->fanout = match;
+			po->rollover = rollover;
+			rollover = NULL;
 			atomic_inc(&match->sk_ref);
 			__fanout_link(sk, po);
 			err = 0;
@@ -1719,10 +1720,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 	}
 
 out:
-	if (err && rollover) {
-		kfree_rcu(rollover, rcu);
-		po->rollover = NULL;
-	}
+	kfree(rollover);
 	mutex_unlock(&fanout_mutex);
 	return err;
 }
@@ -1746,11 +1744,6 @@ static struct packet_fanout *fanout_release(struct sock *sk)
 			list_del(&f->list);
 		else
 			f = NULL;
-
-		if (po->rollover) {
-			kfree_rcu(po->rollover, rcu);
-			po->rollover = NULL;
-		}
 	}
 	mutex_unlock(&fanout_mutex);
 
@@ -3039,6 +3032,7 @@ static int packet_release(struct socket *sock)
 	synchronize_net();
 
 	if (f) {
+		kfree(po->rollover);
 		fanout_release_data(f);
 		kfree(f);
 	}
@@ -3107,6 +3101,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
 	if (need_rehook) {
 		if (po->running) {
 			rcu_read_unlock();
+			/* prevents packet_notifier() from calling
+			 * register_prot_hook()
+			 */
+			po->num = 0;
 			__unregister_prot_hook(sk, true);
 			rcu_read_lock();
 			dev_curr = po->prot_hook.dev;
@@ -3115,6 +3113,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
 							 dev->ifindex);
 		}
 
+		BUG_ON(po->running);
 		po->num = proto;
 		po->prot_hook.type = proto;
 
@@ -3853,7 +3852,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 	void *data = &val;
 	union tpacket_stats_u st;
 	struct tpacket_rollover_stats rstats;
-	struct packet_rollover *rollover;
 
 	if (level != SOL_PACKET)
 		return -ENOPROTOOPT;
@@ -3932,18 +3930,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 			       0);
 		break;
 	case PACKET_ROLLOVER_STATS:
-		rcu_read_lock();
-		rollover = rcu_dereference(po->rollover);
-		if (rollover) {
-			rstats.tp_all = atomic_long_read(&rollover->num);
-			rstats.tp_huge = atomic_long_read(&rollover->num_huge);
-			rstats.tp_failed = atomic_long_read(&rollover->num_failed);
-			data = &rstats;
-			lv = sizeof(rstats);
-		}
-		rcu_read_unlock();
-		if (!rollover)
+		if (!po->rollover)
 			return -EINVAL;
+		rstats.tp_all = atomic_long_read(&po->rollover->num);
+		rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
+		rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
+		data = &rstats;
+		lv = sizeof(rstats);
 		break;
 	case PACKET_TX_HAS_OFF:
 		val = po->tp_tx_has_off;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 9ee46314b7d76df47d683c252a92ce97398d592b..d55bfc34d6b33392ff47abd4f5116164eef6526e 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -92,7 +92,6 @@ struct packet_fanout {
 
 struct packet_rollover {
 	int			sock;
-	struct rcu_head		rcu;
 	atomic_long_t		num;
 	atomic_long_t		num_huge;
 	atomic_long_t		num_failed;
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index c985ecbe9bd62a81899d43eb802003d6bb4ebe08..ae5ac175b2bef96ffa614bc799db5cd90a7bdc08 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -252,7 +252,7 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
 	const int pkt_len = 20;
 	struct qrtr_hdr *hdr;
 	struct sk_buff *skb;
-	u32 *buf;
+	__le32 *buf;
 
 	skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
 	if (!skb)
@@ -269,7 +269,7 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
 	hdr->dst_node_id = cpu_to_le32(dst_node);
 	hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
 
-	buf = (u32 *)skb_put(skb, pkt_len);
+	buf = (__le32 *)skb_put(skb, pkt_len);
 	memset(buf, 0, pkt_len);
 	buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX);
 	buf[1] = cpu_to_le32(src_node);
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 60e90f76183813b20740e75427fe03775bc965c7..f6027f41cd34c9f50fe75eac0cdbc6beab5f1658 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 	long i;
 	int ret;
 
-	if (rs->rs_bound_addr == 0) {
+	if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
 		ret = -ENOTCONN; /* XXX not a great errno */
 		goto out;
 	}
@@ -524,6 +524,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
 	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
 
+	if (args->nr_local == 0)
+		return -EINVAL;
+
 	/* figure out the number of pages in the vector */
 	for (i = 0; i < args->nr_local; i++) {
 		if (copy_from_user(&vec, &local_vec[i],
@@ -873,6 +876,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
 err:
 	if (page)
 		put_page(page);
+	rm->atomic.op_active = 0;
 	kfree(rm->atomic.op_notifier);
 
 	return ret;
diff --git a/net/rds/send.c b/net/rds/send.c
index ad247dc71ebbd07d2aeb1ecea76d394701f9e083..ef53d164e146b943e4093f8b17bb2b4747ad2873 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1006,6 +1006,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
 			continue;
 
 		if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
+			if (cmsg->cmsg_len <
+			    CMSG_LEN(sizeof(struct rds_rdma_args)))
+				return -EINVAL;
+
 			args = CMSG_DATA(cmsg);
 			*rdma_bytes += args->remote_vec.bytes;
 		}
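The rds/send.c hunk above is the canonical control-message check: validate cmsg_len against the structure you are about to read before touching CMSG_DATA(), otherwise a short cmsg lets the kernel read past the end of the user-supplied buffer. In general:

/* Illustrative only: never trust CMSG_DATA() before checking cmsg_len. */
for_each_cmsghdr(cmsg, msg) {
	if (!CMSG_OK(msg, cmsg))
		return -EINVAL;			/* header itself malformed */
	if (cmsg->cmsg_type != RDS_CMSG_RDMA_ARGS)
		continue;
	if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)))
		return -EINVAL;			/* payload too short */
	args = CMSG_DATA(cmsg);			/* now safe to dereference */
}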
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 20e2923dc827fdc447082159bffe34d39caeca51..78f976d32018b3754f74c0d3b1088a9f7f0a8bb5 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -478,9 +478,10 @@ static void __net_exit rds_tcp_exit_net(struct net *net)
 	 * we do need to clean up the listen socket here.
 	 */
 	if (rtn->rds_tcp_listen_sock) {
-		rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+		struct socket *lsock = rtn->rds_tcp_listen_sock;
+
 		rtn->rds_tcp_listen_sock = NULL;
-		flush_work(&rtn->rds_tcp_accept_w);
+		rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
 	}
 }
 
@@ -517,10 +518,10 @@ static void rds_tcp_kill_sock(struct net *net)
 	struct rds_tcp_connection *tc, *_tc;
 	LIST_HEAD(tmp_list);
 	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+	struct socket *lsock = rtn->rds_tcp_listen_sock;
 
-	rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
 	rtn->rds_tcp_listen_sock = NULL;
-	flush_work(&rtn->rds_tcp_accept_w);
+	rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
 	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
 		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
@@ -540,8 +541,12 @@ static void rds_tcp_kill_sock(struct net *net)
 void *rds_tcp_listen_sock_def_readable(struct net *net)
 {
 	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+	struct socket *lsock = rtn->rds_tcp_listen_sock;
+
+	if (!lsock)
+		return NULL;
 
-	return rtn->rds_tcp_listen_sock->sk->sk_user_data;
+	return lsock->sk->sk_user_data;
 }
 
 static int rds_tcp_dev_event(struct notifier_block *this,
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 9a1cc890657679798cf58888c42d5bb2372f0fef..56ea6620fcf97ce40d0926089b5e5b188ea1a1fe 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk);
 
 /* tcp_listen.c */
 struct socket *rds_tcp_listen_init(struct net *);
-void rds_tcp_listen_stop(struct socket *);
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
 void rds_tcp_listen_data_ready(struct sock *sk);
 int rds_tcp_accept_one(struct socket *sock);
 int rds_tcp_keepalive(struct socket *sock);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 525b624fec8b8e5d7e191c905ea3be73c01dca63..185a56b1e29c57e1e43e5f69db3839148f73dbe7 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -227,6 +227,9 @@ void rds_tcp_listen_data_ready(struct sock *sk)
 	 * before it has been accepted and the accepter has set up their
 	 * data_ready.. we only want to queue listen work for our listening
 	 * socket
+	 *
+	 * (*ready)() may be null if we are racing with netns delete, and
+	 * the listen socket is being torn down.
 	 */
 	if (sk->sk_state == TCP_LISTEN)
 		rds_tcp_accept_work(sk);
@@ -235,7 +238,8 @@ void rds_tcp_listen_data_ready(struct sock *sk)
 
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
-	ready(sk);
+	if (ready)
+		ready(sk);
 }
 
 struct socket *rds_tcp_listen_init(struct net *net)
@@ -275,7 +279,7 @@ struct socket *rds_tcp_listen_init(struct net *net)
 	return NULL;
 }
 
-void rds_tcp_listen_stop(struct socket *sock)
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor)
 {
 	struct sock *sk;
 
@@ -296,5 +300,6 @@ void rds_tcp_listen_stop(struct socket *sock)
 
 	/* wait for accepts to stop and close the socket */
 	flush_workqueue(rds_wq);
+	flush_work(acceptor);
 	sock_release(sock);
 }
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 3f9d8d7ec6323a95de3e08d01098abdfcf33ff4f..b099b64366f356c27dea0a4dd215cc1034e61b55 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -275,6 +275,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
 		rxrpc_conn_retransmit_call(conn, skb);
 		return 0;
 
+	case RXRPC_PACKET_TYPE_BUSY:
*/ + return 0; + case RXRPC_PACKET_TYPE_ABORT: if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &wtmp, sizeof(wtmp)) < 0) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 44fb8d893c7d2c4227c09eb8badfca8e36bc92fe..1060d14d4e6af2e21a520cb1ce24cc7a872d5f10 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -649,6 +649,7 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_peer *peer; unsigned int mtu; + bool wake = false; u32 rwind = ntohl(ackinfo->rwind); _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", @@ -656,9 +657,14 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), rwind, ntohl(ackinfo->jumbo_max)); - if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) - rwind = RXRPC_RXTX_BUFF_SIZE - 1; - call->tx_winsize = rwind; + if (call->tx_winsize != rwind) { + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; + if (rwind > call->tx_winsize) + wake = true; + call->tx_winsize = rwind; + } + if (call->cong_ssthresh > rwind) call->cong_ssthresh = rwind; @@ -672,6 +678,9 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, spin_unlock_bh(&peer->lock); _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); } + + if (wake) + wake_up(&call->waitq); } /* diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e0aa30f83c6ccdb38864fbc6a3bdbb529d9da816..9617b42aaf20745d1c96857c8288ddbfebabd9c8 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -161,7 +161,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6b07fba5770b2fd0997eef25be5ea2e2803b54ca..fc3650b0619242d8aed6a51604087ec8d4f8b57b 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -211,7 +211,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, struct tcf_t *tm = &m->tcf_tm; _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index ae83c3aec3082d3c4a1714c909f817835925a546..da574a16e7b3665844059a792029834fc8b953ae 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -496,6 +496,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32 flags) { + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; struct tc_to_netdev offload; @@ -520,7 +521,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, offload.cls_u32->knode.sel = &n->sel; offload.cls_u32->knode.exts = &n->exts; if (n->ht_down) - offload.cls_u32->knode.link_handle = n->ht_down->handle; + offload.cls_u32->knode.link_handle = ht->handle; err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); @@ -788,8 +789,9 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, static struct 
tc_u_knode *u32_init_knode(struct tcf_proto *tp, struct tc_u_knode *n) { - struct tc_u_knode *new; + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); struct tc_u32_sel *s = &n->sel; + struct tc_u_knode *new; new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); @@ -807,11 +809,11 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, new->fshift = n->fshift; new->res = n->res; new->flags = n->flags; - RCU_INIT_POINTER(new->ht_down, n->ht_down); + RCU_INIT_POINTER(new->ht_down, ht); /* bump reference count as long as we hold pointer to structure */ - if (new->ht_down) - new->ht_down->refcnt++; + if (ht) + ht->refcnt++; #ifdef CONFIG_CLS_U32_PERF /* Statistics may be incremented by readers during update diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 1308bbf460f7ca7e496fe9d9c48da7eb2b0d282d..b56d57984439afc1b2057d294623b02de154a680 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -200,9 +200,13 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { + int wlen = skb_network_offset(skb); + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): - if (skb_cow_head(skb, sizeof(struct iphdr))) + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) goto drop; skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) @@ -210,7 +214,9 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; case htons(ETH_P_IPV6): - if (skb_cow_head(skb, sizeof(struct ipv6hdr))) + wlen += sizeof(struct ipv6hdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) goto drop; skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) diff --git a/net/sctp/debug.c b/net/sctp/debug.c index 95d7b15dad2143dc6a74003125be719e72aeee24..e371a0d90068f23ece604e7f230a020dab06f1f0 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -166,7 +166,7 @@ static const char *const sctp_timer_tbl[] = { /* Lookup timer debug name. 
*/ const char *sctp_tname(const sctp_subtype_t id) { - if (id.timeout <= SCTP_EVENT_TIMEOUT_MAX) + if (id.timeout < ARRAY_SIZE(sctp_timer_tbl)) return sctp_timer_tbl[id.timeout]; return "unknown_timer"; } diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 4f5a2b580aa52deb75e00c92d8b60992cf5bdaa6..6300f28c95888326113ebfa034fa86eaa6eccb44 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -44,6 +44,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); struct sctphdr *sh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) + goto out; + sh = sctp_hdr(skb); if (!pskb_may_pull(skb, sizeof(*sh))) goto out; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 582585393d35ea43d69a85f836d5d6d864ccad4e..0994ce491e7c298ed681cdc09f8854441ea440dc 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -382,17 +382,18 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, } static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, - struct sctp_sndrcvinfo *sinfo, - struct list_head *queue, int msg_len) + struct sctp_sndrcvinfo *sinfo, int msg_len) { + struct sctp_outq *q = &asoc->outqueue; struct sctp_chunk *chk, *temp; - list_for_each_entry_safe(chk, temp, queue, list) { + list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->list); + q->out_qlen -= chk->skb->len; asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; @@ -431,9 +432,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc, return; } - sctp_prsctp_prune_unsent(asoc, sinfo, - &asoc->outqueue.out_chunk_list, - msg_len); + sctp_prsctp_prune_unsent(asoc, sinfo, msg_len); } /* Mark all the eligible packets on a transport for retransmission. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c062ceae19e66c0d37a8063f5ed432f09d240d40..c472b8391dde45ba8920a4effbc50f699ca4cc10 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -82,7 +82,7 @@ /* Forward declarations for internal helper functions. */ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); -static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p, +static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, size_t msg_len); static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); @@ -332,16 +332,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, if (len < sizeof (struct sockaddr)) return NULL; + if (!opt->pf->af_supported(addr->sa.sa_family, opt)) + return NULL; + /* V4 mapped address are really of AF_INET family */ if (addr->sa.sa_family == AF_INET6 && - ipv6_addr_v4mapped(&addr->v6.sin6_addr)) { - if (!opt->pf->af_supported(AF_INET, opt)) - return NULL; - } else { - /* Does this PF support this AF? */ - if (!opt->pf->af_supported(addr->sa.sa_family, opt)) - return NULL; - } + ipv6_addr_v4mapped(&addr->v6.sin6_addr) && + !opt->pf->af_supported(AF_INET, opt)) + return NULL; /* If we get this far, af is valid. */ af = sctp_get_af_specific(addr->sa.sa_family); @@ -1957,9 +1955,16 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); if (!sctp_wspace(asoc)) { + /* sk can be changed by peel off when waiting for buf. 
*/ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); - if (err) + if (err) { + if (err == -ESRCH) { + /* asoc is already dead. */ + new_asoc = NULL; + err = -EPIPE; + } goto out_free; + } } /* If an address is passed with the sendto/sendmsg call, it is used @@ -4239,7 +4244,7 @@ static int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); local_bh_disable(); - percpu_counter_inc(&sctp_sockets_allocated); + sk_sockets_allocated_inc(sk); sock_prot_inuse_add(net, sk->sk_prot, 1); /* Nothing can fail after this block, otherwise @@ -4283,7 +4288,7 @@ static void sctp_destroy_sock(struct sock *sk) } sctp_endpoint_free(sp->ep); local_bh_disable(); - percpu_counter_dec(&sctp_sockets_allocated); + sk_sockets_allocated_dec(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); } @@ -4771,12 +4776,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) if (!asoc) return -EINVAL; - /* If there is a thread waiting on more sndbuf space for - * sending on this asoc, it cannot be peeled. - */ - if (waitqueue_active(&asoc->wait)) - return -EBUSY; - /* An association cannot be branched off from an already peeled-off * socket, nor is this supported for tcp style sockets. */ @@ -7443,9 +7442,9 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, size_t msg_len) { struct sock *sk = asoc->base.sk; - int err = 0; long current_timeo = *timeo_p; DEFINE_WAIT(wait); + int err = 0; pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, *timeo_p, msg_len); @@ -7457,10 +7456,11 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, for (;;) { prepare_to_wait_exclusive(&asoc->wait, &wait, TASK_INTERRUPTIBLE); + if (asoc->base.dead) + goto do_dead; if (!*timeo_p) goto do_nonblock; - if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING || - asoc->base.dead) + if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING) goto do_error; if (signal_pending(current)) goto do_interrupted; @@ -7473,6 +7473,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, release_sock(sk); current_timeo = schedule_timeout(current_timeo); lock_sock(sk); + if (sk != asoc->base.sk) + goto do_error; *timeo_p = current_timeo; } @@ -7485,6 +7487,10 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, return err; +do_dead: + err = -ESRCH; + goto out; + do_error: err = -EPIPE; goto out; diff --git a/net/socket.c b/net/socket.c index 40beb5abccbe48fe5549908bfc67fea79de49325..34dabf4b91926d2a3026d1da6cfdd1173ddb92ec 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1716,6 +1716,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, /* We assume all kernel code knows the size of sockaddr_storage */ msg.msg_namelen = 0; msg.msg_iocb = NULL; + msg.msg_flags = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, flags); @@ -2561,6 +2562,15 @@ static int __init sock_init(void) core_initcall(sock_init); /* early initcall */ +static int __init jit_init(void) +{ +#ifdef CONFIG_BPF_JIT_ALWAYS_ON + bpf_jit_enable = 1; +#endif + return 0; +} +pure_initcall(jit_init); + #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 25d9a9cf7b66b7f4e501d38d91f6a1908830972e..624c322af3ab0645f3540bd45f593ef3f7971e87 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -231,6 +231,7 @@ static 
int gssx_dec_linux_creds(struct xdr_stream *xdr, goto out_free_groups; creds->cr_group_info->gid[i] = kgid; } + groups_sort(creds->cr_group_info); return 0; out_free_groups: diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 1530825985221a1aeb5f77ee81f4251acdef9d96..6a08bc451247cc5724a2413002b2b6862162f94c 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd, goto out; rsci.cred.cr_group_info->gid[i] = kgid; } + groups_sort(rsci.cred.cr_group_info); /* mech name */ len = qword_get(&mesg, buf, mlen); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 5db68b371db2cac54e37cc0a883259dbc61125cb..600eacce653ae26e824839c2a03e6bb7476739ba 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -274,10 +274,9 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task) static void rpc_set_active(struct rpc_task *task) { - trace_rpc_task_begin(task->tk_client, task, NULL); - rpc_task_set_debuginfo(task); set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); + trace_rpc_task_begin(task->tk_client, task, NULL); } /* diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 64af4f034de693f921faf3f818d833e868b0a2e2..738a243c68a264f7d38f102852a1d28b98a965c4 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd, ug.gi->gid[i] = kgid; } + groups_sort(ug.gi); ugp = unix_gid_lookup(cd, uid); if (ugp) { struct cache_head *ch; @@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); cred->cr_group_info->gid[i] = kgid; } + groups_sort(cred->cr_group_info); if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { *authp = rpc_autherr_badverf; return SVC_DENIED; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e01c825bc68338bff19e4f5ec3763d581f5a58ce..d24d14ea8ba417fc979cd9b277418424b3e902cc 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2381,6 +2381,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: + case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: /* retry with existing socket, after a delay */ diff --git a/net/tipc/node.c b/net/tipc/node.c index 27753325e06e4355d71270d8c5c8818bef3e94c5..5b3e1ea37b6dd64502d2a6cee8eb2f2105139520 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1848,36 +1848,38 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) if (strcmp(name, tipc_bclink_name) == 0) { err = tipc_nl_add_bc_link(net, &msg); - if (err) { - nlmsg_free(msg.skb); - return err; - } + if (err) + goto err_free; } else { int bearer_id; struct tipc_node *node; struct tipc_link *link; node = tipc_node_find_by_name(net, name, &bearer_id); - if (!node) - return -EINVAL; + if (!node) { + err = -EINVAL; + goto err_free; + } tipc_node_read_lock(node); link = node->links[bearer_id].link; if (!link) { tipc_node_read_unlock(node); - nlmsg_free(msg.skb); - return -EINVAL; + err = -EINVAL; + goto err_free; } err = __tipc_nl_add_link(net, &msg, link, 0); tipc_node_read_unlock(node); - if (err) { - nlmsg_free(msg.skb); - return err; - } + if (err) + goto err_free; } return genlmsg_reply(msg.skb, info); + +err_free: + nlmsg_free(msg.skb); + return err; } int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) diff --git 
a/net/tipc/server.c b/net/tipc/server.c index f89c0c2e8c1662afebda77c616d8632acc879aba..f4c1b18c5fb07c11eae3198cb68bacc53bd964d7 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,7 +86,6 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); -static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -104,7 +103,6 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - tipc_sock_release(con); sock_release(sock); con->sock = NULL; @@ -194,19 +192,15 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(&sk->sk_callback_lock); } -static void tipc_sock_release(struct tipc_conn *con) +static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; - if (con->conid) - s->tipc_conn_release(con->conid, con->usr_data); - - tipc_unregister_callbacks(con); -} - -static void tipc_close_conn(struct tipc_conn *con) -{ if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + tipc_unregister_callbacks(con); + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs @@ -319,6 +313,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con) newcon->usr_data = s->tipc_conn_new(newcon->conid); if (!newcon->usr_data) { sock_release(newsock); + conn_put(newcon); return -ENOMEM; } @@ -625,14 +620,12 @@ int tipc_server_start(struct tipc_server *s) void tipc_server_stop(struct tipc_server *s) { struct tipc_conn *con; - int total = 0; int id; spin_lock_bh(&s->idr_lock); - for (id = 0; total < s->idr_in_use; id++) { + for (id = 0; s->idr_in_use; id++) { con = idr_find(&s->conn_idr, id); if (con) { - total++; spin_unlock_bh(&s->idr_lock); tipc_close_conn(con); spin_lock_bh(&s->idr_lock); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 9d94e65d0894183b4af94ed24e84b94c0478b551..271cd66e4b3b66534d8686bec94132bc9737314e 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -141,6 +141,11 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(&subscriber->lock); + tipc_nametbl_unsubscribe(sub); + spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -173,7 +178,6 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_subscriber *subscriber = sub->subscriber; spin_lock_bh(&subscriber->lock); - tipc_nametbl_unsubscribe(sub); list_del(&sub->subscrp_list); atomic_dec(&tn->subscription_count); spin_unlock_bh(&subscriber->lock); @@ -205,6 +209,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) continue; + tipc_nametbl_unsubscribe(sub); tipc_subscrp_get(sub); spin_unlock_bh(&subscriber->lock); tipc_subscrp_delete(sub); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index b58dc95f3d3535a305e529c75282360ea731f82d..107375d80c70be347f8cd095f780523dbb504fae 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ 
-371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) goto rcu_out; } - tipc_rcv(sock_net(sk), skb, b); - rcu_read_unlock(); - return 0; - rcu_out: rcu_read_unlock(); out: diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2f633eec6b7a0dbb7274191a75cc7b57af9b0171..ee12e176256ca8034e33ae88591bd9fba238bd10 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1101,10 +1101,19 @@ static const struct proto_ops vsock_dgram_ops = { .sendpage = sock_no_sendpage, }; +static int vsock_transport_cancel_pkt(struct vsock_sock *vsk) +{ + if (!transport->cancel_pkt) + return -EOPNOTSUPP; + + return transport->cancel_pkt(vsk); +} + static void vsock_connect_timeout(struct work_struct *work) { struct sock *sk; struct vsock_sock *vsk; + int cancel = 0; vsk = container_of(work, struct vsock_sock, dwork.work); sk = sk_vsock(vsk); @@ -1115,8 +1124,11 @@ static void vsock_connect_timeout(struct work_struct *work) sk->sk_state = SS_UNCONNECTED; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); + cancel = 1; } release_sock(sk); + if (cancel) + vsock_transport_cancel_pkt(vsk); sock_put(sk); } @@ -1223,11 +1235,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, err = sock_intr_errno(timeout); sk->sk_state = SS_UNCONNECTED; sock->state = SS_UNCONNECTED; + vsock_transport_cancel_pkt(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; sk->sk_state = SS_UNCONNECTED; sock->state = SS_UNCONNECTED; + vsock_transport_cancel_pkt(vsk); goto out_wait; } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 62c056ea403bc1ad3eaaa0efd81dca2353472fae..9c07c76c504de9ed3a68ab8fa7ec1fa99af336a7 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -57,6 +57,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, pkt->len = len; pkt->hdr.len = cpu_to_le32(len); pkt->reply = info->reply; + pkt->vsk = info->vsk; if (info->msg && len > 0) { pkt->buf = kmalloc(len, GFP_KERNEL); @@ -180,6 +181,7 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, .type = type, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -519,6 +521,7 @@ int virtio_transport_connect(struct vsock_sock *vsk) struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_REQUEST, .type = VIRTIO_VSOCK_TYPE_STREAM, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -534,6 +537,7 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | (mode & SEND_SHUTDOWN ? 
VIRTIO_VSOCK_SHUTDOWN_SEND : 0), + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -560,6 +564,7 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk, .type = VIRTIO_VSOCK_TYPE_STREAM, .msg = msg, .pkt_len = len, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -581,6 +586,7 @@ static int virtio_transport_reset(struct vsock_sock *vsk, .op = VIRTIO_VSOCK_OP_RST, .type = VIRTIO_VSOCK_TYPE_STREAM, .reply = !!pkt, + .vsk = vsk, }; /* Send RST only if the original pkt is not a RST pkt */ @@ -826,6 +832,7 @@ virtio_transport_send_response(struct vsock_sock *vsk, .remote_cid = le64_to_cpu(pkt->hdr.src_cid), .remote_port = le32_to_cpu(pkt->hdr.src_port), .reply = true, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c626f679e1c870204c62b0fa5a0d4661bfa06d32..91722e97cdd5732dcd20ec9bec4981e82c01846f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -2014,20 +2015,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { static int parse_txq_params(struct nlattr *tb[], struct ieee80211_txq_params *txq_params) { + u8 ac; + if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] || !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || !tb[NL80211_TXQ_ATTR_AIFS]) return -EINVAL; - txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); + ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); - if (txq_params->ac >= NL80211_NUM_ACS) + if (ac >= NL80211_NUM_ACS) return -EINVAL; - + txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS); return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index dbb91e0235b9ae58ec2f7c1ccf68023631cc85d5..8b3fef7174f1d07625e624f1137572db1f15748c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1393,6 +1393,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; newp->type = old->type; + newp->family = old->family; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2cade027629913a3bd630b885a0c8c0cf4984ca5..b09febe8782e1a3ebf7028beadae467791ebfde5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -872,13 +872,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, &x->replay); if (ret) goto out; - if (x->security) - ret = copy_sec_ctx(x->security, skb); if (x->props.output_mark) { ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark); if (ret) goto out; } + if (x->security) + ret = copy_sec_ctx(x->security, skb); out: return ret; } diff --git a/scripts/coccicheck b/scripts/coccicheck index ec487b8e7051e4c0cee49a800a3d63223a7acf9c..c36b04b4168651a09a239973bb75f54225ba739b 100755 --- a/scripts/coccicheck +++ b/scripts/coccicheck @@ -29,12 +29,6 @@ else VERBOSE=0 fi -if [ -z "$J" ]; then - NPROC=$(getconf _NPROCESSORS_ONLN) -else - NPROC="$J" -fi - FLAGS="--very-quiet" # You can use SPFLAGS to append extra arguments to coccicheck or override any @@ -69,6 +63,9 @@ if [ "$C" = "1" -o "$C" = "2" ]; then # 
Take only the last argument, which is the C file to test shift $(( $# - 1 )) OPTIONS="$COCCIINCLUDE $1" + + # No need to parallelize Coccinelle since this mode takes one input file. + NPROC=1 else ONLINE=0 if [ "$KBUILD_EXTMOD" = "" ] ; then @@ -76,6 +73,12 @@ else else OPTIONS="--dir $KBUILD_EXTMOD $COCCIINCLUDE" fi + + if [ -z "$J" ]; then + NPROC=$(getconf _NPROCESSORS_ONLN) + else + NPROC="$J" + fi fi if [ "$KBUILD_EXTMOD" != "" ] ; then diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py index 1bf949c43b76cd5fb2647c556f8522cc7446e956..f6ab3ccf698ff4e4279e959829eb3e87da8cb2bc 100644 --- a/scripts/gdb/linux/tasks.py +++ b/scripts/gdb/linux/tasks.py @@ -96,6 +96,8 @@ def get_thread_info(task): thread_info_addr = task.address + ia64_task_size thread_info = thread_info_addr.cast(thread_info_ptr_type) else: + if task.type.fields()[0].type == thread_info_type.get_type(): + return task['thread_info'] thread_info = task['stack'].cast(thread_info_ptr_type) return thread_info.dereference() diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index bd834975909510335322ec8f34970573a8708dfc..238db4ffd30c02d18c19746c7ee452db70ccac3e 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -838,6 +838,7 @@ static const char *const section_white_list[] = ".cmem*", /* EZchip */ ".fmt_slot*", /* EZchip */ ".gnu.lto*", + ".discard.*", NULL }; @@ -2129,6 +2130,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); } +/* Cannot check for assembler */ +static void add_retpoline(struct buffer *b) +{ + buf_printf(b, "\n#ifdef RETPOLINE\n"); + buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); + buf_printf(b, "#endif\n"); +} + static void add_staging_flag(struct buffer *b, const char *name) { static const char *staging_dir = "drivers/staging"; @@ -2473,6 +2482,7 @@ int main(int argc, char **argv) add_header(&buf, mod); add_intree_flag(&buf, !external_module); + add_retpoline(&buf); add_staging_flag(&buf, mod->name); err |= add_versions(&buf, mod); add_depends(&buf, mod, modules); diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 73a2c7da0e551dc29d421334061bb9ebad9e8a78..9b6e246a45d09f530b3527b81946a30b1256697f 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -4,7 +4,10 @@ * combine them automatically. */ SECTIONS { - /DISCARD/ : { *(.discard) } + /DISCARD/ : { + *(.discard) + *(.discard.*) + } __ksymtab 0 : { *(SORT(___ksymtab+*)) } __ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) } @@ -19,4 +22,6 @@ SECTIONS { . 
= ALIGN(8); .init_array 0 : { *(SORT(.init_array.*)) *(.init_array) } + + __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) } } diff --git a/scripts/package/Makefile b/scripts/package/Makefile index 71b4a8af9d4dcdf1dcec434b6be98636f468b2dd..7badec3498b838e9fb02e33770134a2407f5b714 100644 --- a/scripts/package/Makefile +++ b/scripts/package/Makefile @@ -39,10 +39,9 @@ if test "$(objtree)" != "$(srctree)"; then \ false; \ fi ; \ $(srctree)/scripts/setlocalversion --save-scmversion; \ -ln -sf $(srctree) $(2); \ tar -cz $(RCS_TAR_IGNORE) -f $(2).tar.gz \ - $(addprefix $(2)/,$(TAR_CONTENT) $(3)); \ -rm -f $(2) $(objtree)/.scmversion + --transform 's:^:$(2)/:S' $(TAR_CONTENT) $(3); \ +rm -f $(objtree)/.scmversion # rpm-pkg # --------------------------------------------------------------------------- diff --git a/security/Kconfig b/security/Kconfig index 59aea7df12b04d38f35ae4fbe9700a93994a1453..80a2934d3110881cfc11fd4fd74336f9a2104a50 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -40,6 +40,16 @@ config SECURITY If you are unsure how to answer this question, answer N. +config PAGE_TABLE_ISOLATION + bool "Remove the kernel mapping in user mode" + default y + depends on X86_64 && SMP + help + This enforces a strict kernel and user space isolation, in order + to close hardware side channels on kernel address information. + + If you are unsure how to answer this question, answer Y. + config SECURITYFS bool "Enable the securityfs filesystem" help diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 0e8762945e79b4b186a1c28b9ed4ea2d7d393438..2b3def14b4fb0331f8128a35a252ba140033af1c 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -51,6 +51,8 @@ static int __init hash_setup(char *str) ima_hash_algo = HASH_ALGO_SHA1; else if (strncmp(str, "md5", 3) == 0) ima_hash_algo = HASH_ALGO_MD5; + else + return 1; goto out; } @@ -60,6 +62,8 @@ static int __init hash_setup(char *str) break; } } + if (i == HASH_ALGO__LAST) + return 1; out: hash_setup_done = 1; return 1; diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index ae70e828da2eed57feb3dabd3a2c071f40d5b48d..1f3d60021e053f292ce2cf888ace5bffa8488c38 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -141,23 +141,22 @@ static int valid_ecryptfs_desc(const char *ecryptfs_desc) */ static int valid_master_desc(const char *new_desc, const char *orig_desc) { - if (!memcmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) { - if (strlen(new_desc) == KEY_TRUSTED_PREFIX_LEN) - goto out; - if (orig_desc) - if (memcmp(new_desc, orig_desc, KEY_TRUSTED_PREFIX_LEN)) - goto out; - } else if (!memcmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) { - if (strlen(new_desc) == KEY_USER_PREFIX_LEN) - goto out; - if (orig_desc) - if (memcmp(new_desc, orig_desc, KEY_USER_PREFIX_LEN)) - goto out; - } else - goto out; + int prefix_len; + + if (!strncmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) + prefix_len = KEY_TRUSTED_PREFIX_LEN; + else if (!strncmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) + prefix_len = KEY_USER_PREFIX_LEN; + else + return -EINVAL; + + if (!new_desc[prefix_len]) + return -EINVAL; + + if (orig_desc && strncmp(new_desc, orig_desc, prefix_len)) + return -EINVAL; + return 0; -out: - return -EINVAL; } /* diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 
5030fcf236818962e3c5d29b9a239d34be3d752d..cb7f8f730c6dd45b1be4b01d38d375cec28acdb8 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -250,11 +250,12 @@ static int construct_key(struct key *key, const void *callout_info, * The keyring selected is returned with an extra reference upon it which the * caller must release. */ -static void construct_get_dest_keyring(struct key **_dest_keyring) +static int construct_get_dest_keyring(struct key **_dest_keyring) { struct request_key_auth *rka; const struct cred *cred = current_cred(); struct key *dest_keyring = *_dest_keyring, *authkey; + int ret; kenter("%p", dest_keyring); @@ -263,6 +264,8 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) /* the caller supplied one */ key_get(dest_keyring); } else { + bool do_perm_check = true; + /* use a default keyring; falling through the cases until we * find one that we actually have */ switch (cred->jit_keyring) { @@ -277,8 +280,10 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) dest_keyring = key_get(rka->dest_keyring); up_read(&authkey->sem); - if (dest_keyring) + if (dest_keyring) { + do_perm_check = false; break; + } } case KEY_REQKEY_DEFL_THREAD_KEYRING: @@ -313,11 +318,29 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) default: BUG(); } + + /* + * Require Write permission on the keyring. This is essential + * because the default keyring may be the session keyring, and + * joining a keyring only requires Search permission. + * + * However, this check is skipped for the "requestor keyring" so + * that /sbin/request-key can itself use request_key() to add + * keys to the original requestor's destination keyring. + */ + if (dest_keyring && do_perm_check) { + ret = key_permission(make_key_ref(dest_keyring, 1), + KEY_NEED_WRITE); + if (ret) { + key_put(dest_keyring); + return ret; + } + } } *_dest_keyring = dest_keyring; kleave(" [dk %d]", key_serial(dest_keyring)); - return; + return 0; } /* @@ -443,11 +466,15 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx, if (ctx->index_key.type == &key_type_keyring) return ERR_PTR(-EPERM); - user = key_user_lookup(current_fsuid()); - if (!user) - return ERR_PTR(-ENOMEM); + ret = construct_get_dest_keyring(&dest_keyring); + if (ret) + goto error; - construct_get_dest_keyring(&dest_keyring); + user = key_user_lookup(current_fsuid()); + if (!user) { + ret = -ENOMEM; + goto error_put_dest_keyring; + } ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key); key_user_put(user); @@ -462,7 +489,7 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx, } else if (ret == -EINPROGRESS) { ret = 0; } else { - goto couldnt_alloc_key; + goto error_put_dest_keyring; } key_put(dest_keyring); @@ -472,8 +499,9 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx, construction_failed: key_negate_and_link(key, key_negative_timeout, NULL, NULL); key_put(key); -couldnt_alloc_key: +error_put_dest_keyring: key_put(dest_keyring); +error: kleave(" = %d", ret); return ERR_PTR(ret); } diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index ebc9fdfe64df618088a8d55efff8b5312af3385d..3321348fd86bdc00836b4a7555fd7d56eae30fa3 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -466,7 +466,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm, v = snd_pcm_hw_param_last(pcm, params, var, dir); else v = snd_pcm_hw_param_first(pcm, params, var, dir); - snd_BUG_ON(v < 0); 
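	/* A negative v is reachable here with bad user-supplied
	 * parameters (the OSS emulation path), i.e. it is not a kernel
	 * bug, so the error is propagated to the caller instead of
	 * tripping snd_BUG_ON().
	 */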
return v; } @@ -1370,8 +1369,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) return tmp; - mutex_lock(&runtime->oss.params_lock); while (bytes > 0) { + if (mutex_lock_interruptible(&runtime->oss.params_lock)) { + tmp = -ERESTARTSYS; + break; + } if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { tmp = bytes; if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes) @@ -1415,14 +1417,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha xfer += tmp; if ((substream->f_flags & O_NONBLOCK) != 0 && tmp != runtime->oss.period_bytes) - break; + tmp = -EAGAIN; } - } - mutex_unlock(&runtime->oss.params_lock); - return xfer; - err: - mutex_unlock(&runtime->oss.params_lock); + mutex_unlock(&runtime->oss.params_lock); + if (tmp < 0) + break; + if (signal_pending(current)) { + tmp = -ERESTARTSYS; + break; + } + tmp = 0; + } return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; } @@ -1470,8 +1476,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) return tmp; - mutex_lock(&runtime->oss.params_lock); while (bytes > 0) { + if (mutex_lock_interruptible(&runtime->oss.params_lock)) { + tmp = -ERESTARTSYS; + break; + } if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { if (runtime->oss.buffer_used == 0) { tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1); @@ -1502,12 +1511,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use bytes -= tmp; xfer += tmp; } - } - mutex_unlock(&runtime->oss.params_lock); - return xfer; - err: - mutex_unlock(&runtime->oss.params_lock); + mutex_unlock(&runtime->oss.params_lock); + if (tmp < 0) + break; + if (signal_pending(current)) { + tmp = -ERESTARTSYS; + break; + } + tmp = 0; + } return xfer > 0 ? 
(snd_pcm_sframes_t)xfer : tmp; } diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 727ac44d39f4a64f7ba203d8dd839caaa66c15ec..a84a1d3d23e56e15c20710c221dfc52a6a743437 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -591,18 +591,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st snd_pcm_sframes_t frames = size; plugin = snd_pcm_plug_first(plug); - while (plugin && frames > 0) { + while (plugin) { + if (frames <= 0) + return frames; if ((next = plugin->next) != NULL) { snd_pcm_sframes_t frames1 = frames; - if (plugin->dst_frames) + if (plugin->dst_frames) { frames1 = plugin->dst_frames(plugin, frames); + if (frames1 <= 0) + return frames1; + } if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) { return err; } if (err != frames1) { frames = err; - if (plugin->src_frames) + if (plugin->src_frames) { frames = plugin->src_frames(plugin, frames1); + if (frames <= 0) + return frames; + } } } else dst_channels = NULL; diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 8e980aa678d0d8412e951d78f3026f25e6f6badb..074363b63cc4eeab036596708483ce3bb114e9af 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -149,7 +149,9 @@ static int snd_pcm_control_ioctl(struct snd_card *card, err = -ENXIO; goto _error; } + mutex_lock(&pcm->open_mutex); err = snd_pcm_info_user(substream, info); + mutex_unlock(&pcm->open_mutex); _error: mutex_unlock(®ister_mutex); return err; diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index c80d80e312e347f180784a929fe7492858d546f9..9306604f50700a9eda2b5ad7b5adf4fc141e94b7 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -578,7 +578,6 @@ static inline unsigned int muldiv32(unsigned int a, unsigned int b, { u_int64_t n = (u_int64_t) a * b; if (c == 0) { - snd_BUG_ON(!n); *r = 0; return UINT_MAX; } @@ -1664,7 +1663,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm, return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); - if (snd_BUG_ON(err < 0)) + if (err < 0) return err; } return snd_pcm_hw_param_value(params, var, dir); @@ -1711,7 +1710,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm, return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); - if (snd_BUG_ON(err < 0)) + if (err < 0) return err; } return snd_pcm_hw_param_value(params, var, dir); diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index b450a27588c8e679cca28fe4384850b031c93168..16f8124b11500cf222e16722d8cf8f8735ec578c 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream, return 0; } -int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) +static int __snd_rawmidi_info_select(struct snd_card *card, + struct snd_rawmidi_info *info) { struct snd_rawmidi *rmidi; struct snd_rawmidi_str *pstr; struct snd_rawmidi_substream *substream; - mutex_lock(®ister_mutex); rmidi = snd_rawmidi_search(card, info->device); - mutex_unlock(®ister_mutex); if (!rmidi) return -ENXIO; if (info->stream < 0 || info->stream > 1) @@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info } return -ENXIO; } + +int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) +{ + int ret; + + mutex_lock(®ister_mutex); + ret = __snd_rawmidi_info_select(card, info); + mutex_unlock(®ister_mutex); + return ret; +} EXPORT_SYMBOL(snd_rawmidi_info_select); 
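The rawmidi hunk above is an instance of a common locking refactor: the old code dropped register_mutex immediately after snd_rawmidi_search(), leaving the looked-up object unprotected while it was still being dereferenced. The fix moves the body into a double-underscore helper that assumes the lock is held, and the thin wrapper keeps the mutex held across the whole lookup-and-use sequence. A minimal standalone sketch of that shape in plain C with pthreads follows; every name in it (registry_lock, find_device() and so on) is a hypothetical stand-in, and only the structure mirrors the kernel change:

/*
 * Locked-wrapper pattern: the lookup and every dereference of its
 * result happen inside one critical section.  The __helper documents
 * (and assumes) that the caller already holds the lock.
 */
#include <pthread.h>
#include <stddef.h>

struct device { int id; int stream_count; };

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;
static struct device registry[] = { { 0, 1 }, { 1, 2 }, { 2, 0 } };

/* Caller must hold registry_lock. */
static struct device *find_device(int id)
{
	size_t i;

	for (i = 0; i < sizeof(registry) / sizeof(registry[0]); i++)
		if (registry[i].id == id)
			return &registry[i];
	return NULL;
}

/* Caller must hold registry_lock; the device must not outlive it. */
static int __device_stream_count(int id)
{
	struct device *dev = find_device(id);

	if (!dev)
		return -1;
	return dev->stream_count;
}

/*
 * The buggy shape unlocks right after find_device() and then keeps
 * using *dev, racing with concurrent removal; this wrapper holds the
 * lock across both steps, like snd_rawmidi_info_select() above.
 */
int device_stream_count(int id)
{
	int ret;

	pthread_mutex_lock(&registry_lock);
	ret = __device_stream_count(id);
	pthread_mutex_unlock(&registry_lock);
	return ret;
}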
static int snd_rawmidi_info_select_user(struct snd_card *card, diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 45ef5915462c097f7c736f79487fa1b3fc715694..16580a82e1c8cad912ff7368a0cb0e5abd539aa8 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -221,6 +221,7 @@ static struct snd_seq_client *seq_create_client1(int client_index, int poolsize) rwlock_init(&client->ports_lock); mutex_init(&client->ports_mutex); INIT_LIST_HEAD(&client->ports_list_head); + mutex_init(&client->ioctl_mutex); /* find free slot in the client table */ spin_lock_irqsave(&clients_lock, flags); @@ -2127,7 +2128,9 @@ static long snd_seq_ioctl(struct file *file, unsigned int cmd, return -EFAULT; } + mutex_lock(&client->ioctl_mutex); err = handler->func(client, &buf); + mutex_unlock(&client->ioctl_mutex); if (err >= 0) { /* Some commands includes a bug in 'dir' field. */ if (handler->cmd == SNDRV_SEQ_IOCTL_SET_QUEUE_CLIENT || diff --git a/sound/core/seq/seq_clientmgr.h b/sound/core/seq/seq_clientmgr.h index c6614254ef8af2f56b68193dc4ce4d35d49b3773..0611e1e0ed5ba01e053ac3a53c5f09ae681e02a2 100644 --- a/sound/core/seq/seq_clientmgr.h +++ b/sound/core/seq/seq_clientmgr.h @@ -61,6 +61,7 @@ struct snd_seq_client { struct list_head ports_list_head; rwlock_t ports_lock; struct mutex ports_mutex; + struct mutex ioctl_mutex; int convert32; /* convert 32->64bit */ /* output pool */ diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index 37d9cfbc29f9c829facd29ffdc2224ada7a63efd..b80985fbc334cc6598d7ca2953d1ce79edb9f68b 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -355,7 +355,7 @@ static int initialize_timer(struct snd_seq_timer *tmr) unsigned long freq; t = tmr->timeri->timer; - if (snd_BUG_ON(!t)) + if (!t) return -EINVAL; freq = tmr->preferred_resolution; diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 54f348a4fb7811b5c9e4f13f4ace902d0fb845da..cbd20cb8ca1127704b9448952f5d907e4a17432d 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -305,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd) return 0; } -static void params_change_substream(struct loopback_pcm *dpcm, - struct snd_pcm_runtime *runtime) -{ - struct snd_pcm_runtime *dst_runtime; - - if (dpcm == NULL || dpcm->substream == NULL) - return; - dst_runtime = dpcm->substream->runtime; - if (dst_runtime == NULL) - return; - dst_runtime->hw = dpcm->cable->hw; -} - static void params_change(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; @@ -329,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream) cable->hw.rate_max = runtime->rate; cable->hw.channels_min = runtime->channels; cable->hw.channels_max = runtime->channels; - params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK], - runtime); - params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE], - runtime); } static int loopback_prepare(struct snd_pcm_substream *substream) @@ -620,26 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream) static int rule_format(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { + struct loopback_pcm *dpcm = rule->private; + struct loopback_cable *cable = dpcm->cable; + struct snd_mask m; - struct snd_pcm_hardware *hw = rule->private; - struct snd_mask *maskp = hw_param_mask(params, rule->var); - - maskp->bits[0] &= 
(u_int32_t)hw->formats; - maskp->bits[1] &= (u_int32_t)(hw->formats >> 32); - memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */ - if (! maskp->bits[0] && ! maskp->bits[1]) - return -EINVAL; - return 0; + snd_mask_none(&m); + mutex_lock(&dpcm->loopback->cable_lock); + m.bits[0] = (u_int32_t)cable->hw.formats; + m.bits[1] = (u_int32_t)(cable->hw.formats >> 32); + mutex_unlock(&dpcm->loopback->cable_lock); + return snd_mask_refine(hw_param_mask(params, rule->var), &m); } static int rule_rate(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { - struct snd_pcm_hardware *hw = rule->private; + struct loopback_pcm *dpcm = rule->private; + struct loopback_cable *cable = dpcm->cable; struct snd_interval t; - t.min = hw->rate_min; - t.max = hw->rate_max; + mutex_lock(&dpcm->loopback->cable_lock); + t.min = cable->hw.rate_min; + t.max = cable->hw.rate_max; + mutex_unlock(&dpcm->loopback->cable_lock); t.openmin = t.openmax = 0; t.integer = 0; return snd_interval_refine(hw_param_interval(params, rule->var), &t); @@ -648,22 +635,44 @@ static int rule_rate(struct snd_pcm_hw_params *params, static int rule_channels(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { - struct snd_pcm_hardware *hw = rule->private; + struct loopback_pcm *dpcm = rule->private; + struct loopback_cable *cable = dpcm->cable; struct snd_interval t; - t.min = hw->channels_min; - t.max = hw->channels_max; + mutex_lock(&dpcm->loopback->cable_lock); + t.min = cable->hw.channels_min; + t.max = cable->hw.channels_max; + mutex_unlock(&dpcm->loopback->cable_lock); t.openmin = t.openmax = 0; t.integer = 0; return snd_interval_refine(hw_param_interval(params, rule->var), &t); } +static void free_cable(struct snd_pcm_substream *substream) +{ + struct loopback *loopback = substream->private_data; + int dev = get_cable_index(substream); + struct loopback_cable *cable; + + cable = loopback->cables[substream->number][dev]; + if (!cable) + return; + if (cable->streams[!substream->stream]) { + /* other stream is still alive */ + cable->streams[substream->stream] = NULL; + } else { + /* free the cable */ + loopback->cables[substream->number][dev] = NULL; + kfree(cable); + } +} + static int loopback_open(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct loopback *loopback = substream->private_data; struct loopback_pcm *dpcm; - struct loopback_cable *cable; + struct loopback_cable *cable = NULL; int err = 0; int dev = get_cable_index(substream); @@ -682,7 +691,6 @@ static int loopback_open(struct snd_pcm_substream *substream) if (!cable) { cable = kzalloc(sizeof(*cable), GFP_KERNEL); if (!cable) { - kfree(dpcm); err = -ENOMEM; goto unlock; } @@ -700,19 +708,19 @@ static int loopback_open(struct snd_pcm_substream *substream) /* are cached -> they do not reflect the actual state */ err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, - rule_format, &runtime->hw, + rule_format, dpcm, SNDRV_PCM_HW_PARAM_FORMAT, -1); if (err < 0) goto unlock; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, - rule_rate, &runtime->hw, + rule_rate, dpcm, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) goto unlock; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, - rule_channels, &runtime->hw, + rule_channels, dpcm, SNDRV_PCM_HW_PARAM_CHANNELS, -1); if (err < 0) goto unlock; @@ -724,6 +732,10 @@ static int loopback_open(struct snd_pcm_substream *substream) else runtime->hw = cable->hw; unlock: + if (err < 0) { + free_cable(substream); + 
kfree(dpcm); + } mutex_unlock(&loopback->cable_lock); return err; } @@ -732,20 +744,10 @@ static int loopback_close(struct snd_pcm_substream *substream) { struct loopback *loopback = substream->private_data; struct loopback_pcm *dpcm = substream->runtime->private_data; - struct loopback_cable *cable; - int dev = get_cable_index(substream); loopback_timer_stop(dpcm); mutex_lock(&loopback->cable_lock); - cable = loopback->cables[substream->number][dev]; - if (cable->streams[!substream->stream]) { - /* other stream is still alive */ - cable->streams[substream->stream] = NULL; - } else { - /* free the cable */ - loopback->cables[substream->number][dev] = NULL; - kfree(cable); - } + free_cable(substream); mutex_unlock(&loopback->cable_lock); return 0; } diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index c9af022676c2f1071585d6ec3a7975ef19ed55be..47c3e97c3136527ac95ddaa1963c77bcf0240cb1 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -319,7 +319,7 @@ static int hdac_component_master_match(struct device *dev, void *data) */ int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops) { - if (WARN_ON(!hdac_acomp)) + if (!hdac_acomp) return -ENODEV; hdac_acomp->audio_ops = aops; diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 80bbadc83721447754392238118eee98484616b6..d6e079f4ec09d29624a05d3a18413e55910f0cf6 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -408,6 +408,7 @@ static const struct snd_pci_quirk cs420x_fixup_tbl[] = { /*SND_PCI_QUIRK(0x8086, 0x7270, "IMac 27 Inch", CS420X_IMAC27),*/ /* codec SSID */ + SND_PCI_QUIRK(0x106b, 0x0600, "iMac 14,1", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x1c00, "MacBookPro 8,1", CS420X_MBP81), SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101), diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index f2e4e99ce651ace584ef15bc08695fd320eaf91c..2c3065c1f3fb160fdcd64d58e7b1404280aec938 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -261,6 +261,7 @@ enum { CXT_FIXUP_HP_530, CXT_FIXUP_CAP_MIX_AMP_5047, CXT_FIXUP_MUTE_LED_EAPD, + CXT_FIXUP_HP_DOCK, CXT_FIXUP_HP_SPECTRE, CXT_FIXUP_HP_GATE_MIC, }; @@ -778,6 +779,14 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_mute_led_eapd, }, + [CXT_FIXUP_HP_DOCK] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x16, 0x21011020 }, /* line-out */ + { 0x18, 0x2181103f }, /* line-in */ + { } + } + }, [CXT_FIXUP_HP_SPECTRE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -839,6 +848,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC), SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC), + SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), @@ -872,6 +882,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_PINCFG_LEMOTE_A1205, .name = "lemote-a1205" }, { .id = CXT_FIXUP_OLPC_XO, .name = "olpc-xo" }, { .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" 
}, + { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" }, {} }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d7fa7373cb9460bde1f4c6c9b034e50d707bb750..71a058fcf884a8aaa114bfbd9f3d709121beedd5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4854,6 +4854,7 @@ enum { ALC286_FIXUP_HP_GPIO_LED, ALC280_FIXUP_HP_GPIO2_MIC_HOTKEY, ALC280_FIXUP_HP_DOCK_PINS, + ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, ALC280_FIXUP_HP_9480M, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -5394,6 +5395,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC280_FIXUP_HP_GPIO4 }, + [ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x21011020 }, /* line-out */ + { 0x18, 0x2181103f }, /* line-in */ + { }, + }, + .chained = true, + .chain_id = ALC269_FIXUP_HP_GPIO_MIC1_LED + }, [ALC280_FIXUP_HP_9480M] = { .type = HDA_FIXUP_FUNC, .v.func = alc280_fixup_hp_9480m, @@ -5606,6 +5617,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), + SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -5646,7 +5658,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2256, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x2257, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x2259, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), - SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), + SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x2260, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2263, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2264, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), @@ -5812,6 +5824,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, .name = "headset-mode-no-hp-mic"}, {.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"}, {.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"}, + {.id = ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, .name = "hp-dock-gpio-mic1-led"}, {.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"}, {.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"}, {.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"}, @@ -5959,6 +5972,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x1b, 0x01011020}, {0x21, 0x02211010}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x14, 0x90170110}, + {0x1b, 0x01011020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c index c69e97654fc63e7b9f9a06776daeaba199a8b952..f88632426c0a1571f4374c30a050240fb96450c8 100644 
--- a/sound/soc/codecs/da7218.c +++ b/sound/soc/codecs/da7218.c @@ -2519,7 +2519,7 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec) } if (da7218->dev_id == DA7218_DEV_ID) { - hpldet_np = of_find_node_by_name(np, "da7218_hpldet"); + hpldet_np = of_get_child_by_name(np, "da7218_hpldet"); if (!hpldet_np) return pdata; diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c index 712ed6598c4827c5c4780d3df8d341fc7b39fd9c..ebdf9bd5a64c53e3398e811ab3113946c82cde0d 100644 --- a/sound/soc/codecs/pcm512x-spi.c +++ b/sound/soc/codecs/pcm512x-spi.c @@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = { }; module_spi_driver(pcm512x_spi_driver); + +MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI"); +MODULE_AUTHOR("Mark Brown "); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h index 5acd5b69fb837a622ddd0c745c5d47f6d41564af..f9b6c5a81b477dc1161203ac4a1123e4047e417e 100644 --- a/sound/soc/codecs/tlv320aic31xx.h +++ b/sound/soc/codecs/tlv320aic31xx.h @@ -115,7 +115,7 @@ struct aic31xx_pdata { /* INT2 interrupt control */ #define AIC31XX_INT2CTRL AIC31XX_REG(0, 49) /* GPIO1 control */ -#define AIC31XX_GPIO1 AIC31XX_REG(0, 50) +#define AIC31XX_GPIO1 AIC31XX_REG(0, 51) #define AIC31XX_DACPRB AIC31XX_REG(0, 60) /* ADC Instruction Set Register */ diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index a2104d68169d9e6bc9aa30fe8269070a6dc88e40..26fd6a664b9b894452cd6dc639b8aec5b10e6808 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev); struct device_node *twl4030_codec_node = NULL; - twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node, + twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node, "codec"); if (!pdata && twl4030_codec_node) { @@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) GFP_KERNEL); if (!pdata) { dev_err(codec->dev, "Can not allocate memory\n"); + of_node_put(twl4030_codec_node); return NULL; } twl4030_setup_pdata_of(pdata, twl4030_codec_node); + of_node_put(twl4030_codec_node); } return pdata; diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 757af795cebdc7250d2f56131b447d7d318f9462..c03c9da076c2d43fe254808a3b65965cc4d77625 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1465,7 +1465,7 @@ static int wm_adsp_load(struct wm_adsp *dsp) le64_to_cpu(footer->timestamp)); while (pos < firmware->size && - pos - firmware->size > sizeof(*region)) { + sizeof(*region) < firmware->size - pos) { region = (void *)&(firmware->data[pos]); region_name = "Unknown"; reg = 0; @@ -1526,8 +1526,8 @@ static int wm_adsp_load(struct wm_adsp *dsp) regions, le32_to_cpu(region->len), offset, region_name); - if ((pos + le32_to_cpu(region->len) + sizeof(*region)) > - firmware->size) { + if (le32_to_cpu(region->len) > + firmware->size - pos - sizeof(*region)) { adsp_err(dsp, "%s.%d: %s region len %d bytes exceeds file length %zu\n", file, regions, region_name, @@ -1992,7 +1992,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) blocks = 0; while (pos < firmware->size && - pos - firmware->size > sizeof(*blk)) { + sizeof(*blk) < firmware->size - pos) { blk = (void *)(&firmware->data[pos]); type = le16_to_cpu(blk->type); @@ -2066,8 +2066,8 @@ static int 
wm_adsp_load_coeff(struct wm_adsp *dsp) } if (reg) { - if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) > - firmware->size) { + if (le32_to_cpu(blk->len) > + firmware->size - pos - sizeof(*blk)) { adsp_err(dsp, "%s.%d: %s region len %d bytes exceeds file length %zu\n", file, blocks, region_name, diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index fde08660b63be270f95436424e32e443bdb8a0de..1c03490e1182b639802a61ca73b3763673eef171 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -1467,12 +1467,6 @@ static int fsl_ssi_probe(struct platform_device *pdev) sizeof(fsl_ssi_ac97_dai)); fsl_ac97_data = ssi_private; - - ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); - if (ret) { - dev_err(&pdev->dev, "could not set AC'97 ops\n"); - return ret; - } } else { /* Initialize this copy of the CPU DAI driver structure */ memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template, @@ -1583,6 +1577,14 @@ static int fsl_ssi_probe(struct platform_device *pdev) return ret; } + if (fsl_ssi_is_ac97(ssi_private)) { + ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); + if (ret) { + dev_err(&pdev->dev, "could not set AC'97 ops\n"); + goto error_ac97_ops; + } + } + ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component, &ssi_private->cpu_dai_drv, 1); if (ret) { @@ -1666,6 +1668,10 @@ static int fsl_ssi_probe(struct platform_device *pdev) fsl_ssi_debugfs_remove(&ssi_private->dbg_stats); error_asoc_register: + if (fsl_ssi_is_ac97(ssi_private)) + snd_soc_set_ac97_ops(NULL); + +error_ac97_ops: if (ssi_private->soc->imx) fsl_ssi_imx_clean(pdev, ssi_private); diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index f608f8d23f3d9f3b1ce8a3a7f137a20e7922fe07..dd88c2cb64703a4b0999261a92fb06f348af2ea7 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -232,13 +232,19 @@ static int asoc_simple_card_dai_link_of(struct device_node *node, snprintf(prop, sizeof(prop), "%scpu", prefix); cpu = of_get_child_by_name(node, prop); + if (!cpu) { + ret = -EINVAL; + dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop); + goto dai_link_of_err; + } + snprintf(prop, sizeof(prop), "%splat", prefix); plat = of_get_child_by_name(node, prop); snprintf(prop, sizeof(prop), "%scodec", prefix); codec = of_get_child_by_name(node, prop); - if (!cpu || !codec) { + if (!codec) { ret = -EINVAL; dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop); goto dai_link_of_err; diff --git a/sound/soc/img/img-parallel-out.c b/sound/soc/img/img-parallel-out.c index c1610a054d65aafd9bac2032dc05d1f8b68d31eb..3cf522d66755b95ac0568a8d85d031e13184466a 100644 --- a/sound/soc/img/img-parallel-out.c +++ b/sound/soc/img/img-parallel-out.c @@ -166,9 +166,11 @@ static int img_prl_out_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } + pm_runtime_get_sync(prl->dev); reg = img_prl_out_readl(prl, IMG_PRL_OUT_CTL); reg = (reg & ~IMG_PRL_OUT_CTL_EDGE_MASK) | control_set; img_prl_out_writel(prl, reg, IMG_PRL_OUT_CTL); + pm_runtime_put(prl->dev); return 0; } diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index 3f8e6f0b7eb5d10647f99efe3c1e5b9be751c2e3..dcf03691ebc81e33927656483aa73e1b9647d529 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -41,7 +41,8 @@ struct nhlt_acpi_table *skl_nhlt_init(struct device *dev) obj = acpi_evaluate_dsm(handle, OSC_UUID, 1, 1, NULL); if (obj && obj->type == ACPI_TYPE_BUFFER) { nhlt_ptr = (struct 
nhlt_resource_desc *)obj->buffer.pointer; - nhlt_table = (struct nhlt_acpi_table *) + if (nhlt_ptr->length) + nhlt_table = (struct nhlt_acpi_table *) memremap(nhlt_ptr->min_addr, nhlt_ptr->length, MEMREMAP_WB); ACPI_FREE(obj); diff --git a/sound/soc/intel/skylake/skl-sst-utils.c b/sound/soc/intel/skylake/skl-sst-utils.c index ea162fbf68e5b3038f99f1c7ef0ba24590e22c41..d5adc04bb724bd9b0154b53c386baa2bacacf895 100644 --- a/sound/soc/intel/skylake/skl-sst-utils.c +++ b/sound/soc/intel/skylake/skl-sst-utils.c @@ -295,6 +295,7 @@ int snd_skl_parse_uuids(struct sst_dsp *ctx, const struct firmware *fw, struct uuid_module *module; struct firmware stripped_fw; unsigned int safe_file; + int ret = 0; /* Get the FW pointer to derive ADSP header */ stripped_fw.data = fw->data; @@ -343,8 +344,10 @@ int snd_skl_parse_uuids(struct sst_dsp *ctx, const struct firmware *fw, for (i = 0; i < num_entry; i++, mod_entry++) { module = kzalloc(sizeof(*module), GFP_KERNEL); - if (!module) - return -ENOMEM; + if (!module) { + ret = -ENOMEM; + goto free_uuid_list; + } uuid_bin = (uuid_le *)mod_entry->uuid.id; memcpy(&module->uuid, uuid_bin, sizeof(module->uuid)); @@ -355,8 +358,8 @@ int snd_skl_parse_uuids(struct sst_dsp *ctx, const struct firmware *fw, size = sizeof(int) * mod_entry->instance_max_count; module->instance_id = devm_kzalloc(ctx->dev, size, GFP_KERNEL); if (!module->instance_id) { - kfree(module); - return -ENOMEM; + ret = -ENOMEM; + goto free_uuid_list; } list_add_tail(&module->list, &skl->uuid_list); @@ -367,6 +370,10 @@ int snd_skl_parse_uuids(struct sst_dsp *ctx, const struct firmware *fw, } return 0; + +free_uuid_list: + skl_freeup_uuid_list(skl); + return ret; } void skl_freeup_uuid_list(struct skl_sst *ctx) diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index 974915cb4c4fd8a0c0f44c8d5cd2935411c10646..08bfee447a36595159ef6abe64223c20c245940a 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -476,6 +476,7 @@ static bool rockchip_i2s_rd_reg(struct device *dev, unsigned int reg) case I2S_INTCR: case I2S_XFER: case I2S_CLR: + case I2S_TXDR: case I2S_RXDR: case I2S_FIFOLR: case I2S_INTSR: @@ -490,6 +491,9 @@ static bool rockchip_i2s_volatile_reg(struct device *dev, unsigned int reg) switch (reg) { case I2S_INTSR: case I2S_CLR: + case I2S_FIFOLR: + case I2S_TXDR: + case I2S_RXDR: return true; default: return false; @@ -499,6 +503,8 @@ static bool rockchip_i2s_volatile_reg(struct device *dev, unsigned int reg) static bool rockchip_i2s_precious_reg(struct device *dev, unsigned int reg) { switch (reg) { + case I2S_RXDR: + return true; default: return false; } diff --git a/sound/soc/sh/rcar/cmd.c b/sound/soc/sh/rcar/cmd.c index abb5eaac854a9b9c47027e278cd5204c18aceace..7d92a24b7cfa558afbb8331401c974c59d5f1ae5 100644 --- a/sound/soc/sh/rcar/cmd.c +++ b/sound/soc/sh/rcar/cmd.c @@ -31,23 +31,24 @@ static int rsnd_cmd_init(struct rsnd_mod *mod, struct rsnd_mod *mix = rsnd_io_to_mod_mix(io); struct device *dev = rsnd_priv_to_dev(priv); u32 data; + u32 path[] = { + [1] = 1 << 0, + [5] = 1 << 8, + [6] = 1 << 12, + [9] = 1 << 15, + }; if (!mix && !dvc) return 0; + if (ARRAY_SIZE(path) < rsnd_mod_id(mod) + 1) + return -ENXIO; + if (mix) { struct rsnd_dai *rdai; struct rsnd_mod *src; struct rsnd_dai_stream *tio; int i; - u32 path[] = { - [0] = 0, - [1] = 1 << 0, - [2] = 0, - [3] = 0, - [4] = 0, - [5] = 1 << 8 - }; /* * it is assuming that integrater is well understanding about @@ -70,16 +71,19 @@ static int rsnd_cmd_init(struct rsnd_mod *mod, } 
else { struct rsnd_mod *src = rsnd_io_to_mod_src(io); - u32 path[] = { - [0] = 0x30000, - [1] = 0x30001, - [2] = 0x40000, - [3] = 0x10000, - [4] = 0x20000, - [5] = 0x40100 + u8 cmd_case[] = { + [0] = 0x3, + [1] = 0x3, + [2] = 0x4, + [3] = 0x1, + [4] = 0x2, + [5] = 0x4, + [6] = 0x1, + [9] = 0x2, }; - data = path[rsnd_mod_id(src)]; + data = path[rsnd_mod_id(src)] | + cmd_case[rsnd_mod_id(src)] << 16; } dev_dbg(dev, "ctu/mix path = 0x%08x", data); diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index 6bc93cbb3049a823f8b33b559478f2d1175a9052..edeb74a13c0f6bf766988884c2c2ae5b05bdb717 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -361,6 +361,20 @@ static u32 rsnd_dmapp_read(struct rsnd_dma *dma, u32 reg) return ioread32(rsnd_dmapp_addr(dmac, dma, reg)); } +static void rsnd_dmapp_bset(struct rsnd_dma *dma, u32 data, u32 mask, u32 reg) +{ + struct rsnd_mod *mod = rsnd_mod_get(dma); + struct rsnd_priv *priv = rsnd_mod_to_priv(mod); + struct rsnd_dma_ctrl *dmac = rsnd_priv_to_dmac(priv); + volatile void __iomem *addr = rsnd_dmapp_addr(dmac, dma, reg); + u32 val = ioread32(addr); + + val &= ~mask; + val |= (data & mask); + + iowrite32(val, addr); +} + static int rsnd_dmapp_stop(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) @@ -368,10 +382,10 @@ static int rsnd_dmapp_stop(struct rsnd_mod *mod, struct rsnd_dma *dma = rsnd_mod_to_dma(mod); int i; - rsnd_dmapp_write(dma, 0, PDMACHCR); + rsnd_dmapp_bset(dma, 0, PDMACHCR_DE, PDMACHCR); for (i = 0; i < 1024; i++) { - if (0 == rsnd_dmapp_read(dma, PDMACHCR)) + if (0 == (rsnd_dmapp_read(dma, PDMACHCR) & PDMACHCR_DE)) return 0; udelay(1); } diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 6cb6db005fc45dd3e562fe480c0e353db7a6116a..a9a43acce30ec573bce012f5c8a6758df08305bf 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -172,10 +172,15 @@ static u32 rsnd_ssi_run_mods(struct rsnd_dai_stream *io) { struct rsnd_mod *ssi_mod = rsnd_io_to_mod_ssi(io); struct rsnd_mod *ssi_parent_mod = rsnd_io_to_mod_ssip(io); + u32 mods; - return rsnd_ssi_multi_slaves_runtime(io) | - 1 << rsnd_mod_id(ssi_mod) | - 1 << rsnd_mod_id(ssi_parent_mod); + mods = rsnd_ssi_multi_slaves_runtime(io) | + 1 << rsnd_mod_id(ssi_mod); + + if (ssi_parent_mod) + mods |= 1 << rsnd_mod_id(ssi_parent_mod); + + return mods; } u32 rsnd_ssi_multi_slaves_runtime(struct rsnd_dai_stream *io) @@ -694,9 +699,14 @@ static int rsnd_ssi_dma_remove(struct rsnd_mod *mod, struct rsnd_priv *priv) { struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); + struct rsnd_mod *pure_ssi_mod = rsnd_io_to_mod_ssi(io); struct device *dev = rsnd_priv_to_dev(priv); int irq = ssi->irq; + /* Do nothing if non SSI (= SSI parent, multi SSI) mod */ + if (pure_ssi_mod != mod) + return 0; + /* PIO will request IRQ again */ devm_free_irq(dev, irq, mod); diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 6f9b388ec5a8f0c2095a2a88e33d725244706b2e..3f95d6b88f8c93f8df50f6a25cb672fb3112aa20 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -44,7 +44,11 @@ static int rsnd_ssiu_init(struct rsnd_mod *mod, mask1 = (1 << 4) | (1 << 20); /* mask sync bit */ mask2 = (1 << 4); /* mask sync bit */ val1 = val2 = 0; - if (rsnd_ssi_is_pin_sharing(io)) { + if (id == 8) { + /* + * SSI8 pin is sharing with SSI7, nothing to do. 
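+		 * (SSI7's own configuration covers the shared pins, so no
+		 * sync bits need to be set for SSI8 here.)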
+ */ + } else if (rsnd_ssi_is_pin_sharing(io)) { int shift = -1; switch (id) { diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index 0e1c3ee566755b6e69688b373f23a4d8bd7ea5cb..9735b4caaed3716fb850760a0d2b989af8ef7c7a 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -364,6 +364,8 @@ static int uni_reader_startup(struct snd_pcm_substream *substream, struct uniperif *reader = priv->dai_data.uni; int ret; + reader->substream = substream; + if (!UNIPERIF_TYPE_IS_TDM(reader)) return 0; @@ -393,6 +395,7 @@ static void uni_reader_shutdown(struct snd_pcm_substream *substream, /* Stop the reader */ uni_reader_stop(reader); } + reader->substream = NULL; } static const struct snd_soc_dai_ops uni_reader_dai_ops = { diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 9133d3e53d9d64faa5b884078d00cc497317d694..08015c13911640ff6d93eda8a62f0b3c288e638a 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -204,6 +204,10 @@ static int snd_usb_copy_string_desc(struct mixer_build *state, int index, char *buf, int maxlen) { int len = usb_string(state->chip->dev, index, buf, maxlen - 1); + + if (len < 0) + return 0; + buf[len] = 0; return len; } @@ -2163,19 +2167,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, kctl->private_value = (unsigned long)namelist; kctl->private_free = usb_mixer_selector_elem_free; - nameid = uac_selector_unit_iSelector(desc); + /* check the static mapping table at first */ len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name)); - if (len) - ; - else if (nameid) - snd_usb_copy_string_desc(state, nameid, kctl->id.name, - sizeof(kctl->id.name)); - else { - len = get_term_name(state, &state->oterm, + if (!len) { + /* no mapping ? */ + /* if iSelector is given, use it */ + nameid = uac_selector_unit_iSelector(desc); + if (nameid) + len = snd_usb_copy_string_desc(state, nameid, + kctl->id.name, + sizeof(kctl->id.name)); + /* ... or pick up the terminal name at next */ + if (!len) + len = get_term_name(state, &state->oterm, kctl->id.name, sizeof(kctl->id.name), 0); + /* ... or use the fixed string "USB" as the last resort */ if (!len) strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name)); + /* and add the proper suffix */ if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR) append_ctl_name(kctl, " Clock Source"); else if ((state->oterm.type & 0xff00) == 0x0100) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 7613b9e07b5c7a20981bbb08067d015d1407af4e..1cd7f8b0bf77986a00937aa5495edf595b2113cd 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1170,10 +1170,11 @@ static bool is_marantz_denon_dac(unsigned int id) /* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch * between PCM/DOP and native DSD mode */ -static bool is_teac_50X_dac(unsigned int id) +static bool is_teac_dsd_dac(unsigned int id) { switch (id) { case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */ + case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */ return true; } return false; @@ -1206,7 +1207,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs, break; } mdelay(20); - } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) { + } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) { /* Vendor mode switch cmd is required. 
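	 * (Altsetting 3 requests native DSD on these DACs; the other
	 * altsettings use PCM/DOP, per the comment above.)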
*/ switch (fmt->altsetting) { case 3: /* DSD mode (DSD_U32) requested */ @@ -1376,7 +1377,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, } /* TEAC devices with USB DAC functionality */ - if (is_teac_50X_dac(chip->usb_id)) { + if (is_teac_dsd_dac(chip->usb_id)) { if (fp->altsetting == 3) return SNDRV_PCM_FMTBIT_DSD_U32_BE; } diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index a39629206864e5bb74aaddea15ca1ab762877042..f79669a38c0cfa54bb167f484f584c809aa77b69 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -197,6 +197,9 @@ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ +#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 1c14c25951583611167fe0d48ab00479d108232a..4b36323ea64bb1acb26395d57166d802e178ec80 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -23,6 +23,7 @@ #include #include #include +#include #include int monitor_device(const char *device_name, diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index bc7adb84e679cdd237b60646255b425819c5f5b0..60a94b3e532e4d5ec07cd96c179911c378b9ca51 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -193,11 +193,14 @@ static void kvp_update_mem_state(int pool) for (;;) { readp = &record[records_read]; records_read += fread(readp, sizeof(struct kvp_record), - ENTRIES_PER_BLOCK * num_blocks, - filep); + ENTRIES_PER_BLOCK * num_blocks - records_read, + filep); if (ferror(filep)) { - syslog(LOG_ERR, "Failed to read file, pool: %d", pool); + syslog(LOG_ERR, + "Failed to read file, pool: %d; error: %d %s", + pool, errno, strerror(errno)); + kvp_release_lock(pool); exit(EXIT_FAILURE); } @@ -210,6 +213,7 @@ static void kvp_update_mem_state(int pool) if (record == NULL) { syslog(LOG_ERR, "malloc failed"); + kvp_release_lock(pool); exit(EXIT_FAILURE); } continue; @@ -224,15 +228,11 @@ static void kvp_update_mem_state(int pool) fclose(filep); kvp_release_lock(pool); } + static int kvp_file_init(void) { int fd; - FILE *filep; - size_t records_read; char *fname; - struct kvp_record *record; - struct kvp_record *readp; - int num_blocks; int i; int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK; @@ -246,61 +246,19 @@ static int kvp_file_init(void) for (i = 0; i < KVP_POOL_COUNT; i++) { fname = kvp_file_info[i].fname; - records_read = 0; - num_blocks = 1; sprintf(fname, "%s/.kvp_pool_%d", KVP_CONFIG_LOC, i); fd = open(fname, O_RDWR | O_CREAT | O_CLOEXEC, 0644 /* rw-r--r-- */); if (fd == -1) return 1; - - filep = fopen(fname, "re"); - if (!filep) { - close(fd); - return 1; - } - - record = malloc(alloc_unit * num_blocks); - if (record == NULL) { - fclose(filep); - close(fd); - return 1; - } - for (;;) { - readp = &record[records_read]; - records_read += fread(readp, sizeof(struct kvp_record), - ENTRIES_PER_BLOCK, - filep); - - if (ferror(filep)) { - syslog(LOG_ERR, "Failed to read file, pool: %d", - i); - exit(EXIT_FAILURE); - } - - if (!feof(filep)) { - /* - * We have 
more data to read. - */ - num_blocks++; - record = realloc(record, alloc_unit * - num_blocks); - if (record == NULL) { - fclose(filep); - close(fd); - return 1; - } - continue; - } - break; - } kvp_file_info[i].fd = fd; - kvp_file_info[i].num_blocks = num_blocks; - kvp_file_info[i].records = record; - kvp_file_info[i].num_records = records_read; - fclose(filep); - + kvp_file_info[i].num_blocks = 1; + kvp_file_info[i].records = malloc(alloc_unit); + if (kvp_file_info[i].records == NULL) + return 1; + kvp_file_info[i].num_records = 0; + kvp_update_mem_state(i); } return 0; diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h index 51334edec506815a70868dd451d5c72944ed0a59..f306a764250982f0f7beb296eb685149ab35c099 100644 --- a/tools/include/linux/poison.h +++ b/tools/include/linux/poison.h @@ -14,6 +14,10 @@ # define POISON_POINTER_DELTA 0 #endif +#ifdef __cplusplus +#define LIST_POISON1 NULL +#define LIST_POISON2 NULL +#else /* * These are non-NULL pointers that will result in page faults * under normal circumstances, used to verify that nobody uses @@ -21,6 +25,7 @@ */ #define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) #define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA) +#endif /********** include/linux/timer.h **********/ /* diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt index 767be7c760340bd33b7e4a18b9a8f3a71d9db33e..1754e094bc288014e1e173fe0858ebb6d9b5f1b3 100644 --- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt +++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt @@ -896,7 +896,7 @@ EndTable GrpTable: Grp3_1 0: TEST Eb,Ib -1: +1: TEST Eb,Ib 2: NOT Eb 3: NEG Eb 4: MUL AL,Eb diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index b8dadb050d2b3f63eb35cef0ef555fa3b2693fa3..a688a857a7ae88ebde88548e83db19506f948744 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -51,7 +51,7 @@ struct instruction { unsigned int len, state; unsigned char type; unsigned long immediate; - bool alt_group, visited; + bool alt_group, visited, ignore_alts; struct symbol *call_dest; struct instruction *jump_dest; struct list_head alts; @@ -352,6 +352,40 @@ static void add_ignores(struct objtool_file *file) } } +/* + * FIXME: For now, just ignore any alternatives which add retpolines. This is + * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. + * But it at least allows objtool to understand the control flow *around* the + * retpoline. + */ +static int add_nospec_ignores(struct objtool_file *file) +{ + struct section *sec; + struct rela *rela; + struct instruction *insn; + + sec = find_section_by_name(file->elf, ".rela.discard.nospec"); + if (!sec) + return 0; + + list_for_each_entry(rela, &sec->rela_list, list) { + if (rela->sym->type != STT_SECTION) { + WARN("unexpected relocation symbol type in %s", sec->name); + return -1; + } + + insn = find_insn(file, rela->sym->sec, rela->addend); + if (!insn) { + WARN("bad .discard.nospec entry"); + return -1; + } + + insn->ignore_alts = true; + } + + return 0; +} + /* * Find the destination instructions for all jumps. */ @@ -382,6 +416,13 @@ static int add_jump_destinations(struct objtool_file *file) } else if (rela->sym->sec->idx) { dest_sec = rela->sym->sec; dest_off = rela->sym->sym.st_value + rela->addend + 4; + } else if (strstr(rela->sym->name, "_indirect_thunk_")) { + /* + * Retpoline jumps are really dynamic jumps in + * disguise, so convert them accordingly. 
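+			 * (The branch target is a compiler-generated
+			 * __x86_indirect_thunk_<reg> symbol, which is what
+			 * the strstr() match above keys on.)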
+ */ + insn->type = INSN_JUMP_DYNAMIC; + continue; } else { /* sibling call */ insn->jump_dest = 0; @@ -428,11 +469,18 @@ static int add_call_destinations(struct objtool_file *file) dest_off = insn->offset + insn->len + insn->immediate; insn->call_dest = find_symbol_by_offset(insn->sec, dest_off); + /* + * FIXME: Thanks to retpolines, it's now considered + * normal for a function to call within itself. So + * disable this warning for now. + */ +#if 0 if (!insn->call_dest) { WARN_FUNC("can't find call dest symbol at offset 0x%lx", insn->sec, insn->offset, dest_off); return -1; } +#endif } else if (rela->sym->type == STT_SECTION) { insn->call_dest = find_symbol_by_offset(rela->sym->sec, rela->addend+4); @@ -594,12 +642,6 @@ static int add_special_section_alts(struct objtool_file *file) return ret; list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { - alt = malloc(sizeof(*alt)); - if (!alt) { - WARN("malloc failed"); - ret = -1; - goto out; - } orig_insn = find_insn(file, special_alt->orig_sec, special_alt->orig_off); @@ -610,6 +652,10 @@ static int add_special_section_alts(struct objtool_file *file) goto out; } + /* Ignore retpoline alternatives. */ + if (orig_insn->ignore_alts) + continue; + new_insn = NULL; if (!special_alt->group || special_alt->new_len) { new_insn = find_insn(file, special_alt->new_sec, @@ -635,6 +681,13 @@ static int add_special_section_alts(struct objtool_file *file) goto out; } + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + ret = -1; + goto out; + } + alt->insn = new_insn; list_add_tail(&alt->list, &orig_insn->alts); @@ -854,6 +907,10 @@ static int decode_sections(struct objtool_file *file) add_ignores(file); + ret = add_nospec_ignores(file); + if (ret) + return ret; + ret = add_jump_destinations(file); if (ret) return ret; @@ -1173,6 +1230,14 @@ static int validate_uncallable_instructions(struct objtool_file *file) for_each_insn(file, insn) { if (!insn->visited && insn->type == INSN_RETURN) { + + /* + * Don't warn about call instructions in unvisited + * retpoline alternatives. 
+ */ + if (!strcmp(insn->sec->name, ".altinstr_replacement")) + continue; + WARN_FUNC("return instruction outside of a callable function", insn->sec, insn->offset); warnings++; @@ -1229,7 +1294,7 @@ int cmd_check(int argc, const char **argv) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); - file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard"); + file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); file.rodata = find_section_by_name(file.elf, ".rodata"); file.ignore_unreachables = false; file.c_file = find_section_by_name(file.elf, ".comment"); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index d897702ce7427804da2c09387f674077f22accc5..faacf0c899762d747bbc450a46c9abe200e04e3f 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "elf.h" #include "warn.h" @@ -370,7 +371,8 @@ struct elf *elf_open(const char *name) elf->fd = open(name, O_RDONLY); if (elf->fd == -1) { - perror("open"); + fprintf(stderr, "objtool: Can't open '%s': %s\n", + name, strerror(errno)); goto err; } diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index cffdd9cf3ebf764a1d4fbed2421045e4ee148c7b..ff375310efe49d1127f369f6b6c7e4ca97ca773e 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -19,18 +19,18 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(srctree)/tools/scripts/Makefile.arch -$(call detected_var,ARCH) +$(call detected_var,SRCARCH) NO_PERF_REGS := 1 # Additional ARCH settings for ppc -ifeq ($(ARCH),powerpc) +ifeq ($(SRCARCH),powerpc) NO_PERF_REGS := 0 LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 endif # Additional ARCH settings for x86 -ifeq ($(ARCH),x86) +ifeq ($(SRCARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated @@ -43,12 +43,12 @@ ifeq ($(ARCH),x86) NO_PERF_REGS := 0 endif -ifeq ($(ARCH),arm) +ifeq ($(SRCARCH),arm) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-arm endif -ifeq ($(ARCH),arm64) +ifeq ($(SRCARCH),arm64) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif @@ -61,7 +61,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. -ifneq ($(ARCH),$(filter $(ARCH),x86 arm)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm)) NO_LIBDW_DWARF_UNWIND := 1 endif @@ -115,9 +115,9 @@ endif FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf -FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +FEATURE_CHECK_CFLAGS-bpf = -I. 
-I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi # include ARCH specific config --include $(src-perf)/arch/$(ARCH)/Makefile +-include $(src-perf)/arch/$(SRCARCH)/Makefile ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET @@ -205,12 +205,12 @@ ifeq ($(DEBUG),0) endif CFLAGS += -I$(src-perf)/util/include -CFLAGS += -I$(src-perf)/arch/$(ARCH)/include +CFLAGS += -I$(src-perf)/arch/$(SRCARCH)/include CFLAGS += -I$(srctree)/tools/include/uapi CFLAGS += -I$(srctree)/tools/include/ -CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi -CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/ -CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/ +CFLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi +CFLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/ +CFLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/ # $(obj-perf) for generated common-cmds.h # $(obj-perf)/util for generated bison/flex headers @@ -321,7 +321,7 @@ ifndef NO_LIBELF ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) - msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); + msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled); NO_DWARF := 1 else CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) @@ -346,7 +346,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_BPF_PROLOGUE $(call detected,CONFIG_BPF_PROLOGUE) else - msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset()); + msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset()); endif else msg := $(warning DWARF support is off, BPF prologue is disabled); @@ -372,7 +372,7 @@ ifdef PERF_HAVE_JITDUMP endif endif -ifeq ($(ARCH),powerpc) +ifeq ($(SRCARCH),powerpc) ifndef NO_DWARF CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX endif @@ -453,7 +453,7 @@ else endif ifndef NO_LOCAL_LIBUNWIND - ifeq ($(ARCH),$(filter $(ARCH),arm arm64)) + ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64)) $(call feature_check,libunwind-debug-frame) ifneq ($(feature-libunwind-debug-frame), 1) msg := $(warning No debug_frame support found in libunwind); @@ -717,7 +717,7 @@ ifeq (${IS_64_BIT}, 1) NO_PERF_READ_VDSO32 := 1 endif endif - ifneq ($(ARCH), x86) + ifneq ($(SRCARCH), x86) NO_PERF_READ_VDSOX32 := 1 endif ifndef NO_PERF_READ_VDSOX32 @@ -746,7 +746,7 @@ ifdef LIBBABELTRACE endif ifndef NO_AUXTRACE - ifeq ($(ARCH),x86) + ifeq ($(SRCARCH),x86) ifeq ($(feature-get_cpuid), 0) msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); NO_AUXTRACE := 1 @@ -793,7 +793,7 @@ sysconfdir = $(prefix)/etc ETC_PERFCONFIG = etc/perfconfig endif ifndef lib -ifeq ($(ARCH)$(IS_64_BIT), x861) +ifeq ($(SRCARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index ef52d1e3d43118d5f7bacec557bd33933aa5f8ea..2b92ffef554b93f16ff266fbb1e38075f70da929 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -192,7 +192,7 @@ endif ifeq ($(config),0) include $(srctree)/tools/scripts/Makefile.arch --include arch/$(ARCH)/Makefile +-include arch/$(SRCARCH)/Makefile endif # The FEATURE_DUMP_EXPORT holds location of the actual diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build index 109eb75cf7de4e97c855fe52c3d6437f454b5c8e..d9b6af837c7d392fce6168d9ce5ebe2b9a6b57d5 100644 --- 
a/tools/perf/arch/Build +++ b/tools/perf/arch/Build @@ -1,2 +1,2 @@ libperf-y += common.o -libperf-y += $(ARCH)/ +libperf-y += $(SRCARCH)/ diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 9213a1273697a801d6b75f4a8804424e68eb6ab7..999a4e8781621677821440e80ec8491689bbc6e3 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -2,7 +2,7 @@ hostprogs := jevents jevents-y += json.o jsmn.o jevents.o pmu-events-y += pmu-events.o -JDIR = pmu-events/arch/$(ARCH) +JDIR = pmu-events/arch/$(SRCARCH) JSON = $(shell [ -d $(JDIR) ] && \ find $(JDIR) -name '*.json' -o -name 'mapfile.csv') # @@ -10,4 +10,4 @@ JSON = $(shell [ -d $(JDIR) ] && \ # directory and create tables in pmu-events.c. # $(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS) - $(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V) + $(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 8a4ce492f7b22be2cf8e0dcfd751365cbc601220..546250a273e7ecfa4fa79d4ac5c880c935439da3 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -71,7 +71,7 @@ $(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/B $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ $(Q)echo ';' >> $@ -ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc)) +ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 28d1605b033896aa4b87844eeffa6d9dc1646e6b..b60a6fd66517ba6efca4c5f692157a1e49c756a5 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -150,7 +150,7 @@ static int run_dir(const char *d, const char *perf) snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", d, d, perf, vcnt, v); - return system(cmd); + return system(cmd) ? 
TEST_FAIL : TEST_OK; } int test__attr(int subtest __maybe_unused) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 5337f49db36163b45788ce189f005e68fdb286c5..28bdb48357f09ba0ad8e8a2109a008cf620d31de 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -826,7 +826,7 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused, /* * default get_cpuid(): nothing gets recorded - * actual implementation must be in arch/$(ARCH)/util/header.c + * actual implementation must be in arch/$(SRCARCH)/util/header.c */ int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f7b35e178582b6d427498638fb4744854c8e8b21..f199d5b11d76a0c28794ab8dfa3ee494ae4d6cfd 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -202,7 +202,7 @@ void symbols__fixup_end(struct rb_root *symbols) /* Last entry */ if (curr->end == curr->start) - curr->end = roundup(curr->start, 4096); + curr->end = roundup(curr->start, 4096) + 4096; } void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c index c25a74ae51baef13bfa5609d2957af76941597f6..2bb3eef7d5c1fbf36d420be2801e74b4f7eeb049 100644 --- a/tools/power/cpupower/bench/system.c +++ b/tools/power/cpupower/bench/system.c @@ -61,7 +61,7 @@ int set_cpufreq_governor(char *governor, unsigned int cpu) dprintf("set %s as cpufreq governor\n", governor); - if (cpupower_is_cpu_online(cpu) != 0) { + if (cpupower_is_cpu_online(cpu) != 1) { perror("cpufreq_cpu_exists"); fprintf(stderr, "error: cpu %u does not exist\n", cpu); return -1; diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index 1b5da0066ebf90bfe4c441fab62fd26b4cc99267..5b3205f1621749bb6ebc340413ae16d957064cb9 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -130,15 +130,18 @@ static struct cpuidle_monitor *cpuidle_register(void) { int num; char *tmp; + int this_cpu; + + this_cpu = sched_getcpu(); /* Assume idle state count is the same for all CPUs */ - cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(0); + cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(this_cpu); if (cpuidle_sysfs_monitor.hw_states_num <= 0) return NULL; for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) { - tmp = cpuidle_state_name(0, num); + tmp = cpuidle_state_name(this_cpu, num); if (tmp == NULL) continue; @@ -146,7 +149,7 @@ static struct cpuidle_monitor *cpuidle_register(void) strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); free(tmp); - tmp = cpuidle_state_desc(0, num); + tmp = cpuidle_state_desc(this_cpu, num); if (tmp == NULL) continue; strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1); diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 248a820048dfe89018697926f4f308acd786e694..66d31de60b9ae93ee53175b33983e3d86d67795f 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name) rc = run_test(test_function, name); - if (rc == MAGIC_SKIP_RETURN_VALUE) + if (rc == MAGIC_SKIP_RETURN_VALUE) { test_skip(name); - else + /* so that skipped test is not marked as failed */ + rc = 0; + } else test_finish(name, rc); 
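	/* rc is now 0 for passed and skipped tests alike, so only genuine
	 * failures propagate through the harness exit status. */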
return rc; diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index bbab7f4664acdc6a0d535b2787c59f00253e127b..d116a19477a76b4c0d9b6c8bb9aa9bd305d43947 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,5 +1,9 @@ # Makefile for vm selftests +ifndef OUTPUT + OUTPUT := $(shell pwd) +endif + CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) BINARIES = compaction_test BINARIES += hugepage-mmap diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 6300c1a41ff6cb8621cf5a90eb65ba9beaf2658b..4af37bfe4aea0f750e543801ddfa00b0e38d9e2d 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -6,7 +6,7 @@ include ../lib.mk TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \ - protection_keys + protection_keys test_vsyscall TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 9b4610c6d3fb1a0cee54e023a7e3cf3befc8a51d..f249e042b3b517970733cd32e051f87a8ba740c4 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -245,7 +245,7 @@ void do_unexpected_base(void) long ret; asm volatile ("int $0x80" : "=a" (ret) : "a" (243), "b" (low_desc) - : "flags"); + : "r8", "r9", "r10", "r11"); memcpy(&desc, low_desc, sizeof(desc)); munmap(low_desc, sizeof(desc)); diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index e717fed8021916650f17b4eda051dd3076d0bb37..ac1a7a3f87b24bde937280be7c1de11078c80877 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -45,6 +45,12 @@ #define AR_DB (1 << 22) #define AR_G (1 << 23) +#ifdef __x86_64__ +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" +#else +# define INT80_CLOBBERS +#endif + static int nerrs; /* Points to an array of 1024 ints, each holding its own index. 
*/ @@ -360,9 +366,24 @@ static void do_simple_tests(void) install_invalid(&desc, false); desc.seg_not_present = 0; - desc.read_exec_only = 0; desc.seg_32bit = 1; + desc.read_exec_only = 0; + desc.limit = 0xfffff; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB); + + desc.limit_in_pages = 1; + + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G); + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G); + desc.contents = 1; + desc.read_exec_only = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G); + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G); + + desc.limit = 0; install_invalid(&desc, true); } @@ -634,7 +655,7 @@ static int invoke_set_thread_area(void) asm volatile ("int $0x80" : "=a" (ret), "+m" (low_user_desc) : "a" (243), "b" (low_user_desc) - : "flags"); + : INT80_CLOBBERS); return ret; } @@ -703,7 +724,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -734,7 +755,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -767,7 +788,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base); @@ -820,7 +841,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base); diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h index 093c190178a962dc193f7b02421813971a916a82..28b3c7c553a4aa3b00d60c6ead16effa079f3ec0 100644 --- a/tools/testing/selftests/x86/mpx-hw.h +++ b/tools/testing/selftests/x86/mpx-hw.h @@ -51,14 +51,14 @@ struct mpx_bd_entry { union { char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES]; - void *contents[1]; + void *contents[0]; }; } __attribute__((packed)); struct mpx_bt_entry { union { char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES]; - unsigned long contents[1]; + unsigned long contents[0]; }; } __attribute__((packed)); diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index b037ce9cf116b1da0ef57601f8f1840e45fcc775..eaea9243970840dab196cb1ddf84586e30803f0a 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args) asm volatile ("int $0x80" : "+a" (args->nr), "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), - "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)); + "+S" (args->arg3), "+D" (args->arg4), "+r" (bp) + : : "r8", "r9", "r10", "r11"); args->arg5 = bp; #else sys32_helper(args, int80_and_ret); diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 50c26358e8b7ec055000ead54c2c80c69b371a6f..a48da95c18fdf1f0ea46e7cb628ff9a9caba931b 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ 
b/tools/testing/selftests/x86/single_step_syscall.c @@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps; #ifdef __x86_64__ # define REG_IP REG_RIP # define WIDTH "q" +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" #else # define REG_IP REG_EIP # define WIDTH "l" +# define INT80_CLOBBERS #endif static unsigned long get_eflags(void) @@ -140,7 +142,8 @@ int main() printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); - asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)); + asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) + : INT80_CLOBBERS); check_result(); /* diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c new file mode 100644 index 0000000000000000000000000000000000000000..6e0bd52ad53d06ba42e2d9b08dd53932e6abcade --- /dev/null +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -0,0 +1,500 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __x86_64__ +# define VSYS(x) (x) +#else +# define VSYS(x) 0 +#endif + +#ifndef SYS_getcpu +# ifdef __x86_64__ +# define SYS_getcpu 309 +# else +# define SYS_getcpu 318 +# endif +#endif + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +/* vsyscalls and vDSO */ +bool should_read_vsyscall = false; + +typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); +gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000); +gtod_t vdso_gtod; + +typedef int (*vgettime_t)(clockid_t, struct timespec *); +vgettime_t vdso_gettime; + +typedef long (*time_func_t)(time_t *t); +time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400); +time_func_t vdso_time; + +typedef long (*getcpu_t)(unsigned *, unsigned *, void *); +getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); +getcpu_t vdso_getcpu; + +static void init_vdso(void) +{ + void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) { + printf("[WARN]\tfailed to find vDSO\n"); + return; + } + + vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday"); + if (!vdso_gtod) + printf("[WARN]\tfailed to find gettimeofday in vDSO\n"); + + vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + if (!vdso_gettime) + printf("[WARN]\tfailed to find clock_gettime in vDSO\n"); + + vdso_time = (time_func_t)dlsym(vdso, "__vdso_time"); + if (!vdso_time) + printf("[WARN]\tfailed to find time in vDSO\n"); + + vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); + if (!vdso_getcpu) { + /* getcpu() was never wired up in the 32-bit vDSO. */ + printf("[%s]\tfailed to find getcpu in vDSO\n", + sizeof(long) == 8 ? 
"WARN" : "NOTE"); + } +} + +static int init_vsys(void) +{ +#ifdef __x86_64__ + int nerrs = 0; + FILE *maps; + char line[128]; + bool found = false; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) { + printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n"); + should_read_vsyscall = true; + return 0; + } + + while (fgets(line, sizeof(line), maps)) { + char r, x; + void *start, *end; + char name[128]; + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", + &start, &end, &r, &x, name) != 5) + continue; + + if (strcmp(name, "[vsyscall]")) + continue; + + printf("\tvsyscall map: %s", line); + + if (start != (void *)0xffffffffff600000 || + end != (void *)0xffffffffff601000) { + printf("[FAIL]\taddress range is nonsense\n"); + nerrs++; + } + + printf("\tvsyscall permissions are %c-%c\n", r, x); + should_read_vsyscall = (r == 'r'); + if (x != 'x') { + vgtod = NULL; + vtime = NULL; + vgetcpu = NULL; + } + + found = true; + break; + } + + fclose(maps); + + if (!found) { + printf("\tno vsyscall map in /proc/self/maps\n"); + should_read_vsyscall = false; + vgtod = NULL; + vtime = NULL; + vgetcpu = NULL; + } + + return nerrs; +#else + return 0; +#endif +} + +/* syscalls */ +static inline long sys_gtod(struct timeval *tv, struct timezone *tz) +{ + return syscall(SYS_gettimeofday, tv, tz); +} + +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) +{ + return syscall(SYS_clock_gettime, id, ts); +} + +static inline long sys_time(time_t *t) +{ + return syscall(SYS_time, t); +} + +static inline long sys_getcpu(unsigned * cpu, unsigned * node, + void* cache) +{ + return syscall(SYS_getcpu, cpu, node, cache); +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static double tv_diff(const struct timeval *a, const struct timeval *b) +{ + return (double)(a->tv_sec - b->tv_sec) + + (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6; +} + +static int check_gtod(const struct timeval *tv_sys1, + const struct timeval *tv_sys2, + const struct timezone *tz_sys, + const char *which, + const struct timeval *tv_other, + const struct timezone *tz_other) +{ + int nerrs = 0; + double d1, d2; + + if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) { + printf("[FAIL] %s tz mismatch\n", which); + nerrs++; + } + + d1 = tv_diff(tv_other, tv_sys1); + d2 = tv_diff(tv_sys2, tv_other); + printf("\t%s time offsets: %lf %lf\n", which, d1, d2); + + if (d1 < 0 || d2 < 0) { + printf("[FAIL]\t%s time was inconsistent with the syscall\n", which); + nerrs++; + } else { + printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which); + } + + return nerrs; +} + +static int test_gtod(void) +{ + struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys; + struct timezone tz_sys, tz_vdso, tz_vsys; + long ret_vdso = -1; + long ret_vsys = -1; + int nerrs = 0; + + printf("[RUN]\ttest gettimeofday()\n"); + + if (sys_gtod(&tv_sys1, &tz_sys) != 0) + err(1, "syscall gettimeofday"); + if (vdso_gtod) + ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso); + if (vgtod) + ret_vsys = vgtod(&tv_vsys, &tz_vsys); + if (sys_gtod(&tv_sys2, &tz_sys) != 0) + err(1, "syscall gettimeofday"); + + if (vdso_gtod) { + if (ret_vdso == 0) { + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso); + } else { + printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso); + nerrs++; + } + } + + if (vgtod) { + if (ret_vsys == 0) { + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, 
"vsyscall", &tv_vsys, &tz_vsys); + } else { + printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys); + nerrs++; + } + } + + return nerrs; +} + +static int test_time(void) { + int nerrs = 0; + + printf("[RUN]\ttest time()\n"); + long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0; + long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1; + t_sys1 = sys_time(&t2_sys1); + if (vdso_time) + t_vdso = vdso_time(&t2_vdso); + if (vtime) + t_vsys = vtime(&t2_vsys); + t_sys2 = sys_time(&t2_sys2); + if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) { + printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2); + nerrs++; + return nerrs; + } + + if (vdso_time) { + if (t_vdso < 0 || t_vdso != t2_vdso) { + printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso); + nerrs++; + } else if (t_vdso < t_sys1 || t_vdso > t_sys2) { + printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2); + nerrs++; + } else { + printf("[OK]\tvDSO time() is okay\n"); + } + } + + if (vtime) { + if (t_vsys < 0 || t_vsys != t2_vsys) { + printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys); + nerrs++; + } else if (t_vsys < t_sys1 || t_vsys > t_sys2) { + printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2); + nerrs++; + } else { + printf("[OK]\tvsyscall time() is okay\n"); + } + } + + return nerrs; +} + +static int test_getcpu(int cpu) +{ + int nerrs = 0; + long ret_sys, ret_vdso = -1, ret_vsys = -1; + + printf("[RUN]\tgetcpu() on CPU %d\n", cpu); + + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { + printf("[SKIP]\tfailed to force CPU %d\n", cpu); + return nerrs; + } + + unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys; + unsigned node = 0; + bool have_node = false; + ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); + if (vdso_getcpu) + ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); + if (vgetcpu) + ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); + + if (ret_sys == 0) { + if (cpu_sys != cpu) { + printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu); + nerrs++; + } + + have_node = true; + node = node_sys; + } + + if (vdso_getcpu) { + if (ret_vdso) { + printf("[FAIL]\tvDSO getcpu() failed\n"); + nerrs++; + } else { + if (!have_node) { + have_node = true; + node = node_vdso; + } + + if (cpu_vdso != cpu) { + printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu); + nerrs++; + } else { + printf("[OK]\tvDSO reported correct CPU\n"); + } + + if (node_vdso != node) { + printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node); + nerrs++; + } else { + printf("[OK]\tvDSO reported correct node\n"); + } + } + } + + if (vgetcpu) { + if (ret_vsys) { + printf("[FAIL]\tvsyscall getcpu() failed\n"); + nerrs++; + } else { + if (!have_node) { + have_node = true; + node = node_vsys; + } + + if (cpu_vsys != cpu) { + printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu); + nerrs++; + } else { + printf("[OK]\tvsyscall reported correct CPU\n"); + } + + if (node_vsys != node) { + printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node); + nerrs++; + } else { + printf("[OK]\tvsyscall reported correct node\n"); + } + } + } + + return nerrs; +} + +static int test_vsys_r(void) +{ +#ifdef __x86_64__ + printf("[RUN]\tChecking read access to the vsyscall page\n"); + bool can_read; 
+ if (sigsetjmp(jmpbuf, 1) == 0) { + *(volatile int *)0xffffffffff600000; + can_read = true; + } else { + can_read = false; + } + + if (can_read && !should_read_vsyscall) { + printf("[FAIL]\tWe have read access, but we shouldn't\n"); + return 1; + } else if (!can_read && should_read_vsyscall) { + printf("[FAIL]\tWe don't have read access, but we should\n"); + return 1; + } else { + printf("[OK]\tgot expected result\n"); + } +#endif + + return 0; +} + + +#ifdef __x86_64__ +#define X86_EFLAGS_TF (1UL << 8) +static volatile sig_atomic_t num_vsyscall_traps; + +static unsigned long get_eflags(void) +{ + unsigned long eflags; + asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags)); + return eflags; +} + +static void set_eflags(unsigned long eflags) +{ + asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags"); +} + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP]; + + if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0) + num_vsyscall_traps++; +} + +static int test_native_vsyscall(void) +{ + time_t tmp; + bool is_native; + + if (!vtime) + return 0; + + printf("[RUN]\tchecking for native vsyscall\n"); + sethandler(SIGTRAP, sigtrap, 0); + set_eflags(get_eflags() | X86_EFLAGS_TF); + vtime(&tmp); + set_eflags(get_eflags() & ~X86_EFLAGS_TF); + + /* + * If vsyscalls are emulated, we expect a single trap in the + * vsyscall page -- the call instruction will trap with RIP + * pointing to the entry point before emulation takes over. + * In native mode, we expect two traps, since whatever code + * the vsyscall page contains will be more than just a ret + * instruction. + */ + is_native = (num_vsyscall_traps > 1); + + printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n", + (is_native ? "native" : "emulated"), + (int)num_vsyscall_traps); + + return 0; +} +#endif + +int main(int argc, char **argv) +{ + int nerrs = 0; + + init_vdso(); + nerrs += init_vsys(); + + nerrs += test_gtod(); + nerrs += test_time(); + nerrs += test_getcpu(0); + nerrs += test_getcpu(1); + + sethandler(SIGSEGV, sigsegv, 0); + nerrs += test_vsys_r(); + +#ifdef __x86_64__ + nerrs += test_native_vsyscall(); +#endif + + return nerrs ? 
1 : 0; +} diff --git a/tools/usb/usbip/Makefile.am b/tools/usb/usbip/Makefile.am index 66f8bf038c9f6a5d3ec5c8d41acc0f91313bf7c1..45eaa70a71e0fe8978322a52cfb19403600f51fc 100644 --- a/tools/usb/usbip/Makefile.am +++ b/tools/usb/usbip/Makefile.am @@ -1,6 +1,7 @@ SUBDIRS := libsrc src includedir = @includedir@/usbip include_HEADERS := $(addprefix libsrc/, \ - usbip_common.h vhci_driver.h usbip_host_driver.h) + usbip_common.h vhci_driver.h usbip_host_driver.h \ + list.h sysfs_utils.h usbip_host_common.h) dist_man_MANS := $(addprefix doc/, usbip.8 usbipd.8) diff --git a/tools/usb/usbip/libsrc/usbip_common.c b/tools/usb/usbip/libsrc/usbip_common.c index ac73710473deaecd079d9846f3a9f5239b8055df..1517a232ab18798ba748d2c9bf16a4a3f6fa9ef5 100644 --- a/tools/usb/usbip/libsrc/usbip_common.c +++ b/tools/usb/usbip/libsrc/usbip_common.c @@ -215,9 +215,16 @@ int read_usb_interface(struct usbip_usb_device *udev, int i, struct usbip_usb_interface *uinf) { char busid[SYSFS_BUS_ID_SIZE]; + int size; struct udev_device *sif; - sprintf(busid, "%s:%d.%d", udev->busid, udev->bConfigurationValue, i); + size = snprintf(busid, sizeof(busid), "%s:%d.%d", + udev->busid, udev->bConfigurationValue, i); + if (size < 0 || (unsigned int)size >= sizeof(busid)) { + err("busid length %i >= %lu or < 0", size, + (long unsigned)sizeof(busid)); + return -1; + } sif = udev_device_new_from_subsystem_sysname(udev_context, "usb", busid); if (!sif) { diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c index 9d415228883deb286ec684d22a311940aae943a3..6ff7b601f854562090b348097963554113e3fb75 100644 --- a/tools/usb/usbip/libsrc/usbip_host_common.c +++ b/tools/usb/usbip/libsrc/usbip_host_common.c @@ -40,13 +40,20 @@ struct udev *udev_context; static int32_t read_attr_usbip_status(struct usbip_usb_device *udev) { char status_attr_path[SYSFS_PATH_MAX]; + int size; int fd; int length; char status; int value = 0; - snprintf(status_attr_path, SYSFS_PATH_MAX, "%s/usbip_status", - udev->path); + size = snprintf(status_attr_path, sizeof(status_attr_path), + "%s/usbip_status", udev->path); + if (size < 0 || (unsigned int)size >= sizeof(status_attr_path)) { + err("usbip_status path length %i >= %lu or < 0", size, + (long unsigned)sizeof(status_attr_path)); + return -1; + } + fd = open(status_attr_path, O_RDONLY); if (fd < 0) { @@ -218,6 +225,7 @@ int usbip_export_device(struct usbip_exported_device *edev, int sockfd) { char attr_name[] = "usbip_sockfd"; char sockfd_attr_path[SYSFS_PATH_MAX]; + int size; char sockfd_buff[30]; int ret; @@ -237,10 +245,20 @@ int usbip_export_device(struct usbip_exported_device *edev, int sockfd) } /* only the first interface is true */ - snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s", - edev->udev.path, attr_name); + size = snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s", + edev->udev.path, attr_name); + if (size < 0 || (unsigned int)size >= sizeof(sockfd_attr_path)) { + err("exported device path length %i >= %lu or < 0", size, + (long unsigned)sizeof(sockfd_attr_path)); + return -1; + } - snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd); + size = snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd); + if (size < 0 || (unsigned int)size >= sizeof(sockfd_buff)) { + err("socket length %i >= %lu or < 0", size, + (long unsigned)sizeof(sockfd_buff)); + return -1; + } ret = write_sysfs_attribute(sockfd_attr_path, sockfd_buff, strlen(sockfd_buff)); diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c 
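The usbip hunks above all apply one pattern: each snprintf() into a fixed-size sysfs path or command buffer now checks the return value and treats a negative result (encoding error) or a result >= the buffer size (truncation) as a hard failure, instead of operating on a silently clipped path. A small helper captures the idiom; this is an illustrative sketch, and the checked_snprintf() name is an assumption, not part of the patch:

#include <stdarg.h>
#include <stdio.h>

/* Format into buf; fail on error or truncation. Returns 0 on success. */
static int checked_snprintf(char *buf, size_t size, const char *fmt, ...)
{
	va_list ap;
	int n;

	va_start(ap, fmt);
	n = vsnprintf(buf, size, fmt, ap);
	va_end(ap);

	/* n >= size means the output did not fit and was truncated. */
	if (n < 0 || (size_t)n >= size)
		return -1;
	return 0;
}

int main(void)
{
	char busid[32];

	/* Mirrors read_usb_interface(): "<busid>:<config>.<interface>" */
	if (checked_snprintf(busid, sizeof(busid), "%s:%d.%d", "3-2", 1, 0))
		return 1;
	puts(busid);
	return 0;
}
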
index ad9204773533f4e3d6961dc2a9261720a39a1a44..1274f326242ca73b1bf98f4ff8b146a266aaa2df 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.c +++ b/tools/usb/usbip/libsrc/vhci_driver.c @@ -55,12 +55,12 @@ static int parse_status(const char *value) while (*c != '\0') { int port, status, speed, devid; - unsigned long socket; + int sockfd; char lbusid[SYSFS_BUS_ID_SIZE]; - ret = sscanf(c, "%d %d %d %x %lx %31s\n", + ret = sscanf(c, "%d %d %d %x %u %31s\n", &port, &status, &speed, - &devid, &socket, lbusid); + &devid, &sockfd, lbusid); if (ret < 5) { dbg("sscanf failed: %d", ret); @@ -69,7 +69,7 @@ static int parse_status(const char *value) dbg("port %d status %d speed %d devid %x", port, status, speed, devid); - dbg("socket %lx lbusid %s", socket, lbusid); + dbg("sockfd %u lbusid %s", sockfd, lbusid); /* if a device is connected, look at it */ diff --git a/tools/usb/usbip/src/usbip.c b/tools/usb/usbip/src/usbip.c index d7599d9435298286d63642067144c280c6110002..73d8eee8130b3db2121849dead5b7c064b0247c6 100644 --- a/tools/usb/usbip/src/usbip.c +++ b/tools/usb/usbip/src/usbip.c @@ -176,6 +176,8 @@ int main(int argc, char *argv[]) break; case '?': printf("usbip: invalid option\n"); + /* Terminate after printing error */ + /* FALLTHRU */ default: usbip_usage(); goto out; diff --git a/tools/usb/usbip/src/usbip_bind.c b/tools/usb/usbip/src/usbip_bind.c index fa46141ae68bbd866277185e07d9b4fcd81af422..e121cfb1746a4c7c4bf6d4e501d119fbee4bc60e 100644 --- a/tools/usb/usbip/src/usbip_bind.c +++ b/tools/usb/usbip/src/usbip_bind.c @@ -144,6 +144,7 @@ static int bind_device(char *busid) int rc; struct udev *udev; struct udev_device *dev; + const char *devpath; /* Check whether the device with this bus ID exists. */ udev = udev_new(); @@ -152,8 +153,16 @@ static int bind_device(char *busid) err("device with the specified bus ID does not exist"); return -1; } + devpath = udev_device_get_devpath(dev); udev_unref(udev); + /* If the device is already attached to vhci_hcd - bail out */ + if (strstr(devpath, USBIP_VHCI_DRV_NAME)) { + err("bind loop detected: device: %s is attached to %s\n", + devpath, USBIP_VHCI_DRV_NAME); + return -1; + } + rc = unbind_other(busid); if (rc == UNBIND_ST_FAILED) { err("could not unbind driver from device on busid %s", busid); diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c index f1b38e866dd7ea48012def1ca8d695deaaea0efb..d65a9f444174495029e96bc5d81c0fbfd81e52b5 100644 --- a/tools/usb/usbip/src/usbip_list.c +++ b/tools/usb/usbip/src/usbip_list.c @@ -187,6 +187,7 @@ static int list_devices(bool parsable) const char *busid; char product_name[128]; int ret = -1; + const char *devpath; /* Create libudev context. */ udev = udev_new(); @@ -209,6 +210,14 @@ static int list_devices(bool parsable) path = udev_list_entry_get_name(dev_list_entry); dev = udev_device_new_from_syspath(udev, path); + /* Ignore devices attached to vhci_hcd */ + devpath = udev_device_get_devpath(dev); + if (strstr(devpath, USBIP_VHCI_DRV_NAME)) { + dbg("Skip the device %s already attached to %s\n", + devpath, USBIP_VHCI_DRV_NAME); + continue; + } + /* Get device information. 
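	 * (idVendor and idProduct below are read from sysfs through
	 * libudev.)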
diff --git a/tools/usb/usbip/src/utils.c b/tools/usb/usbip/src/utils.c
index 2b3d6d2350158b73a2597dc1a78ac007b6a3563c..3d7b42e7729941b1ca8f35fb7f6d10d4151a1a2f 100644
--- a/tools/usb/usbip/src/utils.c
+++ b/tools/usb/usbip/src/utils.c
@@ -30,6 +30,7 @@ int modify_match_busid(char *busid, int add)
 	char command[SYSFS_BUS_ID_SIZE + 4];
 	char match_busid_attr_path[SYSFS_PATH_MAX];
 	int rc;
+	int cmd_size;
 
 	snprintf(match_busid_attr_path, sizeof(match_busid_attr_path),
 		 "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME,
@@ -37,12 +38,14 @@ int modify_match_busid(char *busid, int add)
 		 attr_name);
 
 	if (add)
-		snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid);
+		cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s",
+				    busid);
 	else
-		snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid);
+		cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s",
+				    busid);
 
 	rc = write_sysfs_attribute(match_busid_attr_path, command,
-				   sizeof(command));
+				   cmd_size);
 	if (rc < 0) {
 		dbg("failed to write match_busid: %s", strerror(errno));
 		return -1;
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 27a1f6341d4111d923501ab1033750d53c4eaed7..7b49a1378c905f85b3315cce1995d77d58191742 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -89,9 +89,6 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
 	struct kvm_vcpu *vcpu;
 
 	vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
-	vcpu->arch.timer_cpu.armed = false;
-
-	WARN_ON(!kvm_timer_should_fire(vcpu));
 
 	/*
 	 * If the vcpu is blocked we want to wake it up so that it will see
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index c8aeb7b91ec89bdf023151fc3ba5456be67f4d53..95021246ee266171bf10f0fc1cb6c13bc5c9e6c1 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -77,11 +77,7 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
 	else
 		elrsr1 = 0;
 
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
-#else
 	cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
-#endif
 }
 
 static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
index f138ed2e9c635b51e1f0094f6f5c3c7706b0a6fd..a26c6773d6df65b021de12aa16ed6ad41e6dc20c 100644
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -112,8 +112,7 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm)
 	u32 nr = dist->nr_spis;
 	int i, ret;
 
-	entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry),
-			  GFP_KERNEL);
+	entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL);
 	if (!entries)
 		return -ENOMEM;
 
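
The kcalloc() change above switches to the kernel's preferred allocation idiom: size the allocation from the pointer being assigned rather than from a spelled-out type name. A userspace analogue with calloc(), using an invented struct entry for illustration:

	#include <stdlib.h>

	struct entry {
		int gsi;
		int type;
	};

	static struct entry *alloc_entries(size_t nr)
	{
		/*
		 * sizeof(*entries) tracks the element type automatically:
		 * if the type is ever renamed or the pointer retyped, the
		 * allocation size stays correct with no second edit site.
		 */
		struct entry *entries = calloc(nr, sizeof(*entries));

		return entries;	/* NULL on failure, like kcalloc() */
	}
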
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 4660a7d04eeaf9792b4df044a092054759977046..31f5625079157254ecb5a073ced2cda1408402e5 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -322,6 +322,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 	int ret = 0;
 	u32 *intids;
 	int nr_irqs, i;
+	u8 pendmask;
 
 	nr_irqs = vgic_copy_lpi_list(vcpu->kvm, &intids);
 	if (nr_irqs < 0)
@@ -329,7 +330,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < nr_irqs; i++) {
 		int byte_offset, bit_nr;
-		u8 pendmask;
 
 		byte_offset = intids[i] / BITS_PER_BYTE;
 		bit_nr = intids[i] % BITS_PER_BYTE;
@@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
-					     struct vgic_its *its,
-					     gpa_t addr, unsigned int len)
-{
-	u32 reg = 0;
-
-	mutex_lock(&its->cmd_lock);
-	if (its->creadr == its->cwriter)
-		reg |= GITS_CTLR_QUIESCENT;
-	if (its->enabled)
-		reg |= GITS_CTLR_ENABLE;
-	mutex_unlock(&its->cmd_lock);
-
-	return reg;
-}
-
-static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
-				     gpa_t addr, unsigned int len,
-				     unsigned long val)
-{
-	its->enabled = !!(val & GITS_CTLR_ENABLE);
-}
-
 static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
 					      struct vgic_its *its,
 					      gpa_t addr, unsigned int len)
@@ -687,6 +664,8 @@ static int vgic_its_alloc_collection(struct vgic_its *its,
 		return E_ITS_MAPC_COLLECTION_OOR;
 
 	collection = kzalloc(sizeof(*collection), GFP_KERNEL);
+	if (!collection)
+		return -ENOMEM;
 
 	collection->collection_id = coll_id;
 	collection->target_addr = COLLECTION_NOT_MAPPED;
@@ -1160,33 +1139,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its,
 #define ITS_CMD_SIZE			32
 #define ITS_CMD_OFFSET(reg)		((reg) & GENMASK(19, 5))
 
-/*
- * By writing to CWRITER the guest announces new commands to be processed.
- * To avoid any races in the first place, we take the its_cmd lock, which
- * protects our ring buffer variables, so that there is only one user
- * per ITS handling commands at a given time.
- */
-static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
-					gpa_t addr, unsigned int len,
-					unsigned long val)
+/* Must be called with the cmd_lock held. */
+static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
 {
 	gpa_t cbaser;
 	u64 cmd_buf[4];
-	u32 reg;
-
-	if (!its)
-		return;
-
-	mutex_lock(&its->cmd_lock);
-
-	reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
-	reg = ITS_CMD_OFFSET(reg);
-	if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
-		mutex_unlock(&its->cmd_lock);
+
+	/* Commands are only processed when the ITS is enabled. */
+	if (!its->enabled)
 		return;
-	}
-	its->cwriter = reg;
 
 	cbaser = CBASER_ADDRESS(its->cbaser);
 
 	while (its->cwriter != its->creadr) {
@@ -1206,6 +1168,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
 		if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
 			its->creadr = 0;
 	}
+}
+
+/*
+ * By writing to CWRITER the guest announces new commands to be processed.
+ * To avoid any races in the first place, we take the its_cmd lock, which
+ * protects our ring buffer variables, so that there is only one user
+ * per ITS handling commands at a given time.
+ */
+static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
+					gpa_t addr, unsigned int len,
+					unsigned long val)
+{
+	u64 reg;
+
+	if (!its)
+		return;
+
+	mutex_lock(&its->cmd_lock);
+
+	reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
+	reg = ITS_CMD_OFFSET(reg);
+	if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+		mutex_unlock(&its->cmd_lock);
+		return;
+	}
+	its->cwriter = reg;
+
+	vgic_its_process_commands(kvm, its);
 
 	mutex_unlock(&its->cmd_lock);
 }
@@ -1286,6 +1276,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
 	*regptr = reg;
 }
 
+static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
+					     struct vgic_its *its,
+					     gpa_t addr, unsigned int len)
+{
+	u32 reg = 0;
+
+	mutex_lock(&its->cmd_lock);
+	if (its->creadr == its->cwriter)
+		reg |= GITS_CTLR_QUIESCENT;
+	if (its->enabled)
+		reg |= GITS_CTLR_ENABLE;
+	mutex_unlock(&its->cmd_lock);
+
+	return reg;
+}
+
+static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
+				     gpa_t addr, unsigned int len,
+				     unsigned long val)
+{
+	mutex_lock(&its->cmd_lock);
+
+	its->enabled = !!(val & GITS_CTLR_ENABLE);
+
+	/*
+	 * Try to process any pending commands. This function bails out early
+	 * if the ITS is disabled or no commands have been queued.
+	 */
+	vgic_its_process_commands(kvm, its);
+
+	mutex_unlock(&its->cmd_lock);
+}
+
 #define REGISTER_ITS_DESC(off, rd, wr, length, acc)		\
 {								\
 	.reg_offset = off,					\
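
The net effect of the vgic-its.c rework: command processing is factored out of the CWRITER handler into vgic_its_process_commands(), so the CTLR write path can also drain commands that were queued while the ITS was disabled, with one mutex serializing both paths. A hedged userspace sketch of that control flow; struct its_state and every name here are invented, and only the locking shape mirrors the patch:

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdint.h>

	struct its_state {
		pthread_mutex_t cmd_lock;
		bool enabled;
		uint64_t creadr, cwriter, nr_slots;	/* nr_slots > 0 */
	};

	/* Must be called with cmd_lock held; drains queued commands. */
	static void process_commands(struct its_state *its)
	{
		if (!its->enabled)
			return;	/* CWRITER writes queue up until enable */
		while (its->creadr != its->cwriter)
			its->creadr = (its->creadr + 1) % its->nr_slots;
	}

	/* Flip the enable bit, then try to drain under the same lock. */
	static void write_ctlr(struct its_state *its, bool enable)
	{
		pthread_mutex_lock(&its->cmd_lock);
		its->enabled = enable;
		process_commands(its);
		pthread_mutex_unlock(&its->cmd_lock);
	}
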
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f4c6d4f6d2e89d874816218a790429e57cc7fb04..1b20768e781df38efca02085c1c205f08dc4ef16 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -125,6 +125,11 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;
 
+__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end)
+{
+}
+
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 {
 	if (pfn_valid(pfn))
@@ -361,6 +366,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	kvm_flush_remote_tlbs(kvm);
 
 	spin_unlock(&kvm->mmu_lock);
+
+	kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
@@ -1052,7 +1060,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	 * changes) is disallowed above, so any other attribute changes getting
 	 * here can be skipped.
	 */
-	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+	if (as_id == 0 && (change == KVM_MR_CREATE || change == KVM_MR_MOVE)) {
 		r = kvm_iommu_map_pages(kvm, &new);
 		return r;
 	}
@@ -3896,7 +3904,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	if (!vcpu_align)
 		vcpu_align = __alignof__(struct kvm_vcpu);
 	kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
-					   0, NULL);
+					   SLAB_ACCOUNT, NULL);
 	if (!kvm_vcpu_cache) {
 		r = -ENOMEM;
 		goto out_free_3;
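
The first kvm_main.c hunk relies on weak linkage: the generic file supplies an empty kvm_arch_mmu_notifier_invalidate_range() that an architecture can override by defining a strong symbol of the same name in its own file. A minimal two-file sketch of that mechanism, using an invented arch_hook() and the GCC/Clang weak attribute (which the kernel's __weak macro wraps):

	/* generic.c: default implementation, overridable at link time */
	__attribute__((weak)) void arch_hook(unsigned long start,
					     unsigned long end)
	{
		/* generic code: nothing to invalidate by default */
		(void)start;
		(void)end;
	}

	/*
	 * arch.c (a separate translation unit) provides a strong
	 * definition of the same symbol; the linker prefers it over
	 * the weak default above:
	 *
	 *	void arch_hook(unsigned long start, unsigned long end)
	 *	{
	 *		... architecture-specific invalidation ...
	 *	}
	 */
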