diff --git a/Documentation/android.txt b/Documentation/android.txt index d53aba49ad123c0f314d90eea2d5c8896915d3be..0f40a78b045f77aa3daf24c62b78da61ac9fdcb9 100644 --- a/Documentation/android.txt +++ b/Documentation/android.txt @@ -67,6 +67,8 @@ PREEMPT RAMFS RTC_CLASS RTC_LIB +SWITCH +SWITCH_GPIO TMPFS UID_STAT UID16 diff --git a/Documentation/conf.py b/Documentation/conf.py index bf6f310e51705300e8f060c39125463b40d45873..d769cd89a9f7d8ba24e09bd4a03eec3910ccb796 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -37,7 +37,7 @@ from load_config import loadConfig extensions = ['kernel-doc', 'rstFlatTable', 'kernel_include', 'cdomain'] # The name of the math extension changed on Sphinx 1.4 -if minor > 3: +if major == 1 and minor > 3: extensions.append("sphinx.ext.imgmath") else: extensions.append("sphinx.ext.pngmath") @@ -332,6 +332,10 @@ latex_elements = { ''' } +# Fix reference escape troubles with Sphinx 1.4.x +if major == 1 and minor > 3: + latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n' + # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index ac8a37e0c76a6893d71735cde83d7dcb8e1c979f..0cf9a6bff6a548305a7e2aafecb98918a0bf67c4 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -227,7 +227,49 @@ interactive workloads. This governor sets the CPU speed depending on usage, similar to "ondemand" and "conservative" governors, but with a different set of configurable behaviors. -The tuneable values for this governor are: +The tunable values for this governor are: + +above_hispeed_delay: When speed is at or above hispeed_freq, wait for +this long before raising speed in response to continued high load. +The format is a single delay value, optionally followed by pairs of +CPU speeds and the delay to use at or above those speeds. Colons can +be used between the speeds and associated delays for readability. For +example: + + 80000 1300000:200000 1500000:40000 + +uses delay 80000 uS until CPU speed 1.3 GHz, at which speed delay +200000 uS is used until speed 1.5 GHz, at which speed (and above) +delay 40000 uS is used. If speeds are specified these must appear in +ascending order. Default is 20000 uS. + +boost: If non-zero, immediately boost speed of all CPUs to at least +hispeed_freq until zero is written to this attribute. If zero, allow +CPU speeds to drop below hispeed_freq according to load as usual. +Default is zero. + +boostpulse: On each write, immediately boost speed of all CPUs to +hispeed_freq for at least the period of time specified by +boostpulse_duration, after which speeds are allowed to drop below +hispeed_freq according to load as usual. Its a write-only file. + +boostpulse_duration: Length of time to hold CPU speed at hispeed_freq +on a write to boostpulse, before allowing speed to drop according to +load as usual. Default is 80000 uS. + +go_hispeed_load: The CPU load at which to ramp to hispeed_freq. +Default is 99%. + +hispeed_freq: An intermediate "high speed" at which to initially ramp +when CPU load hits the value specified in go_hispeed_load. If load +stays high for the amount of time specified in above_hispeed_delay, +then speed may be bumped higher. Default is the maximum speed allowed +by the policy at governor initialization time. 
+ +io_is_busy: If set, the governor accounts IO time as CPU busy time. + +min_sample_time: The minimum amount of time to spend at the current +frequency before ramping down. Default is 80000 uS. target_loads: CPU load values used to adjust speed to influence the current CPU load toward that value. In general, the lower the target @@ -246,32 +288,6 @@ values are typically specified for higher speeds, that is, target load values also usually appear in an ascending order. The default is target load 90% for all speeds. -min_sample_time: The minimum amount of time to spend at the current -frequency before ramping down. Default is 80000 uS. - -hispeed_freq: An intermediate "hi speed" at which to initially ramp -when CPU load hits the value specified in go_hispeed_load. If load -stays high for the amount of time specified in above_hispeed_delay, -then speed may be bumped higher. Default is the maximum speed -allowed by the policy at governor initialization time. - -go_hispeed_load: The CPU load at which to ramp to hispeed_freq. -Default is 99%. - -above_hispeed_delay: When speed is at or above hispeed_freq, wait for -this long before raising speed in response to continued high load. -The format is a single delay value, optionally followed by pairs of -CPU speeds and the delay to use at or above those speeds. Colons can -be used between the speeds and associated delays for readability. For -example: - - 80000 1300000:200000 1500000:40000 - -uses delay 80000 uS until CPU speed 1.3 GHz, at which speed delay -200000 uS is used until speed 1.5 GHz, at which speed (and above) -delay 40000 uS is used. If speeds are specified these must appear in -ascending order. Default is 20000 uS. - timer_rate: Sample rate for reevaluating CPU load when the CPU is not idle. A deferrable timer is used, such that the CPU will not be woken from idle to service this timer until something else needs to run. @@ -288,21 +304,6 @@ timer_slack is 10000uS then timers will be deferred for up to 30msec when not at lowest speed. A value of -1 means defer timers indefinitely at all speeds. Default is 80000 uS. -boost: If non-zero, immediately boost speed of all CPUs to at least -hispeed_freq until zero is written to this attribute. If zero, allow -CPU speeds to drop below hispeed_freq according to load as usual. -Default is zero. - -boostpulse: On each write, immediately boost speed of all CPUs to -hispeed_freq for at least the period of time specified by -boostpulse_duration, after which speeds are allowed to drop below -hispeed_freq according to load as usual. - -boostpulse_duration: Length of time to hold CPU speed at hispeed_freq -on a write to boostpulse, before allowing speed to drop according to -load as usual. Default is 80000 uS. - - 3. 
The Governor Interface in the CPUfreq Core ============================================= diff --git a/Documentation/devicetree/bindings/clock/imx31-clock.txt b/Documentation/devicetree/bindings/clock/imx31-clock.txt index 19df842c694f0b7d9c42cd0944ed076d68aadf11..8163d565f697709d38e2f482adfa2ab385ebc64a 100644 --- a/Documentation/devicetree/bindings/clock/imx31-clock.txt +++ b/Documentation/devicetree/bindings/clock/imx31-clock.txt @@ -77,7 +77,7 @@ Examples: clks: ccm@53f80000{ compatible = "fsl,imx31-ccm"; reg = <0x53f80000 0x4000>; - interrupts = <0 31 0x04 0 53 0x04>; + interrupts = <31>, <53>; #clock-cells = <1>; }; diff --git a/Documentation/devicetree/bindings/mfd/tps65086.txt b/Documentation/devicetree/bindings/mfd/tps65086.txt index d3705612a84655b59bfa235eb73a679e40de0c66..9cfa886fe99fc113a2c87ba2a7d953e86e3a7dfe 100644 --- a/Documentation/devicetree/bindings/mfd/tps65086.txt +++ b/Documentation/devicetree/bindings/mfd/tps65086.txt @@ -23,7 +23,7 @@ Required properties: defined below. Optional regulator properties: - - ti,regulator-step-size-25mv : This is applicable for buck[1,2,6], set this + - ti,regulator-step-size-25mv : This is applicable for buck[1-6], set this if the regulator is factory set with a 25mv step voltage mapping. - ti,regulator-decay : This is applicable for buck[1-6], set this if diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index be49573cc40d8ca6dade8d33b93b8e2035d87d9d..9c5e23f719de3749c9ef866cb50c06294f44aa88 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -4008,10 +4008,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. it if 0 is given (See Documentation/cgroup-v1/memory.txt) swiotlb= [ARM,IA-64,PPC,MIPS,X86] - Format: { | force } + Format: { | force | noforce } -- Number of I/O TLB slabs force -- force using of bounce buffers even if they wouldn't be automatically used by the kernel + noforce -- Never use bounce buffers (for debugging) switches= [HW,M68k] diff --git a/Documentation/media/index.rst b/Documentation/media/index.rst index e347a3e7bdef4e5db38ec75fdacbf024d04378e4..7f8f0af620ceb281416462b88e332491ddbdaa98 100644 --- a/Documentation/media/index.rst +++ b/Documentation/media/index.rst @@ -1,11 +1,6 @@ Linux Media Subsystem Documentation =================================== -.. Sphinx 1.4.x has a definition for DUrole that doesn't work on alltt blocks -.. raw:: latex - - \renewcommand*{\DUrole}[2]{ #2 } - Contents: .. toctree:: diff --git a/Documentation/scheduler/sched-energy.txt b/Documentation/scheduler/sched-energy.txt new file mode 100644 index 0000000000000000000000000000000000000000..dab2f9088b336f4a703933c86b46a2c103a5a20a --- /dev/null +++ b/Documentation/scheduler/sched-energy.txt @@ -0,0 +1,362 @@ +Energy cost model for energy-aware scheduling (EXPERIMENTAL) + +Introduction +============= + +The basic energy model uses platform energy data stored in sched_group_energy +data structures attached to the sched_groups in the sched_domain hierarchy. The +energy cost model offers two functions that can be used to guide scheduling +decisions: + +1. static unsigned int sched_group_energy(struct energy_env *eenv) +2. static int energy_diff(struct energy_env *eenv) + +sched_group_energy() estimates the energy consumed by all cpus in a specific +sched_group including any shared resources owned exclusively by this group of +cpus. Resources shared with other cpus are excluded (e.g. later level caches). 
+ +energy_diff() estimates the total energy impact of a utilization change. That +is, adding, removing, or migrating utilization (tasks). + +Both functions use a struct energy_env to specify the scenario to be evaluated: + + struct energy_env { + struct sched_group *sg_top; + struct sched_group *sg_cap; + int cap_idx; + int util_delta; + int src_cpu; + int dst_cpu; + int energy; + }; + +sg_top: sched_group to be evaluated. Not used by energy_diff(). + +sg_cap: sched_group covering the cpus in the same frequency domain. Set by +sched_group_energy(). + +cap_idx: Capacity state to be used for energy calculations. Set by +find_new_capacity(). + +util_delta: Amount of utilization to be added, removed, or migrated. + +src_cpu: Source cpu from where 'util_delta' utilization is removed. Should be +-1 if no source (e.g. task wake-up). + +dst_cpu: Destination cpu where 'util_delta' utilization is added. Should be -1 +if utilization is removed (e.g. terminating tasks). + +energy: Result of sched_group_energy(). + +The metric used to represent utilization is the actual per-entity running time +averaged over time using a geometric series. Very similar to the existing +per-entity load-tracking, but _not_ scaled by task priority and capped by the +capacity of the cpu. The latter property does mean that utilization may +underestimate the compute requirements for tasks on fully/over utilized cpus. +The greatest potential for energy savings without affecting performance too much +is in scenarios where the system isn't fully utilized. If the system is deemed +fully utilized, load-balancing should be done with task load (includes task +priority) instead in the interest of fairness and performance. + + +Background and Terminology +=========================== + +To make it clear from the start: + +energy = [joule] (resource like a battery on powered devices) +power = energy/time = [joule/second] = [watt] + +The goal of energy-aware scheduling is to minimize energy, while still getting +the job done. That is, we want to maximize: + + performance [inst/s] + -------------------- + power [W] + +which is equivalent to minimizing: + + energy [J] + ----------- + instruction + +while still getting 'good' performance. It is essentially an alternative +optimization objective to the current performance-only objective for the +scheduler. This alternative considers two objectives: energy-efficiency and +performance. Hence, there needs to be a user controllable knob to switch the +objective. Since it is early days, this is currently a sched_feature +(ENERGY_AWARE). + +The idea behind introducing an energy cost model is to allow the scheduler to +evaluate the implications of its decisions rather than applying energy-saving +techniques blindly that may only have positive effects on some platforms. At +the same time, the energy cost model must be as simple as possible to minimize +the scheduler latency impact. + +Platform topology +------------------ + +The system topology (cpus, caches, and NUMA information, not peripherals) is +represented in the scheduler by the sched_domain hierarchy which has +sched_groups attached at each level that covers one or more cpus (see +sched-domains.txt for more details). To add energy awareness to the scheduler +we need to consider power and frequency domains. + +Power domain: + +A power domain is a part of the system that can be powered on/off +independently.
Power domains are typically organized in a hierarchy where you +may be able to power down just a cpu or a group of cpus along with any +associated resources (e.g. shared caches). Powering up a cpu means that all +power domains it is a part of in the hierarchy must be powered up. Hence, it is +more expensive to power up the first cpu that belongs to a higher level power +domain than powering up additional cpus in the same high level domain. Two +level power domain hierarchy example: + + Power source + +-------------------------------+----... +per group PD G G + | +----------+ | + +--------+-------| Shared | (other groups) +per-cpu PD G G | resource | + | | +----------+ + +-------+ +-------+ + | CPU 0 | | CPU 1 | + +-------+ +-------+ + +Frequency domain: + +Frequency domains (P-states) typically cover the same group of cpus as one of +the power domain levels. That is, there might be several smaller power domains +sharing the same frequency (P-state) or there might be a power domain spanning +multiple frequency domains. + +From a scheduling point of view there is no need to know the actual frequencies +[Hz]. All the scheduler cares about is the compute capacity available at the +current state (P-state) the cpu is in and any other available states. For that +reason, and to also factor in any cpu micro-architecture differences, compute +capacity scaling states are called 'capacity states' in this document. For SMP +systems this is equivalent to P-states. For mixed micro-architecture systems +(like ARM big.LITTLE) it is P-states scaled according to the micro-architecture +performance relative to the other cpus in the system. + +Energy modelling: +------------------ + +Due to the hierarchical nature of the power domains, the most obvious way to +model energy costs is therefore to associate power and energy costs with +domains (groups of cpus). Energy costs of shared resources are associated with +the group of cpus that share the resources, only the cost of powering the +cpu itself and any private resources (e.g. private L1 caches) is associated +with the per-cpu groups (lowest level). + +For example, for an SMP system with per-cpu power domains and a cluster level +(group of cpus) power domain we get the overall energy costs to be: + + energy = energy_cluster + n * energy_cpu + +where 'n' is the number of cpus powered up and energy_cluster is the cost paid +as soon as any cpu in the cluster is powered up. + +The power and frequency domains can naturally be mapped onto the existing +sched_domain hierarchy and sched_groups by adding the necessary data to the +existing data structures. + +The energy model considers energy consumption from two contributors (shown in +the illustration below): + +1. Busy energy: Energy consumed while a cpu and the higher level groups that it +belongs to are busy running tasks. Busy energy is associated with the state of +the cpu, not an event. The time the cpu spends in this state varies. Thus, the +most obvious platform parameter for this contribution is busy power +(energy/time). + +2. Idle energy: Energy consumed while a cpu and higher level groups that it +belongs to are idle (in a C-state). Like busy energy, idle energy is associated +with the state of the cpu. Thus, the platform parameter for this contribution +is idle power (energy/time). + +Energy consumed during transitions from an idle-state (C-state) to a busy state +(P-state) or going the other way is ignored by the model to simplify the energy +model calculations. 
+ + Power + ^ + | busy->idle idle->busy + | transition transition + | + | _ __ + | / \ / \__________________ + |______________/ \ / + | \ / + | Busy \ Idle / Busy + | low P-state \____________/ high P-state + | + +------------------------------------------------------------> time + +Busy |--------------| |-----------------| + +Wakeup |------| |------| + +Idle |------------| + + +The basic algorithm +==================== + +The basic idea is to determine the total energy impact when utilization is +added or removed by estimating the impact at each level in the sched_domain +hierarchy starting from the bottom (sched_group contains just a single cpu). +The energy cost comes from busy time (sched_group is awake because one or more +cpus are busy) and idle time (in an idle-state). Energy model numbers account +for energy costs associated with all cpus in the sched_group as a group. + + for_each_domain(cpu, sd) { + sg = sched_group_of(cpu) + energy_before = curr_util(sg) * busy_power(sg) + + (1-curr_util(sg)) * idle_power(sg) + energy_after = new_util(sg) * busy_power(sg) + + (1-new_util(sg)) * idle_power(sg) + energy_diff += energy_before - energy_after + + } + + return energy_diff + +{curr, new}_util: The cpu utilization at the lowest level and the overall +non-idle time for the entire group for higher levels. Utilization is in the +range 0.0 to 1.0 in the pseudo-code. + +busy_power: The power consumption of the sched_group. + +idle_power: The power consumption of the sched_group when idle. + +Note: It is a fundamental assumption that the utilization is (roughly) scale +invariant. Task utilization tracking factors in any frequency scaling and +performance scaling differences due to different cpu microarchitectures such +that task utilization can be used across the entire system. + + +Platform energy data +===================== + +struct sched_group_energy can be attached to sched_groups in the sched_domain +hierarchy and has the following members: + +cap_states: + List of struct capacity_state representing the supported capacity states + (P-states). struct capacity_state has two members: cap and power, which + represent the compute capacity and the busy_power of the state. The + list must be ordered by capacity low->high. + +nr_cap_states: + Number of capacity states in cap_states list. + +idle_states: + List of struct idle_state containing the idle_state power cost for each + idle-state supported by the system, ordered by shallowest state first. + All states must be included at all levels in the hierarchy, i.e. a + sched_group spanning just a single cpu must also include coupled + idle-states (cluster states). In addition to the cpuidle idle-states, + the list must also contain an entry for idling using the arch + default idle (arch_idle_cpu()). Although this state may not be a true + hardware idle-state, it is considered the shallowest idle-state in the + energy model and must be the first entry. cpus may enter this state + (possibly 'active idling') if cpuidle decides not to enter a cpuidle + idle-state. Default idle may not be used when cpuidle is enabled. + In this case, it should just be a copy of the first cpuidle idle-state. + +nr_idle_states: + Number of idle states in idle_states list. + +There are no unit requirements for the energy cost data. Data can be normalized +with any reference, however, the normalization must be consistent across all +energy cost data. That is, one bogo-joule/watt must be the same quantity for +all data, but we don't care what it is.
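To make the layout described above concrete, here is a minimal sketch of such a table for a single-cpu sched_group. The member names follow the description above; the structure definitions are simplified stand-ins (the exact definitions in a given tree may differ) and the numbers are invented purely for illustration:

    struct capacity_state { unsigned long cap; unsigned long power; };
    struct idle_state { unsigned long power; };

    struct sched_group_energy {
            unsigned int nr_cap_states;
            struct capacity_state *cap_states;
            unsigned int nr_idle_states;
            struct idle_state *idle_states;
    };

    /* Capacity states, ordered by capacity low -> high (invented numbers) */
    static struct capacity_state cpu_cap_states[] = {
            { .cap =  358, .power = 141 },
            { .cap =  512, .power = 275 },
            { .cap = 1024, .power = 616 },
    };

    /* Idle states, ordered shallowest first; entry 0 is the arch default idle */
    static struct idle_state cpu_idle_states[] = {
            { .power = 29 },        /* active idling / WFI */
            { .power =  0 },        /* deepest per-cpu idle-state */
    };

    static struct sched_group_energy cpu_energy = {
            .nr_cap_states  = 3,
            .cap_states     = cpu_cap_states,
            .nr_idle_states = 2,
            .idle_states    = cpu_idle_states,
    };

A corresponding table attached at the cluster level would hold only the power attributed to the shared resources, as covered by the characterization recipe that follows.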
+ +A recipe for platform characterization +======================================= + +Obtaining the actual model data for a particular platform requires some way of +measuring power/energy. There isn't a tool to help with this (yet). This +section provides a recipe for use as reference. It covers the steps used to +characterize the ARM TC2 development platform. This sort of measurements is +expected to be done anyway when tuning cpuidle and cpufreq for a given +platform. + +The energy model needs two types of data (struct sched_group_energy holds +these) for each sched_group where energy costs should be taken into account: + +1. Capacity state information + +A list containing the compute capacity and power consumption when fully +utilized attributed to the group as a whole for each available capacity state. +At the lowest level (group contains just a single cpu) this is the power of the +cpu alone without including power consumed by resources shared with other cpus. +It basically needs to fit the basic modelling approach described in "Background +and Terminology" section: + + energy_system = energy_shared + n * energy_cpu + +for a system containing 'n' busy cpus. Only 'energy_cpu' should be included at +the lowest level. 'energy_shared' is included at the next level which +represents the group of cpus among which the resources are shared. + +This model is, of course, a simplification of reality. Thus, power/energy +attributions might not always exactly represent how the hardware is designed. +Also, busy power is likely to depend on the workload. It is therefore +recommended to use a representative mix of workloads when characterizing the +capacity states. + +If the group has no capacity scaling support, the list will contain a single +state where power is the busy power attributed to the group. The capacity +should be set to a default value (1024). + +When frequency domains include multiple power domains, the group representing +the frequency domain and all child groups share capacity states. This must be +indicated by setting the SD_SHARE_CAP_STATES sched_domain flag. All groups at +all levels that share the capacity state must have the list of capacity states +with the power set to the contribution of the individual group. + +2. Idle power information + +Stored in the idle_states list. The power number is the group idle power +consumption in each idle state as well when the group is idle but has not +entered an idle-state ('active idle' as mentioned earlier). Due to the way the +energy model is defined, the idle power of the deepest group idle state can +alternatively be accounted for in the parent group busy power. In that case the +group idle state power values are offset such that the idle power of the +deepest state is zero. It is less intuitive, but it is easier to measure as +idle power consumed by the group and the busy/idle power of the parent group +cannot be distinguished without per group measurement points. + +Measuring capacity states and idle power: + +The capacity states' capacity and power can be estimated by running a benchmark +workload at each available capacity state. By restricting the benchmark to run +on subsets of cpus it is possible to extrapolate the power consumption of +shared resources. + +ARM TC2 has two clusters of two and three cpus respectively. Each cluster has a +shared L2 cache. TC2 has on-chip energy counters per cluster. 
Running a +benchmark workload on just one cpu in a cluster means that power is consumed in +the cluster (higher level group) and a single cpu (lowest level group). Adding +another benchmark task to another cpu increases the power consumption by the +amount consumed by the additional cpu. Hence, it is possible to extrapolate the +cluster busy power. + +For platforms that don't have energy counters or equivalent instrumentation +built-in, it may be possible to use an external DAQ to acquire similar data. + +If the benchmark includes some performance score (for example sysbench cpu +benchmark), this can be used to record the compute capacity. + +Measuring idle power requires insight into the idle state implementation on the +particular platform. Specifically, if the platform has coupled idle-states (or +package states). To measure non-coupled per-cpu idle-states it is necessary to +keep one cpu busy to keep any shared resources alive to isolate the idle power +of the cpu from idle/busy power of the shared resources. The cpu can be tricked +into different per-cpu idle states by disabling the other states. Based on +various combinations of measurements with specific cpus busy and disabling +idle-states it is possible to extrapolate the idle-state power. diff --git a/Documentation/scheduler/sched-tune.txt b/Documentation/scheduler/sched-tune.txt new file mode 100644 index 0000000000000000000000000000000000000000..9bd2231c01b1466a23e49de6cc0f7432474c0c98 --- /dev/null +++ b/Documentation/scheduler/sched-tune.txt @@ -0,0 +1,366 @@ + Central, scheduler-driven, power-performance control + (EXPERIMENTAL) + +Abstract +======== + +The topic of a single simple power-performance tunable, that is wholly +scheduler centric, and has well defined and predictable properties has come up +on several occasions in the past [1,2]. With techniques such as a scheduler +driven DVFS [3], we now have a good framework for implementing such a tunable. +This document describes the overall ideas behind its design and implementation. + + +Table of Contents +================= + +1. Motivation +2. Introduction +3. Signal Boosting Strategy +4. OPP selection using boosted CPU utilization +5. Per task group boosting +6. Question and Answers + - What about "auto" mode? + - What about boosting on a congested system? + - How CPUs are boosted when we have tasks with multiple boost values? +7. References + + +1. Motivation +============= + +Sched-DVFS [3] is a new event-driven cpufreq governor which allows the +scheduler to select the optimal DVFS operating point (OPP) for running a task +allocated to a CPU. The introduction of sched-DVFS enables running workloads at +the most energy efficient OPPs. + +However, sometimes it may be desired to intentionally boost the performance of +a workload even if that could imply a reasonable increase in energy +consumption. For example, in order to reduce the response time of a task, we +may want to run the task at a higher OPP than the one that is actually required +by it's CPU bandwidth demand. + +This last requirement is especially important if we consider that one of the +main goals of the sched-DVFS component is to replace all currently available +CPUFreq policies. Since sched-DVFS is event based, as opposed to the sampling +driven governors we currently have, it is already more responsive at selecting +the optimal OPP to run tasks allocated to a CPU. However, just tracking the +actual task load demand may not be enough from a performance standpoint. 
For +example, it is not possible to get behaviors similar to those provided by the +"performance" and "interactive" CPUFreq governors. + +This document describes an implementation of a tunable, stacked on top of the +sched-DVFS which extends its functionality to support task performance +boosting. + +By "performance boosting" we mean the reduction of the time required to +complete a task activation, i.e. the time elapsed from a task wakeup to its +next deactivation (e.g. because it goes back to sleep or it terminates). For +example, if we consider a simple periodic task which executes the same workload +for 5[s] every 20[s] while running at a certain OPP, a boosted execution of +that task must complete each of its activations in less than 5[s]. + +A previous attempt [5] to introduce such a boosting feature has not been +successful mainly because of the complexity of the proposed solution. The +approach described in this document exposes a single simple interface to +user-space. This single tunable knob allows the tuning of system wide +scheduler behaviours ranging from energy efficiency at one end through to +incremental performance boosting at the other end. This first tunable affects +all tasks. However, a more advanced extension of the concept is also provided +which uses CGroups to boost the performance of only selected tasks while using +the energy efficient default for all others. + +The rest of this document introduces in more details the proposed solution +which has been named SchedTune. + + +2. Introduction +=============== + +SchedTune exposes a simple user-space interface with a single power-performance +tunable: + + /proc/sys/kernel/sched_cfs_boost + +This permits expressing a boost value as an integer in the range [0..100]. + +A value of 0 (default) configures the CFS scheduler for maximum energy +efficiency. This means that sched-DVFS runs the tasks at the minimum OPP +required to satisfy their workload demand. +A value of 100 configures scheduler for maximum performance, which translates +to the selection of the maximum OPP on that CPU. + +The range between 0 and 100 can be set to satisfy other scenarios suitably. For +example to satisfy interactive response or depending on other system events +(battery level etc). + +A CGroup based extension is also provided, which permits further user-space +defined task classification to tune the scheduler for different goals depending +on the specific nature of the task, e.g. background vs interactive vs +low-priority. + +The overall design of the SchedTune module is built on top of "Per-Entity Load +Tracking" (PELT) signals and sched-DVFS by introducing a bias on the Operating +Performance Point (OPP) selection. +Each time a task is allocated on a CPU, sched-DVFS has the opportunity to tune +the operating frequency of that CPU to better match the workload demand. The +selection of the actual OPP being activated is influenced by the global boost +value, or the boost value for the task CGroup when in use. + +This simple biasing approach leverages existing frameworks, which means minimal +modifications to the scheduler, and yet it allows to achieve a range of +different behaviours all from a single simple tunable knob. +The only new concept introduced is that of signal boosting. + + +3. Signal Boosting Strategy +=========================== + +The whole PELT machinery works based on the value of a few load tracking signals +which basically track the CPU bandwidth requirements for tasks and the capacity +of CPUs. 
The basic idea behind the SchedTune knob is to artificially inflate +some of these load tracking signals to make a task or RQ appear more demanding +than it actually is. + +Which signals have to be inflated depends on the specific "consumer". However, +independently of the specific (signal, consumer) pair, it is important to +define a simple and possibly consistent strategy for the concept of boosting a +signal. + +A boosting strategy defines how the "abstract" user-space defined +sched_cfs_boost value is translated into an internal "margin" value to be added +to a signal to get its inflated value: + + margin := boosting_strategy(sched_cfs_boost, signal) + boosted_signal := signal + margin + +Different boosting strategies were identified and analyzed before selecting the +one found to be most effective. + +Signal Proportional Compensation (SPC) +-------------------------------------- + +In this boosting strategy the sched_cfs_boost value is used to compute a +margin which is proportional to the complement of the original signal. +When a signal has a maximum possible value, its complement is defined as +the delta between the actual value and its possible maximum. + +Since the tunable implementation uses signals which have SCHED_LOAD_SCALE as +the maximum possible value, the margin becomes: + + margin := sched_cfs_boost * (SCHED_LOAD_SCALE - signal) + +For example, with SCHED_LOAD_SCALE = 1024, a 50% boost applied to a signal of +512 gives a margin of 0.5 * (1024 - 512) = 256, i.e. a boosted signal of 768. + +Using this boosting strategy: +- a 100% sched_cfs_boost means that the signal is scaled to the maximum value +- each value in the range of sched_cfs_boost effectively inflates the signal in + question by a quantity which is proportional to its distance from the maximum + value. + +For example, by applying the SPC boosting strategy to the selection of the OPP +to run a task, it is possible to achieve these behaviors: + +- 0% boosting: run the task at the minimum OPP required by its workload +- 100% boosting: run the task at the maximum OPP available for the CPU +- 50% boosting: run at the half-way OPP between minimum and maximum + +This means that, at 50% boosting, a task will be scheduled to run at half of +the maximum theoretically achievable performance on the specific target +platform. + +A graphical representation of an SPC boosted signal is shown in the +following figure, where: + a) "-" represents the original signal + b) "b" represents a 50% boosted signal + c) "p" represents a 100% boosted signal + + + ^ + | SCHED_LOAD_SCALE + +-----------------------------------------------------------------+ + |pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp + | + | boosted_signal + | bbbbbbbbbbbbbbbbbbbbbbbb + | + | original signal + | bbbbbbbbbbbbbbbbbbbbbbbb+----------------------+ + | | + |bbbbbbbbbbbbbbbbbb | + | | + | | + | | + | +-----------------------+ + | | + | | + | | + |------------------+ + | + | + +-----------------------------------------------------------------------> + +The plot above shows a ramped load signal (labelled 'original signal') and its +boosted equivalent. For each step of the original signal the boosted signal +corresponding to a 50% boost is midway between the original signal and the upper +bound. Boosting by 100% generates a boosted signal which is always saturated to +the upper bound. + + +4. OPP selection using boosted CPU utilization +============================================== + +It is worth calling out that the implementation does not introduce any new load +signals. Instead, it provides an API to tune existing signals.
This tuning is +done on demand and only in scheduler code paths where it is sensible to do so. +The new API calls are defined to return either the default signal or a boosted +one, depending on the value of sched_cfs_boost. This is a clean and non-invasive +modification of the existing code paths. + +The signal representing a CPU's utilization is boosted according to the +previously described SPC boosting strategy. To sched-DVFS, this allows a CPU +(i.e. CFS run-queue) to appear more used than it actually is. + +Thus, with sched_cfs_boost enabled, we have the following main functions to +get the current utilization of a CPU: + + cpu_util() + boosted_cpu_util() + +The new boosted_cpu_util() is similar to the first but returns a boosted +utilization signal which is a function of the sched_cfs_boost value. + +This function is used in the CFS scheduler code paths where sched-DVFS needs to +decide the OPP to run a CPU at. +For example, this allows selecting the highest OPP for a CPU which has +the boost value set to 100%.
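The exact kernel code is not reproduced here; the following minimal sketch shows how the SPC margin from the previous section could be layered on top of cpu_util(), assuming the global boost value is a plain percentage and cpu_util() returns a value in the range [0..SCHED_LOAD_SCALE]:

    /* Stand-ins for the kernel's own definitions, for illustration only. */
    #define SCHED_LOAD_SCALE        1024
    extern unsigned long cpu_util(int cpu); /* utilization in [0..SCHED_LOAD_SCALE] */
    extern int sched_cfs_boost;             /* global boost value, 0..100 */

    /* Minimal sketch of SPC boosting applied on top of cpu_util(). */
    static unsigned long sketch_boosted_cpu_util(int cpu)
    {
            unsigned long util = cpu_util(cpu);
            unsigned long margin;

            /* margin := sched_cfs_boost * (SCHED_LOAD_SCALE - signal) */
            margin = sched_cfs_boost * (SCHED_LOAD_SCALE - util) / 100;

            /* a 100% boost saturates the signal at SCHED_LOAD_SCALE */
            return util + margin;
    }

sched-DVFS then uses this boosted value, rather than the raw utilization, when picking the OPP for the CPU.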
5. Per task group boosting +========================== + +The availability of a single knob which is used to boost all tasks in the +system is certainly a simple solution but it quite likely doesn't fit many +utilization scenarios, especially in the mobile device space. + +For example, on battery powered devices there usually are many background +services which are long running and need energy efficient scheduling. On the +other hand, some applications are more performance sensitive and require an +interactive response and/or maximum performance, regardless of the energy cost. +To better service such scenarios, the SchedTune implementation has an extension +that provides a more fine grained boosting interface. + +A new CGroup controller, namely "schedtune", can be enabled, which allows task +groups with different boosting values to be defined and configured. +Tasks that require special performance can be put into separate CGroups. +The value of the boost associated with the tasks in this group can be specified +using a single knob exposed by the CGroup controller: + + schedtune.boost + +This knob allows the definition of a boost value that is to be used for +SPC boosting of all tasks attached to this group. + +The current schedtune controller implementation is really simple and has these +main characteristics: + + 1) It is only possible to create 1 level depth hierarchies + + The root control group defines the system-wide boost value to be applied + by default to all tasks. Its direct subgroups are named "boost groups" and + they define the boost value for a specific set of tasks. + Further nested subgroups are not allowed since they do not have a sensible + meaning from a user-space standpoint. + + 2) It is possible to define only a limited number of "boost groups" + + This number is defined at compile time and by default configured to 16. + This is a design decision motivated by two main reasons: + a) In a real system we do not expect utilization scenarios with more than a few + boost groups. For example, a reasonable collection of groups could be + just "background", "interactive" and "performance". + b) It simplifies the implementation considerably, especially for the code + which has to compute the per CPU boosting once there are multiple + RUNNABLE tasks with different boost values. + +Such a simple design should allow servicing the main utilization scenarios +identified so far. It provides a simple interface which can be used to manage the +power-performance of all tasks or only selected tasks. +Moreover, this interface can be easily integrated by user-space run-times (e.g. +Android, ChromeOS) to implement a QoS solution for task boosting based on task +classification, which has been a long standing requirement. + +Setup and usage +--------------- + +0. Use a kernel with CGROUP_SCHEDTUNE support enabled + +1. Check that the "schedtune" CGroup controller is available: + + root@linaro-nano:~# cat /proc/cgroups + #subsys_name hierarchy num_cgroups enabled + cpuset 0 1 1 + cpu 0 1 1 + schedtune 0 1 1 + +2. Mount a tmpfs to create the CGroups mount point (Optional) + + root@linaro-nano:~# sudo mount -t tmpfs cgroups /sys/fs/cgroup + +3. Mount the "schedtune" controller + + root@linaro-nano:~# mkdir /sys/fs/cgroup/stune + root@linaro-nano:~# sudo mount -t cgroup -o schedtune stune /sys/fs/cgroup/stune + +4. Setup the system-wide boost value (Optional) + + If not configured, the root control group has a 0% boost value, which + basically disables boosting for all tasks in the system, thus running in + an energy-efficient mode. + + root@linaro-nano:~# echo $SYSBOOST > /sys/fs/cgroup/stune/schedtune.boost + +5. Create task groups and configure their specific boost value (Optional) + + For example, here we create a "performance" boost group configured to boost + all its tasks to 100% + + root@linaro-nano:~# mkdir /sys/fs/cgroup/stune/performance + root@linaro-nano:~# echo 100 > /sys/fs/cgroup/stune/performance/schedtune.boost + +6. Move tasks into the boost group + + For example, the following moves the task with PID $TASKPID (and all its + threads) into the "performance" boost group. + + root@linaro-nano:~# echo $TASKPID > /sys/fs/cgroup/stune/performance/cgroup.procs + +This simple configuration allows only the threads of the $TASKPID task to run, +when needed, at the highest OPP in the most capable CPU of the system. + + +6. Question and Answers +======================= + +What about "auto" mode? +----------------------- + +The 'auto' mode as described in [5] can be implemented by interfacing SchedTune +with some suitable user-space element. This element could use the exposed +system-wide or cgroup based interface. + +How are multiple groups of tasks with different boost values managed? +--------------------------------------------------------------------- + +The current SchedTune implementation keeps track of the boosted RUNNABLE tasks +on a CPU. Once sched-DVFS selects the OPP to run a CPU at, the CPU utilization +is boosted with a value which is the maximum of the boost values of the +currently RUNNABLE tasks in its RQ. + +This allows sched-DVFS to boost a CPU only while there are boosted tasks ready +to run and switch back to the energy efficient mode as soon as the last boosted +task is dequeued. + + +7.
References +============= +[1] http://lwn.net/Articles/552889 +[2] http://lkml.org/lkml/2012/5/18/91 +[3] http://lkml.org/lkml/2015/6/26/620 diff --git a/Documentation/sphinx/rstFlatTable.py b/Documentation/sphinx/rstFlatTable.py index 55f27579302830c4e07634c6b17e56f6fa14e227..25feb0d35e7abc7f6ff7797faf02e9e79321e644 100755 --- a/Documentation/sphinx/rstFlatTable.py +++ b/Documentation/sphinx/rstFlatTable.py @@ -157,6 +157,11 @@ class ListTableBuilder(object): def buildTableNode(self): colwidths = self.directive.get_column_widths(self.max_cols) + if isinstance(colwidths, tuple): + # Since docutils 0.13, get_column_widths returns a (widths, + # colwidths) tuple, where widths is a string (i.e. 'auto'). + # See https://sourceforge.net/p/docutils/patches/120/. + colwidths = colwidths[1] stub_columns = self.directive.options.get('stub-columns', 0) header_rows = self.directive.options.get('header-rows', 0) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index e20aacb9a6e83a1af69ebb3922083db94df43d6d..91723ed5347038346b1196cc1af205ef8cedffd3 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -362,6 +362,26 @@ of ftrace. Here is a list of some of the key files: to correlate events across hypervisor/guest if tb_offset is known. + mono: This uses the fast monotonic clock (CLOCK_MONOTONIC) + which is monotonic and is subject to NTP rate adjustments. + + mono_raw: + This is the raw monotonic clock (CLOCK_MONOTONIC_RAW) + which is montonic but is not subject to any rate adjustments + and ticks at the same rate as the hardware clocksource. + + boot: This is the boot clock (CLOCK_BOOTTIME) and is based on the + fast monotonic clock, but also accounts for time spent in + suspend. Since the clock access is designed for use in + tracing in the suspend path, some side effects are possible + if clock is accessed after the suspend time is accounted before + the fast mono clock is updated. In this case, the clock update + appears to happen slightly sooner than it normally would have. + Also on 32-bit systems, it's possible that the 64-bit boot offset + sees a partial update. These effects are rare and post + processing should be able to handle them. See comments in the + ktime_get_boot_fast_ns() function for more information. + To set a clock, simply echo the clock name into this file. echo global > trace_clock diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6bbceb9a3a19d5ce30734493e3c8785a04c1b00d..1f5eab4ef88fb3d201ec1e1586aade20f54cd31a 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2050,6 +2050,7 @@ registers, find a list below: PPC | KVM_REG_PPC_TM_VSCR | 32 PPC | KVM_REG_PPC_TM_DSCR | 64 PPC | KVM_REG_PPC_TM_TAR | 64 + PPC | KVM_REG_PPC_TM_XER | 64 | | MIPS | KVM_REG_MIPS_R0 | 64 ... diff --git a/Makefile b/Makefile index 2a73c9b15379607baabc141cb6897410dcc46249..18d0eaad7409dff62f9000189b66dfe15107d1f8 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 0 +SUBLEVEL = 9 EXTRAVERSION = NAME = Roaring Lionus diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index 940dfb4065910822d42f3d11c02ba2305f25b02a..04abdec7f49605ecee1b460162440b6cfdb3f554 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -283,7 +283,7 @@ long arch_ptrace(struct task_struct *child, long request, /* When I and D space are separate, these will need to be fixed. 
*/ case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), + copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); ret = -EIO; if (copied != sizeof(tmp)) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index bd204bfa29edd52f8e4d43c989ab07024e66ab98..249e10190d20c5ea61b599a4424eb47123d82983 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -28,7 +28,7 @@ config ARC select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_MEMBLOCK - select HAVE_MOD_ARCH_SPECIFIC if ARC_DW2_UNWIND + select HAVE_MOD_ARCH_SPECIFIC select HAVE_OPROFILE select HAVE_PERF_EVENTS select HANDLE_DOMAIN_IRQ diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index a093adbdb017580f6da74abb551ea02e14e04da5..fc662f49c55ac91916af7cbd830b1c978f827ffe 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -85,6 +85,10 @@ void flush_anon_page(struct vm_area_struct *vma, */ #define PG_dc_clean PG_arch_1 +#define CACHE_COLORS_NUM 4 +#define CACHE_COLORS_MSK (CACHE_COLORS_NUM - 1) +#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & CACHE_COLORS_MSK) + /* * Simple wrapper over config option * Bootup code ensures that hardware matches kernel configuration @@ -94,8 +98,6 @@ static inline int cache_is_vipt_aliasing(void) return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); } -#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & 1) - /* * checks if two addresses (after page aligning) index into same cache set */ diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index a36e8601114d2ca2970f257f9f8d7e5c03ec08a2..d5da2115d78a678e343da2abec51f6c8efbbe0a4 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -26,7 +26,9 @@ static inline void __delay(unsigned long loops) " lp 1f \n" " nop \n" "1: \n" - : : "r"(loops)); + : + : "r"(loops) + : "lp_count"); } extern void __bad_udelay(void); diff --git a/arch/arc/include/asm/module.h b/arch/arc/include/asm/module.h index 6e91d8b339c3616b59d7b389353c477acba8a418..567590ea8f6c9166d7cef4643529ae621910ddce 100644 --- a/arch/arc/include/asm/module.h +++ b/arch/arc/include/asm/module.h @@ -14,13 +14,13 @@ #include -#ifdef CONFIG_ARC_DW2_UNWIND struct mod_arch_specific { +#ifdef CONFIG_ARC_DW2_UNWIND void *unw_info; int unw_sec_idx; +#endif const char *secstr; }; -#endif #define MODULE_PROC_FAMILY "ARC700" diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c index 42e964db29677877438f8b9bc8e6225cc5f64174..3d99a60913325d1ac5229b7b6e5933caddb0cbbc 100644 --- a/arch/arc/kernel/module.c +++ b/arch/arc/kernel/module.c @@ -32,8 +32,8 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, #ifdef CONFIG_ARC_DW2_UNWIND mod->arch.unw_sec_idx = 0; mod->arch.unw_info = NULL; - mod->arch.secstr = secstr; #endif + mod->arch.secstr = secstr; return 0; } @@ -113,8 +113,10 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, } +#ifdef CONFIG_ARC_DW2_UNWIND if (strcmp(module->arch.secstr+sechdrs[tgtsec].sh_name, ".eh_frame") == 0) module->arch.unw_sec_idx = tgtsec; +#endif return 0; diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c index abd961f3e7639f13fb7954da68885ed0215f432c..91ebe382147f6d915a58f94027fa748ca8319f30 100644 --- a/arch/arc/kernel/unaligned.c +++ b/arch/arc/kernel/unaligned.c @@ -241,8 +241,9 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs, if (state.fault) goto fault; + /* clear any remanants of 
delay slot */ if (delay_mode(regs)) { - regs->ret = regs->bta; + regs->ret = regs->bta ~1U; regs->status32 &= ~STATUS_DE_MASK; } else { regs->ret += state.instr_len; diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 50d71695cd4ecbeefd64f28e9f44265195b01d15..8147583c44340f1a5d5be60ce70d77327a90dcff 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -979,11 +979,16 @@ void arc_cache_init(void) /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ if (is_isa_arcompact()) { int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); - - if (dc->alias && !handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - else if (!dc->alias && handled) + int num_colors = dc->sz_k/dc->assoc/TO_KB(PAGE_SIZE); + + if (dc->alias) { + if (!handled) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + if (CACHE_COLORS_NUM != num_colors) + panic("CACHE_COLORS_NUM not optimized for config\n"); + } else if (!dc->alias && handled) { panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } } } diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 5af3ec14af9bcf753e9d29c8914d350c5a94fc5b..54f95d365bf70961129eb910157cdb791b87ab80 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -485,6 +485,7 @@ dtb-$(CONFIG_ARCH_OMAP3) += \ am3517-evm.dtb \ am3517_mt_ventoux.dtb \ logicpd-torpedo-37xx-devkit.dtb \ + logicpd-som-lv-37xx-devkit.dtb \ omap3430-sdp.dtb \ omap3-beagle.dtb \ omap3-beagle-xm.dtb \ diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 194d884c9de13781f586ca5f8dbce474d8ffdebf..795c1467fa50311402fd7fb8d7dabda94081b022 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -16,6 +16,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c0; diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index a275fa956813df7a34d6505c97a52c578207e3aa..a20a71d9d22ef2733fcde817f480ff36418d9a9f 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -16,6 +16,7 @@ interrupt-parent = <&wakeupgen>; #address-cells = <1>; #size-cells = <1>; + chosen { }; memory@0 { device_type = "memory"; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 46d46d894a441d69dcf784fb2cf54d4d72038aa0..74dd21b7373c7564ede01d84a4f63b93a6d52fa7 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -104,7 +104,7 @@ reg = <0x7e104000 0x10>; }; - mailbox: mailbox@7e00b800 { + mailbox: mailbox@7e00b880 { compatible = "brcm,bcm2835-mbox"; reg = <0x7e00b880 0x40>; interrupts = <0 1>; diff --git a/arch/arm/boot/dts/da850-evm.dts b/arch/arm/boot/dts/da850-evm.dts index 41de15fe15a2f0dfa241748b2b49ef90be48b323..78492a0bbbab94ffec665bc36a205e48c21e0070 100644 --- a/arch/arm/boot/dts/da850-evm.dts +++ b/arch/arm/boot/dts/da850-evm.dts @@ -99,6 +99,7 @@ #size-cells = <1>; compatible = "m25p64"; spi-max-frequency = <30000000>; + m25p,fast-read; reg = <0>; partition@0 { label = "U-Boot-SPL"; diff --git a/arch/arm/boot/dts/dm814x.dtsi b/arch/arm/boot/dts/dm814x.dtsi index ff90a6ce6bdc248066e895cb03916e6b1d26ccc8..d87efab24fa22452c259419ac831213bb10e7428 100644 --- a/arch/arm/boot/dts/dm814x.dtsi +++ b/arch/arm/boot/dts/dm814x.dtsi @@ -12,6 +12,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c1; diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi index 
f1e0f771ff297df8cdde113f5237e03a63bee18e..cbdfbc4e4a26dabf136406f69228ef1ff923ba51 100644 --- a/arch/arm/boot/dts/dm816x.dtsi +++ b/arch/arm/boot/dts/dm816x.dtsi @@ -12,6 +12,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c1; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index d4fcd68f634966ac9e34361b8c677ee54b176783..064d84f87e45f76828dc571a6c4119fbe4cd6cd1 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -18,6 +18,7 @@ compatible = "ti,dra7xx"; interrupt-parent = <&crossbar_mpu>; + chosen { }; aliases { i2c0 = &i2c1; @@ -1376,6 +1377,7 @@ phy-names = "sata-phy"; clocks = <&sata_ref_clk>; ti,hwmods = "sata"; + ports-implemented = <0x1>; }; rtc: rtc@48838000 { diff --git a/arch/arm/boot/dts/imx31.dtsi b/arch/arm/boot/dts/imx31.dtsi index 1ce7ae94e7ad26a3d1e89bbc5804bab6e3a5f71c..11e9e6bd8abbd1b91c0994737add92b973297177 100644 --- a/arch/arm/boot/dts/imx31.dtsi +++ b/arch/arm/boot/dts/imx31.dtsi @@ -30,11 +30,11 @@ }; }; - avic: avic-interrupt-controller@60000000 { + avic: interrupt-controller@68000000 { compatible = "fsl,imx31-avic", "fsl,avic"; interrupt-controller; #interrupt-cells = <1>; - reg = <0x60000000 0x100000>; + reg = <0x68000000 0x100000>; }; soc { @@ -118,13 +118,6 @@ interrupts = <19>; clocks = <&clks 25>; }; - - clks: ccm@53f80000{ - compatible = "fsl,imx31-ccm"; - reg = <0x53f80000 0x4000>; - interrupts = <0 31 0x04 0 53 0x04>; - #clock-cells = <1>; - }; }; aips@53f00000 { /* AIPS2 */ @@ -134,6 +127,13 @@ reg = <0x53f00000 0x100000>; ranges; + clks: ccm@53f80000{ + compatible = "fsl,imx31-ccm"; + reg = <0x53f80000 0x4000>; + interrupts = <31>, <53>; + #clock-cells = <1>; + }; + gpt: timer@53f90000 { compatible = "fsl,imx31-gpt"; reg = <0x53f90000 0x4000>; diff --git a/arch/arm/boot/dts/imx6q-cm-fx6.dts b/arch/arm/boot/dts/imx6q-cm-fx6.dts index 59bc5a4dce17b0586db94520d3b1a22efd3d0656..a150bca84daa4245e1004a6d0a0006dd8eedd08c 100644 --- a/arch/arm/boot/dts/imx6q-cm-fx6.dts +++ b/arch/arm/boot/dts/imx6q-cm-fx6.dts @@ -183,7 +183,6 @@ MX6QDL_PAD_ENET_REF_CLK__ENET_TX_CLK 0x1b0b0 MX6QDL_PAD_ENET_MDIO__ENET_MDIO 0x1b0b0 MX6QDL_PAD_ENET_MDC__ENET_MDC 0x1b0b0 - MX6QDL_PAD_GPIO_16__ENET_REF_CLK 0x4001b0a8 >; }; diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi index b0b3220a1fd90530586b29e84fe276ee79858844..01166ba36f2753be0cda2fc29908c7355989c7c0 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi @@ -319,8 +319,6 @@ compatible = "fsl,imx6q-nitrogen6_max-sgtl5000", "fsl,imx-audio-sgtl5000"; model = "imx6q-nitrogen6_max-sgtl5000"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_sgtl5000>; ssi-controller = <&ssi1>; audio-codec = <&codec>; audio-routing = @@ -402,6 +400,8 @@ codec: sgtl5000@0a { compatible = "fsl,sgtl5000"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_sgtl5000>; reg = <0x0a>; clocks = <&clks IMX6QDL_CLK_CKO>; VDDA-supply = <®_2p5v>; diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts index da8598402ab8b33137a74fac30a69432155fac17..38faa90007d7f0c7e3042a90aef486321cc4bfa0 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts @@ -158,7 +158,7 @@ &mmc1 { interrupts-extended = <&intc 83 &omap3_pmx_core 0x11a>; pinctrl-names = "default"; - pinctrl-0 = <&mmc1_pins &mmc1_cd>; + pinctrl-0 = <&mmc1_pins>; wp-gpios = 
<&gpio4 30 GPIO_ACTIVE_HIGH>; /* gpio_126 */ cd-gpios = <&gpio4 14 IRQ_TYPE_LEVEL_LOW>; /* gpio_110 */ vmmc-supply = <&vmmc1>; @@ -193,7 +193,8 @@ OMAP3_CORE1_IOPAD(0x214a, PIN_INPUT | MUX_MODE0) /* sdmmc1_dat1.sdmmc1_dat1 */ OMAP3_CORE1_IOPAD(0x214c, PIN_INPUT | MUX_MODE0) /* sdmmc1_dat2.sdmmc1_dat2 */ OMAP3_CORE1_IOPAD(0x214e, PIN_INPUT | MUX_MODE0) /* sdmmc1_dat3.sdmmc1_dat3 */ - OMAP3_CORE1_IOPAD(0x2132, PIN_INPUT_PULLUP | MUX_MODE4) /* cam_strobe.gpio_126 sdmmc1_wp*/ + OMAP3_CORE1_IOPAD(0x2132, PIN_INPUT_PULLUP | MUX_MODE4) /* cam_strobe.gpio_126 */ + OMAP3_CORE1_IOPAD(0x212c, PIN_INPUT_PULLUP | MUX_MODE4) /* cam_d11.gpio_110 */ >; }; @@ -242,12 +243,6 @@ OMAP3_WKUP_IOPAD(0x2a16, PIN_OUTPUT | PIN_OFF_OUTPUT_LOW | MUX_MODE4) /* sys_boot6.gpio_8 */ >; }; - - mmc1_cd: pinmux_mmc1_cd { - pinctrl-single,pins = < - OMAP3_WKUP_IOPAD(0x212c, PIN_INPUT_PULLUP | MUX_MODE4) /* cam_d11.gpio_110 */ - >; - }; }; diff --git a/arch/arm/boot/dts/omap2.dtsi b/arch/arm/boot/dts/omap2.dtsi index 4f793a025a721b03f3a5e1f074c4c531033cf62f..f1d6de8b3c193eee0d88b0465f33de4e122ba266 100644 --- a/arch/arm/boot/dts/omap2.dtsi +++ b/arch/arm/boot/dts/omap2.dtsi @@ -17,6 +17,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { serial0 = &uart1; diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index 353d818ce5a6dc059488db7a842bd217b4836ff6..2008648b8c9f908d8d5d04d30687e3fd87af2199 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -17,6 +17,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c1; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 0ced079b7ae3d715ff4dc1d20b6353c96f0ec224..9c289ddab3df6ec0a566d2267f4bc349dcab4b0f 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -15,6 +15,7 @@ interrupt-parent = <&wakeupgen>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c1; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 25262118ec3d042f01479daba0808b683d9976ff..1d1d8e90cd80500d6566baea8504f6a96c23ccea 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -17,6 +17,7 @@ compatible = "ti,omap5"; interrupt-parent = <&wakeupgen>; + chosen { }; aliases { i2c0 = &i2c1; @@ -985,6 +986,7 @@ phy-names = "sata-phy"; clocks = <&sata_ref_clk>; ti,hwmods = "sata"; + ports-implemented = <0x1>; }; dss: dss@58000000 { diff --git a/arch/arm/boot/dts/r8a7794.dtsi b/arch/arm/boot/dts/r8a7794.dtsi index 9365580a194facc788ad5e3f76fa4808da69eb68..7e860d3737ff00ca599ccc2dadb030e30e9b5b72 100644 --- a/arch/arm/boot/dts/r8a7794.dtsi +++ b/arch/arm/boot/dts/r8a7794.dtsi @@ -319,7 +319,7 @@ "ch12"; clocks = <&mstp5_clks R8A7794_CLK_AUDIO_DMAC0>; clock-names = "fck"; - power-domains = <&cpg_clocks>; + power-domains = <&sysc R8A7794_PD_ALWAYS_ON>; #dma-cells = <1>; dma-channels = <13>; }; @@ -1025,8 +1025,7 @@ clocks = <&extal_clk &usb_extal_clk>; #clock-cells = <1>; clock-output-names = "main", "pll0", "pll1", "pll3", - "lb", "qspi", "sdh", "sd0", "z", - "rcan"; + "lb", "qspi", "sdh", "sd0", "rcan"; #power-domain-cells = <0>; }; /* Variable factor clocks */ @@ -1260,7 +1259,7 @@ mstp7_clks: mstp7_clks@e615014c { compatible = "renesas,r8a7794-mstp-clocks", "renesas,cpg-mstp-clocks"; reg = <0 0xe615014c 0 4>, <0 0xe61501c4 0 4>; - clocks = <&mp_clk>, <&mp_clk>, + clocks = <&mp_clk>, <&hp_clk>, <&zs_clk>, <&p_clk>, <&p_clk>, <&zs_clk>, <&zs_clk>, 
<&p_clk>, <&p_clk>, <&p_clk>, <&p_clk>, <&zx_clk>; @@ -1483,7 +1482,7 @@ "mix.0", "mix.1", "dvc.0", "dvc.1", "clk_a", "clk_b", "clk_c", "clk_i"; - power-domains = <&cpg_clocks>; + power-domains = <&sysc R8A7794_PD_ALWAYS_ON>; status = "disabled"; diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts index ba5bca0fe997b755f113c14597588668f04e477d..44377a98cc89b262e08c05a30b74e9470c7ade2e 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts @@ -227,3 +227,8 @@ pinctrl-0 = <&uart0_pins_a>; status = "okay"; }; + +&usbphy { + /* VBUS on usb host ports are tied to DC5V and therefore always on */ + status = "okay"; +}; diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig index c2dff4fd5fc4d7577cc96b483d790a3667eea33c..9f6d2a69a6f7a5e85ca9b80d18d2531a48add0b3 100644 --- a/arch/arm/configs/qcom_defconfig +++ b/arch/arm/configs/qcom_defconfig @@ -162,8 +162,8 @@ CONFIG_APQ_MMCC_8084=y CONFIG_IPQ_LCC_806X=y CONFIG_MSM_GCC_8660=y CONFIG_MSM_LCC_8960=y -CONFIG_MSM_GCC_9615=y -CONFIG_MSM_LCC_9615=y +CONFIG_MDM_GCC_9615=y +CONFIG_MDM_LCC_9615=y CONFIG_MSM_MMCC_8960=y CONFIG_MSM_MMCC_8974=y CONFIG_HWSPINLOCK_QCOM=y diff --git a/arch/arm/configs/ranchu_defconfig b/arch/arm/configs/ranchu_defconfig new file mode 100644 index 0000000000000000000000000000000000000000..49e7bbd5825a715f9a8a097be5d1eed8295c9600 --- /dev/null +++ b/arch/arm/configs/ranchu_defconfig @@ -0,0 +1,316 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_ARCH_MMAP_RND_BITS=16 +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARCH_VIRT=y +CONFIG_ARM_KERNMEM_PERMS=y +CONFIG_SMP=y +CONFIG_PREEMPT=y +CONFIG_AEABI=y +CONFIG_HIGHMEM=y +CONFIG_KSM=y +CONFIG_SECCOMP=y +CONFIG_CMDLINE="console=ttyAMA0" +CONFIG_VFP=y +CONFIG_NEON=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_INET_DIAG_DESTROY=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_INET_ESP=y +# CONFIG_INET_LRO is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y 
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_BRIDGE=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_CFI_AMDSTD=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_SMSC911X=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_USB_USBNET=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_AMBAKMI=y +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_MEDIA_SUPPORT=y +CONFIG_FB=y +CONFIG_FB_GOLDFISH=y +CONFIG_FB_SIMPLE=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y 
+CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL031=y +CONFIG_VIRTIO_MMIO=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SW_SYNC_USER=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_FUSE_FS=y +CONFIG_CUSE=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_DEBUG_INFO=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_PANIC_TIMEOUT=5 +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_VIRTUALIZATION=y diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index aef022a87c5379bc9627b0fa8fe144b266e17e9c..04410d9f5e72e018296447f5e079d4642679ca98 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -88,8 +88,13 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, u32 *rki = ctx->key_enc + (i * kwords); u32 *rko = rki + kwords; +#ifndef CONFIG_CPU_BIG_ENDIAN rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8); rko[0] = rko[0] ^ rki[0] ^ rcon[i]; +#else + rko[0] = rol32(ce_aes_sub(rki[kwords - 1]), 8); + rko[0] = rko[0] ^ rki[0] ^ (rcon[i] << 24); +#endif rko[1] = rko[0] ^ rki[1]; rko[2] = rko[1] ^ rki[2]; rko[3] = rko[2] ^ rki[3]; diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 522b5feb4eaa34dcbd0e66b7f417a6e857076db8..b62eaeb147aa9a0b8caa73bf96a73a1d02e5f708 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -94,6 +94,9 @@ #define ARM_CPU_XSCALE_ARCH_V2 0x4000 #define ARM_CPU_XSCALE_ARCH_V3 0x6000 +/* Qualcomm implemented cores */ +#define ARM_CPU_PART_SCORPION 0x510002d0 + extern unsigned int processor_id; #ifdef CONFIG_CPU_CP15 diff --git a/arch/arm/include/asm/topology.h 
b/arch/arm/include/asm/topology.h index 370f7a732900ae12e8831e6f3ce7390d16455fc8..d0606412069492767c5f62158e4f9b24ee073570 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -3,6 +3,7 @@ #ifdef CONFIG_ARM_CPU_TOPOLOGY +#include #include struct cputopo_arm { @@ -24,6 +25,12 @@ void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); +#ifdef CONFIG_CPU_FREQ +#define arch_scale_freq_capacity cpufreq_scale_freq_capacity +#endif +#define arch_scale_cpu_capacity scale_cpu_capacity +extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu); + #else static inline void init_cpu_topology(void) { } diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index b8df45883cf78e36aab24522d0da348335801695..25538a9358741a5884e53d39a8bb38c3fcd944b3 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -1066,6 +1066,22 @@ static int __init arch_hw_breakpoint_init(void) return 0; } + /* + * Scorpion CPUs (at least those in APQ8060) seem to set DBGPRSR.SPD + * whenever a WFI is issued, even if the core is not powered down, in + * violation of the architecture. When DBGPRSR.SPD is set, accesses to + * breakpoint and watchpoint registers are treated as undefined, so + * this results in boot time and runtime failures when these are + * accessed and we unexpectedly take a trap. + * + * It's not clear if/how this can be worked around, so we blacklist + * Scorpion CPUs to avoid these issues. + */ + if (read_cpuid_part() == ARM_CPU_PART_SCORPION) { + pr_info("Scorpion CPU detected. Hardware breakpoints and watchpoints disabled\n"); + return 0; + } + has_ossr = core_has_os_save_restore(); /* Determine how many BRPs/WRPs are available. 
*/ diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 22313cb5336257cffa870b15e21279a1b4684e99..9af0701f7094be972b0f8c60a0a6c8417ed844eb 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include @@ -40,8 +41,11 @@ static inline void ipi_flush_tlb_mm(void *arg) static inline void ipi_flush_tlb_page(void *arg) { struct tlb_args *ta = (struct tlb_args *)arg; + unsigned int __ua_flags = uaccess_save_and_enable(); local_flush_tlb_page(ta->ta_vma, ta->ta_start); + + uaccess_restore(__ua_flags); } static inline void ipi_flush_tlb_kernel_page(void *arg) @@ -54,8 +58,11 @@ static inline void ipi_flush_tlb_kernel_page(void *arg) static inline void ipi_flush_tlb_range(void *arg) { struct tlb_args *ta = (struct tlb_args *)arg; + unsigned int __ua_flags = uaccess_save_and_enable(); local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); + + uaccess_restore(__ua_flags); } static inline void ipi_flush_tlb_kernel_range(void *arg) diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index c16e7d6670b2d8772a75a5364d7ba1ac7ab59788..792340f2fde352c4edc1e6660b0b5ac645ccf790 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -42,9 +42,15 @@ */ static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; -unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) +unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) { +#ifdef CONFIG_CPU_FREQ + unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu); + + return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT; +#else return per_cpu(cpu_scale, cpu); +#endif } static void set_capacity_scale(unsigned int cpu, unsigned long capacity) @@ -153,6 +159,8 @@ static void __init parse_dt_topology(void) } +static const struct sched_group_energy * const cpu_core_energy(int cpu); + /* * Look for a customed capacity of a CPU in the cpu_capacity table during the * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the @@ -160,10 +168,14 @@ static void __init parse_dt_topology(void) */ static void update_cpu_capacity(unsigned int cpu) { - if (!cpu_capacity(cpu)) - return; + unsigned long capacity = SCHED_CAPACITY_SCALE; + + if (cpu_core_energy(cpu)) { + int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1; + capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap; + } - set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity); + set_capacity_scale(cpu, capacity); pr_info("CPU%u: update cpu_capacity %lu\n", cpu, arch_scale_cpu_capacity(NULL, cpu)); @@ -275,17 +287,138 @@ void store_cpu_topology(unsigned int cpuid) cpu_topology[cpuid].socket_id, mpidr); } +/* + * ARM TC2 specific energy cost model data. There are no unit requirements for + * the data. Data can be normalized to any reference point, but the + * normalization must be consistent. That is, one bogo-joule/watt must be the + * same quantity for all data, but we don't care what it is. 
+ */ +static struct idle_state idle_states_cluster_a7[] = { + { .power = 25 }, /* arch_cpu_idle() (active idle) = WFI */ + { .power = 25 }, /* WFI */ + { .power = 10 }, /* cluster-sleep-l */ + }; + +static struct idle_state idle_states_cluster_a15[] = { + { .power = 70 }, /* arch_cpu_idle() (active idle) = WFI */ + { .power = 70 }, /* WFI */ + { .power = 25 }, /* cluster-sleep-b */ + }; + +static struct capacity_state cap_states_cluster_a7[] = { + /* Cluster only power */ + { .cap = 150, .power = 2967, }, /* 350 MHz */ + { .cap = 172, .power = 2792, }, /* 400 MHz */ + { .cap = 215, .power = 2810, }, /* 500 MHz */ + { .cap = 258, .power = 2815, }, /* 600 MHz */ + { .cap = 301, .power = 2919, }, /* 700 MHz */ + { .cap = 344, .power = 2847, }, /* 800 MHz */ + { .cap = 387, .power = 3917, }, /* 900 MHz */ + { .cap = 430, .power = 4905, }, /* 1000 MHz */ + }; + +static struct capacity_state cap_states_cluster_a15[] = { + /* Cluster only power */ + { .cap = 426, .power = 7920, }, /* 500 MHz */ + { .cap = 512, .power = 8165, }, /* 600 MHz */ + { .cap = 597, .power = 8172, }, /* 700 MHz */ + { .cap = 682, .power = 8195, }, /* 800 MHz */ + { .cap = 768, .power = 8265, }, /* 900 MHz */ + { .cap = 853, .power = 8446, }, /* 1000 MHz */ + { .cap = 938, .power = 11426, }, /* 1100 MHz */ + { .cap = 1024, .power = 15200, }, /* 1200 MHz */ + }; + +static struct sched_group_energy energy_cluster_a7 = { + .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a7), + .idle_states = idle_states_cluster_a7, + .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a7), + .cap_states = cap_states_cluster_a7, +}; + +static struct sched_group_energy energy_cluster_a15 = { + .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a15), + .idle_states = idle_states_cluster_a15, + .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a15), + .cap_states = cap_states_cluster_a15, +}; + +static struct idle_state idle_states_core_a7[] = { + { .power = 0 }, /* arch_cpu_idle (active idle) = WFI */ + { .power = 0 }, /* WFI */ + { .power = 0 }, /* cluster-sleep-l */ + }; + +static struct idle_state idle_states_core_a15[] = { + { .power = 0 }, /* arch_cpu_idle (active idle) = WFI */ + { .power = 0 }, /* WFI */ + { .power = 0 }, /* cluster-sleep-b */ + }; + +static struct capacity_state cap_states_core_a7[] = { + /* Power per cpu */ + { .cap = 150, .power = 187, }, /* 350 MHz */ + { .cap = 172, .power = 275, }, /* 400 MHz */ + { .cap = 215, .power = 334, }, /* 500 MHz */ + { .cap = 258, .power = 407, }, /* 600 MHz */ + { .cap = 301, .power = 447, }, /* 700 MHz */ + { .cap = 344, .power = 549, }, /* 800 MHz */ + { .cap = 387, .power = 761, }, /* 900 MHz */ + { .cap = 430, .power = 1024, }, /* 1000 MHz */ + }; + +static struct capacity_state cap_states_core_a15[] = { + /* Power per cpu */ + { .cap = 426, .power = 2021, }, /* 500 MHz */ + { .cap = 512, .power = 2312, }, /* 600 MHz */ + { .cap = 597, .power = 2756, }, /* 700 MHz */ + { .cap = 682, .power = 3125, }, /* 800 MHz */ + { .cap = 768, .power = 3524, }, /* 900 MHz */ + { .cap = 853, .power = 3846, }, /* 1000 MHz */ + { .cap = 938, .power = 5177, }, /* 1100 MHz */ + { .cap = 1024, .power = 6997, }, /* 1200 MHz */ + }; + +static struct sched_group_energy energy_core_a7 = { + .nr_idle_states = ARRAY_SIZE(idle_states_core_a7), + .idle_states = idle_states_core_a7, + .nr_cap_states = ARRAY_SIZE(cap_states_core_a7), + .cap_states = cap_states_core_a7, +}; + +static struct sched_group_energy energy_core_a15 = { + .nr_idle_states = ARRAY_SIZE(idle_states_core_a15), + .idle_states = idle_states_core_a15, + 
.nr_cap_states = ARRAY_SIZE(cap_states_core_a15), + .cap_states = cap_states_core_a15, +}; + +/* sd energy functions */ +static inline +const struct sched_group_energy * const cpu_cluster_energy(int cpu) +{ + return cpu_topology[cpu].socket_id ? &energy_cluster_a7 : + &energy_cluster_a15; +} + +static inline +const struct sched_group_energy * const cpu_core_energy(int cpu) +{ + return cpu_topology[cpu].socket_id ? &energy_core_a7 : + &energy_core_a15; +} + static inline int cpu_corepower_flags(void) { - return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN; + return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN | \ + SD_SHARE_CAP_STATES; } static struct sched_domain_topology_level arm_topology[] = { #ifdef CONFIG_SCHED_MC - { cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) }, - { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, + { cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) }, #endif - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { cpu_cpu_mask, NULL, cpu_cluster_energy, SD_INIT_NAME(DIE) }, { NULL, }, }; diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c index ed3d0e9f72ac35bec2681ca2980075cb354b3f88..d7a43afdfac0e25a73e973abf50c58b1fc10d632 100644 --- a/arch/arm/mach-davinci/da850.c +++ b/arch/arm/mach-davinci/da850.c @@ -319,6 +319,16 @@ static struct clk emac_clk = { .gpsc = 1, }; +/* + * In order to avoid adding the emac_clk to the clock lookup table twice (and + * screwing up the linked list in the process) create a separate clock for + * mdio inheriting the rate from emac_clk. + */ +static struct clk mdio_clk = { + .name = "mdio", + .parent = &emac_clk, +}; + static struct clk mcasp_clk = { .name = "mcasp", .parent = &async3_clk, @@ -494,7 +504,7 @@ static struct clk_lookup da850_clks[] = { CLK(NULL, "arm", &arm_clk), CLK(NULL, "rmii", &rmii_clk), CLK("davinci_emac.1", NULL, &emac_clk), - CLK("davinci_mdio.0", "fck", &emac_clk), + CLK("davinci_mdio.0", "fck", &mdio_clk), CLK("davinci-mcasp.0", NULL, &mcasp_clk), CLK("davinci-mcbsp.0", NULL, &mcbsp0_clk), CLK("davinci-mcbsp.1", NULL, &mcbsp1_clk), diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 5b37ec29996e9de4451a918b9dccb894a138ec02..e37ceb81a379cd7a3c64a633ce4c9eed4417a83a 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -80,7 +80,7 @@ endif # Power Management omap-4-5-pm-common = omap-mpuss-lowpower.o obj-$(CONFIG_ARCH_OMAP4) += $(omap-4-5-pm-common) -obj-$(CONFIG_ARCH_OMAP5) += $(omap-4-5-pm-common) +obj-$(CONFIG_SOC_OMAP5) += $(omap-4-5-pm-common) obj-$(CONFIG_OMAP_PM_NOOP) += omap-pm-noop.o ifeq ($(CONFIG_PM),y) diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index bab814d2f37dcc9a18229d5e912422a653307088..60f5e838a3bccc74d6e69ecb42d43995b02d35e5 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -306,7 +306,7 @@ DT_MACHINE_START(AM43_DT, "Generic AM43 (Flattened Device Tree)") .init_late = am43xx_init_late, .init_irq = omap_gic_of_init, .init_machine = omap_generic_init, - .init_time = omap4_local_timer_init, + .init_time = omap3_gptimer_timer_init, .dt_compat = am43_boards_compat, .restart = omap44xx_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index deed42e1dd9c903aba522d306416537eb26f7762..6dcca2957e23ef3ffc26ea56d9450734262c76d6 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -262,8 +262,6 @@ extern void __iomem 
*omap4_get_sar_ram_base(void); extern void omap4_mpuss_early_init(void); extern void omap_do_wfi(void); -extern void omap4_secondary_startup(void); -extern void omap4460_secondary_startup(void); #ifdef CONFIG_SMP /* Needed for secondary core boot */ @@ -275,16 +273,11 @@ extern void omap4_cpu_die(unsigned int cpu); extern int omap4_cpu_kill(unsigned int cpu); extern const struct smp_operations omap4_smp_ops; - -extern void omap5_secondary_startup(void); -extern void omap5_secondary_hyp_startup(void); #endif #if defined(CONFIG_SMP) && defined(CONFIG_PM) extern int omap4_mpuss_init(void); extern int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state); -extern int omap4_finish_suspend(unsigned long cpu_state); -extern void omap4_cpu_resume(void); extern int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state); #else static inline int omap4_enter_lowpower(unsigned int cpu, @@ -305,14 +298,41 @@ static inline int omap4_mpuss_init(void) return 0; } +#endif + +#ifdef CONFIG_ARCH_OMAP4 +void omap4_secondary_startup(void); +void omap4460_secondary_startup(void); +int omap4_finish_suspend(unsigned long cpu_state); +void omap4_cpu_resume(void); +#else +static inline void omap4_secondary_startup(void) +{ +} + +static inline void omap4460_secondary_startup(void) +{ +} static inline int omap4_finish_suspend(unsigned long cpu_state) { return 0; } - static inline void omap4_cpu_resume(void) -{} +{ +} +#endif +#if defined(CONFIG_SOC_OMAP5) || defined(CONFIG_SOC_DRA7XX) +void omap5_secondary_startup(void); +void omap5_secondary_hyp_startup(void); +#else +static inline void omap5_secondary_startup(void) +{ +} + +static inline void omap5_secondary_hyp_startup(void) +{ +} #endif void pdata_quirks_init(const struct of_device_id *); diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 0e9acdd95d707d59a4727fe794379244543b3675..f0da5259762aa493098021cc7aaf9fda8ec63dd8 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -717,10 +717,11 @@ void __init omap5_init_early(void) OMAP2_L4_IO_ADDRESS(OMAP54XX_SCM_BASE)); omap2_set_globals_prcm_mpu(OMAP2_L4_IO_ADDRESS(OMAP54XX_PRCM_MPU_BASE)); omap2_control_base_init(); - omap4_pm_init_early(); omap2_prcm_base_init(); omap5xxx_check_revision(); omap4_sar_ram_init(); + omap4_mpuss_early_init(); + omap4_pm_init_early(); omap54xx_voltagedomains_init(); omap54xx_powerdomains_init(); omap54xx_clockdomains_init(); diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c index ad982465efd0d0faffbf3e1d812c8e0bd44ec313..7d62ad48c7c9dd1dfb290cdcf5d61479b081d7a9 100644 --- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c +++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include "soc.h" @@ -244,10 +245,9 @@ int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state) save_state = 1; break; case PWRDM_POWER_RET: - if (IS_PM44XX_ERRATUM(PM_OMAP4_CPU_OSWR_DISABLE)) { + if (IS_PM44XX_ERRATUM(PM_OMAP4_CPU_OSWR_DISABLE)) save_state = 0; - break; - } + break; default: /* * CPUx CSWR is invalid hardware state. 
Also CPUx OSWR @@ -371,8 +371,12 @@ int __init omap4_mpuss_init(void) pm_info = &per_cpu(omap4_pm_info, 0x0); if (sar_base) { pm_info->scu_sar_addr = sar_base + SCU_OFFSET0; - pm_info->wkup_sar_addr = sar_base + - CPU0_WAKEUP_NS_PA_ADDR_OFFSET; + if (cpu_is_omap44xx()) + pm_info->wkup_sar_addr = sar_base + + CPU0_WAKEUP_NS_PA_ADDR_OFFSET; + else + pm_info->wkup_sar_addr = sar_base + + OMAP5_CPU0_WAKEUP_NS_PA_ADDR_OFFSET; pm_info->l2x0_sar_addr = sar_base + L2X0_SAVE_OFFSET0; } pm_info->pwrdm = pwrdm_lookup("cpu0_pwrdm"); @@ -391,8 +395,12 @@ int __init omap4_mpuss_init(void) pm_info = &per_cpu(omap4_pm_info, 0x1); if (sar_base) { pm_info->scu_sar_addr = sar_base + SCU_OFFSET1; - pm_info->wkup_sar_addr = sar_base + - CPU1_WAKEUP_NS_PA_ADDR_OFFSET; + if (cpu_is_omap44xx()) + pm_info->wkup_sar_addr = sar_base + + CPU1_WAKEUP_NS_PA_ADDR_OFFSET; + else + pm_info->wkup_sar_addr = sar_base + + OMAP5_CPU1_WAKEUP_NS_PA_ADDR_OFFSET; pm_info->l2x0_sar_addr = sar_base + L2X0_SAVE_OFFSET1; } @@ -453,15 +461,24 @@ void __init omap4_mpuss_early_init(void) { unsigned long startup_pa; - if (!cpu_is_omap44xx()) + if (!(cpu_is_omap44xx() || soc_is_omap54xx())) return; sar_base = omap4_get_sar_ram_base(); if (cpu_is_omap443x()) startup_pa = virt_to_phys(omap4_secondary_startup); - else + else if (cpu_is_omap446x()) startup_pa = virt_to_phys(omap4460_secondary_startup); + else if ((__boot_cpu_mode & MODE_MASK) == HYP_MODE) + startup_pa = virt_to_phys(omap5_secondary_hyp_startup); + else + startup_pa = virt_to_phys(omap5_secondary_startup); - writel_relaxed(startup_pa, sar_base + CPU1_WAKEUP_NS_PA_ADDR_OFFSET); + if (cpu_is_omap44xx()) + writel_relaxed(startup_pa, sar_base + + CPU1_WAKEUP_NS_PA_ADDR_OFFSET); + else + writel_relaxed(startup_pa, sar_base + + OMAP5_CPU1_WAKEUP_NS_PA_ADDR_OFFSET); } diff --git a/arch/arm/mach-omap2/omap4-sar-layout.h b/arch/arm/mach-omap2/omap4-sar-layout.h index 792b1069f724ff6b121a406c077826ff916ed36a..5b2966a0f73308d2e552d7f768f45a107b05a7be 100644 --- a/arch/arm/mach-omap2/omap4-sar-layout.h +++ b/arch/arm/mach-omap2/omap4-sar-layout.h @@ -31,6 +31,8 @@ /* CPUx Wakeup Non-Secure Physical Address offsets in SAR_BANK3 */ #define CPU0_WAKEUP_NS_PA_ADDR_OFFSET 0xa04 #define CPU1_WAKEUP_NS_PA_ADDR_OFFSET 0xa08 +#define OMAP5_CPU0_WAKEUP_NS_PA_ADDR_OFFSET 0xe00 +#define OMAP5_CPU1_WAKEUP_NS_PA_ADDR_OFFSET 0xe04 #define SAR_BACKUP_STATUS_OFFSET (SAR_BANK3_OFFSET + 0x500) #define SAR_SECURE_RAM_SIZE_OFFSET (SAR_BANK3_OFFSET + 0x504) diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 5e2e2218a4023a28b25657f2174735d908fd2a68..b2f2448bfa6dc0d95993378320f077578ad8329a 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -510,18 +510,19 @@ void __init omap3_secure_sync32k_timer_init(void) } #endif /* CONFIG_ARCH_OMAP3 */ -#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) +#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) || \ + defined(CONFIG_SOC_AM43XX) void __init omap3_gptimer_timer_init(void) { __omap_sync32k_timer_init(2, "timer_sys_ck", NULL, 1, "timer_sys_ck", "ti,timer-alwon", true); - - clocksource_probe(); + if (of_have_populated_dt()) + clocksource_probe(); } #endif #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) || \ - defined(CONFIG_SOC_DRA7XX) || defined(CONFIG_SOC_AM43XX) + defined(CONFIG_SOC_DRA7XX) static void __init omap4_sync32k_timer_init(void) { __omap_sync32k_timer_init(1, "timer_32k_ck", "ti,timer-alwon", diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c index 
12b94357fbc11f868a792f70f56c5fe17cc586c6..c725baf119e1135e0b8f796c18e3684dc5b74862 100644 --- a/arch/arm/mach-pxa/pxa25x.c +++ b/arch/arm/mach-pxa/pxa25x.c @@ -156,7 +156,7 @@ static int __init __init pxa25x_dt_init_irq(struct device_node *node, struct device_node *parent) { pxa_dt_irq_init(pxa25x_set_wake); - set_handle_irq(ichp_handle_irq); + set_handle_irq(icip_handle_irq); return 0; } diff --git a/arch/arm/mach-ux500/pm.c b/arch/arm/mach-ux500/pm.c index 8538910db202ab6a13e0e3588f27b2a3157c4bc2..a970e7fcba9e02fe6e2651cd5cfca76321d26314 100644 --- a/arch/arm/mach-ux500/pm.c +++ b/arch/arm/mach-ux500/pm.c @@ -134,8 +134,8 @@ bool prcmu_pending_irq(void) */ bool prcmu_is_cpu_in_wfi(int cpu) { - return readl(PRCM_ARM_WFI_STANDBY) & cpu ? PRCM_ARM_WFI_STANDBY_WFI1 : - PRCM_ARM_WFI_STANDBY_WFI0; + return readl(PRCM_ARM_WFI_STANDBY) & + (cpu ? PRCM_ARM_WFI_STANDBY_WFI1 : PRCM_ARM_WFI_STANDBY_WFI0); } /* diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index d12002cd63bc6f6b27fc4f55c2076c0c23481807..ed118648313f63df2c58a77199491a168d4783a3 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -59,7 +59,7 @@ void __iomem *zynq_scu_base; static void __init zynq_memory_init(void) { if (!__pa(PAGE_OFFSET)) - memblock_reserve(__pa(PAGE_OFFSET), __pa(swapper_pg_dir)); + memblock_reserve(__pa(PAGE_OFFSET), 0x80000); } static struct platform_device zynq_cpuidle_device = { diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index f193414d0f6ff78a2ac91b48160390935fa68546..4986dc0c1dff05f564756d584cde2c06c4ee01ce 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -372,8 +372,7 @@ static int __init xen_guest_init(void) * for secondary CPUs as they are brought up. * For uniformity we use VCPUOP_register_vcpu_info even on cpu0. */ - xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info), - sizeof(struct vcpu_info)); + xen_vcpu_info = alloc_percpu(struct vcpu_info); if (xen_vcpu_info == NULL) return -ENOMEM; diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 718bd0b45e383a3008cae5e5718f5c6a6f3c4327..92dc1e631ae8c10dd27fec1b9d8cb6dfe5821348 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -14,6 +14,8 @@ # Based on the ia64 boot/Makefile. 
# +include $(srctree)/arch/arm64/boot/dts/Makefile + OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S targets := Image Image.gz diff --git a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts index 7841b724e340e1441867254c9a89c53c014eb031..4617759670da80735dc7637745e7db29ff7641ac 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts +++ b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts @@ -15,13 +15,6 @@ act { gpios = <&gpio 47 0>; }; - - pwr { - label = "PWR"; - gpios = <&gpio 35 0>; - default-state = "keep"; - linux,default-trigger = "default-on"; - }; }; }; diff --git a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi index 8216bbb29fe0758ff315098fb1319d97fea91469..c1f719b7097a60e7a35197c23617067448932c70 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi @@ -1,7 +1,7 @@ #include "bcm283x.dtsi" / { - compatible = "brcm,bcm2836"; + compatible = "brcm,bcm2837"; soc { ranges = <0x7e000000 0x3f000000 0x1000000>, diff --git a/arch/arm64/boot/dts/hisilicon/hip06.dtsi b/arch/arm64/boot/dts/hisilicon/hip06.dtsi index b548763366dd65cb3ace314f6c26873341f4137f..af450413b9ddbe33d9e767714df4f248a83a5789 100644 --- a/arch/arm64/boot/dts/hisilicon/hip06.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hip06.dtsi @@ -322,7 +322,7 @@ compatible = "generic-ohci"; reg = <0x0 0xa7030000 0x0 0x10000>; interrupt-parent = <&mbigen_usb>; - interrupts = <64 4>; + interrupts = <640 4>; dma-coherent; status = "disabled"; }; @@ -331,7 +331,7 @@ compatible = "generic-ehci"; reg = <0x0 0xa7020000 0x0 0x10000>; interrupt-parent = <&mbigen_usb>; - interrupts = <65 4>; + interrupts = <641 4>; dma-coherent; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index 1c71e256601dc44b9a2d30ce7cb54db8b4811ee7..6c03c1702bb32ff1766d478c872034d725e85e1b 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -450,6 +450,9 @@ auxadc: auxadc@11001000 { compatible = "mediatek,mt8173-auxadc"; reg = <0 0x11001000 0 0x1000>; + clocks = <&pericfg CLK_PERI_AUXADC>; + clock-names = "main"; + #io-channel-cells = <1>; }; uart0: serial@11002000 { diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi index 5fda583351d7667de1632b44186b454316ce7093..906fb836d24188aa6b1b0471461d0d15086a2c4c 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi @@ -21,6 +21,10 @@ reg = <0x0 0x80000000 0x1 0x0>; }; + gpu@57000000 { + vdd-supply = <&vdd_gpu>; + }; + /* debug port */ serial@70006000 { status = "okay"; @@ -291,4 +295,18 @@ clock-frequency = <32768>; }; }; + + regulators { + vdd_gpu: regulator@100 { + compatible = "pwm-regulator"; + reg = <100>; + pwms = <&pwm 1 4880>; + regulator-name = "VDD_GPU"; + regulator-min-microvolt = <710000>; + regulator-max-microvolt = <1320000>; + enable-gpios = <&pmic 6 GPIO_ACTIVE_HIGH>; + regulator-ramp-delay = <80>; + regulator-enable-ramp-delay = <1000>; + }; + }; }; diff --git a/arch/arm64/configs/ranchu64_defconfig b/arch/arm64/configs/ranchu64_defconfig new file mode 100644 index 0000000000000000000000000000000000000000..fc55008d8c4c9777be3abaa52d9f7db3af575a42 --- /dev/null +++ b/arch/arm64/configs/ranchu64_defconfig @@ -0,0 +1,312 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_POSIX_MQUEUE=y 
+CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_ARCH_MMAP_RND_BITS=24 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16 +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_ARCH_VEXPRESS=y +CONFIG_NR_CPUS=4 +CONFIG_PREEMPT=y +CONFIG_KSM=y +CONFIG_SECCOMP=y +CONFIG_ARMV8_DEPRECATED=y +CONFIG_SWP_EMULATION=y +CONFIG_CP15_BARRIER_EMULATION=y +CONFIG_SETEND_EMULATION=y +CONFIG_CMDLINE="console=ttyAMA0" +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_COMPAT=y +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_INET_DIAG_DESTROY=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_INET_ESP=y +# CONFIG_INET_LRO is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_RPFILTER=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_TARGET_ECN=y 
+CONFIG_IP_NF_TARGET_TTL=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_AH=y +CONFIG_IP6_NF_MATCH_EUI64=y +CONFIG_IP6_NF_MATCH_FRAG=y +CONFIG_IP6_NF_MATCH_OPTS=y +CONFIG_IP6_NF_MATCH_HL=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_MATCH_MH=y +CONFIG_IP6_NF_MATCH_RT=y +CONFIG_IP6_NF_TARGET_HL=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_BRIDGE=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_SCSI=y +# CONFIG_SCSI_PROC_FS is not set +CONFIG_BLK_DEV_SD=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_SMC91X=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +# CONFIG_HW_RANDOM is not set +CONFIG_BATTERY_GOLDFISH=y +# CONFIG_HWMON is not set +CONFIG_MEDIA_SUPPORT=y +CONFIG_FB=y +CONFIG_FB_GOLDFISH=y +CONFIG_FB_SIMPLE=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +# CONFIG_USB_SUPPORT is not set +CONFIG_RTC_CLASS=y +CONFIG_VIRTIO_MMIO=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SW_SYNC_USER=y +CONFIG_ION=y 
+CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_EXT2_FS=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_FUSE_FS=y +CONFIG_CUSE=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_PANIC_TIMEOUT=5 +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +# CONFIG_FTRACE is not set +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_DEBUG_RODATA=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index a2a7fbcacc141ed595f31026510cecd459a733f0..3363560c79b7e69376ac4a50a5401456640a35af 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -9,6 +9,7 @@ */ #include +#include .text .arch armv8-a+crypto @@ -19,7 +20,7 @@ */ ENTRY(ce_aes_ccm_auth_data) ldr w8, [x3] /* leftover from prev round? */ - ld1 {v0.2d}, [x0] /* load mac */ + ld1 {v0.16b}, [x0] /* load mac */ cbz w8, 1f sub w8, w8, #16 eor v1.16b, v1.16b, v1.16b @@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data) beq 8f /* out of input? */ cbnz w8, 0b eor v0.16b, v0.16b, v1.16b -1: ld1 {v3.2d}, [x4] /* load first round key */ +1: ld1 {v3.16b}, [x4] /* load first round key */ prfm pldl1strm, [x1] cmp w5, #12 /* which key size? */ add x6, x4, #16 @@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data) mov v5.16b, v3.16b b 4f 2: mov v4.16b, v3.16b - ld1 {v5.2d}, [x6], #16 /* load 2nd round key */ + ld1 {v5.16b}, [x6], #16 /* load 2nd round key */ 3: aese v0.16b, v4.16b aesmc v0.16b, v0.16b -4: ld1 {v3.2d}, [x6], #16 /* load next round key */ +4: ld1 {v3.16b}, [x6], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b -5: ld1 {v4.2d}, [x6], #16 /* load next round key */ +5: ld1 {v4.16b}, [x6], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b - ld1 {v5.2d}, [x6], #16 /* load next round key */ + ld1 {v5.16b}, [x6], #16 /* load next round key */ bpl 3b aese v0.16b, v4.16b subs w2, w2, #16 /* last data? */ @@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data) ld1 {v1.16b}, [x1], #16 /* load next input block */ eor v0.16b, v0.16b, v1.16b /* xor with mac */ bne 1b -6: st1 {v0.2d}, [x0] /* store mac */ +6: st1 {v0.16b}, [x0] /* store mac */ beq 10f adds w2, w2, #16 beq 10f @@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data) adds w7, w7, #1 bne 9b eor v0.16b, v0.16b, v1.16b - st1 {v0.2d}, [x0] + st1 {v0.16b}, [x0] 10: str w8, [x3] ret ENDPROC(ce_aes_ccm_auth_data) @@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data) * u32 rounds); */ ENTRY(ce_aes_ccm_final) - ld1 {v3.2d}, [x2], #16 /* load first round key */ - ld1 {v0.2d}, [x0] /* load mac */ + ld1 {v3.16b}, [x2], #16 /* load first round key */ + ld1 {v0.16b}, [x0] /* load mac */ cmp w3, #12 /* which key size? 
*/ sub w3, w3, #2 /* modified # of rounds */ - ld1 {v1.2d}, [x1] /* load 1st ctriv */ + ld1 {v1.16b}, [x1] /* load 1st ctriv */ bmi 0f bne 3f mov v5.16b, v3.16b b 2f 0: mov v4.16b, v3.16b -1: ld1 {v5.2d}, [x2], #16 /* load next round key */ +1: ld1 {v5.16b}, [x2], #16 /* load next round key */ aese v0.16b, v4.16b aesmc v0.16b, v0.16b aese v1.16b, v4.16b aesmc v1.16b, v1.16b -2: ld1 {v3.2d}, [x2], #16 /* load next round key */ +2: ld1 {v3.16b}, [x2], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b aese v1.16b, v5.16b aesmc v1.16b, v1.16b -3: ld1 {v4.2d}, [x2], #16 /* load next round key */ +3: ld1 {v4.16b}, [x2], #16 /* load next round key */ subs w3, w3, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b @@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final) aese v1.16b, v4.16b /* final round key cancels out */ eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ - st1 {v0.2d}, [x0] /* store result */ + st1 {v0.16b}, [x0] /* store result */ ret ENDPROC(ce_aes_ccm_final) .macro aes_ccm_do_crypt,enc ldr x8, [x6, #8] /* load lower ctr */ - ld1 {v0.2d}, [x5] /* load mac */ - rev x8, x8 /* keep swabbed ctr in reg */ + ld1 {v0.16b}, [x5] /* load mac */ +CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ 0: /* outer loop */ - ld1 {v1.1d}, [x6] /* load upper ctr */ + ld1 {v1.8b}, [x6] /* load upper ctr */ prfm pldl1strm, [x1] add x8, x8, #1 rev x9, x8 cmp w4, #12 /* which key size? */ sub w7, w4, #2 /* get modified # of rounds */ ins v1.d[1], x9 /* no carry in lower ctr */ - ld1 {v3.2d}, [x3] /* load first round key */ + ld1 {v3.16b}, [x3] /* load first round key */ add x10, x3, #16 bmi 1f bne 4f mov v5.16b, v3.16b b 3f 1: mov v4.16b, v3.16b - ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ + ld1 {v5.16b}, [x10], #16 /* load 2nd round key */ 2: /* inner loop: 3 rounds, 2x interleaved */ aese v0.16b, v4.16b aesmc v0.16b, v0.16b aese v1.16b, v4.16b aesmc v1.16b, v1.16b -3: ld1 {v3.2d}, [x10], #16 /* load next round key */ +3: ld1 {v3.16b}, [x10], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b aese v1.16b, v5.16b aesmc v1.16b, v1.16b -4: ld1 {v4.2d}, [x10], #16 /* load next round key */ +4: ld1 {v4.16b}, [x10], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b aese v1.16b, v3.16b aesmc v1.16b, v1.16b - ld1 {v5.2d}, [x10], #16 /* load next round key */ + ld1 {v5.16b}, [x10], #16 /* load next round key */ bpl 2b aese v0.16b, v4.16b aese v1.16b, v4.16b @@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final) eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ st1 {v1.16b}, [x0], #16 /* write output block */ bne 0b - rev x8, x8 - st1 {v0.2d}, [x5] /* store mac */ +CPU_LE( rev x8, x8 ) + st1 {v0.16b}, [x5] /* store mac */ str x8, [x6, #8] /* store lsb end of ctr (BE) */ 5: ret 6: eor v0.16b, v0.16b, v5.16b /* final round mac */ eor v1.16b, v1.16b, v5.16b /* final round enc */ - st1 {v0.2d}, [x5] /* store mac */ + st1 {v0.16b}, [x5] /* store mac */ add w2, w2, #16 /* process partial tail block */ 7: ldrb w9, [x1], #1 /* get 1 byte of input */ umov w6, v1.b[0] /* get top crypted ctr byte */ diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c index f7bd9bf0bbb398bbb2a355c1b24d1f9c035bacf4..50d9fe11d0c862bbd9cdd75855bfbc0fbffcfea7 100644 --- a/arch/arm64/crypto/aes-ce-cipher.c +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -47,24 +47,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_begin_partial(4); __asm__(" ld1 {v0.16b}, %[in] ;" - " ld1 {v1.2d}, [%[key]], #16 ;" + 
" ld1 {v1.16b}, [%[key]], #16 ;" " cmp %w[rounds], #10 ;" " bmi 0f ;" " bne 3f ;" " mov v3.16b, v1.16b ;" " b 2f ;" "0: mov v2.16b, v1.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" "1: aese v0.16b, v2.16b ;" " aesmc v0.16b, v0.16b ;" - "2: ld1 {v1.2d}, [%[key]], #16 ;" + "2: ld1 {v1.16b}, [%[key]], #16 ;" " aese v0.16b, v3.16b ;" " aesmc v0.16b, v0.16b ;" - "3: ld1 {v2.2d}, [%[key]], #16 ;" + "3: ld1 {v2.16b}, [%[key]], #16 ;" " subs %w[rounds], %w[rounds], #3 ;" " aese v0.16b, v1.16b ;" " aesmc v0.16b, v0.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" " bpl 1b ;" " aese v0.16b, v2.16b ;" " eor v0.16b, v0.16b, v3.16b ;" @@ -92,24 +92,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_begin_partial(4); __asm__(" ld1 {v0.16b}, %[in] ;" - " ld1 {v1.2d}, [%[key]], #16 ;" + " ld1 {v1.16b}, [%[key]], #16 ;" " cmp %w[rounds], #10 ;" " bmi 0f ;" " bne 3f ;" " mov v3.16b, v1.16b ;" " b 2f ;" "0: mov v2.16b, v1.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" "1: aesd v0.16b, v2.16b ;" " aesimc v0.16b, v0.16b ;" - "2: ld1 {v1.2d}, [%[key]], #16 ;" + "2: ld1 {v1.16b}, [%[key]], #16 ;" " aesd v0.16b, v3.16b ;" " aesimc v0.16b, v0.16b ;" - "3: ld1 {v2.2d}, [%[key]], #16 ;" + "3: ld1 {v2.16b}, [%[key]], #16 ;" " subs %w[rounds], %w[rounds], #3 ;" " aesd v0.16b, v1.16b ;" " aesimc v0.16b, v0.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" " bpl 1b ;" " aesd v0.16b, v2.16b ;" " eor v0.16b, v0.16b, v3.16b ;" @@ -173,7 +173,12 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, u32 *rki = ctx->key_enc + (i * kwords); u32 *rko = rki + kwords; +#ifndef CONFIG_CPU_BIG_ENDIAN rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; +#else + rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^ + rki[0]; +#endif rko[1] = rko[0] ^ rki[1]; rko[2] = rko[1] ^ rki[2]; rko[3] = rko[2] ^ rki[3]; diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 78f3cfe92c0872345992203a6501ba0460cc2c91..b46093d567e5449256dbf3bf1d4981144bdd8739 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -10,6 +10,7 @@ */ #include +#include #define AES_ENTRY(func) ENTRY(ce_ ## func) #define AES_ENDPROC(func) ENDPROC(ce_ ## func) diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index f6e372c528eb438b6517a236315afeb1694a8002..838dad5c209fae0f3a660e79d1f5fef8eb1f0c68 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -193,15 +193,16 @@ AES_ENTRY(aes_cbc_encrypt) cbz w6, .Lcbcencloop ld1 {v0.16b}, [x5] /* get iv */ - enc_prepare w3, x2, x5 + enc_prepare w3, x2, x6 .Lcbcencloop: ld1 {v1.16b}, [x1], #16 /* get next pt block */ eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */ - encrypt_block v0, w3, x2, x5, w6 + encrypt_block v0, w3, x2, x6, w7 st1 {v0.16b}, [x0], #16 subs w4, w4, #1 bne .Lcbcencloop + st1 {v0.16b}, [x5] /* return iv */ ret AES_ENDPROC(aes_cbc_encrypt) @@ -211,7 +212,7 @@ AES_ENTRY(aes_cbc_decrypt) cbz w6, .LcbcdecloopNx ld1 {v7.16b}, [x5] /* get iv */ - dec_prepare w3, x2, x5 + dec_prepare w3, x2, x6 .LcbcdecloopNx: #if INTERLEAVE >= 2 @@ -248,7 +249,7 @@ AES_ENTRY(aes_cbc_decrypt) .Lcbcdecloop: ld1 {v1.16b}, [x1], #16 /* get next ct block */ mov v0.16b, v1.16b /* ...and copy to v0 */ - decrypt_block v0, w3, x2, x5, w6 + decrypt_block v0, w3, x2, x6, w7 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ mov v7.16b, v1.16b /* ct is next 
iv */ st1 {v0.16b}, [x0], #16 @@ -256,6 +257,7 @@ AES_ENTRY(aes_cbc_decrypt) bne .Lcbcdecloop .Lcbcdecout: FRAME_POP + st1 {v7.16b}, [x5] /* return iv */ ret AES_ENDPROC(aes_cbc_decrypt) @@ -267,24 +269,15 @@ AES_ENDPROC(aes_cbc_decrypt) AES_ENTRY(aes_ctr_encrypt) FRAME_PUSH - cbnz w6, .Lctrfirst /* 1st time around? */ - umov x5, v4.d[1] /* keep swabbed ctr in reg */ - rev x5, x5 -#if INTERLEAVE >= 2 - cmn w5, w4 /* 32 bit overflow? */ - bcs .Lctrinc - add x5, x5, #1 /* increment BE ctr */ - b .LctrincNx -#else - b .Lctrinc -#endif -.Lctrfirst: + cbz w6, .Lctrnotfirst /* 1st time around? */ enc_prepare w3, x2, x6 ld1 {v4.16b}, [x5] - umov x5, v4.d[1] /* keep swabbed ctr in reg */ - rev x5, x5 + +.Lctrnotfirst: + umov x8, v4.d[1] /* keep swabbed ctr in reg */ + rev x8, x8 #if INTERLEAVE >= 2 - cmn w5, w4 /* 32 bit overflow? */ + cmn w8, w4 /* 32 bit overflow? */ bcs .Lctrloop .LctrloopNx: subs w4, w4, #INTERLEAVE @@ -292,11 +285,11 @@ AES_ENTRY(aes_ctr_encrypt) #if INTERLEAVE == 2 mov v0.8b, v4.8b mov v1.8b, v4.8b - rev x7, x5 - add x5, x5, #1 + rev x7, x8 + add x8, x8, #1 ins v0.d[1], x7 - rev x7, x5 - add x5, x5, #1 + rev x7, x8 + add x8, x8, #1 ins v1.d[1], x7 ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */ do_encrypt_block2x @@ -305,7 +298,7 @@ AES_ENTRY(aes_ctr_encrypt) st1 {v0.16b-v1.16b}, [x0], #32 #else ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ - dup v7.4s, w5 + dup v7.4s, w8 mov v0.16b, v4.16b add v7.4s, v7.4s, v8.4s mov v1.16b, v4.16b @@ -323,18 +316,12 @@ AES_ENTRY(aes_ctr_encrypt) eor v2.16b, v7.16b, v2.16b eor v3.16b, v5.16b, v3.16b st1 {v0.16b-v3.16b}, [x0], #64 - add x5, x5, #INTERLEAVE + add x8, x8, #INTERLEAVE #endif - cbz w4, .LctroutNx -.LctrincNx: - rev x7, x5 + rev x7, x8 ins v4.d[1], x7 + cbz w4, .Lctrout b .LctrloopNx -.LctroutNx: - sub x5, x5, #1 - rev x7, x5 - ins v4.d[1], x7 - b .Lctrout .Lctr1x: adds w4, w4, #INTERLEAVE beq .Lctrout @@ -342,30 +329,39 @@ AES_ENTRY(aes_ctr_encrypt) .Lctrloop: mov v0.16b, v4.16b encrypt_block v0, w3, x2, x6, w7 + + adds x8, x8, #1 /* increment BE ctr */ + rev x7, x8 + ins v4.d[1], x7 + bcs .Lctrcarry /* overflow? */ + +.Lctrcarrydone: subs w4, w4, #1 bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */ ld1 {v3.16b}, [x1], #16 eor v3.16b, v0.16b, v3.16b st1 {v3.16b}, [x0], #16 - beq .Lctrout -.Lctrinc: - adds x5, x5, #1 /* increment BE ctr */ - rev x7, x5 - ins v4.d[1], x7 - bcc .Lctrloop /* no overflow? */ - umov x7, v4.d[0] /* load upper word of ctr */ - rev x7, x7 /* ... to handle the carry */ - add x7, x7, #1 - rev x7, x7 - ins v4.d[0], x7 - b .Lctrloop + bne .Lctrloop + +.Lctrout: + st1 {v4.16b}, [x5] /* return next CTR value */ + FRAME_POP + ret + .Lctrhalfblock: ld1 {v3.8b}, [x1] eor v3.8b, v0.8b, v3.8b st1 {v3.8b}, [x0] -.Lctrout: FRAME_POP ret + +.Lctrcarry: + umov x7, v4.d[0] /* load upper word of ctr */ + rev x7, x7 /* ... 
to handle the carry */ + add x7, x7, #1 + rev x7, x7 + ins v4.d[0], x7 + b .Lctrcarrydone AES_ENDPROC(aes_ctr_encrypt) .ltorg @@ -386,7 +382,8 @@ AES_ENDPROC(aes_ctr_encrypt) .endm .Lxts_mul_x: - .word 1, 0, 0x87, 0 +CPU_LE( .quad 1, 0x87 ) +CPU_BE( .quad 0x87, 1 ) AES_ENTRY(aes_xts_encrypt) FRAME_PUSH diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index b93170e1cc933806806292057a4eecb5c2666daf..85f07ead7c5c2792c44df0c2970774c60159615d 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -9,6 +9,7 @@ */ #include +#include #define AES_ENTRY(func) ENTRY(neon_ ## func) #define AES_ENDPROC(func) ENDPROC(neon_ ## func) @@ -83,13 +84,13 @@ .endm .macro do_block, enc, in, rounds, rk, rkp, i - ld1 {v15.16b}, [\rk] + ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ sub_bytes \in - ld1 {v15.16b}, [\rkp], #16 + ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 beq 2222f .if \enc == 1 @@ -229,7 +230,7 @@ .endm .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i - ld1 {v15.16b}, [\rk] + ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ @@ -237,7 +238,7 @@ sub_bytes_2x \in0, \in1 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ - ld1 {v15.16b}, [\rkp], #16 + ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 beq 2222f .if \enc == 1 @@ -254,7 +255,7 @@ .endm .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i - ld1 {v15.16b}, [\rk] + ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ @@ -266,7 +267,7 @@ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ - ld1 {v15.16b}, [\rkp], #16 + ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 beq 2222f .if \enc == 1 @@ -306,12 +307,16 @@ .text .align 4 .LForward_ShiftRows: - .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 - .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb +CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 ) +CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb ) +CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 ) +CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 ) .LReverse_ShiftRows: - .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb - .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 +CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb ) +CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 ) +CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 ) +CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 ) .LForward_Sbox: .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index dc457015884e04345ea814d0aa1d274793ad7bc4..f0bb9f0b524fceb8b62ef4be45f57bee3308156e 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -29,8 +29,8 @@ * struct ghash_key const *k, const char *head) */ ENTRY(pmull_ghash_update) - ld1 {SHASH.16b}, [x3] - ld1 {XL.16b}, [x1] + ld1 {SHASH.2d}, [x3] + ld1 {XL.2d}, [x1] movi MASK.16b, #0xe1 ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 shl MASK.2d, MASK.2d, #57 @@ -74,6 +74,6 @@ CPU_LE( rev64 T1.16b, T1.16b ) cbnz w0, 0b - st1 {XL.16b}, [x1] + st1 {XL.2d}, [x1] ret ENDPROC(pmull_ghash_update) diff --git a/arch/arm64/crypto/sha1-ce-core.S 
b/arch/arm64/crypto/sha1-ce-core.S index 033aae6d732a14464d0167e9aaee5e4e9c1a427b..c98e7e849f06f460d99cbddbe31aee73c8d1c686 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -78,7 +78,7 @@ ENTRY(sha1_ce_transform) ld1r {k3.4s}, [x6] /* load state */ - ldr dga, [x0] + ld1 {dgav.4s}, [x0] ldr dgb, [x0, #16] /* load sha1_ce_state::finalize */ @@ -144,7 +144,7 @@ CPU_LE( rev32 v11.16b, v11.16b ) b 1b /* store new state */ -3: str dga, [x0] +3: st1 {dgav.4s}, [x0] str dgb, [x0, #16] ret ENDPROC(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 5df9d9d470adb53156ae88acd785fdf0b78c43bd..01cfee066837cd9a65f6d54d8a27d4916e5a1675 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -85,7 +85,7 @@ ENTRY(sha2_ce_transform) ld1 {v12.4s-v15.4s}, [x8] /* load state */ - ldp dga, dgb, [x0] + ld1 {dgav.4s, dgbv.4s}, [x0] /* load sha256_ce_state::finalize */ ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize] @@ -148,6 +148,6 @@ CPU_LE( rev32 v19.16b, v19.16b ) b 1b /* store new state */ -3: stp dga, dgb, [x0] +3: st1 {dgav.4s, dgbv.4s}, [x0] ret ENDPROC(sha2_ce_transform) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index b71086d251954f7b72837899346525322dc5d724..53211a0acf0f071e15cfd3c8f66c7fb7806724ba 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -217,7 +217,7 @@ static inline void *phys_to_virt(phys_addr_t x) #define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #else #define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page)) -#define __page_to_voff(page) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) +#define __page_to_voff(kaddr) (((u64)(kaddr) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) #define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET)) #define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index e708b3d3af628c7adc1807d6e35713d11919a308..61e032f22e710c88d44ec42ac5752a2f02313422 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -32,6 +32,14 @@ int pcibus_to_node(struct pci_bus *bus); cpumask_of_node(pcibus_to_node(bus))) #endif /* CONFIG_NUMA */ +struct sched_domain; +#ifdef CONFIG_CPU_FREQ +#define arch_scale_freq_capacity cpufreq_scale_freq_capacity +extern unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu); +extern unsigned long cpufreq_scale_max_freq_capacity(int cpu); +#endif +#define arch_scale_cpu_capacity scale_cpu_capacity +extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu); #include diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index b5c3933ed44163b2fb489a00553195afc1ff6806..d1ff83dfe5deae580cdb0ebf93f25b32a63f5a74 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -77,6 +77,7 @@ struct user_fpsimd_state { __uint128_t vregs[32]; __u32 fpsr; __u32 fpcr; + __u32 __reserved[2]; }; struct user_hwdebug_state { diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 16d1b34dad033db21b1dd2172d7ae86f2bb4993f..324b2885ba3081ddfcb77d0f2876b17ff33baa86 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -687,7 +687,7 @@ el0_inv: mov x0, sp mov x1, #BAD_SYNC mov x2, x25 - bl 
bad_mode + bl bad_el0_sync b ret_to_user ENDPROC(el0_sync) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e0c81da60f76270ad122fee3595464852e7d9ee2..8eedeef375d606961db5235b8d3c60d2eb184d51 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -550,6 +550,8 @@ static int hw_break_set(struct task_struct *target, /* (address, ctrl) registers */ limit = regset->n * regset->size; while (count && offset < limit) { + if (count < PTRACE_HBP_ADDR_SZ) + return -EINVAL; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr, offset, offset + PTRACE_HBP_ADDR_SZ); if (ret) @@ -559,6 +561,8 @@ static int hw_break_set(struct task_struct *target, return ret; offset += PTRACE_HBP_ADDR_SZ; + if (!count) + break; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, offset, offset + PTRACE_HBP_CTRL_SZ); if (ret) @@ -595,7 +599,7 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - struct user_pt_regs newregs; + struct user_pt_regs newregs = task_pt_regs(target)->user_regs; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, 0, -1); if (ret) @@ -625,7 +629,8 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - struct user_fpsimd_state newstate; + struct user_fpsimd_state newstate = + target->thread.fpsimd_state.user_fpsimd; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); if (ret) @@ -649,7 +654,7 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - unsigned long tls; + unsigned long tls = target->thread.tp_value; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) @@ -675,7 +680,8 @@ static int system_call_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - int syscallno, ret; + int syscallno = task_pt_regs(target)->syscallno; + int ret; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &syscallno, 0, -1); if (ret) @@ -947,7 +953,7 @@ static int compat_tls_set(struct task_struct *target, const void __user *ubuf) { int ret; - compat_ulong_t tls; + compat_ulong_t tls = target->thread.tp_value; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 349b131baec3b715ea1e4dbd67f0763cd26eaf7c..3b40f2644e840569f89560b7d62a22dc5e00aeee 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -35,7 +36,7 @@ * rebalance_domains for all idle cores and the cpu_power can be updated * during this sequence. 
*/ -static DEFINE_PER_CPU(unsigned long, cpu_scale); +static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) { @@ -47,6 +48,22 @@ static void set_power_scale(unsigned int cpu, unsigned long power) per_cpu(cpu_scale, cpu) = power; } +unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) +{ +#ifdef CONFIG_CPU_FREQ + unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu); + + return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT; +#else + return per_cpu(cpu_scale, cpu); +#endif +} + +static void set_capacity_scale(unsigned int cpu, unsigned long capacity) +{ + per_cpu(cpu_scale, cpu) = capacity; +} + static int __init get_cpu_for_node(struct device_node *node) { struct device_node *cpu_node; @@ -371,11 +388,67 @@ static void update_cpu_power(unsigned int cpu) struct cpu_topology cpu_topology[NR_CPUS]; EXPORT_SYMBOL_GPL(cpu_topology); +/* sd energy functions */ +static inline +const struct sched_group_energy * const cpu_cluster_energy(int cpu) +{ + struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL1]; + + if (!sge) { + pr_warn("Invalid sched_group_energy for Cluster%d\n", cpu); + return NULL; + } + + return sge; +} + +static inline +const struct sched_group_energy * const cpu_core_energy(int cpu) +{ + struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL0]; + + if (!sge) { + pr_warn("Invalid sched_group_energy for CPU%d\n", cpu); + return NULL; + } + + return sge; +} + const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_topology[cpu].core_sibling; } +static inline int cpu_corepower_flags(void) +{ + return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN | \ + SD_SHARE_CAP_STATES; +} + +static struct sched_domain_topology_level arm64_topology[] = { +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) }, +#endif + { cpu_cpu_mask, NULL, cpu_cluster_energy, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + +static void update_cpu_capacity(unsigned int cpu) +{ + unsigned long capacity = SCHED_CAPACITY_SCALE; + + if (cpu_core_energy(cpu)) { + int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1; + capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap; + } + + set_capacity_scale(cpu, capacity); + + pr_info("CPU%d: update cpu_capacity %lu\n", + cpu, arch_scale_cpu_capacity(NULL, cpu)); +} + static void update_siblings_masks(unsigned int cpuid) { struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; @@ -438,6 +511,7 @@ void store_cpu_topology(unsigned int cpuid) topology_populated: update_siblings_masks(cpuid); update_cpu_power(cpuid); + update_cpu_capacity(cpuid); } static void __init reset_cpu_topology(void) @@ -479,10 +553,12 @@ void __init init_cpu_topology(void) if (of_have_populated_dt() && parse_dt_topology()) { reset_cpu_topology(); } else { + set_sched_topology(arm64_topology); for_each_possible_cpu(cpu) update_siblings_masks(cpu); } reset_cpu_power(); parse_dt_cpu_power(); + init_sched_energy_costs(); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index a2e211897426a8af6c62e001e39db3582d989511..d5c42426a316decbb6375e291598944031119815 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -631,17 +631,34 @@ const char *esr_get_class_string(u32 esr) } /* - * bad_mode handles the impossible case in the exception vector. + * bad_mode handles the impossible case in the exception vector. This is always + * fatal. 
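The scale_cpu_capacity() hunk in arch/arm64/kernel/topology.c above combines two 0..1024 fixed-point factors and shifts the product back down by SCHED_CAPACITY_SHIFT. A minimal standalone sketch of that arithmetic follows; it assumes, as the shift implies, that cpufreq_scale_max_freq_capacity() reports the frequency cap as a ratio scaled to SCHED_CAPACITY_SCALE (the sample capacities below are illustrative, not taken from the patch):

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)	/* 1024, as in the kernel */

/* Same fixed-point multiply as scale_cpu_capacity() in the hunk above. */
static unsigned long scaled_capacity(unsigned long cpu_scale,
				     unsigned long max_freq_scale)
{
	return cpu_scale * max_freq_scale >> SCHED_CAPACITY_SHIFT;
}

int main(void)
{
	/* big core, policy max == hardware max: 1024 * 1024 >> 10 = 1024 */
	printf("%lu\n", scaled_capacity(SCHED_CAPACITY_SCALE, SCHED_CAPACITY_SCALE));
	/* same core with its policy max capped to 75% of hardware max: 768 */
	printf("%lu\n", scaled_capacity(SCHED_CAPACITY_SCALE, 768));
	/* little core with a microarchitectural capacity of 446, same cap: 334 */
	printf("%lu\n", scaled_capacity(446, 768));
	return 0;
}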
*/ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) { - siginfo_t info; - void __user *pc = (void __user *)instruction_pointer(regs); console_verbose(); pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n", handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); + + die("Oops - bad mode", regs, 0); + local_irq_disable(); + panic("bad mode"); +} + +/* + * bad_el0_sync handles unexpected, but potentially recoverable synchronous + * exceptions taken from EL0. Unlike bad_mode, this returns. + */ +asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) +{ + siginfo_t info; + void __user *pc = (void __user *)instruction_pointer(regs); + console_verbose(); + + pr_crit("Bad EL0 synchronous exception detected on CPU%d, code 0x%08x -- %s\n", + smp_processor_id(), esr, esr_get_class_string(esr)); __show_regs(regs); info.si_signo = SIGILL; @@ -649,7 +666,10 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) info.si_code = ILL_ILLOPC; info.si_addr = pc; - arm64_notify_die("Oops - bad mode", regs, &info, 0); + current->thread.fault_address = 0; + current->thread.fault_code = 0; + + force_sig_info(info.si_signo, &info, current); } void __pte_error(const char *file, int line, unsigned long val) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 83037cd62d013afa36b85788bccb757508ffcb00..0c848c18ca44706aad457c7b3a4b77a8d379eb33 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -85,7 +85,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) write_sysreg(val, hcr_el2); /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); - /* Make sure we trap PMU access from EL0 to EL2 */ + /* + * Make sure we trap PMU access from EL0 to EL2. Also sanitize + * PMSELR_EL0 to make sure it never contains the cycle + * counter, which could make a PMXEVCNTR_EL0 access UNDEF at + * EL1 instead of being trapped to EL2. 
+ */ + write_sysreg(0, pmselr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); __activate_traps_arch()(); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e9c55cecfb490d1af028c160254b465c21a4dba4..a4312d0a12b3e08a62a6879908057fcd338590e6 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -658,7 +658,8 @@ EXPORT_SYMBOL(dummy_dma_ops); static int __init arm64_dma_init(void) { - if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + if (swiotlb_force == SWIOTLB_FORCE || + max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb = 1; return atomic_pool_init(); diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 2e49bd252fe7643adb66906e2381159b60949520..45bec627bae3ecb3fc533c64c971fa53554c2514 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -51,20 +51,8 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr, *pgsize = PAGE_SIZE; if (!pte_cont(pte)) return 1; - if (!pgd_present(*pgd)) { - VM_BUG_ON(!pgd_present(*pgd)); - return 1; - } pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) { - VM_BUG_ON(!pud_present(*pud)); - return 1; - } pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - VM_BUG_ON(!pmd_present(*pmd)); - return 1; - } if ((pte_t *)pmd == ptep) { *pgsize = PMD_SIZE; return CONT_PMDS; @@ -212,7 +200,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); /* save the 1st pte to return */ pte = ptep_get_and_clear(mm, addr, cpte); - for (i = 1; i < ncontig; ++i) { + for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) { /* * If HW_AFDBM is enabled, then the HW could * turn on the dirty bit for any of the page @@ -250,8 +238,8 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, pfn = pte_pfn(*cpte); ncontig = find_num_contig(vma->vm_mm, addr, cpte, *cpte, &pgsize); - for (i = 0; i < ncontig; ++i, ++cpte) { - changed = ptep_set_access_flags(vma, addr, cpte, + for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) { + changed |= ptep_set_access_flags(vma, addr, cpte, pfn_pte(pfn, hugeprot), dirty); @@ -273,7 +261,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, cpte = huge_pte_offset(mm, addr); ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); - for (i = 0; i < ncontig; ++i, ++cpte) + for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) ptep_set_wrprotect(mm, addr, cpte); } else { ptep_set_wrprotect(mm, addr, ptep); @@ -291,7 +279,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma, cpte = huge_pte_offset(vma->vm_mm, addr); ncontig = find_num_contig(vma->vm_mm, addr, cpte, *cpte, &pgsize); - for (i = 0; i < ncontig; ++i, ++cpte) + for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) ptep_clear_flush(vma, addr, cpte); } else { ptep_clear_flush(vma, addr, ptep); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index af38d027fe2eaf882c015c857dba79972dabcec4..0b9492e85ffce4add905f89957ceb09b53bcf832 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -403,8 +403,11 @@ static void __init free_unused_memmap(void) */ void __init mem_init(void) { - if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + if (swiotlb_force == SWIOTLB_FORCE || + max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb_init(1); + else + swiotlb_force = SWIOTLB_NO_FORCE; set_max_mapnr(pfn_to_page(max_pfn) - mem_map); diff --git a/arch/blackfin/kernel/ptrace.c 
b/arch/blackfin/kernel/ptrace.c index 8d79286ee4e878044b02c963f5fb3c6deaaafb19..360d996451633c8281256048305f88c245ae3819 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -270,7 +270,7 @@ long arch_ptrace(struct task_struct *child, long request, switch (bfin_mem_access_type(addr, to_copy)) { case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: - copied = access_process_vm(child, addr, &tmp, + copied = ptrace_access_vm(child, addr, &tmp, to_copy, FOLL_FORCE); if (copied) break; @@ -323,7 +323,7 @@ long arch_ptrace(struct task_struct *child, long request, switch (bfin_mem_access_type(addr, to_copy)) { case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: - copied = access_process_vm(child, addr, &data, + copied = ptrace_access_vm(child, addr, &data, to_copy, FOLL_FORCE | FOLL_WRITE); break; diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index f0df654ac6fc5ca53ffd6d6951817e92f492e0be..fe1f9cf7b391e54c9b973b07fe00d29ab6099ec5 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *child, long request, /* The trampoline page is globally mapped, no page table to traverse.*/ tmp = *(unsigned long*)addr; } else { - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); + copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; diff --git a/arch/cris/boot/rescue/Makefile b/arch/cris/boot/rescue/Makefile index 52bd0bd1dd22f20e2d11e5d5e492b0e4ed2fb67a..d98edbb30a18e8925d77f10c75cfe64565595398 100644 --- a/arch/cris/boot/rescue/Makefile +++ b/arch/cris/boot/rescue/Makefile @@ -10,6 +10,9 @@ asflags-y += $(LINUXINCLUDE) ccflags-y += -O2 $(LINUXINCLUDE) + +ifdef CONFIG_ETRAX_AXISFLASHMAP + arch-$(CONFIG_ETRAX_ARCH_V10) = v10 arch-$(CONFIG_ETRAX_ARCH_V32) = v32 @@ -28,6 +31,11 @@ $(obj)/rescue.bin: $(obj)/rescue.o FORCE $(call if_changed,objcopy) cp -p $(obj)/rescue.bin $(objtree) +else +$(obj)/rescue.bin: + +endif + $(obj)/testrescue.bin: $(obj)/testrescue.o $(OBJCOPY) $(OBJCOPYFLAGS) $(obj)/testrescue.o tr.bin # Pad it to 784 bytes diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 31aa8c0f68e14a284e0f2b088afe4c29de7daee5..36f660da81242f6cb13be8ded36f4d80bad4759d 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1159,7 +1159,7 @@ arch_ptrace (struct task_struct *child, long request, case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: /* read word at location addr */ - if (access_process_vm(child, addr, &data, sizeof(data), + if (ptrace_access_vm(child, addr, &data, sizeof(data), FOLL_FORCE) != sizeof(data)) return -EIO; diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 7e71a4e0281ba9cc3c2190dd9a06d958754155fb..5fcbdcd7abd0bcecff728726812aca54519608c7 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -69,7 +69,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0) break; - copied = access_process_vm(child, (u64)addrOthers, &tmp, + copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0) break; ret = 0; - if (access_process_vm(child, (u64)addrOthers, 
&data, + if (ptrace_access_vm(child, (u64)addrOthers, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE) == sizeof(data)) break; diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c index 6a02b3a3fa6513a4f517c31c88aa435ddc766f3a..e92fb190e2d628e878b209a14e8c8be551510654 100644 --- a/arch/mips/kvm/entry.c +++ b/arch/mips/kvm/entry.c @@ -521,6 +521,9 @@ void *kvm_mips_build_exit(void *addr) uasm_i_and(&p, V0, V0, AT); uasm_i_lui(&p, AT, ST0_CU0 >> 16); uasm_i_or(&p, V0, V0, AT); +#ifdef CONFIG_64BIT + uasm_i_ori(&p, V0, V0, ST0_SX | ST0_UX); +#endif uasm_i_mtc0(&p, V0, C0_STATUS); uasm_i_ehb(&p); @@ -643,7 +646,7 @@ static void *kvm_mips_build_ret_to_guest(void *addr) /* Setup status register for running guest in UM */ uasm_i_ori(&p, V1, V1, ST0_EXL | KSU_USER | ST0_IE); - UASM_i_LA(&p, AT, ~(ST0_CU0 | ST0_MX)); + UASM_i_LA(&p, AT, ~(ST0_CU0 | ST0_MX | ST0_SX | ST0_UX)); uasm_i_and(&p, V1, V1, AT); uasm_i_mtc0(&p, V1, C0_STATUS); uasm_i_ehb(&p); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 06a60b19acfb53c2788ba6b9c79de983bb6fe43e..29ec9ab3fd5570737caa7f5c63768177a5caffa7 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -360,8 +360,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run); /* Invalidate the icache for these ranges */ - local_flush_icache_range((unsigned long)gebase, - (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); + flush_icache_range((unsigned long)gebase, + (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); /* * Allocate comm page for guest kernel, a TLB will be reserved for diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h index 3f9406d9b9d675c833cdfd6f661b24e359bd3453..da87943328a510d468d089fdf648edfa5933445b 100644 --- a/arch/parisc/include/asm/bitops.h +++ b/arch/parisc/include/asm/bitops.h @@ -6,7 +6,7 @@ #endif #include -#include /* for BITS_PER_LONG/SHIFT_PER_LONG */ +#include #include #include #include @@ -17,6 +17,12 @@ * to include/asm-i386/bitops.h or kerneldoc */ +#if __BITS_PER_LONG == 64 +#define SHIFT_PER_LONG 6 +#else +#define SHIFT_PER_LONG 5 +#endif + #define CHOP_SHIFTCOUNT(x) (((unsigned long) (x)) & (BITS_PER_LONG - 1)) diff --git a/arch/parisc/include/uapi/asm/bitsperlong.h b/arch/parisc/include/uapi/asm/bitsperlong.h index e0a23c7bdd432adc59f4f4ebefcc4d51f91b05f1..07fa7e50bdc069dd989cd52dd283517250a3b1e4 100644 --- a/arch/parisc/include/uapi/asm/bitsperlong.h +++ b/arch/parisc/include/uapi/asm/bitsperlong.h @@ -3,10 +3,8 @@ #if defined(__LP64__) #define __BITS_PER_LONG 64 -#define SHIFT_PER_LONG 6 #else #define __BITS_PER_LONG 32 -#define SHIFT_PER_LONG 5 #endif #include diff --git a/arch/parisc/include/uapi/asm/swab.h b/arch/parisc/include/uapi/asm/swab.h index e78403b129ef927361032c8c92c5e3c82e830941..928e1bbac98fef4fa098163d12a98670000065d2 100644 --- a/arch/parisc/include/uapi/asm/swab.h +++ b/arch/parisc/include/uapi/asm/swab.h @@ -1,6 +1,7 @@ #ifndef _PARISC_SWAB_H #define _PARISC_SWAB_H +#include #include #include @@ -38,7 +39,7 @@ static inline __attribute_const__ __u32 __arch_swab32(__u32 x) } #define __arch_swab32 __arch_swab32 -#if BITS_PER_LONG > 32 +#if __BITS_PER_LONG > 32 /* ** From "PA-RISC 2.0 Architecture", HP Professional Books. ** See Appendix I page 8 , "Endian Byte Swapping". 
@@ -61,6 +62,6 @@ static inline __attribute_const__ __u64 __arch_swab64(__u64 x) return x; } #define __arch_swab64 __arch_swab64 -#endif /* BITS_PER_LONG > 32 */ +#endif /* __BITS_PER_LONG > 32 */ #endif /* _PARISC_SWAB_H */ diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 325f30d82b6434368425d652402fabf66fd4f8ee..47ef8fdcd382cb5616f6375ecedf13a1fb4ea251 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -289,9 +289,26 @@ void __init time_init(void) cr16_hz = 100 * PAGE0->mem_10msec; /* Hz */ - /* register at clocksource framework */ - clocksource_register_hz(&clocksource_cr16, cr16_hz); - /* register as sched_clock source */ sched_clock_register(read_cr16_sched_clock, BITS_PER_LONG, cr16_hz); } + +static int __init init_cr16_clocksource(void) +{ + /* + * The cr16 interval timers are not syncronized across CPUs, so mark + * them unstable and lower rating on SMP systems. + */ + if (num_online_cpus() > 1) { + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; + } + + /* register at clocksource framework */ + clocksource_register_hz(&clocksource_cr16, + 100 * PAGE0->mem_10msec); + + return 0; +} + +device_initcall(init_cr16_clocksource); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 8ff9253930af776b5ca7d408f4bd134cdb88c9b0..1a0b4f63f0e90fbb4e4cb58ea6da95d326bcba3c 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -234,7 +234,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long code, tsk->comm, code, address); print_vma_addr(KERN_CONT " in ", regs->iaoq[0]); - pr_cont(" trap #%lu: %s%c", code, trap_name(code), + pr_cont("\ntrap #%lu: %s%c", code, trap_name(code), vma ? ',':'\n'); if (vma) diff --git a/arch/powerpc/boot/ps3-head.S b/arch/powerpc/boot/ps3-head.S index b6fcbaf5027bedc23c6da663b79453b08f5be4f2..3dc44b05fb9724a9d2761233c6d4bad54c6258cc 100644 --- a/arch/powerpc/boot/ps3-head.S +++ b/arch/powerpc/boot/ps3-head.S @@ -57,11 +57,6 @@ __system_reset_overlay: bctr 1: - /* Save the value at addr zero for a null pointer write check later. */ - - li r4, 0 - lwz r3, 0(r4) - /* Primary delays then goes to _zimage_start in wrapper. */ or 31, 31, 31 /* db16cyc */ diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c index 4ec2d86d3c50571a2a62f27c31f00739595ed219..a05558a7e51ad3b2698e8cf6f1acb8922ff5bfa0 100644 --- a/arch/powerpc/boot/ps3.c +++ b/arch/powerpc/boot/ps3.c @@ -119,13 +119,12 @@ void ps3_copy_vectors(void) flush_cache((void *)0x100, 512); } -void platform_init(unsigned long null_check) +void platform_init(void) { const u32 heapsize = 0x1000000 - (u32)_end; /* 16MiB */ void *chosen; unsigned long ft_addr; u64 rm_size; - unsigned long val; console_ops.write = ps3_console_write; platform_ops.exit = ps3_exit; @@ -153,11 +152,6 @@ void platform_init(unsigned long null_check) printf(" flat tree at 0x%lx\n\r", ft_addr); - val = *(unsigned long *)0; - - if (val != null_check) - printf("null check failed: %lx != %lx\n\r", val, null_check); - ((kernel_entry_t)0)(ft_addr, 0, NULL); ps3_exit(); diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 404b3aabdb4dcda41e2e06f2b90d9c73b2b3fe39..76fe3ccfd381d6e9099f5490c36145d36abaa296 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -181,6 +181,28 @@ case "$elfformat" in elf32-powerpc) format=elf32ppc ;; esac +ld_version() +{ + # Poached from scripts/ld-version.sh, but we don't want to call that because + # this script (wrapper) is distributed separately from the kernel source. 
+ # Extract linker version number from stdin and turn into single number. + awk '{ + gsub(".*\\)", ""); + gsub(".*version ", ""); + gsub("-.*", ""); + split($1,a, "."); + print a[1]*100000000 + a[2]*1000000 + a[3]*10000; + exit + }' +} + +# Do not include PT_INTERP segment when linking pie. Non-pie linking +# just ignores this option. +LD_VERSION=$(${CROSS}ld --version | ld_version) +LD_NO_DL_MIN_VERSION=$(echo 2.26 | ld_version) +if [ "$LD_VERSION" -ge "$LD_NO_DL_MIN_VERSION" ] ; then + nodl="--no-dynamic-linker" +fi platformo=$object/"$platform".o lds=$object/zImage.lds @@ -446,7 +468,7 @@ if [ "$platform" != "miboot" ]; then text_start="-Ttext $link_address" fi #link everything - ${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \ + ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" \ $platformo $tmp $object/wrapper.a rm $tmp fi diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index e407af2b7333500a4ebcc319c5337827abe34509..2e6a823fa502724f6c3a7fb17312b21514ef2868 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -70,7 +70,9 @@ #define HPTE_V_SSIZE_SHIFT 62 #define HPTE_V_AVPN_SHIFT 7 +#define HPTE_V_COMMON_BITS ASM_CONST(0x000fffffffffffff) #define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80) +#define HPTE_V_AVPN_3_0 ASM_CONST(0x000fffffffffff80) #define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) #define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL)) #define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) @@ -80,14 +82,16 @@ #define HPTE_V_VALID ASM_CONST(0x0000000000000001) /* - * ISA 3.0 have a different HPTE format. + * ISA 3.0 has a different HPTE format. */ #define HPTE_R_3_0_SSIZE_SHIFT 58 +#define HPTE_R_3_0_SSIZE_MASK (3ull << HPTE_R_3_0_SSIZE_SHIFT) #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) +#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000) #define HPTE_R_PP ASM_CONST(0x0000000000000003) #define HPTE_R_PPP ASM_CONST(0x8000000000000003) #define HPTE_R_N ASM_CONST(0x0000000000000004) @@ -316,11 +320,42 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize, */ v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); v <<= HPTE_V_AVPN_SHIFT; - if (!cpu_has_feature(CPU_FTR_ARCH_300)) - v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; + v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; return v; } +/* + * ISA v3.0 defines a new HPTE format, which differs from the old + * format in having smaller AVPN and ARPN fields, and the B field + * in the second dword instead of the first. 
+ */ +static inline unsigned long hpte_old_to_new_v(unsigned long v) +{ + /* trim AVPN, drop B */ + return v & HPTE_V_COMMON_BITS; +} + +static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r) +{ + /* move B field from 1st to 2nd dword, trim ARPN */ + return (r & ~HPTE_R_3_0_SSIZE_MASK) | + (((v) >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT); +} + +static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r) +{ + /* insert B field */ + return (v & HPTE_V_COMMON_BITS) | + ((r & HPTE_R_3_0_SSIZE_MASK) << + (HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT)); +} + +static inline unsigned long hpte_new_to_old_r(unsigned long r) +{ + /* clear out B field */ + return r & ~HPTE_R_3_0_SSIZE_MASK; +} + /* * This function sets the AVPN and L fields of the HPTE appropriately * using the base page size and actual page size. @@ -341,12 +376,8 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize, * aligned for the requested page size */ static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize, - int actual_psize, int ssize) + int actual_psize) { - - if (cpu_has_feature(CPU_FTR_ARCH_300)) - pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT; - /* A 4K page needs no special encoding */ if (actual_psize == MMU_PAGE_4K) return pa & HPTE_R_RPN; diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index b312b152461b0539a22c5939ba7728a38f5d8d32..6e834caa37206a476792823463e81ac4c9617f9c 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -23,7 +23,9 @@ static __always_inline bool cpu_has_feature(unsigned long feature) { int i; +#ifndef __clang__ /* clang can't cope with this */ BUILD_BUG_ON(!__builtin_constant_p(feature)); +#endif #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG if (!static_key_initialized) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 28350a294b1e04decfb4fed94a5f89dc2e87d7c9..5e12e19940e27d5b373d03cfa0175dc163bb90e5 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -546,6 +546,7 @@ struct kvm_vcpu_arch { u64 tfiar; u32 cr_tm; + u64 xer_tm; u64 lr_tm; u64 ctr_tm; u64 amr_tm; diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e311c25751a4111d20f8bef1165ff52934a18576..a244e09d2d882843e489ecc9cb04616725a59a9c 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -160,7 +160,9 @@ static __always_inline bool mmu_has_feature(unsigned long feature) { int i; +#ifndef __clang__ /* clang can't cope with this */ BUILD_BUG_ON(!__builtin_constant_p(feature)); +#endif #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG if (!static_key_initialized) { diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index c56ea8c84abb1771ff65f66ba91ffff02bff5fae..c4ced1d01d579fe0b7aeb13a71b21eb2ffb7072c 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -157,7 +157,7 @@ #define PPC_INST_MCRXR 0x7c000400 #define PPC_INST_MCRXR_MASK 0xfc0007fe #define PPC_INST_MFSPR_PVR 0x7c1f42a6 -#define PPC_INST_MFSPR_PVR_MASK 0xfc1fffff +#define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe #define PPC_INST_MFTMR 0x7c0002dc #define PPC_INST_MSGSND 0x7c00019c #define PPC_INST_MSGCLR 0x7c0001dc @@ -174,13 +174,13 @@ #define PPC_INST_RFDI 0x4c00004e #define PPC_INST_RFMCI 0x4c00004c #define PPC_INST_MFSPR_DSCR 0x7c1102a6 -#define 
PPC_INST_MFSPR_DSCR_MASK 0xfc1fffff +#define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR 0x7c1103a6 -#define PPC_INST_MTSPR_DSCR_MASK 0xfc1fffff +#define PPC_INST_MTSPR_DSCR_MASK 0xfc1ffffe #define PPC_INST_MFSPR_DSCR_USER 0x7c0302a6 -#define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1fffff +#define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 -#define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1fffff +#define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1ffffe #define PPC_INST_MFVSRD 0x7c000066 #define PPC_INST_MTVSRD 0x7c000166 #define PPC_INST_SLBFEE 0x7c0007a7 diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index c93cf35ce379550e37bcfe1d127def78af7de11c..0fb1326c3ea24bd8fa0833e0e39a4c1ff89f8aa5 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -596,6 +596,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) +#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a) /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index caec7bf3b99aebc22f5ed14b468e376b571cd9b6..c833d88c423d8be96859a5bb5f2cd37b87ece0a0 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -569,6 +569,7 @@ int main(void) DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); + DEFINE(VCPU_XER_TM, offsetof(struct kvm_vcpu, arch.xer_tm)); DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 5c31369435f24200ae8c06bc69a8795b12ffd8e3..a5dd493670a063c8bce09a4cadf618e1228a3115 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -545,7 +545,7 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - bool *clear_sw_state = flag; + bool clear_sw_state = *(bool *)flag; int i, rc = 1; for (i = 0; rc && i < 3; i++) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 04c546e20cc05e2db9c3f3022efcc4d2b9a91ee7..1f7f908f186e2a6f1111f50af91c6377a6cdb20d 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -214,9 +214,9 @@ booting_thread_hwid: */ _GLOBAL(book3e_start_thread) LOAD_REG_IMMEDIATE(r5, MSR_KERNEL) - cmpi 0, r3, 0 + cmpwi r3, 0 beq 10f - cmpi 0, r3, 1 + cmpwi r3, 1 beq 11f /* If the thread id is invalid, just exit. */ b 13f @@ -241,9 +241,9 @@ _GLOBAL(book3e_start_thread) * r3 = the thread physical id */ _GLOBAL(book3e_stop_thread) - cmpi 0, r3, 0 + cmpwi r3, 0 beq 10f - cmpi 0, r3, 1 + cmpwi r3, 1 beq 10f /* If the thread id is invalid, just exit. 
*/ b 13f diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 6ca9a2ffaac72d38b20202b6c6e845c39541608f..35f5244782d9f227ed079e6ddbea37c844e9f8b5 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -180,6 +180,7 @@ static int ibmebus_create_device(struct device_node *dn) static int ibmebus_create_devices(const struct of_device_id *matches) { struct device_node *root, *child; + struct device *dev; int ret = 0; root = of_find_node_by_path("/"); @@ -188,9 +189,12 @@ static int ibmebus_create_devices(const struct of_device_id *matches) if (!of_match_node(matches, child)) continue; - if (bus_find_device(&ibmebus_bus_type, NULL, child, - ibmebus_match_node)) + dev = bus_find_device(&ibmebus_bus_type, NULL, child, + ibmebus_match_node); + if (dev) { + put_device(dev); continue; + } ret = ibmebus_create_device(child); if (ret) { @@ -262,6 +266,7 @@ static ssize_t ibmebus_store_probe(struct bus_type *bus, const char *buf, size_t count) { struct device_node *dn = NULL; + struct device *dev; char *path; ssize_t rc = 0; @@ -269,8 +274,10 @@ static ssize_t ibmebus_store_probe(struct bus_type *bus, if (!path) return -ENOMEM; - if (bus_find_device(&ibmebus_bus_type, NULL, path, - ibmebus_match_path)) { + dev = bus_find_device(&ibmebus_bus_type, NULL, path, + ibmebus_match_path); + if (dev) { + put_device(dev); printk(KERN_WARNING "%s: %s has already been probed\n", __func__, path); rc = -EEXIST; @@ -307,6 +314,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus, if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path, ibmebus_match_path))) { of_device_unregister(to_platform_device(dev)); + put_device(dev); kfree(path); return count; diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 93cf7a5846a6f5875534cc43a2a3409357dc482a..030d72df5dd5357f54f36d7ad11725204336cef5 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -296,7 +296,7 @@ _GLOBAL(flush_instruction_cache) lis r3, KERNELBASE@h iccci 0,r3 #endif -#elif CONFIG_FSL_BOOKE +#elif defined(CONFIG_FSL_BOOKE) BEGIN_FTR_SECTION mfspr r3,SPRN_L1CSR0 ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 88ac964f48586bb90f5b1daf19e9e192237dec6a..1e8c57207346e6f9447d8bdc63714282abbbbe62 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2747,6 +2747,9 @@ static void __init prom_find_boot_cpu(void) cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); + if (!PHANDLE_VALID(cpu_pkg)) + return; + prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval)); prom.cpu = be32_to_cpu(rval); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index b1ec62f2cc316a337c77fc3fe5a3bd6a23680d40..5c8f12fe9721d0945af35666348a26891c023a04 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -463,6 +463,10 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, flush_fp_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + buf[32] = target->thread.fp_state.fpscr; + /* copy to local buffer then write that out */ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); if (i) @@ -672,6 +676,9 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset, flush_altivec_to_thread(target); flush_vsx_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = 
user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); if (!ret) @@ -1019,6 +1026,10 @@ static int tm_cfpr_set(struct task_struct *target, flush_fp_to_thread(target); flush_altivec_to_thread(target); + for (i = 0; i < 32; i++) + buf[i] = target->thread.TS_CKFPR(i); + buf[32] = target->thread.ckfp_state.fpscr; + /* copy to local buffer then write that out */ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); if (i) @@ -1283,6 +1294,9 @@ static int tm_cvsx_set(struct task_struct *target, flush_altivec_to_thread(target); flush_vsx_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); if (!ret) diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 010b7b310237e4be38ef1d7bcc15a76050fc4469..1e887f3a61a675a0dd9a7ff5aa29c2b205c809ec 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -73,7 +73,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; - copied = access_process_vm(child, (u64)addrOthers, &tmp, + copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; ret = 0; - if (access_process_vm(child, (u64)addrOthers, &tmp, + if (ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE | FOLL_WRITE) == sizeof(tmp)) break; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3686471be32bd1a8e987c85dea958e553625fec2..094deb60c6fe0ab0ce035ed5ceb8799846b26d5d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1288,6 +1288,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TM_CR: *val = get_reg_val(id, vcpu->arch.cr_tm); break; + case KVM_REG_PPC_TM_XER: + *val = get_reg_val(id, vcpu->arch.xer_tm); + break; case KVM_REG_PPC_TM_LR: *val = get_reg_val(id, vcpu->arch.lr_tm); break; @@ -1498,6 +1501,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TM_CR: vcpu->arch.cr_tm = set_reg_val(id, *val); break; + case KVM_REG_PPC_TM_XER: + vcpu->arch.xer_tm = set_reg_val(id, *val); + break; case KVM_REG_PPC_TM_LR: vcpu->arch.lr_tm = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 99b4e9d5dd238c70b795db5e9fedbaa7741fc753..5420d060c6f61af588d392737a48941dce0979b8 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -653,6 +653,8 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, HPTE_V_ABSENT); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); + /* Don't lose R/C bit updates done by hardware */ + r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); hpte[1] = cpu_to_be64(r); } } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c3c1d1bcfc67dad17b522b029af9f4ed07dcdab0..6f81adb112f1bf52543cd936b8992b9a98cfde00 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2600,11 +2600,13 @@ kvmppc_save_tm: mfctr r7 mfspr r8, SPRN_AMR mfspr r10, SPRN_TAR + mfxer r11 std r5, VCPU_LR_TM(r9) stw r6, VCPU_CR_TM(r9) 
std r7, VCPU_CTR_TM(r9) std r8, VCPU_AMR_TM(r9) std r10, VCPU_TAR_TM(r9) + std r11, VCPU_XER_TM(r9) /* Restore r12 as trap number. */ lwz r12, VCPU_TRAP(r9) @@ -2697,11 +2699,13 @@ kvmppc_restore_tm: ld r7, VCPU_CTR_TM(r4) ld r8, VCPU_AMR_TM(r4) ld r9, VCPU_TAR_TM(r4) + ld r10, VCPU_XER_TM(r4) mtlr r5 mtcr r6 mtctr r7 mtspr SPRN_AMR, r8 mtspr SPRN_TAR, r9 + mtxer r10 /* * Load up PPR and DSCR values but don't put them in the actual SPRs diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 83ddc0e171b0d64e069e288e15f08074fce8e3a6..ad9fd5245be280571bec40ebb7ed31e615f16f25 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -221,13 +221,18 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, return -1; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; if (!(vflags & HPTE_V_BOLTED)) { DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n", i, hpte_v, hpte_r); } + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte_r = hpte_old_to_new_r(hpte_v, hpte_r); + hpte_v = hpte_old_to_new_v(hpte_v); + } + hptep->r = cpu_to_be64(hpte_r); /* Guarantee the second dword is visible before the valid bit */ eieio(); @@ -295,6 +300,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, vpn, want_v & HPTE_V_AVPN, slot, newpp); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* * We need to invalidate the TLB always because hpte_remove doesn't do * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less @@ -309,6 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, native_lock_hpte(hptep); /* recheck with locks held */ hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))) { ret = -1; @@ -350,6 +359,8 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) for (i = 0; i < HPTES_PER_GROUP; i++) { hptep = htab_address + slot; hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) /* HPTE matches */ @@ -409,6 +420,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, want_v = hpte_encode_avpn(vpn, bpsize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* * We need to invalidate the TLB always because hpte_remove doesn't do @@ -467,6 +480,8 @@ static void native_hugepage_invalidate(unsigned long vsid, want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) @@ -504,6 +519,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, /* Look at the 8 bit LP value */ unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte_v = hpte_new_to_old_v(hpte_v, hpte_r); + hpte_r = 
hpte_new_to_old_r(hpte_r); + } if (!(hpte_v & HPTE_V_LARGE)) { size = MMU_PAGE_4K; a_size = MMU_PAGE_4K; @@ -512,11 +531,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, a_size = hpte_page_sizes[lp] >> 4; } /* This works for all page sizes, and for 256M and 1T segments */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT; - else - *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; - + *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; shift = mmu_psize_defs[size].shift; avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm); @@ -639,6 +654,9 @@ static void native_flush_hash_range(unsigned long number, int local) want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, + be64_to_cpu(hptep->r)); if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) native_unlock_hpte(hptep); diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 688b54517655f1ef787023f18f3cacdcbd62ba3b..9a25dce878757517a5a79fcd9b04d280d02f72d6 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -65,7 +65,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, if (!pmdp) return -ENOMEM; if (map_page_size == PMD_SIZE) { - ptep = (pte_t *)pudp; + ptep = pmdp_ptep(pmdp); goto set_the_pte; } ptep = pte_alloc_kernel(pmdp, ea); @@ -90,7 +90,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, } pmdp = pmd_offset(pudp, ea); if (map_page_size == PMD_SIZE) { - ptep = (pte_t *)pudp; + ptep = pmdp_ptep(pmdp); goto set_the_pte; } if (!pmd_present(*pmdp)) { @@ -159,7 +159,7 @@ static void __init radix_init_pgtable(void) * Allocate Partition table and process table for the * host. */ - BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 23), "Process table size too large."); + BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large."); process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. 
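The mmu-hash.h comment above describes the ISA v3.0 HPTE layout change: the B (segment size) field moves from the first dword to the second, and the AVPN/ARPN fields shrink. A standalone sketch of the four conversion helpers used in the hash_native_64.c hunks, with a round-trip check, may make the bit movement easier to follow; the constants are copied from the patch, while the sample values (and the assumption that the trimmed AVPN bits are zero, so the round trip is exact) are purely illustrative:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Constants copied from the book3s/64/mmu-hash.h hunk above. */
#define HPTE_V_SSIZE_SHIFT	62
#define HPTE_V_COMMON_BITS	0x000fffffffffffffULL
#define HPTE_R_3_0_SSIZE_SHIFT	58
#define HPTE_R_3_0_SSIZE_MASK	(3ULL << HPTE_R_3_0_SSIZE_SHIFT)

static uint64_t old_to_new_v(uint64_t v)
{
	return v & HPTE_V_COMMON_BITS;		/* trim AVPN, drop B */
}

static uint64_t old_to_new_r(uint64_t v, uint64_t r)
{
	/* move the B (segment size) field from dword 0 to dword 1 */
	return (r & ~HPTE_R_3_0_SSIZE_MASK) |
	       ((v >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT);
}

static uint64_t new_to_old_v(uint64_t v, uint64_t r)
{
	/* put the B field back into dword 0 */
	return (v & HPTE_V_COMMON_BITS) |
	       ((r & HPTE_R_3_0_SSIZE_MASK) <<
		(HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT));
}

static uint64_t new_to_old_r(uint64_t r)
{
	return r & ~HPTE_R_3_0_SSIZE_MASK;	/* clear the B field again */
}

int main(void)
{
	/* old-format entry: B = 0b01 (1T segment) in v[63:62], bits 52..61 clear */
	uint64_t old_v = (1ULL << HPTE_V_SSIZE_SHIFT) | 0x0000123456789a81ULL;
	uint64_t old_r = 0x000000000fedc193ULL;

	uint64_t new_v = old_to_new_v(old_v);
	uint64_t new_r = old_to_new_r(old_v, old_r);
	printf("new v=%016llx r=%016llx\n",
	       (unsigned long long)new_v, (unsigned long long)new_r);

	/* round-trips exactly because old_v carried nothing in bits 52..61 */
	assert(new_to_old_v(new_v, new_r) == old_v);
	assert(new_to_old_r(new_r) == old_r);
	return 0;
}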
@@ -181,7 +181,7 @@ static void __init radix_init_partition_table(void) rts_field = radix__get_tree_size(); - BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); + BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR); diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h index 6447dc1c3d896cea18615d3b5bacc4bb6285fbb1..929b56d47ad9bf6bc79f7ae9dd0ab27b2142427d 100644 --- a/arch/powerpc/perf/power9-events-list.h +++ b/arch/powerpc/perf/power9-events-list.h @@ -16,7 +16,7 @@ EVENT(PM_CYC, 0x0001e) EVENT(PM_ICT_NOSLOT_CYC, 0x100f8) EVENT(PM_CMPLU_STALL, 0x1e054) EVENT(PM_INST_CMPL, 0x00002) -EVENT(PM_BRU_CMPL, 0x40060) +EVENT(PM_BRU_CMPL, 0x10012) EVENT(PM_BR_MPRED_CMPL, 0x400f6) /* All L1 D cache load references counted at finish, gated by reject */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d4b33dd2d9e740d789da5768957beefcafd7f997..dcdfee0cd4f2f134f6fddeabe8130232bed4d9c6 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -145,7 +145,7 @@ static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) */ rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); - if (rc != OPAL_SUCCESS) + if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED) pr_warn("%s: Error %lld unfreezing PHB#%d-PE#%d\n", __func__, rc, phb->hose->global_number, pe_no); diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index cb3c50328de8758eea51258b556160b0be1b248a..cc2b281a3766c697e204479b3d9575e3d0dfb14b 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn, vflags &= ~HPTE_V_SECONDARY; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags; spin_lock_irqsave(&ps3_htab_lock, flags); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index aa35245d8d6d337204806bbb5888e9df4d5455ef..f2c98f6c1c9c93afec8a674f0e27c6a5e499767e 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -145,7 +145,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, hpte_group, vpn, pa, rflags, vflags, psize); hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; if (!(vflags & HPTE_V_BOLTED)) pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index d38e86fd5720f181ac10cc869b35e8b093ca5803..60c57657c772fef576e5c4703dfb2a17203978a7 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -20,6 +20,7 @@ #include #include #include +#include static void icp_opal_teardown_cpu(void) { @@ -39,7 +40,26 @@ static void icp_opal_flush_ipi(void) * Should we be flagging idle loop instead? * Or creating some task to be scheduled? 
*/ - opal_int_eoi((0x00 << 24) | XICS_IPI); + if (opal_int_eoi((0x00 << 24) | XICS_IPI) > 0) + force_external_irq_replay(); +} + +static unsigned int icp_opal_get_xirr(void) +{ + unsigned int kvm_xirr; + __be32 hw_xirr; + int64_t rc; + + /* Handle an interrupt latched by KVM first */ + kvm_xirr = kvmppc_get_xics_latch(); + if (kvm_xirr) + return kvm_xirr; + + /* Then ask OPAL */ + rc = opal_int_get_xirr(&hw_xirr, false); + if (rc < 0) + return 0; + return be32_to_cpu(hw_xirr); } static unsigned int icp_opal_get_irq(void) @@ -47,12 +67,8 @@ static unsigned int icp_opal_get_irq(void) unsigned int xirr; unsigned int vec; unsigned int irq; - int64_t rc; - rc = opal_int_get_xirr(&xirr, false); - if (rc < 0) - return 0; - xirr = be32_to_cpu(xirr); + xirr = icp_opal_get_xirr(); vec = xirr & 0x00ffffff; if (vec == XICS_IRQ_SPURIOUS) return 0; @@ -67,7 +83,8 @@ static unsigned int icp_opal_get_irq(void) xics_mask_unknown_vec(vec); /* We might learn about it later, so EOI it */ - opal_int_eoi(xirr); + if (opal_int_eoi(xirr) > 0) + force_external_irq_replay(); return 0; } diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 9cc050f9536c10f1658472c132e5ac41c13db4fe..1113389d0a39fe661229b68d28cf99e7a2e18eab 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -507,8 +507,10 @@ static ssize_t prng_tdes_read(struct file *file, char __user *ubuf, prng_data->prngws.byte_counter += n; prng_data->prngws.reseed_counter += n; - if (copy_to_user(ubuf, prng_data->buf, chunk)) - return -EFAULT; + if (copy_to_user(ubuf, prng_data->buf, chunk)) { + ret = -EFAULT; + break; + } nbytes -= chunk; ret += chunk; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 9336e824e2db5e119810b438c8c0e272487937b4..fc2974b929c37691c59f8839dcb7b447f5678c09 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -963,6 +963,11 @@ static int s390_fpregs_set(struct task_struct *target, if (target == current) save_fpu_regs(); + if (MACHINE_HAS_VX) + convert_vx_to_fp(fprs, target->thread.fpu.vxrs); + else + memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs)); + /* If setting FPC, must validate it first. */ if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { u32 ufpc[2] = { target->thread.fpu.fpc, 0 }; @@ -1067,6 +1072,9 @@ static int s390_vxrs_low_set(struct task_struct *target, if (target == current) save_fpu_regs(); + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1); + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); if (rc == 0) for (i = 0; i < __NUM_VXRS_LOW; i++) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7f7ba5f23f130fae0639b44af7e93c84ed85a654..d027f2eb3559f6a095b709dc10fbfc94235752b1 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -445,7 +445,7 @@ static void __init setup_resources(void) * part of the System RAM resource. 
*/ if (crashk_res.end) { - memblock_add(crashk_res.start, resource_size(&crashk_res)); + memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0); memblock_reserve(crashk_res.start, resource_size(&crashk_res)); insert_resource(&iomem_resource, &crashk_res); } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index e959c02e0cac1fede8ed48466783df45efafa594..8705ee66c0874fb73fe6cdf3c751a2d717fbca90 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -448,6 +448,7 @@ static int __init s390_topology_init(void) struct sysinfo_15_1_x *info; int i; + set_sched_topology(s390_topology); if (!MACHINE_HAS_TOPOLOGY) return 0; tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL); @@ -460,7 +461,6 @@ static int __init s390_topology_init(void) alloc_masks(info, &socket_info, 1); alloc_masks(info, &book_info, 2); alloc_masks(info, &drawer_info, 3); - set_sched_topology(s390_topology); return 0; } early_initcall(s390_topology_init); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9c7a1ecfe6bd1f41b2c98061b2cb5471ed57b5e1..47a1de77b18dc0a5a7ec0a38dcf32db60bb4e171 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -916,7 +916,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, - S390_ARCH_FAC_LIST_SIZE_BYTE); + sizeof(S390_lowcore.stfle_fac_list)); if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) ret = -EFAULT; kfree(mach); @@ -1437,7 +1437,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* Populate the facility mask initially. */ memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list, - S390_ARCH_FAC_LIST_SIZE_BYTE); + sizeof(S390_lowcore.stfle_fac_list)); for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { if (i < kvm_s390_fac_list_mask_size()) kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i]; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 7a1897c51c5495f3f2b13d86ba8f6344e2234788..d56ef26d46816b834068609ceb940ce01901d731 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -202,7 +202,7 @@ static inline pgste_t ptep_xchg_start(struct mm_struct *mm, return pgste; } -static inline void ptep_xchg_commit(struct mm_struct *mm, +static inline pte_t ptep_xchg_commit(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pgste_t pgste, pte_t old, pte_t new) { @@ -220,6 +220,7 @@ static inline void ptep_xchg_commit(struct mm_struct *mm, } else { *ptep = new; } + return old; } pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, @@ -231,7 +232,7 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_direct(mm, addr, ptep); - ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); preempt_enable(); return old; } @@ -246,7 +247,7 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_lazy(mm, addr, ptep); - ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); preempt_enable(); return old; } diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 6b2f72f523b91bb6c68347602b33ca2446807229..049e3860ac5483712bc51789f42a3e8552af71da 100644 --- 
a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -419,6 +419,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, size_t size, dma_addr_t *handle, enum dma_data_direction dir) { + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); dma_addr_t dma_addr_base, dma_addr; int flags = ZPCI_PTE_VALID; @@ -426,8 +427,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, unsigned long pa = 0; int ret; - size = PAGE_ALIGN(size); - dma_addr_base = dma_alloc_address(dev, size >> PAGE_SHIFT); + dma_addr_base = dma_alloc_address(dev, nr_pages); if (dma_addr_base == DMA_ERROR_CODE) return -ENOMEM; @@ -436,26 +436,27 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, flags |= ZPCI_TABLE_PROTECTED; for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { - pa = page_to_phys(sg_page(s)) + s->offset; - ret = __dma_update_trans(zdev, pa, dma_addr, s->length, flags); + pa = page_to_phys(sg_page(s)); + ret = __dma_update_trans(zdev, pa, dma_addr, + s->offset + s->length, flags); if (ret) goto unmap; - dma_addr += s->length; + dma_addr += s->offset + s->length; } ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); if (ret) goto unmap; *handle = dma_addr_base; - atomic64_add(size >> PAGE_SHIFT, &zdev->mapped_pages); + atomic64_add(nr_pages, &zdev->mapped_pages); return ret; unmap: dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, ZPCI_PTE_INVALID); - dma_free_address(dev, dma_addr_base, size >> PAGE_SHIFT); + dma_free_address(dev, dma_addr_base, nr_pages); zpci_err("map error:\n"); zpci_err_dma(ret, pa); return ret; diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index d89b7011667cb4f1a6f3ad55238d2c815e229c41..e279572824b15e07616b98215fb51c1fa65f4c9f 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -111,7 +111,7 @@ static int tile_gpr_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { int ret; - struct pt_regs regs; + struct pt_regs regs = *task_pt_regs(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®s, 0, sizeof(regs)); diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2d449337a36051183c8468f469a8816e6c1e9e7c..75725dc4df7a7a31c8f88c7b59c88d70529ed3de 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -97,6 +97,8 @@ else KBUILD_CFLAGS += $(call cc-option,-mno-80387) KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387) + KBUILD_CFLAGS += -fno-pic + # Use -mpreferred-stack-boundary=3 if supported. 
KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig new file mode 100644 index 0000000000000000000000000000000000000000..a1c83c4e78ae847023aa6ca6bc011102d46a0414 --- /dev/null +++ b/arch/x86/configs/i386_ranchu_defconfig @@ -0,0 +1,424 @@ +# CONFIG_64BIT is not set +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_ARCH_MMAP_RND_BITS=16 +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_SMP=y +CONFIG_X86_BIGSMP=y +CONFIG_MCORE2=y +CONFIG_X86_GENERIC=y +CONFIG_HPET_TIMER=y +CONFIG_NR_CPUS=512 +CONFIG_PREEMPT=y +# CONFIG_X86_MCE is not set +CONFIG_X86_REBOOTFIXUPS=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_KSM=y +CONFIG_CMA=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_HZ_100=y +CONFIG_PHYSICAL_START=0x100000 +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_CPU_FREQ=y +# CONFIG_CPU_FREQ_STAT is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_PCIEPORTBUS=y +# CONFIG_PCIEASPM is not set +CONFIG_PCCARD=y +CONFIG_YENTA=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG_DESTROY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y 
+CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_MAC80211_LEDS=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=16 +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_ATA_PIIX=y +CONFIG_PATA_AMD=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_PATA_MPIIX=y +CONFIG_ATA_GENERIC=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_DEBUG=y +CONFIG_DM_CRYPT=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_BNX2=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y +CONFIG_NE2K_PCI=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +CONFIG_R8169=y +CONFIG_FDDI=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_USB_USBNET=y +CONFIG_INPUT_POLLDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is 
not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_NVRAM=y +CONFIG_I2C_I801=y +CONFIG_BATTERY_GOLDFISH=y +CONFIG_WATCHDOG=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_EFI=y +CONFIG_FB_GOLDFISH=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_PRINTER=y +CONFIG_USB_STORAGE=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_EDAC=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_VIRTIO_PCI=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SYNC_FILE=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_SND_HDA_INTEL=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_GOLDFISH_SYNC=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ISCSI_IBFT_FIND=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_FUSE_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=2048 +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_SCHED_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +CONFIG_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_CRYPTO_AES_586=y +CONFIG_CRYPTO_TWOFISH=y 
+CONFIG_ASYMMETRIC_KEY_TYPE=y +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_PKCS7_TEST_KEY=y +# CONFIG_VIRTUALIZATION is not set +CONFIG_CRC_T10DIF=y diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig new file mode 100644 index 0000000000000000000000000000000000000000..d50434f501fbbc89a48869f3d73e26fc3fcfdf2c --- /dev/null +++ b/arch/x86/configs/x86_64_ranchu_defconfig @@ -0,0 +1,419 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_ARCH_MMAP_RND_BITS=32 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16 +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_SMP=y +CONFIG_MCORE2=y +CONFIG_MAXSMP=y +CONFIG_PREEMPT=y +# CONFIG_X86_MCE is not set +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_KSM=y +CONFIG_CMA=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_HZ_100=y +CONFIG_PHYSICAL_START=0x100000 +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_CPU_FREQ=y +# CONFIG_CPU_FREQ_STAT is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_PCI_MMCONFIG=y +CONFIG_PCIEPORTBUS=y +# CONFIG_PCIEASPM is not set +CONFIG_PCCARD=y +CONFIG_YENTA=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_IA32_EMULATION=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG_DESTROY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y 
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_MAC80211_LEDS=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DMA_CMA=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_ATA_PIIX=y +CONFIG_PATA_AMD=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_PATA_MPIIX=y +CONFIG_ATA_GENERIC=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_DEBUG=y +CONFIG_DM_CRYPT=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_BNX2=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y +CONFIG_NE2K_PCI=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +CONFIG_R8169=y +CONFIG_FDDI=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_USB_USBNET=y +CONFIG_INPUT_POLLDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is 
not set +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_NVRAM=y +CONFIG_I2C_I801=y +CONFIG_BATTERY_GOLDFISH=y +CONFIG_WATCHDOG=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_EFI=y +CONFIG_FB_GOLDFISH=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_PRINTER=y +CONFIG_USB_STORAGE=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_EDAC=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_VIRTIO_PCI=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SYNC_FILE=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_SND_HDA_INTEL=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_GOLDFISH_SYNC=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ISCSI_IBFT_FIND=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_FUSE_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_SCHED_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +CONFIG_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y 
+CONFIG_CRYPTO_TWOFISH=y +CONFIG_ASYMMETRIC_KEY_TYPE=y +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_PKCS7_TEST_KEY=y +# CONFIG_VIRTUALIZATION is not set +CONFIG_CRC_T10DIF=y diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 21b352a11b493f4868b1a091dc416c7e2279ad34..edba8606b99a0daa86853e6869553c6854b0efe3 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -889,8 +889,8 @@ ftrace_graph_call: jmp ftrace_stub #endif -.globl ftrace_stub -ftrace_stub: +/* This is weak to keep gas from relaxing the jumps */ +WEAK(ftrace_stub) ret END(ftrace_caller) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 6e395c9969002839fc0fd1e94f95fe0ba77abe0b..7fe88bb57e366cbae38a68307e094b3c48ed2e65 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -365,7 +365,11 @@ int x86_add_exclusive(unsigned int what) { int i; - if (x86_pmu.lbr_pt_coexist) + /* + * When lbr_pt_coexist we allow PT to coexist with either LBR or BTS. + * LBR and BTS are still mutually exclusive. + */ + if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) return 0; if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { @@ -388,7 +392,7 @@ int x86_add_exclusive(unsigned int what) void x86_del_exclusive(unsigned int what) { - if (x86_pmu.lbr_pt_coexist) + if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) return; atomic_dec(&x86_pmu.lbr_exclusive[what]); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index da51e5a3e2ff799d54257aa3db5151b4723c8292..fec8a461bdef6da49c0e7f46c655f235e7fc1275 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -594,6 +594,9 @@ static int __init cstate_probe(const struct cstate_model *cm) static inline void cstate_cleanup(void) { + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING); + if (has_cstate_core) perf_pmu_unregister(&cstate_core_pmu); @@ -606,16 +609,16 @@ static int __init cstate_init(void) int err; cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING, - "AP_PERF_X86_CSTATE_STARTING", cstate_cpu_init, - NULL); + "perf/x86/cstate:starting", cstate_cpu_init, NULL); cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE, - "AP_PERF_X86_CSTATE_ONLINE", NULL, cstate_cpu_exit); + "perf/x86/cstate:online", NULL, cstate_cpu_exit); if (has_cstate_core) { err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); if (err) { has_cstate_core = false; pr_info("Failed to register cstate core pmu\n"); + cstate_cleanup(); return err; } } @@ -629,8 +632,7 @@ static int __init cstate_init(void) return err; } } - - return err; + return 0; } static int __init cstate_pmu_init(void) @@ -655,8 +657,6 @@ module_init(cstate_pmu_init); static void __exit cstate_pmu_exit(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING); cstate_cleanup(); } module_exit(cstate_pmu_exit); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index dbaaf7dc8373cb0248a4637abe4a211f266fb2db..19d646a783fd6c71373294fb8e16d626c2afe5fc 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -763,30 +763,6 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) pmu->registered = false; } -static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) -{ - struct intel_uncore_pmu *pmu = type->pmus; - struct 
intel_uncore_box *box; - int i, pkg; - - if (pmu) { - pkg = topology_physical_package_id(cpu); - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (box) - uncore_box_exit(box); - } - } -} - -static void uncore_exit_boxes(void *dummy) -{ - struct intel_uncore_type **types; - - for (types = uncore_msr_uncores; *types; types++) - __uncore_exit_boxes(*types++, smp_processor_id()); -} - static void uncore_free_boxes(struct intel_uncore_pmu *pmu) { int pkg; @@ -1077,22 +1053,12 @@ static int uncore_cpu_dying(unsigned int cpu) return 0; } -static int first_init; - static int uncore_cpu_starting(unsigned int cpu) { struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, pkg, ncpus = 1; - - if (first_init) { - /* - * On init we get the number of online cpus in the package - * and set refcount for all of them. - */ - ncpus = cpumask_weight(topology_core_cpumask(cpu)); - } + int i, pkg; pkg = topology_logical_package_id(cpu); for (; *types; types++) { @@ -1103,7 +1069,7 @@ static int uncore_cpu_starting(unsigned int cpu) if (!box) continue; /* The first cpu on a package activates the box */ - if (atomic_add_return(ncpus, &box->refcnt) == ncpus) + if (atomic_inc_return(&box->refcnt) == 1) uncore_box_init(box); } } @@ -1407,19 +1373,17 @@ static int __init intel_uncore_init(void) "PERF_X86_UNCORE_PREP", uncore_cpu_prepare, NULL); } - first_init = 1; + cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING, "AP_PERF_X86_UNCORE_STARTING", uncore_cpu_starting, uncore_cpu_dying); - first_init = 0; + cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, "AP_PERF_X86_UNCORE_ONLINE", uncore_event_cpu_online, uncore_event_cpu_offline); return 0; err: - /* Undo box->init_box() */ - on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1); uncore_types_exit(uncore_msr_uncores); uncore_pci_exit(); return ret; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index a77ee026643d23fac4808a1f1c32ca42e4999769..bcbb1d2ae10b21c2346c725d0f2ea74b0b641326 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -604,7 +604,7 @@ struct x86_pmu { u64 lbr_sel_mask; /* LBR_SELECT valid bits */ const int *lbr_sel_map; /* lbr_select mappings */ bool lbr_double_abort; /* duplicated lbr aborts */ - bool lbr_pt_coexist; /* LBR may coexist with PT */ + bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */ /* * Intel PT/LBR/BTS are exclusive diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h new file mode 100644 index 0000000000000000000000000000000000000000..44b8762fa0c784b44a44d22e390af48ce2dcb5d0 --- /dev/null +++ b/arch/x86/include/asm/asm-prototypes.h @@ -0,0 +1,16 @@ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#ifndef CONFIG_X86_CMPXCHG64 +extern void cmpxchg8b_emu(void); +#endif diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a39629206864e5bb74aaddea15ca1ab762877042..ed10b5bf9b93d73b3102263c7b025af6fa9b47e9 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -311,4 +311,6 @@ #define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ + 
#endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index ae135de547f561d5888afcf3da408b128904d64e..835aa51c7f6ebb914592752373f55287ca8cc235 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,10 +6,8 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 -#ifdef CONFIG_CHECKPOINT_RESTORE -# define ARCH_MAP_VDSO_X32 0x2001 -# define ARCH_MAP_VDSO_32 0x2002 -# define ARCH_MAP_VDSO_64 0x2003 -#endif +#define ARCH_MAP_VDSO_X32 0x2001 +#define ARCH_MAP_VDSO_32 0x2002 +#define ARCH_MAP_VDSO_64 0x2003 #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 88c657b057e223eb0eaccdac699aa91505638129..f2234918e4944e559d082761985860107880c71d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2158,21 +2158,6 @@ int __generic_processor_info(int apicid, int version, bool enabled) } } - /* - * This can happen on physical hotplug. The sanity check at boot time - * is done from native_smp_prepare_cpus() after num_possible_cpus() is - * established. - */ - if (topology_update_package_map(apicid, cpu) < 0) { - int thiscpu = max + disabled_cpus; - - pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n", - thiscpu, apicid); - - disabled_cpus++; - return -ENOSPC; - } - /* * Validate version */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 48e6d84f173e2325a40a50e5de29a386a2c9277b..7249f1500bcbe840eefc0f4f9bcdee1cb435cc7f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1876,6 +1876,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ack_level, .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -1887,6 +1888,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ir_ack_level, .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -2116,6 +2118,7 @@ static inline void __init check_timer(void) if (idx != -1 && irq_trigger(idx)) unmask_ioapic_irq(irq_get_chip_data(0)); } + irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); if (timer_irq_works()) { if (disable_timer_pin_1 > 0) @@ -2137,6 +2140,7 @@ static inline void __init check_timer(void) * legacy devices should be connected to IO APIC #0 */ replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2); + irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); legacy_pic->unmask(0); if (timer_irq_works()) { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1e81a37c034e7821580f5165eb4359e209ef7823..1d3167269a6717902149171fe755123a5c654eb6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -20,6 +20,10 @@ #include "cpu.h" +static const int amd_erratum_383[]; +static const int amd_erratum_400[]; +static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); + /* * nodes_per_socket: Stores the number of nodes per socket. 
* Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX @@ -305,20 +309,32 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* get information required for multi-node processors */ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - u32 eax, ebx, ecx, edx; - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - node_id = ecx & 7; + node_id = cpuid_ecx(0x8000001e) & 7; - /* get compute unit information */ - smp_num_siblings = ((ebx >> 8) & 3) + 1; - c->x86_max_cores /= smp_num_siblings; - c->cpu_core_id = ebx & 0xff; + /* + * We may have multiple LLCs if L3 caches exist, so check if we + * have an L3 cache by looking at the L3 cache CPUID leaf. + */ + if (cpuid_edx(0x80000006)) { + if (c->x86 == 0x17) { + /* + * LLC is at the core complex level. + * Core complex id is ApicId[3]. + */ + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; + } else { + /* LLC is at the node level. */ + per_cpu(cpu_llc_id, cpu) = node_id; + } + } } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); node_id = value & 7; + + per_cpu(cpu_llc_id, cpu) = node_id; } else return; @@ -329,9 +345,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_AMD_DCM); cus_per_node = c->x86_max_cores / nodes_per_socket; - /* store NodeID, use llc_shared_map to store sibling info */ - per_cpu(cpu_llc_id, cpu) = node_id; - /* core id has to be in the [0 .. cores_per_node - 1] range */ c->cpu_core_id %= cus_per_node; } @@ -356,15 +369,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) /* use socket ID also for last level cache */ per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; amd_get_topology(c); - - /* - * Fix percpu cpu_llc_id here as LLC topology is different - * for Fam17h systems. - */ - if (c->x86 != 0x17 || !cpuid_edx(0x80000006)) - return; - - per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; #endif } @@ -585,11 +589,16 @@ static void early_init_amd(struct cpuinfo_x86 *c) /* F16h erratum 793, CVE-2013-6885 */ if (c->x86 == 0x16 && c->x86_model <= 0xf) msr_set_bit(MSR_AMD64_LS_CFG, 15); -} -static const int amd_erratum_383[]; -static const int amd_erratum_400[]; -static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); + /* + * Check whether the machine is affected by erratum 400. This is + * used to select the proper idle routine and to enable the check + * whether the machine is affected in arch_post_acpi_init(), which + * sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check. 
+ */ + if (cpu_has_amd_erratum(c, amd_erratum_400)) + set_cpu_bug(c, X86_BUG_AMD_E400); +} static void init_amd_k8(struct cpuinfo_x86 *c) { @@ -770,9 +779,6 @@ static void init_amd(struct cpuinfo_x86 *c) if (c->x86 > 0x11) set_cpu_cap(c, X86_FEATURE_ARAT); - if (cpu_has_amd_erratum(c, amd_erratum_400)) - set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); - rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); /* 3DNow or LM implies PREFETCHW */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc9e980c68ec47b39a8c4ae7ad3497d3d94bd53d..023c7bfa24dfeda6a61f5c6ca879fe4d6b9d25c7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -667,13 +667,14 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_1_EDX] = edx; } + /* Thermal and Power Management Leaf: level 0x00000006 (eax) */ + if (c->cpuid_level >= 0x00000006) + c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006); + /* Additional Intel-defined flags: level 0x00000007 */ if (c->cpuid_level >= 0x00000007) { cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_7_0_EBX] = ebx; - - c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006); c->x86_capability[CPUID_7_ECX] = ecx; } @@ -979,29 +980,21 @@ static void x86_init_cache_qos(struct cpuinfo_x86 *c) } /* - * The physical to logical package id mapping is initialized from the - * acpi/mptables information. Make sure that CPUID actually agrees with - * that. + * Validate that ACPI/mptables have the same information about the + * effective APIC id and update the package map. */ -static void sanitize_package_id(struct cpuinfo_x86 *c) +static void validate_apic_and_package_id(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - unsigned int pkg, apicid, cpu = smp_processor_id(); + unsigned int apicid, cpu = smp_processor_id(); apicid = apic->cpu_present_to_apicid(cpu); - pkg = apicid >> boot_cpu_data.x86_coreid_bits; - if (apicid != c->initial_apicid) { - pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n", + if (apicid != c->apicid) { + pr_err(FW_BUG "CPU%u: APIC id mismatch. 
Firmware: %x APIC: %x\n", cpu, apicid, c->initial_apicid); - c->initial_apicid = apicid; - } - if (pkg != c->phys_proc_id) { - pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n", - cpu, pkg, c->phys_proc_id); - c->phys_proc_id = pkg; } - c->logical_proc_id = topology_phys_to_logical_pkg(pkg); + BUG_ON(topology_update_package_map(c->phys_proc_id, cpu)); #else c->logical_proc_id = 0; #endif @@ -1132,7 +1125,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif - sanitize_package_id(c); } /* @@ -1188,6 +1180,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) enable_sep_cpu(); #endif mtrr_ap_init(); + validate_apic_and_package_id(c); } struct msr_range { @@ -1282,7 +1275,7 @@ static __init int setup_disablecpuid(char *arg) { int bit; - if (get_option(&arg, &bit) && bit < NCAPINTS*32) + if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32) setup_clear_cpu_cap(bit); else return 0; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 274fab99169d8235628b13fa955fc98b520ee27f..932348fbb6ea05aff88f004a0feeaa5092b0d1f6 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -352,6 +352,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) } else { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); disable_irq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index b47edb8f52566e223f89800b5b0465349331985a..8da13d4e77cc1ee2a0f3715a9138d02f45a2c98d 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -70,7 +70,7 @@ int __init pci_swiotlb_detect_override(void) { int use_swiotlb = swiotlb | swiotlb_force; - if (swiotlb_force) + if (swiotlb_force == SWIOTLB_FORCE) swiotlb = 1; return use_swiotlb; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 76629f48fdb0d380b55c6043ecf476de1cc4cb88..fc7cf64587f2c8a85c744b22f7b320f2777798b2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -435,8 +435,7 @@ void select_idle_routine(const struct cpuinfo_x86 *c) if (x86_idle || boot_option_idle_override == IDLE_POLL) return; - if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) { - /* E400: APIC timer interrupt does not wake up CPU from C1e */ + if (boot_cpu_has_bug(X86_BUG_AMD_E400)) { pr_info("using AMD E400 aware idle routine\n"); x86_idle = amd_e400_idle; } else if (prefer_mwait_c1_over_halt(c)) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 42f5eb7b4f6c85251f4ab65d6de44268b6de06ea..e9bbe02950adb779d24b0382bcde04a1f93fe6c8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -104,7 +104,6 @@ static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; -static bool logical_packages_frozen __read_mostly; /* Maximum number of SMT threads on any online core */ int __max_smt_threads __read_mostly; @@ -263,9 +262,14 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -int topology_update_package_map(unsigned int apicid, unsigned int cpu) +/** + * topology_update_package_map - Update the physical to logical package map + * @pkg: The physical package id as retrieved via CPUID + * @cpu: The cpu for which this is 
updated + */ +int topology_update_package_map(unsigned int pkg, unsigned int cpu) { - unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits; + unsigned int new; /* Called from early boot ? */ if (!physical_package_map) @@ -278,16 +282,17 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu) if (test_and_set_bit(pkg, physical_package_map)) goto found; - if (logical_packages_frozen) { - physical_to_logical_pkg[pkg] = -1; - pr_warn("APIC(%x) Package %u exceeds logical package max\n", - apicid, pkg); + if (logical_packages >= __max_logical_packages) { + pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n", + logical_packages, cpu, __max_logical_packages); return -ENOSPC; } new = logical_packages++; - pr_info("APIC(%x) Converting physical %u to logical package %u\n", - apicid, pkg, new); + if (new != pkg) { + pr_info("CPU %u Converting physical %u to logical package %u\n", + cpu, pkg, new); + } physical_to_logical_pkg[pkg] = new; found: @@ -308,9 +313,9 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg) } EXPORT_SYMBOL(topology_phys_to_logical_pkg); -static void __init smp_init_package_map(void) +static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu) { - unsigned int ncpus, cpu; + unsigned int ncpus; size_t size; /* @@ -355,27 +360,9 @@ static void __init smp_init_package_map(void) size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); physical_package_map = kzalloc(size, GFP_KERNEL); - for_each_present_cpu(cpu) { - unsigned int apicid = apic->cpu_present_to_apicid(cpu); - - if (apicid == BAD_APICID || !apic->apic_id_valid(apicid)) - continue; - if (!topology_update_package_map(apicid, cpu)) - continue; - pr_warn("CPU %u APICId %x disabled\n", cpu, apicid); - per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID; - set_cpu_possible(cpu, false); - set_cpu_present(cpu, false); - } - - if (logical_packages > __max_logical_packages) { - pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n", - logical_packages, __max_logical_packages); - logical_packages_frozen = true; - __max_logical_packages = logical_packages; - } - pr_info("Max logical packages: %u\n", __max_logical_packages); + + topology_update_package_map(c->phys_proc_id, cpu); } void __init smp_store_boot_cpu_info(void) @@ -385,7 +372,7 @@ void __init smp_store_boot_cpu_info(void) *c = boot_cpu_data; c->cpu_index = id; - smp_init_package_map(); + smp_init_package_map(c, id); } /* diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a3ce9d260d68756fa675ce40994b79aedd206413..9f676adcdfc20bc9e7af0c421e31f7978cdbd263 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -171,6 +171,7 @@ #define NearBranch ((u64)1 << 52) /* Near branches */ #define No16 ((u64)1 << 53) /* No 16 bit operand */ #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ +#define Aligned16 ((u64)1 << 55) /* Aligned to 16 byte boundary (e.g. FXSAVE) */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -446,6 +447,26 @@ FOP_END; FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET FOP_END; +/* + * XXX: inoutclob user must know where the argument is being expanded. + * Relying on CC_HAVE_ASM_GOTO would allow us to remove _fault. + */ +#define asm_safe(insn, inoutclob...) \ +({ \ + int _fault = 0; \ + \ + asm volatile("1:" insn "\n" \ + "2:\n" \ + ".pushsection .fixup, \"ax\"\n" \ + "3: movl $1, %[_fault]\n" \ + " jmp 2b\n" \ + ".popsection\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [_fault] "+qm"(_fault) inoutclob ); \ + \ + _fault ? 
X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \ +}) + static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, enum x86_intercept intercept, enum x86_intercept_stage stage) @@ -632,21 +653,24 @@ static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector, * depending on whether they're AVX encoded or not. * * Also included is CMPXCHG16B which is not a vector instruction, yet it is - * subject to the same check. + * subject to the same check. FXSAVE and FXRSTOR are checked here too as their + * 512 bytes of data must be aligned to a 16 byte boundary. */ -static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size) +static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size) { if (likely(size < 16)) - return false; + return 1; if (ctxt->d & Aligned) - return true; + return size; else if (ctxt->d & Unaligned) - return false; + return 1; else if (ctxt->d & Avx) - return false; + return 1; + else if (ctxt->d & Aligned16) + return 16; else - return true; + return size; } static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, @@ -704,7 +728,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, } break; } - if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) + if (la & (insn_alignment(ctxt, size) - 1)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; bad: @@ -791,6 +815,20 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); } +static int segmented_write_std(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned int size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); +} + /* * Prefetch the remaining bytes of the instruction without crossing page * boundary if they are not in fetch_cache yet. @@ -1544,7 +1582,6 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, &ctxt->exception); } -/* Does not support long mode */ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg, u8 cpl, enum x86_transfer_type transfer, @@ -1581,20 +1618,34 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, rpl = selector & 3; - /* NULL selector is not valid for TR, CS and SS (except for long mode) */ - if ((seg == VCPU_SREG_CS - || (seg == VCPU_SREG_SS - && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)) - || seg == VCPU_SREG_TR) - && null_selector) - goto exception; - /* TR should be in GDT only */ if (seg == VCPU_SREG_TR && (selector & (1 << 2))) goto exception; - if (null_selector) /* for NULL selector skip all following checks */ + /* NULL selector is not valid for TR, CS and (except for long mode) SS */ + if (null_selector) { + if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR) + goto exception; + + if (seg == VCPU_SREG_SS) { + if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl) + goto exception; + + /* + * ctxt->ops->set_segment expects the CPL to be in + * SS.DPL, so fake an expand-up 32-bit data segment. 
+ */ + seg_desc.type = 3; + seg_desc.p = 1; + seg_desc.s = 1; + seg_desc.dpl = cpl; + seg_desc.d = 1; + seg_desc.g = 1; + } + + /* Skip all following checks */ goto load; + } ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr); if (ret != X86EMUL_CONTINUE) @@ -1710,6 +1761,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg) { u8 cpl = ctxt->ops->cpl(ctxt); + + /* + * None of MOV, POP and LSS can load a NULL selector in CPL=3, but + * they can load it at CPL<3 (Intel's manual says only LSS can, + * but it's wrong). + * + * However, the Intel manual says that putting IST=1/DPL=3 in + * an interrupt gate will result in SS=3 (the AMD manual instead + * says it doesn't), so allow SS=3 in __load_segment_descriptor + * and only forbid it here. + */ + if (seg == VCPU_SREG_SS && selector == 3 && + ctxt->mode == X86EMUL_MODE_PROT64) + return emulate_exception(ctxt, GP_VECTOR, 0, true); + return __load_segment_descriptor(ctxt, selector, seg, cpl, X86_TRANSFER_NONE, NULL); } @@ -3658,8 +3724,8 @@ static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt, } /* Disable writeback. */ ctxt->dst.type = OP_NONE; - return segmented_write(ctxt, ctxt->dst.addr.mem, - &desc_ptr, 2 + ctxt->op_bytes); + return segmented_write_std(ctxt, ctxt->dst.addr.mem, + &desc_ptr, 2 + ctxt->op_bytes); } static int em_sgdt(struct x86_emulate_ctxt *ctxt) @@ -3842,6 +3908,131 @@ static int em_movsxd(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int check_fxsr(struct x86_emulate_ctxt *ctxt) +{ + u32 eax = 1, ebx, ecx = 0, edx; + + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + if (!(edx & FFL(FXSR))) + return emulate_ud(ctxt); + + if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) + return emulate_nm(ctxt); + + /* + * Don't emulate a case that should never be hit, instead of working + * around a lack of fxsave64/fxrstor64 on old compilers. + */ + if (ctxt->mode >= X86EMUL_MODE_PROT64) + return X86EMUL_UNHANDLEABLE; + + return X86EMUL_CONTINUE; +} + +/* + * FXSAVE and FXRSTOR have 4 different formats depending on execution mode, + * 1) 16 bit mode + * 2) 32 bit mode + * - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs + * preserve whole 32 bit values, though, so (1) and (2) are the same wrt. + * save and restore + * 3) 64-bit mode with REX.W prefix + * - like (2), but XMM 8-15 are being saved and restored + * 4) 64-bit mode without REX.W prefix + * - like (3), but FIP and FDP are 64 bit + * + * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the + * desired result. (4) is not emulated. + * + * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS + * and FPU DS) should match. 
+ */ +static int em_fxsave(struct x86_emulate_ctxt *ctxt) +{ + struct fxregs_state fx_state; + size_t size; + int rc; + + rc = check_fxsr(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + + ctxt->ops->get_fpu(ctxt); + + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + + ctxt->ops->put_fpu(ctxt); + + if (rc != X86EMUL_CONTINUE) + return rc; + + if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR) + size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]); + else + size = offsetof(struct fxregs_state, xmm_space[0]); + + return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); +} + +static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt, + struct fxregs_state *new) +{ + int rc = X86EMUL_CONTINUE; + struct fxregs_state old; + + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old)); + if (rc != X86EMUL_CONTINUE) + return rc; + + /* + * 64 bit host will restore XMM 8-15, which is not correct on non-64 + * bit guests. Load the current values in order to preserve 64 bit + * XMMs after fxrstor. + */ +#ifdef CONFIG_X86_64 + /* XXX: accessing XMM 8-15 very awkwardly */ + memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16); +#endif + + /* + * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but + * does save and restore MXCSR. + */ + if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)) + memcpy(new->xmm_space, old.xmm_space, 8 * 16); + + return rc; +} + +static int em_fxrstor(struct x86_emulate_ctxt *ctxt) +{ + struct fxregs_state fx_state; + int rc; + + rc = check_fxsr(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + + rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512); + if (rc != X86EMUL_CONTINUE) + return rc; + + if (fx_state.mxcsr >> 16) + return emulate_gp(ctxt, 0); + + ctxt->ops->get_fpu(ctxt); + + if (ctxt->mode < X86EMUL_MODE_PROT64) + rc = fxrstor_fixup(ctxt, &fx_state); + + if (rc == X86EMUL_CONTINUE) + rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); + + ctxt->ops->put_fpu(ctxt); + + return rc; +} + static bool valid_cr(int nr) { switch (nr) { @@ -4194,7 +4385,9 @@ static const struct gprefix pfx_0f_ae_7 = { }; static const struct group_dual group15 = { { - N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7), + I(ModRM | Aligned16, em_fxsave), + I(ModRM | Aligned16, em_fxrstor), + N, N, N, N, N, GP(0, &pfx_0f_ae_7), }, { N, N, N, N, N, N, N, N, } }; @@ -5066,21 +5259,13 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) { - bool fault = false; + int rc; ctxt->ops->get_fpu(ctxt); - asm volatile("1: fwait \n\t" - "2: \n\t" - ".pushsection .fixup,\"ax\" \n\t" - "3: \n\t" - "movb $1, %[fault] \n\t" - "jmp 2b \n\t" - ".popsection \n\t" - _ASM_EXTABLE(1b, 3b) - : [fault]"+qm"(fault)); + rc = asm_safe("fwait"); ctxt->ops->put_fpu(ctxt); - if (unlikely(fault)) + if (unlikely(rc != X86EMUL_CONTINUE)) return emulate_exception(ctxt, MF_VECTOR, 0, false); return X86EMUL_CONTINUE; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6f69340f9fa31496cf9ec8d91f69a5fc0b550ff6..3f05c044720b7a6ab5258603a3577dd2054b9110 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2360,3 +2360,9 @@ void kvm_lapic_init(void) jump_label_rate_limit(&apic_hw_disabled, HZ); jump_label_rate_limit(&apic_sw_disabled, HZ); } + +void kvm_lapic_exit(void) +{ + static_key_deferred_flush(&apic_hw_disabled); + static_key_deferred_flush(&apic_sw_disabled); +} diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 
f60d01c29d5107a49b37713addffed27c30097be..4dfe4d6cb3381c060ec8fbc548360ee73c7d5c0a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -108,6 +108,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); void kvm_lapic_init(void); +void kvm_lapic_exit(void); #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5382b82462fcba28fed9a5064776cfb527e8eaa3..64774f419c723d7c24e669a5580e22171e75f003 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1343,10 +1343,10 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12) return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; } -static inline bool is_exception(u32 intr_info) +static inline bool is_nmi(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) - == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); + == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK); } static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, @@ -5476,7 +5476,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (is_machine_check(intr_info)) return handle_machine_check(vcpu); - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) + if (is_nmi(intr_info)) return 1; /* already handled by vmx_vcpu_run() */ if (is_no_device(intr_info)) { @@ -8018,7 +8018,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) switch (exit_reason) { case EXIT_REASON_EXCEPTION_NMI: - if (!is_exception(intr_info)) + if (is_nmi(intr_info)) return false; else if (is_page_fault(intr_info)) return enable_ept; @@ -8611,8 +8611,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ - if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && - (exit_intr_info & INTR_INFO_VALID_MASK)) { + if (is_nmi(exit_intr_info)) { kvm_before_handle_nmi(&vmx->vcpu); asm("int $2"); kvm_after_handle_nmi(&vmx->vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 04c5d96b1d678a6eeec993b71efb93f509496bdf..731044efb195f47449afec82a28f10b4e5cdee47 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3036,6 +3036,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, memset(&events->reserved, 0, sizeof(events->reserved)); } +static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags); + static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { @@ -3072,10 +3074,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { + u32 hflags = vcpu->arch.hflags; if (events->smi.smm) - vcpu->arch.hflags |= HF_SMM_MASK; + hflags |= HF_SMM_MASK; else - vcpu->arch.hflags &= ~HF_SMM_MASK; + hflags &= ~HF_SMM_MASK; + kvm_set_hflags(vcpu, hflags); + vcpu->arch.smi_pending = events->smi.pending; if (events->smi.smm_inside_nmi) vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; @@ -3143,6 +3148,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) memcpy(dest, xsave, XSAVE_HDR_OFFSET); /* Set XSTATE_BV */ + xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE; *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv; /* @@ -3303,6 +3309,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, switch (cap->cap) { case KVM_CAP_HYPERV_SYNIC: + if 
(!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; return kvm_hv_activate_synic(vcpu); default: return -EINVAL; @@ -5958,6 +5966,7 @@ int kvm_arch_init(void *opaque) void kvm_arch_exit(void) { + kvm_lapic_exit(); perf_unregister_guest_info_callbacks(&kvm_guest_cbs); if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index fe04a04dab8ec0df10c827623577102a994df71c..15f74361592366b0990e4fbecfd0cbb758e2cf25 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1172,6 +1172,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)image; prog->jited = 1; + } else { + prog = orig_prog; } out_addrs: diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 3cd69832d7f4c6f3743bbb3a190c39672c89461d..3961103e91760a14d24eec1ad3caffa5ad1f2adb 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -114,6 +114,16 @@ static const struct dmi_system_id pci_crs_quirks[] __initconst = { DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"), }, }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=42606 */ + { + .callback = set_nouse_crs, + .ident = "Supermicro X8DTH", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), + DMI_MATCH(DMI_PRODUCT_NAME, "X8DTH-i/6/iF/6F"), + DMI_MATCH(DMI_BIOS_VERSION, "2.0a"), + }, + }, /* https://bugzilla.kernel.org/show_bug.cgi?id=15362 */ { diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 936a488d6cf6df3c2aadbbdbc036b8eb06701cb0..274dfc48184977db435a9c5c78607cd8a182067c 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -210,6 +210,70 @@ int __init efi_memblock_x86_reserve_range(void) return 0; } +#define OVERFLOW_ADDR_SHIFT (64 - EFI_PAGE_SHIFT) +#define OVERFLOW_ADDR_MASK (U64_MAX << OVERFLOW_ADDR_SHIFT) +#define U64_HIGH_BIT (~(U64_MAX >> 1)) + +static bool __init efi_memmap_entry_valid(const efi_memory_desc_t *md, int i) +{ + u64 end = (md->num_pages << EFI_PAGE_SHIFT) + md->phys_addr - 1; + u64 end_hi = 0; + char buf[64]; + + if (md->num_pages == 0) { + end = 0; + } else if (md->num_pages > EFI_PAGES_MAX || + EFI_PAGES_MAX - md->num_pages < + (md->phys_addr >> EFI_PAGE_SHIFT)) { + end_hi = (md->num_pages & OVERFLOW_ADDR_MASK) + >> OVERFLOW_ADDR_SHIFT; + + if ((md->phys_addr & U64_HIGH_BIT) && !(end & U64_HIGH_BIT)) + end_hi += 1; + } else { + return true; + } + + pr_warn_once(FW_BUG "Invalid EFI memory map entries:\n"); + + if (end_hi) { + pr_warn("mem%02u: %s range=[0x%016llx-0x%llx%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end_hi, end); + } else { + pr_warn("mem%02u: %s range=[0x%016llx-0x%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end); + } + return false; +} + +static void __init efi_clean_memmap(void) +{ + efi_memory_desc_t *out = efi.memmap.map; + const efi_memory_desc_t *in = out; + const efi_memory_desc_t *end = efi.memmap.map_end; + int i, n_removal; + + for (i = n_removal = 0; in < end; i++) { + if (efi_memmap_entry_valid(in, i)) { + if (out != in) + memcpy(out, in, efi.memmap.desc_size); + out = (void *)out + efi.memmap.desc_size; + } else { + n_removal++; + } + in = (void *)in + efi.memmap.desc_size; + } + + if (n_removal > 0) { + u64 size = efi.memmap.nr_map - n_removal; + + pr_warn("Removing %d invalid memory map entries.\n", n_removal); + efi_memmap_install(efi.memmap.phys_map, size); + } +} + void __init efi_print_memmap(void) { 
efi_memory_desc_t *md; @@ -472,6 +536,8 @@ void __init efi_init(void) } } + efi_clean_memmap(); + if (efi_enabled(EFI_DBG)) efi_print_memmap(); } diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 319148bd4b05091d24576a7535b10aad7bec0c2d..2f25a363068cf9723e8b418e8c1942a6d3ca4029 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -268,6 +268,22 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) efi_scratch.use_pgd = true; + /* + * Certain firmware versions are way too sentimential and still believe + * they are exclusive and unquestionable owners of the first physical page, + * even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY + * (but then write-access it later during SetVirtualAddressMap()). + * + * Create a 1:1 mapping for this page, to avoid triple faults during early + * boot with such firmware. We are free to hand this page to the BIOS, + * as trim_bios_range() will reserve the first page and isolate it away + * from memory allocators anyway. + */ + if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, _PAGE_RW)) { + pr_err("Failed to create 1:1 mapping for the first page!\n"); + return 1; + } + /* * When making calls to the firmware everything needs to be 1:1 * mapped and addressable with 32-bit pointers. Map the kernel diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 10aca63a50d7bbff9fa94dec60e0df00591ccbd5..30031d5293c483202c526d5045cda23be6617359 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -214,7 +214,7 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) new_size = efi.memmap.desc_size * num_entries; - new_phys = memblock_alloc(new_size, 0); + new_phys = efi_memmap_alloc(num_entries); if (!new_phys) { pr_err("Could not allocate boot services memmap\n"); return; @@ -355,7 +355,7 @@ void __init efi_free_boot_services(void) } new_size = efi.memmap.desc_size * num_entries; - new_phys = memblock_alloc(new_size, 0); + new_phys = efi_memmap_alloc(num_entries); if (!new_phys) { pr_err("Failed to allocate new EFI memmap\n"); return; diff --git a/arch/x86/platform/mellanox/mlx-platform.c b/arch/x86/platform/mellanox/mlx-platform.c index 7dcfcca97399752a1c0475fdfa5543b9f3227d8d..c0355d789fce22a53407e3956a2aeb864fadecb9 100644 --- a/arch/x86/platform/mellanox/mlx-platform.c +++ b/arch/x86/platform/mellanox/mlx-platform.c @@ -233,7 +233,7 @@ static int __init mlxplat_init(void) return 0; fail_platform_mux_register: - for (i--; i > 0 ; i--) + while (--i >= 0) platform_device_unregister(priv->pdev_mux[i]); platform_device_unregister(priv->pdev_i2c); fail_alloc: diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 0e98e5d241d04847c7a9a54c242b7a232ebae148..5f8b4b0302b61fc9e908dade72fe1da1b965edc3 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -49,7 +49,7 @@ int __init pci_xen_swiotlb_detect(void) * activate this IOMMU. If running as PV privileged, activate it * irregardless. */ - if ((xen_initial_domain() || swiotlb || swiotlb_force)) + if (xen_initial_domain() || swiotlb || swiotlb_force == SWIOTLB_FORCE) xen_swiotlb = 1; /* If we are running under Xen, we MUST disable the native SWIOTLB. 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 9fa27ceeecfdaea1cfdbba751e79b405d857d851..311acad7dad2abdc8501c70866674ed4f5858495 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -87,12 +87,6 @@ static void cpu_bringup(void) cpu_data(cpu).x86_max_cores = 1; set_cpu_sibling_map(cpu); - /* - * identify_cpu() may have set logical_pkg_id to -1 due - * to incorrect phys_proc_id. Let's re-comupte it. - */ - topology_update_package_map(apic->cpu_present_to_apicid(cpu), cpu); - xen_setup_cpu_clockevents(); notify_cpu_starting(cpu); diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 88a044af7504dd9c47d0f97d64620ac8de4b5739..32cdc2c52e98c2a380b60bb7e907cccd40121bd4 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -540,7 +540,7 @@ subsys_initcall(topology_init); void cpu_reset(void) { -#if XCHAL_HAVE_PTP_MMU +#if XCHAL_HAVE_PTP_MMU && IS_ENABLED(CONFIG_MMU) local_irq_disable(); /* * We have full MMU: all autoload ways, ways 7, 8 and 9 of DTLB must diff --git a/block/blk-mq.c b/block/blk-mq.c index f3d27a6dee09dfa48dd7b78bc024f4a9809e8f88..81caceb96c3c1c444f82b0eaa389084b39f7308d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -895,7 +895,7 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) return WORK_CPU_UNBOUND; if (--hctx->next_cpu_batch <= 0) { - int cpu = hctx->next_cpu, next_cpu; + int next_cpu; next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask); if (next_cpu >= nr_cpu_ids) @@ -903,8 +903,6 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) hctx->next_cpu = next_cpu; hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; - - return cpu; } return hctx->next_cpu; @@ -1332,9 +1330,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_put_ctx(data.ctx); if (!old_rq) goto done; - if (!blk_mq_direct_issue_request(old_rq, &cookie)) - goto done; - blk_mq_insert_request(old_rq, false, true, true); + if (test_bit(BLK_MQ_S_STOPPED, &data.hctx->state) || + blk_mq_direct_issue_request(old_rq, &cookie) != 0) + blk_mq_insert_request(old_rq, false, true, true); goto done; } diff --git a/block/bsg.c b/block/bsg.c index d214e929ce1855bb653808f03482580b45b5344e..b9a53615bdef4945748057e5efbfef56c5cb4847 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -655,6 +655,9 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) dprintk("%s: write %Zd bytes\n", bd->name, count); + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) + return -EINVAL; + bsg_set_block(bd, file); bytes_written = 0; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5e24d880306c2aa3614630f4ebb85d244b66d33f..3ab6807773eeb8e05d74143ca65391b3abef2bd0 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1596,7 +1596,7 @@ static struct blkcg_policy_data *cfq_cpd_alloc(gfp_t gfp) { struct cfq_group_data *cgd; - cgd = kzalloc(sizeof(*cgd), GFP_KERNEL); + cgd = kzalloc(sizeof(*cgd), gfp); if (!cgd) return NULL; return &cgd->cpd; diff --git a/build.config.goldfish.arm b/build.config.goldfish.arm new file mode 100644 index 0000000000000000000000000000000000000000..866da9361b71102e3f6ba0be60fcc3ff044d2e88 --- /dev/null +++ b/build.config.goldfish.arm @@ -0,0 +1,12 @@ +ARCH=arm +BRANCH=android-4.4 +CROSS_COMPILE=arm-linux-androidkernel- +DEFCONFIG=ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/arm/arm-linux-androideabi-4.9/bin +FILES=" +arch/arm/boot/zImage +vmlinux +System.map +" diff --git a/build.config.goldfish.arm64 
b/build.config.goldfish.arm64 new file mode 100644 index 0000000000000000000000000000000000000000..9c963cf4a3d8d76737eeb994943bb33fb8a004f4 --- /dev/null +++ b/build.config.goldfish.arm64 @@ -0,0 +1,12 @@ +ARCH=arm64 +BRANCH=android-4.4 +CROSS_COMPILE=aarch64-linux-android- +DEFCONFIG=ranchu64_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin +FILES=" +arch/arm64/boot/Image +vmlinux +System.map +" diff --git a/build.config.goldfish.mips b/build.config.goldfish.mips new file mode 100644 index 0000000000000000000000000000000000000000..8af53d2c294048247d791061ae097a8b88d16fce --- /dev/null +++ b/build.config.goldfish.mips @@ -0,0 +1,11 @@ +ARCH=mips +BRANCH=android-4.4 +CROSS_COMPILE=mips64el-linux-android- +DEFCONFIG=ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin +FILES=" +vmlinux +System.map +" diff --git a/build.config.goldfish.mips64 b/build.config.goldfish.mips64 new file mode 100644 index 0000000000000000000000000000000000000000..2a33d36dc4c8e8f78ae79902e3a60fa5d6fe0a3d --- /dev/null +++ b/build.config.goldfish.mips64 @@ -0,0 +1,11 @@ +ARCH=mips +BRANCH=android-4.4 +CROSS_COMPILE=mips64el-linux-android- +DEFCONFIG=ranchu64_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin +FILES=" +vmlinux +System.map +" diff --git a/build.config.goldfish.x86 b/build.config.goldfish.x86 new file mode 100644 index 0000000000000000000000000000000000000000..f86253f58d4d17c6b4beaeb74efee11125a299df --- /dev/null +++ b/build.config.goldfish.x86 @@ -0,0 +1,12 @@ +ARCH=x86 +BRANCH=android-4.4 +CROSS_COMPILE=x86_64-linux-android- +DEFCONFIG=i386_ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" diff --git a/build.config.goldfish.x86_64 b/build.config.goldfish.x86_64 new file mode 100644 index 0000000000000000000000000000000000000000..e1738861ec5c25c17234c92a68e9bfe322398bd4 --- /dev/null +++ b/build.config.goldfish.x86_64 @@ -0,0 +1,12 @@ +ARCH=x86_64 +BRANCH=android-4.4 +CROSS_COMPILE=x86_64-linux-android- +DEFCONFIG=x86_64_ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" diff --git a/crypto/algapi.c b/crypto/algapi.c index df939b54b09f731eac02657957f3f573c51a1ec5..1fad2a6b3bbbf0d1d4ee07f585bdc4d501467b5d 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -356,6 +356,7 @@ int crypto_register_alg(struct crypto_alg *alg) struct crypto_larval *larval; int err; + alg->cra_flags &= ~CRYPTO_ALG_DEAD; err = crypto_check_alg(alg); if (err) return err; diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 0d099a24f776ac9bd665f3aab8006793031aef48..e53bef6cf53c627d7c9a6a566d75fbe1805d1138 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -852,6 +852,8 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) if (ghes_read_estatus(ghes, 1)) { ghes_clear_estatus(ghes); continue; + } else { + ret = NMI_HANDLED; } sev = ghes_severity(ghes->estatus->error_severity); @@ -863,12 +865,11 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) __process_error(ghes); 
ghes_clear_estatus(ghes); - - ret = NMI_HANDLED; } #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - irq_work_queue(&ghes_proc_irq_work); + if (ret == NMI_HANDLED) + irq_work_queue(&ghes_proc_irq_work); #endif atomic_dec(&ghes_in_nmi); return ret; diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index d0d0504b7c895b47bc29f8c50c20c50b3ce98f13..e0ea8f56d2bfd9ceb19fa63eda3544195e4c1e56 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -784,8 +784,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) /* Add per logical CPU nodes for reading its feedback counters. */ cpu_dev = get_cpu_device(pr->id); - if (!cpu_dev) + if (!cpu_dev) { + ret = -EINVAL; goto out_free; + } ret = kobject_init_and_add(&cpc_ptr->kobj, &cppc_ktype, &cpu_dev->kobj, "acpi_cppc"); diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index a6b36fc53aec54dfc642dd260aa25cfbbec037b5..02ded25c82e4a06e1e79bf2f0a4855aa933b3df1 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -296,6 +296,26 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Vostro V131"), }, }, + { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1123661 */ + .callback = video_detect_force_native, + .ident = "Dell XPS 17 L702X", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell System XPS L702X"), + }, + }, + { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1204476 */ + /* https://bugs.launchpad.net/ubuntu/+source/linux-lts-trusty/+bug/1416940 */ + .callback = video_detect_force_native, + .ident = "HP Pavilion dv6", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv6 Notebook PC"), + }, + }, + { }, }; diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig index bdfc6c6f4f5a7c8d9363055be25954c66eb55d3f..a82fc022d34ba88ba7adeb35b6b4ee0fafdbdb72 100644 --- a/drivers/android/Kconfig +++ b/drivers/android/Kconfig @@ -19,6 +19,18 @@ config ANDROID_BINDER_IPC Android process, using Binder to identify, invoke and pass arguments between said processes. +config ANDROID_BINDER_DEVICES + string "Android Binder devices" + depends on ANDROID_BINDER_IPC + default "binder" + ---help--- + Default value for the binder.devices parameter. + + The binder.devices parameter is a comma-separated list of strings + that specifies the names of the binder device nodes that will be + created. Each binder device has its own context manager, and is + therefore logically separated from the other devices. 
+ config ANDROID_BINDER_IPC_32BIT bool depends on !64BIT && ANDROID_BINDER_IPC diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 3c71b982bf2a35ae1a406e35fac2683577edb2b8..1bd840198c28f08bbb25f45bc3921d317ea60b11 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -50,14 +50,13 @@ static DEFINE_MUTEX(binder_main_lock); static DEFINE_MUTEX(binder_deferred_lock); static DEFINE_MUTEX(binder_mmap_lock); +static HLIST_HEAD(binder_devices); static HLIST_HEAD(binder_procs); static HLIST_HEAD(binder_deferred_list); static HLIST_HEAD(binder_dead_nodes); static struct dentry *binder_debugfs_dir_entry_root; static struct dentry *binder_debugfs_dir_entry_proc; -static struct binder_node *binder_context_mgr_node; -static kuid_t binder_context_mgr_uid = INVALID_UID; static int binder_last_id; #define BINDER_DEBUG_ENTRY(name) \ @@ -115,6 +114,9 @@ module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO); static bool binder_debug_no_lock; module_param_named(proc_no_lock, binder_debug_no_lock, bool, S_IWUSR | S_IRUGO); +static char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; +module_param_named(devices, binder_devices_param, charp, S_IRUGO); + static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait); static int binder_stop_on_user_error; @@ -145,6 +147,17 @@ module_param_call(stop_on_user_error, binder_set_stop_on_user_error, binder_stop_on_user_error = 2; \ } while (0) +#define to_flat_binder_object(hdr) \ + container_of(hdr, struct flat_binder_object, hdr) + +#define to_binder_fd_object(hdr) container_of(hdr, struct binder_fd_object, hdr) + +#define to_binder_buffer_object(hdr) \ + container_of(hdr, struct binder_buffer_object, hdr) + +#define to_binder_fd_array_object(hdr) \ + container_of(hdr, struct binder_fd_array_object, hdr) + enum binder_stat_types { BINDER_STAT_PROC, BINDER_STAT_THREAD, @@ -158,7 +171,7 @@ enum binder_stat_types { struct binder_stats { int br[_IOC_NR(BR_FAILED_REPLY) + 1]; - int bc[_IOC_NR(BC_DEAD_BINDER_DONE) + 1]; + int bc[_IOC_NR(BC_REPLY_SG) + 1]; int obj_created[BINDER_STAT_COUNT]; int obj_deleted[BINDER_STAT_COUNT]; }; @@ -186,6 +199,7 @@ struct binder_transaction_log_entry { int to_node; int data_size; int offsets_size; + const char *context_name; }; struct binder_transaction_log { int next; @@ -210,6 +224,18 @@ static struct binder_transaction_log_entry *binder_transaction_log_add( return e; } +struct binder_context { + struct binder_node *binder_context_mgr_node; + kuid_t binder_context_mgr_uid; + const char *name; +}; + +struct binder_device { + struct hlist_node hlist; + struct miscdevice miscdev; + struct binder_context context; +}; + struct binder_work { struct list_head entry; enum { @@ -282,6 +308,7 @@ struct binder_buffer { struct binder_node *target_node; size_t data_size; size_t offsets_size; + size_t extra_buffers_size; uint8_t data[0]; }; @@ -325,6 +352,7 @@ struct binder_proc { int ready_threads; long default_priority; struct dentry *debugfs_entry; + struct binder_context *context; }; enum { @@ -648,7 +676,9 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, size_t data_size, - size_t offsets_size, int is_async) + size_t offsets_size, + size_t extra_buffers_size, + int is_async) { struct rb_node *n = proc->free_buffers.rb_node; struct binder_buffer *buffer; @@ -656,7 +686,7 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, struct rb_node *best_fit = NULL; void 
*has_page_addr; void *end_page_addr; - size_t size; + size_t size, data_offsets_size; if (proc->vma == NULL) { pr_err("%d: binder_alloc_buf, no vma\n", @@ -664,15 +694,20 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, return NULL; } - size = ALIGN(data_size, sizeof(void *)) + + data_offsets_size = ALIGN(data_size, sizeof(void *)) + ALIGN(offsets_size, sizeof(void *)); - if (size < data_size || size < offsets_size) { + if (data_offsets_size < data_size || data_offsets_size < offsets_size) { binder_user_error("%d: got transaction with invalid size %zd-%zd\n", proc->pid, data_size, offsets_size); return NULL; } - + size = data_offsets_size + ALIGN(extra_buffers_size, sizeof(void *)); + if (size < data_offsets_size || size < extra_buffers_size) { + binder_user_error("%d: got transaction with invalid extra_buffers_size %zd\n", + proc->pid, extra_buffers_size); + return NULL; + } if (is_async && proc->free_async_space < size + sizeof(struct binder_buffer)) { binder_debug(BINDER_DEBUG_BUFFER_ALLOC, @@ -741,6 +776,7 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, proc->pid, size, buffer); buffer->data_size = data_size; buffer->offsets_size = offsets_size; + buffer->extra_buffers_size = extra_buffers_size; buffer->async_transaction = is_async; if (is_async) { proc->free_async_space -= size + sizeof(struct binder_buffer); @@ -815,7 +851,8 @@ static void binder_free_buf(struct binder_proc *proc, buffer_size = binder_buffer_size(proc, buffer); size = ALIGN(buffer->data_size, sizeof(void *)) + - ALIGN(buffer->offsets_size, sizeof(void *)); + ALIGN(buffer->offsets_size, sizeof(void *)) + + ALIGN(buffer->extra_buffers_size, sizeof(void *)); binder_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: binder_free_buf %p size %zd buffer_size %zd\n", @@ -929,8 +966,10 @@ static int binder_inc_node(struct binder_node *node, int strong, int internal, if (internal) { if (target_list == NULL && node->internal_strong_refs == 0 && - !(node == binder_context_mgr_node && - node->has_strong_ref)) { + !(node->proc && + node == node->proc->context-> + binder_context_mgr_node && + node->has_strong_ref)) { pr_err("invalid inc strong node for %d\n", node->debug_id); return -EINVAL; @@ -1031,6 +1070,7 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, struct rb_node **p = &proc->refs_by_node.rb_node; struct rb_node *parent = NULL; struct binder_ref *ref, *new_ref; + struct binder_context *context = proc->context; while (*p) { parent = *p; @@ -1053,7 +1093,7 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, rb_link_node(&new_ref->rb_node_node, parent, p); rb_insert_color(&new_ref->rb_node_node, &proc->refs_by_node); - new_ref->desc = (node == binder_context_mgr_node) ? 0 : 1; + new_ref->desc = (node == context->binder_context_mgr_node) ? 0 : 1; for (n = rb_first(&proc->refs_by_desc); n != NULL; n = rb_next(n)) { ref = rb_entry(n, struct binder_ref, rb_node_desc); if (ref->desc > new_ref->desc) @@ -1240,11 +1280,158 @@ static void binder_send_failed_reply(struct binder_transaction *t, } } +/** + * binder_validate_object() - checks for a valid metadata object in a buffer. + * @buffer: binder_buffer that we're parsing. + * @offset: offset in the buffer at which to validate an object. + * + * Return: If there's a valid metadata object at @offset in @buffer, the + * size of that object. Otherwise, it returns zero. 
+ */ +static size_t binder_validate_object(struct binder_buffer *buffer, u64 offset) +{ + /* Check if we can read a header first */ + struct binder_object_header *hdr; + size_t object_size = 0; + + if (offset > buffer->data_size - sizeof(*hdr) || + buffer->data_size < sizeof(*hdr) || + !IS_ALIGNED(offset, sizeof(u32))) + return 0; + + /* Ok, now see if we can read a complete object. */ + hdr = (struct binder_object_header *)(buffer->data + offset); + switch (hdr->type) { + case BINDER_TYPE_BINDER: + case BINDER_TYPE_WEAK_BINDER: + case BINDER_TYPE_HANDLE: + case BINDER_TYPE_WEAK_HANDLE: + object_size = sizeof(struct flat_binder_object); + break; + case BINDER_TYPE_FD: + object_size = sizeof(struct binder_fd_object); + break; + case BINDER_TYPE_PTR: + object_size = sizeof(struct binder_buffer_object); + break; + case BINDER_TYPE_FDA: + object_size = sizeof(struct binder_fd_array_object); + break; + default: + return 0; + } + if (offset <= buffer->data_size - object_size && + buffer->data_size >= object_size) + return object_size; + else + return 0; +} + +/** + * binder_validate_ptr() - validates binder_buffer_object in a binder_buffer. + * @b: binder_buffer containing the object + * @index: index in offset array at which the binder_buffer_object is + * located + * @start: points to the start of the offset array + * @num_valid: the number of valid offsets in the offset array + * + * Return: If @index is within the valid range of the offset array + * described by @start and @num_valid, and if there's a valid + * binder_buffer_object at the offset found in index @index + * of the offset array, that object is returned. Otherwise, + * %NULL is returned. + * Note that the offset found in index @index itself is not + * verified; this function assumes that @num_valid elements + * from @start were previously verified to have valid offsets. + */ +static struct binder_buffer_object *binder_validate_ptr(struct binder_buffer *b, + binder_size_t index, + binder_size_t *start, + binder_size_t num_valid) +{ + struct binder_buffer_object *buffer_obj; + binder_size_t *offp; + + if (index >= num_valid) + return NULL; + + offp = start + index; + buffer_obj = (struct binder_buffer_object *)(b->data + *offp); + if (buffer_obj->hdr.type != BINDER_TYPE_PTR) + return NULL; + + return buffer_obj; +} + +/** + * binder_validate_fixup() - validates pointer/fd fixups happen in order. + * @b: transaction buffer + * @objects_start start of objects buffer + * @buffer: binder_buffer_object in which to fix up + * @offset: start offset in @buffer to fix up + * @last_obj: last binder_buffer_object that we fixed up in + * @last_min_offset: minimum fixup offset in @last_obj + * + * Return: %true if a fixup in buffer @buffer at offset @offset is + * allowed. + * + * For safety reasons, we only allow fixups inside a buffer to happen + * at increasing offsets; additionally, we only allow fixup on the last + * buffer object that was verified, or one of its parents. 
+ * + * Example of what is allowed: + * + * A + * B (parent = A, offset = 0) + * C (parent = A, offset = 16) + * D (parent = C, offset = 0) + * E (parent = A, offset = 32) // min_offset is 16 (C.parent_offset) + * + * Examples of what is not allowed: + * + * Decreasing offsets within the same parent: + * A + * C (parent = A, offset = 16) + * B (parent = A, offset = 0) // decreasing offset within A + * + * Referring to a parent that wasn't the last object or any of its parents: + * A + * B (parent = A, offset = 0) + * C (parent = A, offset = 0) + * C (parent = A, offset = 16) + * D (parent = B, offset = 0) // B is not A or any of A's parents + */ +static bool binder_validate_fixup(struct binder_buffer *b, + binder_size_t *objects_start, + struct binder_buffer_object *buffer, + binder_size_t fixup_offset, + struct binder_buffer_object *last_obj, + binder_size_t last_min_offset) +{ + if (!last_obj) { + /* Nothing to fix up in */ + return false; + } + + while (last_obj != buffer) { + /* + * Safe to retrieve the parent of last_obj, since it + * was already previously verified by the driver. + */ + if ((last_obj->flags & BINDER_BUFFER_FLAG_HAS_PARENT) == 0) + return false; + last_min_offset = last_obj->parent_offset + sizeof(uintptr_t); + last_obj = (struct binder_buffer_object *) + (b->data + *(objects_start + last_obj->parent)); + } + return (fixup_offset >= last_min_offset); +} + static void binder_transaction_buffer_release(struct binder_proc *proc, struct binder_buffer *buffer, binder_size_t *failed_at) { - binder_size_t *offp, *off_end; + binder_size_t *offp, *off_start, *off_end; int debug_id = buffer->debug_id; binder_debug(BINDER_DEBUG_TRANSACTION, @@ -1255,28 +1442,30 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, if (buffer->target_node) binder_dec_node(buffer->target_node, 1, 0); - offp = (binder_size_t *)(buffer->data + - ALIGN(buffer->data_size, sizeof(void *))); + off_start = (binder_size_t *)(buffer->data + + ALIGN(buffer->data_size, sizeof(void *))); if (failed_at) off_end = failed_at; else - off_end = (void *)offp + buffer->offsets_size; - for (; offp < off_end; offp++) { - struct flat_binder_object *fp; + off_end = (void *)off_start + buffer->offsets_size; + for (offp = off_start; offp < off_end; offp++) { + struct binder_object_header *hdr; + size_t object_size = binder_validate_object(buffer, *offp); - if (*offp > buffer->data_size - sizeof(*fp) || - buffer->data_size < sizeof(*fp) || - !IS_ALIGNED(*offp, sizeof(u32))) { - pr_err("transaction release %d bad offset %lld, size %zd\n", + if (object_size == 0) { + pr_err("transaction release %d bad object at offset %lld, size %zd\n", debug_id, (u64)*offp, buffer->data_size); continue; } - fp = (struct flat_binder_object *)(buffer->data + *offp); - switch (fp->type) { + hdr = (struct binder_object_header *)(buffer->data + *offp); + switch (hdr->type) { case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { - struct binder_node *node = binder_get_node(proc, fp->binder); + struct flat_binder_object *fp; + struct binder_node *node; + fp = to_flat_binder_object(hdr); + node = binder_get_node(proc, fp->binder); if (node == NULL) { pr_err("transaction release %d bad node %016llx\n", debug_id, (u64)fp->binder); @@ -1285,15 +1474,17 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, binder_debug(BINDER_DEBUG_TRANSACTION, " node %d u%016llx\n", node->debug_id, (u64)node->ptr); - binder_dec_node(node, fp->type == BINDER_TYPE_BINDER, 0); + binder_dec_node(node, hdr->type == 
BINDER_TYPE_BINDER, + 0); } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { + struct flat_binder_object *fp; struct binder_ref *ref; + fp = to_flat_binder_object(hdr); ref = binder_get_ref(proc, fp->handle, - fp->type == BINDER_TYPE_HANDLE); - + hdr->type == BINDER_TYPE_HANDLE); if (ref == NULL) { pr_err("transaction release %d bad handle %d\n", debug_id, fp->handle); @@ -1302,32 +1493,348 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d (node %d)\n", ref->debug_id, ref->desc, ref->node->debug_id); - binder_dec_ref(ref, fp->type == BINDER_TYPE_HANDLE); + binder_dec_ref(ref, hdr->type == BINDER_TYPE_HANDLE); } break; - case BINDER_TYPE_FD: + case BINDER_TYPE_FD: { + struct binder_fd_object *fp = to_binder_fd_object(hdr); + binder_debug(BINDER_DEBUG_TRANSACTION, - " fd %d\n", fp->handle); + " fd %d\n", fp->fd); if (failed_at) - task_close_fd(proc, fp->handle); + task_close_fd(proc, fp->fd); + } break; + case BINDER_TYPE_PTR: + /* + * Nothing to do here, this will get cleaned up when the + * transaction buffer gets freed + */ break; - + case BINDER_TYPE_FDA: { + struct binder_fd_array_object *fda; + struct binder_buffer_object *parent; + uintptr_t parent_buffer; + u32 *fd_array; + size_t fd_index; + binder_size_t fd_buf_size; + + fda = to_binder_fd_array_object(hdr); + parent = binder_validate_ptr(buffer, fda->parent, + off_start, + offp - off_start); + if (!parent) { + pr_err("transaction release %d bad parent offset", + debug_id); + continue; + } + /* + * Since the parent was already fixed up, convert it + * back to kernel address space to access it + */ + parent_buffer = parent->buffer - + proc->user_buffer_offset; + + fd_buf_size = sizeof(u32) * fda->num_fds; + if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { + pr_err("transaction release %d invalid number of fds (%lld)\n", + debug_id, (u64)fda->num_fds); + continue; + } + if (fd_buf_size > parent->length || + fda->parent_offset > parent->length - fd_buf_size) { + /* No space for all file descriptors here. 
*/ + pr_err("transaction release %d not enough space for %lld fds in buffer\n", + debug_id, (u64)fda->num_fds); + continue; + } + fd_array = (u32 *)(parent_buffer + fda->parent_offset); + for (fd_index = 0; fd_index < fda->num_fds; fd_index++) + task_close_fd(proc, fd_array[fd_index]); + } break; default: pr_err("transaction release %d bad object type %x\n", - debug_id, fp->type); + debug_id, hdr->type); break; } } } +static int binder_translate_binder(struct flat_binder_object *fp, + struct binder_transaction *t, + struct binder_thread *thread) +{ + struct binder_node *node; + struct binder_ref *ref; + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + + node = binder_get_node(proc, fp->binder); + if (!node) { + node = binder_new_node(proc, fp->binder, fp->cookie); + if (!node) + return -ENOMEM; + + node->min_priority = fp->flags & FLAT_BINDER_FLAG_PRIORITY_MASK; + node->accept_fds = !!(fp->flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); + } + if (fp->cookie != node->cookie) { + binder_user_error("%d:%d sending u%016llx node %d, cookie mismatch %016llx != %016llx\n", + proc->pid, thread->pid, (u64)fp->binder, + node->debug_id, (u64)fp->cookie, + (u64)node->cookie); + return -EINVAL; + } + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) + return -EPERM; + + ref = binder_get_ref_for_node(target_proc, node); + if (!ref) + return -EINVAL; + + if (fp->hdr.type == BINDER_TYPE_BINDER) + fp->hdr.type = BINDER_TYPE_HANDLE; + else + fp->hdr.type = BINDER_TYPE_WEAK_HANDLE; + fp->binder = 0; + fp->handle = ref->desc; + fp->cookie = 0; + binder_inc_ref(ref, fp->hdr.type == BINDER_TYPE_HANDLE, &thread->todo); + + trace_binder_transaction_node_to_ref(t, node, ref); + binder_debug(BINDER_DEBUG_TRANSACTION, + " node %d u%016llx -> ref %d desc %d\n", + node->debug_id, (u64)node->ptr, + ref->debug_id, ref->desc); + + return 0; +} + +static int binder_translate_handle(struct flat_binder_object *fp, + struct binder_transaction *t, + struct binder_thread *thread) +{ + struct binder_ref *ref; + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + + ref = binder_get_ref(proc, fp->handle, + fp->hdr.type == BINDER_TYPE_HANDLE); + if (!ref) { + binder_user_error("%d:%d got transaction with invalid handle, %d\n", + proc->pid, thread->pid, fp->handle); + return -EINVAL; + } + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) + return -EPERM; + + if (ref->node->proc == target_proc) { + if (fp->hdr.type == BINDER_TYPE_HANDLE) + fp->hdr.type = BINDER_TYPE_BINDER; + else + fp->hdr.type = BINDER_TYPE_WEAK_BINDER; + fp->binder = ref->node->ptr; + fp->cookie = ref->node->cookie; + binder_inc_node(ref->node, fp->hdr.type == BINDER_TYPE_BINDER, + 0, NULL); + trace_binder_transaction_ref_to_node(t, ref); + binder_debug(BINDER_DEBUG_TRANSACTION, + " ref %d desc %d -> node %d u%016llx\n", + ref->debug_id, ref->desc, ref->node->debug_id, + (u64)ref->node->ptr); + } else { + struct binder_ref *new_ref; + + new_ref = binder_get_ref_for_node(target_proc, ref->node); + if (!new_ref) + return -EINVAL; + + fp->binder = 0; + fp->handle = new_ref->desc; + fp->cookie = 0; + binder_inc_ref(new_ref, fp->hdr.type == BINDER_TYPE_HANDLE, + NULL); + trace_binder_transaction_ref_to_ref(t, ref, new_ref); + binder_debug(BINDER_DEBUG_TRANSACTION, + " ref %d desc %d -> ref %d desc %d (node %d)\n", + ref->debug_id, ref->desc, new_ref->debug_id, + new_ref->desc, ref->node->debug_id); + } + return 0; +} + +static int binder_translate_fd(int fd, + 
struct binder_transaction *t, + struct binder_thread *thread, + struct binder_transaction *in_reply_to) +{ + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + int target_fd; + struct file *file; + int ret; + bool target_allows_fd; + + if (in_reply_to) + target_allows_fd = !!(in_reply_to->flags & TF_ACCEPT_FDS); + else + target_allows_fd = t->buffer->target_node->accept_fds; + if (!target_allows_fd) { + binder_user_error("%d:%d got %s with fd, %d, but target does not allow fds\n", + proc->pid, thread->pid, + in_reply_to ? "reply" : "transaction", + fd); + ret = -EPERM; + goto err_fd_not_accepted; + } + + file = fget(fd); + if (!file) { + binder_user_error("%d:%d got transaction with invalid fd, %d\n", + proc->pid, thread->pid, fd); + ret = -EBADF; + goto err_fget; + } + ret = security_binder_transfer_file(proc->tsk, target_proc->tsk, file); + if (ret < 0) { + ret = -EPERM; + goto err_security; + } + + target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC); + if (target_fd < 0) { + ret = -ENOMEM; + goto err_get_unused_fd; + } + task_fd_install(target_proc, target_fd, file); + trace_binder_transaction_fd(t, fd, target_fd); + binder_debug(BINDER_DEBUG_TRANSACTION, " fd %d -> %d\n", + fd, target_fd); + + return target_fd; + +err_get_unused_fd: +err_security: + fput(file); +err_fget: +err_fd_not_accepted: + return ret; +} + +static int binder_translate_fd_array(struct binder_fd_array_object *fda, + struct binder_buffer_object *parent, + struct binder_transaction *t, + struct binder_thread *thread, + struct binder_transaction *in_reply_to) +{ + binder_size_t fdi, fd_buf_size, num_installed_fds; + int target_fd; + uintptr_t parent_buffer; + u32 *fd_array; + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + + fd_buf_size = sizeof(u32) * fda->num_fds; + if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { + binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n", + proc->pid, thread->pid, (u64)fda->num_fds); + return -EINVAL; + } + if (fd_buf_size > parent->length || + fda->parent_offset > parent->length - fd_buf_size) { + /* No space for all file descriptors here. */ + binder_user_error("%d:%d not enough space to store %lld fds in buffer\n", + proc->pid, thread->pid, (u64)fda->num_fds); + return -EINVAL; + } + /* + * Since the parent was already fixed up, convert it + * back to the kernel address space to access it + */ + parent_buffer = parent->buffer - target_proc->user_buffer_offset; + fd_array = (u32 *)(parent_buffer + fda->parent_offset); + if (!IS_ALIGNED((unsigned long)fd_array, sizeof(u32))) { + binder_user_error("%d:%d parent offset not aligned correctly.\n", + proc->pid, thread->pid); + return -EINVAL; + } + for (fdi = 0; fdi < fda->num_fds; fdi++) { + target_fd = binder_translate_fd(fd_array[fdi], t, thread, + in_reply_to); + if (target_fd < 0) + goto err_translate_fd_failed; + fd_array[fdi] = target_fd; + } + return 0; + +err_translate_fd_failed: + /* + * Failed to allocate fd or security error, free fds + * installed so far. 
+ */ + num_installed_fds = fdi; + for (fdi = 0; fdi < num_installed_fds; fdi++) + task_close_fd(target_proc, fd_array[fdi]); + return target_fd; +} + +static int binder_fixup_parent(struct binder_transaction *t, + struct binder_thread *thread, + struct binder_buffer_object *bp, + binder_size_t *off_start, + binder_size_t num_valid, + struct binder_buffer_object *last_fixup_obj, + binder_size_t last_fixup_min_off) +{ + struct binder_buffer_object *parent; + u8 *parent_buffer; + struct binder_buffer *b = t->buffer; + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + + if (!(bp->flags & BINDER_BUFFER_FLAG_HAS_PARENT)) + return 0; + + parent = binder_validate_ptr(b, bp->parent, off_start, num_valid); + if (!parent) { + binder_user_error("%d:%d got transaction with invalid parent offset or type\n", + proc->pid, thread->pid); + return -EINVAL; + } + + if (!binder_validate_fixup(b, off_start, + parent, bp->parent_offset, + last_fixup_obj, + last_fixup_min_off)) { + binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n", + proc->pid, thread->pid); + return -EINVAL; + } + + if (parent->length < sizeof(binder_uintptr_t) || + bp->parent_offset > parent->length - sizeof(binder_uintptr_t)) { + /* No space for a pointer here! */ + binder_user_error("%d:%d got transaction with invalid parent offset\n", + proc->pid, thread->pid); + return -EINVAL; + } + parent_buffer = (u8 *)(parent->buffer - + target_proc->user_buffer_offset); + *(binder_uintptr_t *)(parent_buffer + bp->parent_offset) = bp->buffer; + + return 0; +} + static void binder_transaction(struct binder_proc *proc, struct binder_thread *thread, - struct binder_transaction_data *tr, int reply) + struct binder_transaction_data *tr, int reply, + binder_size_t extra_buffers_size) { + int ret; struct binder_transaction *t; struct binder_work *tcomplete; - binder_size_t *offp, *off_end; + binder_size_t *offp, *off_end, *off_start; binder_size_t off_min; + u8 *sg_bufp, *sg_buf_end; struct binder_proc *target_proc; struct binder_thread *target_thread = NULL; struct binder_node *target_node = NULL; @@ -1336,6 +1843,9 @@ static void binder_transaction(struct binder_proc *proc, struct binder_transaction *in_reply_to = NULL; struct binder_transaction_log_entry *e; uint32_t return_error; + struct binder_buffer_object *last_fixup_obj = NULL; + binder_size_t last_fixup_min_off = 0; + struct binder_context *context = proc->context; e = binder_transaction_log_add(&binder_transaction_log); e->call_type = reply ? 
2 : !!(tr->flags & TF_ONE_WAY); @@ -1344,6 +1854,7 @@ static void binder_transaction(struct binder_proc *proc, e->target_handle = tr->target.handle; e->data_size = tr->data_size; e->offsets_size = tr->offsets_size; + e->context_name = proc->context->name; if (reply) { in_reply_to = thread->transaction_stack; @@ -1396,7 +1907,7 @@ static void binder_transaction(struct binder_proc *proc, } target_node = ref->node; } else { - target_node = binder_context_mgr_node; + target_node = context->binder_context_mgr_node; if (target_node == NULL) { return_error = BR_DEAD_REPLY; goto err_no_context_mgr_node; @@ -1463,20 +1974,22 @@ static void binder_transaction(struct binder_proc *proc, if (reply) binder_debug(BINDER_DEBUG_TRANSACTION, - "%d:%d BC_REPLY %d -> %d:%d, data %016llx-%016llx size %lld-%lld\n", + "%d:%d BC_REPLY %d -> %d:%d, data %016llx-%016llx size %lld-%lld-%lld\n", proc->pid, thread->pid, t->debug_id, target_proc->pid, target_thread->pid, (u64)tr->data.ptr.buffer, (u64)tr->data.ptr.offsets, - (u64)tr->data_size, (u64)tr->offsets_size); + (u64)tr->data_size, (u64)tr->offsets_size, + (u64)extra_buffers_size); else binder_debug(BINDER_DEBUG_TRANSACTION, - "%d:%d BC_TRANSACTION %d -> %d - node %d, data %016llx-%016llx size %lld-%lld\n", + "%d:%d BC_TRANSACTION %d -> %d - node %d, data %016llx-%016llx size %lld-%lld-%lld\n", proc->pid, thread->pid, t->debug_id, target_proc->pid, target_node->debug_id, (u64)tr->data.ptr.buffer, (u64)tr->data.ptr.offsets, - (u64)tr->data_size, (u64)tr->offsets_size); + (u64)tr->data_size, (u64)tr->offsets_size, + (u64)extra_buffers_size); if (!reply && !(tr->flags & TF_ONE_WAY)) t->from = thread; @@ -1492,7 +2005,8 @@ static void binder_transaction(struct binder_proc *proc, trace_binder_transaction(reply, t, target_node); t->buffer = binder_alloc_buf(target_proc, tr->data_size, - tr->offsets_size, !reply && (t->flags & TF_ONE_WAY)); + tr->offsets_size, extra_buffers_size, + !reply && (t->flags & TF_ONE_WAY)); if (t->buffer == NULL) { return_error = BR_FAILED_REPLY; goto err_binder_alloc_buf_failed; @@ -1505,8 +2019,9 @@ static void binder_transaction(struct binder_proc *proc, if (target_node) binder_inc_node(target_node, 1, 0, NULL); - offp = (binder_size_t *)(t->buffer->data + - ALIGN(tr->data_size, sizeof(void *))); + off_start = (binder_size_t *)(t->buffer->data + + ALIGN(tr->data_size, sizeof(void *))); + offp = off_start; if (copy_from_user(t->buffer->data, (const void __user *)(uintptr_t) tr->data.ptr.buffer, tr->data_size)) { @@ -1528,177 +2043,138 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_FAILED_REPLY; goto err_bad_offset; } - off_end = (void *)offp + tr->offsets_size; + if (!IS_ALIGNED(extra_buffers_size, sizeof(u64))) { + binder_user_error("%d:%d got transaction with unaligned buffers size, %lld\n", + proc->pid, thread->pid, + (u64)extra_buffers_size); + return_error = BR_FAILED_REPLY; + goto err_bad_offset; + } + off_end = (void *)off_start + tr->offsets_size; + sg_bufp = (u8 *)(PTR_ALIGN(off_end, sizeof(void *))); + sg_buf_end = sg_bufp + extra_buffers_size; off_min = 0; for (; offp < off_end; offp++) { - struct flat_binder_object *fp; + struct binder_object_header *hdr; + size_t object_size = binder_validate_object(t->buffer, *offp); - if (*offp > t->buffer->data_size - sizeof(*fp) || - *offp < off_min || - t->buffer->data_size < sizeof(*fp) || - !IS_ALIGNED(*offp, sizeof(u32))) { - binder_user_error("%d:%d got transaction with invalid offset, %lld (min %lld, max %lld)\n", + if (object_size == 0 || *offp < 
off_min) { + binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n", proc->pid, thread->pid, (u64)*offp, (u64)off_min, - (u64)(t->buffer->data_size - - sizeof(*fp))); + (u64)t->buffer->data_size); return_error = BR_FAILED_REPLY; goto err_bad_offset; } - fp = (struct flat_binder_object *)(t->buffer->data + *offp); - off_min = *offp + sizeof(struct flat_binder_object); - switch (fp->type) { + + hdr = (struct binder_object_header *)(t->buffer->data + *offp); + off_min = *offp + object_size; + switch (hdr->type) { case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { - struct binder_ref *ref; - struct binder_node *node = binder_get_node(proc, fp->binder); + struct flat_binder_object *fp; - if (node == NULL) { - node = binder_new_node(proc, fp->binder, fp->cookie); - if (node == NULL) { - return_error = BR_FAILED_REPLY; - goto err_binder_new_node_failed; - } - node->min_priority = fp->flags & FLAT_BINDER_FLAG_PRIORITY_MASK; - node->accept_fds = !!(fp->flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); - } - if (fp->cookie != node->cookie) { - binder_user_error("%d:%d sending u%016llx node %d, cookie mismatch %016llx != %016llx\n", - proc->pid, thread->pid, - (u64)fp->binder, node->debug_id, - (u64)fp->cookie, (u64)node->cookie); - return_error = BR_FAILED_REPLY; - goto err_binder_get_ref_for_node_failed; - } - if (security_binder_transfer_binder(proc->tsk, - target_proc->tsk)) { + fp = to_flat_binder_object(hdr); + ret = binder_translate_binder(fp, t, thread); + if (ret < 0) { return_error = BR_FAILED_REPLY; - goto err_binder_get_ref_for_node_failed; + goto err_translate_failed; } - ref = binder_get_ref_for_node(target_proc, node); - if (ref == NULL) { - return_error = BR_FAILED_REPLY; - goto err_binder_get_ref_for_node_failed; - } - if (fp->type == BINDER_TYPE_BINDER) - fp->type = BINDER_TYPE_HANDLE; - else - fp->type = BINDER_TYPE_WEAK_HANDLE; - fp->binder = 0; - fp->handle = ref->desc; - fp->cookie = 0; - binder_inc_ref(ref, fp->type == BINDER_TYPE_HANDLE, - &thread->todo); - - trace_binder_transaction_node_to_ref(t, node, ref); - binder_debug(BINDER_DEBUG_TRANSACTION, - " node %d u%016llx -> ref %d desc %d\n", - node->debug_id, (u64)node->ptr, - ref->debug_id, ref->desc); } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref; + struct flat_binder_object *fp; - ref = binder_get_ref(proc, fp->handle, - fp->type == BINDER_TYPE_HANDLE); + fp = to_flat_binder_object(hdr); + ret = binder_translate_handle(fp, t, thread); + if (ret < 0) { + return_error = BR_FAILED_REPLY; + goto err_translate_failed; + } + } break; - if (ref == NULL) { - binder_user_error("%d:%d got transaction with invalid handle, %d\n", - proc->pid, - thread->pid, fp->handle); + case BINDER_TYPE_FD: { + struct binder_fd_object *fp = to_binder_fd_object(hdr); + int target_fd = binder_translate_fd(fp->fd, t, thread, + in_reply_to); + + if (target_fd < 0) { return_error = BR_FAILED_REPLY; - goto err_binder_get_ref_failed; + goto err_translate_failed; } - if (security_binder_transfer_binder(proc->tsk, - target_proc->tsk)) { + fp->pad_binder = 0; + fp->fd = target_fd; + } break; + case BINDER_TYPE_FDA: { + struct binder_fd_array_object *fda = + to_binder_fd_array_object(hdr); + struct binder_buffer_object *parent = + binder_validate_ptr(t->buffer, fda->parent, + off_start, + offp - off_start); + if (!parent) { + binder_user_error("%d:%d got transaction with invalid parent offset or type\n", + proc->pid, thread->pid); return_error = BR_FAILED_REPLY; - goto 
err_binder_get_ref_failed; + goto err_bad_parent; } - if (ref->node->proc == target_proc) { - if (fp->type == BINDER_TYPE_HANDLE) - fp->type = BINDER_TYPE_BINDER; - else - fp->type = BINDER_TYPE_WEAK_BINDER; - fp->binder = ref->node->ptr; - fp->cookie = ref->node->cookie; - binder_inc_node(ref->node, fp->type == BINDER_TYPE_BINDER, 0, NULL); - trace_binder_transaction_ref_to_node(t, ref); - binder_debug(BINDER_DEBUG_TRANSACTION, - " ref %d desc %d -> node %d u%016llx\n", - ref->debug_id, ref->desc, ref->node->debug_id, - (u64)ref->node->ptr); - } else { - struct binder_ref *new_ref; - - new_ref = binder_get_ref_for_node(target_proc, ref->node); - if (new_ref == NULL) { - return_error = BR_FAILED_REPLY; - goto err_binder_get_ref_for_node_failed; - } - fp->binder = 0; - fp->handle = new_ref->desc; - fp->cookie = 0; - binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL); - trace_binder_transaction_ref_to_ref(t, ref, - new_ref); - binder_debug(BINDER_DEBUG_TRANSACTION, - " ref %d desc %d -> ref %d desc %d (node %d)\n", - ref->debug_id, ref->desc, new_ref->debug_id, - new_ref->desc, ref->node->debug_id); + if (!binder_validate_fixup(t->buffer, off_start, + parent, fda->parent_offset, + last_fixup_obj, + last_fixup_min_off)) { + binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + goto err_bad_parent; } - } break; - - case BINDER_TYPE_FD: { - int target_fd; - struct file *file; - - if (reply) { - if (!(in_reply_to->flags & TF_ACCEPT_FDS)) { - binder_user_error("%d:%d got reply with fd, %d, but target does not allow fds\n", - proc->pid, thread->pid, fp->handle); - return_error = BR_FAILED_REPLY; - goto err_fd_not_allowed; - } - } else if (!target_node->accept_fds) { - binder_user_error("%d:%d got transaction with fd, %d, but target does not allow fds\n", - proc->pid, thread->pid, fp->handle); + ret = binder_translate_fd_array(fda, parent, t, thread, + in_reply_to); + if (ret < 0) { return_error = BR_FAILED_REPLY; - goto err_fd_not_allowed; + goto err_translate_failed; } - - file = fget(fp->handle); - if (file == NULL) { - binder_user_error("%d:%d got transaction with invalid fd, %d\n", - proc->pid, thread->pid, fp->handle); + last_fixup_obj = parent; + last_fixup_min_off = + fda->parent_offset + sizeof(u32) * fda->num_fds; + } break; + case BINDER_TYPE_PTR: { + struct binder_buffer_object *bp = + to_binder_buffer_object(hdr); + size_t buf_left = sg_buf_end - sg_bufp; + + if (bp->length > buf_left) { + binder_user_error("%d:%d got transaction with too large buffer\n", + proc->pid, thread->pid); return_error = BR_FAILED_REPLY; - goto err_fget_failed; + goto err_bad_offset; } - if (security_binder_transfer_file(proc->tsk, - target_proc->tsk, - file) < 0) { - fput(file); + if (copy_from_user(sg_bufp, + (const void __user *)(uintptr_t) + bp->buffer, bp->length)) { + binder_user_error("%d:%d got transaction with invalid offsets ptr\n", + proc->pid, thread->pid); return_error = BR_FAILED_REPLY; - goto err_get_unused_fd_failed; + goto err_copy_data_failed; } - target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC); - if (target_fd < 0) { - fput(file); + /* Fixup buffer pointer to target proc address space */ + bp->buffer = (uintptr_t)sg_bufp + + target_proc->user_buffer_offset; + sg_bufp += ALIGN(bp->length, sizeof(u64)); + + ret = binder_fixup_parent(t, thread, bp, off_start, + offp - off_start, + last_fixup_obj, + last_fixup_min_off); + if (ret < 0) { return_error = BR_FAILED_REPLY; - goto 
err_get_unused_fd_failed; + goto err_translate_failed; } - task_fd_install(target_proc, target_fd, file); - trace_binder_transaction_fd(t, fp->handle, target_fd); - binder_debug(BINDER_DEBUG_TRANSACTION, - " fd %d -> %d\n", fp->handle, target_fd); - /* TODO: fput? */ - fp->binder = 0; - fp->handle = target_fd; + last_fixup_obj = bp; + last_fixup_min_off = 0; } break; - default: binder_user_error("%d:%d got transaction with invalid object type, %x\n", - proc->pid, thread->pid, fp->type); + proc->pid, thread->pid, hdr->type); return_error = BR_FAILED_REPLY; goto err_bad_object_type; } @@ -1728,14 +2204,10 @@ static void binder_transaction(struct binder_proc *proc, wake_up_interruptible(target_wait); return; -err_get_unused_fd_failed: -err_fget_failed: -err_fd_not_allowed: -err_binder_get_ref_for_node_failed: -err_binder_get_ref_failed: -err_binder_new_node_failed: +err_translate_failed: err_bad_object_type: err_bad_offset: +err_bad_parent: err_copy_data_failed: trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, t->buffer, offp); @@ -1779,6 +2251,7 @@ static int binder_thread_write(struct binder_proc *proc, binder_size_t *consumed) { uint32_t cmd; + struct binder_context *context = proc->context; void __user *buffer = (void __user *)(uintptr_t)binder_buffer; void __user *ptr = buffer + *consumed; void __user *end = buffer + size; @@ -1805,10 +2278,10 @@ static int binder_thread_write(struct binder_proc *proc, if (get_user(target, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); - if (target == 0 && binder_context_mgr_node && + if (target == 0 && context->binder_context_mgr_node && (cmd == BC_INCREFS || cmd == BC_ACQUIRE)) { ref = binder_get_ref_for_node(proc, - binder_context_mgr_node); + context->binder_context_mgr_node); if (ref->desc != target) { binder_user_error("%d:%d tried to acquire reference to desc 0, got %d instead\n", proc->pid, thread->pid, @@ -1953,6 +2426,17 @@ static int binder_thread_write(struct binder_proc *proc, break; } + case BC_TRANSACTION_SG: + case BC_REPLY_SG: { + struct binder_transaction_data_sg tr; + + if (copy_from_user(&tr, ptr, sizeof(tr))) + return -EFAULT; + ptr += sizeof(tr); + binder_transaction(proc, thread, &tr.transaction_data, + cmd == BC_REPLY_SG, tr.buffers_size); + break; + } case BC_TRANSACTION: case BC_REPLY: { struct binder_transaction_data tr; @@ -1960,7 +2444,8 @@ static int binder_thread_write(struct binder_proc *proc, if (copy_from_user(&tr, ptr, sizeof(tr))) return -EFAULT; ptr += sizeof(tr); - binder_transaction(proc, thread, &tr, cmd == BC_REPLY); + binder_transaction(proc, thread, &tr, + cmd == BC_REPLY, 0); break; } @@ -2714,9 +3199,11 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) { int ret = 0; struct binder_proc *proc = filp->private_data; + struct binder_context *context = proc->context; + kuid_t curr_euid = current_euid(); - if (binder_context_mgr_node != NULL) { + if (context->binder_context_mgr_node) { pr_err("BINDER_SET_CONTEXT_MGR already set\n"); ret = -EBUSY; goto out; @@ -2724,27 +3211,27 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) ret = security_binder_set_context_mgr(proc->tsk); if (ret < 0) goto out; - if (uid_valid(binder_context_mgr_uid)) { - if (!uid_eq(binder_context_mgr_uid, curr_euid)) { + if (uid_valid(context->binder_context_mgr_uid)) { + if (!uid_eq(context->binder_context_mgr_uid, curr_euid)) { pr_err("BINDER_SET_CONTEXT_MGR bad uid %d != %d\n", from_kuid(&init_user_ns, curr_euid), from_kuid(&init_user_ns, - 
binder_context_mgr_uid)); + ret = -EPERM; + goto out; + } + } else { - binder_context_mgr_uid = curr_euid; + context->binder_context_mgr_uid = curr_euid; } - binder_context_mgr_node = binder_new_node(proc, 0, 0); - if (binder_context_mgr_node == NULL) { + context->binder_context_mgr_node = binder_new_node(proc, 0, 0); + if (!context->binder_context_mgr_node) { ret = -ENOMEM; goto out; } - binder_context_mgr_node->local_weak_refs++; - binder_context_mgr_node->local_strong_refs++; - binder_context_mgr_node->has_strong_ref = 1; - binder_context_mgr_node->has_weak_ref = 1; + context->binder_context_mgr_node->local_weak_refs++; + context->binder_context_mgr_node->local_strong_refs++; + context->binder_context_mgr_node->has_strong_ref = 1; + context->binder_context_mgr_node->has_weak_ref = 1; out: return ret; } @@ -2969,6 +3456,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) static int binder_open(struct inode *nodp, struct file *filp) { struct binder_proc *proc; + struct binder_device *binder_dev; binder_debug(BINDER_DEBUG_OPEN_CLOSE, "binder_open: %d:%d\n", current->group_leader->pid, current->pid); @@ -2982,6 +3470,9 @@ static int binder_open(struct inode *nodp, struct file *filp) INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->wait); proc->default_priority = task_nice(current); + binder_dev = container_of(filp->private_data, struct binder_device, + miscdev); + proc->context = &binder_dev->context; binder_lock(__func__); @@ -2997,8 +3488,17 @@ static int binder_open(struct inode *nodp, struct file *filp) char strbuf[11]; snprintf(strbuf, sizeof(strbuf), "%u", proc->pid); + /* + * proc debug entries are shared between contexts, so + * this will fail if the process tries to open the driver + * again with a different context. The printing code will + * anyway print all contexts that a given PID has, so this + * is not a problem. 
+ */ proc->debugfs_entry = debugfs_create_file(strbuf, S_IRUGO, - binder_debugfs_dir_entry_proc, proc, &binder_proc_fops); + binder_debugfs_dir_entry_proc, + (void *)(unsigned long)proc->pid, + &binder_proc_fops); } return 0; @@ -3091,6 +3591,7 @@ static int binder_node_release(struct binder_node *node, int refs) static void binder_deferred_release(struct binder_proc *proc) { struct binder_transaction *t; + struct binder_context *context = proc->context; struct rb_node *n; int threads, nodes, incoming_refs, outgoing_refs, buffers, active_transactions, page_count; @@ -3100,11 +3601,12 @@ static void binder_deferred_release(struct binder_proc *proc) hlist_del(&proc->proc_node); - if (binder_context_mgr_node && binder_context_mgr_node->proc == proc) { + if (context->binder_context_mgr_node && + context->binder_context_mgr_node->proc == proc) { binder_debug(BINDER_DEBUG_DEAD_BINDER, "%s: %d context_mgr_node gone\n", __func__, proc->pid); - binder_context_mgr_node = NULL; + context->binder_context_mgr_node = NULL; } threads = 0; @@ -3391,6 +3893,7 @@ static void print_binder_proc(struct seq_file *m, size_t header_pos; seq_printf(m, "proc %d\n", proc->pid); + seq_printf(m, "context %s\n", proc->context->name); header_pos = m->count; for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) @@ -3460,7 +3963,9 @@ static const char * const binder_command_strings[] = { "BC_EXIT_LOOPER", "BC_REQUEST_DEATH_NOTIFICATION", "BC_CLEAR_DEATH_NOTIFICATION", - "BC_DEAD_BINDER_DONE" + "BC_DEAD_BINDER_DONE", + "BC_TRANSACTION_SG", + "BC_REPLY_SG", }; static const char * const binder_objstat_strings[] = { @@ -3515,6 +4020,7 @@ static void print_binder_proc_stats(struct seq_file *m, int count, strong, weak; seq_printf(m, "proc %d\n", proc->pid); + seq_printf(m, "context %s\n", proc->context->name); count = 0; for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) count++; @@ -3622,23 +4128,18 @@ static int binder_transactions_show(struct seq_file *m, void *unused) static int binder_proc_show(struct seq_file *m, void *unused) { struct binder_proc *itr; - struct binder_proc *proc = m->private; + int pid = (unsigned long)m->private; int do_lock = !binder_debug_no_lock; - bool valid_proc = false; if (do_lock) binder_lock(__func__); hlist_for_each_entry(itr, &binder_procs, proc_node) { - if (itr == proc) { - valid_proc = true; - break; + if (itr->pid == pid) { + seq_puts(m, "binder proc state:\n"); + print_binder_proc(m, itr, 1); } } - if (valid_proc) { - seq_puts(m, "binder proc state:\n"); - print_binder_proc(m, proc, 1); - } if (do_lock) binder_unlock(__func__); return 0; @@ -3648,11 +4149,11 @@ static void print_binder_transaction_log_entry(struct seq_file *m, struct binder_transaction_log_entry *e) { seq_printf(m, - "%d: %s from %d:%d to %d:%d node %d handle %d size %d:%d\n", + "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d\n", e->debug_id, (e->call_type == 2) ? "reply" : ((e->call_type == 1) ? 
"async" : "call "), e->from_proc, - e->from_thread, e->to_proc, e->to_thread, e->to_node, - e->target_handle, e->data_size, e->offsets_size); + e->from_thread, e->to_proc, e->to_thread, e->context_name, + e->to_node, e->target_handle, e->data_size, e->offsets_size); } static int binder_transaction_log_show(struct seq_file *m, void *unused) @@ -3680,26 +4181,50 @@ static const struct file_operations binder_fops = { .release = binder_release, }; -static struct miscdevice binder_miscdev = { - .minor = MISC_DYNAMIC_MINOR, - .name = "binder", - .fops = &binder_fops -}; - BINDER_DEBUG_ENTRY(state); BINDER_DEBUG_ENTRY(stats); BINDER_DEBUG_ENTRY(transactions); BINDER_DEBUG_ENTRY(transaction_log); +static int __init init_binder_device(const char *name) +{ + int ret; + struct binder_device *binder_device; + + binder_device = kzalloc(sizeof(*binder_device), GFP_KERNEL); + if (!binder_device) + return -ENOMEM; + + binder_device->miscdev.fops = &binder_fops; + binder_device->miscdev.minor = MISC_DYNAMIC_MINOR; + binder_device->miscdev.name = name; + + binder_device->context.binder_context_mgr_uid = INVALID_UID; + binder_device->context.name = name; + + ret = misc_register(&binder_device->miscdev); + if (ret < 0) { + kfree(binder_device); + return ret; + } + + hlist_add_head(&binder_device->hlist, &binder_devices); + + return ret; +} + static int __init binder_init(void) { int ret; + char *device_name, *device_names; + struct binder_device *device; + struct hlist_node *tmp; binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL); if (binder_debugfs_dir_entry_root) binder_debugfs_dir_entry_proc = debugfs_create_dir("proc", binder_debugfs_dir_entry_root); - ret = misc_register(&binder_miscdev); + if (binder_debugfs_dir_entry_root) { debugfs_create_file("state", S_IRUGO, @@ -3727,6 +4252,35 @@ static int __init binder_init(void) &binder_transaction_log_failed, &binder_transaction_log_fops); } + + /* + * Copy the module_parameter string, because we don't want to + * tokenize it in-place. + */ + device_names = kzalloc(strlen(binder_devices_param) + 1, GFP_KERNEL); + if (!device_names) { + ret = -ENOMEM; + goto err_alloc_device_names_failed; + } + strcpy(device_names, binder_devices_param); + + while ((device_name = strsep(&device_names, ","))) { + ret = init_binder_device(device_name); + if (ret) + goto err_init_binder_device_failed; + } + + return ret; + +err_init_binder_device_failed: + hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) { + misc_deregister(&device->miscdev); + hlist_del(&device->hlist); + kfree(device); + } +err_alloc_device_names_failed: + debugfs_remove_recursive(binder_debugfs_dir_entry_root); + return ret; } diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 223a770f78f36557f64d9db66635bd0cae8b4f42..33e363dcc63bf8581f9d1cc38f666083e88c9f4b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1695,6 +1695,8 @@ unsigned ata_exec_internal_sg(struct ata_device *dev, if (qc->err_mask & ~AC_ERR_OTHER) qc->err_mask &= ~AC_ERR_OTHER; + } else if (qc->tf.command == ATA_CMD_REQ_SENSE_DATA) { + qc->result_tf.command |= ATA_SENSE; } /* finish up */ @@ -4316,10 +4318,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "ST380013AS", "3.20", ATA_HORKAGE_MAX_SEC_1024 }, /* - * Device times out with higher max sects. + * These devices time out with higher max sects. 
* https://bugzilla.kernel.org/show_bug.cgi?id=121671 */ - { "LITEON CX1-JB256-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 }, + { "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 }, /* Devices we expect to fail diagnostics */ diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 823e938c9a7877a1cadefde9127d447832630061..2f32782cea6d9c584797d1f7d9dc8e99eac0b796 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -4132,6 +4132,9 @@ static int mv_platform_probe(struct platform_device *pdev) host->iomap = NULL; hpriv->base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); + if (!hpriv->base) + return -ENOMEM; + hpriv->base -= SATAHC0_REG_BASE; hpriv->clk = clk_get(&pdev->dev, NULL); diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 958e2555cff73acd5f714c3c155e3267ba059aa1..26cf6b914f49f49f57a44a2bf40fb6798c1d62d7 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -956,13 +956,14 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, timeout = MAX_JIFFY_OFFSET; } - retval = wait_for_completion_interruptible_timeout(&buf->completion, + timeout = wait_for_completion_interruptible_timeout(&buf->completion, timeout); - if (retval == -ERESTARTSYS || !retval) { + if (timeout == -ERESTARTSYS || !timeout) { + retval = timeout; mutex_lock(&fw_lock); fw_load_abort(fw_priv); mutex_unlock(&fw_lock); - } else if (retval > 0) { + } else if (timeout > 0) { retval = 0; } diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 62c63c0c5c22dcfee3e2d706e68be053519cc40c..c5cdd190b7816ce446d6d7b06235c5f995c4c3f2 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -391,33 +391,33 @@ static ssize_t show_valid_zones(struct device *dev, { struct memory_block *mem = to_memory_block(dev); unsigned long start_pfn, end_pfn; + unsigned long valid_start, valid_end, valid_pages; unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; - struct page *first_page; struct zone *zone; int zone_shift = 0; start_pfn = section_nr_to_pfn(mem->start_section_nr); end_pfn = start_pfn + nr_pages; - first_page = pfn_to_page(start_pfn); /* The block contains more than one zone can not be offlined. 
*/ - if (!test_pages_in_a_zone(start_pfn, end_pfn)) + if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) return sprintf(buf, "none\n"); - zone = page_zone(first_page); + zone = page_zone(pfn_to_page(valid_start)); + valid_pages = valid_end - valid_start; /* MMOP_ONLINE_KEEP */ sprintf(buf, "%s", zone->name); /* MMOP_ONLINE_KERNEL */ - zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL); + zone_can_shift(valid_start, valid_pages, ZONE_NORMAL, &zone_shift); if (zone_shift) { strcat(buf, " "); strcat(buf, (zone + zone_shift)->name); } /* MMOP_ONLINE_MOVABLE */ - zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE); + zone_can_shift(valid_start, valid_pages, ZONE_MOVABLE, &zone_shift); if (zone_shift) { strcat(buf, " "); strcat(buf, (zone + zone_shift)->name); diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 4c7c6da7a989187e32efbb284f3daeb518ca0435..6441dfda489f80a404bacd6132d8900b9d98c6bf 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -584,6 +584,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) struct clk *clk; unsigned long freq, old_freq; unsigned long u_volt, u_volt_min, u_volt_max; + unsigned long old_u_volt, old_u_volt_min, old_u_volt_max; int ret; if (unlikely(!target_freq)) { @@ -633,6 +634,14 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) return ret; } + if (IS_ERR(old_opp)) { + old_u_volt = 0; + } else { + old_u_volt = old_opp->u_volt; + old_u_volt_min = old_opp->u_volt_min; + old_u_volt_max = old_opp->u_volt_max; + } + u_volt = opp->u_volt; u_volt_min = opp->u_volt_min; u_volt_max = opp->u_volt_max; @@ -677,9 +686,10 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) __func__, old_freq); restore_voltage: /* This shouldn't harm even if the voltages weren't updated earlier */ - if (!IS_ERR(old_opp)) - _set_opp_voltage(dev, reg, old_opp->u_volt, - old_opp->u_volt_min, old_opp->u_volt_max); + if (old_u_volt) { + _set_opp_voltage(dev, reg, old_u_volt, old_u_volt_min, + old_u_volt_max); + } return ret; } @@ -1316,7 +1326,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name); * that this function is *NOT* called under RCU protection or in contexts where * mutex cannot be locked. */ -int dev_pm_opp_set_regulator(struct device *dev, const char *name) +struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name) { struct opp_table *opp_table; struct regulator *reg; @@ -1354,20 +1364,20 @@ int dev_pm_opp_set_regulator(struct device *dev, const char *name) opp_table->regulator = reg; mutex_unlock(&opp_table_lock); - return 0; + return opp_table; err: _remove_opp_table(opp_table); unlock: mutex_unlock(&opp_table_lock); - return ret; + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator); /** * dev_pm_opp_put_regulator() - Releases resources blocked for regulator - * @dev: Device for which regulator was set. + * @opp_table: OPP table returned from dev_pm_opp_set_regulator(). * * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks @@ -1375,22 +1385,12 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator); * that this function is *NOT* called under RCU protection or in contexts where * mutex cannot be locked. 
*/ -void dev_pm_opp_put_regulator(struct device *dev) +void dev_pm_opp_put_regulator(struct opp_table *opp_table) { - struct opp_table *opp_table; - mutex_lock(&opp_table_lock); - /* Check for existing table for 'dev' first */ - opp_table = _find_opp_table(dev); - if (IS_ERR(opp_table)) { - dev_err(dev, "Failed to find opp_table: %ld\n", - PTR_ERR(opp_table)); - goto unlock; - } - if (IS_ERR(opp_table->regulator)) { - dev_err(dev, "%s: Doesn't have regulator set\n", __func__); + pr_err("%s: Doesn't have regulator set\n", __func__); goto unlock; } diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 50e30e7b059d18a034c80b2a71fd846a098189a1..a84332aefc2da73aa3252877cf970efe9e2b9f77 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -21,14 +21,22 @@ extern void pm_runtime_init(struct device *dev); extern void pm_runtime_reinit(struct device *dev); extern void pm_runtime_remove(struct device *dev); +#define WAKE_IRQ_DEDICATED_ALLOCATED BIT(0) +#define WAKE_IRQ_DEDICATED_MANAGED BIT(1) +#define WAKE_IRQ_DEDICATED_MASK (WAKE_IRQ_DEDICATED_ALLOCATED | \ + WAKE_IRQ_DEDICATED_MANAGED) + struct wake_irq { struct device *dev; + unsigned int status; int irq; - bool dedicated_irq:1; }; extern void dev_pm_arm_wake_irq(struct wake_irq *wirq); extern void dev_pm_disarm_wake_irq(struct wake_irq *wirq); +extern void dev_pm_enable_wake_irq_check(struct device *dev, + bool can_change_status); +extern void dev_pm_disable_wake_irq_check(struct device *dev); #ifdef CONFIG_PM_SLEEP @@ -104,6 +112,15 @@ static inline void dev_pm_disarm_wake_irq(struct wake_irq *wirq) { } +static inline void dev_pm_enable_wake_irq_check(struct device *dev, + bool can_change_status) +{ +} + +static inline void dev_pm_disable_wake_irq_check(struct device *dev) +{ +} + #endif #ifdef CONFIG_PM_SLEEP diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 82a081ea43176ca8e1d8accd886ad7065287a8aa..23f3b95a1158bc44b88a0f2a510a12516138141a 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -515,7 +515,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) callback = RPM_GET_CALLBACK(dev, runtime_suspend); - dev_pm_enable_wake_irq(dev); + dev_pm_enable_wake_irq_check(dev, true); retval = rpm_callback(callback, dev); if (retval) goto fail; @@ -554,7 +554,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) return retval; fail: - dev_pm_disable_wake_irq(dev); + dev_pm_disable_wake_irq_check(dev); __update_runtime_status(dev, RPM_ACTIVE); dev->power.deferred_resume = false; wake_up_all(&dev->power.wait_queue); @@ -737,12 +737,12 @@ static int rpm_resume(struct device *dev, int rpmflags) callback = RPM_GET_CALLBACK(dev, runtime_resume); - dev_pm_disable_wake_irq(dev); + dev_pm_disable_wake_irq_check(dev); retval = rpm_callback(callback, dev); if (retval) { __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_cancel_pending(dev); - dev_pm_enable_wake_irq(dev); + dev_pm_enable_wake_irq_check(dev, false); } else { no_callback: __update_runtime_status(dev, RPM_ACTIVE); diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 0d77cd6fd8d11653c789fa8834e8eb53b801a6d9..404d94c6c8bc6a4531b0666a77c5b86daab207a6 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -110,8 +110,10 @@ void dev_pm_clear_wake_irq(struct device *dev) dev->power.wakeirq = NULL; spin_unlock_irqrestore(&dev->power.lock, flags); - if (wirq->dedicated_irq) + if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED) { 
free_irq(wirq->irq, wirq); + wirq->status &= ~WAKE_IRQ_DEDICATED_MASK; + } kfree(wirq); } EXPORT_SYMBOL_GPL(dev_pm_clear_wake_irq); @@ -179,7 +181,6 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) wirq->dev = dev; wirq->irq = irq; - wirq->dedicated_irq = true; irq_set_status_flags(irq, IRQ_NOAUTOEN); /* @@ -195,6 +196,8 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) if (err) goto err_free_irq; + wirq->status = WAKE_IRQ_DEDICATED_ALLOCATED; + return err; err_free_irq: @@ -210,9 +213,9 @@ EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq); * dev_pm_enable_wake_irq - Enable device wake-up interrupt * @dev: Device * - * Called from the bus code or the device driver for - * runtime_suspend() to enable the wake-up interrupt while - * the device is running. + * Optionally called from the bus code or the device driver for + * runtime_resume() to override the PM runtime core managed wake-up + * interrupt handling to enable the wake-up interrupt. * * Note that for runtime_suspend()) the wake-up interrupts * should be unconditionally enabled unlike for suspend() @@ -222,7 +225,7 @@ void dev_pm_enable_wake_irq(struct device *dev) { struct wake_irq *wirq = dev->power.wakeirq; - if (wirq && wirq->dedicated_irq) + if (wirq && (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)) enable_irq(wirq->irq); } EXPORT_SYMBOL_GPL(dev_pm_enable_wake_irq); @@ -231,19 +234,72 @@ EXPORT_SYMBOL_GPL(dev_pm_enable_wake_irq); * dev_pm_disable_wake_irq - Disable device wake-up interrupt * @dev: Device * - * Called from the bus code or the device driver for - * runtime_resume() to disable the wake-up interrupt while - * the device is running. + * Optionally called from the bus code or the device driver for - * runtime_suspend() to override the PM runtime core managed wake-up + * interrupt handling to disable the wake-up interrupt. */ void dev_pm_disable_wake_irq(struct device *dev) { struct wake_irq *wirq = dev->power.wakeirq; - if (wirq && wirq->dedicated_irq) + if (wirq && (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)) disable_irq_nosync(wirq->irq); } EXPORT_SYMBOL_GPL(dev_pm_disable_wake_irq); +/** + * dev_pm_enable_wake_irq_check - Checks and enables wake-up interrupt + * @dev: Device + * @can_change_status: Can change wake-up interrupt status + * + * Enables wakeirq conditionally. We need to enable wake-up interrupt + * lazily on the first rpm_suspend(). This is needed as the consumer device + * starts in RPM_SUSPENDED state, and the first pm_runtime_get() would + * otherwise try to disable already disabled wakeirq. The wake-up interrupt + * starts disabled with IRQ_NOAUTOEN set. + * + * Should be only called from rpm_suspend() and rpm_resume() path. + * Caller must hold &dev->power.lock to change wirq->status + */ +void dev_pm_enable_wake_irq_check(struct device *dev, + bool can_change_status) +{ + struct wake_irq *wirq = dev->power.wakeirq; + + if (!wirq || !((wirq->status & WAKE_IRQ_DEDICATED_MASK))) + return; + + if (likely(wirq->status & WAKE_IRQ_DEDICATED_MANAGED)) { + goto enable; + } else if (can_change_status) { + wirq->status |= WAKE_IRQ_DEDICATED_MANAGED; + goto enable; + } + + return; + +enable: + enable_irq(wirq->irq); +} + +/** + * dev_pm_disable_wake_irq_check - Checks and disables wake-up interrupt + * @dev: Device + * + * Disables wake-up interrupt conditionally based on status. + * Should be only called from rpm_suspend() and rpm_resume() path.
+ */ +void dev_pm_disable_wake_irq_check(struct device *dev) +{ + struct wake_irq *wirq = dev->power.wakeirq; + + if (!wirq || !((wirq->status & WAKE_IRQ_DEDICATED_MASK))) + return; + + if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED) + disable_irq_nosync(wirq->irq); +} + /** * dev_pm_arm_wake_irq - Arm device wake-up * @wirq: Device wake-up interrupt diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index f642c4264c277bc05d98dc99eb15ac8091886ba5..168fa175d65a08319f5e92ad10f8f5ff4baea54a 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -45,6 +45,9 @@ int bcma_sprom_get(struct bcma_bus *bus); void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc); void bcma_core_chipcommon_init(struct bcma_drv_cc *cc); void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable); +#ifdef CONFIG_BCMA_DRIVER_MIPS +void bcma_chipco_serial_init(struct bcma_drv_cc *cc); +#endif /* CONFIG_BCMA_DRIVER_MIPS */ /* driver_chipcommon_b.c */ int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb); diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index b4f6520e74f05b8b7a0c1ec710042c0f38a02d7d..62f5bfa5065d919ee3acd9e596373923070e574a 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -15,8 +15,6 @@ #include #include -static void bcma_chipco_serial_init(struct bcma_drv_cc *cc); - static inline u32 bcma_cc_write32_masked(struct bcma_drv_cc *cc, u16 offset, u32 mask, u32 value) { @@ -186,9 +184,6 @@ void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc) if (cc->capabilities & BCMA_CC_CAP_PMU) bcma_pmu_early_init(cc); - if (IS_BUILTIN(CONFIG_BCM47XX) && bus->hosttype == BCMA_HOSTTYPE_SOC) - bcma_chipco_serial_init(cc); - if (bus->hosttype == BCMA_HOSTTYPE_SOC) bcma_core_chipcommon_flash_detect(cc); @@ -378,9 +373,9 @@ u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value) return res; } -static void bcma_chipco_serial_init(struct bcma_drv_cc *cc) +#ifdef CONFIG_BCMA_DRIVER_MIPS +void bcma_chipco_serial_init(struct bcma_drv_cc *cc) { -#if IS_BUILTIN(CONFIG_BCM47XX) unsigned int irq; u32 baud_base; u32 i; @@ -422,5 +417,5 @@ static void bcma_chipco_serial_init(struct bcma_drv_cc *cc) ports[i].baud_base = baud_base; ports[i].reg_shift = 0; } -#endif /* CONFIG_BCM47XX */ } +#endif /* CONFIG_BCMA_DRIVER_MIPS */ diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index 96f17132820080843e9216523bc1328b2f8f7939..89af807cf29ce49e38f60e9e1c3e177ceb0e261a 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -278,9 +278,12 @@ static void bcma_core_mips_nvram_init(struct bcma_drv_mips *mcore) void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { + struct bcma_bus *bus = mcore->core->bus; + if (mcore->early_setup_done) return; + bcma_chipco_serial_init(&bus->drv_cc); bcma_core_mips_nvram_init(mcore); mcore->early_setup_done = true; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index fa1b7a90ba11deecda3ba5853b545fc2fc0dc579..4af81876679766d8e3609daf43302d8967b68d00 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1646,7 +1646,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); if (lo->lo_state != Lo_bound) - return -EIO; + return BLK_MQ_RQ_QUEUE_ERROR; switch (req_op(cmd->rq)) { case REQ_OP_FLUSH: diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 5545a679abd8887123fc83d57beb37dede77a685..3c3b8f601469a1e4ac2bc332feebc0eab321773b 100644 --- 
a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -56,6 +56,7 @@ struct virtblk_req { struct virtio_blk_outhdr out_hdr; struct virtio_scsi_inhdr in_hdr; u8 status; + u8 sense[SCSI_SENSE_BUFFERSIZE]; struct scatterlist sg[]; }; @@ -102,7 +103,8 @@ static int __virtblk_add_req(struct virtqueue *vq, } if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) { - sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE); + memcpy(vbr->sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE); + sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE); sgs[num_out + num_in++] = &sense; sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr)); sgs[num_out + num_in++] = &inhdr; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 5497f7fc44d04c0287a6c51d968a7df0165a37c3..d2ef51ca9cf47c514dfe40cb3f822041ce9cc2b5 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -111,6 +112,14 @@ static inline bool is_partial_io(struct bio_vec *bvec) return bvec->bv_len != PAGE_SIZE; } +static void zram_revalidate_disk(struct zram *zram) +{ + revalidate_disk(zram->disk); + /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */ + zram->disk->queue->backing_dev_info.capabilities |= + BDI_CAP_STABLE_WRITES; +} + /* * Check if request is within bounds and aligned on zram logical blocks. */ @@ -1094,15 +1103,9 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + zram_revalidate_disk(zram); up_write(&zram->init_lock); - /* - * Revalidate disk out of the init_lock to avoid lockdep splat. - * It's okay because disk's capacity is protected by init_lock - * so that revalidate_disk always sees up-to-date capacity. 
- */ - revalidate_disk(zram->disk); - return len; out_destroy_comp: @@ -1148,7 +1151,7 @@ static ssize_t reset_store(struct device *dev, /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - revalidate_disk(zram->disk); + zram_revalidate_disk(zram); bdput(bdev); mutex_lock(&bdev->bd_mutex); diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index d1074d9b38ba163e624ea1b996df38ad2164f285..aee83462b796bde9b8f6b2e7d500e9d5d5320288 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -1570,7 +1570,10 @@ static int __init arm_ccn_init(void) for (i = 0; i < ARRAY_SIZE(arm_ccn_pmu_events); i++) arm_ccn_pmu_events_attrs[i] = &arm_ccn_pmu_events[i].attr.attr; - return platform_driver_register(&arm_ccn_driver); + ret = platform_driver_register(&arm_ccn_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE); + return ret; } static void __exit arm_ccn_exit(void) diff --git a/drivers/bus/vexpress-config.c b/drivers/bus/vexpress-config.c index 9efdf1de4035e226825c8435f72f07e8055584d2..493e7b9fc81357bc7366539cc4a8eaf22413be7f 100644 --- a/drivers/bus/vexpress-config.c +++ b/drivers/bus/vexpress-config.c @@ -171,6 +171,7 @@ static int vexpress_config_populate(struct device_node *node) { struct device_node *bridge; struct device *parent; + int ret; bridge = of_parse_phandle(node, "arm,vexpress,config-bridge", 0); if (!bridge) @@ -182,7 +183,11 @@ static int vexpress_config_populate(struct device_node *node) if (WARN_ON(!parent)) return -ENODEV; - return of_platform_populate(node, NULL, NULL, parent); + ret = of_platform_populate(node, NULL, NULL, parent); + + put_device(parent); + + return ret; } static int __init vexpress_config_init(void) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 5bb1985ec484aef267e3c551c3068ec0dee14b1c..6d9cc2d39d22306fd68f30bac6f4a60e6cfa5a87 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -381,9 +381,6 @@ static ssize_t read_kmem(struct file *file, char __user *buf, char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ int err = 0; - if (!pfn_valid(PFN_DOWN(p))) - return -EIO; - read = 0; if (p < (unsigned long) high_memory) { low_count = count; @@ -412,6 +409,8 @@ static ssize_t read_kmem(struct file *file, char __user *buf, * by the kernel or data corruption may occur */ kbuf = xlate_dev_kmem_ptr((void *)p); + if (!virt_addr_valid(kbuf)) + return -ENXIO; if (copy_to_user(buf, kbuf, sz)) return -EFAULT; @@ -482,6 +481,8 @@ static ssize_t do_write_kmem(unsigned long p, const char __user *buf, * corruption may occur. 
*/ ptr = xlate_dev_kmem_ptr((void *)p); + if (!virt_addr_valid(ptr)) + return -ENXIO; copied = copy_from_user(ptr, buf, sz); if (copied) { @@ -512,9 +513,6 @@ static ssize_t write_kmem(struct file *file, const char __user *buf, char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ int err = 0; - if (!pfn_valid(PFN_DOWN(p))) - return -EIO; - if (p < (unsigned long) high_memory) { unsigned long to_write = min_t(unsigned long, count, (unsigned long)high_memory - p); diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index e3bf31b3713862cc759391f9ce0bfd22a2932007..a1ce0607bf7b450bd64d1101e44bd60368bbb0d7 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -185,7 +185,12 @@ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count) TPM_STS_DATA_AVAIL | TPM_STS_VALID, chip->timeout_c, &priv->read_queue, true) == 0) { - burstcnt = min_t(int, get_burstcount(chip), count - size); + burstcnt = get_burstcount(chip); + if (burstcnt < 0) { + dev_err(&chip->dev, "Unable to read burstcount\n"); + return burstcnt; + } + burstcnt = min_t(int, burstcnt, count - size); rc = tpm_tis_read_bytes(priv, TPM_DATA_FIFO(priv->locality), burstcnt, buf + size); @@ -271,7 +276,13 @@ static int tpm_tis_send_data(struct tpm_chip *chip, u8 *buf, size_t len) } while (count < len - 1) { - burstcnt = min_t(int, get_burstcount(chip), len - count - 1); + burstcnt = get_burstcount(chip); + if (burstcnt < 0) { + dev_err(&chip->dev, "Unable to read burstcount\n"); + rc = burstcnt; + goto out_err; + } + burstcnt = min_t(int, burstcnt, len - count - 1); rc = tpm_tis_write_bytes(priv, TPM_DATA_FIFO(priv->locality), burstcnt, buf + count); if (rc < 0) diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 62028f483bbaacfc78b77b3b65ed2e39c8d1e0e7..a2ab00831df1cc385364f370b78e89e9c609e053 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -307,7 +307,6 @@ static int tpmfront_probe(struct xenbus_device *dev, rv = setup_ring(dev, priv); if (rv) { chip = dev_get_drvdata(&dev->dev); - tpm_chip_unregister(chip); ring_free(priv); return rv; } diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 8c7763fd9efc52b30f02d9ebcd4fdb10d2876465..3bbd2a58db470a89b870a793e59ddf9fc4f48e57 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -751,7 +751,9 @@ static void bcm2835_pll_divider_off(struct clk_hw *hw) cprman_write(cprman, data->cm_reg, (cprman_read(cprman, data->cm_reg) & ~data->load_mask) | data->hold_mask); - cprman_write(cprman, data->a2w_reg, A2W_PLL_CHANNEL_DISABLE); + cprman_write(cprman, data->a2w_reg, + cprman_read(cprman, data->a2w_reg) | + A2W_PLL_CHANNEL_DISABLE); spin_unlock(&cprman->regs_lock); } diff --git a/drivers/clk/clk-wm831x.c b/drivers/clk/clk-wm831x.c index f4fdac55727c8017d0d8990b35ee19f6044be8fa..0621fbfb4bebfd548b21f62e7320f1bab2294b48 100644 --- a/drivers/clk/clk-wm831x.c +++ b/drivers/clk/clk-wm831x.c @@ -243,7 +243,7 @@ static int wm831x_clkout_is_prepared(struct clk_hw *hw) if (ret < 0) { dev_err(wm831x->dev, "Unable to read CLOCK_CONTROL_1: %d\n", ret); - return true; + return false; } return (ret & WM831X_CLKOUT_ENA) != 0; diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index 97ae60fa15849954fdcd68ab36c5510b5c044ce4..bb8a77a5985f8627e6ea396298745c9b6c7eaf77 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -448,12 +448,20 @@ EXPORT_SYMBOL(clk_register_clkdev); * * con_id or dev_id may be 
NULL as a wildcard, just as in the rest of * clkdev. + * + * To make things easier for mass registration, we detect error clk_hws + * from a previous clk_hw_register_*() call, and return the error code for + * those. This is to permit this function to be called immediately + * after clk_hw_register_*(). */ int clk_hw_register_clkdev(struct clk_hw *hw, const char *con_id, const char *dev_id) { struct clk_lookup *cl; + if (IS_ERR(hw)) + return PTR_ERR(hw); + /* * Since dev_id can be NULL, and NULL is handled specially, we must * pass it as either a NULL format string, or with "%s". diff --git a/drivers/clk/imx/clk-imx31.c b/drivers/clk/imx/clk-imx31.c index 6a964144a5b5df1d3d134e22bfcd8464f510e6a3..6a49ba2b96714f213f9630f5a2ad5e37b53bf559 100644 --- a/drivers/clk/imx/clk-imx31.c +++ b/drivers/clk/imx/clk-imx31.c @@ -157,10 +157,8 @@ static void __init _mx31_clocks_init(unsigned long fref) } } -int __init mx31_clocks_init(void) +int __init mx31_clocks_init(unsigned long fref) { - u32 fref = 26000000; /* default */ - _mx31_clocks_init(fref); clk_register_clkdev(clk[gpt_gate], "per", "imx-gpt.0"); diff --git a/drivers/clk/qcom/gcc-ipq806x.c b/drivers/clk/qcom/gcc-ipq806x.c index 52a7d3959875b2eee15bc75fc8398764849e8c63..28eb200d0f1ee008989e5a253811a316c6951ee9 100644 --- a/drivers/clk/qcom/gcc-ipq806x.c +++ b/drivers/clk/qcom/gcc-ipq806x.c @@ -2990,11 +2990,11 @@ static int gcc_ipq806x_probe(struct platform_device *pdev) struct regmap *regmap; int ret; - ret = qcom_cc_register_board_clk(dev, "cxo_board", "cxo", 19200000); + ret = qcom_cc_register_board_clk(dev, "cxo_board", "cxo", 25000000); if (ret) return ret; - ret = qcom_cc_register_board_clk(dev, "pxo_board", "pxo", 27000000); + ret = qcom_cc_register_board_clk(dev, "pxo_board", "pxo", 25000000); if (ret) return ret; diff --git a/drivers/clk/renesas/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c index 9375777776d994071a476d2e198be933838db685..b533f99550e1b97dd55944a85cbf5d64153309f5 100644 --- a/drivers/clk/renesas/clk-mstp.c +++ b/drivers/clk/renesas/clk-mstp.c @@ -37,12 +37,14 @@ * @smstpcr: module stop control register * @mstpsr: module stop status register (optional) * @lock: protects writes to SMSTPCR + * @width_8bit: registers are 8-bit, not 32-bit */ struct mstp_clock_group { struct clk_onecell_data data; void __iomem *smstpcr; void __iomem *mstpsr; spinlock_t lock; + bool width_8bit; }; /** @@ -59,6 +61,18 @@ struct mstp_clock { #define to_mstp_clock(_hw) container_of(_hw, struct mstp_clock, hw) +static inline u32 cpg_mstp_read(struct mstp_clock_group *group, + u32 __iomem *reg) +{ + return group->width_8bit ? readb(reg) : clk_readl(reg); +} + +static inline void cpg_mstp_write(struct mstp_clock_group *group, u32 val, + u32 __iomem *reg) +{ + group->width_8bit ? 
writeb(val, reg) : clk_writel(val, reg); +} + static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) { struct mstp_clock *clock = to_mstp_clock(hw); @@ -70,12 +84,12 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) spin_lock_irqsave(&group->lock, flags); - value = clk_readl(group->smstpcr); + value = cpg_mstp_read(group, group->smstpcr); if (enable) value &= ~bitmask; else value |= bitmask; - clk_writel(value, group->smstpcr); + cpg_mstp_write(group, value, group->smstpcr); spin_unlock_irqrestore(&group->lock, flags); @@ -83,7 +97,7 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) return 0; for (i = 1000; i > 0; --i) { - if (!(clk_readl(group->mstpsr) & bitmask)) + if (!(cpg_mstp_read(group, group->mstpsr) & bitmask)) break; cpu_relax(); } @@ -114,9 +128,9 @@ static int cpg_mstp_clock_is_enabled(struct clk_hw *hw) u32 value; if (group->mstpsr) - value = clk_readl(group->mstpsr); + value = cpg_mstp_read(group, group->mstpsr); else - value = clk_readl(group->smstpcr); + value = cpg_mstp_read(group, group->smstpcr); return !(value & BIT(clock->bit_index)); } @@ -188,6 +202,9 @@ static void __init cpg_mstp_clocks_init(struct device_node *np) return; } + if (of_device_is_compatible(np, "renesas,r7s72100-mstp-clocks")) + group->width_8bit = true; + for (i = 0; i < MSTP_MAX_CLOCKS; ++i) clks[i] = ERR_PTR(-ENOENT); diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c index e1365e7491ae02a0519ec94aaea92baa5f58d55e..25c41cd9cdfc76332a7836114e939a225cf72d17 100644 --- a/drivers/clk/renesas/renesas-cpg-mssr.c +++ b/drivers/clk/renesas/renesas-cpg-mssr.c @@ -33,9 +33,9 @@ #include "clk-div6.h" #ifdef DEBUG -#define WARN_DEBUG(x) do { } while (0) -#else #define WARN_DEBUG(x) WARN_ON(x) +#else +#define WARN_DEBUG(x) do { } while (0) #endif diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a23.c b/drivers/clk/sunxi-ng/ccu-sun8i-a23.c index 2646d980087bf3011205156a068f48b97a9b81bf..5c6d37bdf247cfef1b4da1bcf185d1b56082f036 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-a23.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-a23.c @@ -344,10 +344,10 @@ static SUNXI_CCU_MP_WITH_MUX_GATE(spi1_clk, "spi1", mod0_default_parents, 0x0a4, static const char * const i2s_parents[] = { "pll-audio-8x", "pll-audio-4x", "pll-audio-2x", "pll-audio" }; static SUNXI_CCU_MUX_WITH_GATE(i2s0_clk, "i2s0", i2s_parents, - 0x0b0, 16, 2, BIT(31), 0); + 0x0b0, 16, 2, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_MUX_WITH_GATE(i2s1_clk, "i2s1", i2s_parents, - 0x0b4, 16, 2, BIT(31), 0); + 0x0b4, 16, 2, BIT(31), CLK_SET_RATE_PARENT); /* TODO: the parent for most of the USB clocks is not known */ static SUNXI_CCU_GATE(usb_phy0_clk, "usb-phy0", "osc24M", @@ -415,7 +415,7 @@ static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve", 0x13c, 16, 3, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_GATE(ac_dig_clk, "ac-dig", "pll-audio", - 0x140, BIT(31), 0); + 0x140, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_GATE(avs_clk, "avs", "osc24M", 0x144, BIT(31), 0); diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c index 4d70590f05e36efe4171a93dc46db0884d3d68a2..21c427d86f289b2a588cc41190525c408e2be3bf 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c @@ -394,16 +394,16 @@ static SUNXI_CCU_MP_WITH_MUX_GATE(spi1_clk, "spi1", mod0_default_parents, 0x0a4, static const char * const i2s_parents[] = { "pll-audio-8x", "pll-audio-4x", "pll-audio-2x", "pll-audio" }; static SUNXI_CCU_MUX_WITH_GATE(i2s0_clk, 
"i2s0", i2s_parents, - 0x0b0, 16, 2, BIT(31), 0); + 0x0b0, 16, 2, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_MUX_WITH_GATE(i2s1_clk, "i2s1", i2s_parents, - 0x0b4, 16, 2, BIT(31), 0); + 0x0b4, 16, 2, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_MUX_WITH_GATE(i2s2_clk, "i2s2", i2s_parents, - 0x0b8, 16, 2, BIT(31), 0); + 0x0b8, 16, 2, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_M_WITH_GATE(spdif_clk, "spdif", "pll-audio", - 0x0c0, 0, 4, BIT(31), 0); + 0x0c0, 0, 4, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_GATE(usb_phy0_clk, "usb-phy0", "osc24M", 0x0cc, BIT(8), 0); @@ -466,7 +466,7 @@ static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve", 0x13c, 16, 3, BIT(31), 0); static SUNXI_CCU_GATE(ac_dig_clk, "ac-dig", "pll-audio", - 0x140, BIT(31), 0); + 0x140, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_GATE(avs_clk, "avs", "osc24M", 0x144, BIT(31), 0); diff --git a/drivers/clk/ti/clk-3xxx.c b/drivers/clk/ti/clk-3xxx.c index 8831e1a05367ad9c7473e3ee723adc3f29dc9936..11d8aa3ec18604a7ce5c04ddded50ff660f554c1 100644 --- a/drivers/clk/ti/clk-3xxx.c +++ b/drivers/clk/ti/clk-3xxx.c @@ -22,13 +22,6 @@ #include "clock.h" -/* - * DPLL5_FREQ_FOR_USBHOST: USBHOST and USBTLL are the only clocks - * that are sourced by DPLL5, and both of these require this clock - * to be at 120 MHz for proper operation. - */ -#define DPLL5_FREQ_FOR_USBHOST 120000000 - #define OMAP3430ES2_ST_DSS_IDLE_SHIFT 1 #define OMAP3430ES2_ST_HSOTGUSB_IDLE_SHIFT 5 #define OMAP3430ES2_ST_SSI_IDLE_SHIFT 8 @@ -546,14 +539,21 @@ void __init omap3_clk_lock_dpll5(void) struct clk *dpll5_clk; struct clk *dpll5_m2_clk; + /* + * Errata sprz319f advisory 2.1 documents a USB host clock drift issue + * that can be worked around using specially crafted dpll5 settings + * with a dpll5_m2 divider set to 8. Set the dpll5 rate to 8x the USB + * host clock rate, its .set_rate handler() will detect that frequency + * and use the errata settings. 
+ */ dpll5_clk = clk_get(NULL, "dpll5_ck"); - clk_set_rate(dpll5_clk, DPLL5_FREQ_FOR_USBHOST); + clk_set_rate(dpll5_clk, OMAP3_DPLL5_FREQ_FOR_USBHOST * 8); clk_prepare_enable(dpll5_clk); - /* Program dpll5_m2_clk divider for no division */ + /* Program dpll5_m2_clk divider */ dpll5_m2_clk = clk_get(NULL, "dpll5_m2_ck"); clk_prepare_enable(dpll5_m2_clk); - clk_set_rate(dpll5_m2_clk, DPLL5_FREQ_FOR_USBHOST); + clk_set_rate(dpll5_m2_clk, OMAP3_DPLL5_FREQ_FOR_USBHOST); clk_disable_unprepare(dpll5_m2_clk); clk_disable_unprepare(dpll5_clk); diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c index bfa17d33ef3b948e8c679e7ef6609fb0ed4ab1c1..9fd6043314eb3968815750b084664d593e9cde51 100644 --- a/drivers/clk/ti/clk-7xx.c +++ b/drivers/clk/ti/clk-7xx.c @@ -201,7 +201,6 @@ static struct ti_dt_clk dra7xx_clks[] = { DT_CLK(NULL, "atl_dpll_clk_mux", "atl_dpll_clk_mux"), DT_CLK(NULL, "atl_gfclk_mux", "atl_gfclk_mux"), DT_CLK(NULL, "dcan1_sys_clk_mux", "dcan1_sys_clk_mux"), - DT_CLK(NULL, "gmac_gmii_ref_clk_div", "gmac_gmii_ref_clk_div"), DT_CLK(NULL, "gmac_rft_clk_mux", "gmac_rft_clk_mux"), DT_CLK(NULL, "gpu_core_gclk_mux", "gpu_core_gclk_mux"), DT_CLK(NULL, "gpu_hyd_gclk_mux", "gpu_hyd_gclk_mux"), diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 90f3f472ae1c4c70e537c69a7a58e2d1168abddd..13c37f48d9d69d6f4b950671cc735b1ed49fefba 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -257,11 +257,20 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, unsigned long parent_rate); +/* + * OMAP3_DPLL5_FREQ_FOR_USBHOST: USBHOST and USBTLL are the only clocks + * that are sourced by DPLL5, and both of these require this clock + * to be at 120 MHz for proper operation. 
+ */ +#define OMAP3_DPLL5_FREQ_FOR_USBHOST 120000000 + unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate); int omap3_dpll4_set_rate(struct clk_hw *clk, unsigned long rate, unsigned long parent_rate); int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate, u8 index); +int omap3_dpll5_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate); void omap3_clk_lock_dpll5(void); unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw, diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c index 9fc8754a6e6160081c067a5a96b364b8bf0ec40a..4b9a419d8e14133d04bb405b84d79da9b6bc5be5 100644 --- a/drivers/clk/ti/dpll.c +++ b/drivers/clk/ti/dpll.c @@ -114,6 +114,18 @@ static const struct clk_ops omap3_dpll_ck_ops = { .round_rate = &omap2_dpll_round_rate, }; +static const struct clk_ops omap3_dpll5_ck_ops = { + .enable = &omap3_noncore_dpll_enable, + .disable = &omap3_noncore_dpll_disable, + .get_parent = &omap2_init_dpll_parent, + .recalc_rate = &omap3_dpll_recalc, + .set_rate = &omap3_dpll5_set_rate, + .set_parent = &omap3_noncore_dpll_set_parent, + .set_rate_and_parent = &omap3_noncore_dpll_set_rate_and_parent, + .determine_rate = &omap3_noncore_dpll_determine_rate, + .round_rate = &omap2_dpll_round_rate, +}; + static const struct clk_ops omap3_dpll_per_ck_ops = { .enable = &omap3_noncore_dpll_enable, .disable = &omap3_noncore_dpll_disable, @@ -474,7 +486,12 @@ static void __init of_ti_omap3_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd); + if ((of_machine_is_compatible("ti,omap3630") || + of_machine_is_compatible("ti,omap36xx")) && + !strcmp(node->name, "dpll5_ck")) + of_ti_dpll_setup(node, &omap3_dpll5_ck_ops, &dd); + else + of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap3_dpll_clock, "ti,omap3-dpll-clock", of_ti_omap3_dpll_setup); diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c index 88f2ce81ba55988164c9e77657b78b65ac65a3a8..4cdd28a25584424f5d5bc9730abf5e7a1d8b6bb7 100644 --- a/drivers/clk/ti/dpll3xxx.c +++ b/drivers/clk/ti/dpll3xxx.c @@ -838,3 +838,70 @@ int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, return omap3_noncore_dpll_set_rate_and_parent(hw, rate, parent_rate, index); } + +/* Apply DM3730 errata sprz319 advisory 2.1. */ +static bool omap3_dpll5_apply_errata(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct omap3_dpll5_settings { + unsigned int rate, m, n; + }; + + static const struct omap3_dpll5_settings precomputed[] = { + /* + * From DM3730 errata advisory 2.1, table 35 and 36. + * The N value is increased by 1 compared to the tables as the + * errata lists register values while last_rounded_field is the + * real divider value. + */ + { 12000000, 80, 0 + 1 }, + { 13000000, 443, 5 + 1 }, + { 19200000, 50, 0 + 1 }, + { 26000000, 443, 11 + 1 }, + { 38400000, 25, 0 + 1 } + }; + + const struct omap3_dpll5_settings *d; + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *dd; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(precomputed); ++i) { + if (parent_rate == precomputed[i].rate) + break; + } + + if (i == ARRAY_SIZE(precomputed)) + return false; + + d = &precomputed[i]; + + /* Update the M, N and rounded rate values and program the DPLL. 
*/ + dd = clk->dpll_data; + dd->last_rounded_m = d->m; + dd->last_rounded_n = d->n; + dd->last_rounded_rate = div_u64((u64)parent_rate * d->m, d->n); + omap3_noncore_dpll_program(clk, 0); + + return true; +} + +/** + * omap3_dpll5_set_rate - set rate for omap3 dpll5 + * @hw: clock to change + * @rate: target rate for clock + * @parent_rate: rate of the parent clock + * + * Set rate for the DPLL5 clock. Apply the sprz319 advisory 2.1 on OMAP36xx if + * the DPLL is used for USB host (detected through the requested rate). + */ +int omap3_dpll5_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + if (rate == OMAP3_DPLL5_FREQ_FOR_USBHOST * 8) { + if (omap3_dpll5_apply_errata(hw, parent_rate)) + return 0; + } + + return omap3_noncore_dpll_set_rate(hw, rate, parent_rate); +} diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 8f3488b808964b1884eaf18d602fe2ae9b4921c2..7f6fed9f07035350f2c0f4a727bd242ff3519536 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -495,6 +495,7 @@ static int exynos4_mct_dying_cpu(unsigned int cpu) if (mct_int_type == MCT_INT_SPI) { if (evt->irq != -1) disable_irq_nosync(evt->irq); + exynos4_mct_write(0x1, mevt->base + MCT_L_INT_CSTAT_OFFSET); } else { disable_percpu_irq(mct_irqs[MCT_L0_IRQ]); } diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 61be70eb3ac07342b4031b3703143f5bafb7e58b..0173b8b4bd5add385837d50b04b69bfae7880f16 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -102,15 +102,23 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE governor. If unsure have a look at the help section of the driver. Fallback governor will be the performance governor. -config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL - bool "schedutil" - depends on SMP - select CPU_FREQ_GOV_SCHEDUTIL +config CPU_FREQ_DEFAULT_GOV_SCHED + bool "sched" + select CPU_FREQ_GOV_SCHED + help + Use the CPUfreq governor 'sched' as default. This scales + cpu frequency using CPU utilization estimates from the + scheduler. + +config CPU_FREQ_DEFAULT_GOV_INTERACTIVE + bool "interactive" + select CPU_FREQ_GOV_INTERACTIVE select CPU_FREQ_GOV_PERFORMANCE help - Use the 'schedutil' CPUFreq governor by default. If unsure, - have a look at the help section of that governor. The fallback - governor will be 'performance'. + Use the CPUFreq governor 'interactive' as default. This allows + you to get a full dynamic cpu frequency capable system by simply + loading your cpufreq low-level hardware driver, using the + 'interactive' governor for latency-sensitive workloads. config CPU_FREQ_DEFAULT_GOV_INTERACTIVE bool "interactive" @@ -178,20 +186,6 @@ config CPU_FREQ_GOV_ONDEMAND If in doubt, say N. -config CPU_FREQ_GOV_INTERACTIVE - bool "'interactive' cpufreq policy governor" - help - 'interactive' - This driver adds a dynamic cpufreq policy governor - designed for latency-sensitive workloads. - - This governor attempts to reduce the latency of clock - increases so that the system is more responsive to - interactive workloads. - - For details, take a look at linux/Documentation/cpu-freq. - - If in doubt, say N. - config CPU_FREQ_GOV_CONSERVATIVE tristate "'conservative' cpufreq governor" depends on CPU_FREQ @@ -216,20 +210,36 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. 
-config CPU_FREQ_GOV_SCHEDUTIL - bool "'schedutil' cpufreq policy governor" - depends on CPU_FREQ && SMP +config CPU_FREQ_GOV_SCHED + bool "'sched' cpufreq governor" + depends on CPU_FREQ + depends on SMP + select CPU_FREQ_GOV_COMMON + help + 'sched' - this governor scales cpu frequency from the + scheduler as a function of cpu capacity utilization. It does + not evaluate utilization on a periodic basis (as ondemand + does) but instead is event-driven by the scheduler. + + If in doubt, say N. + +config CPU_FREQ_GOV_INTERACTIVE + tristate "'interactive' cpufreq policy governor" + depends on CPU_FREQ select CPU_FREQ_GOV_ATTR_SET select IRQ_WORK help - This governor makes decisions based on the utilization data provided - by the scheduler. It sets the CPU frequency to be proportional to - the utilization/capacity ratio coming from the scheduler. If the - utilization is frequency-invariant, the new frequency is also - proportional to the maximum available frequency. If that is not the - case, it is proportional to the current frequency of the CPU. The - frequency tipping point is at utilization/capacity equal to 80% in - both cases. + 'interactive' - This driver adds a dynamic cpufreq policy governor + designed for latency-sensitive workloads. + + This governor attempts to reduce the latency of clock + increases so that the system is more responsive to + interactive workloads. + + To compile this driver as a module, choose M here: the + module will be called cpufreq_interactive. + + For details, take a look at linux/Documentation/cpu-freq. If in doubt, say N. diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 5c07ae05d69ac80a2cb101e8123c2d69c11bcb4d..4d3ec92cbabfe7f886db4bab7c964f4798c2e7a3 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -28,6 +28,7 @@ #include "cpufreq-dt.h" struct private_data { + struct opp_table *opp_table; struct device *cpu_dev; struct thermal_cooling_device *cdev; const char *reg_name; @@ -143,6 +144,7 @@ static int resources_available(void) static int cpufreq_init(struct cpufreq_policy *policy) { struct cpufreq_frequency_table *freq_table; + struct opp_table *opp_table = NULL; struct private_data *priv; struct device *cpu_dev; struct clk *cpu_clk; @@ -186,8 +188,9 @@ static int cpufreq_init(struct cpufreq_policy *policy) */ name = find_supply_name(cpu_dev); if (name) { - ret = dev_pm_opp_set_regulator(cpu_dev, name); - if (ret) { + opp_table = dev_pm_opp_set_regulator(cpu_dev, name); + if (IS_ERR(opp_table)) { + ret = PTR_ERR(opp_table); dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n", policy->cpu, ret); goto out_put_clk; @@ -237,6 +240,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) } priv->reg_name = name; + priv->opp_table = opp_table; ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { @@ -285,7 +289,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) out_free_opp: dev_pm_opp_of_cpumask_remove_table(policy->cpus); if (name) - dev_pm_opp_put_regulator(cpu_dev); + dev_pm_opp_put_regulator(opp_table); out_put_clk: clk_put(cpu_clk); @@ -300,7 +304,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy) dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); if (priv->reg_name) - dev_pm_opp_put_regulator(priv->cpu_dev); + dev_pm_opp_put_regulator(priv->opp_table); clk_put(policy->clk); kfree(priv); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 
c910111621d7cc861e90c8467df3d84e095ad885..019e81797418af728b76dc78d05950a924ce3bc8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -29,6 +29,9 @@ #include #include #include +#ifdef CONFIG_SMP +#include +#endif #include static LIST_HEAD(cpufreq_policy_list); @@ -117,6 +120,12 @@ bool have_governor_per_policy(void) } EXPORT_SYMBOL_GPL(have_governor_per_policy); +bool cpufreq_driver_is_slow(void) +{ + return !(cpufreq_driver->flags & CPUFREQ_DRIVER_FAST); +} +EXPORT_SYMBOL_GPL(cpufreq_driver_is_slow); + struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy) { if (have_governor_per_policy()) @@ -301,6 +310,50 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) #endif } +/********************************************************************* + * FREQUENCY INVARIANT CPU CAPACITY * + *********************************************************************/ + +static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; +static DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE; + +static void +scale_freq_capacity(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs) +{ + unsigned long cur = freqs ? freqs->new : policy->cur; + unsigned long scale = (cur << SCHED_CAPACITY_SHIFT) / policy->max; + struct cpufreq_cpuinfo *cpuinfo = &policy->cpuinfo; + int cpu; + + pr_debug("cpus %*pbl cur/cur max freq %lu/%u kHz freq scale %lu\n", + cpumask_pr_args(policy->cpus), cur, policy->max, scale); + + for_each_cpu(cpu, policy->cpus) + per_cpu(freq_scale, cpu) = scale; + + if (freqs) + return; + + scale = (policy->max << SCHED_CAPACITY_SHIFT) / cpuinfo->max_freq; + + pr_debug("cpus %*pbl cur max/max freq %u/%u kHz max freq scale %lu\n", + cpumask_pr_args(policy->cpus), policy->max, cpuinfo->max_freq, + scale); + + for_each_cpu(cpu, policy->cpus) + per_cpu(max_freq_scale, cpu) = scale; +} + +unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu) +{ + return per_cpu(freq_scale, cpu); +} + +unsigned long cpufreq_scale_max_freq_capacity(int cpu) +{ + return per_cpu(max_freq_scale, cpu); +} + static void __cpufreq_notify_transition(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, unsigned int state) { @@ -378,6 +431,9 @@ static void cpufreq_notify_post_transition(struct cpufreq_policy *policy, void cpufreq_freq_transition_begin(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs) { +#ifdef CONFIG_SMP + int cpu; +#endif /* * Catch double invocations of _begin() which lead to self-deadlock. 
@@ -405,6 +461,12 @@ void cpufreq_freq_transition_begin(struct cpufreq_policy *policy, spin_unlock(&policy->transition_lock); + scale_freq_capacity(policy, freqs); +#ifdef CONFIG_SMP + for_each_cpu(cpu, policy->cpus) + trace_cpu_capacity(capacity_curr_of(cpu), cpu); +#endif + cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE); } EXPORT_SYMBOL_GPL(cpufreq_freq_transition_begin); @@ -2192,6 +2254,8 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_NOTIFY, new_policy); + scale_freq_capacity(new_policy, NULL); + policy->min = new_policy->min; policy->max = new_policy->max; trace_cpu_frequency_limits(policy->max, policy->min, policy->cpu); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 13475890d792d7bb24209a8fa8617f011883e660..e8e16a5dbd1e4d52584864720ca439ece0062555 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -302,7 +302,10 @@ static void cs_start(struct cpufreq_policy *policy) dbs_info->requested_freq = policy->cur; } -static struct dbs_governor cs_governor = { +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +static +#endif +struct dbs_governor cs_governor = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), .kobj_type = { .default_attrs = cs_attributes }, .gov_dbs_timer = cs_dbs_timer, diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index f3266a3f405796003444ebecd844479c30a6bd8e..d6cac0e8e7a68808d1c7498dc2f8d88e20f01069 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -1,7 +1,7 @@ /* * drivers/cpufreq/cpufreq_interactive.c * - * Copyright (C) 2010 Google, Inc. + * Copyright (C) 2010-2016 Google, Inc. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -13,12 +13,14 @@ * GNU General Public License for more details. 
* * Author: Mike Chan (mike@android.com) - * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include +#include #include #include #include @@ -27,72 +29,50 @@ #include #include #include -#include #include #include #define CREATE_TRACE_POINTS #include -struct cpufreq_interactive_cpuinfo { - struct timer_list cpu_timer; - struct timer_list cpu_slack_timer; - spinlock_t load_lock; /* protects the next 4 fields */ - u64 time_in_idle; - u64 time_in_idle_timestamp; - u64 cputime_speedadj; - u64 cputime_speedadj_timestamp; - struct cpufreq_policy *policy; - struct cpufreq_frequency_table *freq_table; - spinlock_t target_freq_lock; /*protects target freq */ - unsigned int target_freq; - unsigned int floor_freq; - u64 pol_floor_val_time; /* policy floor_validate_time */ - u64 loc_floor_val_time; /* per-cpu floor_validate_time */ - u64 pol_hispeed_val_time; /* policy hispeed_validate_time */ - u64 loc_hispeed_val_time; /* per-cpu hispeed_validate_time */ - struct rw_semaphore enable_sem; - int governor_enabled; -}; +#define gov_attr_ro(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) -static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo); +#define gov_attr_wo(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0200, NULL, store_##_name) -/* realtime thread handles frequency scaling */ -static struct task_struct *speedchange_task; -static cpumask_t speedchange_cpumask; -static spinlock_t speedchange_cpumask_lock; -static struct mutex gov_lock; - -/* Target load. Lower values result in higher CPU speeds. */ -#define DEFAULT_TARGET_LOAD 90 -static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; +#define gov_attr_rw(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) -#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC) -#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE -static unsigned int default_above_hispeed_delay[] = { - DEFAULT_ABOVE_HISPEED_DELAY }; +/* Separate instance required for each 'interactive' directory in sysfs */ +struct interactive_tunables { + struct gov_attr_set attr_set; -struct cpufreq_interactive_tunables { - int usage_count; /* Hi speed to bump to from lo speed when load burst (default max) */ unsigned int hispeed_freq; + /* Go to hi speed when CPU load at or above this value. */ #define DEFAULT_GO_HISPEED_LOAD 99 unsigned long go_hispeed_load; + /* Target load. Lower values result in higher CPU speeds. */ spinlock_t target_loads_lock; unsigned int *target_loads; int ntarget_loads; + /* * The minimum amount of time to spend at a frequency before we can ramp * down. */ #define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC) unsigned long min_sample_time; - /* - * The sample rate of the timer used to increase frequency - */ - unsigned long timer_rate; + + /* The sample rate of the timer used to increase frequency */ + unsigned long sampling_rate; + /* * Wait this long before raising speed above hispeed, by default a * single timer interval. 
@@ -100,114 +80,175 @@ struct cpufreq_interactive_tunables { spinlock_t above_hispeed_delay_lock; unsigned int *above_hispeed_delay; int nabove_hispeed_delay; + /* Non-zero means indefinite speed boost active */ - int boost_val; + int boost; /* Duration of a boot pulse in usecs */ - int boostpulse_duration_val; + int boostpulse_duration; /* End time of boost pulse in ktime converted to usecs */ u64 boostpulse_endtime; bool boosted; + /* - * Max additional time to wait in idle, beyond timer_rate, at speeds + * Max additional time to wait in idle, beyond sampling_rate, at speeds * above minimum before wakeup to reduce speed, or -1 if unnecessary. */ -#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE) - int timer_slack_val; +#define DEFAULT_TIMER_SLACK (4 * DEFAULT_SAMPLING_RATE) + unsigned long timer_slack_delay; + unsigned long timer_slack; bool io_is_busy; }; -/* For cases where we have single governor instance for system */ -static struct cpufreq_interactive_tunables *common_tunables; +/* Separate instance required for each 'struct cpufreq_policy' */ +struct interactive_policy { + struct cpufreq_policy *policy; + struct interactive_tunables *tunables; + struct list_head tunables_hook; +}; + +/* Separate instance required for each CPU */ +struct interactive_cpu { + struct update_util_data update_util; + struct interactive_policy *ipolicy; + + struct irq_work irq_work; + u64 last_sample_time; + unsigned long next_sample_jiffies; + bool work_in_progress; + + struct rw_semaphore enable_sem; + struct timer_list slack_timer; + + spinlock_t load_lock; /* protects the next 4 fields */ + u64 time_in_idle; + u64 time_in_idle_timestamp; + u64 cputime_speedadj; + u64 cputime_speedadj_timestamp; + + spinlock_t target_freq_lock; /*protects target freq */ + unsigned int target_freq; + + unsigned int floor_freq; + u64 pol_floor_val_time; /* policy floor_validate_time */ + u64 loc_floor_val_time; /* per-cpu floor_validate_time */ + u64 pol_hispeed_val_time; /* policy hispeed_validate_time */ + u64 loc_hispeed_val_time; /* per-cpu hispeed_validate_time */ +}; + +static DEFINE_PER_CPU(struct interactive_cpu, interactive_cpu); -static struct attribute_group *get_sysfs_attr(void); +/* Realtime thread handles frequency scaling */ +static struct task_struct *speedchange_task; +static cpumask_t speedchange_cpumask; +static spinlock_t speedchange_cpumask_lock; + +/* Target load. Lower values result in higher CPU speeds. 
*/ +#define DEFAULT_TARGET_LOAD 90 +static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; -static void cpufreq_interactive_timer_resched( - struct cpufreq_interactive_cpuinfo *pcpu) +#define DEFAULT_SAMPLING_RATE (20 * USEC_PER_MSEC) +#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_SAMPLING_RATE +static unsigned int default_above_hispeed_delay[] = { + DEFAULT_ABOVE_HISPEED_DELAY +}; + +/* Iterate over interactive policies for tunables */ +#define for_each_ipolicy(__ip) \ + list_for_each_entry(__ip, &tunables->attr_set.policy_list, tunables_hook) + +static struct interactive_tunables *global_tunables; +static DEFINE_MUTEX(global_tunables_lock); + +static inline void update_slack_delay(struct interactive_tunables *tunables) { - struct cpufreq_interactive_tunables *tunables = - pcpu->policy->governor_data; - unsigned long expires; - unsigned long flags; + tunables->timer_slack_delay = usecs_to_jiffies(tunables->timer_slack + + tunables->sampling_rate); +} - spin_lock_irqsave(&pcpu->load_lock, flags); - pcpu->time_in_idle = - get_cpu_idle_time(smp_processor_id(), - &pcpu->time_in_idle_timestamp, - tunables->io_is_busy); - pcpu->cputime_speedadj = 0; - pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp; - expires = jiffies + usecs_to_jiffies(tunables->timer_rate); - mod_timer(&pcpu->cpu_timer, expires); - - if (tunables->timer_slack_val >= 0 && - pcpu->target_freq > pcpu->policy->min) { - expires += usecs_to_jiffies(tunables->timer_slack_val); - mod_timer(&pcpu->cpu_slack_timer, expires); - } +static bool timer_slack_required(struct interactive_cpu *icpu) +{ + struct interactive_policy *ipolicy = icpu->ipolicy; + struct interactive_tunables *tunables = ipolicy->tunables; + + if (tunables->timer_slack < 0) + return false; - spin_unlock_irqrestore(&pcpu->load_lock, flags); + if (icpu->target_freq > ipolicy->policy->min) + return true; + + return false; } -/* The caller shall take enable_sem write semaphore to avoid any timer race. - * The cpu_timer and cpu_slack_timer must be deactivated when calling this - * function. 
- */ -static void cpufreq_interactive_timer_start( - struct cpufreq_interactive_tunables *tunables, int cpu) +static void gov_slack_timer_start(struct interactive_cpu *icpu, int cpu) +{ + struct interactive_tunables *tunables = icpu->ipolicy->tunables; + + icpu->slack_timer.expires = jiffies + tunables->timer_slack_delay; + add_timer_on(&icpu->slack_timer, cpu); +} + +static void gov_slack_timer_modify(struct interactive_cpu *icpu) { - struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu); - unsigned long expires = jiffies + - usecs_to_jiffies(tunables->timer_rate); + struct interactive_tunables *tunables = icpu->ipolicy->tunables; + + mod_timer(&icpu->slack_timer, jiffies + tunables->timer_slack_delay); +} + +static void slack_timer_resched(struct interactive_cpu *icpu, int cpu, + bool modify) +{ + struct interactive_tunables *tunables = icpu->ipolicy->tunables; unsigned long flags; - pcpu->cpu_timer.expires = expires; - add_timer_on(&pcpu->cpu_timer, cpu); - if (tunables->timer_slack_val >= 0 && - pcpu->target_freq > pcpu->policy->min) { - expires += usecs_to_jiffies(tunables->timer_slack_val); - pcpu->cpu_slack_timer.expires = expires; - add_timer_on(&pcpu->cpu_slack_timer, cpu); + spin_lock_irqsave(&icpu->load_lock, flags); + + icpu->time_in_idle = get_cpu_idle_time(cpu, + &icpu->time_in_idle_timestamp, + tunables->io_is_busy); + icpu->cputime_speedadj = 0; + icpu->cputime_speedadj_timestamp = icpu->time_in_idle_timestamp; + + if (timer_slack_required(icpu)) { + if (modify) + gov_slack_timer_modify(icpu); + else + gov_slack_timer_start(icpu, cpu); } - spin_lock_irqsave(&pcpu->load_lock, flags); - pcpu->time_in_idle = - get_cpu_idle_time(cpu, &pcpu->time_in_idle_timestamp, - tunables->io_is_busy); - pcpu->cputime_speedadj = 0; - pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp; - spin_unlock_irqrestore(&pcpu->load_lock, flags); + spin_unlock_irqrestore(&icpu->load_lock, flags); } -static unsigned int freq_to_above_hispeed_delay( - struct cpufreq_interactive_tunables *tunables, - unsigned int freq) +static unsigned int +freq_to_above_hispeed_delay(struct interactive_tunables *tunables, + unsigned int freq) { - int i; - unsigned int ret; unsigned long flags; + unsigned int ret; + int i; spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); for (i = 0; i < tunables->nabove_hispeed_delay - 1 && - freq >= tunables->above_hispeed_delay[i+1]; i += 2) + freq >= tunables->above_hispeed_delay[i + 1]; i += 2) ; ret = tunables->above_hispeed_delay[i]; spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + return ret; } -static unsigned int freq_to_targetload( - struct cpufreq_interactive_tunables *tunables, unsigned int freq) +static unsigned int freq_to_targetload(struct interactive_tunables *tunables, + unsigned int freq) { - int i; - unsigned int ret; unsigned long flags; + unsigned int ret; + int i; spin_lock_irqsave(&tunables->target_loads_lock, flags); for (i = 0; i < tunables->ntarget_loads - 1 && - freq >= tunables->target_loads[i+1]; i += 2) + freq >= tunables->target_loads[i + 1]; i += 2) ; ret = tunables->target_loads[i]; @@ -220,78 +261,71 @@ static unsigned int freq_to_targetload( * choose_freq() will find the minimum frequency that does not exceed its * target load given the current load. 
*/ -static unsigned int choose_freq(struct cpufreq_interactive_cpuinfo *pcpu, - unsigned int loadadjfreq) +static unsigned int choose_freq(struct interactive_cpu *icpu, + unsigned int loadadjfreq) { - unsigned int freq = pcpu->policy->cur; - unsigned int prevfreq, freqmin, freqmax; - unsigned int tl; + struct cpufreq_policy *policy = icpu->ipolicy->policy; + struct cpufreq_frequency_table *freq_table = policy->freq_table; + unsigned int prevfreq, freqmin = 0, freqmax = UINT_MAX, tl; + unsigned int freq = policy->cur; int index; - freqmin = 0; - freqmax = UINT_MAX; - do { prevfreq = freq; - tl = freq_to_targetload(pcpu->policy->governor_data, freq); + tl = freq_to_targetload(icpu->ipolicy->tunables, freq); /* * Find the lowest frequency where the computed load is less * than or equal to the target load. */ - index = cpufreq_frequency_table_target( - pcpu->policy, loadadjfreq / tl, - CPUFREQ_RELATION_L); - freq = pcpu->freq_table[index].frequency; + index = cpufreq_frequency_table_target(policy, loadadjfreq / tl, + CPUFREQ_RELATION_L); + + freq = freq_table[index].frequency; if (freq > prevfreq) { - /* The previous frequency is too low. */ + /* The previous frequency is too low */ freqmin = prevfreq; - if (freq >= freqmax) { + if (freq < freqmax) + continue; + + /* Find highest frequency that is less than freqmax */ + index = cpufreq_frequency_table_target(policy, + freqmax - 1, CPUFREQ_RELATION_H); + + freq = freq_table[index].frequency; + + if (freq == freqmin) { /* - * Find the highest frequency that is less - * than freqmax. + * The first frequency below freqmax has already + * been found to be too low. freqmax is the + * lowest speed we found that is fast enough. */ - index = cpufreq_frequency_table_target( - pcpu->policy, - freqmax - 1, CPUFREQ_RELATION_H); - freq = pcpu->freq_table[index].frequency; - - if (freq == freqmin) { - /* - * The first frequency below freqmax - * has already been found to be too - * low. freqmax is the lowest speed - * we found that is fast enough. - */ - freq = freqmax; - break; - } + freq = freqmax; + break; } } else if (freq < prevfreq) { /* The previous frequency is high enough. */ freqmax = prevfreq; - if (freq <= freqmin) { - /* - * Find the lowest frequency that is higher - * than freqmin. - */ - index = cpufreq_frequency_table_target( - pcpu->policy, - freqmin + 1, CPUFREQ_RELATION_L); - freq = pcpu->freq_table[index].frequency; + if (freq > freqmin) + continue; - /* - * If freqmax is the first frequency above - * freqmin then we have already found that - * this speed is fast enough. - */ - if (freq == freqmax) - break; - } + /* Find lowest frequency that is higher than freqmin */ + index = cpufreq_frequency_table_target(policy, + freqmin + 1, CPUFREQ_RELATION_L); + + freq = freq_table[index].frequency; + + /* + * If freqmax is the first frequency above + * freqmin then we have already found that + * this speed is fast enough. + */ + if (freq == freqmax) + break; } /* If same frequency chosen as previous then done. 
*/ @@ -300,115 +334,97 @@ static unsigned int choose_freq(struct cpufreq_interactive_cpuinfo *pcpu, return freq; } -static u64 update_load(int cpu) +static u64 update_load(struct interactive_cpu *icpu, int cpu) { - struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu); - struct cpufreq_interactive_tunables *tunables = - pcpu->policy->governor_data; - u64 now; - u64 now_idle; - unsigned int delta_idle; - unsigned int delta_time; - u64 active_time; + struct interactive_tunables *tunables = icpu->ipolicy->tunables; + unsigned int delta_idle, delta_time; + u64 now_idle, now, active_time; now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy); - delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle); - delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp); + delta_idle = (unsigned int)(now_idle - icpu->time_in_idle); + delta_time = (unsigned int)(now - icpu->time_in_idle_timestamp); if (delta_time <= delta_idle) active_time = 0; else active_time = delta_time - delta_idle; - pcpu->cputime_speedadj += active_time * pcpu->policy->cur; + icpu->cputime_speedadj += active_time * icpu->ipolicy->policy->cur; + + icpu->time_in_idle = now_idle; + icpu->time_in_idle_timestamp = now; - pcpu->time_in_idle = now_idle; - pcpu->time_in_idle_timestamp = now; return now; } -static void cpufreq_interactive_timer(unsigned long data) +/* Re-evaluate load to see if a frequency change is required or not */ +static void eval_target_freq(struct interactive_cpu *icpu) { - u64 now; - unsigned int delta_time; - u64 cputime_speedadj; - int cpu_load; - struct cpufreq_interactive_cpuinfo *pcpu = - &per_cpu(cpuinfo, data); - struct cpufreq_interactive_tunables *tunables = - pcpu->policy->governor_data; - unsigned int new_freq; - unsigned int loadadjfreq; - unsigned int index; + struct interactive_tunables *tunables = icpu->ipolicy->tunables; + struct cpufreq_policy *policy = icpu->ipolicy->policy; + struct cpufreq_frequency_table *freq_table = policy->freq_table; + u64 cputime_speedadj, now, max_fvtime; + unsigned int new_freq, loadadjfreq, index, delta_time; unsigned long flags; - u64 max_fvtime; - - if (!down_read_trylock(&pcpu->enable_sem)) - return; - if (!pcpu->governor_enabled) - goto exit; + int cpu_load; + int cpu = smp_processor_id(); - spin_lock_irqsave(&pcpu->load_lock, flags); - now = update_load(data); - delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp); - cputime_speedadj = pcpu->cputime_speedadj; - spin_unlock_irqrestore(&pcpu->load_lock, flags); + spin_lock_irqsave(&icpu->load_lock, flags); + now = update_load(icpu, smp_processor_id()); + delta_time = (unsigned int)(now - icpu->cputime_speedadj_timestamp); + cputime_speedadj = icpu->cputime_speedadj; + spin_unlock_irqrestore(&icpu->load_lock, flags); if (WARN_ON_ONCE(!delta_time)) - goto rearm; + return; - spin_lock_irqsave(&pcpu->target_freq_lock, flags); + spin_lock_irqsave(&icpu->target_freq_lock, flags); do_div(cputime_speedadj, delta_time); loadadjfreq = (unsigned int)cputime_speedadj * 100; - cpu_load = loadadjfreq / pcpu->policy->cur; - tunables->boosted = tunables->boost_val || now < tunables->boostpulse_endtime; + cpu_load = loadadjfreq / policy->cur; + tunables->boosted = tunables->boost || + now < tunables->boostpulse_endtime; if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) { - if (pcpu->policy->cur < tunables->hispeed_freq) { + if (policy->cur < tunables->hispeed_freq) { new_freq = tunables->hispeed_freq; } else { - new_freq = choose_freq(pcpu, loadadjfreq); + new_freq = 
choose_freq(icpu, loadadjfreq); if (new_freq < tunables->hispeed_freq) new_freq = tunables->hispeed_freq; } } else { - new_freq = choose_freq(pcpu, loadadjfreq); + new_freq = choose_freq(icpu, loadadjfreq); if (new_freq > tunables->hispeed_freq && - pcpu->policy->cur < tunables->hispeed_freq) + policy->cur < tunables->hispeed_freq) new_freq = tunables->hispeed_freq; } - if (pcpu->policy->cur >= tunables->hispeed_freq && - new_freq > pcpu->policy->cur && - now - pcpu->pol_hispeed_val_time < - freq_to_above_hispeed_delay(tunables, pcpu->policy->cur)) { - trace_cpufreq_interactive_notyet( - data, cpu_load, pcpu->target_freq, - pcpu->policy->cur, new_freq); - spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); - goto rearm; + if (policy->cur >= tunables->hispeed_freq && + new_freq > policy->cur && + now - icpu->pol_hispeed_val_time < freq_to_above_hispeed_delay(tunables, policy->cur)) { + trace_cpufreq_interactive_notyet(cpu, cpu_load, + icpu->target_freq, policy->cur, new_freq); + goto exit; } - pcpu->loc_hispeed_val_time = now; + icpu->loc_hispeed_val_time = now; - index = cpufreq_frequency_table_target(pcpu->policy, - new_freq, CPUFREQ_RELATION_L); - new_freq = pcpu->freq_table[index].frequency; + index = cpufreq_frequency_table_target(policy, new_freq, + CPUFREQ_RELATION_L); + new_freq = freq_table[index].frequency; /* * Do not scale below floor_freq unless we have been at or above the * floor frequency for the minimum sample time since last validated. */ - max_fvtime = max(pcpu->pol_floor_val_time, pcpu->loc_floor_val_time); - if (new_freq < pcpu->floor_freq && - pcpu->target_freq >= pcpu->policy->cur) { + max_fvtime = max(icpu->pol_floor_val_time, icpu->loc_floor_val_time); + if (new_freq < icpu->floor_freq && icpu->target_freq >= policy->cur) { if (now - max_fvtime < tunables->min_sample_time) { - trace_cpufreq_interactive_notyet( - data, cpu_load, pcpu->target_freq, - pcpu->policy->cur, new_freq); - spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); - goto rearm; + trace_cpufreq_interactive_notyet(cpu, cpu_load, + icpu->target_freq, policy->cur, new_freq); + goto exit; } } @@ -421,82 +437,78 @@ static void cpufreq_interactive_timer(unsigned long data) */ if (!tunables->boosted || new_freq > tunables->hispeed_freq) { - pcpu->floor_freq = new_freq; - if (pcpu->target_freq >= pcpu->policy->cur || - new_freq >= pcpu->policy->cur) - pcpu->loc_floor_val_time = now; + icpu->floor_freq = new_freq; + if (icpu->target_freq >= policy->cur || new_freq >= policy->cur) + icpu->loc_floor_val_time = now; } - if (pcpu->target_freq == new_freq && - pcpu->target_freq <= pcpu->policy->cur) { - trace_cpufreq_interactive_already( - data, cpu_load, pcpu->target_freq, - pcpu->policy->cur, new_freq); - spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); - goto rearm; + if (icpu->target_freq == new_freq && + icpu->target_freq <= policy->cur) { + trace_cpufreq_interactive_already(cpu, cpu_load, + icpu->target_freq, policy->cur, new_freq); + goto exit; } - trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq, - pcpu->policy->cur, new_freq); + trace_cpufreq_interactive_target(cpu, cpu_load, icpu->target_freq, + policy->cur, new_freq); + + icpu->target_freq = new_freq; + spin_unlock_irqrestore(&icpu->target_freq_lock, flags); - pcpu->target_freq = new_freq; - spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); spin_lock_irqsave(&speedchange_cpumask_lock, flags); - cpumask_set_cpu(data, &speedchange_cpumask); + cpumask_set_cpu(cpu, &speedchange_cpumask); 
spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); - wake_up_process(speedchange_task); -rearm: - if (!timer_pending(&pcpu->cpu_timer)) - cpufreq_interactive_timer_resched(pcpu); + wake_up_process(speedchange_task); + return; exit: - up_read(&pcpu->enable_sem); - return; + spin_unlock_irqrestore(&icpu->target_freq_lock, flags); +} + +static void cpufreq_interactive_update(struct interactive_cpu *icpu) +{ + eval_target_freq(icpu); + slack_timer_resched(icpu, smp_processor_id(), true); } static void cpufreq_interactive_idle_end(void) { - struct cpufreq_interactive_cpuinfo *pcpu = - &per_cpu(cpuinfo, smp_processor_id()); + struct interactive_cpu *icpu = &per_cpu(interactive_cpu, + smp_processor_id()); - if (!down_read_trylock(&pcpu->enable_sem)) - return; - if (!pcpu->governor_enabled) { - up_read(&pcpu->enable_sem); + if (!down_read_trylock(&icpu->enable_sem)) return; - } - /* Arm the timer for 1-2 ticks later if not already. */ - if (!timer_pending(&pcpu->cpu_timer)) { - cpufreq_interactive_timer_resched(pcpu); - } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) { - del_timer(&pcpu->cpu_timer); - del_timer(&pcpu->cpu_slack_timer); - cpufreq_interactive_timer(smp_processor_id()); + if (icpu->ipolicy) { + /* + * We haven't sampled load for more than sampling_rate time, do + * it right now. + */ + if (time_after_eq(jiffies, icpu->next_sample_jiffies)) + cpufreq_interactive_update(icpu); } - up_read(&pcpu->enable_sem); + up_read(&icpu->enable_sem); } static void cpufreq_interactive_get_policy_info(struct cpufreq_policy *policy, unsigned int *pmax_freq, u64 *phvt, u64 *pfvt) { - struct cpufreq_interactive_cpuinfo *pcpu; - unsigned int max_freq = 0; + struct interactive_cpu *icpu; u64 hvt = ~0ULL, fvt = 0; - unsigned int i; + unsigned int max_freq = 0, i; for_each_cpu(i, policy->cpus) { - pcpu = &per_cpu(cpuinfo, i); - - fvt = max(fvt, pcpu->loc_floor_val_time); - if (pcpu->target_freq > max_freq) { - max_freq = pcpu->target_freq; - hvt = pcpu->loc_hispeed_val_time; - } else if (pcpu->target_freq == max_freq) { - hvt = min(hvt, pcpu->loc_hispeed_val_time); + icpu = &per_cpu(interactive_cpu, i); + + fvt = max(fvt, icpu->loc_floor_val_time); + if (icpu->target_freq > max_freq) { + max_freq = icpu->target_freq; + hvt = icpu->loc_hispeed_val_time; + } else if (icpu->target_freq == max_freq) { + hvt = min(hvt, icpu->loc_hispeed_val_time); } } @@ -508,7 +520,7 @@ static void cpufreq_interactive_get_policy_info(struct cpufreq_policy *policy, static void cpufreq_interactive_adjust_cpu(unsigned int cpu, struct cpufreq_policy *policy) { - struct cpufreq_interactive_cpuinfo *pcpu; + struct interactive_cpu *icpu; u64 hvt, fvt; unsigned int max_freq; int i; @@ -516,15 +528,15 @@ static void cpufreq_interactive_adjust_cpu(unsigned int cpu, cpufreq_interactive_get_policy_info(policy, &max_freq, &hvt, &fvt); for_each_cpu(i, policy->cpus) { - pcpu = &per_cpu(cpuinfo, i); - pcpu->pol_floor_val_time = fvt; + icpu = &per_cpu(interactive_cpu, i); + icpu->pol_floor_val_time = fvt; } if (max_freq != policy->cur) { __cpufreq_driver_target(policy, max_freq, CPUFREQ_RELATION_H); for_each_cpu(i, policy->cpus) { - pcpu = &per_cpu(cpuinfo, i); - pcpu->pol_hispeed_val_time = hvt; + icpu = &per_cpu(interactive_cpu, i); + icpu->pol_hispeed_val_time = hvt; } } @@ -536,130 +548,112 @@ static int cpufreq_interactive_speedchange_task(void *data) unsigned int cpu; cpumask_t tmp_mask; unsigned long flags; - struct cpufreq_interactive_cpuinfo *pcpu; - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - 
spin_lock_irqsave(&speedchange_cpumask_lock, flags); +again: + set_current_state(TASK_INTERRUPTIBLE); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); - if (cpumask_empty(&speedchange_cpumask)) { - spin_unlock_irqrestore(&speedchange_cpumask_lock, - flags); - schedule(); + if (cpumask_empty(&speedchange_cpumask)) { + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + schedule(); - if (kthread_should_stop()) - break; + if (kthread_should_stop()) + return 0; - spin_lock_irqsave(&speedchange_cpumask_lock, flags); - } + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + } - set_current_state(TASK_RUNNING); - tmp_mask = speedchange_cpumask; - cpumask_clear(&speedchange_cpumask); - spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + set_current_state(TASK_RUNNING); + tmp_mask = speedchange_cpumask; + cpumask_clear(&speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); - for_each_cpu(cpu, &tmp_mask) { - pcpu = &per_cpu(cpuinfo, cpu); + for_each_cpu(cpu, &tmp_mask) { + struct interactive_cpu *icpu = &per_cpu(interactive_cpu, cpu); + struct cpufreq_policy *policy = icpu->ipolicy->policy; - down_write(&pcpu->policy->rwsem); + if (unlikely(!down_read_trylock(&icpu->enable_sem))) + continue; - if (likely(down_read_trylock(&pcpu->enable_sem))) { - if (likely(pcpu->governor_enabled)) - cpufreq_interactive_adjust_cpu(cpu, - pcpu->policy); - up_read(&pcpu->enable_sem); - } + if (likely(icpu->ipolicy)) + cpufreq_interactive_adjust_cpu(cpu, policy); - up_write(&pcpu->policy->rwsem); - } + up_read(&icpu->enable_sem); } - return 0; + goto again; } -static void cpufreq_interactive_boost(struct cpufreq_interactive_tunables *tunables) +static void cpufreq_interactive_boost(struct interactive_tunables *tunables) { - int i; - int anyboost = 0; + struct interactive_policy *ipolicy; + struct cpufreq_policy *policy; + struct interactive_cpu *icpu; unsigned long flags[2]; - struct cpufreq_interactive_cpuinfo *pcpu; + bool wakeup = false; + int i; tunables->boosted = true; spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]); - for_each_online_cpu(i) { - pcpu = &per_cpu(cpuinfo, i); + for_each_ipolicy(ipolicy) { + policy = ipolicy->policy; - if (!down_read_trylock(&pcpu->enable_sem)) - continue; + for_each_cpu(i, policy->cpus) { + icpu = &per_cpu(interactive_cpu, i); - if (!pcpu->governor_enabled) { - up_read(&pcpu->enable_sem); - continue; - } + if (!down_read_trylock(&icpu->enable_sem)) + continue; - if (tunables != pcpu->policy->governor_data) { - up_read(&pcpu->enable_sem); - continue; - } + if (!icpu->ipolicy) { + up_read(&icpu->enable_sem); + continue; + } - spin_lock_irqsave(&pcpu->target_freq_lock, flags[1]); - if (pcpu->target_freq < tunables->hispeed_freq) { - pcpu->target_freq = tunables->hispeed_freq; - cpumask_set_cpu(i, &speedchange_cpumask); - pcpu->pol_hispeed_val_time = - ktime_to_us(ktime_get()); - anyboost = 1; - } - spin_unlock_irqrestore(&pcpu->target_freq_lock, flags[1]); + spin_lock_irqsave(&icpu->target_freq_lock, flags[1]); + if (icpu->target_freq < tunables->hispeed_freq) { + icpu->target_freq = tunables->hispeed_freq; + cpumask_set_cpu(i, &speedchange_cpumask); + icpu->pol_hispeed_val_time = ktime_to_us(ktime_get()); + wakeup = true; + } + spin_unlock_irqrestore(&icpu->target_freq_lock, flags[1]); - up_read(&pcpu->enable_sem); + up_read(&icpu->enable_sem); + } } spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]); - if (anyboost) + if (wakeup) wake_up_process(speedchange_task); } -static int cpufreq_interactive_notifier( - 
struct notifier_block *nb, unsigned long val, void *data) +static int cpufreq_interactive_notifier(struct notifier_block *nb, + unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - struct cpufreq_interactive_cpuinfo *pcpu; - int cpu; + struct interactive_cpu *icpu = &per_cpu(interactive_cpu, freq->cpu); unsigned long flags; - if (val == CPUFREQ_POSTCHANGE) { - pcpu = &per_cpu(cpuinfo, freq->cpu); - if (!down_read_trylock(&pcpu->enable_sem)) - return 0; - if (!pcpu->governor_enabled) { - up_read(&pcpu->enable_sem); - return 0; - } + if (val != CPUFREQ_POSTCHANGE) + return 0; - for_each_cpu(cpu, pcpu->policy->cpus) { - struct cpufreq_interactive_cpuinfo *pjcpu = - &per_cpu(cpuinfo, cpu); - if (cpu != freq->cpu) { - if (!down_read_trylock(&pjcpu->enable_sem)) - continue; - if (!pjcpu->governor_enabled) { - up_read(&pjcpu->enable_sem); - continue; - } - } - spin_lock_irqsave(&pjcpu->load_lock, flags); - update_load(cpu); - spin_unlock_irqrestore(&pjcpu->load_lock, flags); - if (cpu != freq->cpu) - up_read(&pjcpu->enable_sem); - } + if (!down_read_trylock(&icpu->enable_sem)) + return 0; - up_read(&pcpu->enable_sem); + if (!icpu->ipolicy) { + up_read(&icpu->enable_sem); + return 0; } + + spin_lock_irqsave(&icpu->load_lock, flags); + update_load(icpu, freq->cpu); + spin_unlock_irqrestore(&icpu->load_lock, flags); + + up_read(&icpu->enable_sem); + return 0; } @@ -669,29 +663,26 @@ static struct notifier_block cpufreq_notifier_block = { static unsigned int *get_tokenized_data(const char *buf, int *num_tokens) { - const char *cp; - int i; - int ntokens = 1; + const char *cp = buf; + int ntokens = 1, i = 0; unsigned int *tokenized_data; int err = -EINVAL; - cp = buf; while ((cp = strpbrk(cp + 1, " :"))) ntokens++; if (!(ntokens & 0x1)) goto err; - tokenized_data = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL); + tokenized_data = kcalloc(ntokens, sizeof(*tokenized_data), GFP_KERNEL); if (!tokenized_data) { err = -ENOMEM; goto err; } cp = buf; - i = 0; while (i < ntokens) { - if (sscanf(cp, "%u", &tokenized_data[i++]) != 1) + if (kstrtouint(cp, 0, &tokenized_data[i++]) < 0) goto err_kfree; cp = strpbrk(cp, " :"); @@ -712,13 +703,25 @@ static unsigned int *get_tokenized_data(const char *buf, int *num_tokens) return ERR_PTR(err); } -static ssize_t show_target_loads( - struct cpufreq_interactive_tunables *tunables, - char *buf) +/* Interactive governor sysfs interface */ +static struct interactive_tunables *to_tunables(struct gov_attr_set *attr_set) { - int i; - ssize_t ret = 0; + return container_of(attr_set, struct interactive_tunables, attr_set); +} + +#define show_one(file_name, type) \ +static ssize_t show_##file_name(struct gov_attr_set *attr_set, char *buf) \ +{ \ + struct interactive_tunables *tunables = to_tunables(attr_set); \ + return sprintf(buf, type "\n", tunables->file_name); \ +} + +static ssize_t show_target_loads(struct gov_attr_set *attr_set, char *buf) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); unsigned long flags; + ssize_t ret = 0; + int i; spin_lock_irqsave(&tunables->target_loads_lock, flags); @@ -728,20 +731,21 @@ static ssize_t show_target_loads( sprintf(buf + ret - 1, "\n"); spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return ret; } -static ssize_t store_target_loads( - struct cpufreq_interactive_tunables *tunables, - const char *buf, size_t count) +static ssize_t store_target_loads(struct gov_attr_set *attr_set, + const char *buf, size_t count) { - int ntokens; - unsigned int *new_target_loads = NULL; + struct 
interactive_tunables *tunables = to_tunables(attr_set); + unsigned int *new_target_loads; unsigned long flags; + int ntokens; new_target_loads = get_tokenized_data(buf, &ntokens); if (IS_ERR(new_target_loads)) - return PTR_RET(new_target_loads); + return PTR_ERR(new_target_loads); spin_lock_irqsave(&tunables->target_loads_lock, flags); if (tunables->target_loads != default_target_loads) @@ -749,15 +753,17 @@ static ssize_t store_target_loads( tunables->target_loads = new_target_loads; tunables->ntarget_loads = ntokens; spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return count; } -static ssize_t show_above_hispeed_delay( - struct cpufreq_interactive_tunables *tunables, char *buf) +static ssize_t show_above_hispeed_delay(struct gov_attr_set *attr_set, + char *buf) { - int i; - ssize_t ret = 0; + struct interactive_tunables *tunables = to_tunables(attr_set); unsigned long flags; + ssize_t ret = 0; + int i; spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); @@ -768,20 +774,21 @@ static ssize_t show_above_hispeed_delay( sprintf(buf + ret - 1, "\n"); spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + return ret; } -static ssize_t store_above_hispeed_delay( - struct cpufreq_interactive_tunables *tunables, - const char *buf, size_t count) +static ssize_t store_above_hispeed_delay(struct gov_attr_set *attr_set, + const char *buf, size_t count) { - int ntokens; + struct interactive_tunables *tunables = to_tunables(attr_set); unsigned int *new_above_hispeed_delay = NULL; unsigned long flags; + int ntokens; new_above_hispeed_delay = get_tokenized_data(buf, &ntokens); if (IS_ERR(new_above_hispeed_delay)) - return PTR_RET(new_above_hispeed_delay); + return PTR_ERR(new_above_hispeed_delay); spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); if (tunables->above_hispeed_delay != default_above_hispeed_delay) @@ -789,78 +796,71 @@ static ssize_t store_above_hispeed_delay( tunables->above_hispeed_delay = new_above_hispeed_delay; tunables->nabove_hispeed_delay = ntokens; spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); - return count; - -} -static ssize_t show_hispeed_freq(struct cpufreq_interactive_tunables *tunables, - char *buf) -{ - return sprintf(buf, "%u\n", tunables->hispeed_freq); + return count; } -static ssize_t store_hispeed_freq(struct cpufreq_interactive_tunables *tunables, - const char *buf, size_t count) +static ssize_t store_hispeed_freq(struct gov_attr_set *attr_set, + const char *buf, size_t count) { + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned long int val; int ret; - long unsigned int val; ret = kstrtoul(buf, 0, &val); if (ret < 0) return ret; + tunables->hispeed_freq = val; - return count; -} -static ssize_t show_go_hispeed_load(struct cpufreq_interactive_tunables - *tunables, char *buf) -{ - return sprintf(buf, "%lu\n", tunables->go_hispeed_load); + return count; } -static ssize_t store_go_hispeed_load(struct cpufreq_interactive_tunables - *tunables, const char *buf, size_t count) +static ssize_t store_go_hispeed_load(struct gov_attr_set *attr_set, + const char *buf, size_t count) { - int ret; + struct interactive_tunables *tunables = to_tunables(attr_set); unsigned long val; + int ret; ret = kstrtoul(buf, 0, &val); if (ret < 0) return ret; + tunables->go_hispeed_load = val; - return count; -} -static ssize_t show_min_sample_time(struct cpufreq_interactive_tunables - *tunables, char *buf) -{ - return sprintf(buf, "%lu\n", tunables->min_sample_time); + return count; } -static 
ssize_t store_min_sample_time(struct cpufreq_interactive_tunables - *tunables, const char *buf, size_t count) +static ssize_t store_min_sample_time(struct gov_attr_set *attr_set, + const char *buf, size_t count) { - int ret; + struct interactive_tunables *tunables = to_tunables(attr_set); unsigned long val; + int ret; ret = kstrtoul(buf, 0, &val); if (ret < 0) return ret; + tunables->min_sample_time = val; + return count; } -static ssize_t show_timer_rate(struct cpufreq_interactive_tunables *tunables, - char *buf) +static ssize_t show_timer_rate(struct gov_attr_set *attr_set, char *buf) { - return sprintf(buf, "%lu\n", tunables->timer_rate); + struct interactive_tunables *tunables = to_tunables(attr_set); + + return sprintf(buf, "%lu\n", tunables->sampling_rate); } -static ssize_t store_timer_rate(struct cpufreq_interactive_tunables *tunables, - const char *buf, size_t count) +static ssize_t store_timer_rate(struct gov_attr_set *attr_set, const char *buf, + size_t count) { - int ret; + struct interactive_tunables *tunables = to_tunables(attr_set); unsigned long val, val_round; + int ret; ret = kstrtoul(buf, 0, &val); if (ret < 0) @@ -871,49 +871,42 @@ static ssize_t store_timer_rate(struct cpufreq_interactive_tunables *tunables, pr_warn("timer_rate not aligned to jiffy. Rounded up to %lu\n", val_round); - tunables->timer_rate = val_round; - return count; -} + tunables->sampling_rate = val_round; -static ssize_t show_timer_slack(struct cpufreq_interactive_tunables *tunables, - char *buf) -{ - return sprintf(buf, "%d\n", tunables->timer_slack_val); + return count; } -static ssize_t store_timer_slack(struct cpufreq_interactive_tunables *tunables, - const char *buf, size_t count) +static ssize_t store_timer_slack(struct gov_attr_set *attr_set, const char *buf, + size_t count) { - int ret; + struct interactive_tunables *tunables = to_tunables(attr_set); unsigned long val; + int ret; ret = kstrtol(buf, 10, &val); if (ret < 0) return ret; - tunables->timer_slack_val = val; - return count; -} + tunables->timer_slack = val; + update_slack_delay(tunables); -static ssize_t show_boost(struct cpufreq_interactive_tunables *tunables, - char *buf) -{ - return sprintf(buf, "%d\n", tunables->boost_val); + return count; } -static ssize_t store_boost(struct cpufreq_interactive_tunables *tunables, - const char *buf, size_t count) +static ssize_t store_boost(struct gov_attr_set *attr_set, const char *buf, + size_t count) { - int ret; + struct interactive_tunables *tunables = to_tunables(attr_set); unsigned long val; + int ret; ret = kstrtoul(buf, 0, &val); if (ret < 0) return ret; - tunables->boost_val = val; + tunables->boost = val; - if (tunables->boost_val) { + if (tunables->boost) { trace_cpufreq_interactive_boost("on"); if (!tunables->boosted) cpufreq_interactive_boost(tunables); @@ -925,193 +918,100 @@ static ssize_t store_boost(struct cpufreq_interactive_tunables *tunables, return count; } -static ssize_t store_boostpulse(struct cpufreq_interactive_tunables *tunables, - const char *buf, size_t count) +static ssize_t store_boostpulse(struct gov_attr_set *attr_set, const char *buf, + size_t count) { - int ret; + struct interactive_tunables *tunables = to_tunables(attr_set); unsigned long val; + int ret; ret = kstrtoul(buf, 0, &val); if (ret < 0) return ret; tunables->boostpulse_endtime = ktime_to_us(ktime_get()) + - tunables->boostpulse_duration_val; + tunables->boostpulse_duration; trace_cpufreq_interactive_boost("pulse"); if (!tunables->boosted) cpufreq_interactive_boost(tunables); - return count; -} 
-static ssize_t show_boostpulse_duration(struct cpufreq_interactive_tunables - *tunables, char *buf) -{ - return sprintf(buf, "%d\n", tunables->boostpulse_duration_val); + return count; } -static ssize_t store_boostpulse_duration(struct cpufreq_interactive_tunables - *tunables, const char *buf, size_t count) +static ssize_t store_boostpulse_duration(struct gov_attr_set *attr_set, + const char *buf, size_t count) { - int ret; + struct interactive_tunables *tunables = to_tunables(attr_set); unsigned long val; + int ret; ret = kstrtoul(buf, 0, &val); if (ret < 0) return ret; - tunables->boostpulse_duration_val = val; - return count; -} + tunables->boostpulse_duration = val; -static ssize_t show_io_is_busy(struct cpufreq_interactive_tunables *tunables, - char *buf) -{ - return sprintf(buf, "%u\n", tunables->io_is_busy); + return count; } -static ssize_t store_io_is_busy(struct cpufreq_interactive_tunables *tunables, - const char *buf, size_t count) +static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf, + size_t count) { - int ret; + struct interactive_tunables *tunables = to_tunables(attr_set); unsigned long val; + int ret; ret = kstrtoul(buf, 0, &val); if (ret < 0) return ret; - tunables->io_is_busy = val; - return count; -} -/* - * Create show/store routines - * - sys: One governor instance for complete SYSTEM - * - pol: One governor instance per struct cpufreq_policy - */ -#define show_gov_pol_sys(file_name) \ -static ssize_t show_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - return show_##file_name(common_tunables, buf); \ -} \ - \ -static ssize_t show_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, char *buf) \ -{ \ - return show_##file_name(policy->governor_data, buf); \ -} + tunables->io_is_busy = val; -#define store_gov_pol_sys(file_name) \ -static ssize_t store_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, const char *buf, \ - size_t count) \ -{ \ - return store_##file_name(common_tunables, buf, count); \ -} \ - \ -static ssize_t store_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, const char *buf, size_t count) \ -{ \ - return store_##file_name(policy->governor_data, buf, count); \ + return count; } -#define show_store_gov_pol_sys(file_name) \ -show_gov_pol_sys(file_name); \ -store_gov_pol_sys(file_name) - -show_store_gov_pol_sys(target_loads); -show_store_gov_pol_sys(above_hispeed_delay); -show_store_gov_pol_sys(hispeed_freq); -show_store_gov_pol_sys(go_hispeed_load); -show_store_gov_pol_sys(min_sample_time); -show_store_gov_pol_sys(timer_rate); -show_store_gov_pol_sys(timer_slack); -show_store_gov_pol_sys(boost); -store_gov_pol_sys(boostpulse); -show_store_gov_pol_sys(boostpulse_duration); -show_store_gov_pol_sys(io_is_busy); - -#define gov_sys_attr_rw(_name) \ -static struct global_attr _name##_gov_sys = \ -__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) - -#define gov_pol_attr_rw(_name) \ -static struct freq_attr _name##_gov_pol = \ -__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) - -#define gov_sys_pol_attr_rw(_name) \ - gov_sys_attr_rw(_name); \ - gov_pol_attr_rw(_name) - -gov_sys_pol_attr_rw(target_loads); -gov_sys_pol_attr_rw(above_hispeed_delay); -gov_sys_pol_attr_rw(hispeed_freq); -gov_sys_pol_attr_rw(go_hispeed_load); -gov_sys_pol_attr_rw(min_sample_time); -gov_sys_pol_attr_rw(timer_rate); -gov_sys_pol_attr_rw(timer_slack); -gov_sys_pol_attr_rw(boost); -gov_sys_pol_attr_rw(boostpulse_duration); 
-gov_sys_pol_attr_rw(io_is_busy); - -static struct global_attr boostpulse_gov_sys = - __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_sys); - -static struct freq_attr boostpulse_gov_pol = - __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_pol); - -/* One Governor instance for entire system */ -static struct attribute *interactive_attributes_gov_sys[] = { - &target_loads_gov_sys.attr, - &above_hispeed_delay_gov_sys.attr, - &hispeed_freq_gov_sys.attr, - &go_hispeed_load_gov_sys.attr, - &min_sample_time_gov_sys.attr, - &timer_rate_gov_sys.attr, - &timer_slack_gov_sys.attr, - &boost_gov_sys.attr, - &boostpulse_gov_sys.attr, - &boostpulse_duration_gov_sys.attr, - &io_is_busy_gov_sys.attr, - NULL, +show_one(hispeed_freq, "%u"); +show_one(go_hispeed_load, "%lu"); +show_one(min_sample_time, "%lu"); +show_one(timer_slack, "%lu"); +show_one(boost, "%u"); +show_one(boostpulse_duration, "%u"); +show_one(io_is_busy, "%u"); + +gov_attr_rw(target_loads); +gov_attr_rw(above_hispeed_delay); +gov_attr_rw(hispeed_freq); +gov_attr_rw(go_hispeed_load); +gov_attr_rw(min_sample_time); +gov_attr_rw(timer_rate); +gov_attr_rw(timer_slack); +gov_attr_rw(boost); +gov_attr_wo(boostpulse); +gov_attr_rw(boostpulse_duration); +gov_attr_rw(io_is_busy); + +static struct attribute *interactive_attributes[] = { + &target_loads.attr, + &above_hispeed_delay.attr, + &hispeed_freq.attr, + &go_hispeed_load.attr, + &min_sample_time.attr, + &timer_rate.attr, + &timer_slack.attr, + &boost.attr, + &boostpulse.attr, + &boostpulse_duration.attr, + &io_is_busy.attr, + NULL }; -static struct attribute_group interactive_attr_group_gov_sys = { - .attrs = interactive_attributes_gov_sys, - .name = "interactive", +static struct kobj_type interactive_tunables_ktype = { + .default_attrs = interactive_attributes, + .sysfs_ops = &governor_sysfs_ops, }; -/* Per policy governor instance */ -static struct attribute *interactive_attributes_gov_pol[] = { - &target_loads_gov_pol.attr, - &above_hispeed_delay_gov_pol.attr, - &hispeed_freq_gov_pol.attr, - &go_hispeed_load_gov_pol.attr, - &min_sample_time_gov_pol.attr, - &timer_rate_gov_pol.attr, - &timer_slack_gov_pol.attr, - &boost_gov_pol.attr, - &boostpulse_gov_pol.attr, - &boostpulse_duration_gov_pol.attr, - &io_is_busy_gov_pol.attr, - NULL, -}; - -static struct attribute_group interactive_attr_group_gov_pol = { - .attrs = interactive_attributes_gov_pol, - .name = "interactive", -}; - -static struct attribute_group *get_sysfs_attr(void) -{ - if (have_governor_per_policy()) - return &interactive_attr_group_gov_pol; - else - return &interactive_attr_group_gov_sys; -} - static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, - unsigned long val, - void *data) + unsigned long val, void *data) { if (val == IDLE_END) cpufreq_interactive_idle_end(); @@ -1123,31 +1023,170 @@ static struct notifier_block cpufreq_interactive_idle_nb = { .notifier_call = cpufreq_interactive_idle_notifier, }; -static int cpufreq_governor_interactive_init(struct cpufreq_policy *policy) +/* Interactive Governor callbacks */ +struct interactive_governor { + struct cpufreq_governor gov; + unsigned int usage_count; +}; + +static struct interactive_governor interactive_gov; + +#define CPU_FREQ_GOV_INTERACTIVE (&interactive_gov.gov) + +static void irq_work(struct irq_work *irq_work) { - int rc; - struct cpufreq_interactive_tunables *tunables; + struct interactive_cpu *icpu = container_of(irq_work, struct + interactive_cpu, irq_work); - if (have_governor_per_policy()) - tunables = policy->governor_data; - else - 
tunables = common_tunables; + cpufreq_interactive_update(icpu); + icpu->work_in_progress = false; +} - if (have_governor_per_policy()) { - WARN_ON(tunables); - } else if (tunables) { - tunables->usage_count++; - policy->governor_data = tunables; - return 0; +static void update_util_handler(struct update_util_data *data, u64 time, + unsigned int flags) +{ + struct interactive_cpu *icpu = container_of(data, + struct interactive_cpu, update_util); + struct interactive_policy *ipolicy = icpu->ipolicy; + struct interactive_tunables *tunables = ipolicy->tunables; + u64 delta_ns; + + /* + * The irq-work may not be allowed to be queued up right now. + * Possible reasons: + * - Work has already been queued up or is in progress. + * - It is too early (too little time from the previous sample). + */ + if (icpu->work_in_progress) + return; + + delta_ns = time - icpu->last_sample_time; + if ((s64)delta_ns < tunables->sampling_rate * NSEC_PER_USEC) + return; + + icpu->last_sample_time = time; + icpu->next_sample_jiffies = usecs_to_jiffies(tunables->sampling_rate) + + jiffies; + + icpu->work_in_progress = true; + irq_work_queue(&icpu->irq_work); +} + +static void gov_set_update_util(struct interactive_policy *ipolicy) +{ + struct cpufreq_policy *policy = ipolicy->policy; + struct interactive_cpu *icpu; + int cpu; + + for_each_cpu(cpu, policy->cpus) { + icpu = &per_cpu(interactive_cpu, cpu); + + icpu->last_sample_time = 0; + icpu->next_sample_jiffies = 0; + cpufreq_add_update_util_hook(cpu, &icpu->update_util, + update_util_handler); } +} + +static inline void gov_clear_update_util(struct cpufreq_policy *policy) +{ + int i; + + for_each_cpu(i, policy->cpus) + cpufreq_remove_update_util_hook(i); + + synchronize_sched(); +} + +static void icpu_cancel_work(struct interactive_cpu *icpu) +{ + irq_work_sync(&icpu->irq_work); + icpu->work_in_progress = false; + del_timer_sync(&icpu->slack_timer); +} + +static struct interactive_policy * +interactive_policy_alloc(struct cpufreq_policy *policy) +{ + struct interactive_policy *ipolicy; + + ipolicy = kzalloc(sizeof(*ipolicy), GFP_KERNEL); + if (!ipolicy) + return NULL; + + ipolicy->policy = policy; + + return ipolicy; +} + +static void interactive_policy_free(struct interactive_policy *ipolicy) +{ + kfree(ipolicy); +} + +static struct interactive_tunables * +interactive_tunables_alloc(struct interactive_policy *ipolicy) +{ + struct interactive_tunables *tunables; tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); - if (!tunables) { - pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__); + if (!tunables) + return NULL; + + gov_attr_set_init(&tunables->attr_set, &ipolicy->tunables_hook); + if (!have_governor_per_policy()) + global_tunables = tunables; + + ipolicy->tunables = tunables; + + return tunables; +} + +static void interactive_tunables_free(struct interactive_tunables *tunables) +{ + if (!have_governor_per_policy()) + global_tunables = NULL; + + kfree(tunables); +} + +int cpufreq_interactive_init(struct cpufreq_policy *policy) +{ + struct interactive_policy *ipolicy; + struct interactive_tunables *tunables; + int ret; + + /* State should be equivalent to EXIT */ + if (policy->governor_data) + return -EBUSY; + + ipolicy = interactive_policy_alloc(policy); + if (!ipolicy) return -ENOMEM; + + mutex_lock(&global_tunables_lock); + + if (global_tunables) { + if (WARN_ON(have_governor_per_policy())) { + ret = -EINVAL; + goto free_int_policy; + } + + policy->governor_data = ipolicy; + ipolicy->tunables = global_tunables; + + 
gov_attr_set_get(&global_tunables->attr_set, + &ipolicy->tunables_hook); + goto out; + } + + tunables = interactive_tunables_alloc(ipolicy); + if (!tunables) { + ret = -ENOMEM; + goto free_int_policy; } - tunables->usage_count = 1; + tunables->hispeed_freq = policy->max; tunables->above_hispeed_delay = default_above_hispeed_delay; tunables->nabove_hispeed_delay = ARRAY_SIZE(default_above_hispeed_delay); @@ -1155,226 +1194,217 @@ static int cpufreq_governor_interactive_init(struct cpufreq_policy *policy) tunables->target_loads = default_target_loads; tunables->ntarget_loads = ARRAY_SIZE(default_target_loads); tunables->min_sample_time = DEFAULT_MIN_SAMPLE_TIME; - tunables->timer_rate = DEFAULT_TIMER_RATE; - tunables->boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME; - tunables->timer_slack_val = DEFAULT_TIMER_SLACK; + tunables->boostpulse_duration = DEFAULT_MIN_SAMPLE_TIME; + tunables->sampling_rate = DEFAULT_SAMPLING_RATE; + tunables->timer_slack = DEFAULT_TIMER_SLACK; + update_slack_delay(tunables); spin_lock_init(&tunables->target_loads_lock); spin_lock_init(&tunables->above_hispeed_delay_lock); - policy->governor_data = tunables; - if (!have_governor_per_policy()) { - common_tunables = tunables; - } + policy->governor_data = ipolicy; - rc = sysfs_create_group(get_governor_parent_kobj(policy), - get_sysfs_attr()); - if (rc) { - kfree(tunables); - policy->governor_data = NULL; - if (!have_governor_per_policy()) { - common_tunables = NULL; - } - return rc; - } + ret = kobject_init_and_add(&tunables->attr_set.kobj, + &interactive_tunables_ktype, + get_governor_parent_kobj(policy), "%s", + interactive_gov.gov.name); + if (ret) + goto fail; - idle_notifier_register(&cpufreq_interactive_idle_nb); - cpufreq_register_notifier(&cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); + /* One time initialization for governor */ + if (!interactive_gov.usage_count++) { + idle_notifier_register(&cpufreq_interactive_idle_nb); + cpufreq_register_notifier(&cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + out: + mutex_unlock(&global_tunables_lock); return 0; + + fail: + policy->governor_data = NULL; + interactive_tunables_free(tunables); + + free_int_policy: + mutex_unlock(&global_tunables_lock); + + interactive_policy_free(ipolicy); + pr_err("governor initialization failed (%d)\n", ret); + + return ret; } -static void cpufreq_governor_interactive_exit(struct cpufreq_policy *policy) +void cpufreq_interactive_exit(struct cpufreq_policy *policy) { - struct cpufreq_interactive_tunables *tunables; - - if (have_governor_per_policy()) - tunables = policy->governor_data; - else - tunables = common_tunables; + struct interactive_policy *ipolicy = policy->governor_data; + struct interactive_tunables *tunables = ipolicy->tunables; + unsigned int count; - WARN_ON(!tunables); + mutex_lock(&global_tunables_lock); - if (!--tunables->usage_count) { + /* Last policy using the governor ? 
*/ + if (!--interactive_gov.usage_count) { cpufreq_unregister_notifier(&cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); + CPUFREQ_TRANSITION_NOTIFIER); idle_notifier_unregister(&cpufreq_interactive_idle_nb); - - sysfs_remove_group(get_governor_parent_kobj(policy), - get_sysfs_attr()); - - kfree(tunables); - common_tunables = NULL; } + count = gov_attr_set_put(&tunables->attr_set, &ipolicy->tunables_hook); policy->governor_data = NULL; + if (!count) + interactive_tunables_free(tunables); + + mutex_unlock(&global_tunables_lock); + + interactive_policy_free(ipolicy); } -static int cpufreq_governor_interactive_start(struct cpufreq_policy *policy) +int cpufreq_interactive_start(struct cpufreq_policy *policy) { - unsigned int j; - struct cpufreq_interactive_cpuinfo *pcpu; - struct cpufreq_frequency_table *freq_table; - struct cpufreq_interactive_tunables *tunables; + struct interactive_policy *ipolicy = policy->governor_data; + struct interactive_cpu *icpu; + unsigned int cpu; - if (have_governor_per_policy()) - tunables = policy->governor_data; - else - tunables = common_tunables; - - WARN_ON(!tunables); - - mutex_lock(&gov_lock); - - freq_table = policy->freq_table; - if (!tunables->hispeed_freq) - tunables->hispeed_freq = policy->max; - - for_each_cpu(j, policy->cpus) { - pcpu = &per_cpu(cpuinfo, j); - pcpu->policy = policy; - pcpu->target_freq = policy->cur; - pcpu->freq_table = freq_table; - pcpu->floor_freq = pcpu->target_freq; - pcpu->pol_floor_val_time = - ktime_to_us(ktime_get()); - pcpu->loc_floor_val_time = pcpu->pol_floor_val_time; - pcpu->pol_hispeed_val_time = pcpu->pol_floor_val_time; - pcpu->loc_hispeed_val_time = pcpu->pol_floor_val_time; - down_write(&pcpu->enable_sem); - del_timer_sync(&pcpu->cpu_timer); - del_timer_sync(&pcpu->cpu_slack_timer); - cpufreq_interactive_timer_start(tunables, j); - pcpu->governor_enabled = 1; - up_write(&pcpu->enable_sem); + for_each_cpu(cpu, policy->cpus) { + icpu = &per_cpu(interactive_cpu, cpu); + + icpu->target_freq = policy->cur; + icpu->floor_freq = icpu->target_freq; + icpu->pol_floor_val_time = ktime_to_us(ktime_get()); + icpu->loc_floor_val_time = icpu->pol_floor_val_time; + icpu->pol_hispeed_val_time = icpu->pol_floor_val_time; + icpu->loc_hispeed_val_time = icpu->pol_floor_val_time; + + down_write(&icpu->enable_sem); + icpu->ipolicy = ipolicy; + up_write(&icpu->enable_sem); + + slack_timer_resched(icpu, cpu, false); } - mutex_unlock(&gov_lock); + gov_set_update_util(ipolicy); return 0; } -static void cpufreq_governor_interactive_stop(struct cpufreq_policy *policy) +void cpufreq_interactive_stop(struct cpufreq_policy *policy) { - unsigned int j; - struct cpufreq_interactive_cpuinfo *pcpu; - - mutex_lock(&gov_lock); - for_each_cpu(j, policy->cpus) { - pcpu = &per_cpu(cpuinfo, j); - down_write(&pcpu->enable_sem); - pcpu->governor_enabled = 0; - del_timer_sync(&pcpu->cpu_timer); - del_timer_sync(&pcpu->cpu_slack_timer); - up_write(&pcpu->enable_sem); - } + struct interactive_policy *ipolicy = policy->governor_data; + struct interactive_cpu *icpu; + unsigned int cpu; + + gov_clear_update_util(ipolicy->policy); + + for_each_cpu(cpu, policy->cpus) { + icpu = &per_cpu(interactive_cpu, cpu); - mutex_unlock(&gov_lock); + icpu_cancel_work(icpu); + + down_write(&icpu->enable_sem); + icpu->ipolicy = NULL; + up_write(&icpu->enable_sem); + } } -static void cpufreq_governor_interactive_limits(struct cpufreq_policy *policy) +void cpufreq_interactive_limits(struct cpufreq_policy *policy) { - unsigned int j; - struct cpufreq_interactive_cpuinfo 
*pcpu; + struct interactive_cpu *icpu; + unsigned int cpu; unsigned long flags; - if (policy->max < policy->cur) - __cpufreq_driver_target(policy, - policy->max, CPUFREQ_RELATION_H); - else if (policy->min > policy->cur) - __cpufreq_driver_target(policy, - policy->min, CPUFREQ_RELATION_L); - for_each_cpu(j, policy->cpus) { - pcpu = &per_cpu(cpuinfo, j); - - down_read(&pcpu->enable_sem); - if (pcpu->governor_enabled == 0) { - up_read(&pcpu->enable_sem); - continue; - } + cpufreq_policy_apply_limits(policy); - spin_lock_irqsave(&pcpu->target_freq_lock, flags); - if (policy->max < pcpu->target_freq) - pcpu->target_freq = policy->max; - else if (policy->min > pcpu->target_freq) - pcpu->target_freq = policy->min; + for_each_cpu(cpu, policy->cpus) { + icpu = &per_cpu(interactive_cpu, cpu); - spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); - up_read(&pcpu->enable_sem); + spin_lock_irqsave(&icpu->target_freq_lock, flags); + + if (policy->max < icpu->target_freq) + icpu->target_freq = policy->max; + else if (policy->min > icpu->target_freq) + icpu->target_freq = policy->min; + + spin_unlock_irqrestore(&icpu->target_freq_lock, flags); } } -static struct cpufreq_governor cpufreq_gov_interactive = { - .name = "interactive", - .init = cpufreq_governor_interactive_init, - .exit = cpufreq_governor_interactive_exit, - .start = cpufreq_governor_interactive_start, - .stop = cpufreq_governor_interactive_stop, - .limits = cpufreq_governor_interactive_limits, - .max_transition_latency = 10000000, - .owner = THIS_MODULE, +static struct interactive_governor interactive_gov = { + .gov = { + .name = "interactive", + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, + .init = cpufreq_interactive_init, + .exit = cpufreq_interactive_exit, + .start = cpufreq_interactive_start, + .stop = cpufreq_interactive_stop, + .limits = cpufreq_interactive_limits, + } }; static void cpufreq_interactive_nop_timer(unsigned long data) { + /* + * The purpose of slack-timer is to wake up the CPU from IDLE, in order + * to decrease its frequency if it is not set to minimum already. + * + * This is important for platforms where CPU with higher frequencies + * consume higher power even at IDLE. 
+ */ }
-static int __init cpufreq_interactive_init(void) +static int __init cpufreq_interactive_gov_init(void) { - unsigned int i; - struct cpufreq_interactive_cpuinfo *pcpu; - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; - - /* Initalize per-cpu timers */ - for_each_possible_cpu(i) { - pcpu = &per_cpu(cpuinfo, i); - init_timer_pinned_deferrable(&pcpu->cpu_timer); - pcpu->cpu_timer.function = cpufreq_interactive_timer; - pcpu->cpu_timer.data = i; - init_timer(&pcpu->cpu_slack_timer); - pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer; - spin_lock_init(&pcpu->load_lock); - spin_lock_init(&pcpu->target_freq_lock); - init_rwsem(&pcpu->enable_sem); + struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; + struct interactive_cpu *icpu; + unsigned int cpu; + + for_each_possible_cpu(cpu) { + icpu = &per_cpu(interactive_cpu, cpu); + + init_irq_work(&icpu->irq_work, irq_work); + spin_lock_init(&icpu->load_lock); + spin_lock_init(&icpu->target_freq_lock); + init_rwsem(&icpu->enable_sem); + + /* Initialize per-cpu slack-timer */ + init_timer_pinned(&icpu->slack_timer); + icpu->slack_timer.function = cpufreq_interactive_nop_timer; } spin_lock_init(&speedchange_cpumask_lock); - mutex_init(&gov_lock); - speedchange_task = - kthread_create(cpufreq_interactive_speedchange_task, NULL, - "cfinteractive"); + speedchange_task = kthread_create(cpufreq_interactive_speedchange_task, + NULL, "cfinteractive"); if (IS_ERR(speedchange_task)) return PTR_ERR(speedchange_task); sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, &param); get_task_struct(speedchange_task); - /* NB: wake up so the thread does not look hung to the freezer */ + /* wake up so the thread does not look hung to the freezer */ wake_up_process(speedchange_task); - return cpufreq_register_governor(&cpufreq_gov_interactive); + return cpufreq_register_governor(CPU_FREQ_GOV_INTERACTIVE); } #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE struct cpufreq_governor *cpufreq_default_governor(void) { - return &cpufreq_gov_interactive; + return CPU_FREQ_GOV_INTERACTIVE; } -fs_initcall(cpufreq_interactive_init); + +fs_initcall(cpufreq_interactive_gov_init); #else -module_init(cpufreq_interactive_init); +module_init(cpufreq_interactive_gov_init); #endif -static void __exit cpufreq_interactive_exit(void) +static void __exit cpufreq_interactive_gov_exit(void) { - cpufreq_unregister_governor(&cpufreq_gov_interactive); + cpufreq_unregister_governor(CPU_FREQ_GOV_INTERACTIVE); kthread_stop(speedchange_task); put_task_struct(speedchange_task); } - -module_exit(cpufreq_interactive_exit); +module_exit(cpufreq_interactive_gov_exit); MODULE_AUTHOR("Mike Chan <mike@android.com>"); -MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for " - "Latency sensitive workloads"); +MODULE_DESCRIPTION("'cpufreq_interactive' - A dynamic cpufreq governor for Latency sensitive workloads"); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index dafb679adc589ed533ef6dd02c186fb5ed03f4cc..399428e40e8910f31ead1a522641a1606d17d592 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -22,7 +22,10 @@ static void cpufreq_gov_performance_limits(struct cpufreq_policy *policy) __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); } -static struct cpufreq_governor cpufreq_gov_performance = { +#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE +static +#endif +struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", .owner = 
THIS_MODULE, .limits = cpufreq_gov_performance_limits, diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index 78a651038faf2d0a5e38bc8f5cc28ea0ab4363d6..5daa500fb0a9e1b74b7f35779ddc009fc82cb0a7 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -22,7 +22,10 @@ static void cpufreq_gov_powersave_limits(struct cpufreq_policy *policy) __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); } -static struct cpufreq_governor cpufreq_gov_powersave = { +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +static +#endif +struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", .limits = cpufreq_gov_powersave_limits, .owner = THIS_MODULE, diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index bd897e3e134de055e5120cc0fe63bc48a70b21c2..765166d881bb34959337d06f08b4cd87ab71b52c 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -118,7 +118,10 @@ static void cpufreq_userspace_policy_limits(struct cpufreq_policy *policy) mutex_unlock(&userspace_mutex); } -static struct cpufreq_governor cpufreq_gov_userspace = { +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +static +#endif +struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", .init = cpufreq_userspace_policy_init, .exit = cpufreq_userspace_policy_exit, diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index d3ffde8066298ff48d64c990eefe1dec0bf34cb6..a84724eabfb89904629022260133290648707e8b 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -647,8 +647,14 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, if (unlikely(rebooting) && new_index != get_nominal_index()) return 0; - if (!throttled) + if (!throttled) { + /* we don't want to be preempted while + * checking if the CPU frequency has been throttled + */ + preempt_disable(); powernv_cpufreq_throttle_check(NULL); + preempt_enable(); + } cur_msec = jiffies_to_msecs(get_jiffies_64()); diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index c73207abb5a44a77ed0819101ab75cd626ceb353..78ab946d946a7cf21d0ec77d6fdefc9d21b639f9 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -192,7 +192,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, } /* Take note of the planned idle state. */ - sched_idle_set_state(target_state); + sched_idle_set_state(target_state, index); trace_cpu_idle_rcuidle(index, dev->cpu); time_start = ns_to_ktime(local_clock()); @@ -205,7 +205,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); /* The cpu is no longer idle or about to enter idle. 
*/ - sched_idle_set_state(NULL); + sched_idle_set_state(NULL, -1); if (broadcast) { if (WARN_ON_ONCE(!irqs_disabled())) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 954a64c7757b10e1551672861a6ef28623e57019..c310318b34ddced2b286d82f9007150219526efa 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -736,7 +736,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* Will read cryptlen */ append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2); + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF | + FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH); + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); /* Write ICV */ append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB | diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index bf3ea7603a58a9705d51feb271a61507f5c779e9..712592cef1a2e78d11f0ddb330d8c2d60515d481 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -593,11 +593,16 @@ struct devfreq *devfreq_add_device(struct device *dev, list_add(&devfreq->node, &devfreq_list); governor = find_devfreq_governor(devfreq->governor_name); - if (!IS_ERR(governor)) - devfreq->governor = governor; - if (devfreq->governor) - err = devfreq->governor->event_handler(devfreq, - DEVFREQ_GOV_START, NULL); + if (IS_ERR(governor)) { + dev_err(dev, "%s: Unable to find governor for the device\n", + __func__); + err = PTR_ERR(governor); + goto err_init; + } + + devfreq->governor = governor; + err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START, + NULL); if (err) { dev_err(dev, "%s: Unable to start governor for the device\n", __func__); diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index 29866f7e6d7e76a328d80b02b906d821f9a9db6a..1b21bb60e7975ac91ac9735cd0b239148d4df292 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -498,7 +498,7 @@ static int exynos_bus_probe(struct platform_device *pdev) if (IS_ERR(bus->devfreq)) { dev_err(dev, "failed to add devfreq dev with passive governor\n"); - ret = -EPROBE_DEFER; + ret = PTR_ERR(bus->devfreq); goto err; } diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index d5ba43a87a682b6e718d5e2ad7c804498bad61de..55c1782e36239cd56c02df91fcea88582afdfe83 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -153,6 +153,8 @@ struct cppi41_dd { /* context for suspend/resume */ unsigned int dma_tdfdq; + + bool is_suspended; }; #define FIST_COMPLETION_QUEUE 93 @@ -257,6 +259,10 @@ static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc) BUG_ON(desc_num >= ALLOC_DECS_NUM); c = cdd->chan_busy[desc_num]; cdd->chan_busy[desc_num] = NULL; + + /* Usecount for chan_busy[], paired with push_desc_queue() */ + pm_runtime_put(cdd->ddev.dev); + return c; } @@ -447,6 +453,15 @@ static void push_desc_queue(struct cppi41_channel *c) */ __iowmb(); + /* + * DMA transfers can take at least 200ms to complete with USB mass + * storage connected. To prevent autosuspend timeouts, we must use + * pm_runtime_get/put() when chan_busy[] is modified. This will get + * cleared in desc_to_chan() or cppi41_stop_chan() depending on the + * outcome of the transfer. 
+ */ + pm_runtime_get(cdd->ddev.dev); + desc_phys = lower_32_bits(c->desc_phys); desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc); WARN_ON(cdd->chan_busy[desc_num]); @@ -457,20 +472,26 @@ static void push_desc_queue(struct cppi41_channel *c) cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num)); } -static void pending_desc(struct cppi41_channel *c) +/* + * Caller must hold cdd->lock to prevent push_desc_queue() + * getting called out of order. We have both cppi41_dma_issue_pending() + * and cppi41_runtime_resume() call this function. + */ +static void cppi41_run_queue(struct cppi41_dd *cdd) { - struct cppi41_dd *cdd = c->cdd; - unsigned long flags; + struct cppi41_channel *c, *_c; - spin_lock_irqsave(&cdd->lock, flags); - list_add_tail(&c->node, &cdd->pending); - spin_unlock_irqrestore(&cdd->lock, flags); + list_for_each_entry_safe(c, _c, &cdd->pending, node) { + push_desc_queue(c); + list_del(&c->node); + } } static void cppi41_dma_issue_pending(struct dma_chan *chan) { struct cppi41_channel *c = to_cpp41_chan(chan); struct cppi41_dd *cdd = c->cdd; + unsigned long flags; int error; error = pm_runtime_get(cdd->ddev.dev); @@ -482,10 +503,11 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan) return; } - if (likely(pm_runtime_active(cdd->ddev.dev))) - push_desc_queue(c); - else - pending_desc(c); + spin_lock_irqsave(&cdd->lock, flags); + list_add_tail(&c->node, &cdd->pending); + if (!cdd->is_suspended) + cppi41_run_queue(cdd); + spin_unlock_irqrestore(&cdd->lock, flags); pm_runtime_mark_last_busy(cdd->ddev.dev); pm_runtime_put_autosuspend(cdd->ddev.dev); @@ -705,6 +727,9 @@ static int cppi41_stop_chan(struct dma_chan *chan) WARN_ON(!cdd->chan_busy[desc_num]); cdd->chan_busy[desc_num] = NULL; + /* Usecount for chan_busy[], paired with push_desc_queue() */ + pm_runtime_put(cdd->ddev.dev); + return 0; } @@ -1150,8 +1175,12 @@ static int __maybe_unused cppi41_resume(struct device *dev) static int __maybe_unused cppi41_runtime_suspend(struct device *dev) { struct cppi41_dd *cdd = dev_get_drvdata(dev); + unsigned long flags; + spin_lock_irqsave(&cdd->lock, flags); + cdd->is_suspended = true; WARN_ON(!list_empty(&cdd->pending)); + spin_unlock_irqrestore(&cdd->lock, flags); return 0; } @@ -1159,14 +1188,11 @@ static int __maybe_unused cppi41_runtime_suspend(struct device *dev) static int __maybe_unused cppi41_runtime_resume(struct device *dev) { struct cppi41_dd *cdd = dev_get_drvdata(dev); - struct cppi41_channel *c, *_c; unsigned long flags; spin_lock_irqsave(&cdd->lock, flags); - list_for_each_entry_safe(c, _c, &cdd->pending, node) { - push_desc_queue(c); - list_del(&c->node); - } + cdd->is_suspended = false; + cppi41_run_queue(cdd); spin_unlock_irqrestore(&cdd->lock, flags); return 0; diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index 7ca27d4b1c54326a6808fa0bea488df13580db0e..6b16ce390dce182409c2bdbfc287bd2172a2a953 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -1339,6 +1339,7 @@ static int omap_dma_probe(struct platform_device *pdev) struct omap_dmadev *od; struct resource *res; int rc, i, irq; + u32 lch_count; od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL); if (!od) @@ -1381,20 +1382,31 @@ static int omap_dma_probe(struct platform_device *pdev) spin_lock_init(&od->lock); spin_lock_init(&od->irq_lock); - if (!pdev->dev.of_node) { - od->dma_requests = od->plat->dma_attr->lch_count; - if (unlikely(!od->dma_requests)) - od->dma_requests = OMAP_SDMA_REQUESTS; - } else if (of_property_read_u32(pdev->dev.of_node, 
"dma-requests", - &od->dma_requests)) { + /* Number of DMA requests */ + od->dma_requests = OMAP_SDMA_REQUESTS; + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-requests", + &od->dma_requests)) { dev_info(&pdev->dev, "Missing dma-requests property, using %u.\n", OMAP_SDMA_REQUESTS); - od->dma_requests = OMAP_SDMA_REQUESTS; } - od->lch_map = devm_kcalloc(&pdev->dev, od->dma_requests, - sizeof(*od->lch_map), GFP_KERNEL); + /* Number of available logical channels */ + if (!pdev->dev.of_node) { + lch_count = od->plat->dma_attr->lch_count; + if (unlikely(!lch_count)) + lch_count = OMAP_SDMA_CHANNELS; + } else if (of_property_read_u32(pdev->dev.of_node, "dma-channels", + &lch_count)) { + dev_info(&pdev->dev, + "Missing dma-channels property, using %u.\n", + OMAP_SDMA_CHANNELS); + lch_count = OMAP_SDMA_CHANNELS; + } + + od->lch_map = devm_kcalloc(&pdev->dev, lch_count, sizeof(*od->lch_map), + GFP_KERNEL); if (!od->lch_map) return -ENOMEM; diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 030fe05ed43b1e99a63644324fb30305198c62ce..9f3dbc8c63d27e733aadc975ca90a74328513333 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -448,6 +448,9 @@ struct dma_pl330_chan { /* for cyclic capability */ bool cyclic; + + /* for runtime pm tracking */ + bool active; }; struct pl330_dmac { @@ -2031,6 +2034,7 @@ static void pl330_tasklet(unsigned long data) _stop(pch->thread); spin_unlock(&pch->thread->dmac->lock); power_down = true; + pch->active = false; } else { /* Make sure the PL330 Channel thread is active */ spin_lock(&pch->thread->dmac->lock); @@ -2050,6 +2054,7 @@ static void pl330_tasklet(unsigned long data) desc->status = PREP; list_move_tail(&desc->node, &pch->work_list); if (power_down) { + pch->active = true; spin_lock(&pch->thread->dmac->lock); _start(pch->thread); spin_unlock(&pch->thread->dmac->lock); @@ -2164,6 +2169,7 @@ static int pl330_terminate_all(struct dma_chan *chan) unsigned long flags; struct pl330_dmac *pl330 = pch->dmac; LIST_HEAD(list); + bool power_down = false; pm_runtime_get_sync(pl330->ddma.dev); spin_lock_irqsave(&pch->lock, flags); @@ -2174,6 +2180,8 @@ static int pl330_terminate_all(struct dma_chan *chan) pch->thread->req[0].desc = NULL; pch->thread->req[1].desc = NULL; pch->thread->req_running = -1; + power_down = pch->active; + pch->active = false; /* Mark all desc done */ list_for_each_entry(desc, &pch->submitted_list, node) { @@ -2191,6 +2199,8 @@ static int pl330_terminate_all(struct dma_chan *chan) list_splice_tail_init(&pch->completed_list, &pl330->desc_pool); spin_unlock_irqrestore(&pch->lock, flags); pm_runtime_mark_last_busy(pl330->ddma.dev); + if (power_down) + pm_runtime_put_autosuspend(pl330->ddma.dev); pm_runtime_put_autosuspend(pl330->ddma.dev); return 0; @@ -2350,6 +2360,7 @@ static void pl330_issue_pending(struct dma_chan *chan) * updated on work_list emptiness status. 
*/ WARN_ON(list_empty(&pch->submitted_list)); + pch->active = true; pm_runtime_get_sync(pch->dmac->ddma.dev); } list_splice_tail_init(&pch->submitted_list, &pch->work_list); diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 2e441d0ccd79a37a5486d394310839c2cdc47f01..4c357d47546594c6bd0c9b1ca16e27c658a720cf 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -986,6 +986,7 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan) { struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); struct rcar_dmac *dmac = to_rcar_dmac(chan->device); + struct rcar_dmac_chan_map *map = &rchan->map; struct rcar_dmac_desc_page *page, *_page; struct rcar_dmac_desc *desc; LIST_HEAD(list); @@ -1019,6 +1020,13 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan) free_page((unsigned long)page); } + /* Remove slave mapping if present. */ + if (map->slave.xfer_size) { + dma_unmap_resource(chan->device->dev, map->addr, + map->slave.xfer_size, map->dir, 0); + map->slave.xfer_size = 0; + } + pm_runtime_put(chan->device->dev); } diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index 78298460d1686a79864c57d228ac6af5e114acf9..7c1e3a7b14e0c1f540a0bbe7d6b924b124561729 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -453,7 +453,7 @@ int extcon_sync(struct extcon_dev *edev, unsigned int id) dev_err(&edev->dev, "out of memory in extcon_set_state\n"); kobject_uevent(&edev->dev.kobj, KOBJ_CHANGE); - return 0; + return -ENOMEM; } length = name_show(&edev->dev, NULL, prop_buf); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 1ac199cd75e7bdd56ce8d5b9ab91bc2ee9d15133..a4944e22f294b321fba70d572dc4996fcbb838f7 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -259,8 +259,10 @@ static __init int efivar_ssdt_load(void) } data = kmalloc(size, GFP_KERNEL); - if (!data) + if (!data) { + ret = -ENOMEM; goto free_entry; + } ret = efivar_entry_get(entry, NULL, &size, data); if (ret) { diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index 520a40e5e0e431129cf1fe66a7604d32d7e20061..6c7d60c239b5b459b1f45e1d1110f7ed5a265cb6 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -71,8 +71,7 @@ void __init efi_fake_memmap(void) } /* allocate memory for new EFI memmap */ - new_memmap_phy = memblock_alloc(efi.memmap.desc_size * new_nr_map, - PAGE_SIZE); + new_memmap_phy = efi_memmap_alloc(new_nr_map); if (!new_memmap_phy) return; diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index ee49cd23ee636d96b7d340ee74577ff8aef50822..fac67992bede5867ff9093f76df69583b1400492 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -30,14 +30,6 @@ efi_status_t efi_file_close(void *handle); unsigned long get_dram_base(efi_system_table_t *sys_table_arg); -efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, - unsigned long orig_fdt_size, - void *fdt, int new_fdt_size, char *cmdline_ptr, - u64 initrd_addr, u64 initrd_size, - efi_memory_desc_t *memory_map, - unsigned long map_size, unsigned long desc_size, - u32 desc_ver); - efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, void *handle, unsigned long *new_fdt_addr, diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index a6a93116a8f053f6c14911376ffa6da7f1dff44e..260c4b4b492ec38735715859522068da40c21381 100644 --- 
a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -16,13 +16,10 @@ #include "efistub.h" -efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, - unsigned long orig_fdt_size, - void *fdt, int new_fdt_size, char *cmdline_ptr, - u64 initrd_addr, u64 initrd_size, - efi_memory_desc_t *memory_map, - unsigned long map_size, unsigned long desc_size, - u32 desc_ver) +static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, + unsigned long orig_fdt_size, + void *fdt, int new_fdt_size, char *cmdline_ptr, + u64 initrd_addr, u64 initrd_size) { int node, num_rsv; int status; @@ -101,25 +98,23 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, if (status) goto fdt_set_fail; - fdt_val64 = cpu_to_fdt64((u64)(unsigned long)memory_map); + fdt_val64 = U64_MAX; /* placeholder */ status = fdt_setprop(fdt, node, "linux,uefi-mmap-start", &fdt_val64, sizeof(fdt_val64)); if (status) goto fdt_set_fail; - fdt_val32 = cpu_to_fdt32(map_size); + fdt_val32 = U32_MAX; /* placeholder */ status = fdt_setprop(fdt, node, "linux,uefi-mmap-size", &fdt_val32, sizeof(fdt_val32)); if (status) goto fdt_set_fail; - fdt_val32 = cpu_to_fdt32(desc_size); status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-size", &fdt_val32, sizeof(fdt_val32)); if (status) goto fdt_set_fail; - fdt_val32 = cpu_to_fdt32(desc_ver); status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-ver", &fdt_val32, sizeof(fdt_val32)); if (status) @@ -148,6 +143,43 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, return EFI_LOAD_ERROR; } +static efi_status_t update_fdt_memmap(void *fdt, struct efi_boot_memmap *map) +{ + int node = fdt_path_offset(fdt, "/chosen"); + u64 fdt_val64; + u32 fdt_val32; + int err; + + if (node < 0) + return EFI_LOAD_ERROR; + + fdt_val64 = cpu_to_fdt64((unsigned long)*map->map); + err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-start", + &fdt_val64, sizeof(fdt_val64)); + if (err) + return EFI_LOAD_ERROR; + + fdt_val32 = cpu_to_fdt32(*map->map_size); + err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-size", + &fdt_val32, sizeof(fdt_val32)); + if (err) + return EFI_LOAD_ERROR; + + fdt_val32 = cpu_to_fdt32(*map->desc_size); + err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-desc-size", + &fdt_val32, sizeof(fdt_val32)); + if (err) + return EFI_LOAD_ERROR; + + fdt_val32 = cpu_to_fdt32(*map->desc_ver); + err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-desc-ver", + &fdt_val32, sizeof(fdt_val32)); + if (err) + return EFI_LOAD_ERROR; + + return EFI_SUCCESS; +} + #ifndef EFI_FDT_ALIGN #define EFI_FDT_ALIGN EFI_PAGE_SIZE #endif @@ -155,6 +187,7 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, struct exit_boot_struct { efi_memory_desc_t *runtime_map; int *runtime_entry_count; + void *new_fdt_addr; }; static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, @@ -170,7 +203,7 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, efi_get_virtmap(*map->map, *map->map_size, *map->desc_size, p->runtime_map, p->runtime_entry_count); - return EFI_SUCCESS; + return update_fdt_memmap(p->new_fdt_addr, map); } /* @@ -243,20 +276,10 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, goto fail; } - /* - * Now that we have done our final memory allocation (and free) - * we can get the memory map key needed for - * exit_boot_services(). 
- */ - status = efi_get_memory_map(sys_table, &map); - if (status != EFI_SUCCESS) - goto fail_free_new_fdt; - status = update_fdt(sys_table, (void *)fdt_addr, fdt_size, (void *)*new_fdt_addr, new_fdt_size, - cmdline_ptr, initrd_addr, initrd_size, - memory_map, map_size, desc_size, desc_ver); + cmdline_ptr, initrd_addr, initrd_size); /* Succeeding the first time is the expected case. */ if (status == EFI_SUCCESS) @@ -266,22 +289,19 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, /* * We need to allocate more space for the new * device tree, so free existing buffer that is - * too small. Also free memory map, as we will need - * to get new one that reflects the free/alloc we do - * on the device tree buffer. + * too small. */ efi_free(sys_table, new_fdt_size, *new_fdt_addr); - sys_table->boottime->free_pool(memory_map); new_fdt_size += EFI_PAGE_SIZE; } else { pr_efi_err(sys_table, "Unable to construct new device tree.\n"); - goto fail_free_mmap; + goto fail_free_new_fdt; } } - sys_table->boottime->free_pool(memory_map); priv.runtime_map = runtime_map; priv.runtime_entry_count = &runtime_entry_count; + priv.new_fdt_addr = (void *)*new_fdt_addr; status = efi_exit_boot_services(sys_table, handle, &map, &priv, exit_boot_func); @@ -319,9 +339,6 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, pr_efi_err(sys_table, "Exit boot services failed.\n"); -fail_free_mmap: - sys_table->boottime->free_pool(memory_map); - fail_free_new_fdt: efi_free(sys_table, new_fdt_size, *new_fdt_addr); diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index f03ddecd232b542ce4c7163506b69da0d29ad2c7..78686443cb378abf616c7cfe595b82d3a51c9cd6 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -9,6 +9,44 @@ #include #include #include +#include +#include + +static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size) +{ + return memblock_alloc(size, 0); +} + +static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size) +{ + unsigned int order = get_order(size); + struct page *p = alloc_pages(GFP_KERNEL, order); + + if (!p) + return 0; + + return PFN_PHYS(page_to_pfn(p)); +} + +/** + * efi_memmap_alloc - Allocate memory for the EFI memory map + * @num_entries: Number of entries in the allocated map. + * + * Depending on whether mm_init() has already been invoked or not, + * either memblock or "normal" page allocation is used. + * + * Returns the physical address of the allocated memory map on + * success, zero on failure. 
+ */ +phys_addr_t __init efi_memmap_alloc(unsigned int num_entries) +{ + unsigned long size = num_entries * efi.memmap.desc_size; + + if (slab_is_available()) + return __efi_memmap_alloc_late(size); + + return __efi_memmap_alloc_early(size); +} /** * __efi_memmap_init - Common code for mapping the EFI memory map diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index 5b0042776ec7081f49d3eaff1dc56fe08733f3d1..adba614b39656791fbdfc03292ac597c5eb8b2e5 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -413,7 +413,7 @@ static irqreturn_t stmpe_gpio_irq(int irq, void *dev) stmpe->partnum != STMPE1801) { stmpe_reg_write(stmpe, statmsbreg + i, status[i]); stmpe_reg_write(stmpe, - stmpe->regs[STMPE_IDX_GPEDR_LSB + i], + stmpe->regs[STMPE_IDX_GPEDR_MSB] + i, status[i]); } } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 868128a676bae832090c2c18b1f45e94c2996a5f..92159313361b1a2527410062c75a26a3c9a5addd 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -986,7 +986,8 @@ static int gpio_chrdev_open(struct inode *inode, struct file *filp) return -ENODEV; get_device(&gdev->dev); filp->private_data = gdev; - return 0; + + return nonseekable_open(inode, filp); } /** @@ -1011,7 +1012,7 @@ static const struct file_operations gpio_fileops = { .release = gpio_chrdev_release, .open = gpio_chrdev_open, .owner = THIS_MODULE, - .llseek = noop_llseek, + .llseek = no_llseek, .unlocked_ioctl = gpio_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = gpio_ioctl_compat, @@ -1316,12 +1317,12 @@ void gpiochip_remove(struct gpio_chip *chip) /* FIXME: should the legacy sysfs handling be moved to gpio_device? */ gpiochip_sysfs_unregister(gdev); + gpiochip_free_hogs(chip); /* Numb the device, cancelling all outstanding operations */ gdev->chip = NULL; gpiochip_irqchip_remove(chip); acpi_gpiochip_remove(chip); gpiochip_remove_pin_ranges(chip); - gpiochip_free_hogs(chip); of_gpiochip_remove(chip); /* * We accept no more calls into the driver from this point, so diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 9260caef74fa07f7045f1bbc30e5481bb6f5558e..882404cefbc23881973ed766f320590afe117042 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2577,6 +2577,9 @@ static int dce_v10_0_cursor_move_locked(struct drm_crtc *crtc, struct amdgpu_device *adev = crtc->dev->dev_private; int xorigin = 0, yorigin = 0; + amdgpu_crtc->cursor_x = x; + amdgpu_crtc->cursor_y = y; + /* avivo cursor are offset into the total surface */ x += crtc->x; y += crtc->y; @@ -2596,9 +2599,6 @@ static int dce_v10_0_cursor_move_locked(struct drm_crtc *crtc, WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); - amdgpu_crtc->cursor_x = x; - amdgpu_crtc->cursor_y = y; - return 0; } @@ -2661,12 +2661,11 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - amdgpu_crtc->cursor_width = width; - amdgpu_crtc->cursor_height = height; - dce_v10_0_lock_cursor(crtc, true); - if (hot_x != amdgpu_crtc->cursor_hot_x || + if (width != amdgpu_crtc->cursor_width || + height != amdgpu_crtc->cursor_height || + hot_x != amdgpu_crtc->cursor_hot_x || hot_y != amdgpu_crtc->cursor_hot_y) { int x, y; @@ -2675,6 +2674,8 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v10_0_cursor_move_locked(crtc, x, y); + amdgpu_crtc->cursor_width = width; + amdgpu_crtc->cursor_height = height; amdgpu_crtc->cursor_hot_x = 
hot_x; amdgpu_crtc->cursor_hot_y = hot_y; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 367739bd19279fa5f968675b54733bad289b65e0..7ddc32127d88f015e323434d1a112c8246017228 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2593,6 +2593,9 @@ static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc, struct amdgpu_device *adev = crtc->dev->dev_private; int xorigin = 0, yorigin = 0; + amdgpu_crtc->cursor_x = x; + amdgpu_crtc->cursor_y = y; + /* avivo cursor are offset into the total surface */ x += crtc->x; y += crtc->y; @@ -2612,9 +2615,6 @@ static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc, WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); - amdgpu_crtc->cursor_x = x; - amdgpu_crtc->cursor_y = y; - return 0; } @@ -2677,12 +2677,11 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - amdgpu_crtc->cursor_width = width; - amdgpu_crtc->cursor_height = height; - dce_v11_0_lock_cursor(crtc, true); - if (hot_x != amdgpu_crtc->cursor_hot_x || + if (width != amdgpu_crtc->cursor_width || + height != amdgpu_crtc->cursor_height || + hot_x != amdgpu_crtc->cursor_hot_x || hot_y != amdgpu_crtc->cursor_hot_y) { int x, y; @@ -2691,6 +2690,8 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v11_0_cursor_move_locked(crtc, x, y); + amdgpu_crtc->cursor_width = width; + amdgpu_crtc->cursor_height = height; amdgpu_crtc->cursor_hot_x = hot_x; amdgpu_crtc->cursor_hot_y = hot_y; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 15f9fc0514b29b800f1fb5c83cfd3f43ea52ec04..fde6ee1f6f2b3bbab424ed08ee699a1a3dfcf021 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1933,6 +1933,9 @@ static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc, int w = amdgpu_crtc->cursor_width; + amdgpu_crtc->cursor_x = x; + amdgpu_crtc->cursor_y = y; + /* avivo cursor are offset into the total surface */ x += crtc->x; y += crtc->y; @@ -1952,8 +1955,6 @@ static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc, WREG32(EVERGREEN_CUR_SIZE + amdgpu_crtc->crtc_offset, ((w - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); - amdgpu_crtc->cursor_x = x; - amdgpu_crtc->cursor_y = y; return 0; } @@ -2016,12 +2017,11 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - amdgpu_crtc->cursor_width = width; - amdgpu_crtc->cursor_height = height; - dce_v6_0_lock_cursor(crtc, true); - if (hot_x != amdgpu_crtc->cursor_hot_x || + if (width != amdgpu_crtc->cursor_width || + height != amdgpu_crtc->cursor_height || + hot_x != amdgpu_crtc->cursor_hot_x || hot_y != amdgpu_crtc->cursor_hot_y) { int x, y; @@ -2030,6 +2030,8 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v6_0_cursor_move_locked(crtc, x, y); + amdgpu_crtc->cursor_width = width; + amdgpu_crtc->cursor_height = height; amdgpu_crtc->cursor_hot_x = hot_x; amdgpu_crtc->cursor_hot_y = hot_y; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 8c4d808db0f1279af1b5a0c05e364c6e6c07bdfd..7d9ffde0a628db4051fed12bc19f09a9e6fcb808 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2465,6 +2465,9 @@ static int dce_v8_0_cursor_move_locked(struct drm_crtc *crtc, struct amdgpu_device *adev = crtc->dev->dev_private; int xorigin = 0, yorigin = 0; + 
amdgpu_crtc->cursor_x = x; + amdgpu_crtc->cursor_y = y; + /* avivo cursor are offset into the total surface */ x += crtc->x; y += crtc->y; @@ -2484,9 +2487,6 @@ static int dce_v8_0_cursor_move_locked(struct drm_crtc *crtc, WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); - amdgpu_crtc->cursor_x = x; - amdgpu_crtc->cursor_y = y; - return 0; } @@ -2549,12 +2549,11 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - amdgpu_crtc->cursor_width = width; - amdgpu_crtc->cursor_height = height; - dce_v8_0_lock_cursor(crtc, true); - if (hot_x != amdgpu_crtc->cursor_hot_x || + if (width != amdgpu_crtc->cursor_width || + height != amdgpu_crtc->cursor_height || + hot_x != amdgpu_crtc->cursor_hot_x || hot_y != amdgpu_crtc->cursor_hot_y) { int x, y; @@ -2563,6 +2562,8 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v8_0_cursor_move_locked(crtc, x, y); + amdgpu_crtc->cursor_width = width; + amdgpu_crtc->cursor_height = height; amdgpu_crtc->cursor_hot_x = hot_x; amdgpu_crtc->cursor_hot_y = hot_y; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index bb97182dc74991ae5b5ad0ab5d4121757ae54789..a88d365be4c56614c0657ecb0615c07363d41f27 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3947,8 +3947,12 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; data = mmRLC_SRM_INDEX_CNTL_DATA_0; for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { - amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); - amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); + if (unique_indices[i] != 0) { + amdgpu_mm_wreg(adev, temp + i, + unique_indices[i] & 0x3FFFF, false); + amdgpu_mm_wreg(adev, data + i, + unique_indices[i] >> 20, false); + } } kfree(register_list_format); @@ -3994,7 +3998,7 @@ static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable) { - WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0); + WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 
0 : 1); } static void gfx_v8_0_init_pg(struct amdgpu_device *adev) @@ -5891,29 +5895,24 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev adev->gfx.rlc.funcs->enter_safe_mode(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { - /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/ - * Cmp_busy/GFX_Idle interrupts - */ - gfx_v8_0_enable_gui_idle_interrupt(adev, true); - temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; if (temp1 != data1) WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); - /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ + /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); - /* 3 - clear cgcg override */ + /* 2 - clear cgcg override */ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); - /* 4 - write cmd to set CGLS */ + /* 3 - write cmd to set CGLS */ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); - /* 5 - enable cgcg */ + /* 4 - enable cgcg */ data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { @@ -5931,6 +5930,11 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev if (temp != data) WREG32(mmRLC_CGCG_CGLS_CTRL, data); + + /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/ + * Cmp_busy/GFX_Idle interrupts + */ + gfx_v8_0_enable_gui_idle_interrupt(adev, true); } else { /* disable cntx_empty_int_enable & GFX Idle interrupt */ gfx_v8_0_enable_gui_idle_interrupt(adev, false); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index b13c8aaec078e635f2d2d17b8d6332d0e80ae312..6df924f72f29cdea26af441055839361e0aedbe2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -227,6 +227,9 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) } WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); + if (adev->mode_info.num_crtc) + amdgpu_display_set_vga_render_state(adev, false); + gmc_v6_0_mc_stop(adev, &save); if (gmc_v6_0_wait_for_idle((void *)adev)) { @@ -256,7 +259,6 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } gmc_v6_0_mc_resume(adev, &save); - amdgpu_display_set_vga_render_state(adev, false); } static int gmc_v6_0_mc_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index d6f85b1a0b93540e60399b337310cee3be6753c0..b447a01ab21a6f01465a196c900cf582547c39cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -56,7 +56,6 @@ #define BIOS_SCRATCH_4 0x5cd MODULE_FIRMWARE("radeon/tahiti_smc.bin"); -MODULE_FIRMWARE("radeon/tahiti_k_smc.bin"); MODULE_FIRMWARE("radeon/pitcairn_smc.bin"); MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin"); MODULE_FIRMWARE("radeon/verde_smc.bin"); @@ -3486,24 +3485,12 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, (adev->pdev->device == 0x6817) || (adev->pdev->device == 0x6806)) max_mclk = 120000; - } else if (adev->asic_type == CHIP_VERDE) { - if ((adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0x87) || - (adev->pdev->device == 0x6820) || - (adev->pdev->device == 0x6821) || - (adev->pdev->device == 0x6822) || - (adev->pdev->device == 0x6823) || - 
(adev->pdev->device == 0x682A) || - (adev->pdev->device == 0x682B)) { - max_sclk = 75000; - max_mclk = 80000; - } } else if (adev->asic_type == CHIP_OLAND) { if ((adev->pdev->revision == 0xC7) || (adev->pdev->revision == 0x80) || (adev->pdev->revision == 0x81) || (adev->pdev->revision == 0x83) || + (adev->pdev->revision == 0x87) || (adev->pdev->device == 0x6604) || (adev->pdev->device == 0x6605)) { max_sclk = 75000; @@ -7684,48 +7671,49 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev) chip_name = "tahiti"; break; case CHIP_PITCAIRN: - if ((adev->pdev->revision == 0x81) || - (adev->pdev->device == 0x6810) || - (adev->pdev->device == 0x6811) || - (adev->pdev->device == 0x6816) || - (adev->pdev->device == 0x6817) || - (adev->pdev->device == 0x6806)) + if ((adev->pdev->revision == 0x81) && + ((adev->pdev->device == 0x6810) || + (adev->pdev->device == 0x6811))) chip_name = "pitcairn_k"; else chip_name = "pitcairn"; break; case CHIP_VERDE: - if ((adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0x87) || - (adev->pdev->device == 0x6820) || - (adev->pdev->device == 0x6821) || - (adev->pdev->device == 0x6822) || - (adev->pdev->device == 0x6823) || - (adev->pdev->device == 0x682A) || - (adev->pdev->device == 0x682B)) + if (((adev->pdev->device == 0x6820) && + ((adev->pdev->revision == 0x81) || + (adev->pdev->revision == 0x83))) || + ((adev->pdev->device == 0x6821) && + ((adev->pdev->revision == 0x83) || + (adev->pdev->revision == 0x87))) || + ((adev->pdev->revision == 0x87) && + ((adev->pdev->device == 0x6823) || + (adev->pdev->device == 0x682b)))) chip_name = "verde_k"; else chip_name = "verde"; break; case CHIP_OLAND: - if ((adev->pdev->revision == 0xC7) || - (adev->pdev->revision == 0x80) || - (adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83) || - (adev->pdev->device == 0x6604) || - (adev->pdev->device == 0x6605)) + if (((adev->pdev->revision == 0x81) && + ((adev->pdev->device == 0x6600) || + (adev->pdev->device == 0x6604) || + (adev->pdev->device == 0x6605) || + (adev->pdev->device == 0x6610))) || + ((adev->pdev->revision == 0x83) && + (adev->pdev->device == 0x6610))) chip_name = "oland_k"; else chip_name = "oland"; break; case CHIP_HAINAN: - if ((adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0xC3) || - (adev->pdev->device == 0x6664) || - (adev->pdev->device == 0x6665) || - (adev->pdev->device == 0x6667)) + if (((adev->pdev->revision == 0x81) && + (adev->pdev->device == 0x6660)) || + ((adev->pdev->revision == 0x83) && + ((adev->pdev->device == 0x6660) || + (adev->pdev->device == 0x6663) || + (adev->pdev->device == 0x6665) || + (adev->pdev->device == 0x6667))) || + ((adev->pdev->revision == 0xc3) && + (adev->pdev->device == 0x6665))) chip_name = "hainan_k"; else chip_name = "hainan"; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c index 76310ac7ef0d12ac6e25a84847655261a284bd48..dca1b13fda2f43db570cfe78e1b4f28be6e21862 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c @@ -1958,6 +1958,12 @@ int fiji_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) int res; uint64_t tmp64; + if (hwmgr->thermal_controller.fanInfo.bNoFan) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + if (smu_data->smu7_data.fan_table_start == 0) { phm_cap_unset(hwmgr->platform_descriptor.platformCaps, 
PHM_PlatformCaps_MicrocodeFanControl); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c index 8c889caba420dc2d55ec9340d46ab0a92d468514..6c26b83655d0d4097a1136d7f4c4b613198419d4 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c @@ -2006,6 +2006,12 @@ int iceland_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl)) return 0; + if (hwmgr->thermal_controller.fanInfo.bNoFan) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + if (0 == smu7_data->fan_table_start) { phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); return 0; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c index 71bb2f8dc157ba36029c5131237a20a8c0e2b7be..8ca1a3341dea7b542d76587fe73731a79784a265 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c @@ -1885,6 +1885,12 @@ int polaris10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) int res; uint64_t tmp64; + if (hwmgr->thermal_controller.fanInfo.bNoFan) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + if (smu_data->smu7_data.fan_table_start == 0) { phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c index de2a24d85f48b94a0d17aafd41af3058839a4c6b..a6619e530fe3a3634849a72930b656d218c3bc01 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c @@ -2496,6 +2496,12 @@ int tonga_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) PHM_PlatformCaps_MicrocodeFanControl)) return 0; + if (hwmgr->thermal_controller.fanInfo.bNoFan) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + if (0 == smu_data->smu7_data.fan_table_start) { phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 904beaa932d03f0b5ce7d89a13d16729582052e0..f75c6421db6239c9435ed39dc7d6244d13894920 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -223,7 +223,8 @@ static int ast_get_dram_info(struct drm_device *dev) ast_write32(ast, 0x10000, 0xfc600309); do { - ; + if (pci_channel_offline(dev->pdev)) + return -EIO; } while (ast_read32(ast, 0x10000) != 0x01); data = ast_read32(ast, 0x10004); @@ -428,7 +429,9 @@ int ast_driver_load(struct drm_device *dev, unsigned long flags) ast_detect_chip(dev, &need_post); if (ast->chip != AST1180) { - ast_get_dram_info(dev); + ret = ast_get_dram_info(dev); + if (ret) + goto out_free; ast->vram_size = ast_get_vram_info(dev); DRM_INFO("dram %d %d %d %08x\n", ast->mclk, ast->dram_type, ast->dram_bus_width, ast->vram_size); } diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 21f9926055415e7c0507aa4555e2a3b6daa7ca52..a05bb3891119102d4d96c48b7c9957353c7ae9ad 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1253,8 +1253,10 @@ int 
drm_atomic_helper_commit(struct drm_device *dev, if (!nonblock) { ret = drm_atomic_helper_wait_for_fences(dev, state, true); - if (ret) + if (ret) { + drm_atomic_helper_cleanup_planes(dev, state); return ret; + } } /* @@ -3113,6 +3115,8 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane, if (state->fb) drm_framebuffer_reference(state->fb); + + state->fence = NULL; } EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state); diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index b969a64a15140ca6a4a5b19bb441a24945827784..48a6167f5e7ba51674ffc17c26cedf5be5b20f22 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -952,8 +952,10 @@ static u32 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe, u32 vblank_count; unsigned int seq; - if (WARN_ON(pipe >= dev->num_crtcs)) + if (WARN_ON(pipe >= dev->num_crtcs)) { + *vblanktime = (struct timeval) { 0 }; return 0; + } do { seq = read_seqbegin(&vblank->seqlock); diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 11d44a1e0ab355f2b00b7e9fe4fd24c687bb9f59..ee07bb4a57b74eb093ecf2f819f61b628a4a8118 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -839,6 +839,7 @@ void drm_mm_init(struct drm_mm * mm, u64 start, u64 size) /* Clever trick to avoid a special case in the free hole tracking. */ INIT_LIST_HEAD(&mm->head_node.node_list); + mm->head_node.allocated = 0; mm->head_node.hole_follows = 1; mm->head_node.scanned_block = 0; mm->head_node.scanned_prev_free = 0; diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 53f07ac7c1749d20e7aa904eaabc33adbc4abbc8..e14366de0e6eb1e57ec357df25b82d42baa24972 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1462,6 +1462,13 @@ drm_mode_create_from_cmdline_mode(struct drm_device *dev, return NULL; mode->type |= DRM_MODE_TYPE_USERDEF; + /* fix up 1368x768: GTF/CVT can't express 1366 width due to alignment */ + if (cmd->xres == 1366 && mode->hdisplay == 1368) { + mode->hdisplay = 1366; + mode->hsync_start--; + mode->hsync_end--; + drm_mode_set_name(mode); + } drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V); return mode; } diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index f6b64d7d3528d742a9b36c4551fe7243f38b58d5..276474d13763b97eeaf52a0eea5a8bc69c94a7ba 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -143,8 +143,18 @@ void drm_kms_helper_poll_enable_locked(struct drm_device *dev) } if (dev->mode_config.delayed_event) { + /* + * FIXME: + * + * Use short (1s) delay to handle the initial delayed event. + * This delay should not be needed, but Optimus/nouveau will + * fail in a mysterious way if the delayed event is handled as + * soon as possible, like it is done in + * drm_helper_probe_single_connector_modes(), in case the poll + * was enabled before.
+ */ poll = true; - delay = 0; + delay = HZ; } if (poll) diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index 50eb944fb78a8e80fe81df7f1dc291f14e16e0f8..8f3ca526bd1bd43118e78e8bf8eea8a9181ed403 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -473,6 +473,9 @@ static const struct file_operations psb_gem_fops = { .open = drm_open, .release = drm_release, .unlocked_ioctl = psb_unlocked_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = drm_compat_ioctl, +#endif .mmap = drm_gem_mmap, .poll = drm_poll, .read = drm_read, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 18dfdd5c1b3b1ba5fc8b9c660b37c036d5ea87dd..670beebc32f619eb9d12d0a2846a49b74fc343bc 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2372,7 +2372,7 @@ static int intel_runtime_suspend(struct device *kdev) assert_forcewakes_inactive(dev_priv); - if (!IS_VALLEYVIEW(dev_priv) || !IS_CHERRYVIEW(dev_priv)) + if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) intel_hpd_poll_init(dev_priv); DRM_DEBUG_KMS("Device suspended\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 685e9e065287983a50b82aa02faa9822d74d1582..da832d3cdca7e0204683ca621f8ece9c2df1259d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3684,6 +3684,8 @@ extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val); int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val); +int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms); /* intel_sideband.c */ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 5b6f81c1dbca44786cf3243eb56494e128041e8d..7467355e4a18e048169b0a193d891318bb08deda 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -194,6 +194,7 @@ i915_gem_evict_something(struct i915_address_space *vm, } /* Unbinding will emit any required flushes */ + ret = 0; while (!list_empty(&eviction_list)) { vma = list_first_entry(&eviction_list, struct i915_vma, diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 974bd7bcc801f9bc7a6eb32003886b2c9b4c39be..59ac90025552215b5ae1997356a18c548f75c677 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -344,6 +344,25 @@ i915_gem_active_set(struct i915_gem_active *active, rcu_assign_pointer(active->request, request); } +/** + * i915_gem_active_set_retire_fn - updates the retirement callback + * @active - the active tracker + * @fn - the routine called when the request is retired + * @mutex - struct_mutex used to guard retirements + * + * i915_gem_active_set_retire_fn() updates the function pointer that + * is called when the final request associated with the @active tracker + * is retired. 
+ */ +static inline void +i915_gem_active_set_retire_fn(struct i915_gem_active *active, + i915_gem_retire_fn fn, + struct mutex *mutex) +{ + lockdep_assert_held(mutex); + active->retire = fn ?: i915_gem_retire_noop; +} + static inline struct drm_i915_gem_request * __i915_gem_active_peek(const struct i915_gem_active *active) { diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 59989e8ee5dc8b1834eee109e72eb02961e3934d..9a71ed546b9063abeab5b108d926ea0d9aeeb9cc 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -55,10 +55,9 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, return -ENODEV; /* See the comment at the drm_mm_init() call for more about this check. - * WaSkipStolenMemoryFirstPage:bdw,chv,kbl (incomplete) + * WaSkipStolenMemoryFirstPage:bdw+ (incomplete) */ - if (start < 4096 && (IS_GEN8(dev_priv) || - IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0))) + if (start < 4096 && INTEL_GEN(dev_priv) >= 8) start = 4096; mutex_lock(&dev_priv->mm.stolen_lock); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 1012eeea132434c30e100c0b5fa16d0b26bd361d..306fc54c161b1d9b81823cd51668f855663df6ff 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -460,7 +460,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); static DEVICE_ATTR(gt_cur_freq_mhz, S_IRUGO, gt_cur_freq_mhz_show, NULL); -static DEVICE_ATTR(gt_boost_freq_mhz, S_IRUGO, gt_boost_freq_mhz_show, gt_boost_freq_mhz_store); +static DEVICE_ATTR(gt_boost_freq_mhz, S_IRUGO | S_IWUSR, gt_boost_freq_mhz_show, gt_boost_freq_mhz_store); static DEVICE_ATTR(gt_max_freq_mhz, S_IRUGO | S_IWUSR, gt_max_freq_mhz_show, gt_max_freq_mhz_store); static DEVICE_ATTR(gt_min_freq_mhz, S_IRUGO | S_IWUSR, gt_min_freq_mhz_show, gt_min_freq_mhz_store); diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index dfbcf16b41dfb956dddfff8f8cc90aef9d1b579c..4149a0fbe8bd0ff6784b6d842e0bd67d5c961600 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -499,6 +499,7 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(crt->base.base.dev); struct edid *edid; struct i2c_adapter *i2c; + bool ret = false; BUG_ON(crt->base.type != INTEL_OUTPUT_ANALOG); @@ -515,17 +516,17 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) */ if (!is_digital) { DRM_DEBUG_KMS("CRT detected via DDC:0x50 [EDID]\n"); - return true; + ret = true; + } else { + DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n"); } - - DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n"); } else { DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [no valid EDID found]\n"); } kfree(edid); - return false; + return ret; } static enum drm_connector_status diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3cb70d73239bf424857c87d3dbc252cfa94c37ab..8079e5b380cba1c217d0a8e7976c4fc169a4ec5a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2587,8 +2587,9 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, * We only keep the x/y offsets, so push all of the * gtt offset into the x/y offsets. 
*/ - _intel_adjust_tile_offset(&x, &y, tile_size, - tile_width, tile_height, pitch_tiles, + _intel_adjust_tile_offset(&x, &y, + tile_width, tile_height, + tile_size, pitch_tiles, gtt_offset_rotated * tile_size, 0); gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height; @@ -2975,6 +2976,9 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) unsigned int rotation = plane_state->base.rotation; int ret; + if (!plane_state->base.visible) + return 0; + /* Rotate src coordinates to match rotated GTT view */ if (intel_rotation_90_or_270(rotation)) drm_rect_rotate(&plane_state->base.src, @@ -6262,36 +6266,25 @@ skl_dpll0_disable(struct drm_i915_private *dev_priv) dev_priv->cdclk_pll.vco = 0; } -static bool skl_cdclk_pcu_ready(struct drm_i915_private *dev_priv) -{ - int ret; - u32 val; - - /* inform PCU we want to change CDCLK */ - val = SKL_CDCLK_PREPARE_FOR_CHANGE; - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_read(dev_priv, SKL_PCODE_CDCLK_CONTROL, &val); - mutex_unlock(&dev_priv->rps.hw_lock); - - return ret == 0 && (val & SKL_CDCLK_READY_FOR_CHANGE); -} - -static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv) -{ - return _wait_for(skl_cdclk_pcu_ready(dev_priv), 3000, 10) == 0; -} - static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) { struct drm_device *dev = &dev_priv->drm; u32 freq_select, pcu_ack; + int ret; WARN_ON((cdclk == 24000) != (vco == 0)); DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); - if (!skl_cdclk_wait_for_pcu_ready(dev_priv)) { - DRM_ERROR("failed to inform PCU about cdclk change\n"); + mutex_lock(&dev_priv->rps.hw_lock); + ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", + ret); return; } @@ -6876,6 +6869,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) } state = drm_atomic_state_alloc(crtc->dev); + if (!state) { + DRM_DEBUG_KMS("failed to disable [CRTC:%d:%s], out of memory", + crtc->base.id, crtc->name); + return; + } + state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; /* Everything's already locked, -EDEADLK can't happen. 
*/ @@ -13970,8 +13969,9 @@ static int intel_modeset_checks(struct drm_atomic_state *state) DRM_DEBUG_KMS("New cdclk calculated to be atomic %u, actual %u\n", intel_state->cdclk, intel_state->dev_cdclk); - } else + } else { to_intel_atomic_state(state)->cdclk = dev_priv->atomic_cdclk_freq; + } intel_modeset_clear_plls(state); @@ -14072,8 +14072,9 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) return ret; - } else - intel_state->cdclk = dev_priv->cdclk_freq; + } else { + intel_state->cdclk = dev_priv->atomic_cdclk_freq; + } ret = drm_atomic_helper_check_planes(dev, state); if (ret) @@ -16441,6 +16442,7 @@ void intel_modeset_init(struct drm_device *dev) intel_update_czclk(dev_priv); intel_update_cdclk(dev); + dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; intel_shared_dpll_init(dev); @@ -16757,7 +16759,6 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = crtc->config; - int pixclk = 0; __drm_atomic_helper_crtc_destroy_state(&crtc_state->base); memset(crtc_state, 0, sizeof(*crtc_state)); @@ -16769,23 +16770,9 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) crtc->base.enabled = crtc_state->base.enable; crtc->active = crtc_state->base.active; - if (crtc_state->base.active) { + if (crtc_state->base.active) dev_priv->active_crtcs |= 1 << crtc->pipe; - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - pixclk = ilk_pipe_pixel_rate(crtc_state); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - pixclk = crtc_state->base.adjusted_mode.crtc_clock; - else - WARN_ON(dev_priv->display.modeset_calc_cdclk); - - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) - pixclk = DIV_ROUND_UP(pixclk * 100, 95); - } - - dev_priv->min_pixclk[crtc->pipe] = pixclk; - readout_plane_state(crtc); DRM_DEBUG_KMS("[CRTC:%d:%s] hw state readout: %s\n", @@ -16859,6 +16846,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) } for_each_intel_crtc(dev, crtc) { + int pixclk = 0; + crtc->base.hwmode = crtc->config->base.adjusted_mode; memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); @@ -16886,10 +16875,23 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) */ crtc->base.state->mode.private_flags = I915_MODE_FLAG_INHERITED; + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) + pixclk = ilk_pipe_pixel_rate(crtc->config); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + pixclk = crtc->config->base.adjusted_mode.crtc_clock; + else + WARN_ON(dev_priv->display.modeset_calc_cdclk); + + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + if (IS_BROADWELL(dev_priv) && crtc->config->ips_enabled) + pixclk = DIV_ROUND_UP(pixclk * 100, 95); + drm_calc_timestamping_constants(&crtc->base, &crtc->base.hwmode); update_scanline_offset(crtc); } + dev_priv->min_pixclk[crtc->pipe] = pixclk; + intel_pipe_config_sanity_check(dev_priv, crtc->config); } } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index bf344d08356a2b48a77726fa8bf749c55eb2651f..055525013d2f2bcc74b4ee78219061fad3856eaf 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -280,7 +280,8 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, struct intel_dp *intel_dp); static void intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, - struct intel_dp *intel_dp); + struct intel_dp *intel_dp, + bool 
force_disable_vdd); static void intel_dp_pps_init(struct drm_device *dev, struct intel_dp *intel_dp); @@ -442,7 +443,7 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp) /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, true); /* * Even vdd force doesn't work until we've made @@ -479,7 +480,7 @@ bxt_power_sequencer_idx(struct intel_dp *intel_dp) * Only the HW needs to be reprogrammed, the SW state is fixed and * has been setup during connector init. */ - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); return 0; } @@ -562,7 +563,7 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) port_name(port), pipe_name(intel_dp->pps_pipe)); intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); } void intel_power_sequencer_reset(struct drm_i915_private *dev_priv) @@ -2924,7 +2925,7 @@ static void vlv_init_panel_power_sequencer(struct intel_dp *intel_dp) /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, true); } static void vlv_pre_enable_dp(struct intel_encoder *encoder, @@ -4017,6 +4018,11 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) if (!to_intel_crtc(intel_encoder->base.crtc)->active) return; + /* FIXME: we need to synchronize this sort of stuff with hardware + * readout. Currently fast link training doesn't work on boot-up. */ + if (!intel_dp->lane_count) + return; + /* if link training is requested we should perform it always */ if ((intel_dp->compliance_test_type == DP_TEST_LINK_TRAINING) || (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count))) { @@ -5054,7 +5060,8 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, static void intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, - struct intel_dp *intel_dp) + struct intel_dp *intel_dp, + bool force_disable_vdd) { struct drm_i915_private *dev_priv = to_i915(dev); u32 pp_on, pp_off, pp_div, port_sel = 0; @@ -5067,6 +5074,31 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, intel_pps_get_registers(dev_priv, intel_dp, &regs); + /* + * On some VLV machines the BIOS can leave the VDD + * enabled even on power sequencers which aren't + * hooked up to any port. This would mess up the + * power domain tracking the first time we pick + * one of these power sequencers for use since + * edp_panel_vdd_on() would notice that the VDD was + * already on and therefore wouldn't grab the power + * domain reference. Disable VDD first to avoid this. + * This also avoids spuriously turning the VDD on as + * soon as the new power sequencer gets initialized. 
+ */ + if (force_disable_vdd) { + u32 pp = ironlake_get_pp_control(intel_dp); + + WARN(pp & PANEL_POWER_ON, "Panel power already on\n"); + + if (pp & EDP_FORCE_VDD) + DRM_DEBUG_KMS("VDD already on, disabling first\n"); + + pp &= ~EDP_FORCE_VDD; + + I915_WRITE(regs.pp_ctrl, pp); + } + pp_on = (seq->t1_t3 << PANEL_POWER_UP_DELAY_SHIFT) | (seq->t8 << PANEL_LIGHT_ON_DELAY_SHIFT); pp_off = (seq->t9 << PANEL_LIGHT_OFF_DELAY_SHIFT) | @@ -5119,7 +5151,7 @@ static void intel_dp_pps_init(struct drm_device *dev, vlv_initial_power_sequencer_setup(intel_dp); } else { intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); } } diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index cd154ce6b6c1ed8edad276dec2477532bbf52415..34601574fc6edb5a541048e97b95eb85cf10a208 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -296,7 +296,8 @@ static void chv_exec_gpio(struct drm_i915_private *dev_priv, mutex_lock(&dev_priv->sb_lock); vlv_iosf_sb_write(dev_priv, port, cfg1, 0); vlv_iosf_sb_write(dev_priv, port, cfg0, - CHV_GPIO_GPIOCFG_GPO | CHV_GPIO_GPIOTXSTATE(value)); + CHV_GPIO_GPIOEN | CHV_GPIO_GPIOCFG_GPO | + CHV_GPIO_GPIOTXSTATE(value)); mutex_unlock(&dev_priv->sb_lock); } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index b7098f98bb671d2c7e06c296466a482c91159ddd..9127e57f383c1005010797e61c97da5ec37664be 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -745,6 +745,9 @@ void intel_fbdev_initial_config_async(struct drm_device *dev) { struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; + if (!ifbdev) + return; + ifbdev->cookie = async_schedule(intel_fbdev_initial_config, ifbdev); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0adb879833ff0fadccd7fc7e6191c64b8107599a..4147e51cf8930a143450587a01657ff6b6686723 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -858,8 +858,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, * this batch updates GEN8_L3SQCREG4 with default value we need to * set this bit here to retain the WA during flush. */ - if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0) || - IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) + if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0)) l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS; wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | @@ -2153,30 +2152,42 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, void intel_lr_context_resume(struct drm_i915_private *dev_priv) { - struct i915_gem_context *ctx = dev_priv->kernel_context; struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + + /* Because we emit WA_TAIL_DWORDS there may be a disparity + * between our bookkeeping in ce->ring->head and ce->ring->tail and + * that stored in context. As we only write new commands from + * ce->ring->tail onwards, everything before that is junk. If the GPU + * starts reading from its RING_HEAD from the context, it may try to + * execute that junk and die. + * + * So to avoid that we reset the context images upon resume. For + * simplicity, we just zero everything out. 
+ */ + list_for_each_entry(ctx, &dev_priv->context_list, link) { + for_each_engine(engine, dev_priv) { + struct intel_context *ce = &ctx->engine[engine->id]; + u32 *reg; - for_each_engine(engine, dev_priv) { - struct intel_context *ce = &ctx->engine[engine->id]; - void *vaddr; - uint32_t *reg_state; - - if (!ce->state) - continue; - - vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); - if (WARN_ON(IS_ERR(vaddr))) - continue; + if (!ce->state) + continue; - reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; + reg = i915_gem_object_pin_map(ce->state->obj, + I915_MAP_WB); + if (WARN_ON(IS_ERR(reg))) + continue; - reg_state[CTX_RING_HEAD+1] = 0; - reg_state[CTX_RING_TAIL+1] = 0; + reg += LRC_STATE_PN * PAGE_SIZE / sizeof(*reg); + reg[CTX_RING_HEAD+1] = 0; + reg[CTX_RING_TAIL+1] = 0; - ce->state->obj->dirty = true; - i915_gem_object_unpin_map(ce->state->obj); + ce->state->obj->dirty = true; + i915_gem_object_unpin_map(ce->state->obj); - ce->ring->head = 0; - ce->ring->tail = 0; + ce->ring->head = ce->ring->tail = 0; + ce->ring->last_retired_head = -1; + intel_ring_update_space(ce->ring); + } } } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index a24bc8c7889f04ce3f3d7e2351404e43f777ceef..a2655cd5a84e2c4599a5527cd9f8f42eec77e0f5 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -216,7 +216,8 @@ static void intel_overlay_submit_request(struct intel_overlay *overlay, { GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, &overlay->i915->drm.struct_mutex)); - overlay->last_flip.retire = retire; + i915_gem_active_set_retire_fn(&overlay->last_flip, retire, + &overlay->i915->drm.struct_mutex); i915_gem_active_set(&overlay->last_flip, req); i915_add_request(req); } @@ -839,8 +840,8 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, if (ret) goto out_unpin; - i915_gem_track_fb(overlay->vma->obj, new_bo, - INTEL_FRONTBUFFER_OVERLAY(pipe)); + i915_gem_track_fb(overlay->vma ? overlay->vma->obj : NULL, + vma->obj, INTEL_FRONTBUFFER_OVERLAY(pipe)); overlay->old_vma = overlay->vma; overlay->vma = vma; @@ -1430,6 +1431,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) overlay->contrast = 75; overlay->saturation = 146; + init_request_active(&overlay->last_flip, NULL); + regs = intel_overlay_map_regs(overlay); if (!regs) goto out_unpin_bo; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index db24f898853cbbbc98d2043b9a511956dd0bc1bd..e559a45ff1f719797fd04370fb6dcee6ee21823c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2879,6 +2879,21 @@ skl_wm_plane_id(const struct intel_plane *plane) } } +/* + * FIXME: We still don't have the proper code detect if we need to apply the WA, + * so assume we'll always need it in order to avoid underruns. 
+ */ +static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || + IS_KABYLAKE(dev_priv)) + return true; + + return false; +} + static bool intel_has_sagv(struct drm_i915_private *dev_priv) { @@ -2940,24 +2955,10 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) return 0; } -static int -intel_do_sagv_disable(struct drm_i915_private *dev_priv) -{ - int ret; - uint32_t temp = GEN9_SAGV_DISABLE; - - ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL, - &temp); - if (ret) - return ret; - else - return temp & GEN9_SAGV_IS_DISABLED; -} - int intel_disable_sagv(struct drm_i915_private *dev_priv) { - int ret, result; + int ret; if (!intel_has_sagv(dev_priv)) return 0; @@ -2969,25 +2970,23 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->rps.hw_lock); /* bspec says to keep retrying for at least 1 ms */ - ret = wait_for(result = intel_do_sagv_disable(dev_priv), 1); + ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, + GEN9_SAGV_DISABLE, + GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, + 1); mutex_unlock(&dev_priv->rps.hw_lock); - if (ret == -ETIMEDOUT) { - DRM_ERROR("Request to disable SAGV timed out\n"); - return -ETIMEDOUT; - } - /* * Some skl systems, pre-release machines in particular, * don't actually have an SAGV. */ - if (IS_SKYLAKE(dev_priv) && result == -ENXIO) { + if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; - } else if (result < 0) { - DRM_ERROR("Failed to disable the SAGV\n"); - return result; + } else if (ret < 0) { + DRM_ERROR("Failed to disable the SAGV (%d)\n", ret); + return ret; } dev_priv->sagv_status = I915_SAGV_DISABLED; @@ -2999,9 +2998,10 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) struct drm_device *dev = state->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_crtc *crtc; + struct intel_crtc *crtc; + struct intel_plane *plane; enum pipe pipe; - int level, plane; + int level, id, latency; if (!intel_has_sagv(dev_priv)) return false; @@ -3019,27 +3019,36 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) /* Since we're now guaranteed to only have one active CRTC... 
*/ pipe = ffs(intel_state->active_crtcs) - 1; - crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); - if (crtc->state->mode.flags & DRM_MODE_FLAG_INTERLACE) + if (crtc->base.state->mode.flags & DRM_MODE_FLAG_INTERLACE) return false; - for_each_plane(dev_priv, pipe, plane) { + for_each_intel_plane_on_crtc(dev, crtc, plane) { + id = skl_wm_plane_id(plane); + /* Skip this plane if it's not enabled */ - if (intel_state->wm_results.plane[pipe][plane][0] == 0) + if (intel_state->wm_results.plane[pipe][id][0] == 0) continue; /* Find the highest enabled wm level for this plane */ for (level = ilk_wm_max_level(dev); - intel_state->wm_results.plane[pipe][plane][level] == 0; --level) + intel_state->wm_results.plane[pipe][id][level] == 0; --level) { } + latency = dev_priv->wm.skl_latency[level]; + + if (skl_needs_memory_bw_wa(intel_state) && + plane->base.state->fb->modifier[0] == + I915_FORMAT_MOD_X_TILED) + latency += 15; + /* * If any of the planes on this pipe don't enable wm levels * that incur memory latencies higher then 30µs we can't enable * the SAGV */ - if (dev_priv->wm.skl_latency[level] < SKL_SAGV_BLOCK_TIME) + if (latency < SKL_SAGV_BLOCK_TIME) return false; } @@ -3549,12 +3558,18 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint32_t width = 0, height = 0; uint32_t plane_pixel_rate; uint32_t y_tile_minimum, y_min_scanlines; + struct intel_atomic_state *state = + to_intel_atomic_state(cstate->base.state); + bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { *enabled = false; return 0; } + if (apply_memory_bw_wa && fb->modifier[0] == I915_FORMAT_MOD_X_TILED) + latency += 15; + width = drm_rect_width(&intel_pstate->base.src) >> 16; height = drm_rect_height(&intel_pstate->base.src) >> 16; @@ -3586,6 +3601,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, y_min_scanlines = 4; } + if (apply_memory_bw_wa) + y_min_scanlines *= 2; + plane_bytes_per_line = width * cpp; if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { @@ -7921,6 +7939,81 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, return 0; } +static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, + u32 request, u32 reply_mask, u32 reply, + u32 *status) +{ + u32 val = request; + + *status = sandybridge_pcode_read(dev_priv, mbox, &val); + + return *status || ((val & reply_mask) == reply); +} + +/** + * skl_pcode_request - send PCODE request until acknowledgment + * @dev_priv: device private + * @mbox: PCODE mailbox ID the request is targeted for + * @request: request ID + * @reply_mask: mask used to check for request acknowledgment + * @reply: value used to check for request acknowledgment + * @timeout_base_ms: timeout for polling with preemption enabled + * + * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE + * reports an error or an overall timeout of @timeout_base_ms+10 ms expires. + * The request is acknowledged once the PCODE reply dword equals @reply after + * applying @reply_mask. Polling is first attempted with preemption enabled + * for @timeout_base_ms and if this times out for another 10 ms with + * preemption disabled. + * + * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some + * other error as reported by PCODE. 
+ */ +int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) +{ + u32 status; + int ret; + + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + +#define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \ + &status) + + /* + * Prime the PCODE by doing a request first. Normally it guarantees + * that a subsequent request, at most @timeout_base_ms later, succeeds. + * _wait_for() doesn't guarantee when its passed condition is evaluated + * first, so send the first request explicitly. + */ + if (COND) { + ret = 0; + goto out; + } + ret = _wait_for(COND, timeout_base_ms * 1000, 10); + if (!ret) + goto out; + + /* + * The above can time out if the number of requests was low (2 in the + * worst case) _and_ PCODE was busy for some reason even after a + * (queued) request and @timeout_base_ms delay. As a workaround retry + * the poll with preemption disabled to maximize the number of + * requests. Increase the timeout from @timeout_base_ms to 10ms to + * account for interrupts that could reduce the number of these + * requests. + */ + DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); + WARN_ON_ONCE(timeout_base_ms > 3); + preempt_disable(); + ret = wait_for_atomic(COND, 10); + preempt_enable(); + +out: + return ret ? ret : status; +#undef COND +} + static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) { /* diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 108ba1e5d65872cfcf33ee4b7bc6ca9ea2ed625e..9b307cee3008680aca877ee6bcd2ca8b5285d886 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -825,13 +825,9 @@ void intel_psr_init(struct drm_device *dev) dev_priv->psr_mmio_base = IS_HASWELL(dev_priv) ? HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE; - /* Per platform default */ - if (i915.enable_psr == -1) { - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - i915.enable_psr = 1; - else - i915.enable_psr = 0; - } + /* Per platform default: all disabled. */ + if (i915.enable_psr == -1) + i915.enable_psr = 0; /* Set link_standby x link_off defaults */ if (IS_HASWELL(dev) || IS_BROADWELL(dev)) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ed9955dce156dd0519134bba04166687e4dfab4f..8babfe0ce4e3d4ab8973b9434d1c889f11f55b41 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1153,14 +1153,6 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FENCE_DEST_SLM_DISABLE); - /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes - * involving this register should also be added to WA batch as required. 
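The skl_pcode_request() kernel-doc above describes a two-phase poll: the request is issued once up front, polled with preemption enabled for timeout_base_ms, and only then retried atomically for another 10 ms. As a usage illustration, a caller looks like the cdclk path earlier in this series. Below is a minimal sketch, assuming it lives in i915 code where the PCODE mailbox defines and rps.hw_lock are visible; the wrapper name is hypothetical, not part of the patch.

/*
 * Hypothetical caller sketch: ask PCODE to prepare for a CDCLK change
 * and wait until it reports ready. Per the kernel-doc, rps.hw_lock
 * must be held around the call.
 */
static int example_prepare_cdclk_change(struct drm_i915_private *dev_priv)
{
	int ret;

	mutex_lock(&dev_priv->rps.hw_lock);
	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
				SKL_CDCLK_PREPARE_FOR_CHANGE,
				SKL_CDCLK_READY_FOR_CHANGE,
				SKL_CDCLK_READY_FOR_CHANGE,
				3 /* timeout_base_ms */);
	mutex_unlock(&dev_priv->rps.hw_lock);

	return ret;	/* 0, -ETIMEDOUT, or a PCODE error code */
}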
- */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) - /* WaDisableLSQCROPERFforOCL:kbl */ - I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_RO_PERF_DIS); - /* WaToEnableHwFixForPushConstHWBug:kbl */ if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index a38c2fefe85a6fc2abb1fcaabe2143da3c95be3a..23ed3f5972fa4e34e53bf2f93a8573f5fbc926bf 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1065,7 +1065,18 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv) { - I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); + u32 val; + + /* + * On driver load, a pipe may be active and driving a DSI display. + * Preserve DPOUNIT_CLOCK_GATE_DISABLE to avoid the pipe getting stuck + * (and never recovering) in this case. intel_dsi_post_disable() will + * clear it when we turn off the display. + */ + val = I915_READ(DSPCLK_GATE_D); + val &= DPOUNIT_CLOCK_GATE_DISABLE; + val |= VRHUNIT_CLOCK_GATE_DISABLE; + I915_WRITE(DSPCLK_GATE_D, val); /* * Disable trickle feed and enable pnd deadline calculation diff --git a/drivers/gpu/drm/nouveau/dispnv04/hw.c b/drivers/gpu/drm/nouveau/dispnv04/hw.c index 74856a8b8f35943b08a59f8aed5a546e98058d3c..e64f52464ecf55b83a17f25b434cb9e1474c10e1 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/hw.c +++ b/drivers/gpu/drm/nouveau/dispnv04/hw.c @@ -222,6 +222,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype) uint32_t mpllP; pci_read_config_dword(pci_get_bus_and_slot(0, 3), 0x6c, &mpllP); + mpllP = (mpllP >> 8) & 0xf; if (!mpllP) mpllP = 4; @@ -232,7 +233,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype) uint32_t clock; pci_read_config_dword(pci_get_bus_and_slot(0, 5), 0x4c, &clock); - return clock; + return clock / 1000; } ret = nouveau_hw_get_pllvals(dev, plltype, &pllvals); diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index a1570b109434057db887acff8fc71e634595a430..23ffe8571a990d2a0e9574253edfee30fb96df04 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -333,6 +333,9 @@ get_fp_strap(struct drm_device *dev, struct nvbios *bios) if (bios->major_version < 5 && bios->data[0x48] & 0x4) return NVReadVgaCrtc5758(dev, 0, 0xf) & 0xf; + if (drm->device.info.family >= NV_DEVICE_INFO_V0_MAXWELL) + return nvif_rd32(device, 0x001800) & 0x0000000f; + else if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) return (nvif_rd32(device, NV_PEXTDEV_BOOT_0) >> 24) & 0xf; else diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 343b8659472cfb4b6b5e2a45c447346e381d41f2..a2e6a81669e789c57be30cd6db3649538d4467b8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1209,6 +1209,7 @@ nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem) nvbo->page_shift != vma->vm->mmu->lpg_shift)) { nvkm_vm_map(vma, new_mem->mm_node); } else { + WARN_ON(ttm_bo_wait(bo, false, false)); nvkm_vm_unmap(vma); } } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 7218a067a6c5e85764f1f330cb05b10c7e50132c..e0d7f8472ac6523fb9ee3ce8b7fee245b405b0ea 100644 --- 
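In the vlv_init_display_clock_gating() hunk above, the blind write of VRHUNIT_CLOCK_GATE_DISABLE becomes a read-modify-write: masking the current value with DPOUNIT_CLOCK_GATE_DISABLE keeps at most that single bit (it is set when the BIOS left a DSI pipe running), so the written value is either VRHUNIT alone or VRHUNIT plus DPOUNIT. A condensed sketch of that pattern follows; the helper name is hypothetical, the register and bit names are the ones used in the hunk.

/*
 * Read-modify-write sketch: preserve only the DPOUNIT disable bit that
 * may already be set, then unconditionally add the VRHUNIT disable bit.
 */
static void example_update_dspclk_gate(struct drm_i915_private *dev_priv)
{
	u32 val = I915_READ(DSPCLK_GATE_D);

	val &= DPOUNIT_CLOCK_GATE_DISABLE;	/* keep at most this one bit */
	val |= VRHUNIT_CLOCK_GATE_DISABLE;	/* always disable VRHUNIT gating */
	I915_WRITE(DSPCLK_GATE_D, val);
}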
a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1851,7 +1851,7 @@ nvf1_chipset = { .fb = gk104_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gf119_i2c_new, + .i2c = gk104_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1965,7 +1965,7 @@ nv117_chipset = { .fb = gm107_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, - .i2c = gf119_i2c_new, + .i2c = gk104_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1999,7 +1999,7 @@ nv118_chipset = { .fb = gm107_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, - .i2c = gf119_i2c_new, + .i2c = gk104_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c index 6f0436df021953337ba29f0ff9c02c507214b07b..f8f2f16c22a2a2502bf283c63a5d1fc124d0ff89 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c @@ -59,7 +59,7 @@ gt215_hda_eld(NV50_DISP_MTHD_V1) ); } for (i = 0; i < size; i++) - nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[0]); + nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[i]); for (; i < 0x60; i++) nvkm_wr32(device, 0x61c440 + soff, (i << 8)); nvkm_mask(device, 0x61c448 + soff, 0x80000003, 0x80000003); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c index cbc67f2623223177d084395294facda07375e694..12d964260a29921ed71cb25b2a299824ac4442f2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c @@ -60,6 +60,7 @@ gf100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base, struct nvkm_gpuobj *inst = chan->base.inst; int ret = 0; + mutex_lock(&subdev->mutex); nvkm_wr32(device, 0x002634, chan->base.chid); if (nvkm_msec(device, 2000, if (nvkm_rd32(device, 0x002634) == chan->base.chid) @@ -67,10 +68,12 @@ gf100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base, ) < 0) { nvkm_error(subdev, "channel %d [%s] kick timeout\n", chan->base.chid, chan->base.object.client->name); - ret = -EBUSY; - if (suspend) - return ret; + ret = -ETIMEDOUT; } + mutex_unlock(&subdev->mutex); + + if (ret && suspend) + return ret; if (offset) { nvkm_kmap(inst); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c index ed4351032ed603df4180a4113c114459c9e5a372..a2df4f3e7763494acf93acc5a46c2f15720b19d3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c @@ -40,7 +40,9 @@ gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan) struct nvkm_subdev *subdev = &fifo->base.engine.subdev; struct nvkm_device *device = subdev->device; struct nvkm_client *client = chan->base.object.client; + int ret = 0; + mutex_lock(&subdev->mutex); nvkm_wr32(device, 0x002634, chan->base.chid); if (nvkm_msec(device, 2000, if (!(nvkm_rd32(device, 0x002634) & 0x00100000)) @@ -48,10 +50,10 @@ gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan) ) < 0) { nvkm_error(subdev, "channel %d [%s] kick timeout\n", chan->base.chid, client->name); - return -EBUSY; + ret = -ETIMEDOUT; } - - return 0; + mutex_unlock(&subdev->mutex); + return ret; } static u32 diff --git 
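The gpfifo changes above serialize the channel kick: the shared kick register at 0x002634 is written and polled under the subdev mutex, and a timed-out kick now reports -ETIMEDOUT instead of -EBUSY so suspend can propagate the failure. A minimal sketch of the pattern, assuming the nvkm context of the hunk; the helper name and parameter list are illustrative, not the patch's.

/*
 * Kick a channel and wait for the engine to acknowledge it, holding the
 * subdev mutex so concurrent kicks cannot interleave on 0x002634.
 */
static int example_kick_channel(struct nvkm_subdev *subdev, u32 chid)
{
	struct nvkm_device *device = subdev->device;
	int ret = 0;

	mutex_lock(&subdev->mutex);
	nvkm_wr32(device, 0x002634, chid);
	if (nvkm_msec(device, 2000,
		if (!(nvkm_rd32(device, 0x002634) & 0x00100000))
			break;
	) < 0)
		ret = -ETIMEDOUT;	/* caller decides whether to abort suspend */
	mutex_unlock(&subdev->mutex);

	return ret;
}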
a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 157919c788e6f72261d6989a352c4f6fddf3a014..6584d505460ce4a7ab9528db86c6001d11f7da69 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1755,6 +1755,50 @@ gf100_gr_ = { .object_get = gf100_gr_object_get, }; +int +gf100_gr_ctor_fw_legacy(struct gf100_gr *gr, const char *fwname, + struct gf100_gr_fuc *fuc, int ret) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + const struct firmware *fw; + char f[32]; + + /* see if this firmware has a legacy path */ + if (!strcmp(fwname, "fecs_inst")) + fwname = "fuc409c"; + else if (!strcmp(fwname, "fecs_data")) + fwname = "fuc409d"; + else if (!strcmp(fwname, "gpccs_inst")) + fwname = "fuc41ac"; + else if (!strcmp(fwname, "gpccs_data")) + fwname = "fuc41ad"; + else { + /* nope, let's just return the error we got */ + nvkm_error(subdev, "failed to load %s\n", fwname); + return ret; + } + + /* yes, try to load from the legacy path */ + nvkm_debug(subdev, "%s: falling back to legacy path\n", fwname); + + snprintf(f, sizeof(f), "nouveau/nv%02x_%s", device->chipset, fwname); + ret = request_firmware(&fw, f, device->dev); + if (ret) { + snprintf(f, sizeof(f), "nouveau/%s", fwname); + ret = request_firmware(&fw, f, device->dev); + if (ret) { + nvkm_error(subdev, "failed to load %s\n", fwname); + return ret; + } + } + + fuc->size = fw->size; + fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL); + release_firmware(fw); + return (fuc->data != NULL) ? 0 : -ENOMEM; +} + int gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname, struct gf100_gr_fuc *fuc) @@ -1765,10 +1809,8 @@ gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname, int ret; ret = nvkm_firmware_get(device, fwname, &fw); - if (ret) { - nvkm_error(subdev, "failed to load %s\n", fwname); - return ret; - } + if (ret) + return gf100_gr_ctor_fw_legacy(gr, fwname, fuc, ret); fuc->size = fw->size; fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h index 212800ecdce99e4eb1a3a23ebdab9c207cd860da..7d1d3c6b4b728dec4711a75883df4d700a1a6290 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h @@ -12,6 +12,7 @@ struct nvbios_source { bool rw; bool ignore_checksum; bool no_pcir; + bool require_checksum; }; int nvbios_extend(struct nvkm_bios *, u32 length); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index b2557e87afdd6d0e95910b3b4b91e37ce9a3e269..7deb81b6dbac6bc5a440da281eca94b51b632f83 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -86,9 +86,12 @@ shadow_image(struct nvkm_bios *bios, int idx, u32 offset, struct shadow *mthd) nvbios_checksum(&bios->data[image.base], image.size)) { nvkm_debug(subdev, "%08x: checksum failed\n", image.base); - if (mthd->func->rw) + if (!mthd->func->require_checksum) { + if (mthd->func->rw) + score += 1; score += 1; - score += 1; + } else + return 0; } else { score += 3; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c index 8fecb5ff22a0efaaeaed9bc2b4939072fc6513bc..06572f8ce9148805ca37470c978af17f42e72827 100644 --- 
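gf100_gr_ctor_fw_legacy() above gives the graphics firmware loader a fallback: when nvkm_firmware_get() fails for the new name, the known legacy names are retried, first with the chipset-prefixed path and then with the bare one. A condensed sketch of just that lookup order, using the same request_firmware() calls as the hunk; the helper itself is hypothetical.

/*
 * Try the legacy firmware locations in order and return the first hit,
 * or NULL so the caller can report the original lookup error.
 */
static const struct firmware *
example_load_legacy_fw(struct nvkm_device *device, const char *fwname)
{
	const struct firmware *fw;
	char f[32];

	/* chipset-prefixed legacy path, e.g. nouveau/nvE7_fuc409c */
	snprintf(f, sizeof(f), "nouveau/nv%02x_%s", device->chipset, fwname);
	if (request_firmware(&fw, f, device->dev) == 0)
		return fw;

	/* bare legacy path, e.g. nouveau/fuc409c */
	snprintf(f, sizeof(f), "nouveau/%s", fwname);
	if (request_firmware(&fw, f, device->dev) == 0)
		return fw;

	return NULL;
}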
a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c @@ -99,6 +99,7 @@ nvbios_acpi_fast = { .init = acpi_init, .read = acpi_read_fast, .rw = false, + .require_checksum = true, }; const struct nvbios_source diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c index 39c2a38e54f74e6e2a4af834feca795823a21408..0c7ef250dcafc3cbdad15b5cfe79acfd513314fa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c @@ -47,8 +47,10 @@ nvkm_ltc_tags_clear(struct nvkm_ltc *ltc, u32 first, u32 count) BUG_ON((first > limit) || (limit >= ltc->num_tags)); + mutex_lock(&ltc->subdev.mutex); ltc->func->cbc_clear(ltc, first, limit); ltc->func->cbc_wait(ltc); + mutex_unlock(&ltc->subdev.mutex); } int diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 113db3c4a633fc6a1ce35e5ec86d217b99e7851f..27cb42467b2024881b7d5ea9cc9ca146a666e713 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -120,7 +120,7 @@ static int panel_simple_get_fixed_modes(struct panel_simple *panel) mode->type |= DRM_MODE_TYPE_DRIVER; - if (panel->desc->num_modes == 1) + if (panel->desc->num_timings == 1) mode->type |= DRM_MODE_TYPE_PREFERRED; drm_mode_probed_add(connector, mode); diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 2a10e24b34b10b2f826e35c60556808efb2ebe2e..fb16070b266e3f2de51243ba478f57d8fd956420 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -90,6 +90,9 @@ static void radeon_show_cursor(struct drm_crtc *crtc) struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; + if (radeon_crtc->cursor_out_of_bounds) + return; + if (ASIC_IS_DCE4(rdev)) { WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, upper_32_bits(radeon_crtc->cursor_addr)); @@ -143,21 +146,25 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) int xorigin = 0, yorigin = 0; int w = radeon_crtc->cursor_width; + radeon_crtc->cursor_x = x; + radeon_crtc->cursor_y = y; + if (ASIC_IS_AVIVO(rdev)) { /* avivo cursor are offset into the total surface */ x += crtc->x; y += crtc->y; } - DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); - if (x < 0) { + if (x < 0) xorigin = min(-x, radeon_crtc->max_cursor_width - 1); - x = 0; - } - if (y < 0) { + if (y < 0) yorigin = min(-y, radeon_crtc->max_cursor_height - 1); - y = 0; + + if (!ASIC_IS_AVIVO(rdev)) { + x += crtc->x; + y += crtc->y; } + DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); /* fixed on DCE6 and newer */ if (ASIC_IS_AVIVO(rdev) && !ASIC_IS_DCE6(rdev)) { @@ -180,27 +187,31 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) if (i > 1) { int cursor_end, frame_end; - cursor_end = x - xorigin + w; + cursor_end = x + w; frame_end = crtc->x + crtc->mode.crtc_hdisplay; if (cursor_end >= frame_end) { w = w - (cursor_end - frame_end); if (!(frame_end & 0x7f)) w--; - } else { - if (!(cursor_end & 0x7f)) - w--; + } else if (cursor_end <= 0) { + goto out_of_bounds; + } else if (!(cursor_end & 0x7f)) { + w--; } if (w <= 0) { - w = 1; - cursor_end = x - xorigin + w; - if (!(cursor_end & 0x7f)) { - x--; - WARN_ON_ONCE(x < 0); - } + goto out_of_bounds; } } } + if (x <= (crtc->x - w) || y <= (crtc->y - radeon_crtc->cursor_height) || + x 
>= (crtc->x + crtc->mode.crtc_hdisplay) || + y >= (crtc->y + crtc->mode.crtc_vdisplay)) + goto out_of_bounds; + + x += xorigin; + y += yorigin; + if (ASIC_IS_DCE4(rdev)) { WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, (x << 16) | y); WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); @@ -212,6 +223,9 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) WREG32(AVIVO_D1CUR_SIZE + radeon_crtc->crtc_offset, ((w - 1) << 16) | (radeon_crtc->cursor_height - 1)); } else { + x -= crtc->x; + y -= crtc->y; + if (crtc->mode.flags & DRM_MODE_FLAG_DBLSCAN) y *= 2; @@ -229,10 +243,20 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) yorigin * 256); } - radeon_crtc->cursor_x = x; - radeon_crtc->cursor_y = y; + if (radeon_crtc->cursor_out_of_bounds) { + radeon_crtc->cursor_out_of_bounds = false; + if (radeon_crtc->cursor_bo) + radeon_show_cursor(crtc); + } return 0; + + out_of_bounds: + if (!radeon_crtc->cursor_out_of_bounds) { + radeon_hide_cursor(crtc); + radeon_crtc->cursor_out_of_bounds = true; + } + return 0; } int radeon_crtc_cursor_move(struct drm_crtc *crtc, @@ -297,22 +321,23 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - radeon_crtc->cursor_width = width; - radeon_crtc->cursor_height = height; - radeon_lock_cursor(crtc, true); - if (hot_x != radeon_crtc->cursor_hot_x || + if (width != radeon_crtc->cursor_width || + height != radeon_crtc->cursor_height || + hot_x != radeon_crtc->cursor_hot_x || hot_y != radeon_crtc->cursor_hot_y) { int x, y; x = radeon_crtc->cursor_x + radeon_crtc->cursor_hot_x - hot_x; y = radeon_crtc->cursor_y + radeon_crtc->cursor_hot_y - hot_y; - radeon_cursor_move_locked(crtc, x, y); - + radeon_crtc->cursor_width = width; + radeon_crtc->cursor_height = height; radeon_crtc->cursor_hot_x = hot_x; radeon_crtc->cursor_hot_y = hot_y; + + radeon_cursor_move_locked(crtc, x, y); } radeon_show_cursor(crtc); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 00ea0002b539b9e9b5b0a063f62deb3b7638fd56..e0c143b865f39cb36074b5e524638530cadeede9 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -366,11 +366,10 @@ static void radeon_pci_shutdown(struct pci_dev *pdev) { /* if we are running in a VM, make sure the device - * torn down properly on reboot/shutdown. - * unfortunately we can't detect certain - * hypervisors so just do this all the time. 
+ * torn down properly on reboot/shutdown */ - radeon_pci_remove(pdev); + if (radeon_device_is_virtual()) + radeon_pci_remove(pdev); } static int radeon_pmops_suspend(struct device *dev) diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index bb75201a24ba923fd124c39fedbe860bdcbe3e07..f1da484864a9c554ee4d7f846c3a75b63740e005 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -330,6 +330,7 @@ struct radeon_crtc { u16 lut_r[256], lut_g[256], lut_b[256]; bool enabled; bool can_tile; + bool cursor_out_of_bounds; uint32_t crtc_offset; struct drm_gem_object *cursor_bo; uint64_t cursor_addr; diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index e402be8821c45271a276f721f1eb9c94fcf4afb5..877af4a5ef685e9ea4069687ceb61a3ba3761b5f 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -50,7 +50,6 @@ MODULE_FIRMWARE("radeon/tahiti_ce.bin"); MODULE_FIRMWARE("radeon/tahiti_mc.bin"); MODULE_FIRMWARE("radeon/tahiti_rlc.bin"); MODULE_FIRMWARE("radeon/tahiti_smc.bin"); -MODULE_FIRMWARE("radeon/tahiti_k_smc.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_me.bin"); @@ -1657,9 +1656,6 @@ static int si_init_microcode(struct radeon_device *rdev) switch (rdev->family) { case CHIP_TAHITI: chip_name = "TAHITI"; - /* XXX: figure out which Tahitis need the new ucode */ - if (0) - new_smc = true; new_chip_name = "tahiti"; pfp_req_size = SI_PFP_UCODE_SIZE * 4; me_req_size = SI_PM4_UCODE_SIZE * 4; @@ -1671,12 +1667,9 @@ static int si_init_microcode(struct radeon_device *rdev) break; case CHIP_PITCAIRN: chip_name = "PITCAIRN"; - if ((rdev->pdev->revision == 0x81) || - (rdev->pdev->device == 0x6810) || - (rdev->pdev->device == 0x6811) || - (rdev->pdev->device == 0x6816) || - (rdev->pdev->device == 0x6817) || - (rdev->pdev->device == 0x6806)) + if ((rdev->pdev->revision == 0x81) && + ((rdev->pdev->device == 0x6810) || + (rdev->pdev->device == 0x6811))) new_smc = true; new_chip_name = "pitcairn"; pfp_req_size = SI_PFP_UCODE_SIZE * 4; @@ -1689,15 +1682,15 @@ static int si_init_microcode(struct radeon_device *rdev) break; case CHIP_VERDE: chip_name = "VERDE"; - if ((rdev->pdev->revision == 0x81) || - (rdev->pdev->revision == 0x83) || - (rdev->pdev->revision == 0x87) || - (rdev->pdev->device == 0x6820) || - (rdev->pdev->device == 0x6821) || - (rdev->pdev->device == 0x6822) || - (rdev->pdev->device == 0x6823) || - (rdev->pdev->device == 0x682A) || - (rdev->pdev->device == 0x682B)) + if (((rdev->pdev->device == 0x6820) && + ((rdev->pdev->revision == 0x81) || + (rdev->pdev->revision == 0x83))) || + ((rdev->pdev->device == 0x6821) && + ((rdev->pdev->revision == 0x83) || + (rdev->pdev->revision == 0x87))) || + ((rdev->pdev->revision == 0x87) && + ((rdev->pdev->device == 0x6823) || + (rdev->pdev->device == 0x682b)))) new_smc = true; new_chip_name = "verde"; pfp_req_size = SI_PFP_UCODE_SIZE * 4; @@ -1710,12 +1703,13 @@ static int si_init_microcode(struct radeon_device *rdev) break; case CHIP_OLAND: chip_name = "OLAND"; - if ((rdev->pdev->revision == 0xC7) || - (rdev->pdev->revision == 0x80) || - (rdev->pdev->revision == 0x81) || - (rdev->pdev->revision == 0x83) || - (rdev->pdev->device == 0x6604) || - (rdev->pdev->device == 0x6605)) + if (((rdev->pdev->revision == 0x81) && + ((rdev->pdev->device == 0x6600) || + (rdev->pdev->device == 0x6604) || + (rdev->pdev->device == 0x6605) || + (rdev->pdev->device == 0x6610))) || + ((rdev->pdev->revision == 0x83) && + 
(rdev->pdev->device == 0x6610))) new_smc = true; new_chip_name = "oland"; pfp_req_size = SI_PFP_UCODE_SIZE * 4; @@ -1727,12 +1721,15 @@ static int si_init_microcode(struct radeon_device *rdev) break; case CHIP_HAINAN: chip_name = "HAINAN"; - if ((rdev->pdev->revision == 0x81) || - (rdev->pdev->revision == 0x83) || - (rdev->pdev->revision == 0xC3) || - (rdev->pdev->device == 0x6664) || - (rdev->pdev->device == 0x6665) || - (rdev->pdev->device == 0x6667)) + if (((rdev->pdev->revision == 0x81) && + (rdev->pdev->device == 0x6660)) || + ((rdev->pdev->revision == 0x83) && + ((rdev->pdev->device == 0x6660) || + (rdev->pdev->device == 0x6663) || + (rdev->pdev->device == 0x6665) || + (rdev->pdev->device == 0x6667))) || + ((rdev->pdev->revision == 0xc3) && + (rdev->pdev->device == 0x6665))) new_smc = true; new_chip_name = "hainan"; pfp_req_size = SI_PFP_UCODE_SIZE * 4; diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index c49934527a87852bf207b26e04c4de5939df8c53..13ba73fd9b68849bd34d27207eb282eb6ff62793 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3008,24 +3008,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, (rdev->pdev->device == 0x6817) || (rdev->pdev->device == 0x6806)) max_mclk = 120000; - } else if (rdev->family == CHIP_VERDE) { - if ((rdev->pdev->revision == 0x81) || - (rdev->pdev->revision == 0x83) || - (rdev->pdev->revision == 0x87) || - (rdev->pdev->device == 0x6820) || - (rdev->pdev->device == 0x6821) || - (rdev->pdev->device == 0x6822) || - (rdev->pdev->device == 0x6823) || - (rdev->pdev->device == 0x682A) || - (rdev->pdev->device == 0x682B)) { - max_sclk = 75000; - max_mclk = 80000; - } } else if (rdev->family == CHIP_OLAND) { if ((rdev->pdev->revision == 0xC7) || (rdev->pdev->revision == 0x80) || (rdev->pdev->revision == 0x81) || (rdev->pdev->revision == 0x83) || + (rdev->pdev->revision == 0x87) || (rdev->pdev->device == 0x6604) || (rdev->pdev->device == 0x6605)) { max_sclk = 75000; diff --git a/drivers/gpu/drm/savage/savage_state.c b/drivers/gpu/drm/savage/savage_state.c index 3dc0d8ff95ec6d1e047d1e27d0587cd5d1cc7199..2db89bed52e80625bee50917652a41c2dc2a5f8d 100644 --- a/drivers/gpu/drm/savage/savage_state.c +++ b/drivers/gpu/drm/savage/savage_state.c @@ -1004,6 +1004,7 @@ int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_ kvb_addr = memdup_user(cmdbuf->vb_addr, cmdbuf->vb_size); if (IS_ERR(kvb_addr)) { ret = PTR_ERR(kvb_addr); + kvb_addr = NULL; goto done; } cmdbuf->vb_addr = kvb_addr; diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c index 059f409556d55dd1c617917b60ce728d7d3a67a4..2fde44c3a1b30f21eed4925b4e1ff9c5d7432d74 100644 --- a/drivers/gpu/drm/tegra/dpaux.c +++ b/drivers/gpu/drm/tegra/dpaux.c @@ -539,9 +539,9 @@ static int tegra_dpaux_probe(struct platform_device *pdev) dpaux->desc.owner = THIS_MODULE; dpaux->pinctrl = devm_pinctrl_register(&pdev->dev, &dpaux->desc, dpaux); - if (!dpaux->pinctrl) { + if (IS_ERR(dpaux->pinctrl)) { dev_err(&pdev->dev, "failed to register pincontrol\n"); - return -ENODEV; + return PTR_ERR(dpaux->pinctrl); } #endif /* enable and clear all interrupts */ diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 7f08d681a74b4e37529f6c09ae1d2c1a944dcabd..d544ff9b0d4609aae945960bf82e8aeee6bc94a8 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -832,7 +832,7 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc, } - 
__drm_atomic_helper_crtc_destroy_state(state); + drm_atomic_helper_crtc_destroy_state(crtc, state); } static const struct drm_crtc_funcs vc4_crtc_funcs = { diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 47a095f392f8a7b5443227933c78b500a4c7137f..18e37171e9c8e2f0729ca1c582af98ccb4647e06 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -544,14 +544,15 @@ vc4_cl_lookup_bos(struct drm_device *dev, handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t)); if (!handles) { + ret = -ENOMEM; DRM_ERROR("Failed to allocate incoming GEM handles\n"); goto fail; } - ret = copy_from_user(handles, - (void __user *)(uintptr_t)args->bo_handles, - exec->bo_count * sizeof(uint32_t)); - if (ret) { + if (copy_from_user(handles, + (void __user *)(uintptr_t)args->bo_handles, + exec->bo_count * sizeof(uint32_t))) { + ret = -EFAULT; DRM_ERROR("Failed to copy in GEM handles\n"); goto fail; } @@ -593,12 +594,14 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) args->shader_rec_count); struct vc4_bo *bo; - if (uniforms_offset < shader_rec_offset || + if (shader_rec_offset < args->bin_cl_size || + uniforms_offset < shader_rec_offset || exec_size < uniforms_offset || args->shader_rec_count >= (UINT_MAX / sizeof(struct vc4_shader_state)) || temp_size < exec_size) { DRM_ERROR("overflow in exec arguments\n"); + ret = -EINVAL; goto fail; } diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index 08886a3097577242f5c9e025fd6446d81bcc6dec..5cdd003605f57c99faf31832e3f3dd38a75b7402 100644 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -461,7 +461,7 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, } ret = vc4_full_res_bounds_check(exec, *obj, surf); - if (!ret) + if (ret) return ret; return 0; diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index 717704e9ae07bd97d64f38321d2d49f07de62b83..c0303f61c26a94f1998f6883d42a0fc8cb41f432 100644 --- a/drivers/hid/hid-corsair.c +++ b/drivers/hid/hid-corsair.c @@ -148,26 +148,36 @@ static enum led_brightness k90_backlight_get(struct led_classdev *led_cdev) struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); int brightness; - char data[8]; + char *data; + + data = kmalloc(8, GFP_KERNEL); + if (!data) + return -ENOMEM; ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), K90_REQUEST_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, 8, USB_CTRL_SET_TIMEOUT); - if (ret < 0) { + if (ret < 5) { dev_warn(dev, "Failed to get K90 initial state (error %d).\n", ret); - return -EIO; + ret = -EIO; + goto out; } brightness = data[4]; if (brightness < 0 || brightness > 3) { dev_warn(dev, "Read invalid backlight brightness: %02hhx.\n", data[4]); - return -EIO; + ret = -EIO; + goto out; } - return brightness; + ret = brightness; +out: + kfree(data); + + return ret; } static enum led_brightness k90_record_led_get(struct led_classdev *led_cdev) @@ -253,17 +263,22 @@ static ssize_t k90_show_macro_mode(struct device *dev, struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); const char *macro_mode; - char data[8]; + char *data; + + data = kmalloc(2, GFP_KERNEL); + if (!data) + return -ENOMEM; ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), K90_REQUEST_GET_MODE, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, 2, USB_CTRL_SET_TIMEOUT); - if (ret < 0) { 
+ if (ret < 1) { dev_warn(dev, "Failed to get K90 initial mode (error %d).\n", ret); - return -EIO; + ret = -EIO; + goto out; } switch (data[0]) { @@ -277,10 +292,15 @@ static ssize_t k90_show_macro_mode(struct device *dev, default: dev_warn(dev, "K90 in unknown mode: %02hhx.\n", data[0]); - return -EIO; + ret = -EIO; + goto out; } - return snprintf(buf, PAGE_SIZE, "%s\n", macro_mode); + ret = snprintf(buf, PAGE_SIZE, "%s\n", macro_mode); +out: + kfree(data); + + return ret; } static ssize_t k90_store_macro_mode(struct device *dev, @@ -320,26 +340,36 @@ static ssize_t k90_show_current_profile(struct device *dev, struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); int current_profile; - char data[8]; + char *data; + + data = kmalloc(8, GFP_KERNEL); + if (!data) + return -ENOMEM; ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), K90_REQUEST_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, 8, USB_CTRL_SET_TIMEOUT); - if (ret < 0) { + if (ret < 8) { dev_warn(dev, "Failed to get K90 initial state (error %d).\n", ret); - return -EIO; + ret = -EIO; + goto out; } current_profile = data[7]; if (current_profile < 1 || current_profile > 3) { dev_warn(dev, "Read invalid current profile: %02hhx.\n", data[7]); - return -EIO; + ret = -EIO; + goto out; } - return snprintf(buf, PAGE_SIZE, "%d\n", current_profile); + ret = snprintf(buf, PAGE_SIZE, "%d\n", current_profile); +out: + kfree(data); + + return ret; } static ssize_t k90_store_current_profile(struct device *dev, diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 60d30203a5faf9c7b69dcddb09ec0a086ca96a08..e06c1344c9138e1f6bb1ffafa5f22eff11fe7a17 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -167,7 +167,7 @@ struct cp2112_device { atomic_t xfer_avail; struct gpio_chip gc; u8 *in_out_buffer; - spinlock_t lock; + struct mutex lock; }; static int gpio_push_pull = 0xFF; @@ -179,10 +179,9 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dev->lock, flags); + mutex_lock(&dev->lock); ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, @@ -206,8 +205,8 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) ret = 0; exit: - spin_unlock_irqrestore(&dev->lock, flags); - return ret <= 0 ? ret : -EIO; + mutex_unlock(&dev->lock); + return ret < 0 ? ret : -EIO; } static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value) @@ -215,10 +214,9 @@ static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value) struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dev->lock, flags); + mutex_lock(&dev->lock); buf[0] = CP2112_GPIO_SET; buf[1] = value ? 
0xff : 0; @@ -230,7 +228,7 @@ static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value) if (ret < 0) hid_err(hdev, "error setting GPIO values: %d\n", ret); - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); } static int cp2112_gpio_get(struct gpio_chip *chip, unsigned offset) @@ -238,10 +236,9 @@ static int cp2112_gpio_get(struct gpio_chip *chip, unsigned offset) struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dev->lock, flags); + mutex_lock(&dev->lock); ret = hid_hw_raw_request(hdev, CP2112_GPIO_GET, buf, CP2112_GPIO_GET_LENGTH, HID_FEATURE_REPORT, @@ -255,7 +252,7 @@ static int cp2112_gpio_get(struct gpio_chip *chip, unsigned offset) ret = (buf[1] >> offset) & 1; exit: - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); return ret; } @@ -266,10 +263,9 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip, struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dev->lock, flags); + mutex_lock(&dev->lock); ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, @@ -290,7 +286,7 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip, goto fail; } - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); /* * Set gpio value when output direction is already set, @@ -301,7 +297,7 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip, return 0; fail: - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); return ret < 0 ? ret : -EIO; } @@ -1057,7 +1053,7 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id) if (!dev->in_out_buffer) return -ENOMEM; - spin_lock_init(&dev->lock); + mutex_init(&dev->lock); ret = hid_parse(hdev); if (ret) { diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c index 1b764d1745f3daa693a17ee706c302ff31ae0f0e..1689568b597d4e5bb8824ccbe679f627525bf2a4 100644 --- a/drivers/hid/hid-cypress.c +++ b/drivers/hid/hid-cypress.c @@ -39,6 +39,9 @@ static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX)) return rdesc; + if (*rsize < 4) + return rdesc; + for (i = 0; i < *rsize - 4; i++) if (rdesc[i] == 0x29 && rdesc[i + 2] == 0x19) { rdesc[i] = 0x19; diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 575aa65436d182c31fb874a432807688747dd785..9845189fae925a7e5465d5f1cc11a4be4fc07386 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -76,6 +76,9 @@ #define USB_VENDOR_ID_ALPS_JP 0x044E #define HID_DEVICE_ID_ALPS_U1_DUAL 0x120B +#define USB_VENDOR_ID_AMI 0x046b +#define USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE 0xff10 + #define USB_VENDOR_ID_ANTON 0x1130 #define USB_DEVICE_ID_ANTON_TOUCH_PAD 0x3101 diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index c5c5fbe9d60577f44085d86a7fb5cf60efb6acd3..52026dc94d5c4b0306ce585be293cbe2cb1910d9 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -872,7 +872,7 @@ static const struct hid_device_id lg_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG), .driver_data = LG_NOGET | LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2), - .driver_data = LG_FF2 }, + .driver_data = LG_NOGET | LG_FF2 }, { 
HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FLIGHT_SYSTEM_G940), .driver_data = LG_FF3 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACENAVIGATOR), diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index 60875625cbdff45725532b92a395fe7f329163ee..8f6c35370f661d592b359b22136447fbd5c5203f 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -212,7 +212,6 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, __s32 value; int ret = 0; - memset(buffer, 0, buffer_size); mutex_lock(&data->mutex); report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT); if (!report || (field_index >= report->maxfield)) { @@ -256,6 +255,8 @@ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, int buffer_index = 0; int i; + memset(buffer, 0, buffer_size); + mutex_lock(&data->mutex); report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT); if (!report || (field_index >= report->maxfield) || diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index e6cfd323babc62d653146730e4d7325ee8a24696..cde060fefa91169f6a2654cff3309a0990fa27d7 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -57,6 +57,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_AIREN, USB_DEVICE_ID_AIREN_SLIMPLUS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_AKAI, USB_DEVICE_ID_AKAI_MPKMINI2, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_AKAI_09E8, USB_DEVICE_ID_AKAI_09E8_MIDIMIX, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_AMI, USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS124U, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM, HID_QUIRK_NOGET }, diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 1cb79925730d931a0ee00f3ff0f470464731d3b3..623be90704abfb0ca987ae3aa4184bd97d4c1e3a 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -164,19 +164,21 @@ static int wacom_pl_irq(struct wacom_wac *wacom) wacom->id[0] = STYLUS_DEVICE_ID; } - pressure = (signed char)((data[7] << 1) | ((data[4] >> 2) & 1)); - if (features->pressure_max > 255) - pressure = (pressure << 1) | ((data[4] >> 6) & 1); - pressure += (features->pressure_max + 1) / 2; - - input_report_abs(input, ABS_X, data[3] | (data[2] << 7) | ((data[1] & 0x03) << 14)); - input_report_abs(input, ABS_Y, data[6] | (data[5] << 7) | ((data[4] & 0x03) << 14)); - input_report_abs(input, ABS_PRESSURE, pressure); - - input_report_key(input, BTN_TOUCH, data[4] & 0x08); - input_report_key(input, BTN_STYLUS, data[4] & 0x10); - /* Only allow the stylus2 button to be reported for the pen tool. */ - input_report_key(input, BTN_STYLUS2, (wacom->tool[0] == BTN_TOOL_PEN) && (data[4] & 0x20)); + if (prox) { + pressure = (signed char)((data[7] << 1) | ((data[4] >> 2) & 1)); + if (features->pressure_max > 255) + pressure = (pressure << 1) | ((data[4] >> 6) & 1); + pressure += (features->pressure_max + 1) / 2; + + input_report_abs(input, ABS_X, data[3] | (data[2] << 7) | ((data[1] & 0x03) << 14)); + input_report_abs(input, ABS_Y, data[6] | (data[5] << 7) | ((data[4] & 0x03) << 14)); + input_report_abs(input, ABS_PRESSURE, pressure); + + input_report_key(input, BTN_TOUCH, data[4] & 0x08); + input_report_key(input, BTN_STYLUS, data[4] & 0x10); + /* Only allow the stylus2 button to be reported for the pen tool. 
*/ + input_report_key(input, BTN_STYLUS2, (wacom->tool[0] == BTN_TOOL_PEN) && (data[4] & 0x20)); + } if (!prox) wacom->id[0] = 0; diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 96a85cd39580a904acfdf6f57d89403c4fd734b6..1bc1d4795243a86f509b8624f9821f8d871538b5 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -389,6 +389,7 @@ void vmbus_free_channels(void) { struct vmbus_channel *channel, *tmp; + mutex_lock(&vmbus_connection.channel_mutex); list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list, listentry) { /* hv_process_channel_removal() needs this */ @@ -396,6 +397,7 @@ void vmbus_free_channels(void) vmbus_device_unregister(channel->device_obj); } + mutex_unlock(&vmbus_connection.channel_mutex); } /* diff --git a/drivers/hwmon/amc6821.c b/drivers/hwmon/amc6821.c index 12e851a5af48d0d84a0130f854bb2439300c7bd8..46b4e35fd5555b4e45e3e38870d6c8fb23a94985 100644 --- a/drivers/hwmon/amc6821.c +++ b/drivers/hwmon/amc6821.c @@ -188,8 +188,8 @@ static struct amc6821_data *amc6821_update_device(struct device *dev) !data->valid) { for (i = 0; i < TEMP_IDX_LEN; i++) - data->temp[i] = i2c_smbus_read_byte_data(client, - temp_reg[i]); + data->temp[i] = (int8_t)i2c_smbus_read_byte_data( + client, temp_reg[i]); data->stat1 = i2c_smbus_read_byte_data(client, AMC6821_REG_STAT1); diff --git a/drivers/hwmon/ds620.c b/drivers/hwmon/ds620.c index edf550fc4eef5c1b2cbbcd94365e7259e1a2049c..0043a4c02b85b4007d1972e50709801510df1a1d 100644 --- a/drivers/hwmon/ds620.c +++ b/drivers/hwmon/ds620.c @@ -166,7 +166,7 @@ static ssize_t set_temp(struct device *dev, struct device_attribute *da, if (res) return res; - val = (val * 10 / 625) * 8; + val = (clamp_val(val, -128000, 128000) * 10 / 625) * 8; mutex_lock(&data->update_lock); data->temp[attr->index] = val; diff --git a/drivers/hwmon/g762.c b/drivers/hwmon/g762.c index b96a2a9e4df7d5af127ad42fa5ee5a00fa840b8e..628be9c95ff9dbe18d846ce7e5fd3266a96ebe62 100644 --- a/drivers/hwmon/g762.c +++ b/drivers/hwmon/g762.c @@ -193,14 +193,17 @@ static inline unsigned int rpm_from_cnt(u8 cnt, u32 clk_freq, u16 p, * Convert fan RPM value from sysfs into count value for fan controller * register (FAN_SET_CNT). 
*/ -static inline unsigned char cnt_from_rpm(u32 rpm, u32 clk_freq, u16 p, +static inline unsigned char cnt_from_rpm(unsigned long rpm, u32 clk_freq, u16 p, u8 clk_div, u8 gear_mult) { - if (!rpm) /* to stop the fan, set cnt to 255 */ + unsigned long f1 = clk_freq * 30 * gear_mult; + unsigned long f2 = p * clk_div; + + if (!rpm) /* to stop the fan, set cnt to 255 */ return 0xff; - return clamp_val(((clk_freq * 30 * gear_mult) / (rpm * p * clk_div)), - 0, 255); + rpm = clamp_val(rpm, f1 / (255 * f2), ULONG_MAX / f2); + return DIV_ROUND_CLOSEST(f1, rpm * f2); } /* helper to grab and cache data, at most one time per second */ diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 322ed9272811817324396960e63facf9df80d96c..841f2428e84a3936de0a60ee42700efd70933ed2 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -1036,7 +1036,7 @@ static const u8 lm90_temp_emerg_index[3] = { }; static const u8 lm90_min_alarm_bits[3] = { 5, 3, 11 }; -static const u8 lm90_max_alarm_bits[3] = { 0, 4, 12 }; +static const u8 lm90_max_alarm_bits[3] = { 6, 4, 12 }; static const u8 lm90_crit_alarm_bits[3] = { 0, 1, 9 }; static const u8 lm90_emergency_alarm_bits[3] = { 15, 13, 14 }; static const u8 lm90_fault_bits[3] = { 0, 2, 10 }; diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 3ce33d244cc0f7a0a31f2f1af4b7cdf92f5341e2..12b94b094c0d154eba138ec1845ed2c842cb887f 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -259,13 +259,15 @@ static int nct7802_read_fan_min(struct nct7802_data *data, u8 reg_fan_low, ret = 0; else if (ret) ret = DIV_ROUND_CLOSEST(1350000U, ret); + else + ret = 1350000U; abort: mutex_unlock(&data->access_lock); return ret; } static int nct7802_write_fan_min(struct nct7802_data *data, u8 reg_fan_low, - u8 reg_fan_high, unsigned int limit) + u8 reg_fan_high, unsigned long limit) { int err; @@ -326,8 +328,8 @@ static int nct7802_write_voltage(struct nct7802_data *data, int nr, int index, int shift = 8 - REG_VOLTAGE_LIMIT_MSB_SHIFT[index - 1][nr]; int err; + voltage = clamp_val(voltage, 0, 0x3ff * nct7802_vmul[nr]); voltage = DIV_ROUND_CLOSEST(voltage, nct7802_vmul[nr]); - voltage = clamp_val(voltage, 0, 0x3ff); mutex_lock(&data->access_lock); err = regmap_write(data->regmap, @@ -402,7 +404,7 @@ static ssize_t store_temp(struct device *dev, struct device_attribute *attr, if (err < 0) return err; - val = clamp_val(DIV_ROUND_CLOSEST(val, 1000), -128, 127); + val = DIV_ROUND_CLOSEST(clamp_val(val, -128000, 127000), 1000); err = regmap_write(data->regmap, nr, val & 0xff); return err ? 
: count; diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c index 559a3dcd64d8d67a9e53a0469e2f369df0e4099c..094f948f99ffb08d56a53ed0520a7a47453c49df 100644 --- a/drivers/hwmon/scpi-hwmon.c +++ b/drivers/hwmon/scpi-hwmon.c @@ -251,6 +251,7 @@ static const struct of_device_id scpi_of_match[] = { {.compatible = "arm,scpi-sensors"}, {}, }; +MODULE_DEVICE_TABLE(of, scpi_of_match); static struct platform_driver scpi_hwmon_platdrv = { .driver = { diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index c4a7f2891b198057cda773221a39473dbace4f97..52698909d36ec3b7906d897baad9b3df4b295ec9 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -361,7 +361,7 @@ static int stm_char_open(struct inode *inode, struct file *file) struct stm_file *stmf; struct device *dev; unsigned int major = imajor(inode); - int err = -ENODEV; + int err = -ENOMEM; dev = class_find_device(&stm_class, NULL, &major, major_match); if (!dev) @@ -369,8 +369,9 @@ static int stm_char_open(struct inode *inode, struct file *file) stmf = kzalloc(sizeof(*stmf), GFP_KERNEL); if (!stmf) - return -ENOMEM; + goto err_put_device; + err = -ENODEV; stm_output_init(&stmf->output); stmf->stm = to_stm_device(dev); @@ -382,9 +383,10 @@ static int stm_char_open(struct inode *inode, struct file *file) return nonseekable_open(inode, file); err_free: + kfree(stmf); +err_put_device: /* matches class_find_device() above */ put_device(dev); - kfree(stmf); return err; } diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index c2268cdf38e82348ae883c99d1f140a7fc2d0ecc..e34d82e79b988a781010cad1e0f283617dfb8471 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -585,10 +585,29 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, u8 command, int size, union i2c_smbus_data *data) { struct i2c_piix4_adapdata *adapdata = i2c_get_adapdata(adap); + unsigned short piix4_smba = adapdata->smba; + int retries = MAX_TIMEOUT; + int smbslvcnt; u8 smba_en_lo; u8 port; int retval; + /* Request the SMBUS semaphore, avoid conflicts with the IMC */ + smbslvcnt = inb_p(SMBSLVCNT); + do { + outb_p(smbslvcnt | 0x10, SMBSLVCNT); + + /* Check the semaphore status */ + smbslvcnt = inb_p(SMBSLVCNT); + if (smbslvcnt & 0x10) + break; + + usleep_range(1000, 2000); + } while (--retries); + /* SMBus is still owned by the IMC, we give up */ + if (!retries) + return -EBUSY; + mutex_lock(&piix4_mutex_sb800); outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX); @@ -606,6 +625,9 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, mutex_unlock(&piix4_mutex_sb800); + /* Release the semaphore */ + outb_p(smbslvcnt | 0x20, SMBSLVCNT); + return retval; } diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index b432b64e307a81740b0ce8a292dc9b80a3921a40..7484aac1e14d6ea31497e5a6608f338c7da8dfb3 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1657,7 +1657,7 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap, if (i2c_check_addr_validity(addr, info.flags)) { dev_err(&adap->dev, "of_i2c: invalid addr=%x on %s\n", - info.addr, node->full_name); + addr, node->full_name); return ERR_PTR(-EINVAL); } diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 66f323fd39826ebe9a738d55724ee2e124b45419..6f638bbc922db4fd366e1e3dcdd11c8655215622 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -331,7 +331,7 @@ static noinline int i2cdev_ioctl_smbus(struct i2c_client *client, unsigned 
long arg) { struct i2c_smbus_ioctl_data data_arg; - union i2c_smbus_data temp; + union i2c_smbus_data temp = {}; int datasize, res; if (copy_from_user(&data_arg, diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 8bc3d36d28379ee9bf834b49d8022a310e55aee4..9c4ac26c014e163bb62f479dc686c87d14403af6 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -151,6 +151,9 @@ static int pca954x_reg_write(struct i2c_adapter *adap, buf[0] = val; msg.buf = buf; ret = __i2c_transfer(adap, &msg, 1); + + if (ret >= 0 && ret != 1) + ret = -EREMOTEIO; } else { union i2c_smbus_data data; ret = adap->algo->smbus_xfer(adap, client->addr, @@ -179,7 +182,7 @@ static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan) /* Only select the channel if its different from the last channel */ if (data->last_chan != regval) { ret = pca954x_reg_write(muxc->parent, client, regval); - data->last_chan = ret ? 0 : regval; + data->last_chan = ret < 0 ? 0 : regval; } return ret; diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index ce69048c88e98ba9825fbee17e8fd186b94e8d8f..3a557e3181eac58b8f2a3972ca50404277aad687 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -154,8 +154,8 @@ #define ST_ACCEL_4_FS_MASK 0x80 #define ST_ACCEL_4_FS_AVL_2_VAL 0X00 #define ST_ACCEL_4_FS_AVL_6_VAL 0X01 -#define ST_ACCEL_4_FS_AVL_2_GAIN IIO_G_TO_M_S_2(1024) -#define ST_ACCEL_4_FS_AVL_6_GAIN IIO_G_TO_M_S_2(340) +#define ST_ACCEL_4_FS_AVL_2_GAIN IIO_G_TO_M_S_2(1000) +#define ST_ACCEL_4_FS_AVL_6_GAIN IIO_G_TO_M_S_2(3000) #define ST_ACCEL_4_BDU_ADDR 0x21 #define ST_ACCEL_4_BDU_MASK 0x40 #define ST_ACCEL_4_DRDY_IRQ_ADDR 0x21 @@ -346,6 +346,14 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .addr = ST_ACCEL_1_BDU_ADDR, .mask = ST_ACCEL_1_BDU_MASK, }, + /* + * Data Alignment Setting - needs to be set to get + * left-justified data like all other sensors. 
+ */ + .das = { + .addr = 0x21, + .mask = 0x01, + }, .drdy_irq = { .addr = ST_ACCEL_1_DRDY_IRQ_ADDR, .mask_int1 = ST_ACCEL_1_DRDY_IRQ_INT1_MASK, diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c index 2bbf0c521bebb5c44840a8ce5296024910d8d73d..7d61b566e148dddd21ce2986cba233567802f93c 100644 --- a/drivers/iio/adc/palmas_gpadc.c +++ b/drivers/iio/adc/palmas_gpadc.c @@ -775,7 +775,7 @@ static int palmas_adc_wakeup_reset(struct palmas_gpadc *adc) static int palmas_gpadc_suspend(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = dev_get_drvdata(dev); struct palmas_gpadc *adc = iio_priv(indio_dev); int wakeup = adc->wakeup1_enable || adc->wakeup2_enable; int ret; @@ -798,7 +798,7 @@ static int palmas_gpadc_suspend(struct device *dev) static int palmas_gpadc_resume(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = dev_get_drvdata(dev); struct palmas_gpadc *adc = iio_priv(indio_dev); int wakeup = adc->wakeup1_enable || adc->wakeup2_enable; int ret; diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c index fe7775bb37402ea73915189d2c4f121b0a74464d..df4045203a0717c40613b2297c9f85a855333f68 100644 --- a/drivers/iio/common/st_sensors/st_sensors_buffer.c +++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c @@ -30,7 +30,9 @@ static int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf) for_each_set_bit(i, indio_dev->active_scan_mask, num_data_channels) { const struct iio_chan_spec *channel = &indio_dev->channels[i]; - unsigned int bytes_to_read = channel->scan_type.realbits >> 3; + unsigned int bytes_to_read = + DIV_ROUND_UP(channel->scan_type.realbits + + channel->scan_type.shift, 8); unsigned int storage_bytes = channel->scan_type.storagebits >> 3; diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 975a1f19f74760e5e1c17c786e36d9a86ae5a2a6..79c8c7cd70d5c6d74fc2e32cad372f8644233c6b 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -401,6 +401,15 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, return err; } + /* set DAS */ + if (sdata->sensor_settings->das.addr) { + err = st_sensors_write_data_with_mask(indio_dev, + sdata->sensor_settings->das.addr, + sdata->sensor_settings->das.mask, 1); + if (err < 0) + return err; + } + if (sdata->int_pin_open_drain) { dev_info(&indio_dev->dev, "set interrupt line to open drain mode\n"); @@ -483,8 +492,10 @@ static int st_sensors_read_axis_data(struct iio_dev *indio_dev, int err; u8 *outdata; struct st_sensor_data *sdata = iio_priv(indio_dev); - unsigned int byte_for_channel = ch->scan_type.realbits >> 3; + unsigned int byte_for_channel; + byte_for_channel = DIV_ROUND_UP(ch->scan_type.realbits + + ch->scan_type.shift, 8); outdata = kmalloc(byte_for_channel, GFP_KERNEL); if (!outdata) return -ENOMEM; diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c index 9a081465c42f4225d26747a734c5daceea68c842..6bb23a49e81eb8cf304a2f81f50c5e36a91e2d48 100644 --- a/drivers/iio/health/afe4403.c +++ b/drivers/iio/health/afe4403.c @@ -422,7 +422,7 @@ MODULE_DEVICE_TABLE(of, afe4403_of_match); static int __maybe_unused afe4403_suspend(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = spi_get_drvdata(to_spi_device(dev)); struct afe4403_data *afe = iio_priv(indio_dev); 
int ret; @@ -443,7 +443,7 @@ static int __maybe_unused afe4403_suspend(struct device *dev) static int __maybe_unused afe4403_resume(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = spi_get_drvdata(to_spi_device(dev)); struct afe4403_data *afe = iio_priv(indio_dev); int ret; diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c index 45266404f7e3b5bd7c7790a0b05ff298fc24d5a9..964f5231a831c437c277e4bcfe292720104043f7 100644 --- a/drivers/iio/health/afe4404.c +++ b/drivers/iio/health/afe4404.c @@ -428,7 +428,7 @@ MODULE_DEVICE_TABLE(of, afe4404_of_match); static int __maybe_unused afe4404_suspend(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev)); struct afe4404_data *afe = iio_priv(indio_dev); int ret; @@ -449,7 +449,7 @@ static int __maybe_unused afe4404_suspend(struct device *dev) static int __maybe_unused afe4404_resume(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev)); struct afe4404_data *afe = iio_priv(indio_dev); int ret; diff --git a/drivers/iio/health/max30100.c b/drivers/iio/health/max30100.c index 90ab8a2d2846f8a8591ee6b1615dce2c984020ef..183c14329d6e350f6325b0e77a95dcb56de892dd 100644 --- a/drivers/iio/health/max30100.c +++ b/drivers/iio/health/max30100.c @@ -238,7 +238,7 @@ static irqreturn_t max30100_interrupt_handler(int irq, void *private) mutex_lock(&data->lock); - while (cnt || (cnt = max30100_fifo_count(data) > 0)) { + while (cnt || (cnt = max30100_fifo_count(data)) > 0) { ret = max30100_read_measurement(data); if (ret) break; diff --git a/drivers/iio/humidity/dht11.c b/drivers/iio/humidity/dht11.c index 9c47bc98f3acdea4cb4b56b23a72e582c34a413b..2a22ad92033306d02eec60f288c00a65d5186c37 100644 --- a/drivers/iio/humidity/dht11.c +++ b/drivers/iio/humidity/dht11.c @@ -71,7 +71,8 @@ * a) select an implementation using busy loop polling on those systems * b) use the checksum to do some probabilistic decoding */ -#define DHT11_START_TRANSMISSION 18 /* ms */ +#define DHT11_START_TRANSMISSION_MIN 18000 /* us */ +#define DHT11_START_TRANSMISSION_MAX 20000 /* us */ #define DHT11_MIN_TIMERES 34000 /* ns */ #define DHT11_THRESHOLD 49000 /* ns */ #define DHT11_AMBIG_LOW 23000 /* ns */ @@ -228,7 +229,8 @@ static int dht11_read_raw(struct iio_dev *iio_dev, ret = gpio_direction_output(dht11->gpio, 0); if (ret) goto err; - msleep(DHT11_START_TRANSMISSION); + usleep_range(DHT11_START_TRANSMISSION_MIN, + DHT11_START_TRANSMISSION_MAX); ret = gpio_direction_input(dht11->gpio); if (ret) goto err; diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c index e0251b8c1a527aa7fe97ef443c0b1614888e63fc..5fb571d0315377c05be3ba9951fc07a5ecd411ee 100644 --- a/drivers/iio/imu/bmi160/bmi160_core.c +++ b/drivers/iio/imu/bmi160/bmi160_core.c @@ -66,10 +66,8 @@ #define BMI160_REG_DUMMY 0x7F -#define BMI160_ACCEL_PMU_MIN_USLEEP 3200 -#define BMI160_ACCEL_PMU_MAX_USLEEP 3800 -#define BMI160_GYRO_PMU_MIN_USLEEP 55000 -#define BMI160_GYRO_PMU_MAX_USLEEP 80000 +#define BMI160_ACCEL_PMU_MIN_USLEEP 3800 +#define BMI160_GYRO_PMU_MIN_USLEEP 80000 #define BMI160_SOFTRESET_USLEEP 1000 #define BMI160_CHANNEL(_type, _axis, _index) { \ @@ -151,20 +149,9 @@ static struct bmi160_regs bmi160_regs[] = { }, }; -struct bmi160_pmu_time { - unsigned long min; - unsigned long max; -}; - -static struct bmi160_pmu_time bmi160_pmu_time[] = { - 
[BMI160_ACCEL] = { - .min = BMI160_ACCEL_PMU_MIN_USLEEP, - .max = BMI160_ACCEL_PMU_MAX_USLEEP - }, - [BMI160_GYRO] = { - .min = BMI160_GYRO_PMU_MIN_USLEEP, - .max = BMI160_GYRO_PMU_MIN_USLEEP, - }, +static unsigned long bmi160_pmu_time[] = { + [BMI160_ACCEL] = BMI160_ACCEL_PMU_MIN_USLEEP, + [BMI160_GYRO] = BMI160_GYRO_PMU_MIN_USLEEP, }; struct bmi160_scale { @@ -289,7 +276,7 @@ int bmi160_set_mode(struct bmi160_data *data, enum bmi160_sensor_type t, if (ret < 0) return ret; - usleep_range(bmi160_pmu_time[t].min, bmi160_pmu_time[t].max); + usleep_range(bmi160_pmu_time[t], bmi160_pmu_time[t] + 1000); return 0; } diff --git a/drivers/iio/light/max44000.c b/drivers/iio/light/max44000.c index 6511b20a2a2966b7db3987e46dae98774f4d5540..a8ffa432bf0debf007f2fb862eac43c996f7741d 100644 --- a/drivers/iio/light/max44000.c +++ b/drivers/iio/light/max44000.c @@ -113,7 +113,7 @@ static const char max44000_int_time_avail_str[] = "0.100 " "0.025 " "0.00625 " - "0.001625"; + "0.0015625"; /* Available scales (internal to ulux) with pretty manual alignment: */ static const int max44000_scale_avail_ulux_array[] = { diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 1a2984c28b9589410b381a6a0277aff188395907..ae04826e82fc0982496f8b560c6465e6562f819c 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -770,12 +770,8 @@ static int _gid_table_setup_one(struct ib_device *ib_dev) int err = 0; table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL); - - if (!table) { - pr_warn("failed to allocate ib gid cache for %s\n", - ib_dev->name); + if (!table) return -ENOMEM; - } for (port = 0; port < ib_dev->phys_port_cnt; port++) { u8 rdma_port = port + rdma_start_port(ib_dev); @@ -1170,14 +1166,13 @@ int ib_cache_setup_one(struct ib_device *device) GFP_KERNEL); if (!device->cache.pkey_cache || !device->cache.lmc_cache) { - pr_warn("Couldn't allocate cache for %s\n", device->name); - return -ENOMEM; + err = -ENOMEM; + goto free; } err = gid_table_setup_one(device); if (err) - /* Allocated memory will be cleaned in the release function */ - return err; + goto free; for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) ib_cache_update(device, p + rdma_start_port(device)); @@ -1192,6 +1187,9 @@ int ib_cache_setup_one(struct ib_device *device) err: gid_table_cleanup_one(device); +free: + kfree(device->cache.pkey_cache); + kfree(device->cache.lmc_cache); return err; } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2a6fc47a1dfbb578324ada2cd899ed22c4e1c8ad..c25768c2dd3b14afd29b56ba4da7f4bc5ac8c8e6 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2768,7 +2768,8 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, if (!src_addr || !src_addr->sa_family) { src_addr = (struct sockaddr *) &id->route.addr.src_addr; src_addr->sa_family = dst_addr->sa_family; - if (dst_addr->sa_family == AF_INET6) { + if (IS_ENABLED(CONFIG_IPV6) && + dst_addr->sa_family == AF_INET6) { struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 40cbd6bdb73be1f23095387dd95f58c8508227b1..2395fe2021c96102fd190f57344729fc8ee7c70e 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1746,7 +1746,7 @@ find_mad_agent(struct ib_mad_port_private 
*port_priv, if (!class) goto out; if (convert_mgmt_class(mad_hdr->mgmt_class) >= - IB_MGMT_MAX_METHODS) + ARRAY_SIZE(class->method_table)) goto out; method = class->method_table[convert_mgmt_class( mad_hdr->mgmt_class)]; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index e51b739f6ea3d2e9bd6d8b776d5ea5d8e49844ba..322cb67b07a9611b658b88523eb0fc5fb988bed9 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -518,8 +518,11 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec, process_join_error(group, status); else { int mgids_changed, is_mgid0; - ib_find_pkey(group->port->dev->device, group->port->port_num, - be16_to_cpu(rec->pkey), &pkey_index); + + if (ib_find_pkey(group->port->dev->device, + group->port->port_num, be16_to_cpu(rec->pkey), + &pkey_index)) + pkey_index = MCAST_INVALID_PKEY_INDEX; spin_lock_irq(&group->port->lock); if (group->state == MCAST_BUSY && diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 84b4eff90395e5eb2afc7d66a2f932de8afd28f2..c22fde6207d141bdaec17dcda3d8b585a1140255 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -134,6 +134,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND)); if (access & IB_ACCESS_ON_DEMAND) { + put_pid(umem->pid); ret = ib_umem_odp_get(context, umem); if (ret) { kfree(umem); @@ -149,6 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { + put_pid(umem->pid); kfree(umem); return ERR_PTR(-ENOMEM); } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 93e3d270a98af630f849947c2d6f6a1849b8d688..b85a1a983e07c8cb77f56c04e6aa7ce7dbd2a499 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -828,8 +828,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) } rdev->status_page = (struct t4_dev_status_page *) __get_free_page(GFP_KERNEL); - if (!rdev->status_page) + if (!rdev->status_page) { + err = -ENOMEM; goto destroy_ocqp_pool; + } rdev->status_page->qp_start = rdev->lldi.vr->qp.start; rdev->status_page->qp_size = rdev->lldi.vr->qp.size; rdev->status_page->cq_start = rdev->lldi.vr->cq.start; @@ -846,9 +848,17 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) } } + rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free"); + if (!rdev->free_workq) { + err = -ENOMEM; + goto err_free_status_page; + } + rdev->status_page->db_off = 0; return 0; +err_free_status_page: + free_page((unsigned long)rdev->status_page); destroy_ocqp_pool: c4iw_ocqp_pool_destroy(rdev); destroy_rqtpool: @@ -862,6 +872,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) static void c4iw_rdev_close(struct c4iw_rdev *rdev) { + destroy_workqueue(rdev->free_workq); kfree(rdev->wr_log); free_page((unsigned long)rdev->status_page); c4iw_pblpool_destroy(rdev); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 4788e1a46fdee23cce2956cc17ba8d09b0f3eb56..7d540667dad26aa60af9979076e05804c6bef519 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -45,6 +45,7 @@ #include #include #include +#include #include @@ -107,6 +108,7 @@ struct c4iw_dev_ucontext { struct list_head qpids; struct list_head cqids; struct mutex lock; + struct kref kref; }; enum 
c4iw_rdev_flags { @@ -183,6 +185,7 @@ struct c4iw_rdev { atomic_t wr_log_idx; struct wr_log_entry *wr_log; int wr_log_size; + struct workqueue_struct *free_workq; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -482,6 +485,8 @@ struct c4iw_qp { int sq_sig_all; struct completion rq_drained; struct completion sq_drained; + struct work_struct free_work; + struct c4iw_ucontext *ucontext; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) @@ -495,6 +500,7 @@ struct c4iw_ucontext { u32 key; spinlock_t mmap_lock; struct list_head mmaps; + struct kref kref; }; static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) @@ -502,6 +508,18 @@ static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) return container_of(c, struct c4iw_ucontext, ibucontext); } +void _c4iw_free_ucontext(struct kref *kref); + +static inline void c4iw_put_ucontext(struct c4iw_ucontext *ucontext) +{ + kref_put(&ucontext->kref, _c4iw_free_ucontext); +} + +static inline void c4iw_get_ucontext(struct c4iw_ucontext *ucontext) +{ + kref_get(&ucontext->kref); +} + struct c4iw_mm_entry { struct list_head entry; u64 addr; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 645e606a17c569483a8cf74615bb8912ffca79d2..8278ba06f9958a8ecaf62f9ac8b7fa39023a4650 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -91,17 +91,28 @@ static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, return -ENOSYS; } -static int c4iw_dealloc_ucontext(struct ib_ucontext *context) +void _c4iw_free_ucontext(struct kref *kref) { - struct c4iw_dev *rhp = to_c4iw_dev(context->device); - struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); + struct c4iw_ucontext *ucontext; + struct c4iw_dev *rhp; struct c4iw_mm_entry *mm, *tmp; - PDBG("%s context %p\n", __func__, context); + ucontext = container_of(kref, struct c4iw_ucontext, kref); + rhp = to_c4iw_dev(ucontext->ibucontext.device); + + PDBG("%s ucontext %p\n", __func__, ucontext); list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) kfree(mm); c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx); kfree(ucontext); +} + +static int c4iw_dealloc_ucontext(struct ib_ucontext *context) +{ + struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); + + PDBG("%s context %p\n", __func__, context); + c4iw_put_ucontext(ucontext); return 0; } @@ -125,6 +136,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx); INIT_LIST_HEAD(&context->mmaps); spin_lock_init(&context->mmap_lock); + kref_init(&context->kref); if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) { if (!warned++) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index b7ac97b27c88c2fe11ad11f564ce786085d3217c..cc2243f6cc7f5692a89fc95c91be8f24b9fa5035 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -321,7 +321,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, FW_RI_RES_WR_DCAEN_V(0) | FW_RI_RES_WR_DCACPU_V(0) | FW_RI_RES_WR_FBMIN_V(2) | - FW_RI_RES_WR_FBMAX_V(2) | + (t4_sq_onchip(&wq->sq) ? 
FW_RI_RES_WR_FBMAX_V(2) : + FW_RI_RES_WR_FBMAX_V(3)) | FW_RI_RES_WR_CIDXFTHRESHO_V(0) | FW_RI_RES_WR_CIDXFTHRESH_V(0) | FW_RI_RES_WR_EQSIZE_V(eqsize)); @@ -345,7 +346,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, FW_RI_RES_WR_DCAEN_V(0) | FW_RI_RES_WR_DCACPU_V(0) | FW_RI_RES_WR_FBMIN_V(2) | - FW_RI_RES_WR_FBMAX_V(2) | + FW_RI_RES_WR_FBMAX_V(3) | FW_RI_RES_WR_CIDXFTHRESHO_V(0) | FW_RI_RES_WR_CIDXFTHRESH_V(0) | FW_RI_RES_WR_EQSIZE_V(eqsize)); @@ -714,13 +715,32 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) return 0; } -static void _free_qp(struct kref *kref) +static void free_qp_work(struct work_struct *work) +{ + struct c4iw_ucontext *ucontext; + struct c4iw_qp *qhp; + struct c4iw_dev *rhp; + + qhp = container_of(work, struct c4iw_qp, free_work); + ucontext = qhp->ucontext; + rhp = qhp->rhp; + + PDBG("%s qhp %p ucontext %p\n", __func__, qhp, ucontext); + destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + + if (ucontext) + c4iw_put_ucontext(ucontext); + kfree(qhp); +} + +static void queue_qp_free(struct kref *kref) { struct c4iw_qp *qhp; qhp = container_of(kref, struct c4iw_qp, kref); PDBG("%s qhp %p\n", __func__, qhp); - kfree(qhp); + queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work); } void c4iw_qp_add_ref(struct ib_qp *qp) @@ -732,7 +752,7 @@ void c4iw_qp_add_ref(struct ib_qp *qp) void c4iw_qp_rem_ref(struct ib_qp *qp) { PDBG("%s ib_qp %p\n", __func__, qp); - kref_put(&to_c4iw_qp(qp)->kref, _free_qp); + kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free); } static void add_to_fc_list(struct list_head *head, struct list_head *entry) @@ -1642,7 +1662,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) struct c4iw_dev *rhp; struct c4iw_qp *qhp; struct c4iw_qp_attributes attrs; - struct c4iw_ucontext *ucontext; qhp = to_c4iw_qp(ib_qp); rhp = qhp->rhp; @@ -1662,11 +1681,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) spin_unlock_irq(&rhp->lock); free_ird(rhp, qhp->attr.max_ird); - ucontext = ib_qp->uobject ? - to_c4iw_ucontext(ib_qp->uobject->context) : NULL; - destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); - c4iw_qp_rem_ref(ib_qp); PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid); @@ -1767,6 +1781,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); kref_init(&qhp->kref); + INIT_WORK(&qhp->free_work, free_qp_work); ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); if (ret) @@ -1853,6 +1868,9 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ma_sync_key_mm->len = PAGE_SIZE; insert_mmap(ucontext, ma_sync_key_mm); } + + c4iw_get_ucontext(ucontext); + qhp->ucontext = ucontext; } qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 6329c971c22fc383330099fa07819b4666ee48b8..4b892ca2b13a4922948d7f549222c1f5f5e92148 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2501,7 +2501,7 @@ static int i40iw_get_hw_stats(struct ib_device *ibdev, return -ENOSYS; } - memcpy(&stats->value[0], &hw_stats, sizeof(*hw_stats)); + memcpy(&stats->value[0], hw_stats, sizeof(*hw_stats)); return stats->num_counters; } diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index b9bf0759f10a54f37e242b8d07c54c56a063e859..8dfc76f8cbb410af977df8220628a8dc99fd0093 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -114,7 +114,9 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support)) --ah->av.eth.stat_rate; } - + ah->av.eth.sl_tclass_flowlabel |= + cpu_to_be32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); /* * HW requires multicast LID so we just choose one. 
*/ @@ -122,7 +124,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr ah->av.ib.dlid = cpu_to_be16(0xc000); memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); - ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29); + ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29); return &ah->ibah; } diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 1672907ff219771fa0e479853176db0a988ea8c1..18d309e40f1b9269ec5b38f5e180c129d85b5e81 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -702,10 +702,18 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, /* If a grh is present, we demux according to it */ if (wc->wc_flags & IB_WC_GRH) { - slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id); - if (slave < 0) { - mlx4_ib_warn(ibdev, "failed matching grh\n"); - return -ENOENT; + if (grh->dgid.global.interface_id == + cpu_to_be64(IB_SA_WELL_KNOWN_GUID) && + grh->dgid.global.subnet_prefix == cpu_to_be64( + atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix))) { + slave = 0; + } else { + slave = mlx4_ib_find_real_gid(ibdev, port, + grh->dgid.global.interface_id); + if (slave < 0) { + mlx4_ib_warn(ibdev, "failed matching grh\n"); + return -ENOENT; + } } } /* Class-specific handling */ diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b597e822759139f2de50c70dc41394e2a93998ab..46ad99595fd2080d7de304ddf2778dbd182a5cb9 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -697,9 +697,11 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, if (err) goto out; - props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? - IB_WIDTH_4X : IB_WIDTH_1X; - props->active_speed = IB_SPEED_QDR; + props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) || + (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? + IB_WIDTH_4X : IB_WIDTH_1X; + props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? 
+ IB_SPEED_FDR : IB_SPEED_QDR; props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; @@ -2820,14 +2822,19 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_steer_qp_release; } - bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count); - - err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( - dev, ibdev->steer_qpn_base, - ibdev->steer_qpn_base + - ibdev->steer_qpn_count - 1); - if (err) - goto err_steer_free_bitmap; + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) { + bitmap_zero(ibdev->ib_uc_qpns_bitmap, + ibdev->steer_qpn_count); + err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( + dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_base + + ibdev->steer_qpn_count - 1); + if (err) + goto err_steer_free_bitmap; + } else { + bitmap_fill(ibdev->ib_uc_qpns_bitmap, + ibdev->steer_qpn_count); + } } for (j = 1; j <= ibdev->dev->caps.num_ports; j++) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 570bc866b1d62251f0ac50dc8c99606d57beedfd..c22454383976c4dda19eee489a9f6e93eabd9251 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1280,7 +1280,8 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); - if (dev->qp1_proxy[mqp->port - 1] == mqp) { + if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI && + dev->qp1_proxy[mqp->port - 1] == mqp) { mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]); dev->qp1_proxy[mqp->port - 1] = NULL; mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); @@ -1764,14 +1765,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; union ib_gid gid; - struct ib_gid_attr gid_attr; + struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; u16 vlan = 0xffff; u8 smac[ETH_ALEN]; int status = 0; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) && attr->ah_attr.ah_flags & IB_AH_GRH; - if (is_eth) { + if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) { int index = attr->ah_attr.grh.sgid_index; status = ib_get_cached_gid(ibqp->device, port_num, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 32b09f059c84a460b8e3bc5f80db2bb6fb892709..4cab29ea394cf063b1775c672884690e9e01b4bf 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -496,6 +496,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; int err = -ENOMEM; + int max_sq_desc; int max_rq_sg; int max_sq_sg; u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); @@ -618,9 +619,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / sizeof(struct mlx5_wqe_data_seg); - max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) - - sizeof(struct mlx5_wqe_ctrl_seg)) / - sizeof(struct mlx5_wqe_data_seg); + max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512); + max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4e9012463c37de6381cddde1c527e4c2ff6e84ae..be2d02b6a6aa6d834e6349e08b90ffb8de54e7c3 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -628,7 +628,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->order = i + 2; ent->dev = dev; - if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) + if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && + (mlx5_core_is_pf(dev->mdev))) limit = dev->mdev->profile->mr_cache[i].limit; else limit = 0; @@ -646,6 +647,33 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return 0; } +static void wait_for_async_commands(struct mlx5_ib_dev *dev) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int total = 0; + int i; + int j; + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + for (j = 0 ; j < 1000; j++) { + if (!ent->pending) + break; + msleep(50); + } + } + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + total += ent->pending; + } + + if (total) + mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total); + else + mlx5_ib_warn(dev, "done with all pending requests\n"); +} + int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; @@ -659,6 +687,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); + wait_for_async_commands(dev); del_timer_sync(&dev->delay_timer); return 0; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d1e921816bfee3596c961e6671d87a84a3caa0ab..aee3942ec68d85168a69e567b0a5934f2314ff99 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -351,6 +351,29 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) return ALIGN(max_t(int, inl_size, size), 
MLX5_SEND_WQE_BB); } +static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size) +{ + int max_sge; + + if (attr->qp_type == IB_QPT_RC) + max_sge = (min_t(int, wqe_size, 512) - + sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); + else if (attr->qp_type == IB_QPT_XRC_INI) + max_sge = (min_t(int, wqe_size, 512) - + sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_xrc_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); + else + max_sge = (wqe_size - sq_overhead(attr)) / + sizeof(struct mlx5_wqe_data_seg); + + return min_t(int, max_sge, wqe_size - sq_overhead(attr) / + sizeof(struct mlx5_wqe_data_seg)); +} + static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp) { @@ -387,7 +410,11 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, return -ENOMEM; } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); - qp->sq.max_gs = attr->cap.max_send_sge; + qp->sq.max_gs = get_send_sge(attr, wqe_size); + if (qp->sq.max_gs < attr->cap.max_send_sge) + return -ENOMEM; + + attr->cap.max_send_sge = qp->sq.max_gs; qp->sq.max_post = wq_size / wqe_size; attr->cap.max_send_wr = qp->sq.max_post; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 3857dbd9c95604d8169e3078a82c22d0f2ca4c95..729b0696626e41bdcf3a1e1ff1144a51cc5b6e53 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -282,6 +282,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n", desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, srq->msrq.max_avail_gather); + in.type = init_attr->srq_type; if (pd->uobject) err = create_srq_user(pd, srq, &in, udata, buf_size); @@ -294,7 +295,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, goto err_srq; } - in.type = init_attr->srq_type; in.log_size = ilog2(srq->msrq.max); in.wqe_shift = srq->msrq.wqe_shift - 4; if (srq->wq_sig) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index ffff5a54cb340d9d95d6bef83ce78ff456011d46..f4f3942ebbd133ac1606e7381bcdccd0f3604726 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -554,7 +554,7 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev) } spin_lock_bh(&dev_list_lock); - list_add_tail(&rxe_dev_list, &rxe->list); + list_add_tail(&rxe->list, &rxe_dev_list); spin_unlock_bh(&dev_list_lock); return rxe; } diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index f459c43a77c8334e2572f92d553c197bf3ed9229..13ed2cc6eaa2ac743877cf06b6687907baa40cf9 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -82,7 +82,7 @@ enum rxe_device_param { RXE_MAX_SGE = 32, RXE_MAX_SGE_RD = 32, RXE_MAX_CQ = 16384, - RXE_MAX_LOG_CQE = 13, + RXE_MAX_LOG_CQE = 15, RXE_MAX_MR = 2 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index c3e60e4bde6e2a3ba5e0953b531a42f65927b717..44b2108253bd988ec1f5222da999575ed37d3bed 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -813,8 +813,7 @@ void rxe_qp_destroy(struct rxe_qp *qp) del_timer_sync(&qp->rnr_nak_timer); rxe_cleanup_task(&qp->req.task); - if (qp_type(qp) == IB_QPT_RC) - 
rxe_cleanup_task(&qp->comp.task); + rxe_cleanup_task(&qp->comp.task); /* flush out any receive wr's or pending requests */ __rxe_do_task(&qp->req.task); @@ -855,4 +854,5 @@ void rxe_qp_cleanup(void *arg) free_rd_atomic_resources(qp); kernel_sock_shutdown(qp->sk, SHUT_RDWR); + sock_release(qp->sk); } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 22bd9630dcd924315012019ff0a39242d7476a59..9f46be52335e7a81ed7a365f07f3105cb59bcd77 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -548,23 +548,23 @@ static void update_wqe_psn(struct rxe_qp *qp, static void save_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 *rollback_psn) { rollback_wqe->state = wqe->state; rollback_wqe->first_psn = wqe->first_psn; rollback_wqe->last_psn = wqe->last_psn; - rollback_qp->req.psn = qp->req.psn; + *rollback_psn = qp->req.psn; } static void rollback_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 rollback_psn) { wqe->state = rollback_wqe->state; wqe->first_psn = rollback_wqe->first_psn; wqe->last_psn = rollback_wqe->last_psn; - qp->req.psn = rollback_qp->req.psn; + qp->req.psn = rollback_psn; } static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, @@ -593,8 +593,8 @@ int rxe_requester(void *arg) int mtu; int opcode; int ret; - struct rxe_qp rollback_qp; struct rxe_send_wqe rollback_wqe; + u32 rollback_psn; next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -719,7 +719,7 @@ int rxe_requester(void *arg) * rxe_xmit_packet(). * Otherwise, completer might initiate an unjustified retry flow. */ - save_state(wqe, qp, &rollback_wqe, &rollback_qp); + save_state(wqe, qp, &rollback_wqe, &rollback_psn); update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); @@ -727,7 +727,7 @@ int rxe_requester(void *arg) qp->need_req_skb = 1; kfree_skb(skb); - rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); + rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 339a1eecdfe3083e2b15d09473a1053113b7acaa..81a8080c18b358eb9c65bae675509c323642d483 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1054,8 +1054,6 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ tx_qp = ib_create_qp(priv->pd, &attr); if (PTR_ERR(tx_qp) == -EINVAL) { - ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", - priv->ca->name); attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; tx_qp = ib_create_qp(priv->pd, &attr); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 1909dd252c9406ba4700e96d5730a67c51ba1a91..fddff403d5d21b0c8a3216de6f7ec0170afa3278 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -575,8 +575,11 @@ void ipoib_mcast_join_task(struct work_struct *work) if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) return; - if (ib_query_port(priv->ca, priv->port, &port_attr) || - port_attr.state != IB_PORT_ACTIVE) { + if (ib_query_port(priv->ca, priv->port, &port_attr)) { + ipoib_dbg(priv, "ib_query_port() failed\n"); + 
return; + } + if (port_attr.state != IB_PORT_ACTIVE) { ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n", port_attr.state); return; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 64b3d11dcf1ed588a95d149f2bbd9ff884562893..140f3f354cf302eadb297d1b8deaf22d61af347c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -651,13 +651,6 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, SHOST_DIX_GUARD_CRC); } - /* - * Limit the sg_tablesize and max_sectors based on the device - * max fastreg page list length. - */ - shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize, - ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len); - if (iscsi_host_add(shost, ib_conn->device->ib_device->dma_device)) { mutex_unlock(&iser_conn->state_mutex); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d980fb458ad49c7eb37038facebb9ef14d96a1de..e7dcf14a76e24ac6c16cfa14456cefc7a2cb0208 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -366,6 +366,7 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, struct srp_fr_desc *d; struct ib_mr *mr; int i, ret = -EINVAL; + enum ib_mr_type mr_type; if (pool_size <= 0) goto err; @@ -379,9 +380,13 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->free_list); + if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) + mr_type = IB_MR_TYPE_SG_GAPS; + else + mr_type = IB_MR_TYPE_MEM_REG; + for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { - mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, - max_page_list_len); + mr = ib_alloc_mr(pd, mr_type, max_page_list_len); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto destroy_pool; @@ -3678,6 +3683,12 @@ static int __init srp_init_module(void) indirect_sg_entries = cmd_sg_entries; } + if (indirect_sg_entries > SG_MAX_SEGMENTS) { + pr_warn("Clamping indirect_sg_entries to %u\n", + SG_MAX_SEGMENTS); + indirect_sg_entries = SG_MAX_SEGMENTS; + } + srp_remove_wq = create_workqueue("srp_remove"); if (!srp_remove_wq) { ret = -ENOMEM; diff --git a/drivers/input/Makefile b/drivers/input/Makefile index 1ee1786c5a6ac5da6437f25e4d7d601b148d2efe..d01a5b13c0acbb60a8728b756dee88bdc6dd5745 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile @@ -26,6 +26,8 @@ obj-$(CONFIG_INPUT_TOUCHSCREEN) += touchscreen/ obj-$(CONFIG_INPUT_MISC) += misc/ obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o +obj-$(CONFIG_INPUT_KEYRESET) += keyreset.o +obj-$(CONFIG_INPUT_KEYCOMBO) += keycombo.o obj-$(CONFIG_RMI4_CORE) += rmi4/ diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 83af17ad0f1f131d6d397e3df2e15521d0c24b55..bbe15243b8e7ab467f2f1598b4854dc22fafe762 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -1376,6 +1376,12 @@ static int xpad_init_input(struct usb_xpad *xpad) input_dev->name = xpad->name; input_dev->phys = xpad->phys; usb_to_input_id(xpad->udev, &input_dev->id); + + if (xpad->xtype == XTYPE_XBOX360W) { + /* x360w controllers and the receiver have different ids */ + input_dev->id.product = 0x02a1; + } + input_dev->dev.parent = &xpad->intf->dev; input_set_drvdata(input_dev, xpad); diff --git a/drivers/input/keyboard/goldfish_events.c b/drivers/input/keyboard/goldfish_events.c index 
f6e643b589b616c61d1751005fedb3288f0ad82c..c877e56a9bd5760f0ef9151460a6fd5f7d99a6b2 100644 --- a/drivers/input/keyboard/goldfish_events.c +++ b/drivers/input/keyboard/goldfish_events.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,8 @@ #include #include +#define GOLDFISH_MAX_FINGERS 5 + enum { REG_READ = 0x00, REG_SET_PAGE = 0x00, @@ -52,7 +55,21 @@ static irqreturn_t events_interrupt(int irq, void *dev_id) value = __raw_readl(edev->addr + REG_READ); input_event(edev->input, type, code, value); - input_sync(edev->input); + // Send an extra (EV_SYN, SYN_REPORT, 0x0) event + // if a key was pressed. Some keyboard device + // drivers may only send the EV_KEY event and + // not EV_SYN. + // Note that sending an extra SYN_REPORT is not + // necessary nor correct protocol with other + // devices such as touchscreens, which will send + // their own SYN_REPORT's when sufficient event + // information has been collected (e.g., for + // touchscreens, when pressure and X/Y coordinates + // have been received). Hence, we will only send + // this extra SYN_REPORT if type == EV_KEY. + if (type == EV_KEY) { + input_sync(edev->input); + } return IRQ_HANDLED; } @@ -154,6 +171,15 @@ static int events_probe(struct platform_device *pdev) input_dev->name = edev->name; input_dev->id.bustype = BUS_HOST; + // Set the Goldfish Device to be multi-touch. + // In the Ranchu kernel, there is multi-touch-specific + // code for handling ABS_MT_SLOT events. + // See drivers/input/input.c:input_handle_abs_event. + // If we do not issue input_mt_init_slots, + // the kernel will filter out needed ABS_MT_SLOT + // events when we touch the screen in more than one place, + // preventing multi-touch with more than one finger from working. + input_mt_init_slots(input_dev, GOLDFISH_MAX_FINGERS, 0); events_import_bits(edev, input_dev->evbit, EV_SYN, EV_MAX); events_import_bits(edev, input_dev->keybit, EV_KEY, KEY_MAX); diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c index 2adfd86c869a5364312a2d8df904d2bd7df68b52..930424e55439d411ec8059f011d198766c8efa46 100644 --- a/drivers/input/misc/drv260x.c +++ b/drivers/input/misc/drv260x.c @@ -592,7 +592,6 @@ static int drv260x_probe(struct i2c_client *client, } haptics->input_dev->name = "drv260x:haptics"; - haptics->input_dev->dev.parent = client->dev.parent; haptics->input_dev->close = drv260x_close; input_set_drvdata(haptics->input_dev, haptics); input_set_capability(haptics->input_dev, EV_FF, FF_RUMBLE); diff --git a/drivers/input/misc/gpio_matrix.c b/drivers/input/misc/gpio_matrix.c index eaa9e89d473acb0795a9ebf02b2de7e7382fcca8..08769dd88f56a99c81178420dbd2ab0933982ba2 100644 --- a/drivers/input/misc/gpio_matrix.c +++ b/drivers/input/misc/gpio_matrix.c @@ -19,13 +19,12 @@ #include #include #include -#include struct gpio_kp { struct gpio_event_input_devs *input_devs; struct gpio_event_matrix_info *keypad_info; struct hrtimer timer; - struct wake_lock wake_lock; + struct wakeup_source wake_src; int current_output; unsigned int use_irq:1; unsigned int key_state_changed:1; @@ -215,7 +214,7 @@ static enum hrtimer_restart gpio_keypad_timer_func(struct hrtimer *timer) } for (in = 0; in < mi->ninputs; in++) enable_irq(gpio_to_irq(mi->input_gpios[in])); - wake_unlock(&kp->wake_lock); + __pm_relax(&kp->wake_src); return HRTIMER_NORESTART; } @@ -242,7 +241,7 @@ static irqreturn_t gpio_keypad_irq_handler(int irq_in, void *dev_id) else gpio_direction_input(mi->output_gpios[i]); } - wake_lock(&kp->wake_lock); + 
__pm_stay_awake(&kp->wake_src); hrtimer_start(&kp->timer, ktime_set(0, 0), HRTIMER_MODE_REL); return IRQ_HANDLED; } @@ -396,7 +395,7 @@ int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs, hrtimer_init(&kp->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); kp->timer.function = gpio_keypad_timer_func; - wake_lock_init(&kp->wake_lock, WAKE_LOCK_SUSPEND, "gpio_kp"); + wakeup_source_init(&kp->wake_src, "gpio_kp"); err = gpio_keypad_request_irqs(kp); kp->use_irq = err == 0; @@ -406,7 +405,7 @@ int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs, kp->use_irq ? "interrupt" : "polling"); if (kp->use_irq) - wake_lock(&kp->wake_lock); + __pm_stay_awake(&kp->wake_src); hrtimer_start(&kp->timer, ktime_set(0, 0), HRTIMER_MODE_REL); return 0; @@ -420,7 +419,7 @@ int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs, free_irq(gpio_to_irq(mi->input_gpios[i]), kp); hrtimer_cancel(&kp->timer); - wake_lock_destroy(&kp->wake_lock); + wakeup_source_trash(&kp->wake_src); for (i = mi->noutputs - 1; i >= 0; i--) { err_gpio_direction_input_failed: gpio_free(mi->input_gpios[i]); diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 6d7de9bfed9a7f2bdf872474f156613740632539..b93fe83a0b63f364be5eac94fcfe0b013f8d6721 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1346,6 +1346,18 @@ static void alps_process_packet_ss4_v2(struct psmouse *psmouse) priv->multi_packet = 0; + /* Report trackstick */ + if (alps_get_pkt_id_ss4_v2(packet) == SS4_PACKET_ID_STICK) { + if (priv->flags & ALPS_DUALPOINT) { + input_report_key(dev2, BTN_LEFT, f->ts_left); + input_report_key(dev2, BTN_RIGHT, f->ts_right); + input_report_key(dev2, BTN_MIDDLE, f->ts_middle); + input_sync(dev2); + } + return; + } + + /* Report touchpad */ alps_report_mt_data(psmouse, (f->fingers <= 4) ? 
f->fingers : 4); input_mt_report_finger_count(dev, f->fingers); @@ -1356,13 +1368,6 @@ static void alps_process_packet_ss4_v2(struct psmouse *psmouse) input_report_abs(dev, ABS_PRESSURE, f->pressure); input_sync(dev); - - if (priv->flags & ALPS_DUALPOINT) { - input_report_key(dev2, BTN_LEFT, f->ts_left); - input_report_key(dev2, BTN_RIGHT, f->ts_right); - input_report_key(dev2, BTN_MIDDLE, f->ts_middle); - input_sync(dev2); - } } static bool alps_is_valid_package_ss4_v2(struct psmouse *psmouse) diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index cf805b960866215f52e96b2ae9a21ae373ddedfb..2e934aef3d2a8b675635fcdf9ccd4af807c4ef87 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -200,7 +200,7 @@ static int rmi_f54_request_report(struct rmi_function *fn, u8 report_type) error = rmi_write(rmi_dev, fn->fd.command_base_addr, F54_GET_REPORT); if (error < 0) - return error; + goto unlock; init_completion(&f54->cmd_done); @@ -209,9 +209,10 @@ static int rmi_f54_request_report(struct rmi_function *fn, u8 report_type) queue_delayed_work(f54->workqueue, &f54->work, 0); +unlock: mutex_unlock(&f54->data_mutex); - return 0; + return error; } static size_t rmi_f54_get_report_size(struct f54_data *f54) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 073246c7d1634eef411c1bf07d7f72ab47cad369..0cdd95801a25701fbb0fd27bfedb2319f2262e6a 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -211,6 +211,12 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Rev 1"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"), + DMI_MATCH(DMI_PRODUCT_NAME, "C15B"), + }, + }, { } }; diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 02aec284decac37b1a93b16b609007c077db7f98..3e6003d32e565c748a43730574b9424eeb294d33 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -914,9 +914,9 @@ static irqreturn_t elants_i2c_irq(int irq, void *_dev) case QUEUE_HEADER_NORMAL: report_count = ts->buf[FW_HDR_COUNT]; - if (report_count > 3) { + if (report_count == 0 || report_count > 3) { dev_err(&client->dev, - "too large report count: %*ph\n", + "bad report count: %*ph\n", HEADER_SIZE, ts->buf); break; } diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 754595ee11b68afe32e8432d40337482111dcdac..11a13b5be73a38e7ffe82825f6aa0350932cef2c 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1021,7 +1021,7 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu, next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; left = (head - next_tail) % CMD_BUFFER_SIZE; - if (left <= 2) { + if (left <= 0x20) { struct iommu_cmd sync_cmd; int ret; diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 594849a3a9be63a8ada2662bd19040d1280fcab6..f8ed8c95b68537820c92be07df1bf9e28c19db32 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -805,8 +805,10 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) goto out_free_domain; group = iommu_group_get(&pdev->dev); - if (!group) + if (!group) { + ret = -EINVAL; goto out_free_domain; + } ret = iommu_attach_group(dev_state->domain, group); if (ret != 0) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 
d8376c2d18b3bf7491cb80473053b1a34f6cd54f..d82637ab09fd49b5ba87ee3bc586d2db4ba10833 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2037,6 +2037,25 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_present(context)) goto out_unlock; + /* + * For kdump cases, old valid entries may be cached due to the + * in-flight DMA and copied pgtable, but there is no unmapping + * behaviour for them, thus we need an explicit cache flush for + * the newly-mapped device. For kdump, at this point, the device + * is supposed to finish reset at its driver probe stage, so no + * in-flight DMA will exist, and we don't need to worry anymore + * hereafter. + */ + if (context_copied(context)) { + u16 did_old = context_domain_id(context); + + if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) + iommu->flush.flush_context(iommu, did_old, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + } + pgd = domain->pgd; context_clear_entry(context); @@ -5197,6 +5216,25 @@ static void intel_iommu_remove_device(struct device *dev) } #ifdef CONFIG_INTEL_IOMMU_SVM +#define MAX_NR_PASID_BITS (20) +static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu) +{ + /* + * Convert ecap_pss to extend context entry pts encoding, also + * respect the soft pasid_max value set by the iommu. + * - number of PASID bits = ecap_pss + 1 + * - number of PASID table entries = 2^(pts + 5) + * Therefore, pts = ecap_pss - 4 + * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15 + */ + if (ecap_pss(iommu->ecap) < 5) + return 0; + + /* pasid_max is encoded as actual number of entries not the bits */ + return find_first_bit((unsigned long *)&iommu->pasid_max, + MAX_NR_PASID_BITS) - 5; +} + int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) { struct device_domain_info *info; @@ -5229,7 +5267,9 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd if (!(ctx_lo & CONTEXT_PASIDE)) { context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); - context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap); + context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | + intel_iommu_get_pts(iommu); + wmb(); /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both * extended to permit requests-with-PASID if the PASIDE bit diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c index 353c54986211805dd865e93c4540eb0a452def44..c2662a1bfdd3746c36114c36ab810af85507aa22 100644 --- a/drivers/irqchip/irq-bcm7038-l1.c +++ b/drivers/irqchip/irq-bcm7038-l1.c @@ -215,6 +215,31 @@ static int bcm7038_l1_set_affinity(struct irq_data *d, return 0; } +static void bcm7038_l1_cpu_offline(struct irq_data *d) +{ + struct cpumask *mask = irq_data_get_affinity_mask(d); + int cpu = smp_processor_id(); + cpumask_t new_affinity; + + /* This CPU was not on the affinity mask */ + if (!cpumask_test_cpu(cpu, mask)) + return; + + if (cpumask_weight(mask) > 1) { + /* + * Multiple CPU affinity, remove this CPU from the affinity + * mask + */ + cpumask_copy(&new_affinity, mask); + cpumask_clear_cpu(cpu, &new_affinity); + } else { + /* Only CPU, put on the lowest online CPU */ + cpumask_clear(&new_affinity); + cpumask_set_cpu(cpumask_first(cpu_online_mask), &new_affinity); + } + irq_set_affinity_locked(d, &new_affinity, false); +} + static int __init bcm7038_l1_init_one(struct device_node *dn, unsigned int idx, struct bcm7038_l1_chip *intc) @@ -266,6 +291,7 @@ static struct irq_chip 
bcm7038_l1_irq_chip = { .irq_mask = bcm7038_l1_mask, .irq_unmask = bcm7038_l1_unmask, .irq_set_affinity = bcm7038_l1_set_affinity, + .irq_cpu_offline = bcm7038_l1_cpu_offline, }; static int bcm7038_l1_map(struct irq_domain *d, unsigned int virq, diff --git a/drivers/isdn/hardware/eicon/message.c b/drivers/isdn/hardware/eicon/message.c index 1a1d99704fe694ad2f0c19933fdfc7df89803da9..296f1411fe84208d8c2511b8866d3f6936f8254a 100644 --- a/drivers/isdn/hardware/eicon/message.c +++ b/drivers/isdn/hardware/eicon/message.c @@ -11297,7 +11297,8 @@ static void mixer_notify_update(PLCI *plci, byte others) ((CAPI_MSG *) msg)->header.ncci = 0; ((CAPI_MSG *) msg)->info.facility_req.Selector = SELECTOR_LINE_INTERCONNECT; ((CAPI_MSG *) msg)->info.facility_req.structs[0] = 3; - PUT_WORD(&(((CAPI_MSG *) msg)->info.facility_req.structs[1]), LI_REQ_SILENT_UPDATE); + ((CAPI_MSG *) msg)->info.facility_req.structs[1] = LI_REQ_SILENT_UPDATE & 0xff; + ((CAPI_MSG *) msg)->info.facility_req.structs[2] = LI_REQ_SILENT_UPDATE >> 8; ((CAPI_MSG *) msg)->info.facility_req.structs[3] = 0; w = api_put(notify_plci->appl, (CAPI_MSG *) msg); if (w != _QUEUE_FULL) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index f550da3beabd7494ace63a88df7f2f42dda087bf..1e66909af4bcd81b8b3316b490196875691650b7 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -458,6 +458,21 @@ config DM_VERITY If unsure, say N. +config DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 + bool "Prefetch size 128" + +config DM_VERITY_HASH_PREFETCH_MIN_SIZE + int "Verity hash prefetch minimum size" + depends on DM_VERITY + range 1 4096 + default 128 if DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 + default 1 + ---help--- + This sets minimum number of hash blocks to prefetch for dm-verity. + For devices like eMMC, having larger prefetch size like 128 can improve + performance with increased memory consumption for keeping more hashes + in RAM. + config DM_VERITY_FEC bool "Verity forward error correction support" depends on DM_VERITY @@ -510,6 +525,7 @@ config DM_ANDROID_VERITY depends on ASYMMETRIC_KEY_TYPE depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE depends on MD_LINEAR + select DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 ---help--- This device-mapper target is virtually a VERITY target. 
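For reference, a minimal sketch (not part of the patch itself) of how the new CONFIG_DM_VERITY_HASH_PREFETCH_MIN_SIZE is meant to be applied; it mirrors the dm-verity-target.c hunk later in this series, and every name used here comes from that hunk rather than from any new API:

	/* Sketch only: widen each hash prefetch to at least the configured
	 * minimum, but never read past the end of the hash area. */
	sector_t span = hash_block_end - hash_block_start + 1;
	sector_t prefetch_size = max((sector_t)CONFIG_DM_VERITY_HASH_PREFETCH_MIN_SIZE, span);

	if (hash_block_start + prefetch_size >= v->hash_start + v->hash_blocks)
		prefetch_size = span;	/* clamp near the end of the hash device */

	dm_bufio_prefetch(v->bufio, hash_block_start, prefetch_size);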
This target is setup by reading the metadata contents piggybacked diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index bb6c1285e499b466b3d5d2b216eb52e3c50f3965..881e7099d4012506c0939dfd412b392e8b56fa52 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -635,6 +635,7 @@ static int add_as_linear_device(struct dm_target *ti, char *dev) android_verity_target.status = dm_linear_status, android_verity_target.prepare_ioctl = dm_linear_prepare_ioctl, android_verity_target.iterate_devices = dm_linear_iterate_devices, + android_verity_target.direct_access = dm_linear_direct_access, android_verity_target.io_hints = NULL; err = dm_linear_ctr(ti, DM_LINEAR_ARGS, linear_table_args); diff --git a/drivers/md/dm-android-verity.h b/drivers/md/dm-android-verity.h index 0c7ff6afec69c04f41f0c7ab6762777004f96d74..c8d7ab642780d444fec8b1464c8fa26f4043097a 100644 --- a/drivers/md/dm-android-verity.h +++ b/drivers/md/dm-android-verity.h @@ -118,4 +118,6 @@ extern int dm_linear_prepare_ioctl(struct dm_target *ti, extern int dm_linear_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data); extern int dm_linear_ctr(struct dm_target *ti, unsigned int argc, char **argv); +extern long dm_linear_direct_access(struct dm_target *ti, sector_t sector, + void **kaddr, pfn_t *pfn, long size); #endif /* DM_ANDROID_VERITY_H */ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 715cc52ab01b2024ddb80f5e84f675cc482d9f2a..7a5b75fd39d6f9566a03c627f641a2d32af5a45f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1503,12 +1503,15 @@ static int crypt_set_key(struct crypt_config *cc, char *key) if (!cc->key_size && strcmp(key, "-")) goto out; + /* clear the flag since following operations may invalidate previously valid key */ + clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); + if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) goto out; - set_bit(DM_CRYPT_KEY_VALID, &cc->flags); - r = crypt_setkey_allcpus(cc); + if (!r) + set_bit(DM_CRYPT_KEY_VALID, &cc->flags); out: /* Hex key string not needed after here, so wipe it. 
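As an aside on the dm-crypt hunk above, a condensed, illustrative view of the ordering crypt_set_key() now enforces (surrounding error handling trimmed): the valid-key flag is dropped before any operation that could invalidate the old key, and only restored once crypt_setkey_allcpus() has succeeded:

	clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);	/* old key may become invalid below */

	if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0)
		goto out;

	r = crypt_setkey_allcpus(cc);
	if (!r)
		set_bit(DM_CRYPT_KEY_VALID, &cc->flags);	/* mark valid only on success */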
*/ diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 6a2e8dd44a1b83b8fb45a4532458db3fb58db526..3643cba713518e9fdba01662c3516491bdb7d047 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -200,11 +200,13 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (!(fc->up_interval + fc->down_interval)) { ti->error = "Total (up + down) interval is zero"; + r = -EINVAL; goto bad; } if (fc->up_interval + fc->down_interval < fc->up_interval) { ti->error = "Interval overflow"; + r = -EINVAL; goto bad; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 760a464e2be883bf75993d3c8f42f06410c0cf11..4ad62d680547ab8cffca8dafde0ef0cf08395103 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -147,7 +147,7 @@ int dm_linear_iterate_devices(struct dm_target *ti, } EXPORT_SYMBOL_GPL(dm_linear_iterate_devices); -static long linear_direct_access(struct dm_target *ti, sector_t sector, +long dm_linear_direct_access(struct dm_target *ti, sector_t sector, void **kaddr, pfn_t *pfn, long size) { struct linear_c *lc = ti->private; @@ -164,6 +164,7 @@ static long linear_direct_access(struct dm_target *ti, sector_t sector, return ret; } +EXPORT_SYMBOL_GPL(dm_linear_direct_access); static struct target_type linear_target = { .name = "linear", @@ -173,9 +174,9 @@ static struct target_type linear_target = { .dtr = dm_linear_dtr, .map = dm_linear_map, .status = dm_linear_status, - .prepare_ioctl = dm_linear_prepare_ioctl, + .prepare_ioctl = dm_linear_prepare_ioctl, .iterate_devices = dm_linear_iterate_devices, - .direct_access = linear_direct_access, + .direct_access = dm_linear_direct_access, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 6d53810963f7531a7e5048dad5c55ef53e8aa914..af2d79b52484b64099903b6d905dba97d9bb15fe 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2994,6 +2994,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) } } + /* Disable/enable discard support on raid set. */ + configure_discard_support(rs); + mddev_unlock(&rs->md); return 0; @@ -3580,12 +3583,6 @@ static int raid_preresume(struct dm_target *ti) if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags)) rs_update_sbs(rs); - /* - * Disable/enable discard support on raid set after any - * conversion, because devices can have been added - */ - configure_discard_support(rs); - /* Load the bitmap from disk unless raid0 */ r = __load_dirty_region_bitmap(rs); if (r) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 1d0d2adc050a5539a4b430bd9e055f99e1abb5d7..31a89c8832c0ed48e7887930542cc4c8f39ecc10 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -226,6 +226,9 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig) */ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) { + struct request_queue *q = md->queue; + unsigned long flags; + atomic_dec(&md->pending[rw]); /* nudge anyone waiting on suspend queue */ @@ -238,8 +241,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) * back into ->request_fn() could deadlock attempting to grab the * queue lock again. */ - if (!md->queue->mq_ops && run_queue) - blk_run_queue_async(md->queue); + if (!q->mq_ops && run_queue) { + spin_lock_irqsave(q->queue_lock, flags); + blk_run_queue_async(q); + spin_unlock_irqrestore(q->queue_lock, flags); + } /* * dm_put() must be at the end of this function. 
See the comment above diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 399bcacb6d7c93a2385d1fcb66e952cb107ab390..d837a289429ac32722371c3cbafa91e7d9e4e491 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -925,12 +925,6 @@ static int dm_table_determine_type(struct dm_table *t) BUG_ON(!request_based); /* No targets in this table */ - if (list_empty(devices) && __table_type_request_based(live_md_type)) { - /* inherit live MD type */ - t->type = live_md_type; - return 0; - } - /* * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by * having a compatible target use dm_table_set_type. @@ -949,6 +943,19 @@ static int dm_table_determine_type(struct dm_table *t) return -EINVAL; } + if (list_empty(devices)) { + int srcu_idx; + struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx); + + /* inherit live table's type and all_blk_mq */ + if (live_table) { + t->type = live_table->type; + t->all_blk_mq = live_table->all_blk_mq; + } + dm_put_live_table(t->md, srcu_idx); + return 0; + } + /* Non-request-stackable devices can't be used for request-based dm */ list_for_each_entry(dd, devices, list) { struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev); @@ -975,6 +982,11 @@ static int dm_table_determine_type(struct dm_table *t) t->all_blk_mq = true; } + if (t->type == DM_TYPE_MQ_REQUEST_BASED && !t->all_blk_mq) { + DMERR("table load rejected: all devices are not blk-mq request-stackable"); + return -EINVAL; + } + return 0; } diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h index f36a67720989daf7934e0e6de5d1ed40f4c72ab4..4db0cae262eb7ccc489ee3ce44fbde7660626fdc 100644 --- a/drivers/md/dm-verity-fec.h +++ b/drivers/md/dm-verity-fec.h @@ -12,6 +12,7 @@ #ifndef DM_VERITY_FEC_H #define DM_VERITY_FEC_H +#include "dm.h" #include "dm-core.h" #include "dm-verity.h" #include diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index b53539e26bc2f079361e93ea4dd324f5d1812002..5d0a9963b10855b4165aee50b10c11053b2b0078 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -501,6 +501,7 @@ static void verity_prefetch_io(struct work_struct *work) container_of(work, struct dm_verity_prefetch_work, work); struct dm_verity *v = pw->v; int i; + sector_t prefetch_size; for (i = v->levels - 2; i >= 0; i--) { sector_t hash_block_start; @@ -523,8 +524,14 @@ static void verity_prefetch_io(struct work_struct *work) hash_block_end = v->hash_blocks - 1; } no_prefetch_cluster: + // for emmc, it is more efficient to send bigger read + prefetch_size = max((sector_t)CONFIG_DM_VERITY_HASH_PREFETCH_MIN_SIZE, + hash_block_end - hash_block_start + 1); + if ((hash_block_start + prefetch_size) >= (v->hash_start + v->hash_blocks)) { + prefetch_size = hash_block_end - hash_block_start + 1; + } dm_bufio_prefetch(v->bufio, hash_block_start, - hash_block_end - hash_block_start + 1); + prefetch_size); } kfree(pw); diff --git a/drivers/md/md.c b/drivers/md/md.c index 2089d46b0eb89cf280bd6d90202a9caaeff13d47..24925f2aa23539e8b870048629cf2f7373c6e5d9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6829,7 +6829,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, /* need to ensure recovery thread has run */ wait_event_interruptible_timeout(mddev->sb_wait, !test_bit(MD_RECOVERY_NEEDED, - &mddev->flags), + &mddev->recovery), msecs_to_jiffies(5000)); if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) { /* Need to flush page cache, and ensure no-one else opens @@ -7092,7 +7092,8 @@ static int md_open(struct block_device 
*bdev, fmode_t mode) if (test_bit(MD_CLOSING, &mddev->flags)) { mutex_unlock(&mddev->open_mutex); - return -ENODEV; + err = -ENODEV; + goto out; } err = 0; @@ -7101,6 +7102,8 @@ static int md_open(struct block_device *bdev, fmode_t mode) check_disk_change(bdev); out: + if (err) + mddev_put(mddev); return err; } diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 7e44005595c1e77d1982e7e89b1259ddea97bf9a..20557e2c60c6236bf16cc35e33d3f9eb0e318779 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -775,17 +775,15 @@ int dm_sm_metadata_create(struct dm_space_map *sm, memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); r = sm_ll_new_metadata(&smm->ll, tm); + if (!r) { + if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) + nr_blocks = DM_SM_METADATA_MAX_BLOCKS; + r = sm_ll_extend(&smm->ll, nr_blocks); + } + memcpy(&smm->sm, &ops, sizeof(smm->sm)); if (r) return r; - if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) - nr_blocks = DM_SM_METADATA_MAX_BLOCKS; - r = sm_ll_extend(&smm->ll, nr_blocks); - if (r) - return r; - - memcpy(&smm->sm, &ops, sizeof(smm->sm)); - /* * Now we need to update the newly created data structures with the * allocated blocks that they were built from. diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 92ac251e91e62e94043ef35a78b911a66c00e741..cce6057b9aca5b038e9109430c8a68a8b8a9b6f7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6984,6 +6984,15 @@ static int raid5_run(struct mddev *mddev) stripe = (stripe | (stripe-1)) + 1; mddev->queue->limits.discard_alignment = stripe; mddev->queue->limits.discard_granularity = stripe; + + /* + * We use 16-bit counter of active stripes in bi_phys_segments + * (minus one for over-loaded initialization) + */ + blk_queue_max_hw_sectors(mddev->queue, 0xfffe * STRIPE_SECTORS); + blk_queue_max_discard_sectors(mddev->queue, + 0xfffe * STRIPE_SECTORS); + /* * unaligned part of discard request will be ignored, so can't * guarantee discard_zeroes_data diff --git a/drivers/media/dvb-frontends/mn88472.c b/drivers/media/dvb-frontends/mn88472.c index 18fb2df1e2bd470cfa6a836033abe3da8c19d67c..72650116732cb7e7558430bedccbbeea75a813fc 100644 --- a/drivers/media/dvb-frontends/mn88472.c +++ b/drivers/media/dvb-frontends/mn88472.c @@ -488,18 +488,6 @@ static int mn88472_probe(struct i2c_client *client, goto err_kfree; } - /* Check demod answers with correct chip id */ - ret = regmap_read(dev->regmap[0], 0xff, &utmp); - if (ret) - goto err_regmap_0_regmap_exit; - - dev_dbg(&client->dev, "chip id=%02x\n", utmp); - - if (utmp != 0x02) { - ret = -ENODEV; - goto err_regmap_0_regmap_exit; - } - /* * Chip has three I2C addresses for different register banks. Used * addresses are 0x18, 0x1a and 0x1c. 
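As a rough sanity check on the raid5 request cap above (assuming 4 KiB pages, where STRIPE_SECTORS is PAGE_SIZE >> 9 = 8):

	/* bi_phys_segments is reused as a 16-bit count of active stripes, and
	 * one count is burned by the over-loaded initialization, leaving
	 * 0xfffe stripes per bio.  0xfffe stripes * 8 sectors/stripe =
	 * 524272 sectors (~256 MiB), which is the value passed to
	 * blk_queue_max_hw_sectors() and blk_queue_max_discard_sectors(). */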
We register two dummy clients, @@ -536,6 +524,18 @@ static int mn88472_probe(struct i2c_client *client, } i2c_set_clientdata(dev->client[2], dev); + /* Check demod answers with correct chip id */ + ret = regmap_read(dev->regmap[2], 0xff, &utmp); + if (ret) + goto err_regmap_2_regmap_exit; + + dev_dbg(&client->dev, "chip id=%02x\n", utmp); + + if (utmp != 0x02) { + ret = -ENODEV; + goto err_regmap_2_regmap_exit; + } + /* Sleep because chip is active by default */ ret = regmap_write(dev->regmap[2], 0x05, 0x3e); if (ret) diff --git a/drivers/media/dvb-frontends/mn88473.c b/drivers/media/dvb-frontends/mn88473.c index 451974a1d7ed46fa8b42062d018eefdbed742e32..2932bdc8fa94b8358b8c34bf4375a757e107e8f2 100644 --- a/drivers/media/dvb-frontends/mn88473.c +++ b/drivers/media/dvb-frontends/mn88473.c @@ -485,18 +485,6 @@ static int mn88473_probe(struct i2c_client *client, goto err_kfree; } - /* Check demod answers with correct chip id */ - ret = regmap_read(dev->regmap[0], 0xff, &uitmp); - if (ret) - goto err_regmap_0_regmap_exit; - - dev_dbg(&client->dev, "chip id=%02x\n", uitmp); - - if (uitmp != 0x03) { - ret = -ENODEV; - goto err_regmap_0_regmap_exit; - } - /* * Chip has three I2C addresses for different register banks. Used * addresses are 0x18, 0x1a and 0x1c. We register two dummy clients, @@ -533,6 +521,18 @@ static int mn88473_probe(struct i2c_client *client, } i2c_set_clientdata(dev->client[2], dev); + /* Check demod answers with correct chip id */ + ret = regmap_read(dev->regmap[2], 0xff, &uitmp); + if (ret) + goto err_regmap_2_regmap_exit; + + dev_dbg(&client->dev, "chip id=%02x\n", uitmp); + + if (uitmp != 0x03) { + ret = -ENODEV; + goto err_regmap_2_regmap_exit; + } + /* Sleep because chip is active by default */ ret = regmap_write(dev->regmap[2], 0x05, 0x3e); if (ret) diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index 2669b4bad9106853e269670dbecd5987f044e029..5a27bffa02fb5aad5a0b3ff2915293445a3db2b8 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -655,6 +655,7 @@ config VIDEO_S5K6A3 config VIDEO_S5K4ECGX tristate "Samsung S5K4ECGX sensor support" depends on I2C && VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API + select CRC32 ---help--- This is a V4L2 sensor-level driver for Samsung S5K4ECGX 5M camera sensor with an embedded SoC image signal processor. diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 4740da39d6988a19574d8f040bb20038ab4b6f67..59aa4dafb60b0d0951b57e6bdf2413d9a7769b9c 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -288,8 +288,12 @@ static inline void tvp5150_selmux(struct v4l2_subdev *sd) tvp5150_write(sd, TVP5150_OP_MODE_CTL, opmode); tvp5150_write(sd, TVP5150_VD_IN_SRC_SEL_1, input); - /* Svideo should enable YCrCb output and disable GPCL output - * For Composite and TV, it should be the reverse + /* + * Setup the FID/GLCO/VLK/HVLK and INTREQ/GPCL/VBLK output signals. For + * S-Video we output the vertical lock (VLK) signal on FID/GLCO/VLK/HVLK + * and set INTREQ/GPCL/VBLK to logic 0. For composite we output the + * field indicator (FID) signal on FID/GLCO/VLK/HVLK and set + * INTREQ/GPCL/VBLK to logic 1. 
*/ val = tvp5150_read(sd, TVP5150_MISC_CTL); if (val < 0) { @@ -298,9 +302,9 @@ static inline void tvp5150_selmux(struct v4l2_subdev *sd) } if (decoder->input == TVP5150_SVIDEO) - val = (val & ~0x40) | 0x10; + val = (val & ~TVP5150_MISC_CTL_GPCL) | TVP5150_MISC_CTL_HVLK; else - val = (val & ~0x10) | 0x40; + val = (val & ~TVP5150_MISC_CTL_HVLK) | TVP5150_MISC_CTL_GPCL; tvp5150_write(sd, TVP5150_MISC_CTL, val); }; @@ -452,7 +456,12 @@ static const struct i2c_reg_value tvp5150_init_enable[] = { },{ /* Automatic offset and AGC enabled */ TVP5150_ANAL_CHL_CTL, 0x15 },{ /* Activate YCrCb output 0x9 or 0xd ? */ - TVP5150_MISC_CTL, 0x6f + TVP5150_MISC_CTL, TVP5150_MISC_CTL_GPCL | + TVP5150_MISC_CTL_INTREQ_OE | + TVP5150_MISC_CTL_YCBCR_OE | + TVP5150_MISC_CTL_SYNC_OE | + TVP5150_MISC_CTL_VBLANK | + TVP5150_MISC_CTL_CLOCK_OE, },{ /* Activates video std autodetection for all standards */ TVP5150_AUTOSW_MSK, 0x0 },{ /* Default format: 0x47. For 4:2:2: 0x40 */ @@ -815,6 +824,7 @@ static int tvp5150_s_ctrl(struct v4l2_ctrl *ctrl) return 0; case V4L2_CID_HUE: tvp5150_write(sd, TVP5150_HUE_CTL, ctrl->val); + break; case V4L2_CID_TEST_PATTERN: decoder->enable = ctrl->val ? false : true; tvp5150_selmux(sd); @@ -857,8 +867,6 @@ static int tvp5150_fill_fmt(struct v4l2_subdev *sd, f = &format->format; - tvp5150_reset(sd, 0); - f->width = decoder->rect.width; f->height = decoder->rect.height / 2; @@ -1047,21 +1055,27 @@ static const struct media_entity_operations tvp5150_sd_media_ops = { static int tvp5150_s_stream(struct v4l2_subdev *sd, int enable) { struct tvp5150 *decoder = to_tvp5150(sd); - /* Output format: 8-bit ITU-R BT.656 with embedded syncs */ - int val = 0x09; - - /* Output format: 8-bit 4:2:2 YUV with discrete sync */ - if (decoder->mbus_type == V4L2_MBUS_PARALLEL) - val = 0x0d; + int val; - /* Initializes TVP5150 to its default values */ - /* # set PCLK (27MHz) */ - tvp5150_write(sd, TVP5150_CONF_SHARED_PIN, 0x00); + /* Enable or disable the video output signals. */ + val = tvp5150_read(sd, TVP5150_MISC_CTL); + if (val < 0) + return val; + + val &= ~(TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_SYNC_OE | + TVP5150_MISC_CTL_CLOCK_OE); + + if (enable) { + /* + * Enable the YCbCr and clock outputs. In discrete sync mode + * (non-BT.656) additionally enable the sync outputs.
+ */ + val |= TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_CLOCK_OE; + if (decoder->mbus_type == V4L2_MBUS_PARALLEL) + val |= TVP5150_MISC_CTL_SYNC_OE; + } - if (enable) - tvp5150_write(sd, TVP5150_MISC_CTL, val); - else - tvp5150_write(sd, TVP5150_MISC_CTL, 0x00); + tvp5150_write(sd, TVP5150_MISC_CTL, val); return 0; } @@ -1520,7 +1534,6 @@ static int tvp5150_probe(struct i2c_client *c, res = core->hdl.error; goto err; } - v4l2_ctrl_handler_setup(&core->hdl); /* Default is no cropping */ core->rect.top = 0; @@ -1531,6 +1544,8 @@ static int tvp5150_probe(struct i2c_client *c, core->rect.left = 0; core->rect.width = TVP5150_H_MAX; + tvp5150_reset(sd, 0); /* Calls v4l2_ctrl_handler_setup() */ + res = v4l2_async_register_subdev(sd); if (res < 0) goto err; diff --git a/drivers/media/i2c/tvp5150_reg.h b/drivers/media/i2c/tvp5150_reg.h index 25a994944918703f064eee85a6017ca7010dca4b..30a48c28d05ab5d46d9eff505005ebf3f90e6409 100644 --- a/drivers/media/i2c/tvp5150_reg.h +++ b/drivers/media/i2c/tvp5150_reg.h @@ -9,6 +9,15 @@ #define TVP5150_ANAL_CHL_CTL 0x01 /* Analog channel controls */ #define TVP5150_OP_MODE_CTL 0x02 /* Operation mode controls */ #define TVP5150_MISC_CTL 0x03 /* Miscellaneous controls */ +#define TVP5150_MISC_CTL_VBLK_GPCL BIT(7) +#define TVP5150_MISC_CTL_GPCL BIT(6) +#define TVP5150_MISC_CTL_INTREQ_OE BIT(5) +#define TVP5150_MISC_CTL_HVLK BIT(4) +#define TVP5150_MISC_CTL_YCBCR_OE BIT(3) +#define TVP5150_MISC_CTL_SYNC_OE BIT(2) +#define TVP5150_MISC_CTL_VBLANK BIT(1) +#define TVP5150_MISC_CTL_CLOCK_OE BIT(0) + #define TVP5150_AUTOSW_MSK 0x04 /* Autoswitch mask: TVP5150A / TVP5150AM */ /* Reserved 05h */ diff --git a/drivers/media/pci/solo6x10/solo6x10.h b/drivers/media/pci/solo6x10/solo6x10.h index 5bd498735a6655fe61edc68a160b3c2f434fcad3..3f8da5e8c430198a625ced1a8d76b272a31f08ca 100644 --- a/drivers/media/pci/solo6x10/solo6x10.h +++ b/drivers/media/pci/solo6x10/solo6x10.h @@ -284,7 +284,10 @@ static inline u32 solo_reg_read(struct solo_dev *solo_dev, int reg) static inline void solo_reg_write(struct solo_dev *solo_dev, int reg, u32 data) { + u16 val; + writel(data, solo_dev->reg_base + reg); + pci_read_config_word(solo_dev->pdev, PCI_STATUS, &val); } static inline void solo_irq_on(struct solo_dev *dev, u32 mask) diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index 02dd73ab72e84eead12eebf93e52d04223a55753..493035b8eff49ec7eaebc0667f7eede15c946190 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -93,7 +93,7 @@ config VIDEO_OMAP3_DEBUG config VIDEO_PXA27x tristate "PXA27x Quick Capture Interface driver" - depends on VIDEO_DEV && HAS_DMA + depends on VIDEO_DEV && VIDEO_V4L2 && HAS_DMA depends on PXA27x || COMPILE_TEST select VIDEOBUF2_DMA_SG select SG_SPLIT diff --git a/drivers/media/platform/blackfin/ppi.c b/drivers/media/platform/blackfin/ppi.c index cff63e511e6d9a99434deb6a2d776058018eb34e..b8f3d9fa66e907ca54c0e61972c5c2bbacfad2b5 100644 --- a/drivers/media/platform/blackfin/ppi.c +++ b/drivers/media/platform/blackfin/ppi.c @@ -214,6 +214,8 @@ static int ppi_set_params(struct ppi_if *ppi, struct ppi_params *params) if (params->dlen > 24 || params->dlen <= 0) return -EINVAL; pctrl = devm_pinctrl_get(ppi->dev); + if (IS_ERR(pctrl)) + return PTR_ERR(pctrl); pstate = pinctrl_lookup_state(pctrl, pin_state[(params->dlen + 7) / 8 - 1]); if (pinctrl_select_state(pctrl, pstate)) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index 
0a5b8f5e011e7ec83ddafee74165989f603cb1d4..27e7cf65c2a77a367fa1c8ce0537ab29040fac9d 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -926,10 +926,11 @@ static int s5p_mfc_release(struct file *file) mfc_debug_enter(); if (dev) mutex_lock(&dev->mfc_mutex); - s5p_mfc_clock_on(); vb2_queue_release(&ctx->vq_src); vb2_queue_release(&ctx->vq_dst); if (dev) { + s5p_mfc_clock_on(); + /* Mark context as idle */ clear_work_bit_irqsave(ctx); /* @@ -951,9 +952,9 @@ static int s5p_mfc_release(struct file *file) if (s5p_mfc_power_off() < 0) mfc_err("Power off failed\n"); } + mfc_debug(2, "Shutting down clock\n"); + s5p_mfc_clock_off(); } - mfc_debug(2, "Shutting down clock\n"); - s5p_mfc_clock_off(); if (dev) dev->ctx[ctx->num] = NULL; s5p_mfc_dec_ctrls_delete(ctx); @@ -1082,6 +1083,7 @@ static struct device *s5p_mfc_alloc_memdev(struct device *dev, idx); if (ret == 0) return child; + device_del(child); } put_device(child); diff --git a/drivers/media/platform/sti/hva/hva-hw.c b/drivers/media/platform/sti/hva/hva-hw.c index d341d49945289922df5168e031fec0aff89ad7c2..cf2a8d884536729217d6c2a73c524cd78ae48234 100644 --- a/drivers/media/platform/sti/hva/hva-hw.c +++ b/drivers/media/platform/sti/hva/hva-hw.c @@ -305,16 +305,16 @@ int hva_hw_probe(struct platform_device *pdev, struct hva_dev *hva) /* get memory for registers */ regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); hva->regs = devm_ioremap_resource(dev, regs); - if (IS_ERR_OR_NULL(hva->regs)) { + if (IS_ERR(hva->regs)) { dev_err(dev, "%s failed to get regs\n", HVA_PREFIX); return PTR_ERR(hva->regs); } /* get memory for esram */ esram = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (IS_ERR_OR_NULL(esram)) { + if (!esram) { dev_err(dev, "%s failed to get esram\n", HVA_PREFIX); - return PTR_ERR(esram); + return -ENODEV; } hva->esram_addr = esram->start; hva->esram_size = resource_size(esram); diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c index 0f301903aa6ffd43209c2b03854c2c8bef87d368..63165d324fffdb2ddbbfa81478b8d38618921775 100644 --- a/drivers/media/rc/ite-cir.c +++ b/drivers/media/rc/ite-cir.c @@ -263,6 +263,8 @@ static void ite_set_carrier_params(struct ite_dev *dev) if (allowance > ITE_RXDCR_MAX) allowance = ITE_RXDCR_MAX; + + use_demodulator = true; } } diff --git a/drivers/media/spi/gs1662.c b/drivers/media/spi/gs1662.c index d76f36233f430541791441301ea8cff29eab52dd..5143a90219c0842ebb18efcd4b4049a60db70913 100644 --- a/drivers/media/spi/gs1662.c +++ b/drivers/media/spi/gs1662.c @@ -453,10 +453,9 @@ static int gs_probe(struct spi_device *spi) static int gs_remove(struct spi_device *spi) { struct v4l2_subdev *sd = spi_get_drvdata(spi); - struct gs *gs = to_gs(sd); v4l2_device_unregister_subdev(sd); - kfree(gs); + return 0; } diff --git a/drivers/media/usb/dvb-usb/dibusb-common.c b/drivers/media/usb/dvb-usb/dibusb-common.c index de3ee2547479428cd1c14347db5332ea1090c60d..8207e6900656bef61de4f0df10b37b5cc6110f98 100644 --- a/drivers/media/usb/dvb-usb/dibusb-common.c +++ b/drivers/media/usb/dvb-usb/dibusb-common.c @@ -382,9 +382,9 @@ int dibusb_rc_query(struct dvb_usb_device *d, u32 *event, int *state) if (buf[0] != 0) deb_info("key: %*ph\n", 5, buf); +ret: kfree(buf); -ret: return ret; } EXPORT_SYMBOL(dibusb_rc_query); diff --git a/drivers/media/usb/dvb-usb/pctv452e.c b/drivers/media/usb/dvb-usb/pctv452e.c index 07fa08be9e994a3f7d5952251b73f871fe4ecdca..d54ebe7e02150f7240f58da51a32d025026bdf34 100644 --- a/drivers/media/usb/dvb-usb/pctv452e.c +++ 
b/drivers/media/usb/dvb-usb/pctv452e.c @@ -97,14 +97,13 @@ struct pctv452e_state { u8 c; /* transaction counter, wraps around... */ u8 initialized; /* set to 1 if 0x15 has been sent */ u16 last_rc_key; - - unsigned char data[80]; }; static int tt3650_ci_msg(struct dvb_usb_device *d, u8 cmd, u8 *data, unsigned int write_len, unsigned int read_len) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; + u8 *buf; u8 id; unsigned int rlen; int ret; @@ -114,36 +113,39 @@ static int tt3650_ci_msg(struct dvb_usb_device *d, u8 cmd, u8 *data, return -EIO; } - mutex_lock(&state->ca_mutex); + buf = kmalloc(64, GFP_KERNEL); + if (!buf) + return -ENOMEM; + id = state->c++; - state->data[0] = SYNC_BYTE_OUT; - state->data[1] = id; - state->data[2] = cmd; - state->data[3] = write_len; + buf[0] = SYNC_BYTE_OUT; + buf[1] = id; + buf[2] = cmd; + buf[3] = write_len; - memcpy(state->data + 4, data, write_len); + memcpy(buf + 4, data, write_len); rlen = (read_len > 0) ? 64 : 0; - ret = dvb_usb_generic_rw(d, state->data, 4 + write_len, - state->data, rlen, /* delay_ms */ 0); + ret = dvb_usb_generic_rw(d, buf, 4 + write_len, + buf, rlen, /* delay_ms */ 0); if (0 != ret) goto failed; ret = -EIO; - if (SYNC_BYTE_IN != state->data[0] || id != state->data[1]) + if (SYNC_BYTE_IN != buf[0] || id != buf[1]) goto failed; - memcpy(data, state->data + 4, read_len); + memcpy(data, buf + 4, read_len); - mutex_unlock(&state->ca_mutex); + kfree(buf); return 0; failed: err("CI error %d; %02X %02X %02X -> %*ph.", - ret, SYNC_BYTE_OUT, id, cmd, 3, state->data); + ret, SYNC_BYTE_OUT, id, cmd, 3, buf); - mutex_unlock(&state->ca_mutex); + kfree(buf); return ret; } @@ -410,53 +412,57 @@ static int pctv452e_i2c_msg(struct dvb_usb_device *d, u8 addr, u8 *rcv_buf, u8 rcv_len) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; + u8 *buf; u8 id; int ret; - mutex_lock(&state->ca_mutex); + buf = kmalloc(64, GFP_KERNEL); + if (!buf) + return -ENOMEM; + id = state->c++; ret = -EINVAL; if (snd_len > 64 - 7 || rcv_len > 64 - 7) goto failed; - state->data[0] = SYNC_BYTE_OUT; - state->data[1] = id; - state->data[2] = PCTV_CMD_I2C; - state->data[3] = snd_len + 3; - state->data[4] = addr << 1; - state->data[5] = snd_len; - state->data[6] = rcv_len; + buf[0] = SYNC_BYTE_OUT; + buf[1] = id; + buf[2] = PCTV_CMD_I2C; + buf[3] = snd_len + 3; + buf[4] = addr << 1; + buf[5] = snd_len; + buf[6] = rcv_len; - memcpy(state->data + 7, snd_buf, snd_len); + memcpy(buf + 7, snd_buf, snd_len); - ret = dvb_usb_generic_rw(d, state->data, 7 + snd_len, - state->data, /* rcv_len */ 64, + ret = dvb_usb_generic_rw(d, buf, 7 + snd_len, + buf, /* rcv_len */ 64, /* delay_ms */ 0); if (ret < 0) goto failed; /* TT USB protocol error. */ ret = -EIO; - if (SYNC_BYTE_IN != state->data[0] || id != state->data[1]) + if (SYNC_BYTE_IN != buf[0] || id != buf[1]) goto failed; /* I2C device didn't respond as expected. 
*/ ret = -EREMOTEIO; - if (state->data[5] < snd_len || state->data[6] < rcv_len) + if (buf[5] < snd_len || buf[6] < rcv_len) goto failed; - memcpy(rcv_buf, state->data + 7, rcv_len); - mutex_unlock(&state->ca_mutex); + memcpy(rcv_buf, buf + 7, rcv_len); + kfree(buf); return rcv_len; failed: err("I2C error %d; %02X %02X %02X %02X %02X -> %*ph", ret, SYNC_BYTE_OUT, id, addr << 1, snd_len, rcv_len, - 7, state->data); + 7, buf); - mutex_unlock(&state->ca_mutex); + kfree(buf); return ret; } @@ -505,7 +511,7 @@ static u32 pctv452e_i2c_func(struct i2c_adapter *adapter) static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; - u8 *rx; + u8 *b0, *rx; int ret; info("%s: %d\n", __func__, i); @@ -516,11 +522,12 @@ static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) if (state->initialized) return 0; - rx = kmalloc(PCTV_ANSWER_LEN, GFP_KERNEL); - if (!rx) + b0 = kmalloc(5 + PCTV_ANSWER_LEN, GFP_KERNEL); + if (!b0) return -ENOMEM; - mutex_lock(&state->ca_mutex); + rx = b0 + 5; + /* hmm where shoud this should go? */ ret = usb_set_interface(d->udev, 0, ISOC_INTERFACE_ALTERNATIVE); if (ret != 0) @@ -528,66 +535,70 @@ static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) __func__, ret); /* this is a one-time initialization, dont know where to put */ - state->data[0] = 0xaa; - state->data[1] = state->c++; - state->data[2] = PCTV_CMD_RESET; - state->data[3] = 1; - state->data[4] = 0; + b0[0] = 0xaa; + b0[1] = state->c++; + b0[2] = PCTV_CMD_RESET; + b0[3] = 1; + b0[4] = 0; /* reset board */ - ret = dvb_usb_generic_rw(d, state->data, 5, rx, PCTV_ANSWER_LEN, 0); + ret = dvb_usb_generic_rw(d, b0, 5, rx, PCTV_ANSWER_LEN, 0); if (ret) goto ret; - state->data[1] = state->c++; - state->data[4] = 1; + b0[1] = state->c++; + b0[4] = 1; /* reset board (again?) 
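The pctv452e hunks in this patch all follow the same pattern; a stripped-down sketch using the names from the hunks above (the rationale is inferred from the diff, not stated in it: a per-call heap buffer keeps the USB transfer buffer kmalloc'd rather than shared in the device state, so the ca_mutex serialization can go away):

	u8 *buf = kmalloc(64, GFP_KERNEL);	/* per-call, DMA-capable transfer buffer */
	if (!buf)
		return -ENOMEM;

	buf[0] = SYNC_BYTE_OUT;
	/* ... fill in the command header and payload ... */
	ret = dvb_usb_generic_rw(d, buf, 4 + write_len, buf, rlen, 0);

	kfree(buf);	/* nothing shared between callers, no ca_mutex needed */
	return ret;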
*/ - ret = dvb_usb_generic_rw(d, state->data, 5, rx, PCTV_ANSWER_LEN, 0); + ret = dvb_usb_generic_rw(d, b0, 5, rx, PCTV_ANSWER_LEN, 0); if (ret) goto ret; state->initialized = 1; ret: - mutex_unlock(&state->ca_mutex); - kfree(rx); + kfree(b0); return ret; } static int pctv452e_rc_query(struct dvb_usb_device *d) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; + u8 *b, *rx; int ret, i; u8 id; - mutex_lock(&state->ca_mutex); + b = kmalloc(CMD_BUFFER_SIZE + PCTV_ANSWER_LEN, GFP_KERNEL); + if (!b) + return -ENOMEM; + + rx = b + CMD_BUFFER_SIZE; + id = state->c++; /* prepare command header */ - state->data[0] = SYNC_BYTE_OUT; - state->data[1] = id; - state->data[2] = PCTV_CMD_IR; - state->data[3] = 0; + b[0] = SYNC_BYTE_OUT; + b[1] = id; + b[2] = PCTV_CMD_IR; + b[3] = 0; /* send ir request */ - ret = dvb_usb_generic_rw(d, state->data, 4, - state->data, PCTV_ANSWER_LEN, 0); + ret = dvb_usb_generic_rw(d, b, 4, rx, PCTV_ANSWER_LEN, 0); if (ret != 0) goto ret; if (debug > 3) { - info("%s: read: %2d: %*ph: ", __func__, ret, 3, state->data); - for (i = 0; (i < state->data[3]) && ((i + 3) < PCTV_ANSWER_LEN); i++) - info(" %02x", state->data[i + 3]); + info("%s: read: %2d: %*ph: ", __func__, ret, 3, rx); + for (i = 0; (i < rx[3]) && ((i+3) < PCTV_ANSWER_LEN); i++) + info(" %02x", rx[i+3]); info("\n"); } - if ((state->data[3] == 9) && (state->data[12] & 0x01)) { + if ((rx[3] == 9) && (rx[12] & 0x01)) { /* got a "press" event */ - state->last_rc_key = RC_SCANCODE_RC5(state->data[7], state->data[6]); + state->last_rc_key = RC_SCANCODE_RC5(rx[7], rx[6]); if (debug > 2) info("%s: cmd=0x%02x sys=0x%02x\n", - __func__, state->data[6], state->data[7]); + __func__, rx[6], rx[7]); rc_keydown(d->rc_dev, RC_TYPE_RC5, state->last_rc_key, 0); } else if (state->last_rc_key) { @@ -595,7 +606,7 @@ static int pctv452e_rc_query(struct dvb_usb_device *d) state->last_rc_key = 0; } ret: - mutex_unlock(&state->ca_mutex); + kfree(b); return ret; } diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c index 9a4d8684dd32bd5beaf73547271bd2b1ee42431f..df2e7756927f20fd17ab07688cd6c4ba4a39fbc3 100644 --- a/drivers/mfd/tps65217.c +++ b/drivers/mfd/tps65217.c @@ -424,6 +424,24 @@ static int tps65217_probe(struct i2c_client *client, return 0; } +static int tps65217_remove(struct i2c_client *client) +{ + struct tps65217 *tps = i2c_get_clientdata(client); + unsigned int virq; + int i; + + for (i = 0; i < ARRAY_SIZE(tps65217_irqs); i++) { + virq = irq_find_mapping(tps->irq_domain, i); + if (virq) + irq_dispose_mapping(virq); + } + + irq_domain_remove(tps->irq_domain); + tps->irq_domain = NULL; + + return 0; +} + static const struct i2c_device_id tps65217_id_table[] = { {"tps65217", TPS65217}, { /* sentinel */ } @@ -437,6 +455,7 @@ static struct i2c_driver tps65217_driver = { }, .id_table = tps65217_id_table, .probe = tps65217_probe, + .remove = tps65217_remove, }; static int __init tps65217_init(void) diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 7ea17ec69e8c5a50d013b9c8b3ccc5907b4fdded..e62b43071203670eaa11a99ea9cdec7d68c21b81 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -57,6 +57,9 @@ obj-$(CONFIG_UID_CPUTIME) += uid_cputime.o obj-y += qcom/ obj-$(CONFIG_MEMORY_STATE_TIME) += memory_state_time.o +obj-$(CONFIG_UID_CPUTIME) += uid_cputime.o +obj-$(CONFIG_MEMORY_STATE_TIME) += memory_state_time.o + lkdtm-$(CONFIG_LKDTM) += lkdtm_core.o lkdtm-$(CONFIG_LKDTM) += lkdtm_bugs.o lkdtm-$(CONFIG_LKDTM) += lkdtm_heap.o diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c 
index 8cac7ef9ad0d98789c3f910dced1b61bc2b3ab62..dbe676de7a19443b462aaeda59ab86cb6c9d959e 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -408,7 +408,7 @@ bool mei_cldev_enabled(struct mei_cl_device *cldev) EXPORT_SYMBOL_GPL(mei_cldev_enabled); /** - * mei_cldev_enable_device - enable me client device + * mei_cldev_enable - enable me client device * create connection with me client * * @cldev: me client device diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 6fe02350578dce04a0ded5f34da8d69274986397..e2af61f7e3b659aebebc25409c9ff75a64010038 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -425,7 +425,7 @@ static inline void mei_io_list_free(struct mei_cl_cb *list, struct mei_cl *cl) * * @cl: host client * @length: size of the buffer - * @type: operation type + * @fop_type: operation type * @fp: associated file pointer (might be NULL) * * Return: cb on success and NULL on failure @@ -459,7 +459,7 @@ struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length, * * @cl: host client * @length: size of the buffer - * @type: operation type + * @fop_type: operation type * @fp: associated file pointer (might be NULL) * * Return: cb on success and NULL on failure @@ -686,7 +686,7 @@ void mei_host_client_init(struct mei_device *dev) pm_runtime_mark_last_busy(dev->dev); dev_dbg(dev->dev, "rpm: autosuspend\n"); - pm_runtime_autosuspend(dev->dev); + pm_request_autosuspend(dev->dev); } /** @@ -1536,7 +1536,7 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, rets = first_chunk ? mei_cl_tx_flow_ctrl_creds(cl) : 1; if (rets < 0) - return rets; + goto err; if (rets == 0) { cl_dbg(dev, cl, "No flow control credentials: not sending.\n"); @@ -1570,11 +1570,8 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, cb->buf.size, cb->buf_idx); rets = mei_write_message(dev, &mei_hdr, buf->data + cb->buf_idx); - if (rets) { - cl->status = rets; - list_move_tail(&cb->list, &cmpl_list->list); - return rets; - } + if (rets) + goto err; cl->status = 0; cl->writing_state = MEI_WRITING; @@ -1582,14 +1579,21 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, cb->completed = mei_hdr.msg_complete == 1; if (first_chunk) { - if (mei_cl_tx_flow_ctrl_creds_reduce(cl)) - return -EIO; + if (mei_cl_tx_flow_ctrl_creds_reduce(cl)) { + rets = -EIO; + goto err; + } } if (mei_hdr.msg_complete) list_move_tail(&cb->list, &dev->write_waiting_list.list); return 0; + +err: + cl->status = rets; + list_move_tail(&cb->list, &cmpl_list->list); + return rets; } /** diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 7ad15d678878cf5e4f5bba819bda3ceba75560c6..c8307e8b4c163977f7b4ebf8ed2e8991fc2393d7 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -122,6 +122,8 @@ #define MEI_DEV_ID_SPT_H 0xA13A /* Sunrise Point H */ #define MEI_DEV_ID_SPT_H_2 0xA13B /* Sunrise Point H 2 */ +#define MEI_DEV_ID_LBG 0xA1BA /* Lewisburg (SPT) */ + #define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */ #define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index f3ffd883b232ea8ba0c72fb2135cb10d6d3b434a..f9c6ec4b98ab7d7b0c66c1f4d4af014440c3de23 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -87,6 +87,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, mei_me_pch8_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_sps_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, 
mei_me_pch8_sps_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, mei_me_pch8_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, mei_me_pch8_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, mei_me_pch8_cfg)}, diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index 3678220964fe62948a9a4d1aa2fed06b9c1a3a66..df382be626343dc27fc17cbf91124027c6df9a0b 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -818,7 +818,7 @@ static int mmc_test_nonblock_transfer(struct mmc_test_card *test, struct mmc_async_req *cur_areq = &test_areq[0].areq; struct mmc_async_req *other_areq = &test_areq[1].areq; int i; - int ret; + int ret = RESULT_OK; test_areq[0].test = test; test_areq[1].test = test; diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 60a642a85da73cb1f703af220a41dc2ca32b7a3e..348b58b77b3b6a96f905228445d54d4cb9d2f2e6 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -453,7 +453,6 @@ void mmc_remove_host(struct mmc_host *host) { if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY)) mmc_unregister_pm_notifier(host); - mmc_stop_host(host); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index fa7ecd16e78683039bd5bb3ac50c5ade4e77a2bf..a0aa64eac123c5f6225a0b8d3fddf614cf71ab4e 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -223,6 +223,7 @@ static int mmc_decode_scr(struct mmc_card *card) static int mmc_read_ssr(struct mmc_card *card) { unsigned int au, es, et, eo; + u32 *raw_ssr; int i; if (!(card->csd.cmdclass & CCC_APP_SPEC)) { @@ -231,14 +232,21 @@ static int mmc_read_ssr(struct mmc_card *card) return 0; } - if (mmc_app_sd_status(card, card->raw_ssr)) { + raw_ssr = kmalloc(sizeof(card->raw_ssr), GFP_KERNEL); + if (!raw_ssr) + return -ENOMEM; + + if (mmc_app_sd_status(card, raw_ssr)) { pr_warn("%s: problem reading SD Status register\n", mmc_hostname(card->host)); + kfree(raw_ssr); return 0; } for (i = 0; i < 16; i++) - card->raw_ssr[i] = be32_to_cpu(card->raw_ssr[i]); + card->raw_ssr[i] = be32_to_cpu(raw_ssr[i]); + + kfree(raw_ssr); /* * UNSTUFF_BITS only works with four u32s so we have to offset the diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index 44ecebd1ea8c1834a5d311fbe36ddfc8383e3ecd..c8b8ac66ff7e3a4839ba4943c9069e540846c87e 100644 --- a/drivers/mmc/host/mxs-mmc.c +++ b/drivers/mmc/host/mxs-mmc.c @@ -309,6 +309,9 @@ static void mxs_mmc_ac(struct mxs_mmc_host *host) cmd0 = BF_SSP(cmd->opcode, CMD0_CMD); cmd1 = cmd->arg; + if (cmd->opcode == MMC_STOP_TRANSMISSION) + cmd0 |= BM_SSP_CMD0_APPEND_8CYC; + if (host->sdio_irq_en) { ctrl0 |= BM_SSP_CTRL0_SDIO_IRQ_CHECK; cmd0 |= BM_SSP_CMD0_CONT_CLKING_EN | BM_SSP_CMD0_SLOW_CLKING_EN; @@ -417,8 +420,7 @@ static void mxs_mmc_adtc(struct mxs_mmc_host *host) ssp->base + HW_SSP_BLOCK_SIZE); } - if ((cmd->opcode == MMC_STOP_TRANSMISSION) || - (cmd->opcode == SD_IO_RW_EXTENDED)) + if (cmd->opcode == SD_IO_RW_EXTENDED) cmd0 |= BM_SSP_CMD0_APPEND_8CYC; cmd1 = cmd->arg; diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 81d4dc034793ddb72c8ddb1d29c7d1bbb65f1a13..fddd0be196f4e38f0d64185ea699aab4b67d5ee7 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -394,7 +394,8 @@ static int sdhci_acpi_probe(struct platform_device *pdev) /* Power on the SDHCI controller and its children */ acpi_device_fix_up_power(device); list_for_each_entry(child, &device->children, node) - acpi_device_fix_up_power(child); + if (child->status.present && child->status.enabled) + 
acpi_device_fix_up_power(child); if (acpi_bus_get_status(device) || !device->status.present) return -ENODEV; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 42ef3ebb1d8cf9d57f30e48d21c3a5250aea16ea..ba637ff8aa7e4409958e5ee64e2c2a6687ebe856 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2086,16 +2086,32 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) if (!host->tuning_done) { pr_info(DRIVER_NAME ": Timeout waiting for Buffer Read Ready interrupt during tuning procedure, falling back to fixed sampling clock\n"); - - sdhci_do_reset(host, SDHCI_RESET_CMD); - sdhci_do_reset(host, SDHCI_RESET_DATA); - ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); ctrl &= ~SDHCI_CTRL_TUNED_CLK; ctrl &= ~SDHCI_CTRL_EXEC_TUNING; sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + sdhci_do_reset(host, SDHCI_RESET_CMD); + sdhci_do_reset(host, SDHCI_RESET_DATA); + err = -EIO; + + if (cmd.opcode != MMC_SEND_TUNING_BLOCK_HS200) + goto out; + + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); + + spin_unlock_irqrestore(&host->lock, flags); + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = MMC_STOP_TRANSMISSION; + cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; + cmd.busy_timeout = 50; + mmc_wait_for_cmd(mmc, &cmd, 0); + + spin_lock_irqsave(&host->lock, flags); + goto out; } @@ -2703,7 +2719,8 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id) if (intmask & SDHCI_INT_RETUNE) mmc_retune_needed(host->mmc); - if (intmask & SDHCI_INT_CARD_INT) { + if ((intmask & SDHCI_INT_CARD_INT) && + (host->ier & SDHCI_INT_CARD_INT)) { sdhci_enable_sdio_irq_nolock(host, false); host->thread_isr |= SDHCI_INT_CARD_INT; result = IRQ_WAKE_THREAD; diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index bfa587d65aa2addce834b6f334b23fab95999e13..50ee1bad3690f5317153da7910d49de2698398a6 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -541,7 +541,7 @@ config MTD_NAND_FSMC Flexible Static Memory Controller (FSMC) config MTD_NAND_XWAY - tristate "Support for NAND on Lantiq XWAY SoC" + bool "Support for NAND on Lantiq XWAY SoC" depends on LANTIQ && SOC_TYPE_XWAY help Enables support for NAND Flash chips on Lantiq XWAY SoCs. 
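A note on the sdhci_execute_tuning() timeout path above (the reasoning is inferred from the hunk rather than stated in it): after clearing the tuned-clock/exec-tuning bits and resetting the CMD and DATA lines, the new code returns -EIO straight away for non-HS200 tuning, but for HS200 (CMD21) the card may still be pushing out the tuning block, so interrupts are re-enabled and a stop command with a short busy timeout is issued first:

	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = MMC_STOP_TRANSMISSION;
	cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
	cmd.busy_timeout = 50;	/* ms */
	mmc_wait_for_cmd(mmc, &cmd, 0);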
NAND is attached diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c index 852388171f2033320e7cba102c3be24d312c0f03..bc6e49af063a254bd68b7e9ff779b9b554447516 100644 --- a/drivers/mtd/nand/lpc32xx_mlc.c +++ b/drivers/mtd/nand/lpc32xx_mlc.c @@ -776,7 +776,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev) init_completion(&host->comp_controller); host->irq = platform_get_irq(pdev, 0); - if ((host->irq < 0) || (host->irq >= NR_IRQS)) { + if (host->irq < 0) { dev_err(&pdev->dev, "failed to get platform irq\n"); res = -EINVAL; goto err_exit3; diff --git a/drivers/mtd/nand/xway_nand.c b/drivers/mtd/nand/xway_nand.c index 1f2948c0c458d60ec60e898bb1b4893ddba33ecf..895101a5e686457c0dc63237efcb33a79c43a559 100644 --- a/drivers/mtd/nand/xway_nand.c +++ b/drivers/mtd/nand/xway_nand.c @@ -232,7 +232,6 @@ static const struct of_device_id xway_nand_match[] = { { .compatible = "lantiq,nand-xway" }, {}, }; -MODULE_DEVICE_TABLE(of, xway_nand_match); static struct platform_driver xway_nand_driver = { .probe = xway_nand_probe, @@ -243,6 +242,4 @@ static struct platform_driver xway_nand_driver = { }, }; -module_platform_driver(xway_nand_driver); - -MODULE_LICENSE("GPL"); +builtin_platform_driver(xway_nand_driver); diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c index d403ba7b8f432ea45690767e334b6d6d8f8016bb..d489fbd07c12b6e73bf0f73e5f3de2696d4da3c4 100644 --- a/drivers/mtd/spi-nor/cadence-quadspi.c +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -1077,12 +1077,14 @@ static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np) /* Get flash device data */ for_each_available_child_of_node(dev->of_node, np) { - if (of_property_read_u32(np, "reg", &cs)) { + ret = of_property_read_u32(np, "reg", &cs); + if (ret) { dev_err(dev, "Couldn't determine chip select.\n"); goto err; } - if (cs > CQSPI_MAX_CHIPSELECT) { + if (cs >= CQSPI_MAX_CHIPSELECT) { + ret = -EINVAL; dev_err(dev, "Chip select %d out of range.\n", cs); goto err; } diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c index 7be393c96b1a0481c1f181443a7b521b647378f5..cf7c18947189a1f5c2834ad5b0d14ac615686294 100644 --- a/drivers/net/can/c_can/c_can_pci.c +++ b/drivers/net/can/c_can/c_can_pci.c @@ -161,6 +161,7 @@ static int c_can_pci_probe(struct pci_dev *pdev, dev->irq = pdev->irq; priv->base = addr; + priv->device = &pdev->dev; if (!c_can_pci_data->freq) { dev_err(&pdev->dev, "no clock frequency defined\n"); diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 680d1ff07a55ddd60ceb09eb42bb98faaa42ad9f..6749b1829469411315dedac1634b7be974cf21d8 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -948,7 +948,12 @@ static int ti_hecc_probe(struct platform_device *pdev) netif_napi_add(ndev, &priv->napi, ti_hecc_rx_poll, HECC_DEF_NAPI_WEIGHT); - clk_enable(priv->clk); + err = clk_prepare_enable(priv->clk); + if (err) { + dev_err(&pdev->dev, "clk_prepare_enable() failed\n"); + goto probe_exit_clk; + } + err = register_candev(ndev); if (err) { dev_err(&pdev->dev, "register_candev() failed\n"); @@ -981,7 +986,7 @@ static int ti_hecc_remove(struct platform_device *pdev) struct ti_hecc_priv *priv = netdev_priv(ndev); unregister_candev(ndev); - clk_disable(priv->clk); + clk_disable_unprepare(priv->clk); clk_put(priv->clk); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); iounmap(priv->base); @@ -1006,7 +1011,7 @@ static int ti_hecc_suspend(struct platform_device *pdev, pm_message_t state) 
hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_PDR); priv->can.state = CAN_STATE_SLEEPING; - clk_disable(priv->clk); + clk_disable_unprepare(priv->clk); return 0; } @@ -1015,8 +1020,11 @@ static int ti_hecc_resume(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct ti_hecc_priv *priv = netdev_priv(dev); + int err; - clk_enable(priv->clk); + err = clk_prepare_enable(priv->clk); + if (err) + return err; hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_PDR); priv->can.state = CAN_STATE_ERROR_ACTIVE; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 9ec33b51a0edad879701bef79d6c8f250d778b91..2ce7ae97ac9148d39137eff66954dfc3b4cf239b 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -393,7 +393,7 @@ static int bcm_sf2_sw_mdio_read(struct mii_bus *bus, int addr, int regnum) if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr)) return bcm_sf2_sw_indir_rw(priv, 1, addr, regnum, 0); else - return mdiobus_read(priv->master_mii_bus, addr, regnum); + return mdiobus_read_nested(priv->master_mii_bus, addr, regnum); } static int bcm_sf2_sw_mdio_write(struct mii_bus *bus, int addr, int regnum, @@ -407,7 +407,7 @@ static int bcm_sf2_sw_mdio_write(struct mii_bus *bus, int addr, int regnum, if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr)) bcm_sf2_sw_indir_rw(priv, 0, addr, regnum, val); else - mdiobus_write(priv->master_mii_bus, addr, regnum, val); + mdiobus_write_nested(priv->master_mii_bus, addr, regnum, val); return 0; } @@ -982,6 +982,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME; struct device_node *dn = pdev->dev.of_node; struct b53_platform_data *pdata; + struct dsa_switch_ops *ops; struct bcm_sf2_priv *priv; struct b53_device *dev; struct dsa_switch *ds; @@ -995,6 +996,10 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; + ops = devm_kzalloc(&pdev->dev, sizeof(*ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; + dev = b53_switch_alloc(&pdev->dev, &bcm_sf2_io_ops, priv); if (!dev) return -ENOMEM; @@ -1014,6 +1019,8 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) ds = dev->ds; /* Override the parts that are non-standard wrt. 
normal b53 devices */ + memcpy(ops, ds->ops, sizeof(*ops)); + ds->ops = ops; ds->ops->get_tag_protocol = bcm_sf2_sw_get_tag_protocol; ds->ops->setup = bcm_sf2_sw_setup; ds->ops->get_phy_flags = bcm_sf2_sw_get_phy_flags; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 25d1eb4933d0b8b28dc53d07344eb6a47a263893..be7ec5a76a54102eecabe99c45ffcf00a49a7713 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -710,11 +710,8 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, unsigned int c_index, last_c_index, last_tx_cn, num_tx_cbs; unsigned int pkts_compl = 0, bytes_compl = 0; struct bcm_sysport_cb *cb; - struct netdev_queue *txq; u32 hw_ind; - txq = netdev_get_tx_queue(ndev, ring->index); - /* Compute how many descriptors have been processed since last call */ hw_ind = tdma_readl(priv, TDMA_DESC_RING_PROD_CONS_INDEX(ring->index)); c_index = (hw_ind >> RING_CONS_INDEX_SHIFT) & RING_CONS_INDEX_MASK; @@ -745,9 +742,6 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, ring->c_index = c_index; - if (netif_tx_queue_stopped(txq) && pkts_compl) - netif_tx_wake_queue(txq); - netif_dbg(priv, tx_done, ndev, "ring=%d c_index=%d pkts_compl=%d, bytes_compl=%d\n", ring->index, ring->c_index, pkts_compl, bytes_compl); @@ -759,16 +753,33 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, static unsigned int bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, struct bcm_sysport_tx_ring *ring) { + struct netdev_queue *txq; unsigned int released; unsigned long flags; + txq = netdev_get_tx_queue(priv->netdev, ring->index); + spin_lock_irqsave(&ring->lock, flags); released = __bcm_sysport_tx_reclaim(priv, ring); + if (released) + netif_tx_wake_queue(txq); + spin_unlock_irqrestore(&ring->lock, flags); return released; } +/* Locked version of the per-ring TX reclaim, but does not wake the queue */ +static void bcm_sysport_tx_clean(struct bcm_sysport_priv *priv, + struct bcm_sysport_tx_ring *ring) +{ + unsigned long flags; + + spin_lock_irqsave(&ring->lock, flags); + __bcm_sysport_tx_reclaim(priv, ring); + spin_unlock_irqrestore(&ring->lock, flags); +} + static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget) { struct bcm_sysport_tx_ring *ring = @@ -1253,7 +1264,7 @@ static void bcm_sysport_fini_tx_ring(struct bcm_sysport_priv *priv, napi_disable(&ring->napi); netif_napi_del(&ring->napi); - bcm_sysport_tx_reclaim(priv, ring); + bcm_sysport_tx_clean(priv, ring); kfree(ring->cbs); ring->cbs = NULL; diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 1026c452e39de4ccd1e82f18ed6284a1d51a4ea0..930c8165f2a812d1d7088409e19e186c4fd13ab8 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -770,6 +770,17 @@ struct mvpp2_rx_desc { u32 reserved8; }; +struct mvpp2_txq_pcpu_buf { + /* Transmitted SKB */ + struct sk_buff *skb; + + /* Physical address of transmitted buffer */ + dma_addr_t phys; + + /* Size transmitted */ + size_t size; +}; + /* Per-CPU Tx queue control */ struct mvpp2_txq_pcpu { int cpu; @@ -785,11 +796,8 @@ struct mvpp2_txq_pcpu { /* Number of Tx DMA descriptors reserved for each CPU */ int reserved_num; - /* Array of transmitted skb */ - struct sk_buff **tx_skb; - - /* Array of transmitted buffers' physical addresses */ - dma_addr_t *tx_buffs; + /* Infos about transmitted buffers */ + struct mvpp2_txq_pcpu_buf *buffs; /* 
Index of last TX DMA descriptor that was inserted */ int txq_put_index; @@ -979,10 +987,11 @@ static void mvpp2_txq_inc_put(struct mvpp2_txq_pcpu *txq_pcpu, struct sk_buff *skb, struct mvpp2_tx_desc *tx_desc) { - txq_pcpu->tx_skb[txq_pcpu->txq_put_index] = skb; - if (skb) - txq_pcpu->tx_buffs[txq_pcpu->txq_put_index] = - tx_desc->buf_phys_addr; + struct mvpp2_txq_pcpu_buf *tx_buf = + txq_pcpu->buffs + txq_pcpu->txq_put_index; + tx_buf->skb = skb; + tx_buf->size = tx_desc->data_size; + tx_buf->phys = tx_desc->buf_phys_addr; txq_pcpu->txq_put_index++; if (txq_pcpu->txq_put_index == txq_pcpu->size) txq_pcpu->txq_put_index = 0; @@ -4401,17 +4410,16 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, int i; for (i = 0; i < num; i++) { - dma_addr_t buf_phys_addr = - txq_pcpu->tx_buffs[txq_pcpu->txq_get_index]; - struct sk_buff *skb = txq_pcpu->tx_skb[txq_pcpu->txq_get_index]; + struct mvpp2_txq_pcpu_buf *tx_buf = + txq_pcpu->buffs + txq_pcpu->txq_get_index; mvpp2_txq_inc_get(txq_pcpu); - dma_unmap_single(port->dev->dev.parent, buf_phys_addr, - skb_headlen(skb), DMA_TO_DEVICE); - if (!skb) + dma_unmap_single(port->dev->dev.parent, tx_buf->phys, + tx_buf->size, DMA_TO_DEVICE); + if (!tx_buf->skb) continue; - dev_kfree_skb_any(skb); + dev_kfree_skb_any(tx_buf->skb); } } @@ -4651,15 +4659,10 @@ static int mvpp2_txq_init(struct mvpp2_port *port, for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); txq_pcpu->size = txq->size; - txq_pcpu->tx_skb = kmalloc(txq_pcpu->size * - sizeof(*txq_pcpu->tx_skb), - GFP_KERNEL); - if (!txq_pcpu->tx_skb) - goto error; - - txq_pcpu->tx_buffs = kmalloc(txq_pcpu->size * - sizeof(dma_addr_t), GFP_KERNEL); - if (!txq_pcpu->tx_buffs) + txq_pcpu->buffs = kmalloc(txq_pcpu->size * + sizeof(struct mvpp2_txq_pcpu_buf), + GFP_KERNEL); + if (!txq_pcpu->buffs) goto error; txq_pcpu->count = 0; @@ -4673,8 +4676,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port, error: for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); - kfree(txq_pcpu->tx_skb); - kfree(txq_pcpu->tx_buffs); + kfree(txq_pcpu->buffs); } dma_free_coherent(port->dev->dev.parent, @@ -4693,8 +4695,7 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port, for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); - kfree(txq_pcpu->tx_skb); - kfree(txq_pcpu->tx_buffs); + kfree(txq_pcpu->buffs); } if (txq->descs) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fb8bb027b69c3332ef23d4b59f9288103a5b12ba..d223e7cb68ba316630f53e0279c0a52953ca0701 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1740,8 +1740,11 @@ int mlx4_en_start_port(struct net_device *dev) /* Process all completions if exist to prevent * the queues freezing if they are full */ - for (i = 0; i < priv->rx_ring_num; i++) + for (i = 0; i < priv->rx_ring_num; i++) { + local_bh_disable(); napi_schedule(&priv->rx_cq[i]->napi); + local_bh_enable(); + } netif_tx_start_all_queues(dev); netif_device_attach(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index d17c242279003bcc5a867b464c6f531c2fd6ff7e..90e81ae9f3bc292f259508c03cc39e0751ac2308 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -247,6 +247,7 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v, } if (fs->flow_type & FLOW_MAC_EXT && 
!is_zero_ether_addr(fs->m_ext.h_dest)) { + mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, dmac_47_16), fs->m_ext.h_dest); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 246d98ebb58890c33005ba7d7abab5743e1b45d9..5dc3e2453ff598f99ef30f958c6901edf79ea8c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3773,14 +3773,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_lag_add(mdev, netdev); - if (mlx5e_vxlan_allowed(mdev)) { - rtnl_lock(); - udp_tunnel_get_rx_info(netdev); - rtnl_unlock(); - } - mlx5e_enable_async_events(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); if (MLX5_CAP_GEN(mdev, vport_group_manager)) { mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); @@ -3790,6 +3783,18 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) rep.priv_data = priv; mlx5_eswitch_register_vport_rep(esw, 0, &rep); } + + if (netdev->reg_state != NETREG_REGISTERED) + return; + + /* Device already registered: sync netdev system state */ + if (mlx5e_vxlan_allowed(mdev)) { + rtnl_lock(); + udp_tunnel_get_rx_info(netdev); + rtnl_unlock(); + } + + queue_work(priv->wq, &priv->set_rx_mode_work); } static void mlx5e_nic_disable(struct mlx5e_priv *priv) @@ -3937,10 +3942,6 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) const struct mlx5e_profile *profile = priv->profile; set_bit(MLX5E_STATE_DESTROYING, &priv->state); - if (profile->disable) - profile->disable(priv); - - flush_workqueue(priv->wq); rtnl_lock(); if (netif_running(netdev)) @@ -3948,6 +3949,10 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) netif_device_detach(netdev); rtnl_unlock(); + if (profile->disable) + profile->disable(priv); + flush_workqueue(priv->wq); + mlx5e_destroy_q_counter(priv); profile->cleanup_rx(priv); mlx5e_close_drop_rq(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 33495d88aeb21469861fd2a9e22463cabe75e9b2..e7b2158bb48a0514306c7aeffb2625a0673475be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -193,6 +193,9 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, return false; } + if (unlikely(page_is_pfmemalloc(dma_info->page))) + return false; + cache->page_cache[cache->tail] = *dma_info; cache->tail = tail_next; return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c index 1fffe48a93cc35d55d03eba9ba352590924548d2..cbfac06b7ffd1d5140226ccb87331db57d4880d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -109,7 +109,6 @@ static bool mlx5e_am_on_top(struct mlx5e_rx_am *am) switch (am->tune_state) { case MLX5E_AM_PARKING_ON_TOP: case MLX5E_AM_PARKING_TIRED: - WARN_ONCE(true, "mlx5e_am_on_top: PARKING\n"); return true; case MLX5E_AM_GOING_RIGHT: return (am->steps_left > 1) && (am->steps_right == 1); @@ -123,7 +122,6 @@ static void mlx5e_am_turn(struct mlx5e_rx_am *am) switch (am->tune_state) { case MLX5E_AM_PARKING_ON_TOP: case MLX5E_AM_PARKING_TIRED: - WARN_ONCE(true, "mlx5e_am_turn: PARKING\n"); break; case MLX5E_AM_GOING_RIGHT: am->tune_state = MLX5E_AM_GOING_LEFT; @@ -144,7 +142,6 @@ static int mlx5e_am_step(struct 
mlx5e_rx_am *am) switch (am->tune_state) { case MLX5E_AM_PARKING_ON_TOP: case MLX5E_AM_PARKING_TIRED: - WARN_ONCE(true, "mlx5e_am_step: PARKING\n"); break; case MLX5E_AM_GOING_RIGHT: if (am->profile_ix == (MLX5E_PARAMS_AM_NUM_PROFILES - 1)) @@ -282,10 +279,8 @@ static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start, u32 delta_us = ktime_us_delta(end->time, start->time); unsigned int npkts = end->pkt_ctr - start->pkt_ctr; - if (!delta_us) { - WARN_ONCE(true, "mlx5e_am_calc_stats: delta_us=0\n"); + if (!delta_us) return; - } curr_stats->ppms = (npkts * USEC_PER_MSEC) / delta_us; curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index be1f7333ab7f6845acc01ae46fd9ba150f6ad6aa..c7011ef4e351ffb0a318abc0c1f6a9f20d38fdca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1703,7 +1703,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, if (!ESW_ALLOWED(esw)) return -EPERM; - if (!LEGAL_VPORT(esw, vport)) + if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) return -EINVAL; mutex_lock(&esw->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ada24e103b02ac927362b0aba5050c251a0e644b..0c9ef8729ca7b2dfefe2281d777bee8d048b93e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -468,6 +468,13 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, to_fw_pkey_sz(dev, 128)); + /* Check log_max_qp from HCA caps to set in current profile */ + if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) { + mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", + profile[prof_sel].log_max_qp, + MLX5_CAP_GEN_MAX(dev, log_max_qp)); + profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + } if (prof->mask & MLX5_PROF_MASK_QP_SIZE) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, prof->log_max_qp); @@ -540,7 +547,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; struct msix_entry *msix = priv->msix_arr; int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - int numa_node = priv->numa_node; int err; if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { @@ -548,7 +554,7 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) return -ENOMEM; } - cpumask_set_cpu(cpumask_local_spread(i, numa_node), + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), priv->irq_info[i].mask); err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); @@ -1152,6 +1158,9 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, { int err = 0; + if (cleanup) + mlx5_drain_health_wq(dev); + mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", @@ -1312,7 +1321,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - /* In case of kernel call save the pci state and drain health wq */ + /* In case of kernel call save the pci state and drain the health wq */ if (state) { pci_save_state(pdev); mlx5_drain_health_wq(dev); diff --git 
a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h index d942a3e6fa4151cbc10f4207b25c95ea8aa734c6..846fd4df7dabbce5d800c871cbf29bfc62c0542b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -211,21 +211,21 @@ MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1); /* pci_eqe_cmd_token * Command completion event - token */ -MLXSW_ITEM32(pci, eqe, cmd_token, 0x08, 16, 16); +MLXSW_ITEM32(pci, eqe, cmd_token, 0x00, 16, 16); /* pci_eqe_cmd_status * Command completion event - status */ -MLXSW_ITEM32(pci, eqe, cmd_status, 0x08, 0, 8); +MLXSW_ITEM32(pci, eqe, cmd_status, 0x00, 0, 8); /* pci_eqe_cmd_out_param_h * Command completion event - output parameter - higher part */ -MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x0C, 0, 32); +MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x04, 0, 32); /* pci_eqe_cmd_out_param_l * Command completion event - output parameter - lower part */ -MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x10, 0, 32); +MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x08, 0, 32); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index dda5761e91bcbe27738cafd79d73e5a25f1f8b66..f902c4d3de9981a0e783249fa30aefafb19c16fb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -684,6 +684,7 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, dev_kfree_skb_any(skb_orig); return NETDEV_TX_OK; } + dev_consume_skb_any(skb_orig); } if (eth_skb_pad(skb)) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 92bda8703f8752756c472aac916a19c8e5f38a65..d548f0a55174541383c6c1314d7e1a05dc337d74 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -314,6 +314,7 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, dev_kfree_skb_any(skb_orig); return NETDEV_TX_OK; } + dev_consume_skb_any(skb_orig); } mlxsw_sx_txhdr_construct(skb, &tx_info); /* TX header is consumed by HW on the way so we shouldn't count its diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index d6a217874a8bc2f79fe4c255bc43feea611870a5..862f18ed602283d2e4a021c6e3154c47e89ae24e 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1508,6 +1508,19 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) + entry / NUM_TX_DESC * DPTR_ALIGN; len = PTR_ALIGN(skb->data, DPTR_ALIGN) - skb->data; + /* Zero length DMA descriptors are problematic as they seem to + * terminate DMA transfers. Avoid them by simply using a length of + * DPTR_ALIGN (4) when skb data is aligned to DPTR_ALIGN. + * + * As skb is guaranteed to have at least ETH_ZLEN (60) bytes of + * data by the call to skb_put_padto() above this is safe with + * respect to both the length of the first DMA descriptor (len) + * overflowing the available data and the length of the second DMA + * descriptor (skb->len - len) being negative. 
+ */ + if (len == 0) + len = DPTR_ALIGN; + memcpy(buffer, skb->data, len); dma_addr = dma_map_single(ndev->dev.parent, buffer, len, DMA_TO_DEVICE); if (dma_mapping_error(ndev->dev.parent, dma_addr)) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index caf069a465f234121315cc39f5f38007791a57ec..b2893fbe25e528b887cff28610a6b1740b430529 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3349,12 +3349,6 @@ int stmmac_dvr_probe(struct device *device, spin_lock_init(&priv->lock); spin_lock_init(&priv->tx_lock); - ret = register_netdev(ndev); - if (ret) { - pr_err("%s: ERROR %i registering the device\n", __func__, ret); - goto error_netdev_register; - } - /* If a specific clk_csr value is passed from the platform * this means that the CSR Clock Range selection cannot be * changed at run-time and it is fixed. Viceversa the driver'll try to @@ -3376,15 +3370,24 @@ int stmmac_dvr_probe(struct device *device, if (ret < 0) { pr_debug("%s: MDIO bus (id: %d) registration failed", __func__, priv->plat->bus_id); - goto error_mdio_register; + goto error_napi_register; } } - return 0; + ret = register_netdev(ndev); + if (ret) { + pr_err("%s: ERROR %i registering the device\n", __func__, ret); + goto error_netdev_register; + } + + return ret; -error_mdio_register: - unregister_netdev(ndev); error_netdev_register: + if (priv->hw->pcs != STMMAC_PCS_RGMII && + priv->hw->pcs != STMMAC_PCS_TBI && + priv->hw->pcs != STMMAC_PCS_RTBI) + stmmac_mdio_unregister(ndev); +error_napi_register: netif_napi_del(&priv->napi); error_hw_init: clk_disable_unprepare(priv->pclk); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c9140c3aeb67d5fc71b3911a8b2ddb51a2ef806a..ff038e507fd6e9ca33ff308cf4777a8797829431 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -659,6 +659,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, * policy filters on the host). Deliver these via the VF * interface in the guest. */ + rcu_read_lock(); vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); if (vf_netdev && (vf_netdev->flags & IFF_UP)) net = vf_netdev; @@ -667,6 +668,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); if (unlikely(!skb)) { ++net->stats.rx_dropped; + rcu_read_unlock(); return NVSP_STAT_FAIL; } @@ -696,6 +698,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, * TODO - use NAPI? */ netif_rx(skb); + rcu_read_unlock(); return 0; } diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 1056ed142411d3c75082fced68c59f1b269d0532..f186e0460cde311092f2ae8298e83060317bef5f 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -112,13 +112,26 @@ static int atusb_read_reg(struct atusb *atusb, uint8_t reg) { struct usb_device *usb_dev = atusb->usb_dev; int ret; + uint8_t *buffer; uint8_t value; + buffer = kmalloc(1, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + dev_dbg(&usb_dev->dev, "atusb: reg = 0x%x\n", reg); ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), ATUSB_REG_READ, ATUSB_REQ_FROM_DEV, - 0, reg, &value, 1, 1000); - return ret >= 0 ? 
value : ret; + 0, reg, buffer, 1, 1000); + + if (ret >= 0) { + value = buffer[0]; + kfree(buffer); + return value; + } else { + kfree(buffer); + return ret; + } } static int atusb_write_subreg(struct atusb *atusb, uint8_t reg, uint8_t mask, @@ -587,9 +600,13 @@ static struct ieee802154_ops atusb_ops = { static int atusb_get_and_show_revision(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; - unsigned char buffer[3]; + unsigned char *buffer; int ret; + buffer = kmalloc(3, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + /* Get a couple of the ATMega Firmware values */ ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), ATUSB_ID, ATUSB_REQ_FROM_DEV, 0, 0, @@ -605,15 +622,20 @@ static int atusb_get_and_show_revision(struct atusb *atusb) dev_info(&usb_dev->dev, "Please update to version 0.2 or newer"); } + kfree(buffer); return ret; } static int atusb_get_and_show_build(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; - char build[ATUSB_BUILD_SIZE + 1]; + char *build; int ret; + build = kmalloc(ATUSB_BUILD_SIZE + 1, GFP_KERNEL); + if (!build) + return -ENOMEM; + ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), ATUSB_BUILD, ATUSB_REQ_FROM_DEV, 0, 0, build, ATUSB_BUILD_SIZE, 1000); @@ -622,6 +644,7 @@ static int atusb_get_and_show_build(struct atusb *atusb) dev_info(&usb_dev->dev, "Firmware: build %s\n", build); } + kfree(build); return ret; } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 7869b0651576fa6432b9608adb906fc7e1fccd26..6f38daf2d978bd1f75051035211d9a0a5c8a53fd 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -827,7 +827,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, return -EINVAL; ret = virtio_net_hdr_from_skb(skb, &vnet_hdr, - macvtap_is_little_endian(q)); + macvtap_is_little_endian(q), true); if (ret) BUG(); diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c index e741bf614c4eeca4f4c041bff53880a5ca8c7439..b0492ef2cdaa0d360928e3e65a14808e336c40ec 100644 --- a/drivers/net/phy/bcm63xx.c +++ b/drivers/net/phy/bcm63xx.c @@ -21,6 +21,23 @@ MODULE_DESCRIPTION("Broadcom 63xx internal PHY driver"); MODULE_AUTHOR("Maxime Bizon "); MODULE_LICENSE("GPL"); +static int bcm63xx_config_intr(struct phy_device *phydev) +{ + int reg, err; + + reg = phy_read(phydev, MII_BCM63XX_IR); + if (reg < 0) + return reg; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + reg &= ~MII_BCM63XX_IR_GMASK; + else + reg |= MII_BCM63XX_IR_GMASK; + + err = phy_write(phydev, MII_BCM63XX_IR, reg); + return err; +} + static int bcm63xx_config_init(struct phy_device *phydev) { int reg, err; @@ -55,7 +72,7 @@ static struct phy_driver bcm63xx_driver[] = { .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, - .config_intr = bcm_phy_config_intr, + .config_intr = bcm63xx_config_intr, }, { /* same phy as above, with just a different OUI */ .phy_id = 0x002bdc00, @@ -67,7 +84,7 @@ static struct phy_driver bcm63xx_driver[] = { .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, - .config_intr = bcm_phy_config_intr, + .config_intr = bcm63xx_config_intr, } }; module_phy_driver(bcm63xx_driver); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 929dafb86458d91d3e62b1d8222f05e10719a909..e5d904110a060c4bb97d59d7c4eaf57f6e20db97 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1374,7 +1374,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, return -EINVAL; ret = virtio_net_hdr_from_skb(skb, 
&gso, - tun_is_little_endian(tun)); + tun_is_little_endian(tun), true); if (ret) { struct skb_shared_info *sinfo = skb_shinfo(skb); pr_err("unexpected GSO type: " diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index dd623f6744875969e7eb657612ab1a85b85689eb..b82be816256c418c3ebc6a6560c0341a291b2d7e 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -531,6 +531,7 @@ static const struct driver_info wwan_info = { #define SAMSUNG_VENDOR_ID 0x04e8 #define LENOVO_VENDOR_ID 0x17ef #define NVIDIA_VENDOR_ID 0x0955 +#define HP_VENDOR_ID 0x03f0 static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -677,6 +678,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* HP lt2523 (Novatel E371) - handled by qmi_wwan */ +{ + USB_DEVICE_AND_INTERFACE_INFO(HP_VENDOR_ID, 0x421d, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* AnyDATA ADU960S - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(0x16d5, 0x650a, USB_CLASS_COMM, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 6fe1cdb0174f6cf91f8445dbe86a3e977d327255..24d5272cdce51091a26a116205d8ca2ae5be03d9 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -654,6 +654,13 @@ static const struct usb_device_id products[] = { USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, + { /* HP lt2523 (Novatel E371) */ + USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&qmi_wwan_info, + }, { /* HP lt4112 LTE/HSPA+ Gobi 4G Module (Huawei me906e) */ USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7), .driver_info = (unsigned long)&qmi_wwan_info, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index efb84f0924922af5caea946423e69c59f88c754d..90b426c5ffcec47e16431088cdb46acecc859f3b 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "6" +#define NET_VERSION "7" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." 
NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -1730,7 +1730,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc) u8 checksum = CHECKSUM_NONE; u32 opts2, opts3; - if (tp->version == RTL_VER_01 || tp->version == RTL_VER_02) + if (!(tp->netdev->features & NETIF_F_RXCSUM)) goto return_result; opts2 = le32_to_cpu(rx_desc->opts2); @@ -3572,43 +3572,93 @@ static bool delay_autosuspend(struct r8152 *tp) */ if (!sw_linking && tp->rtl_ops.in_nway(tp)) return true; + else if (!skb_queue_empty(&tp->tx_queue)) + return true; else return false; } -static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) +static int rtl8152_rumtime_suspend(struct r8152 *tp) { - struct r8152 *tp = usb_get_intfdata(intf); struct net_device *netdev = tp->netdev; int ret = 0; - mutex_lock(&tp->control); + if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { + u32 rcr = 0; - if (PMSG_IS_AUTO(message)) { - if (netif_running(netdev) && delay_autosuspend(tp)) { + if (delay_autosuspend(tp)) { ret = -EBUSY; goto out1; } - set_bit(SELECTIVE_SUSPEND, &tp->flags); - } else { - netif_device_detach(netdev); + if (netif_carrier_ok(netdev)) { + u32 ocp_data; + + rcr = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); + ocp_data = rcr & ~RCR_ACPT_ALL; + ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); + rxdy_gated_en(tp, true); + ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, + PLA_OOB_CTRL); + if (!(ocp_data & RXFIFO_EMPTY)) { + rxdy_gated_en(tp, false); + ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr); + ret = -EBUSY; + goto out1; + } + } + + clear_bit(WORK_ENABLE, &tp->flags); + usb_kill_urb(tp->intr_urb); + + tp->rtl_ops.autosuspend_en(tp, true); + + if (netif_carrier_ok(netdev)) { + napi_disable(&tp->napi); + rtl_stop_rx(tp); + rxdy_gated_en(tp, false); + ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr); + napi_enable(&tp->napi); + } } + set_bit(SELECTIVE_SUSPEND, &tp->flags); + +out1: + return ret; +} + +static int rtl8152_system_suspend(struct r8152 *tp) +{ + struct net_device *netdev = tp->netdev; + int ret = 0; + + netif_device_detach(netdev); + if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); napi_disable(&tp->napi); - if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { - rtl_stop_rx(tp); - tp->rtl_ops.autosuspend_en(tp, true); - } else { - cancel_delayed_work_sync(&tp->schedule); - tp->rtl_ops.down(tp); - } + cancel_delayed_work_sync(&tp->schedule); + tp->rtl_ops.down(tp); napi_enable(&tp->napi); } -out1: + + return ret; +} + +static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct r8152 *tp = usb_get_intfdata(intf); + int ret; + + mutex_lock(&tp->control); + + if (PMSG_IS_AUTO(message)) + ret = rtl8152_rumtime_suspend(tp); + else + ret = rtl8152_system_suspend(tp); + mutex_unlock(&tp->control); return ret; @@ -4310,6 +4360,11 @@ static int rtl8152_probe(struct usb_interface *intf, NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | NETIF_F_IPV6_CSUM | NETIF_F_TSO6; + if (tp->version == RTL_VER_01) { + netdev->features &= ~NETIF_F_RXCSUM; + netdev->hw_features &= ~NETIF_F_RXCSUM; + } + netdev->ethtool_ops = &ops; netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index cbf1c613c67aefc48692079a5f11f64c34728fa5..51fc0c33a62f53074667bd416f79a6be4eb15d4e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -840,7 +840,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) 
hdr = skb_vnet_hdr(skb); if (virtio_net_hdr_from_skb(skb, &hdr->hdr, - virtio_is_little_endian(vi->vdev))) + virtio_is_little_endian(vi->vdev), false)) BUG(); if (vi->mergeable_rx_bufs) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 820de6a9ddde1dcfa8ee389bc20e56ae61afa1b9..95cf1d84478158f000c39ef177a8a44411e304f9 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -263,7 +263,9 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, + .flowi4_proto = ip4h->protocol, .daddr = ip4h->daddr, + .saddr = ip4h->saddr, }; struct net *net = dev_net(vrf_dev); struct rtable *rt; @@ -371,6 +373,8 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, struct in6_addr *nexthop; int ret; + nf_reset(skb); + skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -552,6 +556,8 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s u32 nexthop; int ret = -EINVAL; + nf_reset(skb); + /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { struct sk_buff *skb2; @@ -850,8 +856,6 @@ static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook, { struct net *net = dev_net(dev); - nf_reset(skb); - if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0) skb = NULL; /* kfree_skb(skb) handled by nf code */ @@ -966,6 +970,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, */ need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); if (!ipv6_ndisc_frame(skb) && !need_strict) { + vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; @@ -1007,6 +1012,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, goto out; } + vrf_rx_stats(vrf_dev, skb->len); + skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); skb_pull(skb, skb->mac_len); @@ -1232,6 +1239,8 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]); + if (vrf->tb_id == RT_TABLE_UNSPEC) + return -EINVAL; dev->priv_flags |= IFF_L3MDEV_MASTER; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2ba01ca02c9c5ed7f8024cbe7370127a3cb33a4c..0fafaa9d903b1f15949a6adbbf082229c6535539 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2887,7 +2887,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, memcpy(&vxlan->cfg, conf, sizeof(*conf)); if (!vxlan->cfg.dst_port) { if (conf->flags & VXLAN_F_GPE) - vxlan->cfg.dst_port = 4790; /* IANA assigned VXLAN-GPE port */ + vxlan->cfg.dst_port = htons(4790); /* IANA VXLAN-GPE port */ else vxlan->cfg.dst_port = default_port; } diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 21ae8d663e67c614bb2c001005894e6ced16ab51..0c4532227f253006a404a99d5b5a490e9b1c1325 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1534,7 +1534,7 @@ static void ath10k_core_restart(struct work_struct *work) switch (ar->state) { case ATH10K_STATE_ON: ar->state = ATH10K_STATE_RESTARTING; - ath10k_hif_stop(ar); + ath10k_halt(ar); ath10k_scan_finish(ar); ieee80211_restart_hw(ar->hw); break; diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 521f1c55c19ee150e99403b8238cb54f065fc673..be5b527472f95d4db20f6bdac36af5b19228d271 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ 
b/drivers/net/wireless/ath/ath10k/core.h @@ -557,10 +557,8 @@ enum ath10k_fw_features { */ ATH10K_FW_FEATURE_BTCOEX_PARAM = 14, - /* Older firmware with HTT delivers incorrect tx status for null func - * frames to driver, but this fixed in 10.2 and 10.4 firmware versions. - * Also this workaround results in reporting of incorrect null func - * status for 10.4. This flag is used to skip the workaround. + /* Unused flag and proven to be not working, enable this if you want + * to experiment sending NULL func data frames in HTT TX */ ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR = 15, diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 76297d69f1ed4303ec6595f3ad59ef29ee3137b6..f2e85eb22afe6b767eb6a2923b92983ad9489c8b 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3255,8 +3255,6 @@ ath10k_mac_tx_h_get_txmode(struct ath10k *ar, if (ar->htt.target_version_major < 3 && (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && !test_bit(ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX, - ar->running_fw->fw_file.fw_features) && - !test_bit(ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR, ar->running_fw->fw_file.fw_features)) return ATH10K_HW_TXRX_MGMT; @@ -4449,7 +4447,6 @@ static int ath10k_start(struct ieee80211_hw *hw) ar->state = ATH10K_STATE_ON; break; case ATH10K_STATE_RESTARTING: - ath10k_halt(ar); ar->state = ATH10K_STATE_RESTARTED; break; case ATH10K_STATE_ON: diff --git a/drivers/net/wireless/ath/ath10k/spectral.c b/drivers/net/wireless/ath/ath10k/spectral.c index 7d9b0da1b010a69bf267fb449852a4e4ab282c9d..2ffc1fe4923b9696441b4411428ba5548a231783 100644 --- a/drivers/net/wireless/ath/ath10k/spectral.c +++ b/drivers/net/wireless/ath/ath10k/spectral.c @@ -338,7 +338,7 @@ static ssize_t write_file_spec_scan_ctl(struct file *file, } else { res = -EINVAL; } - } else if (strncmp("background", buf, 9) == 0) { + } else if (strncmp("background", buf, 10) == 0) { res = ath10k_spectral_scan_config(ar, SPECTRAL_BACKGROUND); } else if (strncmp("manual", buf, 6) == 0) { res = ath10k_spectral_scan_config(ar, SPECTRAL_MANUAL); diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 14b13f07cd1fa87658f22f8352ff6bc32b3935ac..a35f78be8dec68d65f3fe7ff77a2f5edc244bd73 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -2792,7 +2792,7 @@ u32 ath9k_hw_gpio_get(struct ath_hw *ah, u32 gpio) WARN_ON(1); } - return val; + return !!val; } EXPORT_SYMBOL(ath9k_hw_gpio_get); diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c index 0dd454acf22ad39010acd0f901c0787263a9ec2d..aff473dfa10df6a6f8eb2140d89f0f4aff9d4e2c 100644 --- a/drivers/net/wireless/ath/ath9k/pci.c +++ b/drivers/net/wireless/ath/ath9k/pci.c @@ -26,7 +26,6 @@ static const struct pci_device_id ath_pci_id_table[] = { { PCI_VDEVICE(ATHEROS, 0x0023) }, /* PCI */ { PCI_VDEVICE(ATHEROS, 0x0024) }, /* PCI-E */ { PCI_VDEVICE(ATHEROS, 0x0027) }, /* PCI */ - { PCI_VDEVICE(ATHEROS, 0x0029) }, /* PCI */ #ifdef CONFIG_ATH9K_PCOEM /* Mini PCI AR9220 MB92 cards: Compex WLM200NX, Wistron DNMA-92 */ @@ -37,7 +36,7 @@ static const struct pci_device_id ath_pci_id_table[] = { .driver_data = ATH9K_PCI_LED_ACT_HI }, #endif - { PCI_VDEVICE(ATHEROS, 0x002A) }, /* PCI-E */ + { PCI_VDEVICE(ATHEROS, 0x0029) }, /* PCI */ #ifdef CONFIG_ATH9K_PCOEM { PCI_DEVICE_SUB(PCI_VENDOR_ID_ATHEROS, @@ -85,7 +84,11 @@ static const struct pci_device_id ath_pci_id_table[] = { 0x10CF, /* Fujitsu */ 0x1536), .driver_data 
= ATH9K_PCI_D3_L1_WAR }, +#endif + { PCI_VDEVICE(ATHEROS, 0x002A) }, /* PCI-E */ + +#ifdef CONFIG_ATH9K_PCOEM /* AR9285 card for Asus */ { PCI_DEVICE_SUB(PCI_VENDOR_ID_ATHEROS, 0x002B, diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 52bfbb9886117f21a95112f4283e19b679f4289f..e47286bf378eb349b30ae39cc823ec7891cda4e7 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2787,7 +2787,7 @@ void ath_tx_edma_tasklet(struct ath_softc *sc) fifo_list = &txq->txq_fifo[txq->txq_tailidx]; if (list_empty(fifo_list)) { ath_txq_unlock(sc, txq); - return; + break; } bf = list_first_entry(fifo_list, struct ath_buf, list); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c index d02ca1491d16cede66389540f8cb92dda5749ff3..8d3e53fac1dabc01ed875b6f8c2863bb908f770c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c @@ -91,7 +91,7 @@ #define IWL8000_FW_PRE "iwlwifi-8000C-" #define IWL8000_MODULE_FIRMWARE(api) \ - IWL8000_FW_PRE "-" __stringify(api) ".ucode" + IWL8000_FW_PRE __stringify(api) ".ucode" #define IWL8265_FW_PRE "iwlwifi-8265-" #define IWL8265_MODULE_FIRMWARE(api) \ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index fc771885e3831b17f589a688f359ac7e8c40cbfe..52de3c6d760ccf2ca94101d8ab6d60d6584ba591 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1144,9 +1144,10 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm, .frame_limit = IWL_FRAME_LIMIT, }; - /* Make sure reserved queue is still marked as such (or allocated) */ - mvm->queue_info[mvm_sta->reserved_queue].status = - IWL_MVM_QUEUE_RESERVED; + /* Make sure reserved queue is still marked as such (if allocated) */ + if (mvm_sta->reserved_queue != IEEE80211_INVAL_HW_QUEUE) + mvm->queue_info[mvm_sta->reserved_queue].status = + IWL_MVM_QUEUE_RESERVED; for (i = 0; i <= IWL_MAX_TID_COUNT; i++) { struct iwl_mvm_tid_data *tid_data = &mvm_sta->tid_data[i]; diff --git a/drivers/net/wireless/intersil/orinoco/mic.c b/drivers/net/wireless/intersil/orinoco/mic.c index bc7397d709d3ac5ff85b9a1057e43a93500587fc..08bc7822f8209d0a9136357edb18683092c86c18 100644 --- a/drivers/net/wireless/intersil/orinoco/mic.c +++ b/drivers/net/wireless/intersil/orinoco/mic.c @@ -16,7 +16,7 @@ /********************************************************************/ int orinoco_mic_init(struct orinoco_private *priv) { - priv->tx_tfm_mic = crypto_alloc_ahash("michael_mic", 0, + priv->tx_tfm_mic = crypto_alloc_shash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_mic)) { printk(KERN_DEBUG "orinoco_mic_init: could not allocate " @@ -25,7 +25,7 @@ int orinoco_mic_init(struct orinoco_private *priv) return -ENOMEM; } - priv->rx_tfm_mic = crypto_alloc_ahash("michael_mic", 0, + priv->rx_tfm_mic = crypto_alloc_shash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_mic)) { printk(KERN_DEBUG "orinoco_mic_init: could not allocate " @@ -40,17 +40,16 @@ int orinoco_mic_init(struct orinoco_private *priv) void orinoco_mic_free(struct orinoco_private *priv) { if (priv->tx_tfm_mic) - crypto_free_ahash(priv->tx_tfm_mic); + crypto_free_shash(priv->tx_tfm_mic); if (priv->rx_tfm_mic) - crypto_free_ahash(priv->rx_tfm_mic); + crypto_free_shash(priv->rx_tfm_mic); } -int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key, +int 
orinoco_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *da, u8 *sa, u8 priority, u8 *data, size_t data_len, u8 *mic) { - AHASH_REQUEST_ON_STACK(req, tfm_michael); - struct scatterlist sg[2]; + SHASH_DESC_ON_STACK(desc, tfm_michael); u8 hdr[ETH_HLEN + 2]; /* size of header + padding */ int err; @@ -67,18 +66,27 @@ int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key, hdr[ETH_ALEN * 2 + 2] = 0; hdr[ETH_ALEN * 2 + 3] = 0; - /* Use scatter gather to MIC header and data in one go */ - sg_init_table(sg, 2); - sg_set_buf(&sg[0], hdr, sizeof(hdr)); - sg_set_buf(&sg[1], data, data_len); + desc->tfm = tfm_michael; + desc->flags = 0; - if (crypto_ahash_setkey(tfm_michael, key, MIC_KEYLEN)) - return -1; + err = crypto_shash_setkey(tfm_michael, key, MIC_KEYLEN); + if (err) + return err; + + err = crypto_shash_init(desc); + if (err) + return err; + + err = crypto_shash_update(desc, hdr, sizeof(hdr)); + if (err) + return err; + + err = crypto_shash_update(desc, data, data_len); + if (err) + return err; + + err = crypto_shash_final(desc, mic); + shash_desc_zero(desc); - ahash_request_set_tfm(req, tfm_michael); - ahash_request_set_callback(req, 0, NULL, NULL); - ahash_request_set_crypt(req, sg, mic, data_len + sizeof(hdr)); - err = crypto_ahash_digest(req); - ahash_request_zero(req); return err; } diff --git a/drivers/net/wireless/intersil/orinoco/mic.h b/drivers/net/wireless/intersil/orinoco/mic.h index ce731d05cc98cd2d0b6415f66868d3ad517b4753..e8724e8892194bb3e732078b3561ab031071a061 100644 --- a/drivers/net/wireless/intersil/orinoco/mic.h +++ b/drivers/net/wireless/intersil/orinoco/mic.h @@ -6,6 +6,7 @@ #define _ORINOCO_MIC_H_ #include +#include #define MICHAEL_MIC_LEN 8 @@ -15,7 +16,7 @@ struct crypto_ahash; int orinoco_mic_init(struct orinoco_private *priv); void orinoco_mic_free(struct orinoco_private *priv); -int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key, +int orinoco_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *da, u8 *sa, u8 priority, u8 *data, size_t data_len, u8 *mic); diff --git a/drivers/net/wireless/intersil/orinoco/orinoco.h b/drivers/net/wireless/intersil/orinoco/orinoco.h index 2f0c84b1c440cd1160bdebc3cabcb13be110a73e..5fa1c3e3713f835387353ba781cbb24ce95df6dc 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco.h +++ b/drivers/net/wireless/intersil/orinoco/orinoco.h @@ -152,8 +152,8 @@ struct orinoco_private { u8 *wpa_ie; int wpa_ie_len; - struct crypto_ahash *rx_tfm_mic; - struct crypto_ahash *tx_tfm_mic; + struct crypto_shash *rx_tfm_mic; + struct crypto_shash *tx_tfm_mic; unsigned int wpa_enabled:1; unsigned int tkip_cm_active:1; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index a5e6ec2152bff8808b38bd8f96dfe0ff37a19918..82d949ede2940f023e111f07ca0b8392cff0588b 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4372,6 +4372,13 @@ void rtl8xxxu_gen1_report_connect(struct rtl8xxxu_priv *priv, void rtl8xxxu_gen2_report_connect(struct rtl8xxxu_priv *priv, u8 macid, bool connect) { +#ifdef RTL8XXXU_GEN2_REPORT_CONNECT + /* + * Barry Day reports this causes issues with 8192eu and 8723bu + * devices reconnecting. The reason for this is unclear, but + * until it is better understood, leave the code in place but + * disabled, so it is not lost. 
+ */ struct h2c_cmd h2c; memset(&h2c, 0, sizeof(struct h2c_cmd)); @@ -4383,6 +4390,7 @@ void rtl8xxxu_gen2_report_connect(struct rtl8xxxu_priv *priv, h2c.media_status_rpt.parm &= ~BIT(0); rtl8xxxu_gen2_h2c_cmd(priv, &h2c, sizeof(h2c.media_status_rpt)); +#endif } void rtl8xxxu_gen1_init_aggregation(struct rtl8xxxu_priv *priv) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 264466f59c57029e71c64109b58c0b359557f5fe..4ac928bf1f8e6c77f68386c353c20f26168c77e4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1303,12 +1303,13 @@ EXPORT_SYMBOL_GPL(rtl_action_proc); static void setup_arp_tx(struct rtl_priv *rtlpriv, struct rtl_ps_ctl *ppsc) { + struct ieee80211_hw *hw = rtlpriv->hw; + rtlpriv->ra.is_special_data = true; if (rtlpriv->cfg->ops->get_btc_status()) rtlpriv->btcoexist.btc_ops->btc_special_packet_notify( rtlpriv, 1); - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); + rtl_lps_leave(hw); ppsc->last_delaylps_stamp_jiffies = jiffies; } @@ -1381,8 +1382,7 @@ u8 rtl_is_special_data(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx, if (is_tx) { rtlpriv->ra.is_special_data = true; - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); + rtl_lps_leave(hw); ppsc->last_delaylps_stamp_jiffies = jiffies; } diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 8e7f23c11680a5fa98352e5052f02fa9ae091de8..4da4e458142c46247f1d71008a74acb6d8b8ef90 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -1150,10 +1150,8 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, } else { mstatus = RT_MEDIA_DISCONNECT; - if (mac->link_state == MAC80211_LINKED) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); - } + if (mac->link_state == MAC80211_LINKED) + rtl_lps_leave(hw); if (ppsc->p2p_ps_info.p2p_ps_mode > P2P_PS_NONE) rtl_p2p_ps_cmd(hw, P2P_PS_DISABLE); mac->link_state = MAC80211_NOLINK; @@ -1431,8 +1429,7 @@ static void rtl_op_sw_scan_start(struct ieee80211_hw *hw, } if (mac->link_state == MAC80211_LINKED) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); + rtl_lps_leave(hw); mac->link_state = MAC80211_LINKED_SCANNING; } else { rtl_ips_nic_on(hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 0dfa9eac3926dd488262529cd536c23c7eea08be..5be4fc96002d1f64d98271c4c8313793f2c0251e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -663,11 +663,9 @@ static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio) } if (((rtlpriv->link_info.num_rx_inperiod + - rtlpriv->link_info.num_tx_inperiod) > 8) || - (rtlpriv->link_info.num_rx_inperiod > 2)) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); - } + rtlpriv->link_info.num_tx_inperiod) > 8) || + (rtlpriv->link_info.num_rx_inperiod > 2)) + rtl_lps_leave(hw); } static int _rtl_pci_init_one_rxdesc(struct ieee80211_hw *hw, @@ -918,10 +916,8 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) } if (((rtlpriv->link_info.num_rx_inperiod + rtlpriv->link_info.num_tx_inperiod) > 8) || - (rtlpriv->link_info.num_rx_inperiod > 2)) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); - } + (rtlpriv->link_info.num_rx_inperiod > 2)) + 
rtl_lps_leave(hw); skb = new_skb; no_new: if (rtlpriv->use_new_trx_flow) { diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index 18d979affc1861ebc97dad9fe083d7159d40b746..d0ffc4d508cff2df70606c9a88845a22884c7bc9 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -407,8 +407,8 @@ void rtl_lps_set_psmode(struct ieee80211_hw *hw, u8 rt_psmode) } } -/*Enter the leisure power save mode.*/ -void rtl_lps_enter(struct ieee80211_hw *hw) +/* Interrupt safe routine to enter the leisure power save mode.*/ +static void rtl_lps_enter_core(struct ieee80211_hw *hw) { struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); @@ -444,10 +444,9 @@ void rtl_lps_enter(struct ieee80211_hw *hw) spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); } -EXPORT_SYMBOL(rtl_lps_enter); -/*Leave the leisure power save mode.*/ -void rtl_lps_leave(struct ieee80211_hw *hw) +/* Interrupt safe routine to leave the leisure power save mode.*/ +static void rtl_lps_leave_core(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); @@ -477,7 +476,6 @@ void rtl_lps_leave(struct ieee80211_hw *hw) } spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); } -EXPORT_SYMBOL(rtl_lps_leave); /* For sw LPS*/ void rtl_swlps_beacon(struct ieee80211_hw *hw, void *data, unsigned int len) @@ -670,12 +668,34 @@ void rtl_lps_change_work_callback(struct work_struct *work) struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->enter_ps) - rtl_lps_enter(hw); + rtl_lps_enter_core(hw); else - rtl_lps_leave(hw); + rtl_lps_leave_core(hw); } EXPORT_SYMBOL_GPL(rtl_lps_change_work_callback); +void rtl_lps_enter(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + + if (!in_interrupt()) + return rtl_lps_enter_core(hw); + rtlpriv->enter_ps = true; + schedule_work(&rtlpriv->works.lps_change_work); +} +EXPORT_SYMBOL_GPL(rtl_lps_enter); + +void rtl_lps_leave(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + + if (!in_interrupt()) + return rtl_lps_leave_core(hw); + rtlpriv->enter_ps = false; + schedule_work(&rtlpriv->works.lps_change_work); +} +EXPORT_SYMBOL_GPL(rtl_lps_leave); + void rtl_swlps_wq_callback(void *data) { struct rtl_works *rtlworks = container_of_dwork_rtl(data, diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 32aa5c1d070a07b429e97c03c9ff6f14c48cb91e..3837bbdecf059d40d26b0343e650f1afb70010de 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1067,6 +1067,7 @@ int rtl_usb_probe(struct usb_interface *intf, return -ENOMEM; } rtlpriv = hw->priv; + rtlpriv->hw = hw; rtlpriv->usb_data = kzalloc(RTL_USB_MAX_RX_COUNT * sizeof(u32), GFP_KERNEL); if (!rtlpriv->usb_data) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index abe5c6bc756c255193d803039973971368b9471d..1480734c2d6efb3b8ade93e76a3de5d5fe776848 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -957,6 +957,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val) { resource_size_t allocated = 0, available = 0; struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_common *ndns = to_ndns(dev); struct nd_mapping *nd_mapping; struct nvdimm_drvdata *ndd; struct nd_label_id label_id; @@ -964,7 +965,7 @@ static ssize_t __size_store(struct 
device *dev, unsigned long long val) u8 *uuid = NULL; int rc, i; - if (dev->driver || to_ndns(dev)->claim) + if (dev->driver || ndns->claim) return -EBUSY; if (is_namespace_pmem(dev)) { @@ -1034,20 +1035,16 @@ static ssize_t __size_store(struct device *dev, unsigned long long val) nd_namespace_pmem_set_resource(nd_region, nspm, val * nd_region->ndr_mappings); - } else if (is_namespace_blk(dev)) { - struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); - - /* - * Try to delete the namespace if we deleted all of its - * allocation, this is not the seed device for the - * region, and it is not actively claimed by a btt - * instance. - */ - if (val == 0 && nd_region->ns_seed != dev - && !nsblk->common.claim) - nd_device_unregister(dev, ND_ASYNC); } + /* + * Try to delete the namespace if we deleted all of its + * allocation, this is not the seed device for the region, and + * it is not actively claimed by a btt instance. + */ + if (val == 0 && nd_region->ns_seed != dev && !ndns->claim) + nd_device_unregister(dev, ND_ASYNC); + return rc; } diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index cea8350fbc7ec2f2e617199fd2bc8b8cb1df2fbb..a2ac9e641aa9341f2fcca9fa8b968fd874e80a90 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -108,7 +108,7 @@ static ssize_t align_show(struct device *dev, { struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); - return sprintf(buf, "%lx\n", nd_pfn->align); + return sprintf(buf, "%ld\n", nd_pfn->align); } static ssize_t __align_store(struct nd_pfn *nd_pfn, const char *buf) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 79e679d12f3b3667ad815e0bd39ab428da6651ac..da10b484bd25fef4772c23604f7bd337e54c3f6b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1122,12 +1122,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap) if (ret) return ret; - /* Checking for ctrl->tagset is a trick to avoid sleeping on module - * load, since we only need the quirk on reset_controller. Notice - * that the HGST device needs this delay only in firmware activation - * procedure; unfortunately we have no (easy) way to verify this. 
- */ - if ((ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) && ctrl->tagset) + if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) msleep(NVME_QUIRK_DELAY_AMOUNT); return nvme_wait_ready(ctrl, cap, false); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index af5e2dc4a3d503bb8bca93951bcfa3be58685ca5..011f88e5663e72591ad60f5ed21a97682ad2b7e2 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -271,7 +271,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item, mutex_lock(&subsys->lock); ret = -EBUSY; - if (nvmet_ns_enabled(ns)) + if (ns->enabled) goto out_unlock; kfree(ns->device_path); @@ -307,7 +307,7 @@ static ssize_t nvmet_ns_device_nguid_store(struct config_item *item, int ret = 0; mutex_lock(&subsys->lock); - if (nvmet_ns_enabled(ns)) { + if (ns->enabled) { ret = -EBUSY; goto out_unlock; } @@ -339,7 +339,7 @@ CONFIGFS_ATTR(nvmet_ns_, device_nguid); static ssize_t nvmet_ns_enable_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", nvmet_ns_enabled(to_nvmet_ns(item))); + return sprintf(page, "%d\n", to_nvmet_ns(item)->enabled); } static ssize_t nvmet_ns_enable_store(struct config_item *item, diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a21437a33adbef0c51395c0b88f2e8a10995ea86..55ce769cecee6c8e7a57e11b1d5bc1915c129771 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -264,7 +264,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) int ret = 0; mutex_lock(&subsys->lock); - if (!list_empty(&ns->dev_link)) + if (ns->enabled) goto out_unlock; ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, @@ -309,6 +309,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0); + ns->enabled = true; ret = 0; out_unlock: mutex_unlock(&subsys->lock); @@ -325,11 +326,11 @@ void nvmet_ns_disable(struct nvmet_ns *ns) struct nvmet_ctrl *ctrl; mutex_lock(&subsys->lock); - if (list_empty(&ns->dev_link)) { - mutex_unlock(&subsys->lock); - return; - } - list_del_init(&ns->dev_link); + if (!ns->enabled) + goto out_unlock; + + ns->enabled = false; + list_del_rcu(&ns->dev_link); mutex_unlock(&subsys->lock); /* @@ -351,6 +352,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns) if (ns->bdev) blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ); +out_unlock: mutex_unlock(&subsys->lock); } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 76b6eedccaf92ce4708cc4b730c03775d5469f0f..7655a351320f4e08eec2c1e67fef95f037ecd323 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -47,6 +47,7 @@ struct nvmet_ns { loff_t size; u8 nguid[16]; + bool enabled; struct nvmet_subsys *subsys; const char *device_path; @@ -61,11 +62,6 @@ static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) return container_of(to_config_group(item), struct nvmet_ns, group); } -static inline bool nvmet_ns_enabled(struct nvmet_ns *ns) -{ - return !list_empty_careful(&ns->dev_link); -} - struct nvmet_cq { u16 qid; u16 size; diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index f63d4b0deff009ad8963b61034b95d8563ca919b..a53982a330ea40444ac4e161b5838bb4aed1a013 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -176,7 +176,12 @@ int of_node_to_nid(struct device_node *device) np->name); of_node_put(np); - if (!r) + /* + * If numa=off passed on command line, or with a defective + * device tree, the nid may not be in the 
set of possible + * nodes. Check for this case and return NUMA_NO_NODE. + */ + if (!r && nid < MAX_NUMNODES && node_possible(nid)) return nid; return NUMA_NO_NODE; diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c index bed19994c1e94d4e32c134e58133c4acd8b8bd88..af8f6e92e8851ca84b459e4587508960b225d336 100644 --- a/drivers/pci/host/pcie-designware.c +++ b/drivers/pci/host/pcie-designware.c @@ -807,11 +807,6 @@ void dw_pcie_setup_rc(struct pcie_port *pp) { u32 val; - /* get iATU unroll support */ - pp->iatu_unroll_enabled = dw_pcie_iatu_unroll_enabled(pp); - dev_dbg(pp->dev, "iATU unroll: %s\n", - pp->iatu_unroll_enabled ? "enabled" : "disabled"); - /* set the number of lanes */ val = dw_pcie_readl_rc(pp, PCIE_PORT_LINK_CONTROL); val &= ~PORT_LINK_MODE_MASK; @@ -882,6 +877,11 @@ void dw_pcie_setup_rc(struct pcie_port *pp) * we should not program the ATU here. */ if (!pp->ops->rd_other_conf) { + /* get iATU unroll support */ + pp->iatu_unroll_enabled = dw_pcie_iatu_unroll_enabled(pp); + dev_dbg(pp->dev, "iATU unroll: %s\n", + pp->iatu_unroll_enabled ? "enabled" : "disabled"); + dw_pcie_prog_outbound_atu(pp, PCIE_ATU_REGION_INDEX0, PCIE_ATU_TYPE_MEM, pp->mem_base, pp->mem_bus_addr, pp->mem_size); diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c index e04f69beb42d0f8ef04de5da85702f238ff2a8a5..3452983d3569bd61c3069cbe59512e43a729052d 100644 --- a/drivers/pci/host/pcie-rockchip.c +++ b/drivers/pci/host/pcie-rockchip.c @@ -533,7 +533,7 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) /* Fix the transmitted FTS count desired to exit from L0s. */ status = rockchip_pcie_read(rockchip, PCIE_CORE_CTRL_PLC1); - status = (status & PCIE_CORE_CTRL_PLC1_FTS_MASK) | + status = (status & ~PCIE_CORE_CTRL_PLC1_FTS_MASK) | (PCIE_CORE_CTRL_PLC1_FTS_CNT << PCIE_CORE_CTRL_PLC1_FTS_SHIFT); rockchip_pcie_write(rockchip, status, PCIE_CORE_CTRL_PLC1); @@ -590,8 +590,8 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) /* Check the final link width from negotiated lane counter from MGMT */ status = rockchip_pcie_read(rockchip, PCIE_CORE_CTRL); - status = 0x1 << ((status & PCIE_CORE_PL_CONF_LANE_MASK) >> - PCIE_CORE_PL_CONF_LANE_MASK); + status = 0x1 << ((status & PCIE_CORE_PL_CONF_LANE_MASK) >> + PCIE_CORE_PL_CONF_LANE_SHIFT); dev_dbg(dev, "current link width is x%d\n", status); rockchip_pcie_write(rockchip, ROCKCHIP_VENDOR_ID, diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index dc67f39779ecd3cbfbd7d0f60eaafc627e85e2d2..c614ff7c3bc3f9b28a0c48cd4c1c236899477606 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -257,8 +257,13 @@ static int dlpar_add_phb(char *drc_name, struct device_node *dn) static int dlpar_add_vio_slot(char *drc_name, struct device_node *dn) { - if (vio_find_node(dn)) + struct vio_dev *vio_dev; + + vio_dev = vio_find_node(dn); + if (vio_dev) { + put_device(&vio_dev->dev); return -EINVAL; + } if (!vio_register_device_node(dn)) { printk(KERN_ERR @@ -334,6 +339,9 @@ static int dlpar_remove_vio_slot(char *drc_name, struct device_node *dn) return -EINVAL; vio_unregister_device(vio_dev); + + put_device(&vio_dev->dev); + return 0; } diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index ad70507cfb566a2291498d4ba723e1a5a4aebd3c..3455f752d5e4fb3dddd6d5027031e94784a25ec5 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1294,7 +1294,8 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) } 
else if (dev->msi_enabled) {
 		struct msi_desc *entry = first_pci_msi_entry(dev);

-		if (WARN_ON_ONCE(!entry || nr >= entry->nvec_used))
+		if (WARN_ON_ONCE(!entry || !entry->affinity ||
+				 nr >= entry->nvec_used))
 			return NULL;

 		return &entry->affinity[nr];
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index ba34907538f6160898cc1f04782a716cfe1d27d4..eda6a7cf0e5434e5ebb202161cc98d2c6176c753 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2106,6 +2106,10 @@ bool pci_dev_run_wake(struct pci_dev *dev)
 	if (!dev->pme_support)
 		return false;

+	/* PME-capable in principle, but not from the intended sleep state */
+	if (!pci_pme_capable(dev, pci_target_state(dev)))
+		return false;
+
 	while (bus->parent) {
 		struct pci_dev *bridge = bus->self;
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 0ec649d961d7a6ee515747c3458a8ce1df108099..b0916b126923ea87759b84d72c6f6bf2d80dcecf 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -518,25 +518,32 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev)
 	link = kzalloc(sizeof(*link), GFP_KERNEL);
 	if (!link)
 		return NULL;
+
 	INIT_LIST_HEAD(&link->sibling);
 	INIT_LIST_HEAD(&link->children);
 	INIT_LIST_HEAD(&link->link);
 	link->pdev = pdev;
-	if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) {
+
+	/*
+	 * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe
+	 * hierarchies.
+	 */
+	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT ||
+	    pci_pcie_type(pdev) == PCI_EXP_TYPE_PCIE_BRIDGE) {
+		link->root = link;
+	} else {
 		struct pcie_link_state *parent;
+
 		parent = pdev->bus->parent->self->link_state;
 		if (!parent) {
 			kfree(link);
 			return NULL;
 		}
+
 		link->parent = parent;
+		link->root = link->parent->root;
 		list_add(&link->link, &parent->children);
 	}
-	/* Setup a pointer to the root port link */
-	if (!link->parent)
-		link->root = link;
-	else
-		link->root = link->parent->root;

 	list_add(&link->sibling, &link_list);
 	pdev->link_state = link;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 104c46d531218874044775b19e68f2ba85c85714..300770cdc084adec39097cbeb1bf5a393c78f90e 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1050,6 +1050,7 @@ void set_pcie_port_type(struct pci_dev *pdev)
 	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
 	if (!pos)
 		return;
+	pdev->pcie_cap = pos;
 	pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
 	pdev->pcie_flags_reg = reg16;
@@ -1057,13 +1058,14 @@ void set_pcie_port_type(struct pci_dev *pdev)
 	pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD;

 	/*
-	 * A Root Port is always the upstream end of a Link. No PCIe
-	 * component has two Links. Two Links are connected by a Switch
-	 * that has a Port on each Link and internal logic to connect the
-	 * two Ports.
+	 * A Root Port or a PCI-to-PCIe bridge is always the upstream end
+	 * of a Link. No PCIe component has two Links. Two Links are
+	 * connected by a Switch that has a Port on each Link and internal
+	 * logic to connect the two Ports.
 	 */
 	type = pci_pcie_type(pdev);
-	if (type == PCI_EXP_TYPE_ROOT_PORT)
+	if (type == PCI_EXP_TYPE_ROOT_PORT ||
+	    type == PCI_EXP_TYPE_PCIE_BRIDGE)
 		pdev->has_secondary_link = 1;
 	else if (type == PCI_EXP_TYPE_UPSTREAM ||
 		 type == PCI_EXP_TYPE_DOWNSTREAM) {
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index c232729f5b1b6863f914fdab6c836d4d74204453..3a035e073889a506164d563105cdf3773dbf48f7 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3137,8 +3137,9 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b5, quirk_remove_d3_delay);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b7, quirk_remove_d3_delay);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x2298, quirk_remove_d3_delay);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x229c, quirk_remove_d3_delay);
+
 /*
- * Some devices may pass our check in pci_intx_mask_supported if
+ * Some devices may pass our check in pci_intx_mask_supported() if
  * PCI_COMMAND_INTX_DISABLE works though they actually do not properly
  * support this feature.
  */
@@ -3146,53 +3147,139 @@ static void quirk_broken_intx_masking(struct pci_dev *dev)
 {
 	dev->broken_intx_masking = 1;
 }
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CHELSIO, 0x0030,
-			quirk_broken_intx_masking);
-DECLARE_PCI_FIXUP_HEADER(0x1814, 0x0601, /* Ralink RT2800 802.11n PCI */
-			quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x0030,
+			quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(0x1814, 0x0601, /* Ralink RT2800 802.11n PCI */
+			quirk_broken_intx_masking);
+
 /*
  * Realtek RTL8169 PCI Gigabit Ethernet Controller (rev 10)
  * Subsystem: Realtek RTL8169/8110 Family PCI Gigabit Ethernet NIC
  *
  * RTL8110SC - Fails under PCI device assignment using DisINTx masking.
  */
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_REALTEK, 0x8169,
-			quirk_broken_intx_masking);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID,
-			quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REALTEK, 0x8169,
+			quirk_broken_intx_masking);

 /*
  * Intel i40e (XL710/X710) 10/20/40GbE NICs all have broken INTx masking,
  * DisINTx can be set but the interrupt status bit is non-functional.
*/ -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1572, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1574, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1580, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1581, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1583, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1584, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1585, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1586, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1587, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1588, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1589, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x37d0, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x37d1, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x37d2, - quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1572, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1574, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1580, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1581, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1583, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1584, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1585, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1586, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1587, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1588, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1589, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x37d0, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x37d1, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x37d2, + quirk_broken_intx_masking); + +static u16 mellanox_broken_intx_devs[] = { + PCI_DEVICE_ID_MELLANOX_HERMON_SDR, + PCI_DEVICE_ID_MELLANOX_HERMON_DDR, + PCI_DEVICE_ID_MELLANOX_HERMON_QDR, + PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2, + PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2, + PCI_DEVICE_ID_MELLANOX_HERMON_EN, + PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2, + PCI_DEVICE_ID_MELLANOX_CONNECTX_EN, + PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2, + PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2, + PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2, + PCI_DEVICE_ID_MELLANOX_CONNECTX2, + PCI_DEVICE_ID_MELLANOX_CONNECTX3, + PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO, +}; + +#define CONNECTX_4_CURR_MAX_MINOR 99 +#define CONNECTX_4_INTX_SUPPORT_MINOR 14 + +/* + * Check ConnectX-4/LX FW version to see if it supports legacy interrupts. + * If so, don't mark it as broken. + * FW minor > 99 means older FW version format and no INTx masking support. + * FW minor < 14 means new FW version format and no INTx masking support. 
+ */ +static void mellanox_check_broken_intx_masking(struct pci_dev *pdev) +{ + __be32 __iomem *fw_ver; + u16 fw_major; + u16 fw_minor; + u16 fw_subminor; + u32 fw_maj_min; + u32 fw_sub_min; + int i; + + for (i = 0; i < ARRAY_SIZE(mellanox_broken_intx_devs); i++) { + if (pdev->device == mellanox_broken_intx_devs[i]) { + pdev->broken_intx_masking = 1; + return; + } + } + + /* Getting here means Connect-IB cards and up. Connect-IB has no INTx + * support so shouldn't be checked further + */ + if (pdev->device == PCI_DEVICE_ID_MELLANOX_CONNECTIB) + return; + + if (pdev->device != PCI_DEVICE_ID_MELLANOX_CONNECTX4 && + pdev->device != PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) + return; + + /* For ConnectX-4 and ConnectX-4LX, need to check FW support */ + if (pci_enable_device_mem(pdev)) { + dev_warn(&pdev->dev, "Can't enable device memory\n"); + return; + } + + fw_ver = ioremap(pci_resource_start(pdev, 0), 4); + if (!fw_ver) { + dev_warn(&pdev->dev, "Can't map ConnectX-4 initialization segment\n"); + goto out; + } + + /* Reading from resource space should be 32b aligned */ + fw_maj_min = ioread32be(fw_ver); + fw_sub_min = ioread32be(fw_ver + 1); + fw_major = fw_maj_min & 0xffff; + fw_minor = fw_maj_min >> 16; + fw_subminor = fw_sub_min & 0xffff; + if (fw_minor > CONNECTX_4_CURR_MAX_MINOR || + fw_minor < CONNECTX_4_INTX_SUPPORT_MINOR) { + dev_warn(&pdev->dev, "ConnectX-4: FW %u.%u.%u doesn't support INTx masking, disabling. Please upgrade FW to %d.14.1100 and up for INTx support\n", + fw_major, fw_minor, fw_subminor, pdev->device == + PCI_DEVICE_ID_MELLANOX_CONNECTX4 ? 12 : 14); + pdev->broken_intx_masking = 1; + } + + iounmap(fw_ver); + +out: + pci_disable_device(pdev); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID, + mellanox_check_broken_intx_masking); static void quirk_no_bus_reset(struct pci_dev *dev) { @@ -3255,6 +3342,25 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PORT_RIDGE, quirk_thunderbolt_hotplug_msi); +static void quirk_chelsio_extend_vpd(struct pci_dev *dev) +{ + pci_set_vpd_size(dev, 8192); +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x20, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x21, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x22, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x23, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x24, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x25, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x26, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x30, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x31, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x32, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x35, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x36, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x37, quirk_chelsio_extend_vpd); + #ifdef CONFIG_ACPI /* * Apple: Shutdown Cactus Ridge Thunderbolt controller. 
diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index 79c4e14a5a75e94fec9315588896bcfe9a104c11..5ef7e875b50e89a9d39dcfbd67e927b42ff9d087 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -778,10 +778,10 @@ int imx_pinctrl_probe(struct platform_device *pdev, imx_pinctrl_desc->name = dev_name(&pdev->dev); imx_pinctrl_desc->pins = info->pins; imx_pinctrl_desc->npins = info->npins; - imx_pinctrl_desc->pctlops = &imx_pctrl_ops, - imx_pinctrl_desc->pmxops = &imx_pmx_ops, - imx_pinctrl_desc->confops = &imx_pinconf_ops, - imx_pinctrl_desc->owner = THIS_MODULE, + imx_pinctrl_desc->pctlops = &imx_pctrl_ops; + imx_pinctrl_desc->pmxops = &imx_pmx_ops; + imx_pinctrl_desc->confops = &imx_pinconf_ops; + imx_pinctrl_desc->owner = THIS_MODULE; ret = imx_pinctrl_probe_dt(pdev, info); if (ret) { diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 71bbeb9321bad587504bf5ca4f4c889019b4b9f4..583ae3f38fc09ad7c2654d654bb0be7a6485ef8a 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -731,16 +731,23 @@ static void __iomem *byt_gpio_reg(struct byt_gpio *vg, unsigned int offset, int reg) { struct byt_community *comm = byt_get_community(vg, offset); - u32 reg_offset = 0; + u32 reg_offset; if (!comm) return NULL; offset -= comm->pin_base; - if (reg == BYT_INT_STAT_REG) + switch (reg) { + case BYT_INT_STAT_REG: reg_offset = (offset / 32) * 4; - else + break; + case BYT_DEBOUNCE_REG: + reg_offset = 0; + break; + default: reg_offset = comm->pad_map[offset] * 16; + break; + } return comm->reg_base + reg_offset + reg; } @@ -1092,6 +1099,7 @@ static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset, enum pin_config_param param = pinconf_to_config_param(*config); void __iomem *conf_reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG); void __iomem *val_reg = byt_gpio_reg(vg, offset, BYT_VAL_REG); + void __iomem *db_reg = byt_gpio_reg(vg, offset, BYT_DEBOUNCE_REG); unsigned long flags; u32 conf, pull, val, debounce; u16 arg = 0; @@ -1128,7 +1136,7 @@ static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset, return -EINVAL; raw_spin_lock_irqsave(&vg->lock, flags); - debounce = readl(byt_gpio_reg(vg, offset, BYT_DEBOUNCE_REG)); + debounce = readl(db_reg); raw_spin_unlock_irqrestore(&vg->lock, flags); switch (debounce & BYT_DEBOUNCE_PULSE_MASK) { @@ -1176,6 +1184,7 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, unsigned int param, arg; void __iomem *conf_reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG); void __iomem *val_reg = byt_gpio_reg(vg, offset, BYT_VAL_REG); + void __iomem *db_reg = byt_gpio_reg(vg, offset, BYT_DEBOUNCE_REG); unsigned long flags; u32 conf, val, debounce; int i, ret = 0; @@ -1238,36 +1247,40 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, break; case PIN_CONFIG_INPUT_DEBOUNCE: - debounce = readl(byt_gpio_reg(vg, offset, - BYT_DEBOUNCE_REG)); - conf &= ~BYT_DEBOUNCE_PULSE_MASK; + debounce = readl(db_reg); + debounce &= ~BYT_DEBOUNCE_PULSE_MASK; switch (arg) { + case 0: + conf &= BYT_DEBOUNCE_EN; + break; case 375: - conf |= BYT_DEBOUNCE_PULSE_375US; + debounce |= BYT_DEBOUNCE_PULSE_375US; break; case 750: - conf |= BYT_DEBOUNCE_PULSE_750US; + debounce |= BYT_DEBOUNCE_PULSE_750US; break; case 1500: - conf |= BYT_DEBOUNCE_PULSE_1500US; + debounce |= BYT_DEBOUNCE_PULSE_1500US; break; case 3000: - conf |= BYT_DEBOUNCE_PULSE_3MS; + debounce |= 
BYT_DEBOUNCE_PULSE_3MS; break; case 6000: - conf |= BYT_DEBOUNCE_PULSE_6MS; + debounce |= BYT_DEBOUNCE_PULSE_6MS; break; case 12000: - conf |= BYT_DEBOUNCE_PULSE_12MS; + debounce |= BYT_DEBOUNCE_PULSE_12MS; break; case 24000: - conf |= BYT_DEBOUNCE_PULSE_24MS; + debounce |= BYT_DEBOUNCE_PULSE_24MS; break; default: ret = -EINVAL; } + if (!ret) + writel(debounce, db_reg); break; default: ret = -ENOTSUPP; @@ -1606,7 +1619,9 @@ static void byt_gpio_irq_handler(struct irq_desc *desc) continue; } + raw_spin_lock(&vg->lock); pending = readl(reg); + raw_spin_unlock(&vg->lock); for_each_set_bit(pin, &pending, 32) { virq = irq_find_mapping(vg->chip.irqdomain, base + pin); generic_handle_irq(virq); diff --git a/drivers/pinctrl/intel/pinctrl-broxton.c b/drivers/pinctrl/intel/pinctrl-broxton.c index 59cb7a6fc5bef316d042f93da72792edca2ea8d9..901b356b09d71679a2b4a03f7cd57b30a22fa6f4 100644 --- a/drivers/pinctrl/intel/pinctrl-broxton.c +++ b/drivers/pinctrl/intel/pinctrl-broxton.c @@ -19,7 +19,7 @@ #define BXT_PAD_OWN 0x020 #define BXT_HOSTSW_OWN 0x080 -#define BXT_PADCFGLOCK 0x090 +#define BXT_PADCFGLOCK 0x060 #define BXT_GPI_IE 0x110 #define BXT_COMMUNITY(s, e) \ diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c index 7826c7f0cb7cac1959e0c8efb27a0c546f09b060..9931be6af0cadf57a5d8fee628ab0bac27dd8ac3 100644 --- a/drivers/pinctrl/intel/pinctrl-merrifield.c +++ b/drivers/pinctrl/intel/pinctrl-merrifield.c @@ -794,6 +794,9 @@ static int mrfld_config_set(struct pinctrl_dev *pctldev, unsigned int pin, unsigned int i; int ret; + if (!mrfld_buf_available(mp, pin)) + return -ENOTSUPP; + for (i = 0; i < nconfigs; i++) { switch (pinconf_to_config_param(configs[i])) { case PIN_CONFIG_BIAS_DISABLE: diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 57122eda155afdeba61202506e779ab320468017..9443c9d408c6025604f96c54d9b3921fd3c99772 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -212,7 +212,7 @@ static int meson_pmx_request_gpio(struct pinctrl_dev *pcdev, { struct meson_pinctrl *pc = pinctrl_dev_get_drvdata(pcdev); - meson_pmx_disable_other_groups(pc, range->pin_base + offset, -1); + meson_pmx_disable_other_groups(pc, offset, -1); return 0; } diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index aea310a918210ebc96bcb2739ac3c50d10329f8d..c9a146948192dba19ca5da1587791c25b315d628 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -382,26 +382,21 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type) { int ret = 0; u32 pin_reg; - unsigned long flags; - bool level_trig; - u32 active_level; + unsigned long flags, irq_flags; struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct amd_gpio *gpio_dev = gpiochip_get_data(gc); spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + (d->hwirq)*4); - /* - * When level_trig is set EDGE and active_level is set HIGH in BIOS - * default settings, ignore incoming settings from client and use - * BIOS settings to configure GPIO register. 
+ /* Ignore the settings coming from the client and + * read the values from the ACPI tables + * while setting the trigger type */ - level_trig = !(pin_reg & (LEVEL_TRIGGER << LEVEL_TRIG_OFF)); - active_level = pin_reg & (ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF); - if(level_trig && - ((active_level >> ACTIVE_LEVEL_OFF) == ACTIVE_HIGH)) - type = IRQ_TYPE_EDGE_FALLING; + irq_flags = irq_get_trigger_type(d->irq); + if (irq_flags != IRQ_TYPE_NONE) + type = irq_flags; switch (type & IRQ_TYPE_SENSE_MASK) { case IRQ_TYPE_EDGE_RISING: diff --git a/drivers/pinctrl/sh-pfc/core.c b/drivers/pinctrl/sh-pfc/core.c index f3a8897d4e8f407f04980c8c404b0e5c4643ec2e..cf80ce1dd7cea5a0fdd9c524a6c5a03e1d93e0f2 100644 --- a/drivers/pinctrl/sh-pfc/core.c +++ b/drivers/pinctrl/sh-pfc/core.c @@ -389,6 +389,21 @@ int sh_pfc_config_mux(struct sh_pfc *pfc, unsigned mark, int pinmux_type) return 0; } +const struct sh_pfc_bias_info * +sh_pfc_pin_to_bias_info(const struct sh_pfc_bias_info *info, + unsigned int num, unsigned int pin) +{ + unsigned int i; + + for (i = 0; i < num; i++) + if (info[i].pin == pin) + return &info[i]; + + WARN_ONCE(1, "Pin %u is not in bias info list\n", pin); + + return NULL; +} + static int sh_pfc_init_ranges(struct sh_pfc *pfc) { struct sh_pfc_pin_range *range; diff --git a/drivers/pinctrl/sh-pfc/core.h b/drivers/pinctrl/sh-pfc/core.h index 0bbdea5849f41713a135b5dd3ae6bc810717e2b3..6d598dd63720856774c0f7b2267f631fc67204a0 100644 --- a/drivers/pinctrl/sh-pfc/core.h +++ b/drivers/pinctrl/sh-pfc/core.h @@ -33,4 +33,8 @@ void sh_pfc_write_reg(struct sh_pfc *pfc, u32 reg, unsigned int width, int sh_pfc_get_pin_index(struct sh_pfc *pfc, unsigned int pin); int sh_pfc_config_mux(struct sh_pfc *pfc, unsigned mark, int pinmux_type); +const struct sh_pfc_bias_info * +sh_pfc_pin_to_bias_info(const struct sh_pfc_bias_info *info, + unsigned int num, unsigned int pin); + #endif /* __SH_PFC_CORE_H__ */ diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7795.c b/drivers/pinctrl/sh-pfc/pfc-r8a7795.c index 2e8cc2adbed7e2514096b9b75c4a9012d90066a4..84cee66b1e08433c3c7c1d51c24112d18d231c95 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7795.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7795.c @@ -5188,184 +5188,183 @@ static int r8a7795_pin_to_pocctrl(struct sh_pfc *pfc, unsigned int pin, u32 *poc #define PU5 0x14 #define PU6 0x18 -static const struct { - u16 reg : 11; - u16 bit : 5; -} pullups[] = { - [RCAR_GP_PIN(2, 11)] = { PU0, 31 }, /* AVB_PHY_INT */ - [RCAR_GP_PIN(2, 10)] = { PU0, 30 }, /* AVB_MAGIC */ - [RCAR_GP_PIN(2, 9)] = { PU0, 29 }, /* AVB_MDC */ - - [RCAR_GP_PIN(1, 19)] = { PU1, 31 }, /* A19 */ - [RCAR_GP_PIN(1, 18)] = { PU1, 30 }, /* A18 */ - [RCAR_GP_PIN(1, 17)] = { PU1, 29 }, /* A17 */ - [RCAR_GP_PIN(1, 16)] = { PU1, 28 }, /* A16 */ - [RCAR_GP_PIN(1, 15)] = { PU1, 27 }, /* A15 */ - [RCAR_GP_PIN(1, 14)] = { PU1, 26 }, /* A14 */ - [RCAR_GP_PIN(1, 13)] = { PU1, 25 }, /* A13 */ - [RCAR_GP_PIN(1, 12)] = { PU1, 24 }, /* A12 */ - [RCAR_GP_PIN(1, 11)] = { PU1, 23 }, /* A11 */ - [RCAR_GP_PIN(1, 10)] = { PU1, 22 }, /* A10 */ - [RCAR_GP_PIN(1, 9)] = { PU1, 21 }, /* A9 */ - [RCAR_GP_PIN(1, 8)] = { PU1, 20 }, /* A8 */ - [RCAR_GP_PIN(1, 7)] = { PU1, 19 }, /* A7 */ - [RCAR_GP_PIN(1, 6)] = { PU1, 18 }, /* A6 */ - [RCAR_GP_PIN(1, 5)] = { PU1, 17 }, /* A5 */ - [RCAR_GP_PIN(1, 4)] = { PU1, 16 }, /* A4 */ - [RCAR_GP_PIN(1, 3)] = { PU1, 15 }, /* A3 */ - [RCAR_GP_PIN(1, 2)] = { PU1, 14 }, /* A2 */ - [RCAR_GP_PIN(1, 1)] = { PU1, 13 }, /* A1 */ - [RCAR_GP_PIN(1, 0)] = { PU1, 12 }, /* A0 */ - [RCAR_GP_PIN(2, 8)] = { PU1, 11 }, /* PWM2_A */ - 
[RCAR_GP_PIN(2, 7)] = { PU1, 10 }, /* PWM1_A */ - [RCAR_GP_PIN(2, 6)] = { PU1, 9 }, /* PWM0 */ - [RCAR_GP_PIN(2, 5)] = { PU1, 8 }, /* IRQ5 */ - [RCAR_GP_PIN(2, 4)] = { PU1, 7 }, /* IRQ4 */ - [RCAR_GP_PIN(2, 3)] = { PU1, 6 }, /* IRQ3 */ - [RCAR_GP_PIN(2, 2)] = { PU1, 5 }, /* IRQ2 */ - [RCAR_GP_PIN(2, 1)] = { PU1, 4 }, /* IRQ1 */ - [RCAR_GP_PIN(2, 0)] = { PU1, 3 }, /* IRQ0 */ - [RCAR_GP_PIN(2, 14)] = { PU1, 2 }, /* AVB_AVTP_CAPTURE_A */ - [RCAR_GP_PIN(2, 13)] = { PU1, 1 }, /* AVB_AVTP_MATCH_A */ - [RCAR_GP_PIN(2, 12)] = { PU1, 0 }, /* AVB_LINK */ - - [RCAR_GP_PIN(7, 3)] = { PU2, 29 }, /* HDMI1_CEC */ - [RCAR_GP_PIN(7, 2)] = { PU2, 28 }, /* HDMI0_CEC */ - [RCAR_GP_PIN(7, 1)] = { PU2, 27 }, /* AVS2 */ - [RCAR_GP_PIN(7, 0)] = { PU2, 26 }, /* AVS1 */ - [RCAR_GP_PIN(0, 15)] = { PU2, 25 }, /* D15 */ - [RCAR_GP_PIN(0, 14)] = { PU2, 24 }, /* D14 */ - [RCAR_GP_PIN(0, 13)] = { PU2, 23 }, /* D13 */ - [RCAR_GP_PIN(0, 12)] = { PU2, 22 }, /* D12 */ - [RCAR_GP_PIN(0, 11)] = { PU2, 21 }, /* D11 */ - [RCAR_GP_PIN(0, 10)] = { PU2, 20 }, /* D10 */ - [RCAR_GP_PIN(0, 9)] = { PU2, 19 }, /* D9 */ - [RCAR_GP_PIN(0, 8)] = { PU2, 18 }, /* D8 */ - [RCAR_GP_PIN(0, 7)] = { PU2, 17 }, /* D7 */ - [RCAR_GP_PIN(0, 6)] = { PU2, 16 }, /* D6 */ - [RCAR_GP_PIN(0, 5)] = { PU2, 15 }, /* D5 */ - [RCAR_GP_PIN(0, 4)] = { PU2, 14 }, /* D4 */ - [RCAR_GP_PIN(0, 3)] = { PU2, 13 }, /* D3 */ - [RCAR_GP_PIN(0, 2)] = { PU2, 12 }, /* D2 */ - [RCAR_GP_PIN(0, 1)] = { PU2, 11 }, /* D1 */ - [RCAR_GP_PIN(0, 0)] = { PU2, 10 }, /* D0 */ - [RCAR_GP_PIN(1, 27)] = { PU2, 8 }, /* EX_WAIT0_A */ - [RCAR_GP_PIN(1, 26)] = { PU2, 7 }, /* WE1_N */ - [RCAR_GP_PIN(1, 25)] = { PU2, 6 }, /* WE0_N */ - [RCAR_GP_PIN(1, 24)] = { PU2, 5 }, /* RD_WR_N */ - [RCAR_GP_PIN(1, 23)] = { PU2, 4 }, /* RD_N */ - [RCAR_GP_PIN(1, 22)] = { PU2, 3 }, /* BS_N */ - [RCAR_GP_PIN(1, 21)] = { PU2, 2 }, /* CS1_N_A26 */ - [RCAR_GP_PIN(1, 20)] = { PU2, 1 }, /* CS0_N */ - - [RCAR_GP_PIN(4, 9)] = { PU3, 31 }, /* SD3_DAT0 */ - [RCAR_GP_PIN(4, 8)] = { PU3, 30 }, /* SD3_CMD */ - [RCAR_GP_PIN(4, 7)] = { PU3, 29 }, /* SD3_CLK */ - [RCAR_GP_PIN(4, 6)] = { PU3, 28 }, /* SD2_DS */ - [RCAR_GP_PIN(4, 5)] = { PU3, 27 }, /* SD2_DAT3 */ - [RCAR_GP_PIN(4, 4)] = { PU3, 26 }, /* SD2_DAT2 */ - [RCAR_GP_PIN(4, 3)] = { PU3, 25 }, /* SD2_DAT1 */ - [RCAR_GP_PIN(4, 2)] = { PU3, 24 }, /* SD2_DAT0 */ - [RCAR_GP_PIN(4, 1)] = { PU3, 23 }, /* SD2_CMD */ - [RCAR_GP_PIN(4, 0)] = { PU3, 22 }, /* SD2_CLK */ - [RCAR_GP_PIN(3, 11)] = { PU3, 21 }, /* SD1_DAT3 */ - [RCAR_GP_PIN(3, 10)] = { PU3, 20 }, /* SD1_DAT2 */ - [RCAR_GP_PIN(3, 9)] = { PU3, 19 }, /* SD1_DAT1 */ - [RCAR_GP_PIN(3, 8)] = { PU3, 18 }, /* SD1_DAT0 */ - [RCAR_GP_PIN(3, 7)] = { PU3, 17 }, /* SD1_CMD */ - [RCAR_GP_PIN(3, 6)] = { PU3, 16 }, /* SD1_CLK */ - [RCAR_GP_PIN(3, 5)] = { PU3, 15 }, /* SD0_DAT3 */ - [RCAR_GP_PIN(3, 4)] = { PU3, 14 }, /* SD0_DAT2 */ - [RCAR_GP_PIN(3, 3)] = { PU3, 13 }, /* SD0_DAT1 */ - [RCAR_GP_PIN(3, 2)] = { PU3, 12 }, /* SD0_DAT0 */ - [RCAR_GP_PIN(3, 1)] = { PU3, 11 }, /* SD0_CMD */ - [RCAR_GP_PIN(3, 0)] = { PU3, 10 }, /* SD0_CLK */ - - [RCAR_GP_PIN(5, 19)] = { PU4, 31 }, /* MSIOF0_SS1 */ - [RCAR_GP_PIN(5, 18)] = { PU4, 30 }, /* MSIOF0_SYNC */ - [RCAR_GP_PIN(5, 17)] = { PU4, 29 }, /* MSIOF0_SCK */ - [RCAR_GP_PIN(5, 16)] = { PU4, 28 }, /* HRTS0_N */ - [RCAR_GP_PIN(5, 15)] = { PU4, 27 }, /* HCTS0_N */ - [RCAR_GP_PIN(5, 14)] = { PU4, 26 }, /* HTX0 */ - [RCAR_GP_PIN(5, 13)] = { PU4, 25 }, /* HRX0 */ - [RCAR_GP_PIN(5, 12)] = { PU4, 24 }, /* HSCK0 */ - [RCAR_GP_PIN(5, 11)] = { PU4, 23 }, /* RX2_A */ - [RCAR_GP_PIN(5, 10)] = { PU4, 22 }, /* 
TX2_A */ - [RCAR_GP_PIN(5, 9)] = { PU4, 21 }, /* SCK2 */ - [RCAR_GP_PIN(5, 8)] = { PU4, 20 }, /* RTS1_N_TANS */ - [RCAR_GP_PIN(5, 7)] = { PU4, 19 }, /* CTS1_N */ - [RCAR_GP_PIN(5, 6)] = { PU4, 18 }, /* TX1_A */ - [RCAR_GP_PIN(5, 5)] = { PU4, 17 }, /* RX1_A */ - [RCAR_GP_PIN(5, 4)] = { PU4, 16 }, /* RTS0_N_TANS */ - [RCAR_GP_PIN(5, 3)] = { PU4, 15 }, /* CTS0_N */ - [RCAR_GP_PIN(5, 2)] = { PU4, 14 }, /* TX0 */ - [RCAR_GP_PIN(5, 1)] = { PU4, 13 }, /* RX0 */ - [RCAR_GP_PIN(5, 0)] = { PU4, 12 }, /* SCK0 */ - [RCAR_GP_PIN(3, 15)] = { PU4, 11 }, /* SD1_WP */ - [RCAR_GP_PIN(3, 14)] = { PU4, 10 }, /* SD1_CD */ - [RCAR_GP_PIN(3, 13)] = { PU4, 9 }, /* SD0_WP */ - [RCAR_GP_PIN(3, 12)] = { PU4, 8 }, /* SD0_CD */ - [RCAR_GP_PIN(4, 17)] = { PU4, 7 }, /* SD3_DS */ - [RCAR_GP_PIN(4, 16)] = { PU4, 6 }, /* SD3_DAT7 */ - [RCAR_GP_PIN(4, 15)] = { PU4, 5 }, /* SD3_DAT6 */ - [RCAR_GP_PIN(4, 14)] = { PU4, 4 }, /* SD3_DAT5 */ - [RCAR_GP_PIN(4, 13)] = { PU4, 3 }, /* SD3_DAT4 */ - [RCAR_GP_PIN(4, 12)] = { PU4, 2 }, /* SD3_DAT3 */ - [RCAR_GP_PIN(4, 11)] = { PU4, 1 }, /* SD3_DAT2 */ - [RCAR_GP_PIN(4, 10)] = { PU4, 0 }, /* SD3_DAT1 */ - - [RCAR_GP_PIN(6, 24)] = { PU5, 31 }, /* USB0_PWEN */ - [RCAR_GP_PIN(6, 23)] = { PU5, 30 }, /* AUDIO_CLKB_B */ - [RCAR_GP_PIN(6, 22)] = { PU5, 29 }, /* AUDIO_CLKA_A */ - [RCAR_GP_PIN(6, 21)] = { PU5, 28 }, /* SSI_SDATA9_A */ - [RCAR_GP_PIN(6, 20)] = { PU5, 27 }, /* SSI_SDATA8 */ - [RCAR_GP_PIN(6, 19)] = { PU5, 26 }, /* SSI_SDATA7 */ - [RCAR_GP_PIN(6, 18)] = { PU5, 25 }, /* SSI_WS78 */ - [RCAR_GP_PIN(6, 17)] = { PU5, 24 }, /* SSI_SCK78 */ - [RCAR_GP_PIN(6, 16)] = { PU5, 23 }, /* SSI_SDATA6 */ - [RCAR_GP_PIN(6, 15)] = { PU5, 22 }, /* SSI_WS6 */ - [RCAR_GP_PIN(6, 14)] = { PU5, 21 }, /* SSI_SCK6 */ - [RCAR_GP_PIN(6, 13)] = { PU5, 20 }, /* SSI_SDATA5 */ - [RCAR_GP_PIN(6, 12)] = { PU5, 19 }, /* SSI_WS5 */ - [RCAR_GP_PIN(6, 11)] = { PU5, 18 }, /* SSI_SCK5 */ - [RCAR_GP_PIN(6, 10)] = { PU5, 17 }, /* SSI_SDATA4 */ - [RCAR_GP_PIN(6, 9)] = { PU5, 16 }, /* SSI_WS4 */ - [RCAR_GP_PIN(6, 8)] = { PU5, 15 }, /* SSI_SCK4 */ - [RCAR_GP_PIN(6, 7)] = { PU5, 14 }, /* SSI_SDATA3 */ - [RCAR_GP_PIN(6, 6)] = { PU5, 13 }, /* SSI_WS34 */ - [RCAR_GP_PIN(6, 5)] = { PU5, 12 }, /* SSI_SCK34 */ - [RCAR_GP_PIN(6, 4)] = { PU5, 11 }, /* SSI_SDATA2_A */ - [RCAR_GP_PIN(6, 3)] = { PU5, 10 }, /* SSI_SDATA1_A */ - [RCAR_GP_PIN(6, 2)] = { PU5, 9 }, /* SSI_SDATA0 */ - [RCAR_GP_PIN(6, 1)] = { PU5, 8 }, /* SSI_WS01239 */ - [RCAR_GP_PIN(6, 0)] = { PU5, 7 }, /* SSI_SCK01239 */ - [RCAR_GP_PIN(5, 25)] = { PU5, 5 }, /* MLB_DAT */ - [RCAR_GP_PIN(5, 24)] = { PU5, 4 }, /* MLB_SIG */ - [RCAR_GP_PIN(5, 23)] = { PU5, 3 }, /* MLB_CLK */ - [RCAR_GP_PIN(5, 22)] = { PU5, 2 }, /* MSIOF0_RXD */ - [RCAR_GP_PIN(5, 21)] = { PU5, 1 }, /* MSIOF0_SS2 */ - [RCAR_GP_PIN(5, 20)] = { PU5, 0 }, /* MSIOF0_TXD */ - - [RCAR_GP_PIN(6, 31)] = { PU6, 6 }, /* USB31_OVC */ - [RCAR_GP_PIN(6, 30)] = { PU6, 5 }, /* USB31_PWEN */ - [RCAR_GP_PIN(6, 29)] = { PU6, 4 }, /* USB30_OVC */ - [RCAR_GP_PIN(6, 28)] = { PU6, 3 }, /* USB30_PWEN */ - [RCAR_GP_PIN(6, 27)] = { PU6, 2 }, /* USB1_OVC */ - [RCAR_GP_PIN(6, 26)] = { PU6, 1 }, /* USB1_PWEN */ - [RCAR_GP_PIN(6, 25)] = { PU6, 0 }, /* USB0_OVC */ +static const struct sh_pfc_bias_info bias_info[] = { + { RCAR_GP_PIN(2, 11), PU0, 31 }, /* AVB_PHY_INT */ + { RCAR_GP_PIN(2, 10), PU0, 30 }, /* AVB_MAGIC */ + { RCAR_GP_PIN(2, 9), PU0, 29 }, /* AVB_MDC */ + + { RCAR_GP_PIN(1, 19), PU1, 31 }, /* A19 */ + { RCAR_GP_PIN(1, 18), PU1, 30 }, /* A18 */ + { RCAR_GP_PIN(1, 17), PU1, 29 }, /* A17 */ + { RCAR_GP_PIN(1, 16), PU1, 28 }, /* A16 */ 
+ { RCAR_GP_PIN(1, 15), PU1, 27 }, /* A15 */ + { RCAR_GP_PIN(1, 14), PU1, 26 }, /* A14 */ + { RCAR_GP_PIN(1, 13), PU1, 25 }, /* A13 */ + { RCAR_GP_PIN(1, 12), PU1, 24 }, /* A12 */ + { RCAR_GP_PIN(1, 11), PU1, 23 }, /* A11 */ + { RCAR_GP_PIN(1, 10), PU1, 22 }, /* A10 */ + { RCAR_GP_PIN(1, 9), PU1, 21 }, /* A9 */ + { RCAR_GP_PIN(1, 8), PU1, 20 }, /* A8 */ + { RCAR_GP_PIN(1, 7), PU1, 19 }, /* A7 */ + { RCAR_GP_PIN(1, 6), PU1, 18 }, /* A6 */ + { RCAR_GP_PIN(1, 5), PU1, 17 }, /* A5 */ + { RCAR_GP_PIN(1, 4), PU1, 16 }, /* A4 */ + { RCAR_GP_PIN(1, 3), PU1, 15 }, /* A3 */ + { RCAR_GP_PIN(1, 2), PU1, 14 }, /* A2 */ + { RCAR_GP_PIN(1, 1), PU1, 13 }, /* A1 */ + { RCAR_GP_PIN(1, 0), PU1, 12 }, /* A0 */ + { RCAR_GP_PIN(2, 8), PU1, 11 }, /* PWM2_A */ + { RCAR_GP_PIN(2, 7), PU1, 10 }, /* PWM1_A */ + { RCAR_GP_PIN(2, 6), PU1, 9 }, /* PWM0 */ + { RCAR_GP_PIN(2, 5), PU1, 8 }, /* IRQ5 */ + { RCAR_GP_PIN(2, 4), PU1, 7 }, /* IRQ4 */ + { RCAR_GP_PIN(2, 3), PU1, 6 }, /* IRQ3 */ + { RCAR_GP_PIN(2, 2), PU1, 5 }, /* IRQ2 */ + { RCAR_GP_PIN(2, 1), PU1, 4 }, /* IRQ1 */ + { RCAR_GP_PIN(2, 0), PU1, 3 }, /* IRQ0 */ + { RCAR_GP_PIN(2, 14), PU1, 2 }, /* AVB_AVTP_CAPTURE_A */ + { RCAR_GP_PIN(2, 13), PU1, 1 }, /* AVB_AVTP_MATCH_A */ + { RCAR_GP_PIN(2, 12), PU1, 0 }, /* AVB_LINK */ + + { RCAR_GP_PIN(7, 3), PU2, 29 }, /* HDMI1_CEC */ + { RCAR_GP_PIN(7, 2), PU2, 28 }, /* HDMI0_CEC */ + { RCAR_GP_PIN(7, 1), PU2, 27 }, /* AVS2 */ + { RCAR_GP_PIN(7, 0), PU2, 26 }, /* AVS1 */ + { RCAR_GP_PIN(0, 15), PU2, 25 }, /* D15 */ + { RCAR_GP_PIN(0, 14), PU2, 24 }, /* D14 */ + { RCAR_GP_PIN(0, 13), PU2, 23 }, /* D13 */ + { RCAR_GP_PIN(0, 12), PU2, 22 }, /* D12 */ + { RCAR_GP_PIN(0, 11), PU2, 21 }, /* D11 */ + { RCAR_GP_PIN(0, 10), PU2, 20 }, /* D10 */ + { RCAR_GP_PIN(0, 9), PU2, 19 }, /* D9 */ + { RCAR_GP_PIN(0, 8), PU2, 18 }, /* D8 */ + { RCAR_GP_PIN(0, 7), PU2, 17 }, /* D7 */ + { RCAR_GP_PIN(0, 6), PU2, 16 }, /* D6 */ + { RCAR_GP_PIN(0, 5), PU2, 15 }, /* D5 */ + { RCAR_GP_PIN(0, 4), PU2, 14 }, /* D4 */ + { RCAR_GP_PIN(0, 3), PU2, 13 }, /* D3 */ + { RCAR_GP_PIN(0, 2), PU2, 12 }, /* D2 */ + { RCAR_GP_PIN(0, 1), PU2, 11 }, /* D1 */ + { RCAR_GP_PIN(0, 0), PU2, 10 }, /* D0 */ + { RCAR_GP_PIN(1, 27), PU2, 8 }, /* EX_WAIT0_A */ + { RCAR_GP_PIN(1, 26), PU2, 7 }, /* WE1_N */ + { RCAR_GP_PIN(1, 25), PU2, 6 }, /* WE0_N */ + { RCAR_GP_PIN(1, 24), PU2, 5 }, /* RD_WR_N */ + { RCAR_GP_PIN(1, 23), PU2, 4 }, /* RD_N */ + { RCAR_GP_PIN(1, 22), PU2, 3 }, /* BS_N */ + { RCAR_GP_PIN(1, 21), PU2, 2 }, /* CS1_N_A26 */ + { RCAR_GP_PIN(1, 20), PU2, 1 }, /* CS0_N */ + + { RCAR_GP_PIN(4, 9), PU3, 31 }, /* SD3_DAT0 */ + { RCAR_GP_PIN(4, 8), PU3, 30 }, /* SD3_CMD */ + { RCAR_GP_PIN(4, 7), PU3, 29 }, /* SD3_CLK */ + { RCAR_GP_PIN(4, 6), PU3, 28 }, /* SD2_DS */ + { RCAR_GP_PIN(4, 5), PU3, 27 }, /* SD2_DAT3 */ + { RCAR_GP_PIN(4, 4), PU3, 26 }, /* SD2_DAT2 */ + { RCAR_GP_PIN(4, 3), PU3, 25 }, /* SD2_DAT1 */ + { RCAR_GP_PIN(4, 2), PU3, 24 }, /* SD2_DAT0 */ + { RCAR_GP_PIN(4, 1), PU3, 23 }, /* SD2_CMD */ + { RCAR_GP_PIN(4, 0), PU3, 22 }, /* SD2_CLK */ + { RCAR_GP_PIN(3, 11), PU3, 21 }, /* SD1_DAT3 */ + { RCAR_GP_PIN(3, 10), PU3, 20 }, /* SD1_DAT2 */ + { RCAR_GP_PIN(3, 9), PU3, 19 }, /* SD1_DAT1 */ + { RCAR_GP_PIN(3, 8), PU3, 18 }, /* SD1_DAT0 */ + { RCAR_GP_PIN(3, 7), PU3, 17 }, /* SD1_CMD */ + { RCAR_GP_PIN(3, 6), PU3, 16 }, /* SD1_CLK */ + { RCAR_GP_PIN(3, 5), PU3, 15 }, /* SD0_DAT3 */ + { RCAR_GP_PIN(3, 4), PU3, 14 }, /* SD0_DAT2 */ + { RCAR_GP_PIN(3, 3), PU3, 13 }, /* SD0_DAT1 */ + { RCAR_GP_PIN(3, 2), PU3, 12 }, /* SD0_DAT0 */ + { RCAR_GP_PIN(3, 1), PU3, 11 }, /* 
SD0_CMD */ + { RCAR_GP_PIN(3, 0), PU3, 10 }, /* SD0_CLK */ + + { RCAR_GP_PIN(5, 19), PU4, 31 }, /* MSIOF0_SS1 */ + { RCAR_GP_PIN(5, 18), PU4, 30 }, /* MSIOF0_SYNC */ + { RCAR_GP_PIN(5, 17), PU4, 29 }, /* MSIOF0_SCK */ + { RCAR_GP_PIN(5, 16), PU4, 28 }, /* HRTS0_N */ + { RCAR_GP_PIN(5, 15), PU4, 27 }, /* HCTS0_N */ + { RCAR_GP_PIN(5, 14), PU4, 26 }, /* HTX0 */ + { RCAR_GP_PIN(5, 13), PU4, 25 }, /* HRX0 */ + { RCAR_GP_PIN(5, 12), PU4, 24 }, /* HSCK0 */ + { RCAR_GP_PIN(5, 11), PU4, 23 }, /* RX2_A */ + { RCAR_GP_PIN(5, 10), PU4, 22 }, /* TX2_A */ + { RCAR_GP_PIN(5, 9), PU4, 21 }, /* SCK2 */ + { RCAR_GP_PIN(5, 8), PU4, 20 }, /* RTS1_N_TANS */ + { RCAR_GP_PIN(5, 7), PU4, 19 }, /* CTS1_N */ + { RCAR_GP_PIN(5, 6), PU4, 18 }, /* TX1_A */ + { RCAR_GP_PIN(5, 5), PU4, 17 }, /* RX1_A */ + { RCAR_GP_PIN(5, 4), PU4, 16 }, /* RTS0_N_TANS */ + { RCAR_GP_PIN(5, 3), PU4, 15 }, /* CTS0_N */ + { RCAR_GP_PIN(5, 2), PU4, 14 }, /* TX0 */ + { RCAR_GP_PIN(5, 1), PU4, 13 }, /* RX0 */ + { RCAR_GP_PIN(5, 0), PU4, 12 }, /* SCK0 */ + { RCAR_GP_PIN(3, 15), PU4, 11 }, /* SD1_WP */ + { RCAR_GP_PIN(3, 14), PU4, 10 }, /* SD1_CD */ + { RCAR_GP_PIN(3, 13), PU4, 9 }, /* SD0_WP */ + { RCAR_GP_PIN(3, 12), PU4, 8 }, /* SD0_CD */ + { RCAR_GP_PIN(4, 17), PU4, 7 }, /* SD3_DS */ + { RCAR_GP_PIN(4, 16), PU4, 6 }, /* SD3_DAT7 */ + { RCAR_GP_PIN(4, 15), PU4, 5 }, /* SD3_DAT6 */ + { RCAR_GP_PIN(4, 14), PU4, 4 }, /* SD3_DAT5 */ + { RCAR_GP_PIN(4, 13), PU4, 3 }, /* SD3_DAT4 */ + { RCAR_GP_PIN(4, 12), PU4, 2 }, /* SD3_DAT3 */ + { RCAR_GP_PIN(4, 11), PU4, 1 }, /* SD3_DAT2 */ + { RCAR_GP_PIN(4, 10), PU4, 0 }, /* SD3_DAT1 */ + + { RCAR_GP_PIN(6, 24), PU5, 31 }, /* USB0_PWEN */ + { RCAR_GP_PIN(6, 23), PU5, 30 }, /* AUDIO_CLKB_B */ + { RCAR_GP_PIN(6, 22), PU5, 29 }, /* AUDIO_CLKA_A */ + { RCAR_GP_PIN(6, 21), PU5, 28 }, /* SSI_SDATA9_A */ + { RCAR_GP_PIN(6, 20), PU5, 27 }, /* SSI_SDATA8 */ + { RCAR_GP_PIN(6, 19), PU5, 26 }, /* SSI_SDATA7 */ + { RCAR_GP_PIN(6, 18), PU5, 25 }, /* SSI_WS78 */ + { RCAR_GP_PIN(6, 17), PU5, 24 }, /* SSI_SCK78 */ + { RCAR_GP_PIN(6, 16), PU5, 23 }, /* SSI_SDATA6 */ + { RCAR_GP_PIN(6, 15), PU5, 22 }, /* SSI_WS6 */ + { RCAR_GP_PIN(6, 14), PU5, 21 }, /* SSI_SCK6 */ + { RCAR_GP_PIN(6, 13), PU5, 20 }, /* SSI_SDATA5 */ + { RCAR_GP_PIN(6, 12), PU5, 19 }, /* SSI_WS5 */ + { RCAR_GP_PIN(6, 11), PU5, 18 }, /* SSI_SCK5 */ + { RCAR_GP_PIN(6, 10), PU5, 17 }, /* SSI_SDATA4 */ + { RCAR_GP_PIN(6, 9), PU5, 16 }, /* SSI_WS4 */ + { RCAR_GP_PIN(6, 8), PU5, 15 }, /* SSI_SCK4 */ + { RCAR_GP_PIN(6, 7), PU5, 14 }, /* SSI_SDATA3 */ + { RCAR_GP_PIN(6, 6), PU5, 13 }, /* SSI_WS34 */ + { RCAR_GP_PIN(6, 5), PU5, 12 }, /* SSI_SCK34 */ + { RCAR_GP_PIN(6, 4), PU5, 11 }, /* SSI_SDATA2_A */ + { RCAR_GP_PIN(6, 3), PU5, 10 }, /* SSI_SDATA1_A */ + { RCAR_GP_PIN(6, 2), PU5, 9 }, /* SSI_SDATA0 */ + { RCAR_GP_PIN(6, 1), PU5, 8 }, /* SSI_WS01239 */ + { RCAR_GP_PIN(6, 0), PU5, 7 }, /* SSI_SCK01239 */ + { RCAR_GP_PIN(5, 25), PU5, 5 }, /* MLB_DAT */ + { RCAR_GP_PIN(5, 24), PU5, 4 }, /* MLB_SIG */ + { RCAR_GP_PIN(5, 23), PU5, 3 }, /* MLB_CLK */ + { RCAR_GP_PIN(5, 22), PU5, 2 }, /* MSIOF0_RXD */ + { RCAR_GP_PIN(5, 21), PU5, 1 }, /* MSIOF0_SS2 */ + { RCAR_GP_PIN(5, 20), PU5, 0 }, /* MSIOF0_TXD */ + + { RCAR_GP_PIN(6, 31), PU6, 6 }, /* USB31_OVC */ + { RCAR_GP_PIN(6, 30), PU6, 5 }, /* USB31_PWEN */ + { RCAR_GP_PIN(6, 29), PU6, 4 }, /* USB30_OVC */ + { RCAR_GP_PIN(6, 28), PU6, 3 }, /* USB30_PWEN */ + { RCAR_GP_PIN(6, 27), PU6, 2 }, /* USB1_OVC */ + { RCAR_GP_PIN(6, 26), PU6, 1 }, /* USB1_PWEN */ + { RCAR_GP_PIN(6, 25), PU6, 0 }, /* USB0_OVC */ }; static unsigned int 
r8a7795_pinmux_get_bias(struct sh_pfc *pfc, unsigned int pin) { + const struct sh_pfc_bias_info *info; u32 reg; u32 bit; - if (WARN_ON_ONCE(!pullups[pin].reg)) + info = sh_pfc_pin_to_bias_info(bias_info, ARRAY_SIZE(bias_info), pin); + if (!info) return PIN_CONFIG_BIAS_DISABLE; - reg = pullups[pin].reg; - bit = BIT(pullups[pin].bit); + reg = info->reg; + bit = BIT(info->bit); if (sh_pfc_read_reg(pfc, PUEN + reg, 32) & bit) { if (sh_pfc_read_reg(pfc, PUD + reg, 32) & bit) @@ -5379,15 +5378,17 @@ static unsigned int r8a7795_pinmux_get_bias(struct sh_pfc *pfc, static void r8a7795_pinmux_set_bias(struct sh_pfc *pfc, unsigned int pin, unsigned int bias) { + const struct sh_pfc_bias_info *info; u32 enable, updown; u32 reg; u32 bit; - if (WARN_ON_ONCE(!pullups[pin].reg)) + info = sh_pfc_pin_to_bias_info(bias_info, ARRAY_SIZE(bias_info), pin); + if (!info) return; - reg = pullups[pin].reg; - bit = BIT(pullups[pin].bit); + reg = info->reg; + bit = BIT(info->bit); enable = sh_pfc_read_reg(pfc, PUEN + reg, 32) & ~bit; if (bias != PIN_CONFIG_BIAS_DISABLE) diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c index c5772584594ce3c48686f5df0315586dc2e9f8b2..fcacfa73ef6e9b657fb043186fcd6c9cd0d2dfeb 100644 --- a/drivers/pinctrl/sh-pfc/pinctrl.c +++ b/drivers/pinctrl/sh-pfc/pinctrl.c @@ -570,7 +570,8 @@ static bool sh_pfc_pinconf_validate(struct sh_pfc *pfc, unsigned int _pin, switch (param) { case PIN_CONFIG_BIAS_DISABLE: - return true; + return pin->configs & + (SH_PFC_PIN_CFG_PULL_UP | SH_PFC_PIN_CFG_PULL_DOWN); case PIN_CONFIG_BIAS_PULL_UP: return pin->configs & SH_PFC_PIN_CFG_PULL_UP; diff --git a/drivers/pinctrl/sh-pfc/sh_pfc.h b/drivers/pinctrl/sh-pfc/sh_pfc.h index 2345421103db342e537187380f088240e3b1bf59..9556c172e3d2085cc846c976e62c15d8fb9cd5cb 100644 --- a/drivers/pinctrl/sh-pfc/sh_pfc.h +++ b/drivers/pinctrl/sh-pfc/sh_pfc.h @@ -189,6 +189,12 @@ struct sh_pfc_window { unsigned long size; }; +struct sh_pfc_bias_info { + u16 pin; + u16 reg : 11; + u16 bit : 5; +}; + struct sh_pfc_pin_range; struct sh_pfc { diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c index aa8bd9794683b715013c82aa9220d11cfb0ea595..96686336e3a396254b9473f01f1776e0297301ce 100644 --- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c +++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c @@ -561,7 +561,7 @@ static const int ether_rgmii_muxvals[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; static const unsigned ether_rmii_pins[] = {30, 31, 32, 33, 34, 35, 36, 37, 39, 41, 42, 45}; -static const int ether_rmii_muxvals[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +static const int ether_rmii_muxvals[] = {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}; static const unsigned i2c0_pins[] = {63, 64}; static const int i2c0_muxvals[] = {0, 0}; static const unsigned i2c1_pins[] = {65, 66}; diff --git a/drivers/platform/goldfish/Makefile b/drivers/platform/goldfish/Makefile index d3487125838cdce457c2f7acefdd9052b92cbde1..277a820ee4e11c2477060eb7af79936a2a5a4f15 100644 --- a/drivers/platform/goldfish/Makefile +++ b/drivers/platform/goldfish/Makefile @@ -2,4 +2,5 @@ # Makefile for Goldfish platform specific drivers # obj-$(CONFIG_GOLDFISH_BUS) += pdev_bus.o -obj-$(CONFIG_GOLDFISH_PIPE) += goldfish_pipe.o +obj-$(CONFIG_GOLDFISH_PIPE) += goldfish_pipe_all.o +goldfish_pipe_all-objs := goldfish_pipe.o goldfish_pipe_v2.o diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 
198d16da025d156e426d3e6f0f4f3c94b8747d32..00d8406713b373e556a00a611335a7c0ec67e86d 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -15,52 +15,11 @@ * */ -/* This source file contains the implementation of a special device driver - * that intends to provide a *very* fast communication channel between the - * guest system and the QEMU emulator. - * - * Usage from the guest is simply the following (error handling simplified): - * - * int fd = open("/dev/qemu_pipe",O_RDWR); - * .... write() or read() through the pipe. - * - * This driver doesn't deal with the exact protocol used during the session. - * It is intended to be as simple as something like: - * - * // do this _just_ after opening the fd to connect to a specific - * // emulator service. - * const char* msg = ""; - * if (write(fd, msg, strlen(msg)+1) < 0) { - * ... could not connect to service - * close(fd); - * } - * - * // after this, simply read() and write() to communicate with the - * // service. Exact protocol details left as an exercise to the reader. - * - * This driver is very fast because it doesn't copy any data through - * intermediate buffers, since the emulator is capable of translating - * guest user addresses into host ones. - * - * Note that we must however ensure that each user page involved in the - * exchange is properly mapped during a transfer. +/* This source file contains the implementation of the legacy version of + * a goldfish pipe device driver. See goldfish_pipe_v2.c for the current + * version. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "goldfish_pipe.h" /* * IMPORTANT: The following constants must match the ones used and defined @@ -110,29 +69,15 @@ #define PIPE_WAKE_READ (1 << 1) /* pipe can now be read from */ #define PIPE_WAKE_WRITE (1 << 2) /* pipe can now be written to */ -struct access_params { - unsigned long channel; - u32 size; - unsigned long address; - u32 cmd; - u32 result; - /* reserved for future extension */ - u32 flags; -}; +#define MAX_PAGES_TO_GRAB 32 -/* The global driver data. Holds a reference to the i/o page used to - * communicate with the emulator, and a wake queue for blocked tasks - * waiting to be awoken. - */ -struct goldfish_pipe_dev { - spinlock_t lock; - unsigned char __iomem *base; - struct access_params *aps; - int irq; - u32 version; -}; +#define DEBUG 0 -static struct goldfish_pipe_dev pipe_dev[1]; +#if DEBUG +#define DPRINT(...) { printk(KERN_ERR __VA_ARGS__); } +#else +#define DPRINT(...) 
+#endif /* This data type models a given pipe instance */ struct goldfish_pipe { @@ -142,6 +87,15 @@ struct goldfish_pipe { wait_queue_head_t wake_queue; }; +struct access_params { + unsigned long channel; + u32 size; + unsigned long address; + u32 cmd; + u32 result; + /* reserved for future extension */ + u32 flags; +}; /* Bit flags for the 'flags' field */ enum { @@ -231,8 +185,10 @@ static int setup_access_params_addr(struct platform_device *pdev, if (valid_batchbuffer_addr(dev, aps)) { dev->aps = aps; return 0; - } else + } else { + devm_kfree(&pdev->dev, aps); return -1; + } } /* A value that will not be set by qemu emulator */ @@ -269,6 +225,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, struct goldfish_pipe *pipe = filp->private_data; struct goldfish_pipe_dev *dev = pipe->dev; unsigned long address, address_end; + struct page* pages[MAX_PAGES_TO_GRAB] = {}; int count = 0, ret = -EINVAL; /* If the emulator already closed the pipe, no need to go further */ @@ -293,45 +250,61 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, while (address < address_end) { unsigned long page_end = (address & PAGE_MASK) + PAGE_SIZE; - unsigned long next = page_end < address_end ? page_end - : address_end; - unsigned long avail = next - address; - int status, wakeBit; - struct page *page; - - /* Either vaddr or paddr depending on the device version */ - unsigned long xaddr; + unsigned long next, avail; + int status, wakeBit, page_i, num_contiguous_pages; + long first_page, last_page, requested_pages; + unsigned long xaddr, xaddr_prev, xaddr_i; /* - * We grab the pages on a page-by-page basis in case user - * space gives us a potentially huge buffer but the read only - * returns a small amount, then there's no need to pin that - * much memory to the process. + * Attempt to grab multiple physically contiguous pages. */ - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(address, 1, is_write ? 0 : FOLL_WRITE, - &page, NULL); - up_read(¤t->mm->mmap_sem); - if (ret < 0) - break; + first_page = address & PAGE_MASK; + last_page = (address_end - 1) & PAGE_MASK; + requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1; + if (requested_pages > MAX_PAGES_TO_GRAB) { + requested_pages = MAX_PAGES_TO_GRAB; + } + ret = get_user_pages_fast(first_page, requested_pages, + !is_write, pages); + + DPRINT("%s: requested pages: %d %d %p\n", __FUNCTION__, + ret, requested_pages, first_page); + if (ret == 0) { + DPRINT("%s: error: (requested pages == 0) (wanted %d)\n", + __FUNCTION__, requested_pages); + mutex_unlock(&pipe->lock); + return ret; + } + if (ret < 0) { + DPRINT("%s: (requested pages < 0) %d \n", + __FUNCTION__, requested_pages); + mutex_unlock(&pipe->lock); + return ret; + } - if (dev->version) { - /* Device version 1 or newer (qemu-android) expects the - * physical address. - */ - xaddr = page_to_phys(page) | (address & ~PAGE_MASK); - } else { - /* Device version 0 (classic emulator) expects the - * virtual address. - */ - xaddr = address; + xaddr = page_to_phys(pages[0]) | (address & ~PAGE_MASK); + xaddr_prev = xaddr; + num_contiguous_pages = ret == 0 ? 0 : 1; + for (page_i = 1; page_i < ret; page_i++) { + xaddr_i = page_to_phys(pages[page_i]) | (address & ~PAGE_MASK); + if (xaddr_i == xaddr_prev + PAGE_SIZE) { + page_end += PAGE_SIZE; + xaddr_prev = xaddr_i; + num_contiguous_pages++; + } else { + DPRINT("%s: discontinuous page boundary: %d pages instead\n", + __FUNCTION__, page_i); + break; + } } + next = page_end < address_end ? 
page_end : address_end; + avail = next - address; /* Now, try to transfer the bytes in the current page */ spin_lock_irqsave(&dev->lock, irq_flags); if (access_with_param(dev, - is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER, - xaddr, avail, pipe, &status)) { + is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER, + xaddr, avail, pipe, &status)) { gf_write_ptr(pipe, dev->base + PIPE_REG_CHANNEL, dev->base + PIPE_REG_CHANNEL_HIGH); writel(avail, dev->base + PIPE_REG_SIZE); @@ -344,9 +317,13 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, } spin_unlock_irqrestore(&dev->lock, irq_flags); - if (status > 0 && !is_write) - set_page_dirty(page); - put_page(page); + for (page_i = 0; page_i < ret; page_i++) { + if (status > 0 && !is_write && + page_i < num_contiguous_pages) { + set_page_dirty(pages[page_i]); + } + put_page(pages[page_i]); + } if (status > 0) { /* Correct transfer */ count += status; @@ -368,7 +345,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, */ if (status != PIPE_ERROR_AGAIN) pr_info_ratelimited("goldfish_pipe: backend returned error %d on %s\n", - status, is_write ? "write" : "read"); + status, is_write ? "write" : "read"); ret = 0; break; } @@ -378,7 +355,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, * non-blocking mode, just return the error code. */ if (status != PIPE_ERROR_AGAIN || - (filp->f_flags & O_NONBLOCK) != 0) { + (filp->f_flags & O_NONBLOCK) != 0) { ret = goldfish_pipe_error_convert(status); break; } @@ -392,7 +369,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, /* Tell the emulator we're going to wait for a wake event */ goldfish_cmd(pipe, - is_write ? CMD_WAKE_ON_WRITE : CMD_WAKE_ON_READ); + is_write ? CMD_WAKE_ON_WRITE : CMD_WAKE_ON_READ); /* Unlock the pipe, then wait for the wake signal */ mutex_unlock(&pipe->lock); @@ -538,6 +515,8 @@ static int goldfish_pipe_open(struct inode *inode, struct file *file) pipe->dev = dev; mutex_init(&pipe->lock); + DPRINT("%s: call. pipe_dev pipe_dev=0x%lx new_pipe_addr=0x%lx file=0x%lx\n", __FUNCTION__, pipe_dev, pipe, file); + // spin lock init, write head of list, i guess init_waitqueue_head(&pipe->wake_queue); /* @@ -560,6 +539,7 @@ static int goldfish_pipe_release(struct inode *inode, struct file *filp) { struct goldfish_pipe *pipe = filp->private_data; + DPRINT("%s: call. 
pipe=0x%lx file=0x%lx\n", __FUNCTION__, pipe, filp); /* The guest is closing the channel, so tell the emulator right now */ goldfish_cmd(pipe, CMD_CLOSE); kfree(pipe); @@ -576,73 +556,35 @@ static const struct file_operations goldfish_pipe_fops = { .release = goldfish_pipe_release, }; -static struct miscdevice goldfish_pipe_device = { +static struct miscdevice goldfish_pipe_dev = { .minor = MISC_DYNAMIC_MINOR, .name = "goldfish_pipe", .fops = &goldfish_pipe_fops, }; -static int goldfish_pipe_probe(struct platform_device *pdev) +int goldfish_pipe_device_init_v1(struct platform_device *pdev) { - int err; - struct resource *r; struct goldfish_pipe_dev *dev = pipe_dev; - - /* not thread safe, but this should not happen */ - WARN_ON(dev->base != NULL); - - spin_lock_init(&dev->lock); - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (r == NULL || resource_size(r) < PAGE_SIZE) { - dev_err(&pdev->dev, "can't allocate i/o page\n"); - return -EINVAL; - } - dev->base = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE); - if (dev->base == NULL) { - dev_err(&pdev->dev, "ioremap failed\n"); - return -EINVAL; - } - - r = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (r == NULL) { - err = -EINVAL; - goto error; - } - dev->irq = r->start; - - err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt, + int err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt, IRQF_SHARED, "goldfish_pipe", dev); if (err) { - dev_err(&pdev->dev, "unable to allocate IRQ\n"); - goto error; + dev_err(&pdev->dev, "unable to allocate IRQ for v1\n"); + return err; } - err = misc_register(&goldfish_pipe_device); + err = misc_register(&goldfish_pipe_dev); if (err) { - dev_err(&pdev->dev, "unable to register device\n"); - goto error; + dev_err(&pdev->dev, "unable to register v1 device\n"); + return err; } - setup_access_params_addr(pdev, dev); - /* Although the pipe device in the classic Android emulator does not - * recognize the 'version' register, it won't treat this as an error - * either and will simply return 0, which is fine. - */ - dev->version = readl(dev->base + PIPE_REG_VERSION); + setup_access_params_addr(pdev, dev); return 0; - -error: - dev->base = NULL; - return err; } -static int goldfish_pipe_remove(struct platform_device *pdev) +void goldfish_pipe_device_deinit_v1(struct platform_device *pdev) { - struct goldfish_pipe_dev *dev = pipe_dev; - misc_deregister(&goldfish_pipe_device); - dev->base = NULL; - return 0; + misc_deregister(&goldfish_pipe_dev); } static const struct acpi_device_id goldfish_pipe_acpi_match[] = { diff --git a/drivers/platform/goldfish/goldfish_pipe.h b/drivers/platform/goldfish/goldfish_pipe.h new file mode 100644 index 0000000000000000000000000000000000000000..6cd1b63be8c907a4d2279a6e8f7ce63a4f1de98f --- /dev/null +++ b/drivers/platform/goldfish/goldfish_pipe.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef GOLDFISH_PIPE_H +#define GOLDFISH_PIPE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Initialize the legacy version of the pipe device driver */ +int goldfish_pipe_device_init_v1(struct platform_device *pdev); + +/* Deinitialize the legacy version of the pipe device driver */ +void goldfish_pipe_device_deinit_v1(struct platform_device *pdev); + +/* Forward declarations for the device struct */ +struct goldfish_pipe; +struct goldfish_pipe_device_buffers; + +/* The global driver data. Holds a reference to the i/o page used to + * communicate with the emulator, and a wake queue for blocked tasks + * waiting to be awoken. + */ +struct goldfish_pipe_dev { + /* + * Global device spinlock. Protects the following members: + * - pipes, pipes_capacity + * - [*pipes, *pipes + pipes_capacity) - array data + * - first_signalled_pipe, + * goldfish_pipe::prev_signalled, + * goldfish_pipe::next_signalled, + * goldfish_pipe::signalled_flags - all singnalled-related fields, + * in all allocated pipes + * - open_command_params - PIPE_CMD_OPEN-related buffers + * + * It looks like a lot of different fields, but the trick is that the only + * operation that happens often is the signalled pipes array manipulation. + * That's why it's OK for now to keep the rest of the fields under the same + * lock. If we notice too much contention because of PIPE_CMD_OPEN, + * then we should add a separate lock there. + */ + spinlock_t lock; + + /* + * Array of the pipes of |pipes_capacity| elements, + * indexed by goldfish_pipe::id + */ + struct goldfish_pipe **pipes; + u32 pipes_capacity; + + /* Pointers to the buffers host uses for interaction with this driver */ + struct goldfish_pipe_dev_buffers *buffers; + + /* Head of a doubly linked list of signalled pipes */ + struct goldfish_pipe *first_signalled_pipe; + + /* Some device-specific data */ + int irq; + int version; + unsigned char __iomem *base; + + /* v1-specific access parameters */ + struct access_params *aps; +}; + +extern struct goldfish_pipe_dev pipe_dev[1]; + +#endif /* GOLDFISH_PIPE_H */ diff --git a/drivers/platform/goldfish/goldfish_pipe_v2.c b/drivers/platform/goldfish/goldfish_pipe_v2.c new file mode 100644 index 0000000000000000000000000000000000000000..ad373ed36555233215349eb804dd344a4020290a --- /dev/null +++ b/drivers/platform/goldfish/goldfish_pipe_v2.c @@ -0,0 +1,889 @@ +/* + * Copyright (C) 2012 Intel, Inc. + * Copyright (C) 2013 Intel, Inc. + * Copyright (C) 2014 Linaro Limited + * Copyright (C) 2011-2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* This source file contains the implementation of a special device driver + * that intends to provide a *very* fast communication channel between the + * guest system and the QEMU emulator. + * + * Usage from the guest is simply the following (error handling simplified): + * + * int fd = open("/dev/qemu_pipe",O_RDWR); + * .... write() or read() through the pipe. 
+ * + * This driver doesn't deal with the exact protocol used during the session. + * It is intended to be as simple as something like: + * + * // do this _just_ after opening the fd to connect to a specific + * // emulator service. + * const char* msg = ""; + * if (write(fd, msg, strlen(msg)+1) < 0) { + * ... could not connect to service + * close(fd); + * } + * + * // after this, simply read() and write() to communicate with the + * // service. Exact protocol details left as an exercise to the reader. + * + * This driver is very fast because it doesn't copy any data through + * intermediate buffers, since the emulator is capable of translating + * guest user addresses into host ones. + * + * Note that we must however ensure that each user page involved in the + * exchange is properly mapped during a transfer. + */ + +#include "goldfish_pipe.h" + + +/* + * Update this when something changes in the driver's behavior so the host + * can benefit from knowing it + */ +enum { + PIPE_DRIVER_VERSION = 2, + PIPE_CURRENT_DEVICE_VERSION = 2 +}; + +/* + * IMPORTANT: The following constants must match the ones used and defined + * in external/qemu/hw/goldfish_pipe.c in the Android source tree. + */ + +/* List of bitflags returned in status of CMD_POLL command */ +enum PipePollFlags { + PIPE_POLL_IN = 1 << 0, + PIPE_POLL_OUT = 1 << 1, + PIPE_POLL_HUP = 1 << 2 +}; + +/* Possible status values used to signal errors - see goldfish_pipe_error_convert */ +enum PipeErrors { + PIPE_ERROR_INVAL = -1, + PIPE_ERROR_AGAIN = -2, + PIPE_ERROR_NOMEM = -3, + PIPE_ERROR_IO = -4 +}; + +/* Bit-flags used to signal events from the emulator */ +enum PipeWakeFlags { + PIPE_WAKE_CLOSED = 1 << 0, /* emulator closed pipe */ + PIPE_WAKE_READ = 1 << 1, /* pipe can now be read from */ + PIPE_WAKE_WRITE = 1 << 2 /* pipe can now be written to */ +}; + +/* Bit flags for the 'flags' field */ +enum PipeFlagsBits { + BIT_CLOSED_ON_HOST = 0, /* pipe closed by host */ + BIT_WAKE_ON_WRITE = 1, /* want to be woken on writes */ + BIT_WAKE_ON_READ = 2, /* want to be woken on reads */ +}; + +enum PipeRegs { + PIPE_REG_CMD = 0, + + PIPE_REG_SIGNAL_BUFFER_HIGH = 4, + PIPE_REG_SIGNAL_BUFFER = 8, + PIPE_REG_SIGNAL_BUFFER_COUNT = 12, + + PIPE_REG_OPEN_BUFFER_HIGH = 20, + PIPE_REG_OPEN_BUFFER = 24, + + PIPE_REG_VERSION = 36, + + PIPE_REG_GET_SIGNALLED = 48, +}; + +enum PipeCmdCode { + PIPE_CMD_OPEN = 1, /* to be used by the pipe device itself */ + PIPE_CMD_CLOSE, + PIPE_CMD_POLL, + PIPE_CMD_WRITE, + PIPE_CMD_WAKE_ON_WRITE, + PIPE_CMD_READ, + PIPE_CMD_WAKE_ON_READ, + + /* + * TODO(zyy): implement a deferred read/write execution to allow parallel + * processing of pipe operations on the host. 
+ */ + PIPE_CMD_WAKE_ON_DONE_IO, +}; + +enum { + MAX_BUFFERS_PER_COMMAND = 336, + MAX_SIGNALLED_PIPES = 64, + INITIAL_PIPES_CAPACITY = 64 +}; + +struct goldfish_pipe_dev; +struct goldfish_pipe; +struct goldfish_pipe_command; + +/* A per-pipe command structure, shared with the host */ +struct goldfish_pipe_command { + s32 cmd; /* PipeCmdCode, guest -> host */ + s32 id; /* pipe id, guest -> host */ + s32 status; /* command execution status, host -> guest */ + s32 reserved; /* to pad to 64-bit boundary */ + union { + /* Parameters for PIPE_CMD_{READ,WRITE} */ + struct { + u32 buffers_count; /* number of buffers, guest -> host */ + s32 consumed_size; /* number of consumed bytes, host -> guest */ + u64 ptrs[MAX_BUFFERS_PER_COMMAND]; /* buffer pointers, guest -> host */ + u32 sizes[MAX_BUFFERS_PER_COMMAND]; /* buffer sizes, guest -> host */ + } rw_params; + }; +}; + +/* A single signalled pipe information */ +struct signalled_pipe_buffer { + u32 id; + u32 flags; +}; + +/* Parameters for the PIPE_CMD_OPEN command */ +struct open_command_param { + u64 command_buffer_ptr; + u32 rw_params_max_count; +}; + +/* Device-level set of buffers shared with the host */ +struct goldfish_pipe_dev_buffers { + struct open_command_param open_command_params; + struct signalled_pipe_buffer signalled_pipe_buffers[MAX_SIGNALLED_PIPES]; +}; + +/* This data type models a given pipe instance */ +struct goldfish_pipe { + u32 id; /* pipe ID - index into goldfish_pipe_dev::pipes array */ + unsigned long flags; /* The wake flags pipe is waiting for + * Note: not protected with any lock, uses atomic operations + * and barriers to make it thread-safe. + */ + unsigned long signalled_flags; /* wake flags host have signalled, + * - protected by goldfish_pipe_dev::lock */ + + struct goldfish_pipe_command *command_buffer; /* A pointer to command buffer */ + + /* doubly linked list of signalled pipes, protected by goldfish_pipe_dev::lock */ + struct goldfish_pipe *prev_signalled; + struct goldfish_pipe *next_signalled; + + /* + * A pipe's own lock. Protects the following: + * - *command_buffer - makes sure a command can safely write its parameters + * to the host and read the results back. + */ + struct mutex lock; + + wait_queue_head_t wake_queue; /* A wake queue for sleeping until host signals an event */ + struct goldfish_pipe_dev *dev; /* Pointer to the parent goldfish_pipe_dev instance */ +}; + +struct goldfish_pipe_dev pipe_dev[1] = {}; + +static int goldfish_cmd_locked(struct goldfish_pipe *pipe, enum PipeCmdCode cmd) +{ + pipe->command_buffer->cmd = cmd; + pipe->command_buffer->status = PIPE_ERROR_INVAL; /* failure by default */ + writel(pipe->id, pipe->dev->base + PIPE_REG_CMD); + return pipe->command_buffer->status; +} + +static int goldfish_cmd(struct goldfish_pipe *pipe, enum PipeCmdCode cmd) +{ + int status; + if (mutex_lock_interruptible(&pipe->lock)) + return PIPE_ERROR_IO; + status = goldfish_cmd_locked(pipe, cmd); + mutex_unlock(&pipe->lock); + return status; +} + +/* + * This function converts an error code returned by the emulator through + * the PIPE_REG_STATUS i/o register into a valid negative errno value. 
+ */ +static int goldfish_pipe_error_convert(int status) +{ + switch (status) { + case PIPE_ERROR_AGAIN: + return -EAGAIN; + case PIPE_ERROR_NOMEM: + return -ENOMEM; + case PIPE_ERROR_IO: + return -EIO; + default: + return -EINVAL; + } +} + +static int pin_user_pages(unsigned long first_page, unsigned long last_page, + unsigned last_page_size, int is_write, + struct page *pages[MAX_BUFFERS_PER_COMMAND], unsigned *iter_last_page_size) +{ + int ret; + int requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1; + if (requested_pages > MAX_BUFFERS_PER_COMMAND) { + requested_pages = MAX_BUFFERS_PER_COMMAND; + *iter_last_page_size = PAGE_SIZE; + } else { + *iter_last_page_size = last_page_size; + } + + ret = get_user_pages_fast( + first_page, requested_pages, !is_write, pages); + if (ret <= 0) + return -EFAULT; + if (ret < requested_pages) + *iter_last_page_size = PAGE_SIZE; + return ret; + +} + +static void release_user_pages(struct page **pages, int pages_count, + int is_write, s32 consumed_size) +{ + int i; + for (i = 0; i < pages_count; i++) { + if (!is_write && consumed_size > 0) { + set_page_dirty(pages[i]); + } + put_page(pages[i]); + } +} + +/* Populate the call parameters, merging adjacent pages together */ +static void populate_rw_params( + struct page **pages, int pages_count, + unsigned long address, unsigned long address_end, + unsigned long first_page, unsigned long last_page, + unsigned iter_last_page_size, int is_write, + struct goldfish_pipe_command *command) +{ + /* + * Process the first page separately - it's the only page that + * needs special handling for its start address. + */ + unsigned long xaddr = page_to_phys(pages[0]); + unsigned long xaddr_prev = xaddr; + int buffer_idx = 0; + int i = 1; + int size_on_page = first_page == last_page + ? (int)(address_end - address) + : (PAGE_SIZE - (address & ~PAGE_MASK)); + command->rw_params.ptrs[0] = (u64)(xaddr | (address & ~PAGE_MASK)); + command->rw_params.sizes[0] = size_on_page; + for (; i < pages_count; ++i) { + xaddr = page_to_phys(pages[i]); + size_on_page = (i == pages_count - 1) ? iter_last_page_size : PAGE_SIZE; + if (xaddr == xaddr_prev + PAGE_SIZE) { + command->rw_params.sizes[buffer_idx] += size_on_page; + } else { + ++buffer_idx; + command->rw_params.ptrs[buffer_idx] = (u64)xaddr; + command->rw_params.sizes[buffer_idx] = size_on_page; + } + xaddr_prev = xaddr; + } + command->rw_params.buffers_count = buffer_idx + 1; +} + +static int transfer_max_buffers(struct goldfish_pipe* pipe, + unsigned long address, unsigned long address_end, int is_write, + unsigned long last_page, unsigned int last_page_size, + s32* consumed_size, int* status) +{ + struct page *pages[MAX_BUFFERS_PER_COMMAND]; + unsigned long first_page = address & PAGE_MASK; + unsigned int iter_last_page_size; + int pages_count = pin_user_pages(first_page, last_page, + last_page_size, is_write, + pages, &iter_last_page_size); + if (pages_count < 0) + return pages_count; + + /* Serialize access to the pipe command buffers */ + if (mutex_lock_interruptible(&pipe->lock)) + return -ERESTARTSYS; + + populate_rw_params(pages, pages_count, address, address_end, + first_page, last_page, iter_last_page_size, is_write, + pipe->command_buffer); + + /* Transfer the data */ + *status = goldfish_cmd_locked(pipe, + is_write ? 
PIPE_CMD_WRITE : PIPE_CMD_READ); + + *consumed_size = pipe->command_buffer->rw_params.consumed_size; + + mutex_unlock(&pipe->lock); + + release_user_pages(pages, pages_count, is_write, *consumed_size); + + return 0; +} + +static int wait_for_host_signal(struct goldfish_pipe *pipe, int is_write) +{ + u32 wakeBit = is_write ? BIT_WAKE_ON_WRITE : BIT_WAKE_ON_READ; + set_bit(wakeBit, &pipe->flags); + + /* Tell the emulator we're going to wait for a wake event */ + (void)goldfish_cmd(pipe, + is_write ? PIPE_CMD_WAKE_ON_WRITE : PIPE_CMD_WAKE_ON_READ); + + while (test_bit(wakeBit, &pipe->flags)) { + if (wait_event_interruptible( + pipe->wake_queue, + !test_bit(wakeBit, &pipe->flags))) + return -ERESTARTSYS; + + if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)) + return -EIO; + } + + return 0; +} + +static ssize_t goldfish_pipe_read_write(struct file *filp, + char __user *buffer, size_t bufflen, int is_write) +{ + struct goldfish_pipe *pipe = filp->private_data; + int count = 0, ret = -EINVAL; + unsigned long address, address_end, last_page; + unsigned int last_page_size; + + /* If the emulator already closed the pipe, no need to go further */ + if (unlikely(test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))) + return -EIO; + /* Null reads or writes succeeds */ + if (unlikely(bufflen == 0)) + return 0; + /* Check the buffer range for access */ + if (unlikely(!access_ok(is_write ? VERIFY_WRITE : VERIFY_READ, + buffer, bufflen))) + return -EFAULT; + + address = (unsigned long)buffer; + address_end = address + bufflen; + last_page = (address_end - 1) & PAGE_MASK; + last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1; + + while (address < address_end) { + s32 consumed_size; + int status; + ret = transfer_max_buffers(pipe, address, address_end, is_write, + last_page, last_page_size, &consumed_size, &status); + if (ret < 0) + break; + + if (consumed_size > 0) { + /* No matter what's the status, we've transfered something */ + count += consumed_size; + address += consumed_size; + } + if (status > 0) + continue; + if (status == 0) { + /* EOF */ + ret = 0; + break; + } + if (count > 0) { + /* + * An error occured, but we already transfered + * something on one of the previous iterations. + * Just return what we already copied and log this + * err. + */ + if (status != PIPE_ERROR_AGAIN) + pr_info_ratelimited("goldfish_pipe: backend error %d on %s\n", + status, is_write ? "write" : "read"); + break; + } + + /* + * If the error is not PIPE_ERROR_AGAIN, or if we are in + * non-blocking mode, just return the error code. 
+ */ + if (status != PIPE_ERROR_AGAIN || (filp->f_flags & O_NONBLOCK) != 0) { + ret = goldfish_pipe_error_convert(status); + break; + } + + status = wait_for_host_signal(pipe, is_write); + if (status < 0) + return status; + } + + if (count > 0) + return count; + return ret; +} + +static ssize_t goldfish_pipe_read(struct file *filp, char __user *buffer, + size_t bufflen, loff_t *ppos) +{ + return goldfish_pipe_read_write(filp, buffer, bufflen, /* is_write */ 0); +} + +static ssize_t goldfish_pipe_write(struct file *filp, + const char __user *buffer, size_t bufflen, + loff_t *ppos) +{ + return goldfish_pipe_read_write(filp, + /* cast away the const */(char __user *)buffer, bufflen, + /* is_write */ 1); +} + +static unsigned int goldfish_pipe_poll(struct file *filp, poll_table *wait) +{ + struct goldfish_pipe *pipe = filp->private_data; + unsigned int mask = 0; + int status; + + poll_wait(filp, &pipe->wake_queue, wait); + + status = goldfish_cmd(pipe, PIPE_CMD_POLL); + if (status < 0) { + return -ERESTARTSYS; + } + + if (status & PIPE_POLL_IN) + mask |= POLLIN | POLLRDNORM; + if (status & PIPE_POLL_OUT) + mask |= POLLOUT | POLLWRNORM; + if (status & PIPE_POLL_HUP) + mask |= POLLHUP; + if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)) + mask |= POLLERR; + + return mask; +} + +static void signalled_pipes_add_locked(struct goldfish_pipe_dev *dev, + u32 id, u32 flags) +{ + struct goldfish_pipe *pipe; + + BUG_ON(id >= dev->pipes_capacity); + + pipe = dev->pipes[id]; + if (!pipe) + return; + pipe->signalled_flags |= flags; + + if (pipe->prev_signalled || pipe->next_signalled + || dev->first_signalled_pipe == pipe) + return; /* already in the list */ + pipe->next_signalled = dev->first_signalled_pipe; + if (dev->first_signalled_pipe) { + dev->first_signalled_pipe->prev_signalled = pipe; + } + dev->first_signalled_pipe = pipe; +} + +static void signalled_pipes_remove_locked(struct goldfish_pipe_dev *dev, + struct goldfish_pipe *pipe) { + if (pipe->prev_signalled) + pipe->prev_signalled->next_signalled = pipe->next_signalled; + if (pipe->next_signalled) + pipe->next_signalled->prev_signalled = pipe->prev_signalled; + if (pipe == dev->first_signalled_pipe) + dev->first_signalled_pipe = pipe->next_signalled; + pipe->prev_signalled = NULL; + pipe->next_signalled = NULL; +} + +static struct goldfish_pipe *signalled_pipes_pop_front(struct goldfish_pipe_dev *dev, + int *wakes) +{ + struct goldfish_pipe *pipe; + unsigned long flags; + spin_lock_irqsave(&dev->lock, flags); + + pipe = dev->first_signalled_pipe; + if (pipe) { + *wakes = pipe->signalled_flags; + pipe->signalled_flags = 0; + /* + * This is an optimized version of signalled_pipes_remove_locked() - + * we want to make it as fast as possible to wake the sleeping pipe + * operations faster + */ + dev->first_signalled_pipe = pipe->next_signalled; + if (dev->first_signalled_pipe) + dev->first_signalled_pipe->prev_signalled = NULL; + pipe->next_signalled = NULL; + } + + spin_unlock_irqrestore(&dev->lock, flags); + return pipe; +} + +static void goldfish_interrupt_task(unsigned long unused) +{ + struct goldfish_pipe_dev *dev = pipe_dev; + /* Iterate over the signalled pipes and wake them one by one */ + struct goldfish_pipe *pipe; + int wakes; + while ((pipe = signalled_pipes_pop_front(dev, &wakes)) != NULL) { + if (wakes & PIPE_WAKE_CLOSED) { + pipe->flags = 1 << BIT_CLOSED_ON_HOST; + } else { + if (wakes & PIPE_WAKE_READ) + clear_bit(BIT_WAKE_ON_READ, &pipe->flags); + if (wakes & PIPE_WAKE_WRITE) + clear_bit(BIT_WAKE_ON_WRITE, &pipe->flags); + } + /* + * 
wake_up_interruptible() implies a write barrier, so don't explicitly + * add another one here. + */ + wake_up_interruptible(&pipe->wake_queue); + } +} +DECLARE_TASKLET(goldfish_interrupt_tasklet, goldfish_interrupt_task, 0); + +/* + * The general idea of the interrupt handling: + * + * 1. device raises an interrupt if there's at least one signalled pipe + * 2. IRQ handler reads the signalled pipes and their count from the device + * 3. device writes them into a shared buffer and returns the count + * it only resets the IRQ if it has returned all signalled pipes, + * otherwise it leaves it raised, so IRQ handler will be called + * again for the next chunk + * 4. IRQ handler adds all returned pipes to the device's signalled pipes list + * 5. IRQ handler launches a tasklet to process the signalled pipes from the + * list in a separate context + */ +static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id) +{ + u32 count; + u32 i; + unsigned long flags; + struct goldfish_pipe_dev *dev = dev_id; + if (dev != pipe_dev) + return IRQ_NONE; + + /* Request the signalled pipes from the device */ + spin_lock_irqsave(&dev->lock, flags); + + count = readl(dev->base + PIPE_REG_GET_SIGNALLED); + if (count == 0) { + spin_unlock_irqrestore(&dev->lock, flags); + return IRQ_NONE; + } + if (count > MAX_SIGNALLED_PIPES) + count = MAX_SIGNALLED_PIPES; + + for (i = 0; i < count; ++i) + signalled_pipes_add_locked(dev, + dev->buffers->signalled_pipe_buffers[i].id, + dev->buffers->signalled_pipe_buffers[i].flags); + + spin_unlock_irqrestore(&dev->lock, flags); + + tasklet_schedule(&goldfish_interrupt_tasklet); + return IRQ_HANDLED; +} + +static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev) +{ + int id; + for (id = 0; id < dev->pipes_capacity; ++id) + if (!dev->pipes[id]) + return id; + + { + /* Reallocate the array */ + u32 new_capacity = 2 * dev->pipes_capacity; + struct goldfish_pipe **pipes = + kcalloc(new_capacity, sizeof(*pipes), + GFP_ATOMIC); + if (!pipes) + return -ENOMEM; + memcpy(pipes, dev->pipes, sizeof(*pipes) * dev->pipes_capacity); + kfree(dev->pipes); + dev->pipes = pipes; + id = dev->pipes_capacity; + dev->pipes_capacity = new_capacity; + } + return id; +} + +/** + * goldfish_pipe_open - open a channel to the AVD + * @inode: inode of device + * @file: file struct of opener + * + * Create a new pipe link between the emulator and the use application. + * Each new request produces a new pipe. + * + * Note: we use the pipe ID as a mux. All goldfish emulations are 32bit + * right now so this is fine. A move to 64bit will need this addressing + */ +static int goldfish_pipe_open(struct inode *inode, struct file *file) +{ + struct goldfish_pipe_dev *dev = pipe_dev; + unsigned long flags; + int id; + int status; + + /* Allocate new pipe kernel object */ + struct goldfish_pipe *pipe = kzalloc(sizeof(*pipe), GFP_KERNEL); + if (pipe == NULL) + return -ENOMEM; + + pipe->dev = dev; + mutex_init(&pipe->lock); + init_waitqueue_head(&pipe->wake_queue); + + /* + * Command buffer needs to be allocated on its own page to make sure it is + * physically contiguous in host's address space. 
+ */ + pipe->command_buffer = + (struct goldfish_pipe_command*)__get_free_page(GFP_KERNEL); + if (!pipe->command_buffer) { + status = -ENOMEM; + goto err_pipe; + } + + spin_lock_irqsave(&dev->lock, flags); + + id = get_free_pipe_id_locked(dev); + if (id < 0) { + status = id; + goto err_id_locked; + } + + dev->pipes[id] = pipe; + pipe->id = id; + pipe->command_buffer->id = id; + + /* Now tell the emulator we're opening a new pipe. */ + dev->buffers->open_command_params.rw_params_max_count = + MAX_BUFFERS_PER_COMMAND; + dev->buffers->open_command_params.command_buffer_ptr = + (u64)(unsigned long)__pa(pipe->command_buffer); + status = goldfish_cmd_locked(pipe, PIPE_CMD_OPEN); + spin_unlock_irqrestore(&dev->lock, flags); + if (status < 0) + goto err_cmd; + /* All is done, save the pipe into the file's private data field */ + file->private_data = pipe; + return 0; + +err_cmd: + spin_lock_irqsave(&dev->lock, flags); + dev->pipes[id] = NULL; +err_id_locked: + spin_unlock_irqrestore(&dev->lock, flags); + free_page((unsigned long)pipe->command_buffer); +err_pipe: + kfree(pipe); + return status; +} + +static int goldfish_pipe_release(struct inode *inode, struct file *filp) +{ + unsigned long flags; + struct goldfish_pipe *pipe = filp->private_data; + struct goldfish_pipe_dev *dev = pipe->dev; + + /* The guest is closing the channel, so tell the emulator right now */ + (void)goldfish_cmd(pipe, PIPE_CMD_CLOSE); + + spin_lock_irqsave(&dev->lock, flags); + dev->pipes[pipe->id] = NULL; + signalled_pipes_remove_locked(dev, pipe); + spin_unlock_irqrestore(&dev->lock, flags); + + filp->private_data = NULL; + free_page((unsigned long)pipe->command_buffer); + kfree(pipe); + return 0; +} + +static const struct file_operations goldfish_pipe_fops = { + .owner = THIS_MODULE, + .read = goldfish_pipe_read, + .write = goldfish_pipe_write, + .poll = goldfish_pipe_poll, + .open = goldfish_pipe_open, + .release = goldfish_pipe_release, +}; + +static struct miscdevice goldfish_pipe_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "goldfish_pipe", + .fops = &goldfish_pipe_fops, +}; + +static int goldfish_pipe_device_init_v2(struct platform_device *pdev) +{ + char *page; + struct goldfish_pipe_dev *dev = pipe_dev; + int err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt, + IRQF_SHARED, "goldfish_pipe", dev); + if (err) { + dev_err(&pdev->dev, "unable to allocate IRQ for v2\n"); + return err; + } + + err = misc_register(&goldfish_pipe_dev); + if (err) { + dev_err(&pdev->dev, "unable to register v2 device\n"); + return err; + } + + dev->first_signalled_pipe = NULL; + dev->pipes_capacity = INITIAL_PIPES_CAPACITY; + dev->pipes = kcalloc(dev->pipes_capacity, sizeof(*dev->pipes), GFP_KERNEL); + if (!dev->pipes) + return -ENOMEM; + + /* + * We're going to pass two buffers, open_command_params and + * signalled_pipe_buffers, to the host. This means each of those buffers + * needs to be contained in a single physical page. The easiest choice is + * to just allocate a page and place the buffers in it. 
+ */ + BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE); + page = (char*)__get_free_page(GFP_KERNEL); + if (!page) { + kfree(dev->pipes); + return -ENOMEM; + } + dev->buffers = (struct goldfish_pipe_dev_buffers*)page; + + /* Send the buffer addresses to the host */ + { + u64 paddr = __pa(&dev->buffers->signalled_pipe_buffers); + writel((u32)(unsigned long)(paddr >> 32), dev->base + PIPE_REG_SIGNAL_BUFFER_HIGH); + writel((u32)(unsigned long)paddr, dev->base + PIPE_REG_SIGNAL_BUFFER); + writel((u32)MAX_SIGNALLED_PIPES, dev->base + PIPE_REG_SIGNAL_BUFFER_COUNT); + + paddr = __pa(&dev->buffers->open_command_params); + writel((u32)(unsigned long)(paddr >> 32), dev->base + PIPE_REG_OPEN_BUFFER_HIGH); + writel((u32)(unsigned long)paddr, dev->base + PIPE_REG_OPEN_BUFFER); + } + return 0; +} + +static void goldfish_pipe_device_deinit_v2(struct platform_device *pdev) { + struct goldfish_pipe_dev *dev = pipe_dev; + misc_deregister(&goldfish_pipe_dev); + kfree(dev->pipes); + free_page((unsigned long)dev->buffers); +} + +static int goldfish_pipe_probe(struct platform_device *pdev) +{ + int err; + struct resource *r; + struct goldfish_pipe_dev *dev = pipe_dev; + + BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE); + + /* not thread safe, but this should not happen */ + WARN_ON(dev->base != NULL); + + spin_lock_init(&dev->lock); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (r == NULL || resource_size(r) < PAGE_SIZE) { + dev_err(&pdev->dev, "can't allocate i/o page\n"); + return -EINVAL; + } + dev->base = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE); + if (dev->base == NULL) { + dev_err(&pdev->dev, "ioremap failed\n"); + return -EINVAL; + } + + r = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (r == NULL) { + err = -EINVAL; + goto error; + } + dev->irq = r->start; + + /* + * Exchange the versions with the host device + * + * Note: v1 driver used to not report its version, so we write it before + * reading device version back: this allows the host implementation to + * detect the old driver (if there was no version write before read). 
+ */ + writel((u32)PIPE_DRIVER_VERSION, dev->base + PIPE_REG_VERSION); + dev->version = readl(dev->base + PIPE_REG_VERSION); + if (dev->version < PIPE_CURRENT_DEVICE_VERSION) { + /* initialize the old device version */ + err = goldfish_pipe_device_init_v1(pdev); + } else { + /* Host device supports the new interface */ + err = goldfish_pipe_device_init_v2(pdev); + } + if (!err) + return 0; + +error: + dev->base = NULL; + return err; +} + +static int goldfish_pipe_remove(struct platform_device *pdev) +{ + struct goldfish_pipe_dev *dev = pipe_dev; + if (dev->version < PIPE_CURRENT_DEVICE_VERSION) + goldfish_pipe_device_deinit_v1(pdev); + else + goldfish_pipe_device_deinit_v2(pdev); + dev->base = NULL; + return 0; +} + +static const struct acpi_device_id goldfish_pipe_acpi_match[] = { + { "GFSH0003", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_pipe_acpi_match); + +static const struct of_device_id goldfish_pipe_of_match[] = { + { .compatible = "google,android-pipe", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_pipe_of_match); + +static struct platform_driver goldfish_pipe_driver = { + .probe = goldfish_pipe_probe, + .remove = goldfish_pipe_remove, + .driver = { + .name = "goldfish_pipe", + .of_match_table = goldfish_pipe_of_match, + .acpi_match_table = ACPI_PTR(goldfish_pipe_acpi_match), + } +}; + +module_platform_driver(goldfish_pipe_driver); +MODULE_AUTHOR("David Turner "); +MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 26e4cbc34db8f8e3b515f2e4e3a2c0c836e59c4b..6032b7085582b4cad48684bf6f40b2243b53b4d3 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -173,6 +173,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_wapf4, }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X45U", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X45U"), + }, + .driver_data = &quirk_asus_wapf4, + }, { .callback = dmi_matched, .ident = "ASUSTeK COMPUTER INC. 
X456UA", diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 61f39abf5dc8f24b738a2dafc5117b40a2521fd5..82d67715ce76d9ff0442ecdcdbd2dbb324380203 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -177,43 +177,43 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event); #if IS_ENABLED(CONFIG_LEDS_CLASS) static enum led_brightness logolamp_get(struct led_classdev *cdev); -static void logolamp_set(struct led_classdev *cdev, +static int logolamp_set(struct led_classdev *cdev, enum led_brightness brightness); static struct led_classdev logolamp_led = { .name = "fujitsu::logolamp", .brightness_get = logolamp_get, - .brightness_set = logolamp_set + .brightness_set_blocking = logolamp_set }; static enum led_brightness kblamps_get(struct led_classdev *cdev); -static void kblamps_set(struct led_classdev *cdev, +static int kblamps_set(struct led_classdev *cdev, enum led_brightness brightness); static struct led_classdev kblamps_led = { .name = "fujitsu::kblamps", .brightness_get = kblamps_get, - .brightness_set = kblamps_set + .brightness_set_blocking = kblamps_set }; static enum led_brightness radio_led_get(struct led_classdev *cdev); -static void radio_led_set(struct led_classdev *cdev, +static int radio_led_set(struct led_classdev *cdev, enum led_brightness brightness); static struct led_classdev radio_led = { .name = "fujitsu::radio_led", .brightness_get = radio_led_get, - .brightness_set = radio_led_set + .brightness_set_blocking = radio_led_set }; static enum led_brightness eco_led_get(struct led_classdev *cdev); -static void eco_led_set(struct led_classdev *cdev, +static int eco_led_set(struct led_classdev *cdev, enum led_brightness brightness); static struct led_classdev eco_led = { .name = "fujitsu::eco_led", .brightness_get = eco_led_get, - .brightness_set = eco_led_set + .brightness_set_blocking = eco_led_set }; #endif @@ -267,48 +267,48 @@ static int call_fext_func(int cmd, int arg0, int arg1, int arg2) #if IS_ENABLED(CONFIG_LEDS_CLASS) /* LED class callbacks */ -static void logolamp_set(struct led_classdev *cdev, +static int logolamp_set(struct led_classdev *cdev, enum led_brightness brightness) { if (brightness >= LED_FULL) { call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_ON); - call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_ON); + return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_ON); } else if (brightness >= LED_HALF) { call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_ON); - call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_OFF); + return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_OFF); } else { - call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_OFF); + return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_OFF); } } -static void kblamps_set(struct led_classdev *cdev, +static int kblamps_set(struct led_classdev *cdev, enum led_brightness brightness) { if (brightness >= LED_FULL) - call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_ON); + return call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_ON); else - call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_OFF); + return call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_OFF); } -static void radio_led_set(struct led_classdev *cdev, +static int radio_led_set(struct led_classdev *cdev, enum led_brightness brightness) { if (brightness >= LED_FULL) - call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON); + return 
call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON); else - call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0); + return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0); } -static void eco_led_set(struct led_classdev *cdev, +static int eco_led_set(struct led_classdev *cdev, enum led_brightness brightness) { int curr; curr = call_fext_func(FUNC_LEDS, 0x2, ECO_LED, 0x0); if (brightness >= LED_FULL) - call_fext_func(FUNC_LEDS, 0x1, ECO_LED, curr | ECO_LED_ON); + return call_fext_func(FUNC_LEDS, 0x1, ECO_LED, curr | ECO_LED_ON); else - call_fext_func(FUNC_LEDS, 0x1, ECO_LED, curr & ~ECO_LED_ON); + return call_fext_func(FUNC_LEDS, 0x1, ECO_LED, curr & ~ECO_LED_ON); } static enum led_brightness logolamp_get(struct led_classdev *cdev) diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c index 1fc0de870ff826e8b90956ab557cc83008e1ce68..361770568ad03a6e7a3bc7e6d579ccacae1725b7 100644 --- a/drivers/platform/x86/intel_mid_powerbtn.c +++ b/drivers/platform/x86/intel_mid_powerbtn.c @@ -77,7 +77,7 @@ static int mfld_pb_probe(struct platform_device *pdev) input_set_capability(input, EV_KEY, KEY_POWER); - error = request_threaded_irq(irq, NULL, mfld_pb_isr, 0, + error = request_threaded_irq(irq, NULL, mfld_pb_isr, IRQF_ONESHOT, DRIVER_NAME, input); if (error) { dev_err(&pdev->dev, "Unable to request irq %d for mfld power" diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index f5746b9f4e83900edd33fb049229b24aa48ba1b9..e9584330aeed27f0c4e2cc7fa731ff8311e82659 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -1141,7 +1141,7 @@ static int bq24190_battery_set_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); - pm_runtime_put_sync(bdi->dev); + pm_runtime_get_sync(bdi->dev); switch (psp) { case POWER_SUPPLY_PROP_ONLINE: diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 3b0dbc689d7299bde340b044311ab8ad859bd726..bccb3f595ff3dfe29be2644a4c8d270e02f2815d 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -164,6 +164,25 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = { [BQ27XXX_REG_DCAP] = 0x3c, [BQ27XXX_REG_AP] = INVALID_REG_ADDR, }, + [BQ27510] = { + [BQ27XXX_REG_CTRL] = 0x00, + [BQ27XXX_REG_TEMP] = 0x06, + [BQ27XXX_REG_INT_TEMP] = 0x28, + [BQ27XXX_REG_VOLT] = 0x08, + [BQ27XXX_REG_AI] = 0x14, + [BQ27XXX_REG_FLAGS] = 0x0a, + [BQ27XXX_REG_TTE] = 0x16, + [BQ27XXX_REG_TTF] = INVALID_REG_ADDR, + [BQ27XXX_REG_TTES] = 0x1a, + [BQ27XXX_REG_TTECP] = INVALID_REG_ADDR, + [BQ27XXX_REG_NAC] = 0x0c, + [BQ27XXX_REG_FCC] = 0x12, + [BQ27XXX_REG_CYCT] = 0x1e, + [BQ27XXX_REG_AE] = INVALID_REG_ADDR, + [BQ27XXX_REG_SOC] = 0x20, + [BQ27XXX_REG_DCAP] = 0x2e, + [BQ27XXX_REG_AP] = INVALID_REG_ADDR, + }, [BQ27530] = { [BQ27XXX_REG_CTRL] = 0x00, [BQ27XXX_REG_TEMP] = 0x06, @@ -302,6 +321,24 @@ static enum power_supply_property bq27500_battery_props[] = { POWER_SUPPLY_PROP_MANUFACTURER, }; +static enum power_supply_property bq27510_battery_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, + POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CHARGE_FULL, + POWER_SUPPLY_PROP_CHARGE_NOW, + POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, + POWER_SUPPLY_PROP_CYCLE_COUNT, + 
POWER_SUPPLY_PROP_HEALTH, + POWER_SUPPLY_PROP_MANUFACTURER, +}; + static enum power_supply_property bq27530_battery_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, @@ -385,6 +422,7 @@ static struct { BQ27XXX_PROP(BQ27000, bq27000_battery_props), BQ27XXX_PROP(BQ27010, bq27010_battery_props), BQ27XXX_PROP(BQ27500, bq27500_battery_props), + BQ27XXX_PROP(BQ27510, bq27510_battery_props), BQ27XXX_PROP(BQ27530, bq27530_battery_props), BQ27XXX_PROP(BQ27541, bq27541_battery_props), BQ27XXX_PROP(BQ27545, bq27545_battery_props), @@ -635,7 +673,8 @@ static int bq27xxx_battery_read_pwr_avg(struct bq27xxx_device_info *di) */ static bool bq27xxx_battery_overtemp(struct bq27xxx_device_info *di, u16 flags) { - if (di->chip == BQ27500 || di->chip == BQ27541 || di->chip == BQ27545) + if (di->chip == BQ27500 || di->chip == BQ27510 || + di->chip == BQ27541 || di->chip == BQ27545) return flags & (BQ27XXX_FLAG_OTC | BQ27XXX_FLAG_OTD); if (di->chip == BQ27530 || di->chip == BQ27421) return flags & BQ27XXX_FLAG_OT; diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index 85d4ea2a9c2026e834329bb3b3121579ced969da..5c5c3a6f99234dfd82cc41f4cd3ec5cd50865c7d 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -149,8 +149,8 @@ static const struct i2c_device_id bq27xxx_i2c_id_table[] = { { "bq27200", BQ27000 }, { "bq27210", BQ27010 }, { "bq27500", BQ27500 }, - { "bq27510", BQ27500 }, - { "bq27520", BQ27500 }, + { "bq27510", BQ27510 }, + { "bq27520", BQ27510 }, { "bq27530", BQ27530 }, { "bq27531", BQ27530 }, { "bq27541", BQ27541 }, diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 243b233ff31bff9fdcfae1ab019b3104fd36916e..3c71f608b444b1007b983aa1dcf90b4b15fa90e7 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -442,6 +442,7 @@ static int contraint_to_pl(struct rapl_domain *rd, int cid) return i; } } + pr_err("Cannot find matching power limit for constraint %d\n", cid); return -EINVAL; } @@ -457,6 +458,10 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid, get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); id = contraint_to_pl(rd, cid); + if (id < 0) { + ret = id; + goto set_exit; + } rp = rd->rp; @@ -496,6 +501,11 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid, get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); id = contraint_to_pl(rd, cid); + if (id < 0) { + ret = id; + goto get_exit; + } + switch (rd->rpl[id].prim_id) { case PL1_ENABLE: prim = POWER_LIMIT1; @@ -512,6 +522,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid, else *data = val; +get_exit: put_online_cpus(); return ret; @@ -527,6 +538,10 @@ static int set_time_window(struct powercap_zone *power_zone, int cid, get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); id = contraint_to_pl(rd, cid); + if (id < 0) { + ret = id; + goto set_time_exit; + } switch (rd->rpl[id].prim_id) { case PL1_ENABLE: @@ -538,6 +553,8 @@ static int set_time_window(struct powercap_zone *power_zone, int cid, default: ret = -EINVAL; } + +set_time_exit: put_online_cpus(); return ret; } @@ -552,6 +569,10 @@ static int get_time_window(struct powercap_zone *power_zone, int cid, u64 *data) get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); id = contraint_to_pl(rd, cid); + if (id < 0) { + ret = id; + goto get_time_exit; + } switch (rd->rpl[id].prim_id) { case PL1_ENABLE: @@ -566,6 
+587,8 @@ static int get_time_window(struct powercap_zone *power_zone, int cid, u64 *data) } if (!ret) *data = val; + +get_time_exit: put_online_cpus(); return ret; @@ -707,7 +730,7 @@ static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, case ENERGY_UNIT: scale = ENERGY_UNIT_SCALE; /* per domain unit takes precedence */ - if (rd && rd->domain_energy_unit) + if (rd->domain_energy_unit) units = rd->domain_energy_unit; else units = rp->energy_unit; diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index 54382ef902c67b3df6702ba6034793cf5d2ae059..a3ade9e4ef478ed90311365a4e86db0cbbc394d5 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -272,7 +272,7 @@ static const struct regulator_desc axp806_regulators[] = { 64, AXP806_DCDCD_V_CTRL, 0x3f, AXP806_PWR_OUT_CTRL1, BIT(3)), AXP_DESC(AXP806, DCDCE, "dcdce", "vine", 1100, 3400, 100, - AXP806_DCDCB_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL1, BIT(4)), + AXP806_DCDCE_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL1, BIT(4)), AXP_DESC(AXP806, ALDO1, "aldo1", "aldoin", 700, 3300, 100, AXP806_ALDO1_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL1, BIT(5)), AXP_DESC(AXP806, ALDO2, "aldo2", "aldoin", 700, 3400, 100, @@ -337,10 +337,18 @@ static const struct regulator_desc axp809_regulators[] = { AXP22X_ELDO2_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(1)), AXP_DESC(AXP809, ELDO3, "eldo3", "eldoin", 700, 3300, 100, AXP22X_ELDO3_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(2)), - AXP_DESC_IO(AXP809, LDO_IO0, "ldo_io0", "ips", 700, 3300, 100, + /* + * Note the datasheet only guarantees reliable operation up to + * 3.3V, this needs to be enforced via dts provided constraints + */ + AXP_DESC_IO(AXP809, LDO_IO0, "ldo_io0", "ips", 700, 3800, 100, AXP22X_LDO_IO0_V_OUT, 0x1f, AXP20X_GPIO0_CTRL, 0x07, AXP22X_IO_ENABLED, AXP22X_IO_DISABLED), - AXP_DESC_IO(AXP809, LDO_IO1, "ldo_io1", "ips", 700, 3300, 100, + /* + * Note the datasheet only guarantees reliable operation up to + * 3.3V, this needs to be enforced via dts provided constraints + */ + AXP_DESC_IO(AXP809, LDO_IO1, "ldo_io1", "ips", 700, 3800, 100, AXP22X_LDO_IO1_V_OUT, 0x1f, AXP20X_GPIO1_CTRL, 0x07, AXP22X_IO_ENABLED, AXP22X_IO_DISABLED), AXP_DESC_FIXED(AXP809, RTC_LDO, "rtc_ldo", "ips", 1800), diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c index bcf38fd5106a2324ccd83dace8d9a8e25a46d789..379cdacc05d8ecc76465eba8f2515025c7b9fc16 100644 --- a/drivers/regulator/helpers.c +++ b/drivers/regulator/helpers.c @@ -454,13 +454,17 @@ EXPORT_SYMBOL_GPL(regulator_set_bypass_regmap); int regulator_get_bypass_regmap(struct regulator_dev *rdev, bool *enable) { unsigned int val; + unsigned int val_on = rdev->desc->bypass_val_on; int ret; ret = regmap_read(rdev->regmap, rdev->desc->bypass_reg, &val); if (ret != 0) return ret; - *enable = (val & rdev->desc->bypass_mask) == rdev->desc->bypass_val_on; + if (!val_on) + val_on = rdev->desc->bypass_mask; + + *enable = (val & rdev->desc->bypass_mask) == val_on; return 0; } diff --git a/drivers/regulator/stw481x-vmmc.c b/drivers/regulator/stw481x-vmmc.c index 7d2ae3e9e942f26f302f526450487213da2cc44d..342f5da799758e0d054ef2aaa64c9ff1c566365e 100644 --- a/drivers/regulator/stw481x-vmmc.c +++ b/drivers/regulator/stw481x-vmmc.c @@ -47,7 +47,8 @@ static struct regulator_desc vmmc_regulator = { .volt_table = stw481x_vmmc_voltages, .enable_time = 200, /* FIXME: look this up */ .enable_reg = STW_CONF1, - .enable_mask = STW_CONF1_PDN_VMMC, + .enable_mask = STW_CONF1_PDN_VMMC | STW_CONF1_MMC_LS_STATUS, + .enable_val 
= STW_CONF1_PDN_VMMC, .vsel_reg = STW_CONF1, .vsel_mask = STW_CONF1_VMMC_MASK, }; diff --git a/drivers/regulator/tps65086-regulator.c b/drivers/regulator/tps65086-regulator.c index 33f389d583ef115a988a2f2e32c89b7b3add893e..caf174ffa316fa9965f51b2cf0642805da8b4747 100644 --- a/drivers/regulator/tps65086-regulator.c +++ b/drivers/regulator/tps65086-regulator.c @@ -71,18 +71,17 @@ struct tps65086_regulator { unsigned int decay_mask; }; -static const struct regulator_linear_range tps65086_buck126_10mv_ranges[] = { +static const struct regulator_linear_range tps65086_10mv_ranges[] = { REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0), REGULATOR_LINEAR_RANGE(410000, 0x1, 0x7F, 10000), }; static const struct regulator_linear_range tps65086_buck126_25mv_ranges[] = { - REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0), - REGULATOR_LINEAR_RANGE(1000000, 0x1, 0x18, 0), + REGULATOR_LINEAR_RANGE(1000000, 0x0, 0x18, 0), REGULATOR_LINEAR_RANGE(1025000, 0x19, 0x7F, 25000), }; -static const struct regulator_linear_range tps65086_buck345_ranges[] = { +static const struct regulator_linear_range tps65086_buck345_25mv_ranges[] = { REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0), REGULATOR_LINEAR_RANGE(425000, 0x1, 0x7F, 25000), }; @@ -125,27 +124,27 @@ static int tps65086_of_parse_cb(struct device_node *dev, static struct tps65086_regulator regulators[] = { TPS65086_REGULATOR("BUCK1", "buck1", BUCK1, 0x80, TPS65086_BUCK1CTRL, BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(0), - tps65086_buck126_10mv_ranges, TPS65086_BUCK1CTRL, + tps65086_10mv_ranges, TPS65086_BUCK1CTRL, BIT(0)), TPS65086_REGULATOR("BUCK2", "buck2", BUCK2, 0x80, TPS65086_BUCK2CTRL, BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(1), - tps65086_buck126_10mv_ranges, TPS65086_BUCK2CTRL, + tps65086_10mv_ranges, TPS65086_BUCK2CTRL, BIT(0)), TPS65086_REGULATOR("BUCK3", "buck3", BUCK3, 0x80, TPS65086_BUCK3VID, BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(2), - tps65086_buck345_ranges, TPS65086_BUCK3DECAY, + tps65086_10mv_ranges, TPS65086_BUCK3DECAY, BIT(0)), TPS65086_REGULATOR("BUCK4", "buck4", BUCK4, 0x80, TPS65086_BUCK4VID, BUCK_VID_MASK, TPS65086_BUCK4CTRL, BIT(0), - tps65086_buck345_ranges, TPS65086_BUCK4VID, + tps65086_10mv_ranges, TPS65086_BUCK4VID, BIT(0)), TPS65086_REGULATOR("BUCK5", "buck5", BUCK5, 0x80, TPS65086_BUCK5VID, BUCK_VID_MASK, TPS65086_BUCK5CTRL, BIT(0), - tps65086_buck345_ranges, TPS65086_BUCK5CTRL, + tps65086_10mv_ranges, TPS65086_BUCK5CTRL, BIT(0)), TPS65086_REGULATOR("BUCK6", "buck6", BUCK6, 0x80, TPS65086_BUCK6VID, BUCK_VID_MASK, TPS65086_BUCK6CTRL, BIT(0), - tps65086_buck126_10mv_ranges, TPS65086_BUCK6CTRL, + tps65086_10mv_ranges, TPS65086_BUCK6CTRL, BIT(0)), TPS65086_REGULATOR("LDOA1", "ldoa1", LDOA1, 0xF, TPS65086_LDOA1CTRL, VDOA1_VID_MASK, TPS65086_LDOA1CTRL, BIT(0), @@ -162,18 +161,6 @@ static struct tps65086_regulator regulators[] = { TPS65086_SWITCH("VTT", "vtt", VTT, TPS65086_SWVTT_EN, BIT(4)), }; -static inline bool has_25mv_mode(int id) -{ - switch (id) { - case BUCK1: - case BUCK2: - case BUCK6: - return true; - default: - return false; - } -} - static int tps65086_of_parse_cb(struct device_node *dev, const struct regulator_desc *desc, struct regulator_config *config) @@ -181,12 +168,27 @@ static int tps65086_of_parse_cb(struct device_node *dev, int ret; /* Check for 25mV step mode */ - if (has_25mv_mode(desc->id) && - of_property_read_bool(config->of_node, "ti,regulator-step-size-25mv")) { - regulators[desc->id].desc.linear_ranges = + if (of_property_read_bool(config->of_node, "ti,regulator-step-size-25mv")) { + switch (desc->id) { + case BUCK1: + case BUCK2: + 
case BUCK6: + regulators[desc->id].desc.linear_ranges = tps65086_buck126_25mv_ranges; - regulators[desc->id].desc.n_linear_ranges = + regulators[desc->id].desc.n_linear_ranges = ARRAY_SIZE(tps65086_buck126_25mv_ranges); + break; + case BUCK3: + case BUCK4: + case BUCK5: + regulators[desc->id].desc.linear_ranges = + tps65086_buck345_25mv_ranges; + regulators[desc->id].desc.n_linear_ranges = + ARRAY_SIZE(tps65086_buck345_25mv_ranges); + break; + default: + dev_warn(config->dev, "25mV step mode only valid for BUCK regulators\n"); + } } /* Check for decay mode */ diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index f396bfef5d42863b4f3b9609c9798b4df4ce6059..5fcbefcb86369b1de1ff95f8819b9e542c8f866e 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -91,17 +91,12 @@ config QCOM_Q6V5_PIL Say y here to support the Qualcomm Peripherial Image Loader for the Hexagon V5 based remote processors. -config QCOM_WCNSS_IRIS - tristate - depends on OF && ARCH_QCOM - config QCOM_WCNSS_PIL tristate "Qualcomm WCNSS Peripheral Image Loader" depends on OF && ARCH_QCOM depends on QCOM_SMEM select QCOM_MDT_LOADER select QCOM_SCM - select QCOM_WCNSS_IRIS select REMOTEPROC help Say y here to support the Peripheral Image Loader for the Qualcomm diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile index 6dfb62ed643f8ea46db285b4fc77a60ca4f8f3ee..034b6f3563a79fceb3834971af534f83144b0e4a 100644 --- a/drivers/remoteproc/Makefile +++ b/drivers/remoteproc/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_WKUP_M3_RPROC) += wkup_m3_rproc.o obj-$(CONFIG_DA8XX_REMOTEPROC) += da8xx_remoteproc.o obj-$(CONFIG_QCOM_MDT_LOADER) += qcom_mdt_loader.o obj-$(CONFIG_QCOM_Q6V5_PIL) += qcom_q6v5_pil.o -obj-$(CONFIG_QCOM_WCNSS_IRIS) += qcom_wcnss_iris.o -obj-$(CONFIG_QCOM_WCNSS_PIL) += qcom_wcnss.o +obj-$(CONFIG_QCOM_WCNSS_PIL) += qcom_wcnss_pil.o +qcom_wcnss_pil-y += qcom_wcnss.o +qcom_wcnss_pil-y += qcom_wcnss_iris.o obj-$(CONFIG_ST_REMOTEPROC) += st_remoteproc.o diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index f5cedeaafba18946757227dd646d393a2bc3f5a0..323b629474a6b5c8fe3c544fef4d681efa607cef 100644 --- a/drivers/remoteproc/qcom_wcnss.c +++ b/drivers/remoteproc/qcom_wcnss.c @@ -143,7 +143,6 @@ void qcom_wcnss_assign_iris(struct qcom_wcnss *wcnss, mutex_unlock(&wcnss->iris_lock); } -EXPORT_SYMBOL_GPL(qcom_wcnss_assign_iris); static int wcnss_load(struct rproc *rproc, const struct firmware *fw) { @@ -619,6 +618,28 @@ static struct platform_driver wcnss_driver = { }, }; -module_platform_driver(wcnss_driver); +static int __init wcnss_init(void) +{ + int ret; + + ret = platform_driver_register(&wcnss_driver); + if (ret) + return ret; + + ret = platform_driver_register(&qcom_iris_driver); + if (ret) + platform_driver_unregister(&wcnss_driver); + + return ret; +} +module_init(wcnss_init); + +static void __exit wcnss_exit(void) +{ + platform_driver_unregister(&qcom_iris_driver); + platform_driver_unregister(&wcnss_driver); +} +module_exit(wcnss_exit); + MODULE_DESCRIPTION("Qualcomm Peripherial Image Loader for Wireless Subsystem"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/remoteproc/qcom_wcnss.h b/drivers/remoteproc/qcom_wcnss.h index 9dc4a9fe41e19af6e637285b5bbce1664ac78b8d..25fb7f62a4576ddda9cea53247246ae26c84b9de 100644 --- a/drivers/remoteproc/qcom_wcnss.h +++ b/drivers/remoteproc/qcom_wcnss.h @@ -4,6 +4,8 @@ struct qcom_iris; struct qcom_wcnss; +extern struct platform_driver qcom_iris_driver; + struct wcnss_vreg_info { const char * const name; int 
min_voltage; diff --git a/drivers/remoteproc/qcom_wcnss_iris.c b/drivers/remoteproc/qcom_wcnss_iris.c index f0ca24a8dd0b77aef8103a8302d59af6fe658b8a..05d6e175411a9bb9e34feaf98103c6dc26300f5e 100644 --- a/drivers/remoteproc/qcom_wcnss_iris.c +++ b/drivers/remoteproc/qcom_wcnss_iris.c @@ -94,14 +94,12 @@ int qcom_iris_enable(struct qcom_iris *iris) return ret; } -EXPORT_SYMBOL_GPL(qcom_iris_enable); void qcom_iris_disable(struct qcom_iris *iris) { clk_disable_unprepare(iris->xo_clk); regulator_bulk_disable(iris->num_vregs, iris->vregs); } -EXPORT_SYMBOL_GPL(qcom_iris_disable); static int qcom_iris_probe(struct platform_device *pdev) { @@ -174,7 +172,7 @@ static const struct of_device_id iris_of_match[] = { {} }; -static struct platform_driver wcnss_driver = { +struct platform_driver qcom_iris_driver = { .probe = qcom_iris_probe, .remove = qcom_iris_remove, .driver = { @@ -182,7 +180,3 @@ static struct platform_driver wcnss_driver = { .of_match_table = iris_of_match, }, }; - -module_platform_driver(wcnss_driver); -MODULE_DESCRIPTION("Qualcomm Wireless Subsystem Iris driver"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/remoteproc/st_remoteproc.c b/drivers/remoteproc/st_remoteproc.c index ae8963fcc8c8f8f2b0ccb3cedc43f2419eddb6f9..da4e152e97331fbed605887430dddbe22947c9ff 100644 --- a/drivers/remoteproc/st_remoteproc.c +++ b/drivers/remoteproc/st_remoteproc.c @@ -245,8 +245,10 @@ static int st_rproc_probe(struct platform_device *pdev) goto free_rproc; enabled = st_rproc_state(pdev); - if (enabled < 0) + if (enabled < 0) { + ret = enabled; goto free_rproc; + } if (enabled) { atomic_inc(&rproc->power); diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c index 06fef2b4c81408f3e4e1bb1c91949f82e6afa58b..1d4770c02e57e2c0b0eda4c5b6313c8f69dae4f2 100644 --- a/drivers/rpmsg/qcom_smd.c +++ b/drivers/rpmsg/qcom_smd.c @@ -739,7 +739,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data, while (qcom_smd_get_tx_avail(channel) < tlen) { if (!wait) { - ret = -ENOMEM; + ret = -EAGAIN; goto out; } diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index b6ea9ffa73816d2bbf3cada1033a5058ccb14394..e0a629eaceab831407036902cb7d724c31900a18 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -411,8 +411,8 @@ int rpmsg_register_device(struct rpmsg_device *rpdev) struct device *dev = &rpdev->dev; int ret; - dev_set_name(&rpdev->dev, "%s:%s", - dev_name(dev->parent), rpdev->id.name); + dev_set_name(&rpdev->dev, "%s.%s.%d.%d", dev_name(dev->parent), + rpdev->id.name, rpdev->src, rpdev->dst); rpdev->dev.bus = &rpmsg_bus; rpdev->dev.release = rpmsg_release_device; diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index e883063c72581b61c8d4e8fd0db287476a37e101..3167e85819941c95a921423ba85a64e78d081fbc 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -870,7 +870,7 @@ static int __init vmlogrdr_init(void) goto cleanup; for (i=0; i < MAXMINOR; ++i ) { - sys_ser[i].buffer = (char *) get_zeroed_page(GFP_KERNEL); + sys_ser[i].buffer = (char *) get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!sys_ser[i].buffer) { rc = -ENOMEM; break; diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 581001989937ce1e0aaab11c26136d5e11b4fa4d..d5bf36ec8a751326062e47abe80ddd1f61a5f43b 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -289,11 +289,12 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter, /** - * zfcp_dbf_rec_run - trace event related 
to running recovery + * zfcp_dbf_rec_run_lvl - trace event related to running recovery + * @level: trace level to be used for event * @tag: identifier for event * @erp: erp_action running */ -void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) +void zfcp_dbf_rec_run_lvl(int level, char *tag, struct zfcp_erp_action *erp) { struct zfcp_dbf *dbf = erp->adapter->dbf; struct zfcp_dbf_rec *rec = &dbf->rec_buf; @@ -319,10 +320,20 @@ void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) else rec->u.run.rec_count = atomic_read(&erp->adapter->erp_counter); - debug_event(dbf->rec, 1, rec, sizeof(*rec)); + debug_event(dbf->rec, level, rec, sizeof(*rec)); spin_unlock_irqrestore(&dbf->rec_lock, flags); } +/** + * zfcp_dbf_rec_run - trace event related to running recovery + * @tag: identifier for event + * @erp: erp_action running + */ +void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) +{ + zfcp_dbf_rec_run_lvl(1, tag, erp); +} + /** * zfcp_dbf_rec_run_wka - trace wka port event with info like running recovery * @tag: identifier for event diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index 36d07584271d569d27ec2eeb3706235d6459e026..db186d44cfafb6036a7e452e68e2c8f078cff6d1 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -2,7 +2,7 @@ * zfcp device driver * debug feature declarations * - * Copyright IBM Corp. 2008, 2015 + * Copyright IBM Corp. 2008, 2016 */ #ifndef ZFCP_DBF_H @@ -283,6 +283,30 @@ struct zfcp_dbf { struct zfcp_dbf_scsi scsi_buf; }; +/** + * zfcp_dbf_hba_fsf_resp_suppress - true if we should not trace by default + * @req: request that has been completed + * + * Returns true if FCP response with only benign residual under count. + */ +static inline +bool zfcp_dbf_hba_fsf_resp_suppress(struct zfcp_fsf_req *req) +{ + struct fsf_qtcb *qtcb = req->qtcb; + u32 fsf_stat = qtcb->header.fsf_status; + struct fcp_resp *fcp_rsp; + u8 rsp_flags, fr_status; + + if (qtcb->prefix.qtcb_type != FSF_IO_COMMAND) + return false; /* not an FCP response */ + fcp_rsp = (struct fcp_resp *)&qtcb->bottom.io.fcp_rsp; + rsp_flags = fcp_rsp->fr_flags; + fr_status = fcp_rsp->fr_status; + return (fsf_stat == FSF_FCP_RSP_AVAILABLE) && + (rsp_flags == FCP_RESID_UNDER) && + (fr_status == SAM_STAT_GOOD); +} + static inline void zfcp_dbf_hba_fsf_resp(char *tag, int level, struct zfcp_fsf_req *req) { @@ -304,7 +328,9 @@ void zfcp_dbf_hba_fsf_response(struct zfcp_fsf_req *req) zfcp_dbf_hba_fsf_resp("fs_perr", 1, req); } else if (qtcb->header.fsf_status != FSF_GOOD) { - zfcp_dbf_hba_fsf_resp("fs_ferr", 1, req); + zfcp_dbf_hba_fsf_resp("fs_ferr", + zfcp_dbf_hba_fsf_resp_suppress(req) + ? 5 : 1, req); } else if ((req->fsf_command == FSF_QTCB_OPEN_PORT_WITH_DID) || (req->fsf_command == FSF_QTCB_OPEN_LUN)) { @@ -388,4 +414,15 @@ void zfcp_dbf_scsi_devreset(char *tag, struct scsi_cmnd *scmnd, u8 flag) _zfcp_dbf_scsi(tmp_tag, 1, scmnd, NULL); } +/** + * zfcp_dbf_scsi_nullcmnd() - trace NULLify of SCSI command in dev/tgt-reset. + * @scmnd: SCSI command that was NULLified. + * @fsf_req: request that owned @scmnd. 
+ */ +static inline void zfcp_dbf_scsi_nullcmnd(struct scsi_cmnd *scmnd, + struct zfcp_fsf_req *fsf_req) +{ + _zfcp_dbf_scsi("scfc__1", 3, scmnd, fsf_req); +} + #endif /* ZFCP_DBF_H */ diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index a59d678125bd0e0ad0bd1ca74b0d42985abb25d8..7ccfce55903423f5e998fe3d93f6942a5879376f 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -3,7 +3,7 @@ * * Error Recovery Procedures (ERP). * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #define KMSG_COMPONENT "zfcp" @@ -1204,6 +1204,62 @@ static void zfcp_erp_action_dequeue(struct zfcp_erp_action *erp_action) } } +/** + * zfcp_erp_try_rport_unblock - unblock rport if no more/new recovery + * @port: zfcp_port whose fc_rport we should try to unblock + */ +static void zfcp_erp_try_rport_unblock(struct zfcp_port *port) +{ + unsigned long flags; + struct zfcp_adapter *adapter = port->adapter; + int port_status; + struct Scsi_Host *shost = adapter->scsi_host; + struct scsi_device *sdev; + + write_lock_irqsave(&adapter->erp_lock, flags); + port_status = atomic_read(&port->status); + if ((port_status & ZFCP_STATUS_COMMON_UNBLOCKED) == 0 || + (port_status & (ZFCP_STATUS_COMMON_ERP_INUSE | + ZFCP_STATUS_COMMON_ERP_FAILED)) != 0) { + /* new ERP of severity >= port triggered elsewhere meanwhile or + * local link down (adapter erp_failed but not clear unblock) + */ + zfcp_dbf_rec_run_lvl(4, "ertru_p", &port->erp_action); + write_unlock_irqrestore(&adapter->erp_lock, flags); + return; + } + spin_lock(shost->host_lock); + __shost_for_each_device(sdev, shost) { + struct zfcp_scsi_dev *zsdev = sdev_to_zfcp(sdev); + int lun_status; + + if (zsdev->port != port) + continue; + /* LUN under port of interest */ + lun_status = atomic_read(&zsdev->status); + if ((lun_status & ZFCP_STATUS_COMMON_ERP_FAILED) != 0) + continue; /* unblock rport despite failed LUNs */ + /* LUN recovery not given up yet [maybe follow-up pending] */ + if ((lun_status & ZFCP_STATUS_COMMON_UNBLOCKED) == 0 || + (lun_status & ZFCP_STATUS_COMMON_ERP_INUSE) != 0) { + /* LUN blocked: + * not yet unblocked [LUN recovery pending] + * or meanwhile blocked [new LUN recovery triggered] + */ + zfcp_dbf_rec_run_lvl(4, "ertru_l", &zsdev->erp_action); + spin_unlock(shost->host_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); + return; + } + } + /* now port has no child or all children have completed recovery, + * and no ERP of severity >= port was meanwhile triggered elsewhere + */ + zfcp_scsi_schedule_rport_register(port); + spin_unlock(shost->host_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); +} + static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) { struct zfcp_adapter *adapter = act->adapter; @@ -1214,6 +1270,7 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) case ZFCP_ERP_ACTION_REOPEN_LUN: if (!(act->status & ZFCP_STATUS_ERP_NO_REF)) scsi_device_put(sdev); + zfcp_erp_try_rport_unblock(port); break; case ZFCP_ERP_ACTION_REOPEN_PORT: @@ -1224,7 +1281,7 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) */ if (act->step != ZFCP_ERP_STEP_UNINITIALIZED) if (result == ZFCP_ERP_SUCCEEDED) - zfcp_scsi_schedule_rport_register(port); + zfcp_erp_try_rport_unblock(port); /* fall through */ case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: put_device(&port->dev); diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 
c8fed9fa1cca3680015913162ce3ddfee50c85b1..21c8c689b02bb1f507398e3477e507027eec4732 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -3,7 +3,7 @@ * * External function declarations. * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #ifndef ZFCP_EXT_H @@ -35,6 +35,8 @@ extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *); extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *, struct zfcp_port *, struct scsi_device *, u8, u8); extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *); +extern void zfcp_dbf_rec_run_lvl(int level, char *tag, + struct zfcp_erp_action *erp); extern void zfcp_dbf_rec_run_wka(char *, struct zfcp_fc_wka_port *, u64); extern void zfcp_dbf_hba_fsf_uss(char *, struct zfcp_fsf_req *); extern void zfcp_dbf_hba_fsf_res(char *, int, struct zfcp_fsf_req *); diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h index be1c04b334c51f678d643e4c488173f8fd6be0ee..ea3c76ac0de14dc8ea9ad147d1f5b9ab80cc1172 100644 --- a/drivers/s390/scsi/zfcp_fsf.h +++ b/drivers/s390/scsi/zfcp_fsf.h @@ -3,7 +3,7 @@ * * Interface to the FSF support functions. * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #ifndef FSF_H @@ -78,6 +78,7 @@ #define FSF_APP_TAG_CHECK_FAILURE 0x00000082 #define FSF_REF_TAG_CHECK_FAILURE 0x00000083 #define FSF_ADAPTER_STATUS_AVAILABLE 0x000000AD +#define FSF_FCP_RSP_AVAILABLE 0x000000AF #define FSF_UNKNOWN_COMMAND 0x000000E2 #define FSF_UNKNOWN_OP_SUBTYPE 0x000000E3 #define FSF_INVALID_COMMAND_OPTION 0x000000E5 diff --git a/drivers/s390/scsi/zfcp_reqlist.h b/drivers/s390/scsi/zfcp_reqlist.h index 7c2c6194dfca58e1eb2fe6466abf64fd928deb96..703fce59befef0be884449e00addad4a49981d61 100644 --- a/drivers/s390/scsi/zfcp_reqlist.h +++ b/drivers/s390/scsi/zfcp_reqlist.h @@ -4,7 +4,7 @@ * Data structure and helper functions for tracking pending FSF * requests. * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009, 2016 */ #ifndef ZFCP_REQLIST_H @@ -180,4 +180,32 @@ static inline void zfcp_reqlist_move(struct zfcp_reqlist *rl, spin_unlock_irqrestore(&rl->lock, flags); } +/** + * zfcp_reqlist_apply_for_all() - apply a function to every request. + * @rl: the requestlist that contains the target requests. + * @f: the function to apply to each request; the first parameter of the + * function will be the target-request; the second parameter is the same + * pointer as given with the argument @data. + * @data: freely chosen argument; passed through to @f as second parameter. + * + * Uses :c:macro:`list_for_each_entry` to iterate over the lists in the hash- + * table (not a 'safe' variant, so don't modify the list). + * + * Holds @rl->lock over the entire request-iteration. + */ +static inline void +zfcp_reqlist_apply_for_all(struct zfcp_reqlist *rl, + void (*f)(struct zfcp_fsf_req *, void *), void *data) +{ + struct zfcp_fsf_req *req; + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&rl->lock, flags); + for (i = 0; i < ZFCP_REQ_LIST_BUCKETS; i++) + list_for_each_entry(req, &rl->buckets[i], list) + f(req, data); + spin_unlock_irqrestore(&rl->lock, flags); +} + #endif /* ZFCP_REQLIST_H */ diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 9069f98a18172e754c943010654de65e91c2fb7c..07ffdbb5107f732082e88c94b0362b845e7e17d6 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -3,7 +3,7 @@ * * Interface to Linux SCSI midlayer. * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 
2002, 2016 */ #define KMSG_COMPONENT "zfcp" @@ -88,9 +88,7 @@ int zfcp_scsi_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scpnt) } if (unlikely(!(status & ZFCP_STATUS_COMMON_UNBLOCKED))) { - /* This could be either - * open LUN pending: this is temporary, will result in - * open LUN or ERP_FAILED, so retry command + /* This could be * call to rport_delete pending: mimic retry from * fc_remote_port_chkready until rport is BLOCKED */ @@ -209,6 +207,57 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) return retval; } +struct zfcp_scsi_req_filter { + u8 tmf_scope; + u32 lun_handle; + u32 port_handle; +}; + +static void zfcp_scsi_forget_cmnd(struct zfcp_fsf_req *old_req, void *data) +{ + struct zfcp_scsi_req_filter *filter = + (struct zfcp_scsi_req_filter *)data; + + /* already aborted - prevent side-effects - or not a SCSI command */ + if (old_req->data == NULL || old_req->fsf_command != FSF_QTCB_FCP_CMND) + return; + + /* (tmf_scope == FCP_TMF_TGT_RESET || tmf_scope == FCP_TMF_LUN_RESET) */ + if (old_req->qtcb->header.port_handle != filter->port_handle) + return; + + if (filter->tmf_scope == FCP_TMF_LUN_RESET && + old_req->qtcb->header.lun_handle != filter->lun_handle) + return; + + zfcp_dbf_scsi_nullcmnd((struct scsi_cmnd *)old_req->data, old_req); + old_req->data = NULL; +} + +static void zfcp_scsi_forget_cmnds(struct zfcp_scsi_dev *zsdev, u8 tm_flags) +{ + struct zfcp_adapter *adapter = zsdev->port->adapter; + struct zfcp_scsi_req_filter filter = { + .tmf_scope = FCP_TMF_TGT_RESET, + .port_handle = zsdev->port->handle, + }; + unsigned long flags; + + if (tm_flags == FCP_TMF_LUN_RESET) { + filter.tmf_scope = FCP_TMF_LUN_RESET; + filter.lun_handle = zsdev->lun_handle; + } + + /* + * abort_lock secures against other processings - in the abort-function + * and normal cmnd-handler - of (struct zfcp_fsf_req *)->data + */ + write_lock_irqsave(&adapter->abort_lock, flags); + zfcp_reqlist_apply_for_all(adapter->req_list, zfcp_scsi_forget_cmnd, + &filter); + write_unlock_irqrestore(&adapter->abort_lock, flags); +} + static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) { struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(scpnt->device); @@ -241,8 +290,10 @@ static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED) { zfcp_dbf_scsi_devreset("fail", scpnt, tm_flags); retval = FAILED; - } else + } else { zfcp_dbf_scsi_devreset("okay", scpnt, tm_flags); + zfcp_scsi_forget_cmnds(zfcp_sdev, tm_flags); + } zfcp_fsf_req_free(fsf_req); return retval; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 79871f3519ffc923ed9c41848d70b17120c37844..d5b26fa541d36746ca2e2786ad39076d165c0256 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -160,7 +160,6 @@ static const struct pci_device_id aac_pci_tbl[] = { { 0x9005, 0x028b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 62 }, /* Adaptec PMC Series 6 (Tupelo) */ { 0x9005, 0x028c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 63 }, /* Adaptec PMC Series 7 (Denali) */ { 0x9005, 0x028d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 64 }, /* Adaptec PMC Series 8 */ - { 0x9005, 0x028f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 65 }, /* Adaptec PMC Series 9 */ { 0,} }; MODULE_DEVICE_TABLE(pci, aac_pci_tbl); @@ -239,7 +238,6 @@ static struct aac_driver_ident aac_drivers[] = { { aac_src_init, "aacraid", "ADAPTEC ", "RAID ", 2, AAC_QUIRK_SRC }, /* Adaptec PMC Series 6 (Tupelo) */ { aac_srcv_init, "aacraid", "ADAPTEC ", "RAID ", 2, AAC_QUIRK_SRC }, /* Adaptec PMC Series 
7 (Denali) */ { aac_srcv_init, "aacraid", "ADAPTEC ", "RAID ", 2, AAC_QUIRK_SRC }, /* Adaptec PMC Series 8 */ - { aac_srcv_init, "aacraid", "ADAPTEC ", "RAID ", 2, AAC_QUIRK_SRC } /* Adaptec PMC Series 9 */ }; /** diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c index cbf010324c187589e199f2b14ef3793bf892f2a9..596a75924d90210b092d93904c54521d1e903279 100644 --- a/drivers/scsi/g_NCR5380.c +++ b/drivers/scsi/g_NCR5380.c @@ -170,12 +170,12 @@ static int generic_NCR5380_init_one(struct scsi_host_template *tpnt, if (ports[i]) { /* At this point we have our region reserved */ magic_configure(i, 0, magic); /* no IRQ yet */ - outb(0xc0, ports[i] + 9); - if (inb(ports[i] + 9) != 0x80) { + base = ports[i]; + outb(0xc0, base + 9); + if (inb(base + 9) != 0x80) { ret = -ENODEV; goto out_release; } - base = ports[i]; port_idx = i; } else return -EINVAL; diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 642b739ad0da33c63bb7667f65cbe3f21db87e2a..e3b911c895b4a3c1a8da6473eae22ff4136f13d9 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -45,6 +45,7 @@ #define INITIAL_SRP_LIMIT 800 #define DEFAULT_MAX_SECTORS 256 +#define MAX_TXU 1024 * 1024 static uint max_vdma_size = MAX_H_COPY_RDMA; @@ -1239,7 +1240,7 @@ static long ibmvscsis_adapter_info(struct scsi_info *vscsi, } info = dma_alloc_coherent(&vscsi->dma_dev->dev, sizeof(*info), &token, - GFP_KERNEL); + GFP_ATOMIC); if (!info) { dev_err(&vscsi->dev, "bad dma_alloc_coherent %p\n", iue->target); @@ -1291,7 +1292,7 @@ static long ibmvscsis_adapter_info(struct scsi_info *vscsi, info->mad_version = cpu_to_be32(MAD_VERSION_1); info->os_type = cpu_to_be32(LINUX); memset(&info->port_max_txu[0], 0, sizeof(info->port_max_txu)); - info->port_max_txu[0] = cpu_to_be32(128 * PAGE_SIZE); + info->port_max_txu[0] = cpu_to_be32(MAX_TXU); dma_wmb(); rc = h_copy_rdma(sizeof(*info), vscsi->dds.window[LOCAL].liobn, @@ -1357,7 +1358,7 @@ static int ibmvscsis_cap_mad(struct scsi_info *vscsi, struct iu_entry *iue) } cap = dma_alloc_coherent(&vscsi->dma_dev->dev, olen, &token, - GFP_KERNEL); + GFP_ATOMIC); if (!cap) { dev_err(&vscsi->dev, "bad dma_alloc_coherent %p\n", iue->target); @@ -3702,7 +3703,7 @@ static int ibmvscsis_write_pending(struct se_cmd *se_cmd) 1, 1); if (rc) { pr_err("srp_transfer_data() failed: %d\n", rc); - return -EAGAIN; + return -EIO; } /* * We now tell TCM to add this WRITE CDB directly into the TCM storage diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 52d8bbf7feb5c50efe361aa2e57f30277f38a0df..bd04bd01d34aad796fc43f8fc0859d301ec7e00a 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2000,6 +2000,8 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, io_request->DevHandle = pd_sync->seq[pd_index].devHandle; pRAID_Context->regLockFlags |= (MR_RL_FLAGS_SEQ_NUM_ENABLE|MR_RL_FLAGS_GRANT_DESTINATION_CUDA); + pRAID_Context->Type = MPI2_TYPE_CUDA; + pRAID_Context->nseg = 0x1; } else if (fusion->fast_path_io) { pRAID_Context->VirtualDiskTgtId = cpu_to_le16(device_id); pRAID_Context->configSeqNum = 0; @@ -2035,12 +2037,10 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, pRAID_Context->timeoutValue = cpu_to_le16((os_timeout_value > timeout_limit) ? 
timeout_limit : os_timeout_value); - if (fusion->adapter_type == INVADER_SERIES) { - pRAID_Context->Type = MPI2_TYPE_CUDA; - pRAID_Context->nseg = 0x1; + if (fusion->adapter_type == INVADER_SERIES) io_request->IoFlags |= cpu_to_le16(MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH); - } + cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_FP_IO << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); @@ -2823,6 +2823,7 @@ int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance, dev_err(&instance->pdev->dev, "pending commands remain after waiting, " "will reset adapter scsi%d.\n", instance->host->host_no); + *convert = 1; retval = 1; } out: diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 3e71bc1b4a80604cca3488b7ce1d07acffb24445..7008061c4b5bbdada7742eb1c8cf8d5a8ffb0095 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -393,6 +393,7 @@ struct MPT3SAS_TARGET { * @eedp_enable: eedp support enable bit * @eedp_type: 0(type_1), 1(type_2), 2(type_3) * @eedp_block_length: block size + * @ata_command_pending: SATL passthrough outstanding for device */ struct MPT3SAS_DEVICE { struct MPT3SAS_TARGET *sas_target; @@ -402,6 +403,17 @@ struct MPT3SAS_DEVICE { u8 block; u8 tlr_snoop_check; u8 ignore_delay_remove; + /* + * Bug workaround for SATL handling: the mpt2/3sas firmware + * doesn't return BUSY or TASK_SET_FULL for subsequent + * commands while a SATL pass through is in operation as the + * spec requires, it simply does nothing with them until the + * pass through completes, causing them possibly to timeout if + * the passthrough is a long executing command (like format or + * secure erase). This variable allows us to do the right + * thing while a SATL command is pending. + */ + unsigned long ata_command_pending; }; #define MPT3_CMD_NOT_USED 0x8000 /* free */ diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 1c4744e78173bc0f78383b8f6cf901715b946d34..f84a6087cebdd441b13db9d8f41f43df25c6666c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3885,9 +3885,18 @@ _scsih_temp_threshold_events(struct MPT3SAS_ADAPTER *ioc, } } -static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd) +static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) { - return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16); + struct MPT3SAS_DEVICE *priv = scmd->device->hostdata; + + if (scmd->cmnd[0] != ATA_12 && scmd->cmnd[0] != ATA_16) + return 0; + + if (pending) + return test_and_set_bit(0, &priv->ata_command_pending); + + clear_bit(0, &priv->ata_command_pending); + return 0; } /** @@ -3911,9 +3920,7 @@ _scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc) if (!scmd) continue; count++; - if (ata_12_16_cmd(scmd)) - scsi_internal_device_unblock(scmd->device, - SDEV_RUNNING); + _scsih_set_satl_pending(scmd, false); mpt3sas_base_free_smid(ioc, smid); scsi_dma_unmap(scmd); if (ioc->pci_error_recovery) @@ -4044,13 +4051,6 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) if (ioc->logging_level & MPT_DEBUG_SCSI) scsi_print_command(scmd); - /* - * Lock the device for any subsequent command until command is - * done. 
- */ - if (ata_12_16_cmd(scmd)) - scsi_internal_device_block(scmd->device); - sas_device_priv_data = scmd->device->hostdata; if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { scmd->result = DID_NO_CONNECT << 16; @@ -4064,6 +4064,19 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) return 0; } + /* + * Bug work around for firmware SATL handling. The loop + * is based on atomic operations and ensures consistency + * since we're lockless at this point + */ + do { + if (test_bit(0, &sas_device_priv_data->ata_command_pending)) { + scmd->result = SAM_STAT_BUSY; + scmd->scsi_done(scmd); + return 0; + } + } while (_scsih_set_satl_pending(scmd, true)); + sas_target_priv_data = sas_device_priv_data->sas_target; /* invalid device handle */ @@ -4626,8 +4639,7 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) if (scmd == NULL) return 1; - if (ata_12_16_cmd(scmd)) - scsi_internal_device_unblock(scmd->device, SDEV_RUNNING); + _scsih_set_satl_pending(scmd, false); mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); diff --git a/drivers/scsi/mvsas/mv_94xx.c b/drivers/scsi/mvsas/mv_94xx.c index 4c57d9abce7b772be368a19388eceeffafc66d6f..7de5d8d75480f0d5934925d53556ca04f9b34510 100644 --- a/drivers/scsi/mvsas/mv_94xx.c +++ b/drivers/scsi/mvsas/mv_94xx.c @@ -668,7 +668,7 @@ static void mvs_94xx_command_active(struct mvs_info *mvi, u32 slot_idx) { u32 tmp; tmp = mvs_cr32(mvi, MVS_COMMAND_ACTIVE+(slot_idx >> 3)); - if (tmp && 1 << (slot_idx % 32)) { + if (tmp & 1 << (slot_idx % 32)) { mv_printk("command active %08X, slot [%x].\n", tmp, slot_idx); mvs_cw32(mvi, MVS_COMMAND_ACTIVE + (slot_idx >> 3), 1 << (slot_idx % 32)); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 56d6142852a553ed9ad8011cb4c18a84e8656e0d..078d797cb492bf28d7e1cf75b36fe4c46099e446 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3489,7 +3489,7 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, sizeof(struct ct6_dsd), 0, SLAB_HWCACHE_ALIGN, NULL); if (!ctx_cachep) - goto fail_free_gid_list; + goto fail_free_srb_mempool; } ha->ctx_mempool = mempool_create_slab_pool(SRB_MIN_REQ, ctx_cachep); @@ -3642,7 +3642,7 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, ha->loop_id_map = kzalloc(BITS_TO_LONGS(LOOPID_MAP_SIZE) * sizeof(long), GFP_KERNEL); if (!ha->loop_id_map) - goto fail_async_pd; + goto fail_loop_id_map; else { qla2x00_set_reserved_loop_ids(ha); ql_dbg_pci(ql_dbg_init, ha->pdev, 0x0123, @@ -3651,6 +3651,8 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, return 0; +fail_loop_id_map: + dma_pool_free(ha->s_dma_pool, ha->async_pd, ha->async_pd_dma); fail_async_pd: dma_pool_free(ha->s_dma_pool, ha->ex_init_cb, ha->ex_init_cb_dma); fail_ex_init_cb: @@ -3678,6 +3680,10 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, dma_pool_free(ha->s_dma_pool, ha->ms_iocb, ha->ms_iocb_dma); ha->ms_iocb = NULL; ha->ms_iocb_dma = 0; + + if (ha->sns_cmd) + dma_free_coherent(&ha->pdev->dev, sizeof(struct sns_cmd_pkt), + ha->sns_cmd, ha->sns_cmd_dma); fail_dma_pool: if (IS_QLA82XX(ha) || ql2xenabledif) { dma_pool_destroy(ha->fcp_cmnd_dma_pool); @@ -3695,10 +3701,12 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, kfree(ha->nvram); ha->nvram = NULL; fail_free_ctx_mempool: - mempool_destroy(ha->ctx_mempool); + if (ha->ctx_mempool) + mempool_destroy(ha->ctx_mempool); ha->ctx_mempool = 
NULL; fail_free_srb_mempool: - mempool_destroy(ha->srb_mempool); + if (ha->srb_mempool) + mempool_destroy(ha->srb_mempool); ha->srb_mempool = NULL; fail_free_gid_list: dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha), diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index a72bfde5d7fde50efb54ea050ae99d51211ec72a..e90a8e13e0fdb93acea65779f9605b2cac0b107f 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1204,10 +1204,6 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) struct request_queue *rq = sdev->request_queue; struct scsi_target *starget = sdev->sdev_target; - error = scsi_device_set_state(sdev, SDEV_RUNNING); - if (error) - return error; - error = scsi_target_add(starget); if (error) return error; diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 8c9a35c91705e42fcbc07e3721d0522f96d496dc..50adabbb5808902aea6abfd64a39c183678a6a6f 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -587,7 +587,7 @@ static void ses_match_to_enclosure(struct enclosure_device *edev, ses_enclosure_data_process(edev, to_scsi_device(edev->edev.parent), 0); - if (scsi_is_sas_rphy(&sdev->sdev_gendev)) + if (scsi_is_sas_rphy(sdev->sdev_target->dev.parent)) efd.addr = sas_get_address(sdev); if (efd.addr) { diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index f22fa96deb6d8cb3d01f3929ea119dffccbbfbb4..8251f6e233d2721dc292e2b8a8459973ad92bfcd 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -581,6 +581,9 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) sg_io_hdr_t *hp; unsigned char cmnd[SG_MAX_CDB_SIZE]; + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) + return -EINVAL; + if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 4ad72b6d94e2b63cd44230d5c5e03b6ac7dd00ca..12a47589cb25d3e06bcf03fd012ece2d5664a0c0 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8764,6 +8764,9 @@ void ufshcd_remove(struct ufs_hba *hba) if (hba->devfreq) devfreq_remove_device(hba->devfreq); } + + ufshcd_exit_latency_hist(hba); + ufshcd_hba_exit(hba); ufsdbg_remove_debugfs(hba); } diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index d17dd496d8f497d01113fbe2f7ea135035f54132..709801f64f186dcfa335fa5f30d48cdb78655c50 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -888,6 +888,7 @@ struct ufs_hba { bool no_ref_clk_gating; int scsi_block_reqs_cnt; + int latency_hist_enabled; struct io_latency_state io_lat_s; }; diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c index 8823cc81ae45345bd0d632436eb4ce387d456e0b..5bb376009d98b78bd0dbf6da3e8f0853f9e6528e 100644 --- a/drivers/soc/ti/wkup_m3_ipc.c +++ b/drivers/soc/ti/wkup_m3_ipc.c @@ -459,6 +459,7 @@ static int wkup_m3_ipc_probe(struct platform_device *pdev) if (IS_ERR(task)) { dev_err(dev, "can't create rproc_boot thread\n"); + ret = PTR_ERR(task); goto err_put_rproc; } diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c index ded37025b445834d0367ef41cac7aa80b2c656cf..6b001c4a564086528e9db608f7250305656d5da7 100644 --- a/drivers/spi/spi-orion.c +++ b/drivers/spi/spi-orion.c @@ -138,37 +138,62 @@ static int orion_spi_baudrate_set(struct spi_device *spi, unsigned int speed) tclk_hz = clk_get_rate(orion_spi->clk); if (devdata->typ == ARMADA_SPI) { - unsigned int clk, spr, sppr, sppr2, err; - unsigned int best_spr, best_sppr, 
best_err; - - best_err = speed; - best_spr = 0; - best_sppr = 0; - - /* Iterate over the valid range looking for best fit */ - for (sppr = 0; sppr < 8; sppr++) { - sppr2 = 0x1 << sppr; - - spr = tclk_hz / sppr2; - spr = DIV_ROUND_UP(spr, speed); - if ((spr == 0) || (spr > 15)) - continue; - - clk = tclk_hz / (spr * sppr2); - err = speed - clk; - - if (err < best_err) { - best_spr = spr; - best_sppr = sppr; - best_err = err; - } - } + /* + * Given the core_clk (tclk_hz) and the target rate (speed) we + * determine the best values for SPR (in [0 .. 15]) and SPPR (in + * [0..7]) such that + * + * core_clk / (SPR * 2 ** SPPR) + * + * is as big as possible but not bigger than speed. + */ - if ((best_sppr == 0) && (best_spr == 0)) - return -EINVAL; + /* best integer divider: */ + unsigned divider = DIV_ROUND_UP(tclk_hz, speed); + unsigned spr, sppr; + + if (divider < 16) { + /* This is the easy case, divider is less than 16 */ + spr = divider; + sppr = 0; + + } else { + unsigned two_pow_sppr; + /* + * Find the highest bit set in divider. This and the + * three next bits define SPR (apart from rounding). + * SPPR is then the number of zero bits that must be + * appended: + */ + sppr = fls(divider) - 4; + + /* + * As SPR only has 4 bits, we have to round divider up + * to the next multiple of 2 ** sppr. + */ + two_pow_sppr = 1 << sppr; + divider = (divider + two_pow_sppr - 1) & -two_pow_sppr; + + /* + * recalculate sppr as rounding up divider might have + * increased it enough to change the position of the + * highest set bit. In this case the bit that now + * doesn't make it into SPR is 0, so there is no need to + * round again. + */ + sppr = fls(divider) - 4; + spr = divider >> sppr; + + /* + * Now do range checking. SPR is constructed to have a + * width of 4 bits, so this is fine for sure. 
So we + * still need to check for sppr to fit into 3 bits: + */ + if (sppr > 7) + return -EINVAL; + } - prescale = ((best_sppr & 0x6) << 5) | - ((best_sppr & 0x1) << 4) | best_spr; + prescale = ((sppr & 0x6) << 5) | ((sppr & 0x1) << 4) | spr; } else { /* * the supported rates are: 4,6,8...30 diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index dd7b5b47291d551890da8e8dfc324ca74895d66b..d6239fa718be9e251f577b9d9dd792a0e5c5ead5 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1690,6 +1690,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) pxa2xx_spi_write(drv_data, SSCR1, tmp); tmp = SSCR0_SCR(2) | SSCR0_Motorola | SSCR0_DataSize(8); pxa2xx_spi_write(drv_data, SSCR0, tmp); + break; default: tmp = SSCR1_RxTresh(RX_THRESH_DFLT) | SSCR1_TxTresh(TX_THRESH_DFLT); diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c index 0f28c08fcb3c76f4a2deaf3cd084a39f800db142..77b551da57288c11d21d6ff40b65b1d8b01ed887 100644 --- a/drivers/ssb/pci.c +++ b/drivers/ssb/pci.c @@ -909,6 +909,7 @@ static int ssb_pci_sprom_get(struct ssb_bus *bus, if (err) { ssb_warn("WARNING: Using fallback SPROM failed (err %d)\n", err); + goto out_free; } else { ssb_dbg("Using SPROM revision %d provided by platform\n", sprom->revision); diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c index b132cff14f01c8a3ca59271651269fac50c0faba..675b974b2a6ec88fb12608f94e1dd63885ed5226 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c @@ -33,7 +33,6 @@ #include #include #include -#include #ifdef CONFIG_FIQ_GLUE #include @@ -82,7 +81,7 @@ struct fiq_debugger_state { struct timer_list sleep_timer; spinlock_t sleep_timer_lock; bool uart_enabled; - struct wake_lock debugger_wake_lock; + struct wakeup_source debugger_wake_src; bool console_enable; int current_cpu; atomic_t unhandled_fiq_count; @@ -563,7 +562,7 @@ static void fiq_debugger_sleep_timer_expired(unsigned long data) state->uart_enabled = false; fiq_debugger_enable_wakeup_irq(state); } - wake_unlock(&state->debugger_wake_lock); + __pm_relax(&state->debugger_wake_src); spin_unlock_irqrestore(&state->sleep_timer_lock, flags); } @@ -575,7 +574,7 @@ static void fiq_debugger_handle_wakeup(struct fiq_debugger_state *state) if (state->wakeup_irq >= 0 && state->ignore_next_wakeup_irq) { state->ignore_next_wakeup_irq = false; } else if (!state->uart_enabled) { - wake_lock(&state->debugger_wake_lock); + __pm_stay_awake(&state->debugger_wake_src); fiq_debugger_uart_enable(state); state->uart_enabled = true; fiq_debugger_disable_wakeup_irq(state); @@ -619,7 +618,7 @@ static void fiq_debugger_handle_irq_context(struct fiq_debugger_state *state) unsigned long flags; spin_lock_irqsave(&state->sleep_timer_lock, flags); - wake_lock(&state->debugger_wake_lock); + __pm_stay_awake(&state->debugger_wake_src); mod_timer(&state->sleep_timer, jiffies + HZ * 5); spin_unlock_irqrestore(&state->sleep_timer_lock, flags); } @@ -1086,8 +1085,7 @@ static int fiq_debugger_probe(struct platform_device *pdev) state->no_sleep = true; state->ignore_next_wakeup_irq = !state->no_sleep; - wake_lock_init(&state->debugger_wake_lock, - WAKE_LOCK_SUSPEND, "serial-debug"); + wakeup_source_init(&state->debugger_wake_src, "serial-debug"); state->clk = clk_get(&pdev->dev, NULL); if (IS_ERR(state->clk)) @@ -1188,7 +1186,7 @@ static int fiq_debugger_probe(struct platform_device *pdev) clk_disable(state->clk); if (state->clk) clk_put(state->clk); - 
wake_lock_destroy(&state->debugger_wake_lock); + wakeup_source_trash(&state->debugger_wake_src); platform_set_drvdata(pdev, NULL); kfree(state); return ret; diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 0f97d7b611d720b7fd774ab102adfd3a607494fa..1c967c30e4ce9815ae23eaa828c49c32d93bb561 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -1832,7 +1832,7 @@ static int ni_ai_insn_read(struct comedi_device *dev, unsigned int *data) { struct ni_private *devpriv = dev->private; - unsigned int mask = (s->maxdata + 1) >> 1; + unsigned int mask = s->maxdata; int i, n; unsigned int signbits; unsigned int d; @@ -1875,7 +1875,7 @@ static int ni_ai_insn_read(struct comedi_device *dev, return -ETIME; } d += signbits; - data[n] = d; + data[n] = d & 0xffff; } } else if (devpriv->is_6143) { for (n = 0; n < insn->n; n++) { @@ -1924,9 +1924,8 @@ static int ni_ai_insn_read(struct comedi_device *dev, data[n] = dl; } else { d = ni_readw(dev, NI_E_AI_FIFO_DATA_REG); - /* subtle: needs to be short addition */ d += signbits; - data[n] = d; + data[n] = d & 0xffff; } } } diff --git a/drivers/staging/goldfish/Kconfig b/drivers/staging/goldfish/Kconfig index 4e094602437c3dea1c36e7614f808d680a0b4aa1..d293bbc22c792a41d83dddf5e4c5861135ae633e 100644 --- a/drivers/staging/goldfish/Kconfig +++ b/drivers/staging/goldfish/Kconfig @@ -4,6 +4,14 @@ config GOLDFISH_AUDIO ---help--- Emulated audio channel for the Goldfish Android Virtual Device +config GOLDFISH_SYNC + tristate "Goldfish AVD Sync Driver" + depends on GOLDFISH + depends on SW_SYNC + depends on SYNC_FILE + ---help--- + Emulated sync fences for the Goldfish Android Virtual Device + config MTD_GOLDFISH_NAND tristate "Goldfish NAND device" depends on GOLDFISH diff --git a/drivers/staging/goldfish/Makefile b/drivers/staging/goldfish/Makefile index dec34ad58162fdda2dde0969b0e004bc3dbfb01d..3313fce4e940d7be564416ca1485225ab41aba4b 100644 --- a/drivers/staging/goldfish/Makefile +++ b/drivers/staging/goldfish/Makefile @@ -4,3 +4,9 @@ obj-$(CONFIG_GOLDFISH_AUDIO) += goldfish_audio.o obj-$(CONFIG_MTD_GOLDFISH_NAND) += goldfish_nand.o + +# and sync + +ccflags-y := -Idrivers/staging/android +goldfish_sync-objs := goldfish_sync_timeline_fence.o goldfish_sync_timeline.o +obj-$(CONFIG_GOLDFISH_SYNC) += goldfish_sync.o diff --git a/drivers/staging/goldfish/goldfish_audio.c b/drivers/staging/goldfish/goldfish_audio.c index bd559956f199001dcf2743f5e944111f8236169b..0bb0ee2e691f1a8899ca63a9c5bc93d6a8a7a36b 100644 --- a/drivers/staging/goldfish/goldfish_audio.c +++ b/drivers/staging/goldfish/goldfish_audio.c @@ -28,6 +28,7 @@ #include #include #include +#include MODULE_AUTHOR("Google, Inc."); MODULE_DESCRIPTION("Android QEMU Audio Driver"); @@ -116,6 +117,7 @@ static ssize_t goldfish_audio_read(struct file *fp, char __user *buf, size_t count, loff_t *pos) { struct goldfish_audio *data = fp->private_data; + unsigned long irq_flags; int length; int result = 0; @@ -129,6 +131,10 @@ static ssize_t goldfish_audio_read(struct file *fp, char __user *buf, wait_event_interruptible(data->wait, data->buffer_status & AUDIO_INT_READ_BUFFER_FULL); + spin_lock_irqsave(&data->lock, irq_flags); + data->buffer_status &= ~AUDIO_INT_READ_BUFFER_FULL; + spin_unlock_irqrestore(&data->lock, irq_flags); + length = AUDIO_READ(data, AUDIO_READ_BUFFER_AVAILABLE); /* copy data to user space */ @@ -351,12 +357,19 @@ static const struct of_device_id goldfish_audio_of_match[] = { }; 
MODULE_DEVICE_TABLE(of, goldfish_audio_of_match); +static const struct acpi_device_id goldfish_audio_acpi_match[] = { + { "GFSH0005", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_audio_acpi_match); + static struct platform_driver goldfish_audio_driver = { .probe = goldfish_audio_probe, .remove = goldfish_audio_remove, .driver = { .name = "goldfish_audio", .of_match_table = goldfish_audio_of_match, + .acpi_match_table = ACPI_PTR(goldfish_audio_acpi_match), } }; diff --git a/drivers/staging/goldfish/goldfish_sync_timeline.c b/drivers/staging/goldfish/goldfish_sync_timeline.c new file mode 100644 index 0000000000000000000000000000000000000000..5bef4c6c02838a42911d8142473fd965df1a32f4 --- /dev/null +++ b/drivers/staging/goldfish/goldfish_sync_timeline.c @@ -0,0 +1,962 @@ +/* + * Copyright (C) 2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include "goldfish_sync_timeline_fence.h" + +#define ERR(...) printk(KERN_ERR __VA_ARGS__); + +#define INFO(...) printk(KERN_INFO __VA_ARGS__); + +#define DPRINT(...) pr_debug(__VA_ARGS__); + +#define DTRACE() DPRINT("%s: enter", __func__) + +/* The Goldfish sync driver is designed to provide an interface + * between the underlying host's sync device and the kernel's + * fence sync framework. + * The purpose of the device/driver is to enable lightweight + * creation and signaling of timelines and fences + * in order to synchronize the guest with host-side graphics events. + * + * Each time the interrupt trips, the driver + * may perform a sync operation. + */ + +/* The operations are: */ + +/* Ready signal - used to mark when irq should lower */ +#define CMD_SYNC_READY 0 + +/* Create a new timeline. Writes timeline handle */ +#define CMD_CREATE_SYNC_TIMELINE 1 + +/* Create a fence object. Reads timeline handle and time argument. + * Writes fence fd to the SYNC_REG_HANDLE register. */ +#define CMD_CREATE_SYNC_FENCE 2 + +/* Increments timeline. Reads timeline handle and time argument */ +#define CMD_SYNC_TIMELINE_INC 3 + +/* Destroys a timeline. Reads timeline handle */ +#define CMD_DESTROY_SYNC_TIMELINE 4 + +/* Starts a wait on the host with + * the given glsync object and sync thread handle. */ +#define CMD_TRIGGER_HOST_WAIT 5 + +/* The register layout is: */ + +#define SYNC_REG_BATCH_COMMAND 0x00 /* host->guest batch commands */ +#define SYNC_REG_BATCH_GUESTCOMMAND 0x04 /* guest->host batch commands */ +#define SYNC_REG_BATCH_COMMAND_ADDR 0x08 /* communicate physical address of host->guest batch commands */ +#define SYNC_REG_BATCH_COMMAND_ADDR_HIGH 0x0c /* 64-bit part */ +#define SYNC_REG_BATCH_GUESTCOMMAND_ADDR 0x10 /* communicate physical address of guest->host commands */ +#define SYNC_REG_BATCH_GUESTCOMMAND_ADDR_HIGH 0x14 /* 64-bit part */ +#define SYNC_REG_INIT 0x18 /* signals that the device has been probed */ + +/* There is an ioctl associated with goldfish sync driver.
+ * Make it conflict with ioctls that are not likely to be used + * in the emulator. + * + * '@' 00-0F linux/radeonfb.h conflict! + * '@' 00-0F drivers/video/aty/aty128fb.c conflict! + */ +#define GOLDFISH_SYNC_IOC_MAGIC '@' + +#define GOLDFISH_SYNC_IOC_QUEUE_WORK _IOWR(GOLDFISH_SYNC_IOC_MAGIC, 0, struct goldfish_sync_ioctl_info) + +/* The above definitions (command codes, register layout, ioctl definitions) + * need to be in sync with the following files: + * + * Host-side (emulator): + * external/qemu/android/emulation/goldfish_sync.h + * external/qemu-android/hw/misc/goldfish_sync.c + * + * Guest-side (system image): + * device/generic/goldfish-opengl/system/egl/goldfish_sync.h + * device/generic/goldfish/ueventd.ranchu.rc + * platform/build/target/board/generic/sepolicy/file_contexts + */ +struct goldfish_sync_hostcmd { + /* sorted for alignment */ + uint64_t handle; + uint64_t hostcmd_handle; + uint32_t cmd; + uint32_t time_arg; +}; + +struct goldfish_sync_guestcmd { + uint64_t host_command; /* uint64_t for alignment */ + uint64_t glsync_handle; + uint64_t thread_handle; + uint64_t guest_timeline_handle; +}; + +#define GOLDFISH_SYNC_MAX_CMDS 32 + +struct goldfish_sync_state { + char __iomem *reg_base; + int irq; + + /* Spinlock protects |to_do| / |to_do_end|. */ + spinlock_t lock; + /* |mutex_lock| protects all concurrent access + * to timelines for both kernel and user space. */ + struct mutex mutex_lock; + + /* Buffer holding commands issued from host. */ + struct goldfish_sync_hostcmd to_do[GOLDFISH_SYNC_MAX_CMDS]; + uint32_t to_do_end; + + /* Addresses for the reading or writing + * of individual commands. The host can directly write + * to |batch_hostcmd| (and then this driver immediately + * copies contents to |to_do|). This driver either replies + * through |batch_hostcmd| or simply issues a + * guest->host command through |batch_guestcmd|. + */ + struct goldfish_sync_hostcmd *batch_hostcmd; + struct goldfish_sync_guestcmd *batch_guestcmd; + + /* Used to give this struct itself to a work queue + * function for executing actual sync commands. */ + struct work_struct work_item; +}; + +static struct goldfish_sync_state global_sync_state[1]; + +struct goldfish_sync_timeline_obj { + struct goldfish_sync_timeline *sync_tl; + uint32_t current_time; + /* We need to be careful about when we deallocate + * this |goldfish_sync_timeline_obj| struct. + * In order to ensure proper cleanup, we need to + * consider the triggered host-side wait that may + * still be in flight when the guest close()'s a + * goldfish_sync device's sync context fd (and + * destroys the |sync_tl| field above). + * The host-side wait may raise IRQ + * and tell the kernel to increment the timeline _after_ + * the |sync_tl| has already been set to null. + * + * From observations on OpenGL apps and CTS tests, this + * happens at some very low probability upon context + * destruction or process close, but it does happen + * and it needs to be handled properly. Otherwise, + * if we clean up the surrounding |goldfish_sync_timeline_obj| + * too early, any |handle| field of any host->guest command + * might not even point to a null |sync_tl| field, + * but to garbage memory or even a reclaimed |sync_tl|. 
+ * If we do not count such "pending waits" and kfree the object + * immediately upon |goldfish_sync_timeline_destroy|, + * we might get mysterious RCU stalls after running a long + * time because the garbage memory that is being read + * happens to be interpretable as a |spinlock_t| struct + * that is currently in the locked state. + * + * To track when to free the |goldfish_sync_timeline_obj| + * itself, we maintain a kref. + * The kref essentially counts the timeline itself plus + * the number of waits in flight. kref_init/kref_put + * are issued on + * |goldfish_sync_timeline_create|/|goldfish_sync_timeline_destroy| + * and kref_get/kref_put are issued on + * |goldfish_sync_fence_create|/|goldfish_sync_timeline_inc|. + * + * The timeline is destroyed after the reference count + * reaches zero, which would happen after + * |goldfish_sync_timeline_destroy| and all pending + * |goldfish_sync_timeline_inc|'s are fulfilled. + * + * NOTE (1): We assume that |fence_create| and + * |timeline_inc| calls are 1:1, otherwise the kref scheme + * will not work. This is a valid assumption as long + * as the host-side virtual device implementation + * does not insert any timeline increments + * that we did not trigger from here. + * + * NOTE (2): The use of kref by itself requires no locks, + * but this does not mean everything works without locks. + * Related timeline operations do require a lock of some sort, + * or at least are not proven to work without it. + * In particular, we assume that all the operations + * done on the |kref| field above are done in contexts where + * |global_sync_state->mutex_lock| is held. Do not + * remove that lock until everything is proven to work + * without it!!! */ + struct kref kref; +}; + +/* We will call |delete_timeline_obj| when the last reference count + * of the kref is decremented. This deletes the sync + * timeline object along with the wrapper itself. */ +static void delete_timeline_obj(struct kref* kref) { + struct goldfish_sync_timeline_obj* obj = + container_of(kref, struct goldfish_sync_timeline_obj, kref); + + goldfish_sync_timeline_put_internal(obj->sync_tl); + obj->sync_tl = NULL; + kfree(obj); +} + +static uint64_t gensym_ctr; +static void gensym(char *dst) +{ + sprintf(dst, "goldfish_sync:gensym:%llu", gensym_ctr); + gensym_ctr++; +} + +/* |goldfish_sync_timeline_create| assumes that |global_sync_state->mutex_lock| + * is held. */ +static struct goldfish_sync_timeline_obj* +goldfish_sync_timeline_create(void) +{ + + char timeline_name[256]; + struct goldfish_sync_timeline *res_sync_tl = NULL; + struct goldfish_sync_timeline_obj *res; + + DTRACE(); + + gensym(timeline_name); + + res_sync_tl = goldfish_sync_timeline_create_internal(timeline_name); + if (!res_sync_tl) { + ERR("Failed to create goldfish_sw_sync timeline."); + return NULL; + } + + res = kzalloc(sizeof(struct goldfish_sync_timeline_obj), GFP_KERNEL); + res->sync_tl = res_sync_tl; + res->current_time = 0; + kref_init(&res->kref); + + DPRINT("new timeline_obj=0x%p", res); + return res; +} + +/* |goldfish_sync_fence_create| assumes that |global_sync_state->mutex_lock| + * is held.
*/ +static int +goldfish_sync_fence_create(struct goldfish_sync_timeline_obj *obj, + uint32_t val) +{ + + int fd; + char fence_name[256]; + struct sync_pt *syncpt = NULL; + struct sync_file *sync_file_obj = NULL; + struct goldfish_sync_timeline *tl; + + DTRACE(); + + if (!obj) return -1; + + tl = obj->sync_tl; + + syncpt = goldfish_sync_pt_create_internal( + tl, sizeof(struct sync_pt) + 4, val); + if (!syncpt) { + ERR("could not create sync point! " + "goldfish_sync_timeline=0x%p val=%d", + tl, val); + return -1; + } + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + ERR("could not get unused fd for sync fence. " + "errno=%d", fd); + goto err_cleanup_pt; + } + + gensym(fence_name); + + sync_file_obj = sync_file_create(&syncpt->base); + if (!sync_file_obj) { + ERR("could not create sync fence! " + "goldfish_sync_timeline=0x%p val=%d sync_pt=0x%p", + tl, val, syncpt); + goto err_cleanup_fd_pt; + } + + DPRINT("installing sync fence into fd %d sync_file_obj=0x%p", + fd, sync_file_obj); + fd_install(fd, sync_file_obj->file); + kref_get(&obj->kref); + + return fd; + +err_cleanup_fd_pt: + put_unused_fd(fd); +err_cleanup_pt: + fence_put(&syncpt->base); + return -1; +} + +/* |goldfish_sync_timeline_inc| assumes that |global_sync_state->mutex_lock| + * is held. */ +static void +goldfish_sync_timeline_inc(struct goldfish_sync_timeline_obj *obj, uint32_t inc) +{ + DTRACE(); + /* Just give up if someone else nuked the timeline. + * Whoever it was won't care that it doesn't get signaled. */ + if (!obj) return; + + DPRINT("timeline_obj=0x%p", obj); + goldfish_sync_timeline_signal_internal(obj->sync_tl, inc); + DPRINT("incremented timeline. increment max_time"); + obj->current_time += inc; + + /* Here, we will end up deleting the timeline object if it + * turns out that this call was a pending increment after + * |goldfish_sync_timeline_destroy| was called. */ + kref_put(&obj->kref, delete_timeline_obj); + DPRINT("done"); +} + +/* |goldfish_sync_timeline_destroy| assumes + * that |global_sync_state->mutex_lock| is held. 
*/ +static void +goldfish_sync_timeline_destroy(struct goldfish_sync_timeline_obj *obj) +{ + DTRACE(); + /* See description of |goldfish_sync_timeline_obj| for why we + * should not immediately destroy |obj| */ + kref_put(&obj->kref, delete_timeline_obj); +} + +static inline void +goldfish_sync_cmd_queue(struct goldfish_sync_state *sync_state, + uint32_t cmd, + uint64_t handle, + uint32_t time_arg, + uint64_t hostcmd_handle) +{ + struct goldfish_sync_hostcmd *to_add; + + DTRACE(); + + BUG_ON(sync_state->to_do_end == GOLDFISH_SYNC_MAX_CMDS); + + to_add = &sync_state->to_do[sync_state->to_do_end]; + + to_add->cmd = cmd; + to_add->handle = handle; + to_add->time_arg = time_arg; + to_add->hostcmd_handle = hostcmd_handle; + + sync_state->to_do_end += 1; +} + +static inline void +goldfish_sync_hostcmd_reply(struct goldfish_sync_state *sync_state, + uint32_t cmd, + uint64_t handle, + uint32_t time_arg, + uint64_t hostcmd_handle) +{ + unsigned long irq_flags; + struct goldfish_sync_hostcmd *batch_hostcmd = + sync_state->batch_hostcmd; + + DTRACE(); + + spin_lock_irqsave(&sync_state->lock, irq_flags); + + batch_hostcmd->cmd = cmd; + batch_hostcmd->handle = handle; + batch_hostcmd->time_arg = time_arg; + batch_hostcmd->hostcmd_handle = hostcmd_handle; + writel(0, sync_state->reg_base + SYNC_REG_BATCH_COMMAND); + + spin_unlock_irqrestore(&sync_state->lock, irq_flags); +} + +static inline void +goldfish_sync_send_guestcmd(struct goldfish_sync_state *sync_state, + uint32_t cmd, + uint64_t glsync_handle, + uint64_t thread_handle, + uint64_t timeline_handle) +{ + unsigned long irq_flags; + struct goldfish_sync_guestcmd *batch_guestcmd = + sync_state->batch_guestcmd; + + DTRACE(); + + spin_lock_irqsave(&sync_state->lock, irq_flags); + + batch_guestcmd->host_command = (uint64_t)cmd; + batch_guestcmd->glsync_handle = (uint64_t)glsync_handle; + batch_guestcmd->thread_handle = (uint64_t)thread_handle; + batch_guestcmd->guest_timeline_handle = (uint64_t)timeline_handle; + writel(0, sync_state->reg_base + SYNC_REG_BATCH_GUESTCOMMAND); + + spin_unlock_irqrestore(&sync_state->lock, irq_flags); +} + +/* |goldfish_sync_interrupt| handles IRQ raises from the virtual device. + * In the context of OpenGL, this interrupt will fire whenever we need + * to signal a fence fd in the guest, with the command + * |CMD_SYNC_TIMELINE_INC|. + * However, because this function will be called in an interrupt context, + * it is necessary to do the actual work of signaling off of interrupt context. + * The shared work queue is used for this purpose. At the end when + * all pending commands are intercepted by the interrupt handler, + * we call |schedule_work|, which will later run the actual + * desired sync command in |goldfish_sync_work_item_fn|. 
+ */ +static irqreturn_t goldfish_sync_interrupt(int irq, void *dev_id) +{ + + struct goldfish_sync_state *sync_state = dev_id; + + uint32_t nextcmd; + uint32_t command_r; + uint64_t handle_rw; + uint32_t time_r; + uint64_t hostcmd_handle_rw; + + int count = 0; + + DTRACE(); + + sync_state = dev_id; + + spin_lock(&sync_state->lock); + + for (;;) { + + readl(sync_state->reg_base + SYNC_REG_BATCH_COMMAND); + nextcmd = sync_state->batch_hostcmd->cmd; + + if (nextcmd == 0) + break; + + command_r = nextcmd; + handle_rw = sync_state->batch_hostcmd->handle; + time_r = sync_state->batch_hostcmd->time_arg; + hostcmd_handle_rw = sync_state->batch_hostcmd->hostcmd_handle; + + goldfish_sync_cmd_queue( + sync_state, + command_r, + handle_rw, + time_r, + hostcmd_handle_rw); + + count++; + } + + spin_unlock(&sync_state->lock); + + schedule_work(&sync_state->work_item); + + return (count == 0) ? IRQ_NONE : IRQ_HANDLED; +} + +/* |goldfish_sync_work_item_fn| does the actual work of servicing + * host->guest sync commands. This function is triggered whenever + * the IRQ for the goldfish sync device is raised. Once it starts + * running, it grabs the contents of the buffer containing the + * commands it needs to execute (there may be multiple, because + * our IRQ is active high and not edge triggered), and then + * runs all of them one after the other. + */ +static void goldfish_sync_work_item_fn(struct work_struct *input) +{ + + struct goldfish_sync_state *sync_state; + int sync_fence_fd; + + struct goldfish_sync_timeline_obj *timeline; + uint64_t timeline_ptr; + + uint64_t hostcmd_handle; + + uint32_t cmd; + uint64_t handle; + uint32_t time_arg; + + struct goldfish_sync_hostcmd *todo; + uint32_t todo_end; + + unsigned long irq_flags; + + struct goldfish_sync_hostcmd to_run[GOLDFISH_SYNC_MAX_CMDS]; + uint32_t i = 0; + + sync_state = container_of(input, struct goldfish_sync_state, work_item); + + mutex_lock(&sync_state->mutex_lock); + + spin_lock_irqsave(&sync_state->lock, irq_flags); { + + todo_end = sync_state->to_do_end; + + DPRINT("num sync todos: %u", sync_state->to_do_end); + + for (i = 0; i < todo_end; i++) + to_run[i] = sync_state->to_do[i]; + + /* We expect that commands will come in at a slow enough rate + * so that incoming items will not be more than + * GOLDFISH_SYNC_MAX_CMDS. + * + * This is because the way the sync device is used, + * it's only for managing buffer data transfers per frame, + * with a sequential dependency between putting things in + * to_do and taking them out. Once a set of commands is + * queued up in to_do, the user of the device waits for + * them to be processed before queuing additional commands, + * which limits the rate at which commands come in + * to the rate at which we take them out here. + * + * We also don't expect more than MAX_CMDS to be issued + * at once; there is a correspondence between + * which buffers need swapping to the (display / buffer queue) + * to particular commands, and we don't expect there to be + * enough display or buffer queues in operation at once + * to overrun GOLDFISH_SYNC_MAX_CMDS. 
+ */ + sync_state->to_do_end = 0; + + } spin_unlock_irqrestore(&sync_state->lock, irq_flags); + + for (i = 0; i < todo_end; i++) { + DPRINT("todo index: %u", i); + + todo = &to_run[i]; + + cmd = todo->cmd; + + handle = (uint64_t)todo->handle; + time_arg = todo->time_arg; + hostcmd_handle = (uint64_t)todo->hostcmd_handle; + + DTRACE(); + + timeline = (struct goldfish_sync_timeline_obj *)(uintptr_t)handle; + + switch (cmd) { + case CMD_SYNC_READY: + break; + case CMD_CREATE_SYNC_TIMELINE: + DPRINT("exec CMD_CREATE_SYNC_TIMELINE: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + timeline = goldfish_sync_timeline_create(); + timeline_ptr = (uintptr_t)timeline; + goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_TIMELINE, + timeline_ptr, + 0, + hostcmd_handle); + DPRINT("sync timeline created: %p", timeline); + break; + case CMD_CREATE_SYNC_FENCE: + DPRINT("exec CMD_CREATE_SYNC_FENCE: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + sync_fence_fd = goldfish_sync_fence_create(timeline, time_arg); + goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_FENCE, + sync_fence_fd, + 0, + hostcmd_handle); + break; + case CMD_SYNC_TIMELINE_INC: + DPRINT("exec CMD_SYNC_TIMELINE_INC: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + goldfish_sync_timeline_inc(timeline, time_arg); + break; + case CMD_DESTROY_SYNC_TIMELINE: + DPRINT("exec CMD_DESTROY_SYNC_TIMELINE: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + goldfish_sync_timeline_destroy(timeline); + break; + } + DPRINT("Done executing sync command"); + } + mutex_unlock(&sync_state->mutex_lock); +} + +/* Guest-side interface: file operations */ + +/* Goldfish sync context and ioctl info. + * + * When a sync context is created by open()-ing the goldfish sync device, we + * create a sync context (|goldfish_sync_context|). + * + * Currently, the only data required to track is the sync timeline itself + * along with the current time, which are all packed up in the + * |goldfish_sync_timeline_obj| field. We use a |goldfish_sync_context| + * as the filp->private_data. + * + * Next, when a sync context user requests that work be queued and a fence + * fd provided, we use the |goldfish_sync_ioctl_info| struct, which holds + * information about which host handles to touch for this particular + * queue-work operation. We need to know about the host-side sync thread + * and the particular host-side GLsync object. We also possibly write out + * a file descriptor. 
+ */ +struct goldfish_sync_context { + struct goldfish_sync_timeline_obj *timeline; +}; + +struct goldfish_sync_ioctl_info { + uint64_t host_glsync_handle_in; + uint64_t host_syncthread_handle_in; + int fence_fd_out; +}; + +static int goldfish_sync_open(struct inode *inode, struct file *file) +{ + + struct goldfish_sync_context *sync_context; + + DTRACE(); + + mutex_lock(&global_sync_state->mutex_lock); + + sync_context = kzalloc(sizeof(struct goldfish_sync_context), GFP_KERNEL); + + if (sync_context == NULL) { + ERR("Creation of goldfish sync context failed!"); + mutex_unlock(&global_sync_state->mutex_lock); + return -ENOMEM; + } + + sync_context->timeline = NULL; + + file->private_data = sync_context; + + DPRINT("successfully create a sync context @0x%p", sync_context); + + mutex_unlock(&global_sync_state->mutex_lock); + + return 0; +} + +static int goldfish_sync_release(struct inode *inode, struct file *file) +{ + + struct goldfish_sync_context *sync_context; + + DTRACE(); + + mutex_lock(&global_sync_state->mutex_lock); + + sync_context = file->private_data; + + if (sync_context->timeline) + goldfish_sync_timeline_destroy(sync_context->timeline); + + sync_context->timeline = NULL; + + kfree(sync_context); + + mutex_unlock(&global_sync_state->mutex_lock); + + return 0; +} + +/* |goldfish_sync_ioctl| is the guest-facing interface of goldfish sync + * and is used in conjunction with eglCreateSyncKHR to queue up the + * actual work of waiting for the EGL sync command to complete, + * possibly returning a fence fd to the guest. + */ +static long goldfish_sync_ioctl(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + struct goldfish_sync_context *sync_context_data; + struct goldfish_sync_timeline_obj *timeline; + int fd_out; + struct goldfish_sync_ioctl_info ioctl_data; + + DTRACE(); + + sync_context_data = file->private_data; + fd_out = -1; + + switch (cmd) { + case GOLDFISH_SYNC_IOC_QUEUE_WORK: + + DPRINT("exec GOLDFISH_SYNC_IOC_QUEUE_WORK"); + + mutex_lock(&global_sync_state->mutex_lock); + + if (copy_from_user(&ioctl_data, + (void __user *)arg, + sizeof(ioctl_data))) { + ERR("Failed to copy memory for ioctl_data from user."); + mutex_unlock(&global_sync_state->mutex_lock); + return -EFAULT; + } + + if (ioctl_data.host_syncthread_handle_in == 0) { + DPRINT("Error: zero host syncthread handle!!!"); + mutex_unlock(&global_sync_state->mutex_lock); + return -EFAULT; + } + + if (!sync_context_data->timeline) { + DPRINT("no timeline yet, create one."); + sync_context_data->timeline = goldfish_sync_timeline_create(); + DPRINT("timeline: 0x%p", &sync_context_data->timeline); + } + + timeline = sync_context_data->timeline; + fd_out = goldfish_sync_fence_create(timeline, + timeline->current_time + 1); + DPRINT("Created fence with fd %d and current time %u (timeline: 0x%p)", + fd_out, + sync_context_data->timeline->current_time + 1, + sync_context_data->timeline); + + ioctl_data.fence_fd_out = fd_out; + + if (copy_to_user((void __user *)arg, + &ioctl_data, + sizeof(ioctl_data))) { + DPRINT("Error, could not copy to user!!!"); + + sys_close(fd_out); + /* We won't be doing an increment, kref_put immediately. */ + kref_put(&timeline->kref, delete_timeline_obj); + mutex_unlock(&global_sync_state->mutex_lock); + return -EFAULT; + } + + /* We are now about to trigger a host-side wait; + * accumulate on |pending_waits|. 
*/ + goldfish_sync_send_guestcmd(global_sync_state, + CMD_TRIGGER_HOST_WAIT, + ioctl_data.host_glsync_handle_in, + ioctl_data.host_syncthread_handle_in, + (uint64_t)(uintptr_t)(sync_context_data->timeline)); + + mutex_unlock(&global_sync_state->mutex_lock); + return 0; + default: + return -ENOTTY; + } +} + +static const struct file_operations goldfish_sync_fops = { + .owner = THIS_MODULE, + .open = goldfish_sync_open, + .release = goldfish_sync_release, + .unlocked_ioctl = goldfish_sync_ioctl, + .compat_ioctl = goldfish_sync_ioctl, +}; + +static struct miscdevice goldfish_sync_device = { + .name = "goldfish_sync", + .fops = &goldfish_sync_fops, +}; + + +static bool setup_verify_batch_cmd_addr(struct goldfish_sync_state *sync_state, + void *batch_addr, + uint32_t addr_offset, + uint32_t addr_offset_high) +{ + uint64_t batch_addr_phys; + uint32_t batch_addr_phys_test_lo; + uint32_t batch_addr_phys_test_hi; + + if (!batch_addr) { + ERR("Could not use batch command address!"); + return false; + } + + batch_addr_phys = virt_to_phys(batch_addr); + writel((uint32_t)(batch_addr_phys), + sync_state->reg_base + addr_offset); + writel((uint32_t)(batch_addr_phys >> 32), + sync_state->reg_base + addr_offset_high); + + batch_addr_phys_test_lo = + readl(sync_state->reg_base + addr_offset); + batch_addr_phys_test_hi = + readl(sync_state->reg_base + addr_offset_high); + + if (virt_to_phys(batch_addr) != + (((uint64_t)batch_addr_phys_test_hi << 32) | + batch_addr_phys_test_lo)) { + ERR("Invalid batch command address!"); + return false; + } + + return true; +} + +int goldfish_sync_probe(struct platform_device *pdev) +{ + struct resource *ioresource; + struct goldfish_sync_state *sync_state = global_sync_state; + int status; + + DTRACE(); + + sync_state->to_do_end = 0; + + spin_lock_init(&sync_state->lock); + mutex_init(&sync_state->mutex_lock); + + platform_set_drvdata(pdev, sync_state); + + ioresource = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (ioresource == NULL) { + ERR("platform_get_resource failed"); + return -ENODEV; + } + + sync_state->reg_base = + devm_ioremap(&pdev->dev, ioresource->start, PAGE_SIZE); + if (sync_state->reg_base == NULL) { + ERR("Could not ioremap"); + return -ENOMEM; + } + + sync_state->irq = platform_get_irq(pdev, 0); + if (sync_state->irq < 0) { + ERR("Could not platform_get_irq"); + return -ENODEV; + } + + status = devm_request_irq(&pdev->dev, + sync_state->irq, + goldfish_sync_interrupt, + IRQF_SHARED, + pdev->name, + sync_state); + if (status) { + ERR("request_irq failed"); + return -ENODEV; + } + + INIT_WORK(&sync_state->work_item, + goldfish_sync_work_item_fn); + + misc_register(&goldfish_sync_device); + + /* Obtain addresses for batch send/recv of commands. 
*/ + { + struct goldfish_sync_hostcmd *batch_addr_hostcmd; + struct goldfish_sync_guestcmd *batch_addr_guestcmd; + + batch_addr_hostcmd = + devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_hostcmd), + GFP_KERNEL); + batch_addr_guestcmd = + devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_guestcmd), + GFP_KERNEL); + + if (!setup_verify_batch_cmd_addr(sync_state, + batch_addr_hostcmd, + SYNC_REG_BATCH_COMMAND_ADDR, + SYNC_REG_BATCH_COMMAND_ADDR_HIGH)) { + ERR("goldfish_sync: Could not setup batch command address"); + return -ENODEV; + } + + if (!setup_verify_batch_cmd_addr(sync_state, + batch_addr_guestcmd, + SYNC_REG_BATCH_GUESTCOMMAND_ADDR, + SYNC_REG_BATCH_GUESTCOMMAND_ADDR_HIGH)) { + ERR("goldfish_sync: Could not setup batch guest command address"); + return -ENODEV; + } + + sync_state->batch_hostcmd = batch_addr_hostcmd; + sync_state->batch_guestcmd = batch_addr_guestcmd; + } + + INFO("goldfish_sync: Initialized goldfish sync device"); + + writel(0, sync_state->reg_base + SYNC_REG_INIT); + + return 0; +} + +static int goldfish_sync_remove(struct platform_device *pdev) +{ + struct goldfish_sync_state *sync_state = global_sync_state; + + DTRACE(); + + misc_deregister(&goldfish_sync_device); + memset(sync_state, 0, sizeof(struct goldfish_sync_state)); + return 0; +} + +static const struct of_device_id goldfish_sync_of_match[] = { + { .compatible = "google,goldfish-sync", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_sync_of_match); + +static const struct acpi_device_id goldfish_sync_acpi_match[] = { + { "GFSH0006", 0 }, + { }, +}; + +MODULE_DEVICE_TABLE(acpi, goldfish_sync_acpi_match); + +static struct platform_driver goldfish_sync = { + .probe = goldfish_sync_probe, + .remove = goldfish_sync_remove, + .driver = { + .name = "goldfish_sync", + .of_match_table = goldfish_sync_of_match, + .acpi_match_table = ACPI_PTR(goldfish_sync_acpi_match), + } +}; + +module_platform_driver(goldfish_sync); + +MODULE_AUTHOR("Google, Inc."); +MODULE_DESCRIPTION("Android QEMU Sync Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0"); diff --git a/drivers/staging/goldfish/goldfish_sync_timeline_fence.c b/drivers/staging/goldfish/goldfish_sync_timeline_fence.c new file mode 100644 index 0000000000000000000000000000000000000000..e671618cf888d60425b33873c7e5d3c8d4700716 --- /dev/null +++ b/drivers/staging/goldfish/goldfish_sync_timeline_fence.c @@ -0,0 +1,254 @@ +#include +#include +#include +#include +#include + +#include "goldfish_sync_timeline_fence.h" + +/* + * Timeline-based sync for Goldfish Sync + * Based on "Sync File validation framework" + * (drivers/dma-buf/sw_sync.c) + * + * Copyright (C) 2017 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/** + * struct goldfish_sync_timeline - sync object + * @kref: reference count on fence. + * @name: name of the goldfish_sync_timeline. 
Useful for debugging + * @child_list_head: list of children sync_pts for this goldfish_sync_timeline + * @child_list_lock: lock protecting @child_list_head and fence.status + * @active_list_head: list of active (unsignaled/errored) sync_pts + */ +struct goldfish_sync_timeline { + struct kref kref; + char name[32]; + + /* protected by child_list_lock */ + u64 context; + int value; + + struct list_head child_list_head; + spinlock_t child_list_lock; + + struct list_head active_list_head; +}; + +static inline struct goldfish_sync_timeline *fence_parent(struct fence *fence) +{ + return container_of(fence->lock, struct goldfish_sync_timeline, + child_list_lock); +} + +static const struct fence_ops goldfish_sync_timeline_fence_ops; + +static inline struct sync_pt *goldfish_sync_fence_to_sync_pt(struct fence *fence) +{ + if (fence->ops != &goldfish_sync_timeline_fence_ops) + return NULL; + return container_of(fence, struct sync_pt, base); +} + +/** + * goldfish_sync_timeline_create_internal() - creates a sync object + * @name: sync_timeline name + * + * Creates a new sync_timeline. Returns the sync_timeline object or NULL in + * case of error. + */ +struct goldfish_sync_timeline +*goldfish_sync_timeline_create_internal(const char *name) +{ + struct goldfish_sync_timeline *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return NULL; + + kref_init(&obj->kref); + obj->context = fence_context_alloc(1); + strlcpy(obj->name, name, sizeof(obj->name)); + + INIT_LIST_HEAD(&obj->child_list_head); + INIT_LIST_HEAD(&obj->active_list_head); + spin_lock_init(&obj->child_list_lock); + + return obj; +} + +static void goldfish_sync_timeline_free_internal(struct kref *kref) +{ + struct goldfish_sync_timeline *obj = + container_of(kref, struct goldfish_sync_timeline, kref); + + kfree(obj); +} + +static void goldfish_sync_timeline_get_internal( + struct goldfish_sync_timeline *obj) +{ + kref_get(&obj->kref); +} + +void goldfish_sync_timeline_put_internal(struct goldfish_sync_timeline *obj) +{ + kref_put(&obj->kref, goldfish_sync_timeline_free_internal); +} + +/** + * goldfish_sync_timeline_signal() - + * signal a status change on a goldfish_sync_timeline + * @obj: sync_timeline to signal + * @inc: num to increment on timeline->value + * + * A sync implementation should call this any time one of it's fences + * has signaled or has an error condition. + */ +void goldfish_sync_timeline_signal_internal(struct goldfish_sync_timeline *obj, + unsigned int inc) +{ + unsigned long flags; + struct sync_pt *pt, *next; + + spin_lock_irqsave(&obj->child_list_lock, flags); + + obj->value += inc; + + list_for_each_entry_safe(pt, next, &obj->active_list_head, + active_list) { + if (fence_is_signaled_locked(&pt->base)) + list_del_init(&pt->active_list); + } + + spin_unlock_irqrestore(&obj->child_list_lock, flags); +} + +/** + * goldfish_sync_pt_create_internal() - creates a sync pt + * @parent: fence's parent sync_timeline + * @size: size to allocate for this pt + * @inc: value of the fence + * + * Creates a new sync_pt as a child of @parent. @size bytes will be + * allocated allowing for implementation specific data to be kept after + * the generic sync_timeline struct. Returns the sync_pt object or + * NULL in case of error. 
+ */ +struct sync_pt *goldfish_sync_pt_create_internal( + struct goldfish_sync_timeline *obj, int size, + unsigned int value) +{ + unsigned long flags; + struct sync_pt *pt; + + if (size < sizeof(*pt)) + return NULL; + + pt = kzalloc(size, GFP_KERNEL); + if (!pt) + return NULL; + + spin_lock_irqsave(&obj->child_list_lock, flags); + goldfish_sync_timeline_get_internal(obj); + fence_init(&pt->base, &goldfish_sync_timeline_fence_ops, &obj->child_list_lock, + obj->context, value); + list_add_tail(&pt->child_list, &obj->child_list_head); + INIT_LIST_HEAD(&pt->active_list); + spin_unlock_irqrestore(&obj->child_list_lock, flags); + return pt; +} + +static const char *goldfish_sync_timeline_fence_get_driver_name( + struct fence *fence) +{ + return "sw_sync"; +} + +static const char *goldfish_sync_timeline_fence_get_timeline_name( + struct fence *fence) +{ + struct goldfish_sync_timeline *parent = fence_parent(fence); + + return parent->name; +} + +static void goldfish_sync_timeline_fence_release(struct fence *fence) +{ + struct sync_pt *pt = goldfish_sync_fence_to_sync_pt(fence); + struct goldfish_sync_timeline *parent = fence_parent(fence); + unsigned long flags; + + spin_lock_irqsave(fence->lock, flags); + list_del(&pt->child_list); + if (!list_empty(&pt->active_list)) + list_del(&pt->active_list); + spin_unlock_irqrestore(fence->lock, flags); + + goldfish_sync_timeline_put_internal(parent); + fence_free(fence); +} + +static bool goldfish_sync_timeline_fence_signaled(struct fence *fence) +{ + struct goldfish_sync_timeline *parent = fence_parent(fence); + + return (fence->seqno > parent->value) ? false : true; +} + +static bool goldfish_sync_timeline_fence_enable_signaling(struct fence *fence) +{ + struct sync_pt *pt = goldfish_sync_fence_to_sync_pt(fence); + struct goldfish_sync_timeline *parent = fence_parent(fence); + + if (goldfish_sync_timeline_fence_signaled(fence)) + return false; + + list_add_tail(&pt->active_list, &parent->active_list_head); + return true; +} + +static void goldfish_sync_timeline_fence_disable_signaling(struct fence *fence) +{ + struct sync_pt *pt = container_of(fence, struct sync_pt, base); + + list_del_init(&pt->active_list); +} + +static void goldfish_sync_timeline_fence_value_str(struct fence *fence, + char *str, int size) +{ + snprintf(str, size, "%d", fence->seqno); +} + +static void goldfish_sync_timeline_fence_timeline_value_str( + struct fence *fence, + char *str, int size) +{ + struct goldfish_sync_timeline *parent = fence_parent(fence); + + snprintf(str, size, "%d", parent->value); +} + +static const struct fence_ops goldfish_sync_timeline_fence_ops = { + .get_driver_name = goldfish_sync_timeline_fence_get_driver_name, + .get_timeline_name = goldfish_sync_timeline_fence_get_timeline_name, + .enable_signaling = goldfish_sync_timeline_fence_enable_signaling, + .disable_signaling = goldfish_sync_timeline_fence_disable_signaling, + .signaled = goldfish_sync_timeline_fence_signaled, + .wait = fence_default_wait, + .release = goldfish_sync_timeline_fence_release, + .fence_value_str = goldfish_sync_timeline_fence_value_str, + .timeline_value_str = goldfish_sync_timeline_fence_timeline_value_str, +}; diff --git a/drivers/staging/goldfish/goldfish_sync_timeline_fence.h b/drivers/staging/goldfish/goldfish_sync_timeline_fence.h new file mode 100644 index 0000000000000000000000000000000000000000..fc25924652c1609a480857e7de2bfc7d2a314d9f --- /dev/null +++ b/drivers/staging/goldfish/goldfish_sync_timeline_fence.h @@ -0,0 +1,58 @@ +#include +#include + +/** + * struct 
sync_pt - sync_pt object + * @base: base fence object + * @child_list: sync timeline child's list + * @active_list: sync timeline active child's list + */ +struct sync_pt { + struct fence base; + struct list_head child_list; + struct list_head active_list; +}; + +/** + * goldfish_sync_timeline_create_internal() - creates a sync object + * @name: goldfish_sync_timeline name + * + * Creates a new goldfish_sync_timeline. + * Returns the goldfish_sync_timeline object or NULL in case of error. + */ +struct goldfish_sync_timeline +*goldfish_sync_timeline_create_internal(const char *name); + +/** + * goldfish_sync_pt_create_internal() - creates a sync pt + * @parent: fence's parent goldfish_sync_timeline + * @size: size to allocate for this pt + * @inc: value of the fence + * + * Creates a new sync_pt as a child of @parent. @size bytes will be + * allocated allowing for implementation specific data to be kept after + * the generic sync_timeline struct. Returns the sync_pt object or + * NULL in case of error. + */ +struct sync_pt +*goldfish_sync_pt_create_internal(struct goldfish_sync_timeline *obj, + int size, unsigned int value); + +/** + * goldfish_sync_timeline_signal_internal() - + * signal a status change on a sync_timeline + * @obj: goldfish_sync_timeline to signal + * @inc: num to increment on timeline->value + * + * A sync implementation should call this any time one of it's fences + * has signaled or has an error condition. + */ +void goldfish_sync_timeline_signal_internal(struct goldfish_sync_timeline *obj, + unsigned int inc); + +/** + * goldfish_sync_timeline_put_internal() - dec refcount of a sync_timeline + * and clean up memory if it was the last ref. + * @obj: goldfish_sync_timeline to decref + */ +void goldfish_sync_timeline_put_internal(struct goldfish_sync_timeline *obj); diff --git a/drivers/staging/greybus/timesync_platform.c b/drivers/staging/greybus/timesync_platform.c index 113f3d6c4b3a6cdeda3fce3abe729fbc927fd9d9..27f75b17679b8f19a5f6100a769dd7b1b0f77455 100644 --- a/drivers/staging/greybus/timesync_platform.c +++ b/drivers/staging/greybus/timesync_platform.c @@ -45,12 +45,18 @@ u32 gb_timesync_platform_get_clock_rate(void) int gb_timesync_platform_lock_bus(struct gb_timesync_svc *pdata) { + if (!arche_platform_change_state_cb) + return 0; + return arche_platform_change_state_cb(ARCHE_PLATFORM_STATE_TIME_SYNC, pdata); } void gb_timesync_platform_unlock_bus(void) { + if (!arche_platform_change_state_cb) + return; + arche_platform_change_state_cb(ARCHE_PLATFORM_STATE_ACTIVE, NULL); } diff --git a/drivers/staging/iio/adc/ad7606_core.c b/drivers/staging/iio/adc/ad7606_core.c index f79ee61851f6024994e9fe201c2ffde1755e3c0f..cbd6bc52050f1618d87acc7bd00a960f974dbaa4 100644 --- a/drivers/staging/iio/adc/ad7606_core.c +++ b/drivers/staging/iio/adc/ad7606_core.c @@ -189,7 +189,7 @@ static ssize_t ad7606_store_oversampling_ratio(struct device *dev, mutex_lock(&indio_dev->mlock); gpio_set_value(st->pdata->gpio_os0, (ret >> 0) & 1); gpio_set_value(st->pdata->gpio_os1, (ret >> 1) & 1); - gpio_set_value(st->pdata->gpio_os1, (ret >> 2) & 1); + gpio_set_value(st->pdata->gpio_os2, (ret >> 2) & 1); st->oversampling = lval; mutex_unlock(&indio_dev->mlock); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c index 9a1136e32dfc5df975323ad99b8017e87b84e115..34efb499c00b293b04ea871d26f04e294d395ed7 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c @@ -356,10 +356,10 @@ static 
int ldlm_pool_recalc(struct ldlm_pool *pl) u32 recalc_interval_sec; int count; - recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time; + recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { spin_lock(&pl->pl_lock); - recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time; + recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { /* @@ -382,7 +382,7 @@ static int ldlm_pool_recalc(struct ldlm_pool *pl) count); } - recalc_interval_sec = pl->pl_recalc_time - ktime_get_seconds() + + recalc_interval_sec = pl->pl_recalc_time - ktime_get_real_seconds() + pl->pl_recalc_period; if (recalc_interval_sec <= 0) { /* DEBUG: should be re-removed after LU-4536 is fixed */ @@ -657,7 +657,7 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, spin_lock_init(&pl->pl_lock); atomic_set(&pl->pl_granted, 0); - pl->pl_recalc_time = ktime_get_seconds(); + pl->pl_recalc_time = ktime_get_real_seconds(); atomic_set(&pl->pl_lock_volume_factor, 1); atomic_set(&pl->pl_grant_rate, 0); diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c index 2a7a70aa9e802dd38d31b2d6198e13107ba183c3..9168451d2f292cfae7149f113612e7ea3b843016 100644 --- a/drivers/staging/lustre/lustre/osc/osc_page.c +++ b/drivers/staging/lustre/lustre/osc/osc_page.c @@ -542,7 +542,6 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, struct cl_object *clobj = NULL; struct cl_page **pvec; struct osc_page *opg; - struct osc_page *temp; int maxscan = 0; long count = 0; int index = 0; @@ -569,13 +568,15 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, spin_lock(&cli->cl_lru_list_lock); maxscan = min(target << 1, atomic_long_read(&cli->cl_lru_in_list)); - list_for_each_entry_safe(opg, temp, &cli->cl_lru_list, ops_lru) { + while (!list_empty(&cli->cl_lru_list)) { struct cl_page *page; bool will_free = false; if (--maxscan < 0) break; + opg = list_entry(cli->cl_lru_list.next, struct osc_page, + ops_lru); page = opg->ops_cl.cpl_page; if (lru_page_busy(cli, page)) { list_move_tail(&opg->ops_lru, &cli->cl_lru_list); diff --git a/drivers/staging/media/davinci_vpfe/vpfe_video.c b/drivers/staging/media/davinci_vpfe/vpfe_video.c index 8be9f854510f4df1d287ce7a49521f2b48e32db0..89dd6b989254fabaa2f9a23df18d20d885b23e40 100644 --- a/drivers/staging/media/davinci_vpfe/vpfe_video.c +++ b/drivers/staging/media/davinci_vpfe/vpfe_video.c @@ -1362,7 +1362,7 @@ static int vpfe_reqbufs(struct file *file, void *priv, ret = vb2_queue_init(q); if (ret) { v4l2_err(&vpfe_dev->v4l2_dev, "vb2_queue_init() failed\n"); - return ret; + goto unlock_out; } fh->io_allowed = 1; diff --git a/drivers/staging/media/s5p-cec/s5p_cec.c b/drivers/staging/media/s5p-cec/s5p_cec.c index 1780a08b73c96193f063f4811ef5ee2a366ec5e3..58d7562311360b7f090534ac63dae6db36b07d5d 100644 --- a/drivers/staging/media/s5p-cec/s5p_cec.c +++ b/drivers/staging/media/s5p-cec/s5p_cec.c @@ -231,7 +231,7 @@ static int s5p_cec_remove(struct platform_device *pdev) return 0; } -static int s5p_cec_runtime_suspend(struct device *dev) +static int __maybe_unused s5p_cec_runtime_suspend(struct device *dev) { struct s5p_cec_dev *cec = dev_get_drvdata(dev); @@ -239,7 +239,7 @@ static int s5p_cec_runtime_suspend(struct device *dev) return 0; } -static int s5p_cec_runtime_resume(struct device *dev) +static int __maybe_unused s5p_cec_runtime_resume(struct device *dev) { struct s5p_cec_dev *cec = dev_get_drvdata(dev); int 
ret; diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index d02e3e31ed293dbe97fbe5327ec670d8fc91ecc9..12354440a334a35dc330414fc81214b83dedcf5c 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -776,6 +776,7 @@ static int cvm_oct_probe(struct platform_device *pdev) /* Initialize the device private structure. */ struct octeon_ethernet *priv = netdev_priv(dev); + SET_NETDEV_DEV(dev, &pdev->dev); dev->netdev_ops = &cvm_oct_pow_netdev_ops; priv->imode = CVMX_HELPER_INTERFACE_MODE_DISABLED; priv->port = CVMX_PIP_NUM_INPUT_PORTS; @@ -820,6 +821,7 @@ static int cvm_oct_probe(struct platform_device *pdev) } /* Initialize the device private structure. */ + SET_NETDEV_DEV(dev, &pdev->dev); priv = netdev_priv(dev); priv->netdev = dev; priv->of_node = cvm_oct_node_for_port(pip, interface, diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 923c032f0b95f1efe2d1e12e90cb9086f3fd9346..e980e2d0c2db5512ac29b9974310b93ff910c89b 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -100,8 +100,10 @@ static ssize_t lio_target_np_driver_store(struct config_item *item, tpg_np_new = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, tpg_np, type); - if (IS_ERR(tpg_np_new)) + if (IS_ERR(tpg_np_new)) { + rc = PTR_ERR(tpg_np_new); goto out; + } } else { tpg_np_new = iscsit_tpg_locate_child_np(tpg_np, type); if (tpg_np_new) { diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 0814e5894a9616ffcc79fb0d0f086f718a971fd7..205a509b0dfb289fac9791cdd0d44d259031a9a7 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -260,7 +260,6 @@ int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_gro iscsi_release_param_list(tpg->param_list); tpg->param_list = NULL; } - kfree(tpg); return -ENOMEM; } diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 58bb6ed181853b49370d9fda31f73e7bd24c7fda..6ca388eca33bda0a4481808c67662def357fd080 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -928,7 +928,7 @@ static struct sbp_target_request *sbp_mgt_get_req(struct sbp_session *sess, struct sbp_target_request *req; int tag; - tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC); + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING); if (tag < 0) return ERR_PTR(-ENOMEM); diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 47562509b4892b07b85cb304a3190804c658d6e9..70c143a5c38c3f7655c9a23fc10b369d9df3680c 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -685,8 +685,6 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) target_complete_cmd(cmd->se_cmd, SAM_STAT_CHECK_CONDITION); cmd->se_cmd = NULL; - kmem_cache_free(tcmu_cmd_cache, cmd); - return 0; } diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c index c41c7742903ab43b2132241574376b85849666b9..2dcd4194d103bdf9a052bee2da46407ed1a659ee 100644 --- a/drivers/thermal/thermal_hwmon.c +++ b/drivers/thermal/thermal_hwmon.c @@ -98,7 +98,7 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, char *buf) int temperature; int ret; - ret = tz->ops->get_trip_temp(tz, 0, &temperature); + ret = tz->ops->get_crit_temp(tz, &temperature); if (ret) return ret; diff --git 
a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 240a361b674fe72ce657067e5303156b7add6a6f..e8819aa20415603c80547e382838a8fa3ce54792 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -675,7 +675,7 @@ static struct console univ8250_console = { .device = uart_console_device, .setup = univ8250_console_setup, .match = univ8250_console_match, - .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_CONSDEV, + .flags = CON_PRINTBUFFER | CON_ANYTIME, .index = -1, .data = &serial8250_reg, }; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 1731b98d2471077c762806b63f88993b0a475fc3..080d5a59d0a7f41ee0578d1f6e2fbecf00409bce 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1411,7 +1411,7 @@ static void __do_stop_tx_rs485(struct uart_8250_port *p) * Enable previously disabled RX interrupts. */ if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) { - serial8250_clear_fifos(p); + serial8250_clear_and_reinit_fifos(p); p->ier |= UART_IER_RLSI | UART_IER_RDI; serial_port_out(&p->port, UART_IER, p->ier); diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 168b10cad47b5437c2152313fcad026e2747300a..fabbe76203bb76a541775792828d3a8662c78348 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -481,6 +481,14 @@ static void atmel_stop_tx(struct uart_port *port) /* disable PDC transmit */ atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS); } + + /* + * Disable the transmitter. + * This is mandatory when DMA is used, otherwise the DMA buffer + * is fully transmitted. + */ + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXDIS); + /* Disable interrupts */ atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); @@ -513,6 +521,9 @@ static void atmel_start_tx(struct uart_port *port) /* Enable interrupts */ atmel_uart_writel(port, ATMEL_US_IER, atmel_port->tx_done_mask); + + /* re-enable the transmitter */ + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN); } /* @@ -798,6 +809,11 @@ static void atmel_complete_tx_dma(void *arg) */ if (!uart_circ_empty(xmit)) atmel_tasklet_schedule(atmel_port, &atmel_port->tasklet_tx); + else if ((port->rs485.flags & SER_RS485_ENABLED) && + !(port->rs485.flags & SER_RS485_RX_DURING_TX)) { + /* DMA done, stop TX, start RX for RS485 */ + atmel_start_rx(port); + } spin_unlock_irqrestore(&port->lock, flags); } @@ -900,12 +916,6 @@ static void atmel_tx_dma(struct uart_port *port) desc->callback = atmel_complete_tx_dma; desc->callback_param = atmel_port; atmel_port->cookie_tx = dmaengine_submit(desc); - - } else { - if (port->rs485.flags & SER_RS485_ENABLED) { - /* DMA done, stop TX, start RX for RS485 */ - atmel_start_rx(port); - } } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index fb0672554123a196d75fd9eedca35a915ddc12e6..793395451982d8e65d495345b9f8bfc17bd24e6c 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1264,7 +1264,7 @@ static int sc16is7xx_probe(struct device *dev, /* Setup interrupt */ ret = devm_request_irq(dev, irq, sc16is7xx_irq, - IRQF_ONESHOT | flags, dev_name(dev), s); + flags, dev_name(dev), s); if (!ret) return 0; diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 52bbd27e93ae7a83af92489b6d27fe0309437afe..701c085bb19b8e03f67316133fa5a4be7e0b05fe 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c 
@@ -946,8 +946,8 @@ static const struct input_device_id sysrq_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT_MASK(EV_KEY) }, - .keybit = { BIT_MASK(KEY_LEFTALT) }, + .evbit = { [BIT_WORD(EV_KEY)] = BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_LEFTALT)] = BIT_MASK(KEY_LEFTALT) }, }, { }, }; diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 0f8caae4267df8b3a19e39011730478f2c63c9d3..ece10e6b731baa432254974a007eaac8c05a2859 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -982,7 +982,7 @@ static void kbd_led_trigger_activate(struct led_classdev *cdev) KBD_LED_TRIGGER((_led_bit) + 8, _name) static struct kbd_led_trigger kbd_led_triggers[] = { - KBD_LED_TRIGGER(VC_SCROLLOCK, "kbd-scrollock"), + KBD_LED_TRIGGER(VC_SCROLLOCK, "kbd-scrolllock"), KBD_LED_TRIGGER(VC_NUMLOCK, "kbd-numlock"), KBD_LED_TRIGGER(VC_CAPSLOCK, "kbd-capslock"), KBD_LED_TRIGGER(VC_KANALOCK, "kbd-kanalock"), diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index fada988512a1622a9551a5d6d5ae7482d78ac35f..c5ff13f22b2473d392bb9e1b33bc4719f14aa05d 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1719,6 +1719,7 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x20df, 0x0001), /* Simtec Electronics Entropy Key */ .driver_info = QUIRK_CONTROL_LINE_STATE, }, { USB_DEVICE(0x2184, 0x001c) }, /* GW Instek AFG-2225 */ + { USB_DEVICE(0x2184, 0x0036) }, /* GW Instek AFG-125 */ { USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */ }, /* Motorola H24 HSPA module: */ diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index a2d90aca779fa16fb07e1327f33a41129c719e63..1f7036c8f57b5f835acda237bc7b624293b77bbf 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -234,6 +234,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, if (ifp->desc.bNumEndpoints >= num_ep) goto skip_to_next_endpoint_or_interface_descriptor; + /* Check for duplicate endpoint addresses */ + for (i = 0; i < ifp->desc.bNumEndpoints; ++i) { + if (ifp->endpoint[i].desc.bEndpointAddress == + d->bEndpointAddress) { + dev_warn(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", + cfgno, inum, asnum, d->bEndpointAddress); + goto skip_to_next_endpoint_or_interface_descriptor; + } + } + endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; ++ifp->desc.bNumEndpoints; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index cbb146736f577da8a060d49b17052af78881adfa..aef81a16e2c8534701b8583392400faf77971d23 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -101,6 +101,7 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); +static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); static inline char *portspeed(struct usb_hub *hub, int portstatus) { @@ -898,88 +899,6 @@ static int hub_set_port_link_state(struct usb_hub *hub, int port1, USB_PORT_FEAT_LINK_STATE); } -/* - * If USB 3.0 ports are placed into the Disabled state, they will no longer - * detect any device connects or disconnects. This is generally not what the - * USB core wants, since it expects a disabled port to produce a port status - * change event when a new device connects. 
- * - * Instead, set the link state to Disabled, wait for the link to settle into - * that state, clear any change bits, and then put the port into the RxDetect - * state. - */ -static int hub_usb3_port_disable(struct usb_hub *hub, int port1) -{ - int ret; - int total_time; - u16 portchange, portstatus; - - if (!hub_is_superspeed(hub->hdev)) - return -EINVAL; - - ret = hub_port_status(hub, port1, &portstatus, &portchange); - if (ret < 0) - return ret; - - /* - * USB controller Advanced Micro Devices, Inc. [AMD] FCH USB XHCI - * Controller [1022:7814] will have spurious result making the following - * usb 3.0 device hotplugging route to the 2.0 root hub and recognized - * as high-speed device if we set the usb 3.0 port link state to - * Disabled. Since it's already in USB_SS_PORT_LS_RX_DETECT state, we - * check the state here to avoid the bug. - */ - if ((portstatus & USB_PORT_STAT_LINK_STATE) == - USB_SS_PORT_LS_RX_DETECT) { - dev_dbg(&hub->ports[port1 - 1]->dev, - "Not disabling port; link state is RxDetect\n"); - return ret; - } - - ret = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_SS_DISABLED); - if (ret) - return ret; - - /* Wait for the link to enter the disabled state. */ - for (total_time = 0; ; total_time += HUB_DEBOUNCE_STEP) { - ret = hub_port_status(hub, port1, &portstatus, &portchange); - if (ret < 0) - return ret; - - if ((portstatus & USB_PORT_STAT_LINK_STATE) == - USB_SS_PORT_LS_SS_DISABLED) - break; - if (total_time >= HUB_DEBOUNCE_TIMEOUT) - break; - msleep(HUB_DEBOUNCE_STEP); - } - if (total_time >= HUB_DEBOUNCE_TIMEOUT) - dev_warn(&hub->ports[port1 - 1]->dev, - "Could not disable after %d ms\n", total_time); - - return hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_RX_DETECT); -} - -static int hub_port_disable(struct usb_hub *hub, int port1, int set_state) -{ - struct usb_port *port_dev = hub->ports[port1 - 1]; - struct usb_device *hdev = hub->hdev; - int ret = 0; - - if (port_dev->child && set_state) - usb_set_device_state(port_dev->child, USB_STATE_NOTATTACHED); - if (!hub->error) { - if (hub_is_superspeed(hub->hdev)) - ret = hub_usb3_port_disable(hub, port1); - else - ret = usb_clear_port_feature(hdev, port1, - USB_PORT_FEAT_ENABLE); - } - if (ret && ret != -ENODEV) - dev_err(&port_dev->dev, "cannot disable (err = %d)\n", ret); - return ret; -} - /* * Disable a port and mark a logical connect-change event, so that some * time later hub_wq will disconnect() any existing usb_device on the port @@ -4140,6 +4059,26 @@ void usb_unlocked_enable_lpm(struct usb_device *udev) } EXPORT_SYMBOL_GPL(usb_unlocked_enable_lpm); +/* usb3 devices use U3 for disabled, make sure remote wakeup is disabled */ +static void hub_usb3_port_prepare_disable(struct usb_hub *hub, + struct usb_port *port_dev) +{ + struct usb_device *udev = port_dev->child; + int ret; + + if (udev && udev->port_is_suspended && udev->do_remote_wakeup) { + ret = hub_set_port_link_state(hub, port_dev->portnum, + USB_SS_PORT_LS_U0); + if (!ret) { + msleep(USB_RESUME_TIMEOUT); + ret = usb_disable_remote_wakeup(udev); + } + if (ret) + dev_warn(&udev->dev, + "Port disable: can't disable remote wake\n"); + udev->do_remote_wakeup = 0; + } +} #else /* CONFIG_PM */ @@ -4147,6 +4086,9 @@ EXPORT_SYMBOL_GPL(usb_unlocked_enable_lpm); #define hub_resume NULL #define hub_reset_resume NULL +static inline void hub_usb3_port_prepare_disable(struct usb_hub *hub, + struct usb_port *port_dev) { } + int usb_disable_lpm(struct usb_device *udev) { return 0; @@ -4182,6 +4124,34 @@ static int hub_handle_remote_wakeup(struct usb_hub 
*hub, unsigned int port, #endif /* CONFIG_PM */ +/* + * USB-3 does not have a similar link state as USB-2 that will avoid negotiating + * a connection with a plugged-in cable but will signal the host when the cable + * is unplugged. Disable remote wake and set link state to U3 for USB-3 devices + */ +static int hub_port_disable(struct usb_hub *hub, int port1, int set_state) +{ + struct usb_port *port_dev = hub->ports[port1 - 1]; + struct usb_device *hdev = hub->hdev; + int ret = 0; + + if (!hub->error) { + if (hub_is_superspeed(hub->hdev)) { + hub_usb3_port_prepare_disable(hub, port_dev); + ret = hub_set_port_link_state(hub, port_dev->portnum, + USB_SS_PORT_LS_U3); + } else { + ret = usb_clear_port_feature(hdev, port1, + USB_PORT_FEAT_ENABLE); + } + } + if (port_dev->child && set_state) + usb_set_device_state(port_dev->child, USB_STATE_NOTATTACHED); + if (ret && ret != -ENODEV) + dev_err(&port_dev->dev, "cannot disable (err = %d)\n", ret); + return ret; +} + /* USB 2.0 spec, 7.1.7.3 / fig 7-29: * diff --git a/drivers/usb/core/ledtrig-usbport.c b/drivers/usb/core/ledtrig-usbport.c index 3ed5162677ad562e1ffa19b1913cd7b332137fda..1713248ab15ad497a54f76f9fd6bb7150ccc023a 100644 --- a/drivers/usb/core/ledtrig-usbport.c +++ b/drivers/usb/core/ledtrig-usbport.c @@ -74,8 +74,7 @@ static void usbport_trig_update_count(struct usbport_trig_data *usbport_data) usbport_data->count = 0; usb_for_each_dev(usbport_data, usbport_trig_usb_dev_check); - led_cdev->brightness_set(led_cdev, - usbport_data->count ? LED_FULL : LED_OFF); + led_set_brightness(led_cdev, usbport_data->count ? LED_FULL : LED_OFF); } /*************************************** @@ -228,12 +227,12 @@ static int usbport_trig_notify(struct notifier_block *nb, unsigned long action, case USB_DEVICE_ADD: usbport_trig_add_usb_dev_ports(usb_dev, usbport_data); if (observed && usbport_data->count++ == 0) - led_cdev->brightness_set(led_cdev, LED_FULL); + led_set_brightness(led_cdev, LED_FULL); return NOTIFY_OK; case USB_DEVICE_REMOVE: usbport_trig_remove_usb_dev_ports(usbport_data, usb_dev); if (observed && --usbport_data->count == 0) - led_cdev->brightness_set(led_cdev, LED_OFF); + led_set_brightness(led_cdev, LED_OFF); return NOTIFY_OK; } diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index d2e50a27140c9254be2a80b6c6ae69bc71a93b4a..24f9f98968a5d860f83920287a5b7deb4c98bed6 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -37,6 +37,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* CBM - Flash disk */ { USB_DEVICE(0x0204, 0x6025), .driver_info = USB_QUIRK_RESET_RESUME }, + /* WORLDE easy key (easykey.25) MIDI controller */ + { USB_DEVICE(0x0218, 0x0401), .driver_info = + USB_QUIRK_CONFIG_INTF_STRINGS }, + /* HP 5300/5370C scanner */ { USB_DEVICE(0x03f0, 0x0701), .driver_info = USB_QUIRK_STRING_FETCH_255 }, diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 4ddd8e1b58777391e3feebdef5081cb95f536687..42967c2929f4023e41f29e85ee543408a622987d 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -46,9 +46,7 @@ #define DWC3_XHCI_RESOURCES_NUM 2 #define DWC3_SCRATCHBUF_SIZE 4096 /* each buffer is assumed to be 4KiB */ -#define DWC3_EVENT_SIZE 4 /* bytes */ -#define DWC3_EVENT_MAX_NUM 64 /* 2 events/endpoint */ -#define DWC3_EVENT_BUFFERS_SIZE (2 * PAGE_SIZE) +#define DWC3_EVENT_BUFFERS_SIZE 4096 #define DWC3_EVENT_TYPE_MASK 0xfe #define DWC3_EVENT_TYPE_DEV 0 @@ -330,9 +328,8 @@ #define DWC3_DCFG_SUPERSPEED_PLUS (5 << 0) /* DWC_usb31 only */ #define 
DWC3_DCFG_SUPERSPEED (4 << 0) #define DWC3_DCFG_HIGHSPEED (0 << 0) -#define DWC3_DCFG_FULLSPEED2 (1 << 0) +#define DWC3_DCFG_FULLSPEED (1 << 0) #define DWC3_DCFG_LOWSPEED (2 << 0) -#define DWC3_DCFG_FULLSPEED1 (3 << 0) #define DWC3_DCFG_NUMP_SHIFT 17 #define DWC3_DCFG_NUMP(n) (((n) >> DWC3_DCFG_NUMP_SHIFT) & 0x1f) @@ -426,9 +423,8 @@ #define DWC3_DSTS_SUPERSPEED_PLUS (5 << 0) /* DWC_usb31 only */ #define DWC3_DSTS_SUPERSPEED (4 << 0) #define DWC3_DSTS_HIGHSPEED (0 << 0) -#define DWC3_DSTS_FULLSPEED2 (1 << 0) +#define DWC3_DSTS_FULLSPEED (1 << 0) #define DWC3_DSTS_LOWSPEED (2 << 0) -#define DWC3_DSTS_FULLSPEED1 (3 << 0) /* Device Generic Command Register */ #define DWC3_DGCMD_SET_LMP 0x01 diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 6df0f5dad9a4cdecd4c1841c6a277de167e1960e..427291a19e6deeda040f830087e77d1e217740a4 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -38,6 +38,7 @@ #define PCI_DEVICE_ID_INTEL_BXT_M 0x1aaa #define PCI_DEVICE_ID_INTEL_APL 0x5aaa #define PCI_DEVICE_ID_INTEL_KBP 0xa2b0 +#define PCI_DEVICE_ID_INTEL_GLK 0x31aa static const struct acpi_gpio_params reset_gpios = { 0, 0, false }; static const struct acpi_gpio_params cs_gpios = { 1, 0, false }; @@ -81,7 +82,7 @@ static int dwc3_pci_quirks(struct pci_dev *pdev, struct platform_device *dwc3) int ret; struct property_entry properties[] = { - PROPERTY_ENTRY_STRING("dr-mode", "peripheral"), + PROPERTY_ENTRY_STRING("dr_mode", "peripheral"), { } }; @@ -229,6 +230,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT_M), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_GLK), }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), }, { } /* Terminating Entry */ }; diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 566b645c88ffa9a53796bfd812c6547f51964bd3..ee601478cd89e75b2ca4f3b25c4c8f1f3b719ba3 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -55,20 +55,13 @@ static const char *dwc3_ep0_state_string(enum dwc3_ep0_state state) } } -static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum, dma_addr_t buf_dma, - u32 len, u32 type, bool chain) +static void dwc3_ep0_prepare_one_trb(struct dwc3 *dwc, u8 epnum, + dma_addr_t buf_dma, u32 len, u32 type, bool chain) { - struct dwc3_gadget_ep_cmd_params params; struct dwc3_trb *trb; struct dwc3_ep *dep; - int ret; - dep = dwc->eps[epnum]; - if (dep->flags & DWC3_EP_BUSY) { - dwc3_trace(trace_dwc3_ep0, "%s still busy", dep->name); - return 0; - } trb = &dwc->ep0_trb[dep->trb_enqueue]; @@ -89,15 +82,25 @@ static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum, dma_addr_t buf_dma, trb->ctrl |= (DWC3_TRB_CTRL_IOC | DWC3_TRB_CTRL_LST); - if (chain) + trace_dwc3_prepare_trb(dep, trb); +} + +static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum) +{ + struct dwc3_gadget_ep_cmd_params params; + struct dwc3_ep *dep; + int ret; + + dep = dwc->eps[epnum]; + if (dep->flags & DWC3_EP_BUSY) { + dwc3_trace(trace_dwc3_ep0, "%s still busy", dep->name); return 0; + } memset(¶ms, 0, sizeof(params)); params.param0 = upper_32_bits(dwc->ep0_trb_addr); params.param1 = lower_32_bits(dwc->ep0_trb_addr); - trace_dwc3_prepare_trb(dep, trb); - ret = dwc3_send_gadget_ep_cmd(dep, DWC3_DEPCMD_STARTTRANSFER, ¶ms); if (ret < 0) { dwc3_trace(trace_dwc3_ep0, "%s STARTTRANSFER failed", @@ -308,8 +311,9 @@ void 
dwc3_ep0_out_start(struct dwc3 *dwc) { int ret; - ret = dwc3_ep0_start_trans(dwc, 0, dwc->ctrl_req_addr, 8, + dwc3_ep0_prepare_one_trb(dwc, 0, dwc->ctrl_req_addr, 8, DWC3_TRBCTL_CONTROL_SETUP, false); + ret = dwc3_ep0_start_trans(dwc, 0); WARN_ON(ret < 0); } @@ -883,9 +887,9 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc, dwc->ep0_next_event = DWC3_EP0_COMPLETE; - ret = dwc3_ep0_start_trans(dwc, epnum, - dwc->ctrl_req_addr, 0, - DWC3_TRBCTL_CONTROL_DATA, false); + dwc3_ep0_prepare_one_trb(dwc, epnum, dwc->ctrl_req_addr, + 0, DWC3_TRBCTL_CONTROL_DATA, false); + ret = dwc3_ep0_start_trans(dwc, epnum); WARN_ON(ret < 0); } } @@ -969,9 +973,10 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, req->direction = !!dep->number; if (req->request.length == 0) { - ret = dwc3_ep0_start_trans(dwc, dep->number, + dwc3_ep0_prepare_one_trb(dwc, dep->number, dwc->ctrl_req_addr, 0, DWC3_TRBCTL_CONTROL_DATA, false); + ret = dwc3_ep0_start_trans(dwc, dep->number); } else if (!IS_ALIGNED(req->request.length, dep->endpoint.maxpacket) && (dep->number == 0)) { u32 transfer_size = 0; @@ -989,7 +994,7 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, if (req->request.length > DWC3_EP0_BOUNCE_SIZE) { transfer_size = ALIGN(req->request.length - maxpacket, maxpacket); - ret = dwc3_ep0_start_trans(dwc, dep->number, + dwc3_ep0_prepare_one_trb(dwc, dep->number, req->request.dma, transfer_size, DWC3_TRBCTL_CONTROL_DATA, @@ -1001,9 +1006,10 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, dwc->ep0_bounced = true; - ret = dwc3_ep0_start_trans(dwc, dep->number, + dwc3_ep0_prepare_one_trb(dwc, dep->number, dwc->ep0_bounce_addr, transfer_size, DWC3_TRBCTL_CONTROL_DATA, false); + ret = dwc3_ep0_start_trans(dwc, dep->number); } else { ret = usb_gadget_map_request(&dwc->gadget, &req->request, dep->number); @@ -1012,9 +1018,10 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, return; } - ret = dwc3_ep0_start_trans(dwc, dep->number, req->request.dma, + dwc3_ep0_prepare_one_trb(dwc, dep->number, req->request.dma, req->request.length, DWC3_TRBCTL_CONTROL_DATA, false); + ret = dwc3_ep0_start_trans(dwc, dep->number); } WARN_ON(ret < 0); @@ -1028,8 +1035,9 @@ static int dwc3_ep0_start_control_status(struct dwc3_ep *dep) type = dwc->three_stage_setup ? 
DWC3_TRBCTL_CONTROL_STATUS3 : DWC3_TRBCTL_CONTROL_STATUS2; - return dwc3_ep0_start_trans(dwc, dep->number, + dwc3_ep0_prepare_one_trb(dwc, dep->number, dwc->ctrl_req_addr, 0, type, false); + return dwc3_ep0_start_trans(dwc, dep->number); } static void __dwc3_ep0_do_control_status(struct dwc3 *dwc, struct dwc3_ep *dep) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 7aa290fe3be2f4440ac872816ddaffe3e3c8d1c2..f1dbf3902af17c0a72c00853cb33bbdd0d55b7a7 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -292,11 +292,11 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, if (req->request.status == -EINPROGRESS) req->request.status = status; - if (dwc->ep0_bounced && dep->number == 0) + if (dwc->ep0_bounced && dep->number <= 1) dwc->ep0_bounced = false; - else - usb_gadget_unmap_request(&dwc->gadget, &req->request, - req->direction); + + usb_gadget_unmap_request(&dwc->gadget, &req->request, + req->direction); trace_dwc3_gadget_giveback(req); @@ -919,6 +919,9 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep, unsigned length, unsigned chain, unsigned node) { struct dwc3_trb *trb; + struct dwc3 *dwc = dep->dwc; + struct usb_gadget *gadget = &dwc->gadget; + enum usb_device_speed speed = gadget->speed; dwc3_trace(trace_dwc3_gadget, "%s: req %p dma %08llx length %d%s", dep->name, req, (unsigned long long) dma, @@ -946,10 +949,16 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep, break; case USB_ENDPOINT_XFER_ISOC: - if (!node) + if (!node) { trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS_FIRST; - else + + if (speed == USB_SPEED_HIGH) { + struct usb_ep *ep = &dep->endpoint; + trb->size |= DWC3_TRB_SIZE_PCM1(ep->mult - 1); + } + } else { trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS; + } /* always enable Interrupt on Missed ISOC */ trb->ctrl |= DWC3_TRB_CTRL_ISP_IMI; @@ -2079,7 +2088,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) reg |= DWC3_DCFG_LOWSPEED; break; case USB_SPEED_FULL: - reg |= DWC3_DCFG_FULLSPEED1; + reg |= DWC3_DCFG_FULLSPEED; break; case USB_SPEED_HIGH: reg |= DWC3_DCFG_HIGHSPEED; @@ -2990,8 +2999,7 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc) dwc->gadget.ep0->maxpacket = 64; dwc->gadget.speed = USB_SPEED_HIGH; break; - case DWC3_DSTS_FULLSPEED2: - case DWC3_DSTS_FULLSPEED1: + case DWC3_DSTS_FULLSPEED: dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(64); dwc->gadget.ep0->maxpacket = 64; dwc->gadget.speed = USB_SPEED_FULL; diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index c2b0616a9a1149930c9b9df650ae0048b00f116e..ec166f2aac0c477ddc143e948f4f5512eaab4ec4 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -208,11 +208,16 @@ int config_ep_by_speed(struct usb_gadget *g, ep_found: /* commit results */ - _ep->maxpacket = usb_endpoint_maxp(chosen_desc); + _ep->maxpacket = usb_endpoint_maxp(chosen_desc) & 0x7ff; _ep->desc = chosen_desc; _ep->comp_desc = NULL; _ep->maxburst = 0; - _ep->mult = 0; + _ep->mult = 1; + + if (g->speed == USB_SPEED_HIGH && (usb_endpoint_xfer_isoc(_ep->desc) || + usb_endpoint_xfer_int(_ep->desc))) + _ep->mult = ((usb_endpoint_maxp(_ep->desc) & 0x1800) >> 11) + 1; + if (!want_comp_desc) return 0; @@ -229,7 +234,7 @@ int config_ep_by_speed(struct usb_gadget *g, switch (usb_endpoint_type(_ep->desc)) { case USB_ENDPOINT_XFER_ISOC: /* mult: bits 1:0 of bmAttributes */ - _ep->mult = comp_desc->bmAttributes & 0x3; + _ep->mult = (comp_desc->bmAttributes & 0x3) + 1; case USB_ENDPOINT_XFER_BULK: case USB_ENDPOINT_XFER_INT: 
_ep->maxburst = comp_desc->bMaxBurst + 1; @@ -1810,9 +1815,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) value = min(w_length, (u16) 1); break; - /* function drivers must handle get/set altsetting; if there's - * no get() method, we know only altsetting zero works. - */ + /* function drivers must handle get/set altsetting */ case USB_REQ_SET_INTERFACE: if (ctrl->bRequestType != USB_RECIP_INTERFACE) goto unknown; @@ -1821,7 +1824,13 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) f = cdev->config->interface[intf]; if (!f) break; - if (w_value && !f->set_alt) + + /* + * If there's no get_alt() method, we know only altsetting zero + * works. There is no need to check if set_alt() is not NULL + * as we check this in usb_add_function(). + */ + if (w_value && !f->get_alt) break; value = f->set_alt(f, w_index, w_value); if (value == USB_GADGET_DELAYED_STATUS) { diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index a34651d1fcebb38ba956a9e00e8cf1817dd96698..6b2c1379923bf191b9ff2db541e70848d92d66bb 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -426,11 +426,6 @@ static int config_usb_cfg_link( } f = usb_get_function(fi); - if (f == NULL) { - /* Are we trying to symlink PTP without MTP function? */ - ret = -EINVAL; /* Invalid Configuration */ - goto out; - } if (IS_ERR(f)) { ret = PTR_ERR(f); goto out; diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index cfb57ace2941b5b5346aa5e495e39326499f92ec..b1cca11e022020f804b0cbfe34b82f288fef8a30 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -255,6 +255,7 @@ static inline struct acc_dev *func_to_dev(struct usb_function *f) static struct usb_request *acc_request_new(struct usb_ep *ep, int buffer_size) { struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) return NULL; @@ -1076,6 +1077,7 @@ acc_function_unbind(struct usb_configuration *c, struct usb_function *f) static void acc_start_work(struct work_struct *data) { char *envp[2] = { "ACCESSORY=START", NULL }; + kobject_uevent_env(&acc_device.this_device->kobj, KOBJ_CHANGE, envp); } diff --git a/drivers/usb/gadget/function/f_audio_source.c b/drivers/usb/gadget/function/f_audio_source.c index bcd817439dbf92efdc24885629a16a3de07bd261..db7903d19c435e4c5b08c09270baa44c5c48b6b2 100644 --- a/drivers/usb/gadget/function/f_audio_source.c +++ b/drivers/usb/gadget/function/f_audio_source.c @@ -310,6 +310,7 @@ static struct device_attribute *audio_source_function_attributes[] = { static struct usb_request *audio_request_new(struct usb_ep *ep, int buffer_size) { struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) return NULL; @@ -377,10 +378,9 @@ static void audio_send(struct audio_dev *audio) /* compute number of frames to send */ now = ktime_get(); - msecs = ktime_to_ns(now) - ktime_to_ns(audio->start_time); - do_div(msecs, 1000000); - frames = msecs * SAMPLE_RATE; - do_div(frames, 1000); + msecs = div_s64((ktime_to_ns(now) - ktime_to_ns(audio->start_time)), + 1000000); + frames = div_s64((msecs * SAMPLE_RATE), 1000); /* Readjust our frames_sent if we fall too far behind. 
* If we get too far behind it is better to drop some frames than diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index cc3ac2681b6efe9286dcc9a2832d0e9ae00a03db..4a30afa651b4749d916495a771de003163bf387a 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2605,6 +2605,8 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, if (len < sizeof(*d) || h->interface >= ffs->interfaces_count) return -EINVAL; length = le32_to_cpu(d->dwSize); + if (len < length) + return -EINVAL; type = le32_to_cpu(d->dwPropertyDataType); if (type < USB_EXT_PROP_UNICODE || type > USB_EXT_PROP_UNICODE_MULTI) { @@ -2613,6 +2615,11 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, return -EINVAL; } pnl = le16_to_cpu(d->wPropertyNameLength); + if (length < 14 + pnl) { + pr_vdebug("invalid os descriptor length: %d pnl:%d (descriptor %d)\n", + length, pnl, type); + return -EINVAL; + } pdl = le32_to_cpu(*(u32 *)((u8 *)data + 10 + pnl)); if (length != 14 + pnl + pdl) { pr_vdebug("invalid os descriptor length: %d pnl:%d pdl:%d (descriptor %d)\n", @@ -2704,6 +2711,9 @@ static int __ffs_data_got_descs(struct ffs_data *ffs, } } if (flags & (1 << i)) { + if (len < 4) { + goto error; + } os_descs_count = get_unaligned_le32(data); data += 4; len -= 4; @@ -2781,7 +2791,8 @@ static int __ffs_data_got_strings(struct ffs_data *ffs, ffs_log("enter: len %zu", len); - if (unlikely(get_unaligned_le32(data) != FUNCTIONFS_STRINGS_MAGIC || + if (unlikely(len < 16 || + get_unaligned_le32(data) != FUNCTIONFS_STRINGS_MAGIC || get_unaligned_le32(data + 4) != len)) goto error; str_count = get_unaligned_le32(data + 8); diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index 58401803039bdb14636a465ce36a966ca21cc377..4d8694a7f62dc57e234efa8c88c3c1a5230ccadd 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -386,6 +386,7 @@ static inline struct mtp_dev *func_to_mtp(struct usb_function *f) static struct usb_request *mtp_request_new(struct usb_ep *ep, int buffer_size) { struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) return NULL; @@ -1313,6 +1314,7 @@ static int mtp_ctrlrequest(struct usb_composite_dev *cdev, } else if (ctrl->bRequest == MTP_REQ_GET_DEVICE_STATUS && w_index == 0 && w_value == 0) { struct mtp_device_status *status = cdev->req->buf; + status->wLength = __constant_cpu_to_le16(sizeof(*status)); @@ -1335,6 +1337,7 @@ static int mtp_ctrlrequest(struct usb_composite_dev *cdev, /* respond with data transfer or status phase? 
*/ if (value >= 0) { int rc; + cdev->req->zero = value < w_length; cdev->req->length = value; rc = usb_ep_queue(cdev->gadget->ep0, cdev->req, GFP_ATOMIC); @@ -1685,6 +1688,7 @@ static struct mtp_instance *to_mtp_instance(struct config_item *item) static void mtp_attr_release(struct config_item *item) { struct mtp_instance *fi_mtp = to_mtp_instance(item); + usb_put_function_instance(&fi_mtp->func_inst); } @@ -1804,7 +1808,7 @@ struct usb_function *function_alloc_mtp_ptp(struct usb_function_instance *fi, pr_err("\t2: Create MTP function\n"); pr_err("\t3: Create and symlink PTP function" " with a gadget configuration\n"); - return NULL; + return ERR_PTR(-EINVAL); /* Invalid Configuration */ } dev = fi_mtp->dev; diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index 197f73386fac9ab45473de09db462ab5e4640d90..d2351139342f6200209078e769e04f5ea1eb2d1f 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c @@ -1073,7 +1073,7 @@ static struct usbg_cmd *usbg_get_cmd(struct f_uas *fu, struct usbg_cmd *cmd; int tag; - tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC); + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING); if (tag < 0) return ERR_PTR(-ENOMEM); diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index cd214ec8a601b4913b67f7d67e52fb8c57e003c4..969cfe7413808c5b1bcc421b56d60fb6db65e57d 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -1067,13 +1067,13 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) agdev->out_ep = usb_ep_autoconfig(gadget, &fs_epout_desc); if (!agdev->out_ep) { dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); - goto err; + return ret; } agdev->in_ep = usb_ep_autoconfig(gadget, &fs_epin_desc); if (!agdev->in_ep) { dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); - goto err; + return ret; } uac2->p_prm.uac2 = uac2; @@ -1091,7 +1091,7 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) ret = usb_assign_descriptors(fn, fs_audio_desc, hs_audio_desc, NULL, NULL); if (ret) - goto err; + return ret; prm = &agdev->uac2.c_prm; prm->max_psize = hs_epout_desc.wMaxPacketSize; @@ -1106,19 +1106,19 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) prm->rbuf = kzalloc(prm->max_psize * USB_XFERS, GFP_KERNEL); if (!prm->rbuf) { prm->max_psize = 0; - goto err_free_descs; + goto err; } ret = alsa_uac2_init(agdev); if (ret) - goto err_free_descs; + goto err; return 0; -err_free_descs: - usb_free_all_descriptors(fn); err: kfree(agdev->uac2.p_prm.rbuf); kfree(agdev->uac2.c_prm.rbuf); +err_free_descs: + usb_free_all_descriptors(fn); return -EINVAL; } diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 3d0d5d94a62f2fe6927983ba6979784eb20eceeb..0f01c04d7cbd856581da53d6e69cba5f50b2c922 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -243,7 +243,7 @@ uvc_video_alloc_requests(struct uvc_video *video) req_size = video->ep->maxpacket * max_t(unsigned int, video->ep->maxburst, 1) - * (video->ep->mult + 1); + * (video->ep->mult); for (i = 0; i < UVC_NUM_REQUESTS; ++i) { video->req_buffer[i] = kmalloc(req_size, GFP_KERNEL); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index bd82dd12deffd25aa9ae8528d60a898bdfa7551b..1468d8f085a397ef72bafacffa0be6bcef2af396 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ 
b/drivers/usb/gadget/legacy/inode.c @@ -1126,7 +1126,7 @@ ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) /* data and/or status stage for control request */ } else if (dev->state == STATE_DEV_SETUP) { - /* IN DATA+STATUS caller makes len <= wLength */ + len = min_t(size_t, len, dev->setup_wLength); if (dev->setup_in) { retval = setup_req (dev->gadget->ep0, dev->req, len); if (retval == 0) { @@ -1734,10 +1734,12 @@ static struct usb_gadget_driver gadgetfs_driver = { * such as configuration notifications. */ -static int is_valid_config (struct usb_config_descriptor *config) +static int is_valid_config(struct usb_config_descriptor *config, + unsigned int total) { return config->bDescriptorType == USB_DT_CONFIG && config->bLength == USB_DT_CONFIG_SIZE + && total >= USB_DT_CONFIG_SIZE && config->bConfigurationValue != 0 && (config->bmAttributes & USB_CONFIG_ATT_ONE) != 0 && (config->bmAttributes & USB_CONFIG_ATT_WAKEUP) == 0; @@ -1762,7 +1764,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) } spin_unlock_irq(&dev->lock); - if (len < (USB_DT_CONFIG_SIZE + USB_DT_DEVICE_SIZE + 4)) + if ((len < (USB_DT_CONFIG_SIZE + USB_DT_DEVICE_SIZE + 4)) || + (len > PAGE_SIZE * 4)) return -EINVAL; /* we might need to change message format someday */ @@ -1786,7 +1789,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) /* full or low speed config */ dev->config = (void *) kbuf; total = le16_to_cpu(dev->config->wTotalLength); - if (!is_valid_config (dev->config) || total >= length) + if (!is_valid_config(dev->config, total) || + total > length - USB_DT_DEVICE_SIZE) goto fail; kbuf += total; length -= total; @@ -1795,10 +1799,13 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) if (kbuf [1] == USB_DT_CONFIG) { dev->hs_config = (void *) kbuf; total = le16_to_cpu(dev->hs_config->wTotalLength); - if (!is_valid_config (dev->hs_config) || total >= length) + if (!is_valid_config(dev->hs_config, total) || + total > length - USB_DT_DEVICE_SIZE) goto fail; kbuf += total; length -= total; + } else { + dev->hs_config = NULL; } /* could support multiple configs, using another encoding! 
*/ diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 03fe5c535617e46d7f8ea9a37725fd2563af3bcb..b2e3e7224277374b897c1e028ab474d3892f57e5 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1368,7 +1368,11 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver) if (!ret) break; } - if (!ret && !udc->driver) + if (ret) + ret = -ENODEV; + else if (udc->driver) + ret = -EBUSY; + else goto found; } else { list_for_each_entry(udc, &udc_list, list) { diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 77d07904f932e7d65bf735f5dc6238465a309650..a81d9ab861dc05e0de95e09a015df14de588222f 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -330,7 +330,7 @@ static void nuke(struct dummy *dum, struct dummy_ep *ep) /* caller must hold lock */ static void stop_activity(struct dummy *dum) { - struct dummy_ep *ep; + int i; /* prevent any more requests */ dum->address = 0; @@ -338,8 +338,8 @@ static void stop_activity(struct dummy *dum) /* The timer is left running so that outstanding URBs can fail */ /* nuke any pending requests first, so driver i/o is quiesced */ - list_for_each_entry(ep, &dum->gadget.ep_list, ep.ep_list) - nuke(dum, ep); + for (i = 0; i < DUMMY_ENDPOINTS; ++i) + nuke(dum, &dum->ep[i]); /* driver now does any non-usb quiescing necessary */ } diff --git a/drivers/usb/host/uhci-pci.c b/drivers/usb/host/uhci-pci.c index 940304c33224574439cea18d83f24b6bc763ba10..02260cfdedb135c92d219a2e5531775355973188 100644 --- a/drivers/usb/host/uhci-pci.c +++ b/drivers/usb/host/uhci-pci.c @@ -129,6 +129,10 @@ static int uhci_pci_init(struct usb_hcd *hcd) if (to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_HP) uhci->wait_for_hp = 1; + /* Intel controllers use non-PME wakeup signalling */ + if (to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_INTEL) + device_set_run_wake(uhci_dev(uhci), 1); + /* Set up pointers to PCI-specific functions */ uhci->reset_hc = uhci_pci_reset_hc; uhci->check_and_reset_hc = uhci_pci_check_and_reset_hc; diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 6afe32381209d76cd0cf2f46d686a9f07a43f2c9..7064892ff4a640e15c10bb0e73452eacff448554 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -979,6 +979,40 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) xhci->devs[slot_id] = NULL; } +/* + * Free a virt_device structure. + * If the virt_device added a tt_info (a hub) and has children pointing to + * that tt_info, then free the child first. Recursive. + * We can't rely on udev at this point to find child-parent relationships. + */ +void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id) +{ + struct xhci_virt_device *vdev; + struct list_head *tt_list_head; + struct xhci_tt_bw_info *tt_info, *next; + int i; + + vdev = xhci->devs[slot_id]; + if (!vdev) + return; + + tt_list_head = &(xhci->rh_bw[vdev->real_port - 1].tts); + list_for_each_entry_safe(tt_info, next, tt_list_head, tt_list) { + /* is this a hub device that added a tt_info to the tts list */ + if (tt_info->slot_id == slot_id) { + /* are any devices using this tt_info? 
*/ + for (i = 1; i < HCS_MAX_SLOTS(xhci->hcs_params1); i++) { + vdev = xhci->devs[i]; + if (vdev && (vdev->tt_info == tt_info)) + xhci_free_virt_devices_depth_first( + xhci, i); + } + } + } + /* we are now at a leaf device */ + xhci_free_virt_device(xhci, slot_id); +} + int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, struct usb_device *udev, gfp_t flags) { @@ -1796,7 +1830,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) int size; int i, j, num_ports; - del_timer_sync(&xhci->cmd_timer); + cancel_delayed_work_sync(&xhci->cmd_timer); /* Free the Event Ring Segment Table and the actual Event Ring */ size = sizeof(struct xhci_erst_entry)*(xhci->erst.num_entries); @@ -1829,8 +1863,8 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) } } - for (i = 1; i < MAX_HC_SLOTS; ++i) - xhci_free_virt_device(xhci, i); + for (i = HCS_MAX_SLOTS(xhci->hcs_params1); i > 0; i--) + xhci_free_virt_devices_depth_first(xhci, i); dma_pool_destroy(xhci->segment_pool); xhci->segment_pool = NULL; @@ -2343,9 +2377,9 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) INIT_LIST_HEAD(&xhci->cmd_list); - /* init command timeout timer */ - setup_timer(&xhci->cmd_timer, xhci_handle_command_timeout, - (unsigned long)xhci); + /* init command timeout work */ + INIT_DELAYED_WORK(&xhci->cmd_timer, xhci_handle_command_timeout); + init_completion(&xhci->cmd_ring_stop_completion); page_size = readl(&xhci->op_regs->page_size); xhci_dbg_trace(xhci, trace_xhci_dbg_init, @@ -2384,7 +2418,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) * "physically contiguous and 64-byte (cache line) aligned". */ xhci->dcbaa = dma_alloc_coherent(dev, sizeof(*xhci->dcbaa), &dma, - GFP_KERNEL); + flags); if (!xhci->dcbaa) goto fail; memset(xhci->dcbaa, 0, sizeof *(xhci->dcbaa)); @@ -2480,7 +2514,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) xhci->erst.entries = dma_alloc_coherent(dev, sizeof(struct xhci_erst_entry) * ERST_NUM_SEGS, &dma, - GFP_KERNEL); + flags); if (!xhci->erst.entries) goto fail; xhci_dbg_trace(xhci, trace_xhci_dbg_init, diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 79959f17c38cfe3cd0791b87cf9f0dd90f93ad9d..f2365a47fa4a42d593b7644ddbe574cdc103f2c0 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -560,8 +560,10 @@ static int xhci_mtk_probe(struct platform_device *pdev) goto disable_ldos; irq = platform_get_irq(pdev, 0); - if (irq < 0) + if (irq < 0) { + ret = irq; goto disable_clk; + } /* Initialize dma_mask and coherent_dma_mask to 32-bits */ ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index e96ae80d107e94fd8db8c33cae52ff9153f14730..954abfd5014d281d537e11353cb6093b9b7a0bdd 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -165,7 +165,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 797137e26549b566474d99e1d3e6efe277aa4648..521c1816a26a7101a89c8e3afab290b9dfb48e09 100644 --- a/drivers/usb/host/xhci-ring.c +++ 
b/drivers/usb/host/xhci-ring.c @@ -260,23 +260,76 @@ void xhci_ring_cmd_db(struct xhci_hcd *xhci) readl(&xhci->dba->doorbell[0]); } -static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) +static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay) +{ + return mod_delayed_work(system_wq, &xhci->cmd_timer, delay); +} + +static struct xhci_command *xhci_next_queued_cmd(struct xhci_hcd *xhci) +{ + return list_first_entry_or_null(&xhci->cmd_list, struct xhci_command, + cmd_list); +} + +/* + * Turn all commands on command ring with status set to "aborted" to no-op trbs. + * If there are other commands waiting then restart the ring and kick the timer. + * This must be called with command ring stopped and xhci->lock held. + */ +static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, + struct xhci_command *cur_cmd) +{ + struct xhci_command *i_cmd; + u32 cycle_state; + + /* Turn all aborted commands in list to no-ops, then restart */ + list_for_each_entry(i_cmd, &xhci->cmd_list, cmd_list) { + + if (i_cmd->status != COMP_CMD_ABORT) + continue; + + i_cmd->status = COMP_CMD_STOP; + + xhci_dbg(xhci, "Turn aborted command %p to no-op\n", + i_cmd->command_trb); + /* get cycle state from the original cmd trb */ + cycle_state = le32_to_cpu( + i_cmd->command_trb->generic.field[3]) & TRB_CYCLE; + /* modify the command trb to no-op command */ + i_cmd->command_trb->generic.field[0] = 0; + i_cmd->command_trb->generic.field[1] = 0; + i_cmd->command_trb->generic.field[2] = 0; + i_cmd->command_trb->generic.field[3] = cpu_to_le32( + TRB_TYPE(TRB_CMD_NOOP) | cycle_state); + + /* + * caller waiting for completion is called when command + * completion event is received for these no-op commands + */ + } + + xhci->cmd_ring_state = CMD_RING_STATE_RUNNING; + + /* ring command ring doorbell to restart the command ring */ + if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) && + !(xhci->xhc_state & XHCI_STATE_DYING)) { + xhci->current_cmd = cur_cmd; + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + xhci_ring_cmd_db(xhci); + } +} + +/* Must be called with xhci->lock held, releases and aquires lock back */ +static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags) { u64 temp_64; int ret; xhci_dbg(xhci, "Abort command ring\n"); - temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); - xhci->cmd_ring_state = CMD_RING_STATE_ABORTED; + reinit_completion(&xhci->cmd_ring_stop_completion); - /* - * Writing the CMD_RING_ABORT bit should cause a cmd completion event, - * however on some host hw the CMD_RING_RUNNING bit is correctly cleared - * but the completion event in never sent. Use the cmd timeout timer to - * handle those cases. 
Use twice the time to cover the bit polling retry - */ - mod_timer(&xhci->cmd_timer, jiffies + (2 * XHCI_CMD_DEFAULT_TIMEOUT)); + temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, &xhci->op_regs->cmd_ring); @@ -296,16 +349,30 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) udelay(1000); ret = xhci_handshake(&xhci->op_regs->cmd_ring, CMD_RING_RUNNING, 0, 3 * 1000 * 1000); - if (ret == 0) - return 0; - - xhci_err(xhci, "Stopped the command ring failed, " - "maybe the host is dead\n"); - del_timer(&xhci->cmd_timer); - xhci->xhc_state |= XHCI_STATE_DYING; - xhci_quiesce(xhci); - xhci_halt(xhci); - return -ESHUTDOWN; + if (ret < 0) { + xhci_err(xhci, "Stopped the command ring failed, " + "maybe the host is dead\n"); + xhci->xhc_state |= XHCI_STATE_DYING; + xhci_quiesce(xhci); + xhci_halt(xhci); + return -ESHUTDOWN; + } + } + /* + * Writing the CMD_RING_ABORT bit should cause a cmd completion event, + * however on some host hw the CMD_RING_RUNNING bit is correctly cleared + * but the completion event in never sent. Wait 2 secs (arbitrary + * number) to handle those cases after negation of CMD_RING_RUNNING. + */ + spin_unlock_irqrestore(&xhci->lock, flags); + ret = wait_for_completion_timeout(&xhci->cmd_ring_stop_completion, + msecs_to_jiffies(2000)); + spin_lock_irqsave(&xhci->lock, flags); + if (!ret) { + xhci_dbg(xhci, "No stop event for abort, ring start fail?\n"); + xhci_cleanup_command_queue(xhci); + } else { + xhci_handle_stopped_cmd_ring(xhci, xhci_next_queued_cmd(xhci)); } return 0; @@ -850,17 +917,6 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg) spin_lock_irqsave(&xhci->lock, flags); ep->stop_cmds_pending--; - if (xhci->xhc_state & XHCI_STATE_REMOVING) { - spin_unlock_irqrestore(&xhci->lock, flags); - return; - } - if (xhci->xhc_state & XHCI_STATE_DYING) { - xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, - "Stop EP timer ran, but another timer marked " - "xHCI as DYING, exiting."); - spin_unlock_irqrestore(&xhci->lock, flags); - return; - } if (!(ep->stop_cmds_pending == 0 && (ep->ep_state & EP_HALT_PENDING))) { xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "Stop EP timer ran, but no command pending, " @@ -1211,101 +1267,62 @@ void xhci_cleanup_command_queue(struct xhci_hcd *xhci) xhci_complete_del_and_free_cmd(cur_cmd, COMP_CMD_ABORT); } -/* - * Turn all commands on command ring with status set to "aborted" to no-op trbs. - * If there are other commands waiting then restart the ring and kick the timer. - * This must be called with command ring stopped and xhci->lock held. 
- */ -static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, - struct xhci_command *cur_cmd) -{ - struct xhci_command *i_cmd, *tmp_cmd; - u32 cycle_state; - - /* Turn all aborted commands in list to no-ops, then restart */ - list_for_each_entry_safe(i_cmd, tmp_cmd, &xhci->cmd_list, - cmd_list) { - - if (i_cmd->status != COMP_CMD_ABORT) - continue; - - i_cmd->status = COMP_CMD_STOP; - - xhci_dbg(xhci, "Turn aborted command %p to no-op\n", - i_cmd->command_trb); - /* get cycle state from the original cmd trb */ - cycle_state = le32_to_cpu( - i_cmd->command_trb->generic.field[3]) & TRB_CYCLE; - /* modify the command trb to no-op command */ - i_cmd->command_trb->generic.field[0] = 0; - i_cmd->command_trb->generic.field[1] = 0; - i_cmd->command_trb->generic.field[2] = 0; - i_cmd->command_trb->generic.field[3] = cpu_to_le32( - TRB_TYPE(TRB_CMD_NOOP) | cycle_state); - - /* - * caller waiting for completion is called when command - * completion event is received for these no-op commands - */ - } - - xhci->cmd_ring_state = CMD_RING_STATE_RUNNING; - - /* ring command ring doorbell to restart the command ring */ - if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) && - !(xhci->xhc_state & XHCI_STATE_DYING)) { - xhci->current_cmd = cur_cmd; - mod_timer(&xhci->cmd_timer, jiffies + XHCI_CMD_DEFAULT_TIMEOUT); - xhci_ring_cmd_db(xhci); - } - return; -} - - -void xhci_handle_command_timeout(unsigned long data) +void xhci_handle_command_timeout(struct work_struct *work) { struct xhci_hcd *xhci; int ret; unsigned long flags; u64 hw_ring_state; - bool second_timeout = false; - xhci = (struct xhci_hcd *) data; - /* mark this command to be cancelled */ + xhci = container_of(to_delayed_work(work), struct xhci_hcd, cmd_timer); + spin_lock_irqsave(&xhci->lock, flags); - if (xhci->current_cmd) { - if (xhci->current_cmd->status == COMP_CMD_ABORT) - second_timeout = true; - xhci->current_cmd->status = COMP_CMD_ABORT; + + /* + * If timeout work is pending, or current_cmd is NULL, it means we + * raced with command completion. Command is handled so just return. + */ + if (!xhci->current_cmd || delayed_work_pending(&xhci->cmd_timer)) { + spin_unlock_irqrestore(&xhci->lock, flags); + return; } + /* mark this command to be cancelled */ + xhci->current_cmd->status = COMP_CMD_ABORT; /* Make sure command ring is running before aborting it */ hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) && (hw_ring_state & CMD_RING_RUNNING)) { - spin_unlock_irqrestore(&xhci->lock, flags); + /* Prevent new doorbell, and start command abort */ + xhci->cmd_ring_state = CMD_RING_STATE_ABORTED; xhci_dbg(xhci, "Command timeout\n"); - ret = xhci_abort_cmd_ring(xhci); + ret = xhci_abort_cmd_ring(xhci, flags); if (unlikely(ret == -ESHUTDOWN)) { xhci_err(xhci, "Abort command ring failed\n"); xhci_cleanup_command_queue(xhci); + spin_unlock_irqrestore(&xhci->lock, flags); usb_hc_died(xhci_to_hcd(xhci)->primary_hcd); xhci_dbg(xhci, "xHCI host controller is dead.\n"); + + return; } - return; + + goto time_out_completed; } - /* command ring failed to restart, or host removed. Bail out */ - if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) { - spin_unlock_irqrestore(&xhci->lock, flags); - xhci_dbg(xhci, "command timed out twice, ring start fail?\n"); + /* host removed. 
Bail out */ + if (xhci->xhc_state & XHCI_STATE_REMOVING) { + xhci_dbg(xhci, "host removed, ring start fail?\n"); xhci_cleanup_command_queue(xhci); - return; + + goto time_out_completed; } /* command timeout on stopped ring, ring can't be aborted */ xhci_dbg(xhci, "Command timeout on stopped ring\n"); xhci_handle_stopped_cmd_ring(xhci, xhci->current_cmd); + +time_out_completed: spin_unlock_irqrestore(&xhci->lock, flags); return; } @@ -1338,7 +1355,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, cmd = list_entry(xhci->cmd_list.next, struct xhci_command, cmd_list); - del_timer(&xhci->cmd_timer); + cancel_delayed_work(&xhci->cmd_timer); trace_xhci_cmd_completion(cmd_trb, (struct xhci_generic_trb *) event); @@ -1346,7 +1363,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, /* If CMD ring stopped we own the trbs between enqueue and dequeue */ if (cmd_comp_code == COMP_CMD_STOP) { - xhci_handle_stopped_cmd_ring(xhci, cmd); + complete_all(&xhci->cmd_ring_stop_completion); return; } @@ -1364,8 +1381,11 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, */ if (cmd_comp_code == COMP_CMD_ABORT) { xhci->cmd_ring_state = CMD_RING_STATE_STOPPED; - if (cmd->status == COMP_CMD_ABORT) + if (cmd->status == COMP_CMD_ABORT) { + if (xhci->current_cmd == cmd) + xhci->current_cmd = NULL; goto event_handled; + } } cmd_type = TRB_FIELD_TO_TYPE(le32_to_cpu(cmd_trb->generic.field[3])); @@ -1426,7 +1446,9 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, if (cmd->cmd_list.next != &xhci->cmd_list) { xhci->current_cmd = list_entry(cmd->cmd_list.next, struct xhci_command, cmd_list); - mod_timer(&xhci->cmd_timer, jiffies + XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + } else if (xhci->current_cmd == cmd) { + xhci->current_cmd = NULL; } event_handled: @@ -3920,9 +3942,9 @@ static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd, /* if there are no other commands queued we start the timeout timer */ if (xhci->cmd_list.next == &cmd->cmd_list && - !timer_pending(&xhci->cmd_timer)) { + !delayed_work_pending(&xhci->cmd_timer)) { xhci->current_cmd = cmd; - mod_timer(&xhci->cmd_timer, jiffies + XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); } queue_trb(xhci, xhci->cmd_ring, false, field1, field2, field3, diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1a4ca02729c274cb83762636e5556ca75e840144..34e23c7d7797b948bb17b04e8935dcceddf0cc06 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1529,19 +1529,6 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) xhci_urb_free_priv(urb_priv); return ret; } - if ((xhci->xhc_state & XHCI_STATE_DYING) || - (xhci->xhc_state & XHCI_STATE_HALTED)) { - xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, - "Ep 0x%x: URB %p to be canceled on " - "non-responsive xHCI host.", - urb->ep->desc.bEndpointAddress, urb); - /* Let the stop endpoint command watchdog timer (which set this - * state) finish cleaning up the endpoint TD lists. We must - * have caught it in the middle of dropping a lock and giving - * back an URB. 
- */ - goto done; - } ep_index = xhci_get_endpoint_index(&urb->ep->desc); ep = &xhci->devs[urb->dev->slot_id]->eps[ep_index]; @@ -3783,8 +3770,10 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_lock(&xhci->mutex); - if (xhci->xhc_state) /* dying, removing or halted */ + if (xhci->xhc_state) { /* dying, removing or halted */ + ret = -ESHUTDOWN; goto out; + } if (!udev->slot_id) { xhci_dbg_trace(xhci, trace_xhci_dbg_address, diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index f945380035d07e2f7af2bb73da9d39bf94475991..c525722aa934caaf6b4ea29d091a9a1b8d059d31 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1571,7 +1571,8 @@ struct xhci_hcd { #define CMD_RING_STATE_STOPPED (1 << 2) struct list_head cmd_list; unsigned int cmd_ring_reserved_trbs; - struct timer_list cmd_timer; + struct delayed_work cmd_timer; + struct completion cmd_ring_stop_completion; struct xhci_command *current_cmd; struct xhci_ring *event_ring; struct xhci_erst erst; @@ -1941,7 +1942,7 @@ void xhci_queue_config_ep_quirk(struct xhci_hcd *xhci, unsigned int slot_id, unsigned int ep_index, struct xhci_dequeue_state *deq_state); void xhci_stop_endpoint_command_watchdog(unsigned long arg); -void xhci_handle_command_timeout(unsigned long data); +void xhci_handle_command_timeout(struct work_struct *work); void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, unsigned int slot_id, unsigned int ep_index, unsigned int stream_id); diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c index 310238c6b5cd699bcd89ef101b926feba5d60a8c..8967980718170d5119d6331ed8395dfa2f4fa970 100644 --- a/drivers/usb/musb/blackfin.c +++ b/drivers/usb/musb/blackfin.c @@ -469,6 +469,7 @@ static const struct musb_platform_ops bfin_ops = { .init = bfin_musb_init, .exit = bfin_musb_exit, + .fifo_offset = bfin_fifo_offset, .readb = bfin_readb, .writeb = bfin_writeb, .readw = bfin_readw, diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index c3e172e15ec3d9d9ec7346b972b2702fed710c65..338575fb2d27819cfa516620f2dd1e35f7a9f66c 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -578,11 +578,11 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, | MUSB_PORT_STAT_RESUME; musb->rh_timer = jiffies + msecs_to_jiffies(USB_RESUME_TIMEOUT); - musb->need_finish_resume = 1; - musb->xceiv->otg->state = OTG_STATE_A_HOST; musb->is_active = 1; musb_host_resume_root_hub(musb); + schedule_delayed_work(&musb->finish_resume_work, + msecs_to_jiffies(USB_RESUME_TIMEOUT)); break; case OTG_STATE_B_WAIT_ACON: musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL; @@ -2691,11 +2691,6 @@ static int musb_resume(struct device *dev) mask = MUSB_DEVCTL_BDEVICE | MUSB_DEVCTL_FSDEV | MUSB_DEVCTL_LSDEV; if ((devctl & mask) != (musb->context.devctl & mask)) musb->port1_status = 0; - if (musb->need_finish_resume) { - musb->need_finish_resume = 0; - schedule_delayed_work(&musb->finish_resume_work, - msecs_to_jiffies(USB_RESUME_TIMEOUT)); - } /* * The USB HUB code expects the device to be in RPM_ACTIVE once it came @@ -2747,12 +2742,6 @@ static int musb_runtime_resume(struct device *dev) musb_restore_context(musb); - if (musb->need_finish_resume) { - musb->need_finish_resume = 0; - schedule_delayed_work(&musb->finish_resume_work, - msecs_to_jiffies(USB_RESUME_TIMEOUT)); - } - spin_lock_irqsave(&musb->lock, flags); error = musb_run_resume_work(musb); if (error) diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index 
91817d77d59c8ecd226e25fb6ce9e07b5a597d3f..854fbf7b6b23240e010f08cfdc0864da47d364ea 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -216,6 +216,7 @@ struct musb_platform_ops { void (*pre_root_reset_end)(struct musb *musb); void (*post_root_reset_end)(struct musb *musb); int (*phy_callback)(enum musb_vbus_id_status status); + void (*clear_ep_rxintr)(struct musb *musb, int epnum); }; /* @@ -409,7 +410,6 @@ struct musb { /* is_suspended means USB B_PERIPHERAL suspend */ unsigned is_suspended:1; - unsigned need_finish_resume :1; /* may_wakeup means remote wakeup is enabled */ unsigned may_wakeup:1; @@ -626,4 +626,10 @@ static inline void musb_platform_post_root_reset_end(struct musb *musb) musb->ops->post_root_reset_end(musb); } +static inline void musb_platform_clear_ep_rxintr(struct musb *musb, int epnum) +{ + if (musb->ops->clear_ep_rxintr) + musb->ops->clear_ep_rxintr(musb, epnum); +} + #endif /* __MUSB_CORE_H__ */ diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c index 9b22d946c089698e2083ab3457e0c5bda1f9bde2..534a3f6fa89c1efde51811031354b500ae3eb2f5 100644 --- a/drivers/usb/musb/musb_debugfs.c +++ b/drivers/usb/musb/musb_debugfs.c @@ -114,6 +114,7 @@ static int musb_regdump_show(struct seq_file *s, void *unused) unsigned i; seq_printf(s, "MUSB (M)HDRC Register Dump\n"); + pm_runtime_get_sync(musb->controller); for (i = 0; i < ARRAY_SIZE(musb_regmap); i++) { switch (musb_regmap[i].size) { @@ -132,6 +133,8 @@ static int musb_regdump_show(struct seq_file *s, void *unused) } } + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); return 0; } @@ -145,7 +148,10 @@ static int musb_test_mode_show(struct seq_file *s, void *unused) struct musb *musb = s->private; unsigned test; + pm_runtime_get_sync(musb->controller); test = musb_readb(musb->mregs, MUSB_TESTMODE); + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); if (test & MUSB_TEST_FORCE_HOST) seq_printf(s, "force host\n"); @@ -194,11 +200,12 @@ static ssize_t musb_test_mode_write(struct file *file, u8 test; char buf[18]; + pm_runtime_get_sync(musb->controller); test = musb_readb(musb->mregs, MUSB_TESTMODE); if (test) { dev_err(musb->controller, "Error: test mode is already set. " "Please do USB Bus Reset to start a new test.\n"); - return count; + goto ret; } memset(buf, 0x00, sizeof(buf)); @@ -234,6 +241,9 @@ static ssize_t musb_test_mode_write(struct file *file, musb_writeb(musb->mregs, MUSB_TESTMODE, test); +ret: + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); return count; } @@ -254,8 +264,13 @@ static int musb_softconnect_show(struct seq_file *s, void *unused) switch (musb->xceiv->otg->state) { case OTG_STATE_A_HOST: case OTG_STATE_A_WAIT_BCON: + pm_runtime_get_sync(musb->controller); + reg = musb_readb(musb->mregs, MUSB_DEVCTL); connect = reg & MUSB_DEVCTL_SESSION ? 
1 : 0; + + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); break; default: connect = -1; @@ -284,6 +299,7 @@ static ssize_t musb_softconnect_write(struct file *file, if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count))) return -EFAULT; + pm_runtime_get_sync(musb->controller); if (!strncmp(buf, "0", 1)) { switch (musb->xceiv->otg->state) { case OTG_STATE_A_HOST: @@ -314,6 +330,8 @@ static ssize_t musb_softconnect_write(struct file *file, } } + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); return count; } diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index feae1561b9abb6924d2fe2fc6f1222dfac9d2be7..9f125e179acd444ca43c73a1a5d263b8e5c8893d 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -267,6 +267,17 @@ static void otg_timer(unsigned long _musb) pm_runtime_put_autosuspend(dev); } +void dsps_musb_clear_ep_rxintr(struct musb *musb, int epnum) +{ + u32 epintr; + struct dsps_glue *glue = dev_get_drvdata(musb->controller->parent); + const struct dsps_musb_wrapper *wrp = glue->wrp; + + /* musb->lock might already be held */ + epintr = (1 << epnum) << wrp->rxep_shift; + musb_writel(musb->ctrl_base, wrp->epintr_status, epintr); +} + static irqreturn_t dsps_interrupt(int irq, void *hci) { struct musb *musb = hci; @@ -622,6 +633,7 @@ static struct musb_platform_ops dsps_ops = { .set_mode = dsps_musb_set_mode, .recover = dsps_musb_recover, + .clear_ep_rxintr = dsps_musb_clear_ep_rxintr, }; static u64 musb_dmamask = DMA_BIT_MASK(32); diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 53bc4ceefe89ad16dcb576fb2fd18c37ad404e23..806451418cfe24fa5974b78554dc71de0614b608 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2374,12 +2374,11 @@ static int musb_cleanup_urb(struct urb *urb, struct musb_qh *qh) int is_in = usb_pipein(urb->pipe); int status = 0; u16 csr; + struct dma_channel *dma = NULL; musb_ep_select(regs, hw_end); if (is_dma_capable()) { - struct dma_channel *dma; - dma = is_in ? ep->rx_channel : ep->tx_channel; if (dma) { status = ep->musb->dma_controller->channel_abort(dma); @@ -2395,10 +2394,9 @@ static int musb_cleanup_urb(struct urb *urb, struct musb_qh *qh) /* giveback saves bulk toggle */ csr = musb_h_flush_rxfifo(ep, 0); - /* REVISIT we still get an irq; should likely clear the - * endpoint's irq status here to avoid bogus irqs. - * clearing that status is platform-specific... 
- */ + /* clear the endpoint's irq status here to avoid bogus irqs */ + if (is_dma_capable() && dma) + musb_platform_clear_ep_rxintr(musb, ep->epnum); } else if (ep->epnum) { musb_h_tx_flush_fifo(ep); csr = musb_readw(epio, MUSB_TXCSR); diff --git a/drivers/usb/musb/musbhsdma.h b/drivers/usb/musb/musbhsdma.h index f7b13fd252574f848e7984905cbaef4478c2e90a..a3dcbd55e43609b3140979d971a43426a1ce3db0 100644 --- a/drivers/usb/musb/musbhsdma.h +++ b/drivers/usb/musb/musbhsdma.h @@ -157,5 +157,5 @@ struct musb_dma_controller { void __iomem *base; u8 channel_count; u8 used_channels; - u8 irq; + int irq; }; diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index c5527d41f4090b339db03c7f58d3831ce3469b95..d2c48766d58e12ea65797e16c001888e8190879e 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -8,7 +8,7 @@ config USB_PHY config USB_OTG_WAKELOCK bool "Hold a wakelock when USB connected" - depends on WAKELOCK + depends on PM_WAKELOCKS select USB_OTG_UTILS help Select this to automatically hold a wakelock when USB is diff --git a/drivers/usb/phy/otg-wakelock.c b/drivers/usb/phy/otg-wakelock.c index 479376bfa484bd73edc76a7cae115c37d4e729ae..ecd741027f53d9357ccb41205a84f046793038e9 100644 --- a/drivers/usb/phy/otg-wakelock.c +++ b/drivers/usb/phy/otg-wakelock.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -42,7 +41,7 @@ static DEFINE_SPINLOCK(otgwl_spinlock); struct otgwl_lock { char name[40]; - struct wake_lock wakelock; + struct wakeup_source wakesrc; bool held; }; @@ -57,22 +56,21 @@ static struct otgwl_lock vbus_lock; static void otgwl_hold(struct otgwl_lock *lock) { if (!lock->held) { - wake_lock(&lock->wakelock); + __pm_stay_awake(&lock->wakesrc); lock->held = true; } } static void otgwl_temporary_hold(struct otgwl_lock *lock) { - wake_lock_timeout(&lock->wakelock, - msecs_to_jiffies(TEMPORARY_HOLD_TIME)); + __pm_wakeup_event(&lock->wakesrc, TEMPORARY_HOLD_TIME); lock->held = false; } static void otgwl_drop(struct otgwl_lock *lock) { if (lock->held) { - wake_unlock(&lock->wakelock); + __pm_relax(&lock->wakesrc); lock->held = false; } } @@ -151,8 +149,7 @@ static int __init otg_wakelock_init(void) snprintf(vbus_lock.name, sizeof(vbus_lock.name), "vbus-%s", dev_name(otgwl_xceiv->dev)); - wake_lock_init(&vbus_lock.wakelock, WAKE_LOCK_SUSPEND, - vbus_lock.name); + wakeup_source_init(&vbus_lock.wakesrc, vbus_lock.name); otgwl_nb.notifier_call = otgwl_otg_notifications; ret = usb_register_notifier(otgwl_xceiv, &otgwl_nb); @@ -162,7 +159,7 @@ static int __init otg_wakelock_init(void) " failed\n", __func__, dev_name(otgwl_xceiv->dev)); otgwl_xceiv = NULL; - wake_lock_destroy(&vbus_lock.wakelock); + wakeup_source_trash(&vbus_lock.wakesrc); return ret; } diff --git a/drivers/usb/phy/phy-am335x-control.c b/drivers/usb/phy/phy-am335x-control.c index 42a1afe36a905240cd5eb043004e931e21df0d1d..5f5f19813fde15eba836edacc4c71220426ff636 100644 --- a/drivers/usb/phy/phy-am335x-control.c +++ b/drivers/usb/phy/phy-am335x-control.c @@ -134,10 +134,12 @@ struct phy_control *am335x_get_phy_control(struct device *dev) return NULL; dev = bus_find_device(&platform_bus_type, NULL, node, match); + of_node_put(node); if (!dev) return NULL; ctrl_usb = dev_get_drvdata(dev); + put_device(dev); if (!ctrl_usb) return NULL; return &ctrl_usb->phy_ctrl; diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index f139488d0816c04374a83eeb3af3ef29ab30d33c..e98590aab633cd6491f2886842179d314ded5fe6 100644 --- a/drivers/usb/serial/ch341.c +++ 
b/drivers/usb/serial/ch341.c @@ -99,6 +99,8 @@ static int ch341_control_out(struct usb_device *dev, u8 request, r = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, value, index, NULL, 0, DEFAULT_TIMEOUT); + if (r < 0) + dev_err(&dev->dev, "failed to send control message: %d\n", r); return r; } @@ -116,7 +118,20 @@ static int ch341_control_in(struct usb_device *dev, r = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, value, index, buf, bufsize, DEFAULT_TIMEOUT); - return r; + if (r < bufsize) { + if (r >= 0) { + dev_err(&dev->dev, + "short control message received (%d < %u)\n", + r, bufsize); + r = -EIO; + } + + dev_err(&dev->dev, "failed to receive control message: %d\n", + r); + return r; + } + + return 0; } static int ch341_set_baudrate(struct usb_device *dev, @@ -158,9 +173,9 @@ static int ch341_set_handshake(struct usb_device *dev, u8 control) static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv) { + const unsigned int size = 2; char *buffer; int r; - const unsigned size = 8; unsigned long flags; buffer = kmalloc(size, GFP_KERNEL); @@ -171,14 +186,9 @@ static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv) if (r < 0) goto out; - /* setup the private status if available */ - if (r == 2) { - r = 0; - spin_lock_irqsave(&priv->lock, flags); - priv->line_status = (~(*buffer)) & CH341_BITS_MODEM_STAT; - spin_unlock_irqrestore(&priv->lock, flags); - } else - r = -EPROTO; + spin_lock_irqsave(&priv->lock, flags); + priv->line_status = (~(*buffer)) & CH341_BITS_MODEM_STAT; + spin_unlock_irqrestore(&priv->lock, flags); out: kfree(buffer); return r; @@ -188,9 +198,9 @@ out: kfree(buffer); static int ch341_configure(struct usb_device *dev, struct ch341_private *priv) { + const unsigned int size = 2; char *buffer; int r; - const unsigned size = 8; buffer = kmalloc(size, GFP_KERNEL); if (!buffer) @@ -253,7 +263,6 @@ static int ch341_port_probe(struct usb_serial_port *port) spin_lock_init(&priv->lock); priv->baud_rate = DEFAULT_BAUD_RATE; - priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR; r = ch341_configure(port->serial->dev, priv); if (r < 0) @@ -315,7 +324,7 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port) r = ch341_configure(serial->dev, priv); if (r) - goto out; + return r; if (tty) ch341_set_termios(tty, port, NULL); @@ -325,12 +334,19 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port) if (r) { dev_err(&port->dev, "%s - failed to submit interrupt urb: %d\n", __func__, r); - goto out; + return r; } r = usb_serial_generic_open(tty, port); + if (r) + goto err_kill_interrupt_urb; -out: return r; + return 0; + +err_kill_interrupt_urb: + usb_kill_urb(port->interrupt_in_urb); + + return r; } /* Old_termios contains the original termios settings and @@ -345,26 +361,25 @@ static void ch341_set_termios(struct tty_struct *tty, baud_rate = tty_get_baud_rate(tty); - priv->baud_rate = baud_rate; - if (baud_rate) { - spin_lock_irqsave(&priv->lock, flags); - priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS); - spin_unlock_irqrestore(&priv->lock, flags); + priv->baud_rate = baud_rate; ch341_set_baudrate(port->serial->dev, priv); - } else { - spin_lock_irqsave(&priv->lock, flags); - priv->line_control &= ~(CH341_BIT_DTR | CH341_BIT_RTS); - spin_unlock_irqrestore(&priv->lock, flags); } - ch341_set_handshake(port->serial->dev, priv->line_control); - /* Unimplemented: * (cflag & CSIZE) : 
data bits [5, 8] * (cflag & PARENB) : parity {NONE, EVEN, ODD} * (cflag & CSTOPB) : stop bits [1, 2] */ + + spin_lock_irqsave(&priv->lock, flags); + if (C_BAUD(tty) == B0) + priv->line_control &= ~(CH341_BIT_DTR | CH341_BIT_RTS); + else if (old_termios && (old_termios->c_cflag & CBAUD) == B0) + priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS); + spin_unlock_irqrestore(&priv->lock, flags); + + ch341_set_handshake(port->serial->dev, priv->line_control); } static void ch341_break_ctl(struct tty_struct *tty, int break_state) @@ -539,14 +554,23 @@ static int ch341_tiocmget(struct tty_struct *tty) static int ch341_reset_resume(struct usb_serial *serial) { - struct ch341_private *priv; - - priv = usb_get_serial_port_data(serial->port[0]); + struct usb_serial_port *port = serial->port[0]; + struct ch341_private *priv = usb_get_serial_port_data(port); + int ret; /* reconfigure ch341 serial port after bus-reset */ ch341_configure(serial->dev, priv); - return 0; + if (tty_port_initialized(&port->port)) { + ret = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO); + if (ret) { + dev_err(&port->dev, "failed to submit interrupt urb: %d\n", + ret); + return ret; + } + } + + return usb_serial_generic_resume(serial); } static struct usb_serial_driver ch341_device = { diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index 5f17a3b9916d79ea28c82b04755e23d5680d4b2d..80260b08398b2e55833c65d21e0be70cc43ccf01 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -50,6 +50,7 @@ #define CYBERJACK_PRODUCT_ID 0x0100 /* Function prototypes */ +static int cyberjack_attach(struct usb_serial *serial); static int cyberjack_port_probe(struct usb_serial_port *port); static int cyberjack_port_remove(struct usb_serial_port *port); static int cyberjack_open(struct tty_struct *tty, @@ -77,6 +78,7 @@ static struct usb_serial_driver cyberjack_device = { .description = "Reiner SCT Cyberjack USB card reader", .id_table = id_table, .num_ports = 1, + .attach = cyberjack_attach, .port_probe = cyberjack_port_probe, .port_remove = cyberjack_port_remove, .open = cyberjack_open, @@ -100,6 +102,14 @@ struct cyberjack_private { short wrsent; /* Data already sent */ }; +static int cyberjack_attach(struct usb_serial *serial) +{ + if (serial->num_bulk_out < serial->num_ports) + return -ENODEV; + + return 0; +} + static int cyberjack_port_probe(struct usb_serial_port *port) { struct cyberjack_private *priv; diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 97cabf803c2fa91a134c957dba74c4f77b97c712..b2f2e87aed945d7aceac24c99cf2f7c401beb5b2 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -1043,6 +1043,7 @@ static int garmin_write_bulk(struct usb_serial_port *port, "%s - usb_submit_urb(write bulk) failed with status = %d\n", __func__, status); count = status; + kfree(buffer); } /* we are done with this urb, so let the host driver diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 11c05ce2f35f8cd8ffe1ea46b0f6b0fed83e5064..36dfe9972b173bace76aef7910965947fe814ad3 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -2754,6 +2754,11 @@ static int edge_startup(struct usb_serial *serial) EDGE_COMPATIBILITY_MASK1, EDGE_COMPATIBILITY_MASK2 }; + if (serial->num_bulk_in < 1 || serial->num_interrupt_in < 1) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + dev = serial->dev; /* create our private serial structure */ diff 
--git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index fce82fd79f77bf59db864c2d1f09e166aa5d018a..c02808a304364f96d3602fb752d40f8eabc0333c 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1499,8 +1499,7 @@ static int do_boot_mode(struct edgeport_serial *serial, dev_dbg(dev, "%s - Download successful -- Device rebooting...\n", __func__); - /* return an error on purpose */ - return -ENODEV; + return 1; } stayinbootmode: @@ -1508,7 +1507,7 @@ static int do_boot_mode(struct edgeport_serial *serial, dev_dbg(dev, "%s - STAYING IN BOOT MODE\n", __func__); serial->product_info.TiMode = TI_MODE_BOOT; - return 0; + return 1; } static int ti_do_config(struct edgeport_port *port, int feature, int on) @@ -2549,6 +2548,13 @@ static int edge_startup(struct usb_serial *serial) int status; u16 product_id; + /* Make sure we have the required endpoints when in download mode. */ + if (serial->interface->cur_altsetting->desc.bNumEndpoints > 1) { + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) + return -ENODEV; + } + /* create our private serial structure */ edge_serial = kzalloc(sizeof(struct edgeport_serial), GFP_KERNEL); if (!edge_serial) @@ -2556,14 +2562,18 @@ static int edge_startup(struct usb_serial *serial) mutex_init(&edge_serial->es_lock); edge_serial->serial = serial; + INIT_DELAYED_WORK(&edge_serial->heartbeat_work, edge_heartbeat_work); usb_set_serial_data(serial, edge_serial); status = download_fw(edge_serial); - if (status) { + if (status < 0) { kfree(edge_serial); return status; } + if (status > 0) + return 1; /* bind but do not register any ports */ + product_id = le16_to_cpu( edge_serial->serial->dev->descriptor.idProduct); @@ -2575,7 +2585,6 @@ static int edge_startup(struct usb_serial *serial) } } - INIT_DELAYED_WORK(&edge_serial->heartbeat_work, edge_heartbeat_work); edge_heartbeat_schedule(edge_serial); return 0; @@ -2583,6 +2592,9 @@ static int edge_startup(struct usb_serial *serial) static void edge_disconnect(struct usb_serial *serial) { + struct edgeport_serial *edge_serial = usb_get_serial_data(serial); + + cancel_delayed_work_sync(&edge_serial->heartbeat_work); } static void edge_release(struct usb_serial *serial) diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index 344b4eea4bd59c4da0590f538323f37b5ed74074..d57fb51992182afec68a28fce42a242a4cd46ff2 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -68,6 +68,16 @@ struct iuu_private { u32 clk; }; +static int iuu_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_in < num_ports || serial->num_bulk_out < num_ports) + return -ENODEV; + + return 0; +} + static int iuu_port_probe(struct usb_serial_port *port) { struct iuu_private *priv; @@ -1196,6 +1206,7 @@ static struct usb_serial_driver iuu_device = { .tiocmset = iuu_tiocmset, .set_termios = iuu_set_termios, .init_termios = iuu_init_termios, + .attach = iuu_attach, .port_probe = iuu_port_probe, .port_remove = iuu_port_remove, }; diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index e49ad0c63ad8b54c61995965445054e34cda6869..83523fcf6fb9d6a75bfba7738362e1c8bbfa01c9 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -699,6 +699,19 @@ MODULE_FIRMWARE("keyspan_pda/keyspan_pda.fw"); MODULE_FIRMWARE("keyspan_pda/xircom_pgs.fw"); #endif +static int keyspan_pda_attach(struct usb_serial *serial) +{ + unsigned char 
num_ports = serial->num_ports; + + if (serial->num_bulk_out < num_ports || + serial->num_interrupt_in < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int keyspan_pda_port_probe(struct usb_serial_port *port) { @@ -776,6 +789,7 @@ static struct usb_serial_driver keyspan_pda_device = { .break_ctl = keyspan_pda_break_ctl, .tiocmget = keyspan_pda_tiocmget, .tiocmset = keyspan_pda_tiocmset, + .attach = keyspan_pda_attach, .port_probe = keyspan_pda_port_probe, .port_remove = keyspan_pda_port_remove, }; diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index fc5d3a791e08c61fad21ba48292659d1f1924d12..6cb45757818fae2222383358b4d38d7c87b2dbf4 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -192,10 +192,11 @@ static int klsi_105_get_line_state(struct usb_serial_port *port, status_buf, KLSI_STATUSBUF_LEN, 10000 ); - if (rc < 0) - dev_err(&port->dev, "Reading line status failed (error = %d)\n", - rc); - else { + if (rc != KLSI_STATUSBUF_LEN) { + dev_err(&port->dev, "reading line status failed: %d\n", rc); + if (rc >= 0) + rc = -EIO; + } else { status = get_unaligned_le16(status_buf); dev_info(&port->serial->dev->dev, "read status %x %x\n", @@ -296,7 +297,7 @@ static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) rc = usb_serial_generic_open(tty, port); if (rc) { retval = rc; - goto exit; + goto err_free_cfg; } rc = usb_control_msg(port->serial->dev, @@ -311,21 +312,38 @@ static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) if (rc < 0) { dev_err(&port->dev, "Enabling read failed (error = %d)\n", rc); retval = rc; + goto err_generic_close; } else dev_dbg(&port->dev, "%s - enabled reading\n", __func__); rc = klsi_105_get_line_state(port, &line_state); - if (rc >= 0) { - spin_lock_irqsave(&priv->lock, flags); - priv->line_state = line_state; - spin_unlock_irqrestore(&priv->lock, flags); - dev_dbg(&port->dev, "%s - read line state 0x%lx\n", __func__, line_state); - retval = 0; - } else + if (rc < 0) { retval = rc; + goto err_disable_read; + } + + spin_lock_irqsave(&priv->lock, flags); + priv->line_state = line_state; + spin_unlock_irqrestore(&priv->lock, flags); + dev_dbg(&port->dev, "%s - read line state 0x%lx\n", __func__, + line_state); + + return 0; -exit: +err_disable_read: + usb_control_msg(port->serial->dev, + usb_sndctrlpipe(port->serial->dev, 0), + KL5KUSB105A_SIO_CONFIGURE, + USB_TYPE_VENDOR | USB_DIR_OUT, + KL5KUSB105A_SIO_CONFIGURE_READ_OFF, + 0, /* index */ + NULL, 0, + KLSI_TIMEOUT); +err_generic_close: + usb_serial_generic_close(port); +err_free_cfg: kfree(cfg); + return retval; } diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index 2363654cafc9b79de61a30bcc00b7f875c470b60..813035f51fe73a4e0de69ce76e6c3329ba4d376c 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -51,6 +51,7 @@ /* Function prototypes */ +static int kobil_attach(struct usb_serial *serial); static int kobil_port_probe(struct usb_serial_port *probe); static int kobil_port_remove(struct usb_serial_port *probe); static int kobil_open(struct tty_struct *tty, struct usb_serial_port *port); @@ -86,6 +87,7 @@ static struct usb_serial_driver kobil_device = { .description = "KOBIL USB smart card terminal", .id_table = id_table, .num_ports = 1, + .attach = kobil_attach, .port_probe = kobil_port_probe, .port_remove = kobil_port_remove, .ioctl = kobil_ioctl, @@ -113,6 +115,16 @@ struct 
kobil_private { }; +static int kobil_attach(struct usb_serial *serial) +{ + if (serial->num_interrupt_out < serial->num_ports) { + dev_err(&serial->interface->dev, "missing interrupt-out endpoint\n"); + return -ENODEV; + } + + return 0; +} + static int kobil_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index de9992b492b08ba447fb1f594e0a4f12f1d3c9a7..136ff5e1b7c1f7244fcafe98bbe913fadc78f6ae 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -65,8 +65,6 @@ struct moschip_port { struct urb *write_urb_pool[NUM_URBS]; }; -static struct usb_serial_driver moschip7720_2port_driver; - #define USB_VENDOR_ID_MOSCHIP 0x9710 #define MOSCHIP_DEVICE_ID_7720 0x7720 #define MOSCHIP_DEVICE_ID_7715 0x7715 @@ -970,25 +968,6 @@ static void mos7720_bulk_out_data_callback(struct urb *urb) tty_port_tty_wakeup(&mos7720_port->port->port); } -/* - * mos77xx_probe - * this function installs the appropriate read interrupt endpoint callback - * depending on whether the device is a 7720 or 7715, thus avoiding costly - * run-time checks in the high-frequency callback routine itself. - */ -static int mos77xx_probe(struct usb_serial *serial, - const struct usb_device_id *id) -{ - if (id->idProduct == MOSCHIP_DEVICE_ID_7715) - moschip7720_2port_driver.read_int_callback = - mos7715_interrupt_callback; - else - moschip7720_2port_driver.read_int_callback = - mos7720_interrupt_callback; - - return 0; -} - static int mos77xx_calc_num_ports(struct usb_serial *serial) { u16 product = le16_to_cpu(serial->dev->descriptor.idProduct); @@ -1920,6 +1899,11 @@ static int mos7720_startup(struct usb_serial *serial) u16 product; int ret_val; + if (serial->num_bulk_in < 2 || serial->num_bulk_out < 2) { + dev_err(&serial->interface->dev, "missing bulk endpoints\n"); + return -ENODEV; + } + product = le16_to_cpu(serial->dev->descriptor.idProduct); dev = serial->dev; @@ -1944,19 +1928,18 @@ static int mos7720_startup(struct usb_serial *serial) tmp->interrupt_in_endpointAddress; serial->port[1]->interrupt_in_urb = NULL; serial->port[1]->interrupt_in_buffer = NULL; + + if (serial->port[0]->interrupt_in_urb) { + struct urb *urb = serial->port[0]->interrupt_in_urb; + + urb->complete = mos7715_interrupt_callback; + } } /* setting configuration feature to one */ usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), (__u8)0x03, 0x00, 0x01, 0x00, NULL, 0x00, 5000); - /* start the interrupt urb */ - ret_val = usb_submit_urb(serial->port[0]->interrupt_in_urb, GFP_KERNEL); - if (ret_val) - dev_err(&dev->dev, - "%s - Error %d submitting control urb\n", - __func__, ret_val); - #ifdef CONFIG_USB_SERIAL_MOS7715_PARPORT if (product == MOSCHIP_DEVICE_ID_7715) { ret_val = mos7715_parport_init(serial); @@ -1964,6 +1947,13 @@ static int mos7720_startup(struct usb_serial *serial) return ret_val; } #endif + /* start the interrupt urb */ + ret_val = usb_submit_urb(serial->port[0]->interrupt_in_urb, GFP_KERNEL); + if (ret_val) { + dev_err(&dev->dev, "failed to submit interrupt urb: %d\n", + ret_val); + } + /* LSR For Port 1 */ read_mos_reg(serial, 0, MOS7720_LSR, &data); dev_dbg(&dev->dev, "LSR:%x\n", data); @@ -1973,6 +1963,8 @@ static int mos7720_startup(struct usb_serial *serial) static void mos7720_release(struct usb_serial *serial) { + usb_kill_urb(serial->port[0]->interrupt_in_urb); + #ifdef CONFIG_USB_SERIAL_MOS7715_PARPORT /* close the parallel port */ @@ -2056,7 +2048,6 @@ static struct usb_serial_driver 
moschip7720_2port_driver = { .close = mos7720_close, .throttle = mos7720_throttle, .unthrottle = mos7720_unthrottle, - .probe = mos77xx_probe, .attach = mos7720_startup, .release = mos7720_release, .port_probe = mos7720_port_probe, @@ -2070,7 +2061,7 @@ static struct usb_serial_driver moschip7720_2port_driver = { .chars_in_buffer = mos7720_chars_in_buffer, .break_ctl = mos7720_break, .read_bulk_callback = mos7720_bulk_in_callback, - .read_int_callback = NULL /* dynamically assigned in probe() */ + .read_int_callback = mos7720_interrupt_callback, }; static struct usb_serial_driver * const serial_drivers[] = { diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 57426d703a098dd2d45d67de11acc0ccb5b2f3a5..4f9af47e6a29adbf29498302dcaa92880dadd670 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -2116,6 +2116,17 @@ static int mos7840_calc_num_ports(struct usb_serial *serial) return mos7840_num_ports; } +static int mos7840_attach(struct usb_serial *serial) +{ + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int mos7840_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; @@ -2391,6 +2402,7 @@ static struct usb_serial_driver moschip7840_4port_device = { .tiocmset = mos7840_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, + .attach = mos7840_attach, .port_probe = mos7840_port_probe, .port_remove = mos7840_port_remove, .read_bulk_callback = mos7840_bulk_in_callback, diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index f6c6900bccf01c87c88734b2a7417db9f0fade8d..a180b17d24323b074aee19e33bf0f497ad271d8a 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -38,6 +38,7 @@ static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int omninet_write_room(struct tty_struct *tty); static void omninet_disconnect(struct usb_serial *serial); +static int omninet_attach(struct usb_serial *serial); static int omninet_port_probe(struct usb_serial_port *port); static int omninet_port_remove(struct usb_serial_port *port); @@ -56,6 +57,7 @@ static struct usb_serial_driver zyxel_omninet_device = { .description = "ZyXEL - omni.net lcd plus usb", .id_table = id_table, .num_ports = 1, + .attach = omninet_attach, .port_probe = omninet_port_probe, .port_remove = omninet_port_remove, .open = omninet_open, @@ -104,6 +106,17 @@ struct omninet_data { __u8 od_outseq; /* Sequence number for bulk_out URBs */ }; +static int omninet_attach(struct usb_serial *serial) +{ + /* The second bulk-out endpoint is used for writing. 
*/ + if (serial->num_bulk_out < 2) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int omninet_port_probe(struct usb_serial_port *port) { struct omninet_data *od; diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 9894e341c6ac93953a119c224729881a00682ef9..42cc72e54c051b2115c358bcee8bfc534258d206 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -268,6 +268,8 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_CC864_SINGLE 0x1006 #define TELIT_PRODUCT_DE910_DUAL 0x1010 #define TELIT_PRODUCT_UE910_V2 0x1012 +#define TELIT_PRODUCT_LE922_USBCFG1 0x1040 +#define TELIT_PRODUCT_LE922_USBCFG2 0x1041 #define TELIT_PRODUCT_LE922_USBCFG0 0x1042 #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 @@ -1210,6 +1212,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0), .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG1), + .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG2), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3), .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff), @@ -1989,6 +1995,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d02, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) }, + { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff) }, /* D-Link DWM-158 */ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff), /* D-Link DWM-221 B1 */ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ @@ -2000,6 +2007,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c index a4b88bc038b6d0a943b76457714e65a9449881b2..b8bf52bf7a944d5fddd6976f90a63db3cd6807e5 100644 --- a/drivers/usb/serial/oti6858.c +++ b/drivers/usb/serial/oti6858.c @@ -134,6 +134,7 @@ static int oti6858_chars_in_buffer(struct tty_struct *tty); static int oti6858_tiocmget(struct tty_struct *tty); static int oti6858_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear); +static int oti6858_attach(struct usb_serial *serial); static int oti6858_port_probe(struct usb_serial_port *port); static int oti6858_port_remove(struct usb_serial_port *port); @@ -158,6 +159,7 @@ static struct usb_serial_driver oti6858_device = { .write_bulk_callback = oti6858_write_bulk_callback, .write_room = oti6858_write_room, .chars_in_buffer = oti6858_chars_in_buffer, + 
.attach = oti6858_attach, .port_probe = oti6858_port_probe, .port_remove = oti6858_port_remove, }; @@ -324,6 +326,20 @@ static void send_data(struct work_struct *work) usb_serial_port_softint(port); } +static int oti6858_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_in < num_ports || + serial->num_bulk_out < num_ports || + serial->num_interrupt_in < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int oti6858_port_probe(struct usb_serial_port *port) { struct oti6858_private *priv; diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index ae682e4eeaef575a23c90a8dda15407e862d42e1..1db4b61bdf7bd710d7be6e3ff81b97102d76fbe9 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -49,6 +49,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID) }, + { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID2) }, { USB_DEVICE(ATEN_VENDOR_ID2, ATEN_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID_UCSGT) }, @@ -220,9 +221,17 @@ static int pl2303_probe(struct usb_serial *serial, static int pl2303_startup(struct usb_serial *serial) { struct pl2303_serial_private *spriv; + unsigned char num_ports = serial->num_ports; enum pl2303_type type = TYPE_01; unsigned char *buf; + if (serial->num_bulk_in < num_ports || + serial->num_bulk_out < num_ports || + serial->num_interrupt_in < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + spriv = kzalloc(sizeof(*spriv), GFP_KERNEL); if (!spriv) return -ENOMEM; diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index e3b7af8adfb73ccefa92d4ba3c2c927a044dfa43..09d9be88209e1ce6b1f53dc052a53c5e4c491336 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -27,6 +27,7 @@ #define ATEN_VENDOR_ID 0x0557 #define ATEN_VENDOR_ID2 0x0547 #define ATEN_PRODUCT_ID 0x2008 +#define ATEN_PRODUCT_ID2 0x2118 #define IODATA_VENDOR_ID 0x04bb #define IODATA_PRODUCT_ID 0x0a03 diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 1bc6089b90083a05e0ef3e4ff71c0ef752e3831a..696458db7e3c45e661a9825d05df0fe25dc0a832 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -124,6 +124,7 @@ static const struct usb_device_id id_table[] = { {USB_DEVICE(0x1410, 0xa021)}, /* Novatel Gobi 3000 Composite */ {USB_DEVICE(0x413c, 0x8193)}, /* Dell Gobi 3000 QDL */ {USB_DEVICE(0x413c, 0x8194)}, /* Dell Gobi 3000 Composite */ + {USB_DEVICE(0x413c, 0x81a6)}, /* Dell DW5570 QDL (MC8805) */ {USB_DEVICE(0x1199, 0x68a4)}, /* Sierra Wireless QDL */ {USB_DEVICE(0x1199, 0x68a5)}, /* Sierra Wireless Modem */ {USB_DEVICE(0x1199, 0x68a8)}, /* Sierra Wireless QDL */ diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c index 85acb50a7ee2f4fe74ae26dc3dc96611cbd9d918..bd1a1307e0f002311789d2186a1853562044fb51 100644 --- a/drivers/usb/serial/quatech2.c +++ b/drivers/usb/serial/quatech2.c @@ -408,16 +408,12 @@ static void qt2_close(struct usb_serial_port *port) { struct usb_serial *serial; struct qt2_port_private *port_priv; - unsigned long flags; int i; serial = port->serial; port_priv = usb_get_serial_port_data(port); - spin_lock_irqsave(&port_priv->urb_lock, flags); 
usb_kill_urb(port_priv->write_urb); - port_priv->urb_in_use = false; - spin_unlock_irqrestore(&port_priv->urb_lock, flags); /* flush the port transmit buffer */ i = usb_control_msg(serial->dev, diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index ef0dbf0703c574eb62d1e2ce3be49e5186258aee..475e6c31b266b013ed4749b482c0477eda4a3904 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -154,6 +154,19 @@ static int spcp8x5_probe(struct usb_serial *serial, return 0; } +static int spcp8x5_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_in < num_ports || + serial->num_bulk_out < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int spcp8x5_port_probe(struct usb_serial_port *port) { const struct usb_device_id *id = usb_get_serial_data(port->serial); @@ -477,6 +490,7 @@ static struct usb_serial_driver spcp8x5_device = { .tiocmget = spcp8x5_tiocmget, .tiocmset = spcp8x5_tiocmset, .probe = spcp8x5_probe, + .attach = spcp8x5_attach, .port_probe = spcp8x5_port_probe, .port_remove = spcp8x5_port_remove, }; diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index a8b9bdba314fdc7bba899a29368f085f6d8a61ed..bdbddbc8bd4d0cd62398102cc32f507198e6519c 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -579,6 +579,13 @@ static int ti_startup(struct usb_serial *serial) goto free_tdev; } + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + status = -ENODEV; + goto free_tdev; + } + return 0; free_tdev: diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index af3c7eecff91d4a49fb1b49cdfce7b7d4c9b11fe..16cc18369111d039ffededa7559075a869638708 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2109,6 +2109,13 @@ UNUSUAL_DEV( 0x152d, 0x2566, 0x0114, 0x0114, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA ), +/* Reported-by George Cherian */ +UNUSUAL_DEV(0x152d, 0x9561, 0x0000, 0x9999, + "JMicron", + "JMS56x", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_REPORT_OPCODES), + /* * Entrega Technologies U1-SC25 (later Xircom PortGear PGSCSI) * and Mac USB Dock USB-SCSI */ diff --git a/drivers/usb/usbip/vudc_transfer.c b/drivers/usb/usbip/vudc_transfer.c index aba6bd478045113524935ccdd0055ffb4996e9eb..bc0296d937d0f628c16a04a544b14321a281af8c 100644 --- a/drivers/usb/usbip/vudc_transfer.c +++ b/drivers/usb/usbip/vudc_transfer.c @@ -339,6 +339,8 @@ static void v_timer(unsigned long _vudc) total = timer->frame_limit; } + /* We have to clear ep0 flags separately as it's not on the list */ + udc->ep[0].already_seen = 0; list_for_each_entry(_ep, &udc->gadget.ep_list, ep_list) { ep = to_vep(_ep); ep->already_seen = 0; diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c index 79451f7ef1b76301cc881379ad28ce12e4b4dc33..062c205f00469faf1f7226e152f61d920d71f27b 100644 --- a/drivers/usb/wusbcore/crypto.c +++ b/drivers/usb/wusbcore/crypto.c @@ -216,7 +216,6 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc, struct scatterlist sg[4], sg_dst; void *dst_buf; size_t dst_size; - const u8 bzero[16] = { 0 }; u8 iv[crypto_skcipher_ivsize(tfm_cbc)]; size_t zero_padding; @@ -261,7 +260,7 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc, sg_set_buf(&sg[1], 
&scratch->b1, sizeof(scratch->b1)); sg_set_buf(&sg[2], b, blen); /* 0 if well behaved :) */ - sg_set_buf(&sg[3], bzero, zero_padding); + sg_set_page(&sg[3], ZERO_PAGE(0), zero_padding, 0); sg_init_one(&sg_dst, dst_buf, dst_size); skcipher_request_set_tfm(req, tfm_cbc); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c6f2d89c0e97cd4c184e615be6e3d86aca722a0d..64613fbf5cf8a5f7fbbcd41f55032ee8ec0eb233 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -130,14 +130,14 @@ static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, static void vhost_init_is_le(struct vhost_virtqueue *vq) { - if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) - vq->is_le = true; + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) + || virtio_legacy_is_little_endian(); } #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ static void vhost_reset_is_le(struct vhost_virtqueue *vq) { - vq->is_le = virtio_legacy_is_little_endian(); + vhost_init_is_le(vq); } struct vhost_flush_struct { @@ -1713,10 +1713,8 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq) int r; bool is_le = vq->is_le; - if (!vq->private_data) { - vhost_reset_is_le(vq); + if (!vq->private_data) return 0; - } vhost_init_is_le(vq); diff --git a/drivers/video/fbdev/core/fbcmap.c b/drivers/video/fbdev/core/fbcmap.c index f89245b8ba8e9a28483c4ff5edb03b80a1a9b2e3..68a113594808f220aa818424cd6e342897806a74 100644 --- a/drivers/video/fbdev/core/fbcmap.c +++ b/drivers/video/fbdev/core/fbcmap.c @@ -163,17 +163,18 @@ void fb_dealloc_cmap(struct fb_cmap *cmap) int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) { - int tooff = 0, fromoff = 0; - int size; + unsigned int tooff = 0, fromoff = 0; + size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; - size = to->len - tooff; - if (size > (int) (from->len - fromoff)) - size = from->len - fromoff; - if (size <= 0) + if (fromoff >= from->len || tooff >= to->len) + return -EINVAL; + + size = min_t(size_t, to->len - tooff, from->len - fromoff); + if (size == 0) return -EINVAL; size *= sizeof(u16); @@ -187,17 +188,18 @@ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to) { - int tooff = 0, fromoff = 0; - int size; + unsigned int tooff = 0, fromoff = 0; + size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; - size = to->len - tooff; - if (size > (int) (from->len - fromoff)) - size = from->len - fromoff; - if (size <= 0) + if (fromoff >= from->len || tooff >= to->len) + return -EINVAL; + + size = min_t(size_t, to->len - tooff, from->len - fromoff); + if (size == 0) return -EINVAL; size *= sizeof(u16); diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index 7f6c9e6cfc6c99d8d9912db5d2f78242923f51c5..1e56b50e408234f826fd28fdaa1bee0807911dcf 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ b/drivers/video/fbdev/goldfishfb.c @@ -26,6 +26,7 @@ #include #include #include +#include enum { FB_GET_WIDTH = 0x00, @@ -234,7 +235,7 @@ static int goldfish_fb_probe(struct platform_device *pdev) fb->fb.var.activate = FB_ACTIVATE_NOW; fb->fb.var.height = readl(fb->reg_base + FB_GET_PHYS_HEIGHT); fb->fb.var.width = readl(fb->reg_base + FB_GET_PHYS_WIDTH); - fb->fb.var.pixclock = 10000; + fb->fb.var.pixclock = 0; fb->fb.var.red.offset = 11; fb->fb.var.red.length = 5; @@ -304,12 +305,25 @@ static int goldfish_fb_remove(struct platform_device *pdev) return 
0; } +static const struct of_device_id goldfish_fb_of_match[] = { + { .compatible = "google,goldfish-fb", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_fb_of_match); + +static const struct acpi_device_id goldfish_fb_acpi_match[] = { + { "GFSH0004", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_fb_acpi_match); static struct platform_driver goldfish_fb_driver = { .probe = goldfish_fb_probe, .remove = goldfish_fb_remove, .driver = { - .name = "goldfish_fb" + .name = "goldfish_fb", + .of_match_table = goldfish_fb_of_match, + .acpi_match_table = ACPI_PTR(goldfish_fb_acpi_match), } }; diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 48bfea91dbcac96c3265f5a652b616b96d06b707..50840984fbfac97a8b3ae8bcd232df244d446498 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -59,6 +59,7 @@ #define pr_fmt(fmt) "virtio-mmio: " fmt #include +#include #include #include #include @@ -497,6 +498,7 @@ static int virtio_mmio_probe(struct platform_device *pdev) struct virtio_mmio_device *vm_dev; struct resource *mem; unsigned long magic; + int rc; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!mem) @@ -545,9 +547,25 @@ static int virtio_mmio_probe(struct platform_device *pdev) } vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID); - if (vm_dev->version == 1) + if (vm_dev->version == 1) { writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE); + rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + /* + * In the legacy case, ensure our coherently-allocated virtio + * ring will be at an address expressable as a 32-bit PFN. + */ + if (!rc) + dma_set_coherent_mask(&pdev->dev, + DMA_BIT_MASK(32 + PAGE_SHIFT)); + } else { + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + } + if (rc) + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (rc) + dev_warn(&pdev->dev, "Failed to enable 64-bit or 32-bit DMA. 
Trying to continue, but this might not work.\n"); + platform_set_drvdata(pdev, vm_dev); return register_virtio_device(&vm_dev->vdev); diff --git a/drivers/vme/bridges/vme_ca91cx42.c b/drivers/vme/bridges/vme_ca91cx42.c index 6b5ee896af6318e9b50c384fcf3e062d24d89c47..7cc51223db1cbe8e905307850b7bd42b6c739279 100644 --- a/drivers/vme/bridges/vme_ca91cx42.c +++ b/drivers/vme/bridges/vme_ca91cx42.c @@ -464,7 +464,7 @@ static int ca91cx42_slave_get(struct vme_slave_resource *image, int *enabled, vme_bound = ioread32(bridge->base + CA91CX42_VSI_BD[i]); pci_offset = ioread32(bridge->base + CA91CX42_VSI_TO[i]); - *pci_base = (dma_addr_t)vme_base + pci_offset; + *pci_base = (dma_addr_t)*vme_base + pci_offset; *size = (unsigned long long)((vme_bound - *vme_base) + granularity); *enabled = 0; diff --git a/drivers/watchdog/mei_wdt.c b/drivers/watchdog/mei_wdt.c index 630bd189f16788fac980690b745bede9a02e85ff..2a9d5cdedea2cf287cc98a55d77a3d6fc48d8ed9 100644 --- a/drivers/watchdog/mei_wdt.c +++ b/drivers/watchdog/mei_wdt.c @@ -389,6 +389,8 @@ static int mei_wdt_register(struct mei_wdt *wdt) wdt->wdd.max_timeout = MEI_WDT_MAX_TIMEOUT; watchdog_set_drvdata(&wdt->wdd, wdt); + watchdog_stop_on_reboot(&wdt->wdd); + ret = watchdog_register_device(&wdt->wdd); if (ret) { dev_err(dev, "unable to register watchdog device = %d.\n", ret); diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index 5796b5d1b3f2cbe135f8b3241ca2b497514a0061..4f47b5e9095662cfdb0ff928118fcbf17668d566 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -209,7 +209,7 @@ static int qcom_wdt_probe(struct platform_device *pdev) wdt->wdd.parent = &pdev->dev; wdt->layout = regs; - if (readl(wdt->base + WDT_STS) & 1) + if (readl(wdt_addr(wdt, WDT_STS)) & 1) wdt->wdd.bootstatus = WDIOF_CARDRESET; /* diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index bb952121ea944b507044508fde7b32f7ec20638d..2ef2b61b69dfe0c644905fa4991871a2d668df7f 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -1007,7 +1007,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_IO; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP; if (use_ptemod) vma->vm_flags |= VM_DONTCOPY; diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 87e6035c9e81b24e686e731519df528060dc1a68..8e7a3d646531cc11c90abd42f9ba3bce6740f962 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -392,7 +392,7 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, if (dma_capable(dev, dev_addr, size) && !range_straddles_page_boundary(phys, size) && !xen_arch_need_swiotlb(dev, phys, dev_addr) && - !swiotlb_force) { + (swiotlb_force != SWIOTLB_FORCE)) { /* we are not interested in the dma_addr returned by * xen_dma_map_page, only in the potential cache flushes executed * by the function. 
*/ @@ -549,7 +549,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, phys_addr_t paddr = sg_phys(sg); dma_addr_t dev_addr = xen_phys_to_bus(paddr); - if (swiotlb_force || + if (swiotlb_force == SWIOTLB_FORCE || xen_arch_need_swiotlb(hwdev, paddr, dev_addr) || !dma_capable(hwdev, dev_addr, sg->length) || range_straddles_page_boundary(paddr, sg->length)) { diff --git a/fs/9p/acl.c b/fs/9p/acl.c index b3c2cc79c20d255f5d3cdf59e407ad65c67c7089..082d227fa56b378772883f3a4254e7fd05c436d5 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -277,6 +277,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler, case ACL_TYPE_ACCESS: if (acl) { struct iattr iattr; + struct posix_acl *old_acl = acl; retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl); if (retval) @@ -287,6 +288,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler, * by the mode bits. So don't * update ACL. */ + posix_acl_release(old_acl); value = NULL; size = 0; } diff --git a/fs/attr.c b/fs/attr.c index c902b3d53508004203e00abb3ae051280d485423..c4093c5196be65d35882561c8c8a73859838ca50 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -200,7 +200,7 @@ EXPORT_SYMBOL(setattr_copy); * the file open for write, as there can be no conflicting delegation in * that case. */ -int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) +int notify_change2(struct vfsmount *mnt, struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; umode_t mode = inode->i_mode; @@ -224,7 +224,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de return -EPERM; if (!inode_owner_or_capable(inode)) { - error = inode_permission(inode, MAY_WRITE); + error = inode_permission2(mnt, inode, MAY_WRITE); if (error) return error; } @@ -307,7 +307,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de if (error) return error; - if (inode->i_op->setattr) + if (mnt && inode->i_op->setattr2) + error = inode->i_op->setattr2(mnt, dentry, attr); + else if (inode->i_op->setattr) error = inode->i_op->setattr(dentry, attr); else error = simple_setattr(dentry, attr); @@ -320,4 +322,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de return error; } +EXPORT_SYMBOL(notify_change2); + +int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) +{ + return notify_change2(NULL, dentry, attr, delegated_inode); +} EXPORT_SYMBOL(notify_change); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 8712062275b83fb7f2995106c80b5e7e39292c21..5f685c8192981864c3e273acce95dacc40494367 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -106,6 +106,50 @@ static ssize_t bad_inode_listxattr(struct dentry *dentry, char *buffer, return -EIO; } +static const char *bad_inode_get_link(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) +{ + return ERR_PTR(-EIO); +} + +static struct posix_acl *bad_inode_get_acl(struct inode *inode, int type) +{ + return ERR_PTR(-EIO); +} + +static int bad_inode_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, u64 start, + u64 len) +{ + return -EIO; +} + +static int bad_inode_update_time(struct inode *inode, struct timespec *time, + int flags) +{ + return -EIO; +} + +static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry, + struct file *file, unsigned int open_flag, + umode_t create_mode, int *opened) +{ + return -EIO; +} + +static int 
bad_inode_tmpfile(struct inode *inode, struct dentry *dentry, + umode_t mode) +{ + return -EIO; +} + +static int bad_inode_set_acl(struct inode *inode, struct posix_acl *acl, + int type) +{ + return -EIO; +} + static const struct inode_operations bad_inode_ops = { .create = bad_inode_create, @@ -118,14 +162,17 @@ static const struct inode_operations bad_inode_ops = .mknod = bad_inode_mknod, .rename = bad_inode_rename2, .readlink = bad_inode_readlink, - /* follow_link must be no-op, otherwise unmounting this inode - won't work */ - /* put_link returns void */ - /* truncate returns void */ .permission = bad_inode_permission, .getattr = bad_inode_getattr, .setattr = bad_inode_setattr, .listxattr = bad_inode_listxattr, + .get_link = bad_inode_get_link, + .get_acl = bad_inode_get_acl, + .fiemap = bad_inode_fiemap, + .update_time = bad_inode_update_time, + .atomic_open = bad_inode_atomic_open, + .tmpfile = bad_inode_tmpfile, + .set_acl = bad_inode_set_acl, }; diff --git a/fs/block_dev.c b/fs/block_dev.c index 05b553368bb4f1a6cd10c8adc6c07b2f92c5f3b4..092a2eed1628952e959630e76136d97eee6a7ad5 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -832,7 +832,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, return true; /* already a holder */ else if (bdev->bd_holder != NULL) return false; /* held by someone else */ - else if (bdev->bd_contains == bdev) + else if (whole == bdev) return true; /* is a whole device which isn't held */ else if (whole->bd_holder == bd_may_claim) @@ -1950,6 +1950,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) spin_lock(&blockdev_superblock->s_inode_list_lock); list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) { struct address_space *mapping = inode->i_mapping; + struct block_device *bdev; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) || @@ -1970,8 +1971,12 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) */ iput(old_inode); old_inode = inode; + bdev = I_BDEV(inode); - func(I_BDEV(inode), arg); + mutex_lock(&bdev->bd_mutex); + if (bdev->bd_openers) + func(bdev, arg); + mutex_unlock(&bdev->bd_mutex); spin_lock(&blockdev_superblock->s_inode_list_lock); } diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index e0f071f6b5a761faa6a00b741017a759c91e100a..ff0b0be92d6122402f02c039eed8e4599fdce4df 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -86,6 +86,20 @@ btrfs_work_owner(struct btrfs_work *work) return work->wq->fs_info; } +bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq) +{ + /* + * We could compare wq->normal->pending with num_online_cpus() + * to support "thresh == NO_THRESHOLD" case, but it requires + * moving up atomic_inc/dec in thresh_queue/exec_hook. Let's + * postpone it until someone needs the support of that case. 
+ */ + if (wq->normal->thresh == NO_THRESHOLD) + return false; + + return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2; +} + BTRFS_WORK_HELPER(worker_helper); BTRFS_WORK_HELPER(delalloc_helper); BTRFS_WORK_HELPER(flush_delalloc_helper); @@ -259,6 +273,8 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) unsigned long flags; while (1) { + void *wtag; + spin_lock_irqsave(lock, flags); if (list_empty(list)) break; @@ -285,11 +301,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) spin_unlock_irqrestore(lock, flags); /* - * we don't want to call the ordered free functions - * with the lock held though + * We don't want to call the ordered free functions with the + * lock held though. Save the work as tag for the trace event, + * because the callback could free the structure. */ + wtag = work; work->ordered_free(work); - trace_btrfs_all_work_done(work); + trace_btrfs_all_work_done(wq->fs_info, wtag); } spin_unlock_irqrestore(lock, flags); } @@ -297,6 +315,7 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) static void normal_work_helper(struct btrfs_work *work) { struct __btrfs_workqueue *wq; + void *wtag; int need_order = 0; /* @@ -310,6 +329,8 @@ static void normal_work_helper(struct btrfs_work *work) if (work->ordered_func) need_order = 1; wq = work->wq; + /* Safe for tracepoints in case work gets freed by the callback */ + wtag = work; trace_btrfs_work_sched(work); thresh_exec_hook(wq); @@ -319,7 +340,7 @@ static void normal_work_helper(struct btrfs_work *work) run_ordered_work(wq); } if (!need_order) - trace_btrfs_all_work_done(work); + trace_btrfs_all_work_done(wq->fs_info, wtag); } void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func, diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 8e52484cd4615544a9d25687ba33f36a56c29d52..1f9597355c9d974ef11489afbed69d4a7df087d9 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -84,4 +84,5 @@ void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max); void btrfs_set_work_high_priority(struct btrfs_work *work); struct btrfs_fs_info *btrfs_work_owner(struct btrfs_work *work); struct btrfs_fs_info *btrfs_workqueue_owner(struct __btrfs_workqueue *wq); +bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq); #endif diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0b8ce2b9f7d0c8c052b8f73df269798450263784..86245b884fce29c312b5ad2c4ba16b5b161a6fba 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2210,6 +2210,8 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, cpu->target = le64_to_cpu(disk->target); cpu->flags = le64_to_cpu(disk->flags); cpu->limit = le64_to_cpu(disk->limit); + cpu->stripes_min = le32_to_cpu(disk->stripes_min); + cpu->stripes_max = le32_to_cpu(disk->stripes_max); } static inline void @@ -2228,6 +2230,8 @@ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk, disk->target = cpu_to_le64(cpu->target); disk->flags = cpu_to_le64(cpu->flags); disk->limit = cpu_to_le64(cpu->limit); + disk->stripes_min = cpu_to_le32(cpu->stripes_min); + disk->stripes_max = cpu_to_le32(cpu->stripes_max); } /* struct btrfs_super_block */ diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 0fcf5f25d524ab632a21f4853eff43baf7fb5e25..4d8f8a8c9c908d9d83cf5cdbbc91480599bbe1fb 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1353,7 +1353,8 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work) total_done++; 
btrfs_release_prepared_delayed_node(delayed_node); - if (async_work->nr == 0 || total_done < async_work->nr) + if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) || + total_done < async_work->nr) goto again; free_path: @@ -1369,7 +1370,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, { struct btrfs_async_delayed_work *async_work; - if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) + if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND || + btrfs_workqueue_normal_congested(fs_info->delayed_workers)) return 0; async_work = kmalloc(sizeof(*async_work), GFP_NOFS); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3a57f99d96aa7aa0af541e328af203ff73acdb64..1cd325765aaa8cd49be5100250c00e69561a0126 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -559,7 +559,15 @@ static noinline int check_leaf(struct btrfs_root *root, u32 nritems = btrfs_header_nritems(leaf); int slot; - if (nritems == 0) { + /* + * Extent buffers from a relocation tree have a owner field that + * corresponds to the subvolume tree they are based on. So just from an + * extent buffer alone we can not find out what is the id of the + * corresponding subvolume tree, so we can not figure out if the extent + * buffer corresponds to the root of the relocation tree or not. So skip + * this check for relocation trees. + */ + if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { struct btrfs_root *check_root; key.objectid = btrfs_header_owner(leaf); @@ -572,17 +580,24 @@ static noinline int check_leaf(struct btrfs_root *root, * open_ctree() some roots has not yet been set up. */ if (!IS_ERR_OR_NULL(check_root)) { + struct extent_buffer *eb; + + eb = btrfs_root_node(check_root); /* if leaf is the root, then it's fine */ - if (leaf->start != - btrfs_root_bytenr(&check_root->root_item)) { + if (leaf != eb) { CORRUPT("non-root leaf's nritems is 0", - leaf, root, 0); + leaf, check_root, 0); + free_extent_buffer(eb); return -EIO; } + free_extent_buffer(eb); } return 0; } + if (nritems == 0) + return 0; + /* Check the 0 item */ if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4607af38c72e100e6728ff41d8198140dd722d93..5909ae8c673102f857be948d97c3ff5f824cc188 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2537,11 +2537,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, if (ref && ref->seq && btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { spin_unlock(&locked_ref->lock); - btrfs_delayed_ref_unlock(locked_ref); spin_lock(&delayed_refs->lock); locked_ref->processing = 0; delayed_refs->num_heads_ready++; spin_unlock(&delayed_refs->lock); + btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; cond_resched(); count++; @@ -2587,7 +2587,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, */ if (must_insert_reserved) locked_ref->must_insert_reserved = 1; + spin_lock(&delayed_refs->lock); locked_ref->processing = 0; + delayed_refs->num_heads_ready++; + spin_unlock(&delayed_refs->lock); btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8e3a5a266917c0fac9da9f41ee080262f3394779..be4da91d880f6476dcf1e076ca555b65b96aa4cf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3819,10 +3819,7 @@ static int btrfs_read_locked_inode(struct inode *inode) break; case 
S_IFDIR: inode->i_fop = &btrfs_dir_file_operations; - if (root == root->fs_info->tree_root) - inode->i_op = &btrfs_dir_ro_inode_operations; - else - inode->i_op = &btrfs_dir_inode_operations; + inode->i_op = &btrfs_dir_inode_operations; break; case S_IFLNK: inode->i_op = &btrfs_symlink_inode_operations; @@ -5682,6 +5679,7 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; inode->i_op = &btrfs_dir_ro_inode_operations; + inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; inode->i_mtime = current_time(inode); @@ -10587,8 +10585,6 @@ static const struct inode_operations btrfs_dir_inode_operations = { static const struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, .permission = btrfs_permission, - .get_acl = btrfs_get_acl, - .set_acl = btrfs_set_acl, .update_time = btrfs_update_time, }; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 11f4fffe503e2382460c20d09a1502901bee5a0b..dfd99867ff4d7ed2e96c3f10751f1b7f9f884c9d 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2335,10 +2335,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) int err = -ENOMEM; int ret = 0; - mutex_lock(&fs_info->qgroup_rescan_lock); - fs_info->qgroup_rescan_running = true; - mutex_unlock(&fs_info->qgroup_rescan_lock); - path = btrfs_alloc_path(); if (!path) goto out; @@ -2449,6 +2445,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, sizeof(fs_info->qgroup_rescan_progress)); fs_info->qgroup_rescan_progress.objectid = progress_objectid; init_completion(&fs_info->qgroup_rescan_completion); + fs_info->qgroup_rescan_running = true; spin_unlock(&fs_info->qgroup_lock); mutex_unlock(&fs_info->qgroup_rescan_lock); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c4af0cdb783d0e2ee203ad416abb745fcabd7e02..2cf5e142675e1870ffd257315b1447608c8ccc53 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1395,14 +1395,23 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, root_key.offset = objectid; if (root->root_key.objectid == objectid) { + u64 commit_root_gen; + /* called by btrfs_init_reloc_root */ ret = btrfs_copy_root(trans, root, root->commit_root, &eb, BTRFS_TREE_RELOC_OBJECTID); BUG_ON(ret); - last_snap = btrfs_root_last_snapshot(&root->root_item); - btrfs_set_root_last_snapshot(&root->root_item, - trans->transid - 1); + /* + * Set the last_snapshot field to the generation of the commit + * root - like this ctree.c:btrfs_block_can_be_shared() behaves + * correctly (returns true) when the relocation root is created + * either inside the critical section of a transaction commit + * (through transaction.c:qgroup_account_snapshot()) and when + * it's created before the transaction commit is started. + */ + commit_root_gen = btrfs_header_generation(root->commit_root); + btrfs_set_root_last_snapshot(&root->root_item, commit_root_gen); } else { /* * called by btrfs_reloc_post_snapshot_hook. 
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3d33c4e41e5f9a38195436432e54314548076b0d..b89004513c09a9828be2ff88174ed8aab3adb864 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1940,12 +1940,11 @@ static noinline int find_dir_range(struct btrfs_root *root, next: /* check the next slot in the tree to see if it is a valid item */ nritems = btrfs_header_nritems(path->nodes[0]); + path->slots[0]++; if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(root, path); if (ret) goto out; - } else { - path->slots[0]++; } btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); @@ -5205,6 +5204,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, if (di_key.type == BTRFS_ROOT_ITEM_KEY) continue; + btrfs_release_path(path); di_inode = btrfs_iget(root->fs_info->sb, &di_key, root, NULL); if (IS_ERR(di_inode)) { @@ -5214,13 +5214,12 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, if (btrfs_inode_in_log(di_inode, trans->transid)) { iput(di_inode); - continue; + break; } ctx->log_new_dentries = false; if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK) log_mode = LOG_INODE_ALL; - btrfs_release_path(path); ret = btrfs_log_inode(trans, root, di_inode, log_mode, 0, LLONG_MAX, ctx); if (!ret && diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 16e6ded0b7f281bf72e8074b9d2896713fe4aef2..f3f21105b860447fb28373679ba434b44f475bb9 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2507,9 +2507,20 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, if (err < 0) ret = err; } else { - ret = wait_event_interruptible(ci->i_cap_wq, - try_get_cap_refs(ci, need, want, endoff, - true, &_got, &err)); + DEFINE_WAIT_FUNC(wait, woken_wake_function); + add_wait_queue(&ci->i_cap_wq, &wait); + + while (!try_get_cap_refs(ci, need, want, endoff, + true, &_got, &err)) { + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); + } + + remove_wait_queue(&ci->i_cap_wq, &wait); + if (err == -EAGAIN) continue; if (err < 0) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index a594c7879cc245a764afb945e37de2b908482951..1afa111910007f648022c44e30f4b6db8f1bcef1 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1255,7 +1255,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) struct ceph_mds_client *mdsc = ceph_sb_to_client(dir->i_sb)->mdsc; struct ceph_mds_request *req; - int op, mask, err; + int op, err; + u32 mask; if (flags & LOOKUP_RCU) return -ECHILD; @@ -1270,7 +1271,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; if (ceph_security_xattr_wanted(dir)) mask |= CEPH_CAP_XATTR_SHARED; - req->r_args.getattr.mask = mask; + req->r_args.getattr.mask = cpu_to_le32(mask); err = ceph_mdsc_do_request(mdsc, NULL, req); switch (err) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ef4d046473256009843b6b4c82e1b5b1e451b02d..12f2252f6c9803309fb04f9c0a84e24087d7f997 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -305,7 +305,8 @@ static int frag_tree_split_cmp(const void *l, const void *r) { struct ceph_frag_tree_split *ls = (struct ceph_frag_tree_split*)l; struct ceph_frag_tree_split *rs = (struct ceph_frag_tree_split*)r; - return ceph_frag_compare(ls->frag, rs->frag); + return ceph_frag_compare(le32_to_cpu(ls->frag), + le32_to_cpu(rs->frag)); } static bool is_frag_child(u32 f, struct ceph_inode_frag *frag) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 
815acd1a56d461b944c06d117457ccd99c42e2f0..6a26c7bd1286d84d394b99cd4274d28dcccc71ef 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -288,12 +288,13 @@ static int parse_reply_info_extra(void **p, void *end, struct ceph_mds_reply_info_parsed *info, u64 features) { - if (info->head->op == CEPH_MDS_OP_GETFILELOCK) + u32 op = le32_to_cpu(info->head->op); + + if (op == CEPH_MDS_OP_GETFILELOCK) return parse_reply_info_filelock(p, end, info, features); - else if (info->head->op == CEPH_MDS_OP_READDIR || - info->head->op == CEPH_MDS_OP_LSSNAP) + else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP) return parse_reply_info_dir(p, end, info, features); - else if (info->head->op == CEPH_MDS_OP_CREATE) + else if (op == CEPH_MDS_OP_CREATE) return parse_reply_info_create(p, end, info, features); else return -EIO; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 1f17f6bd7a601c56e40a6393c25eaee892e40a0f..203287f86525459bbd233b001b3b333437953dcf 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -646,6 +646,8 @@ struct TCP_Server_Info { unsigned int max_read; unsigned int max_write; __u8 preauth_hash[512]; + struct delayed_work reconnect; /* reconnect workqueue job */ + struct mutex reconnect_mutex; /* prevent simultaneous reconnects */ #endif /* CONFIG_CIFS_SMB2 */ unsigned long echo_interval; }; @@ -849,6 +851,7 @@ cap_unix(struct cifs_ses *ses) struct cifs_tcon { struct list_head tcon_list; int tc_count; + struct list_head rlist; /* reconnect list */ struct list_head openFileList; spinlock_t open_file_lock; /* protects list above */ struct cifs_ses *ses; /* pointer to session associated with */ @@ -922,6 +925,7 @@ struct cifs_tcon { bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ bool broken_sparse_sup; /* if server or share does not support sparse */ bool need_reconnect:1; /* connection reset, tid now invalid */ + bool need_reopen_files:1; /* need to reopen tcon file handles */ bool use_resilient:1; /* use resilient instead of durable handles */ bool use_persistent:1; /* use persistent instead of durable handles */ #ifdef CONFIG_CIFS_SMB2 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index ced0e42ce460963104d89bb8b941b899ceb33dea..cd8025a249bba990170e23d9ae6406e134ebdff2 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -206,6 +206,9 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid, struct tcon_link *tlink, struct cifs_pending_open *open); extern void cifs_del_pending_open(struct cifs_pending_open *open); +extern void cifs_put_tcp_session(struct TCP_Server_Info *server, + int from_reconnect); +extern void cifs_put_tcon(struct cifs_tcon *tcon); #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) extern void cifs_dfs_release_automount_timer(void); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4547aeddd12b19459d23c6ac9215f01b39b15189..893be0722643038f3a043c11d0300b107de47577 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -52,6 +52,9 @@ #include "nterr.h" #include "rfc1002pdu.h" #include "fscache.h" +#ifdef CONFIG_CIFS_SMB2 +#include "smb2proto.h" +#endif #define CIFS_PORT 445 #define RFC1001_PORT 139 @@ -2100,8 +2103,8 @@ cifs_find_tcp_session(struct smb_vol *vol) return NULL; } -static void -cifs_put_tcp_session(struct TCP_Server_Info *server) +void +cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) { struct task_struct *task; @@ -2118,6 +2121,19 @@ cifs_put_tcp_session(struct TCP_Server_Info *server) cancel_delayed_work_sync(&server->echo); +#ifdef CONFIG_CIFS_SMB2 + if 
(from_reconnect) + /* + * Avoid deadlock here: reconnect work calls + * cifs_put_tcp_session() at its end. Need to be sure + * that reconnect work does nothing with server pointer after + * that step. + */ + cancel_delayed_work(&server->reconnect); + else + cancel_delayed_work_sync(&server->reconnect); +#endif + spin_lock(&GlobalMid_Lock); server->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); @@ -2182,6 +2198,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info) INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); +#ifdef CONFIG_CIFS_SMB2 + INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server); + mutex_init(&tcp_ses->reconnect_mutex); +#endif memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr, sizeof(tcp_ses->srcaddr)); memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr, @@ -2340,7 +2360,7 @@ cifs_put_smb_ses(struct cifs_ses *ses) spin_unlock(&cifs_tcp_ses_lock); sesInfoFree(ses); - cifs_put_tcp_session(server); + cifs_put_tcp_session(server, 0); } #ifdef CONFIG_KEYS @@ -2514,7 +2534,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) mutex_unlock(&ses->session_mutex); /* existing SMB ses has a server reference already */ - cifs_put_tcp_session(server); + cifs_put_tcp_session(server, 0); free_xid(xid); return ses; } @@ -2604,7 +2624,7 @@ cifs_find_tcon(struct cifs_ses *ses, const char *unc) return NULL; } -static void +void cifs_put_tcon(struct cifs_tcon *tcon) { unsigned int xid; @@ -3792,7 +3812,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) else if (ses) cifs_put_smb_ses(ses); else - cifs_put_tcp_session(server); + cifs_put_tcp_session(server, 0); bdi_destroy(&cifs_sb->bdi); } @@ -4103,7 +4123,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info); if (IS_ERR(ses)) { tcon = (struct cifs_tcon *)ses; - cifs_put_tcp_session(master_tcon->ses->server); + cifs_put_tcp_session(master_tcon->ses->server, 0); goto out; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7f5f6176c6f15caff307e078320122141c119ab9..18a1e1d6671fc0825b2f265280a96e065e6c6aae 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -777,6 +777,11 @@ cifs_reopen_persistent_handles(struct cifs_tcon *tcon) struct list_head *tmp1; struct list_head tmp_list; + if (!tcon->use_persistent || !tcon->need_reopen_files) + return; + + tcon->need_reopen_files = false; + cifs_dbg(FYI, "Reopen persistent handles"); INIT_LIST_HEAD(&tmp_list); @@ -793,7 +798,8 @@ cifs_reopen_persistent_handles(struct cifs_tcon *tcon) list_for_each_safe(tmp, tmp1, &tmp_list) { open_file = list_entry(tmp, struct cifsFileInfo, rlist); - cifs_reopen_file(open_file, false /* do not flush */); + if (cifs_reopen_file(open_file, false /* do not flush */)) + tcon->need_reopen_files = true; list_del_init(&open_file->rlist); cifsFileInfo_put(open_file); } diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 9f51b81119f2b204361f25d86dcfd350a8917700..001528781b6b0f80552c33f3d8e93fb9cfc27b86 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -189,7 +189,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) xid = get_xid(); cifs_sb = CIFS_SB(inode->i_sb); - cifs_dbg(VFS, "cifs ioctl 0x%x\n", command); + cifs_dbg(FYI, "cifs ioctl 0x%x\n", command); switch (command) { case FS_IOC_GETFLAGS: if (pSMBFile == NULL) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 
8f6a2a5863b9d9275bfb6afb00fc16b867101275..a27fc8791551cc86ca14d2d65e7870990a393f1e 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -285,6 +285,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file) rc = -ENOMEM; goto error_exit; } + spin_lock_init(&cifsFile->file_info_lock); file->private_data = cifsFile; cifsFile->tlink = cifs_get_tlink(tlink); tcon = tlink_tcon(tlink); diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index f9e766f464beec408b628146c4e0d4e04010e0f2..b2aff0c6f22c528eb628a25247bad85b1d096a3c 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -260,7 +260,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) * and check it for zero before using. */ max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf; - if (!max_buf) { + if (max_buf < sizeof(struct smb2_lock_element)) { free_xid(xid); return -EINVAL; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5ca5ea4668a1482ef9643cd525ded6ebbaa7e326..87457227812c393fe1ee9e09bacd9f1c65d93f9e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -250,16 +250,19 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } cifs_mark_open_files_invalid(tcon); + if (tcon->use_persistent) + tcon->need_reopen_files = true; rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); mutex_unlock(&tcon->ses->session_mutex); - if (tcon->use_persistent) - cifs_reopen_persistent_handles(tcon); - cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); if (rc) goto out; + + if (smb2_command != SMB2_INTERNAL_CMD) + queue_delayed_work(cifsiod_wq, &server->reconnect, 0); + atomic_inc(&tconInfoReconnectCount); out: /* @@ -280,7 +283,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) case SMB2_CHANGE_NOTIFY: case SMB2_QUERY_INFO: case SMB2_SET_INFO: - return -EAGAIN; + rc = -EAGAIN; } unload_nls(nls_codepage); return rc; @@ -1972,6 +1975,55 @@ smb2_echo_callback(struct mid_q_entry *mid) add_credits(server, credits_received, CIFS_ECHO_OP); } +void smb2_reconnect_server(struct work_struct *work) +{ + struct TCP_Server_Info *server = container_of(work, + struct TCP_Server_Info, reconnect.work); + struct cifs_ses *ses; + struct cifs_tcon *tcon, *tcon2; + struct list_head tmp_list; + int tcon_exist = false; + + /* Prevent simultaneous reconnects that can corrupt tcon->rlist list */ + mutex_lock(&server->reconnect_mutex); + + INIT_LIST_HEAD(&tmp_list); + cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n"); + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + if (tcon->need_reconnect || tcon->need_reopen_files) { + tcon->tc_count++; + list_add_tail(&tcon->rlist, &tmp_list); + tcon_exist = true; + } + } + } + /* + * Get the reference to server struct to be sure that the last call of + * cifs_put_tcon() in the loop below won't release the server pointer. 
+ */ + if (tcon_exist) + server->srv_count++; + + spin_unlock(&cifs_tcp_ses_lock); + + list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) { + if (!smb2_reconnect(SMB2_INTERNAL_CMD, tcon)) + cifs_reopen_persistent_handles(tcon); + list_del_init(&tcon->rlist); + cifs_put_tcon(tcon); + } + + cifs_dbg(FYI, "Reconnecting tcons finished\n"); + mutex_unlock(&server->reconnect_mutex); + + /* now we can safely release srv struct */ + if (tcon_exist) + cifs_put_tcp_session(server, 1); +} + int SMB2_echo(struct TCP_Server_Info *server) { @@ -1984,32 +2036,11 @@ SMB2_echo(struct TCP_Server_Info *server) cifs_dbg(FYI, "In echo request\n"); if (server->tcpStatus == CifsNeedNegotiate) { - struct list_head *tmp, *tmp2; - struct cifs_ses *ses; - struct cifs_tcon *tcon; - - cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n"); - spin_lock(&cifs_tcp_ses_lock); - list_for_each(tmp, &server->smb_ses_list) { - ses = list_entry(tmp, struct cifs_ses, smb_ses_list); - list_for_each(tmp2, &ses->tcon_list) { - tcon = list_entry(tmp2, struct cifs_tcon, - tcon_list); - /* add check for persistent handle reconnect */ - if (tcon && tcon->need_reconnect) { - spin_unlock(&cifs_tcp_ses_lock); - rc = smb2_reconnect(SMB2_ECHO, tcon); - spin_lock(&cifs_tcp_ses_lock); - } - } - } - spin_unlock(&cifs_tcp_ses_lock); + /* No need to send echo on newly established connections */ + queue_delayed_work(cifsiod_wq, &server->reconnect, 0); + return rc; } - /* if no session, renegotiate failed above */ - if (server->tcpStatus == CifsNeedNegotiate) - return -EIO; - rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req); if (rc) return rc; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index fd3709e8de33eddee8c66cca29cb6653921b020e..dc0d141f33e25730ea0242d2f946034e321cb1a4 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -80,6 +80,8 @@ #define SMB2_SET_INFO cpu_to_le16(SMB2_SET_INFO_HE) #define SMB2_OPLOCK_BREAK cpu_to_le16(SMB2_OPLOCK_BREAK_HE) +#define SMB2_INTERNAL_CMD cpu_to_le16(0xFFFF) + #define NUMBER_OF_SMB2_COMMANDS 0x0013 /* BB FIXME - analyze following length BB */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index eb2cde2f64ba7c818c5ea0db10dd8cb192d94c67..f2d511a6971b88c0019526bcd5b3938383f4f106 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -96,6 +96,7 @@ extern int smb2_open_file(const unsigned int xid, extern int smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, const unsigned int xid); extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile); +extern void smb2_reconnect_server(struct work_struct *work); /* * SMB2 Worker functions - most of protocol specific implementation details diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 699b7868108f658a3262943c7763b13d59767a89..c12bffefa3c9b59b2290ecdb243ae308b08e7224 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -23,7 +23,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -#include +#include #include #include #include @@ -69,46 +69,22 @@ str_to_key(unsigned char *str, unsigned char *key) static int smbhash(unsigned char *out, const unsigned char *in, unsigned char *key) { - int rc; unsigned char key2[8]; - struct crypto_skcipher *tfm_des; - struct scatterlist sgin, sgout; - struct skcipher_request *req; + struct crypto_cipher *tfm_des; str_to_key(key, key2); - tfm_des = crypto_alloc_skcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC); + tfm_des = crypto_alloc_cipher("des", 0, 0); if (IS_ERR(tfm_des)) { - rc = PTR_ERR(tfm_des); - cifs_dbg(VFS, "could not allocate des crypto API\n"); - goto smbhash_err; - } - - req = skcipher_request_alloc(tfm_des, GFP_KERNEL); - if (!req) { - rc = -ENOMEM; cifs_dbg(VFS, "could not allocate des crypto API\n"); - goto smbhash_free_skcipher; + return PTR_ERR(tfm_des); } - crypto_skcipher_setkey(tfm_des, key2, 8); - - sg_init_one(&sgin, in, 8); - sg_init_one(&sgout, out, 8); + crypto_cipher_setkey(tfm_des, key2, 8); + crypto_cipher_encrypt_one(tfm_des, out, in); + crypto_free_cipher(tfm_des); - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, &sgin, &sgout, 8, NULL); - - rc = crypto_skcipher_encrypt(req); - if (rc) - cifs_dbg(VFS, "could not encrypt crypt key rc: %d\n", rc); - - skcipher_request_free(req); - -smbhash_free_skcipher: - crypto_free_skcipher(tfm_des); -smbhash_err: - return rc; + return 0; } static int diff --git a/fs/coredump.c b/fs/coredump.c index eb9c92c9b20f5de5e325d7a05e5d055d0816989e..8bdda8e660d8075a050b1bd121dcc75aa5eee8ad 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -744,7 +744,7 @@ void do_coredump(const siginfo_t *siginfo) goto close_fail; if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) goto close_fail; - if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) + if (do_truncate2(cprm.file->f_path.mnt, cprm.file->f_path.dentry, 0, 0, cprm.file)) goto close_fail; } diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 6865663aac69690f7ccdce030c48488a13ce917b..abc18847b98d7774fb7e701286a35e3838013ec0 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -171,6 +171,11 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) BUG_ON(1); } + /* No restrictions on file types which are never encrypted */ + if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && + !S_ISLNK(child->i_mode)) + return 1; + /* no restrictions if the parent directory is not encrypted */ if (!parent->i_sb->s_cop->is_encrypted(parent)) return 1; diff --git a/fs/dax.c b/fs/dax.c index 014defd2e744e00b4df273e88d2a7026a3db4ff9..bf6218da79287e0b0c3f416a00ba6340cbaf44f7 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1270,6 +1270,11 @@ iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct blk_dax_ctl dax = { 0 }; ssize_t map_len; + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } + dax.sector = iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9); dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK; diff --git a/fs/dcache.c b/fs/dcache.c index 972741b98ee91f74e39bad25f88a0ad4ab0ec2c9..362396ae5650ea81c62e68bfd1bce61a55df7c87 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1330,8 +1330,11 @@ int d_set_mounted(struct dentry *dentry) } spin_lock(&dentry->d_lock); if (!d_unlinked(dentry)) { - dentry->d_flags |= DCACHE_MOUNTED; - ret = 0; + ret = -EBUSY; + if (!d_mountpoint(dentry)) { + dentry->d_flags |= DCACHE_MOUNTED; + ret = 0; + } } spin_unlock(&dentry->d_lock); out: diff --git a/fs/exec.c b/fs/exec.c index 
4e497b9ee71ee96d0647721e4649feff7adaf7c1..c8ca0640a10ca65993ddc903b48031bc0b8844fb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -19,7 +19,7 @@ * current->executable is only used by the procfs. This allows a dispatch * table to check for several different types of binary formats. We keep * trying until we recognize the file or we run out of supported binary - * formats. + * formats. */ #include @@ -1266,6 +1266,13 @@ int flush_old_exec(struct linux_binprm * bprm) flush_thread(); current->personality &= ~bprm->per_clear; + /* + * We have to apply CLOEXEC before we change whether the process is + * dumpable (in setup_new_exec) to avoid a race with a process in userspace + * trying to access the should-be-closed file descriptors of a process + * undergoing exec(2). + */ + do_close_on_exec(current->files); return 0; out: @@ -1275,8 +1282,22 @@ EXPORT_SYMBOL(flush_old_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { - if (inode_permission(file_inode(file), MAY_READ) < 0) + struct inode *inode = file_inode(file); + if (inode_permission2(file->f_path.mnt, inode, MAY_READ) < 0) { + struct user_namespace *old, *user_ns; bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; + + /* Ensure mm->user_ns contains the executable */ + user_ns = old = bprm->mm->user_ns; + while ((user_ns != &init_user_ns) && + !privileged_wrt_inode_uidgid(user_ns, inode)) + user_ns = user_ns->parent; + + if (old != user_ns) { + bprm->mm->user_ns = get_user_ns(user_ns); + put_user_ns(old); + } + } } EXPORT_SYMBOL(would_dump); @@ -1306,7 +1327,6 @@ void setup_new_exec(struct linux_binprm * bprm) !gid_eq(bprm->cred->gid, current_egid())) { current->pdeath_signal = 0; } else { - would_dump(bprm, bprm->file); if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) set_dumpable(current->mm, suid_dumpable); } @@ -1315,7 +1335,6 @@ void setup_new_exec(struct linux_binprm * bprm) group */ current->self_exec_id++; flush_signal_handlers(current, 0); - do_close_on_exec(current->files); } EXPORT_SYMBOL(setup_new_exec); @@ -1406,7 +1425,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm) unsigned n_fs; if (p->ptrace) { - if (p->ptrace & PT_PTRACE_CAP) + if (ptracer_capable(p, current_user_ns())) bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP; else bprm->unsafe |= LSM_UNSAFE_PTRACE; @@ -1741,6 +1760,8 @@ static int do_execveat_common(int fd, struct filename *filename, if (retval < 0) goto out; + would_dump(bprm, bprm->file); + retval = exec_binprm(bprm); if (retval < 0) goto out; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index b1d52c14098e135f24e15ebeb8ca61b2e12da1c7..f976111710231d3090bf9229d8a31a013b70b1c1 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -414,17 +414,19 @@ static inline int ext4_inode_journal_mode(struct inode *inode) return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ /* We do not support data journalling with delayed allocation */ if (!S_ISREG(inode->i_mode) || - test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) - return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && - !test_opt(inode->i_sb, DELALLOC)) + test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || + (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && + !test_opt(inode->i_sb, DELALLOC))) { + /* We do not support data journalling for encrypted data */ + if (S_ISREG(inode->i_mode) && ext4_encrypted_inode(inode)) + return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ + } if 
(test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ - else - BUG(); + BUG(); } static inline int ext4_should_journal_data(struct inode *inode) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index bdef750eedd64db9f93e8126534638fefe60653c..17a257cc3ac5c7f910c8dfe5aaa4a3411071d3a8 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -337,8 +337,10 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode, len -= EXT4_MIN_INLINE_DATA_SIZE; value = kzalloc(len, GFP_NOFS); - if (!value) + if (!value) { + error = -ENOMEM; goto out; + } error = ext4_xattr_ibody_get(inode, i.name_index, i.name, value, len); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0241cabc4cebba99771f74f131781a28582b0ba7..1db9080026f5b9ad07bf06c7f2da3e3df5bdfb9e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3579,6 +3579,7 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; ssize_t ret; + int rw = iov_iter_rw(iter); #ifdef CONFIG_EXT4_FS_ENCRYPTION if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) @@ -3596,12 +3597,12 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) return 0; if (trace_android_fs_dataread_start_enabled() && - (iov_iter_rw(iter) == READ)) + (rw == READ)) trace_android_fs_dataread_start(inode, offset, count, current->pid, current->comm); if (trace_android_fs_datawrite_start_enabled() && - (iov_iter_rw(iter) == WRITE)) + (rw == WRITE)) trace_android_fs_datawrite_start(inode, offset, count, current->pid, current->comm); @@ -3614,10 +3615,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); if (trace_android_fs_dataread_start_enabled() && - (iov_iter_rw(iter) == READ)) + (rw == READ)) trace_android_fs_dataread_end(inode, offset, count); if (trace_android_fs_datawrite_start_enabled() && - (iov_iter_rw(iter) == WRITE)) + (rw == WRITE)) trace_android_fs_datawrite_end(inode, offset, count); return ret; @@ -4461,6 +4462,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) struct inode *inode; journal_t *journal = EXT4_SB(sb)->s_journal; long ret; + loff_t size; int block; uid_t i_uid; gid_t i_gid; @@ -4561,6 +4563,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_file_acl |= ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; inode->i_size = ext4_isize(raw_inode); + if ((size = i_size_read(inode)) < 0) { + EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size); + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_disksize = inode->i_size; #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 6fbdf15b824e30afc98d2505284aad99678de484..cec9280950b899620c95bd3598b408e930607d56 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -751,6 +751,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secure_erase(q)) return -EOPNOTSUPP; + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 81e4b5610d6e79acfc1bb76b5cad9a1e098a1400..168ab9fae22053eb1d5e840e2fb2cba9496cd290 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c 
@@ -669,7 +669,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, ext4_grpblk_t min; ext4_grpblk_t max; ext4_grpblk_t chunk; - unsigned short border; + unsigned int border; BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb)); @@ -2287,7 +2287,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) struct ext4_group_info *grinfo; struct sg { struct ext4_group_info info; - ext4_grpblk_t counters[16]; + ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; } sg; group--; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 52b0530c5d65a95fa0512b29cb2b3645d0277b69..bbc316db94954abbfacf8fa2bba1ca799d13f669 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3193,10 +3193,15 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, ext4_set_bit(s++, buf); count++; } - for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { - ext4_set_bit(EXT4_B2C(sbi, s++), buf); - count++; + j = ext4_bg_num_gdb(sb, grp); + if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) { + ext4_error(sb, "Invalid number of block group " + "descriptor blocks: %d", j); + j = EXT4_BLOCKS_PER_GROUP(sb) - s; } + count += j; + for (; j > 0; j--) + ext4_set_bit(EXT4_B2C(sbi, s++), buf); } if (!count) return 0; @@ -3301,7 +3306,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) char *orig_data = kstrdup(data, GFP_KERNEL); struct buffer_head *bh; struct ext4_super_block *es = NULL; - struct ext4_sb_info *sbi; + struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); ext4_fsblk_t block; ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; @@ -3320,16 +3325,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; ext4_group_t first_not_zeroed; - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) - goto out_free_orig; + if ((data && !orig_data) || !sbi) + goto out_free_base; sbi->s_blockgroup_lock = kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); - if (!sbi->s_blockgroup_lock) { - kfree(sbi); - goto out_free_orig; - } + if (!sbi->s_blockgroup_lock) + goto out_free_base; + sb->s_fs_info = sbi; sbi->s_sb = sb; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; @@ -3475,11 +3478,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; - if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, - &journal_devnum, &journal_ioprio, 0)) { - ext4_msg(sb, KERN_WARNING, - "failed to parse options in superblock: %s", - sbi->s_es->s_mount_opts); + if (sbi->s_es->s_mount_opts[0]) { + char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts, + sizeof(sbi->s_es->s_mount_opts), + GFP_KERNEL); + if (!s_mount_opts) + goto failed_mount; + if (!parse_options(s_mount_opts, sb, &journal_devnum, + &journal_ioprio, 0)) { + ext4_msg(sb, KERN_WARNING, + "failed to parse options in superblock: %s", + s_mount_opts); + } + kfree(s_mount_opts); } sbi->s_def_mount_opt = sbi->s_mount_opt; if (!parse_options((char *) data, sb, &journal_devnum, @@ -3505,6 +3516,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "both data=journal and dax"); goto failed_mount; } + if (ext4_has_feature_encrypt(sb)) { + ext4_msg(sb, KERN_WARNING, + "encrypted files will use data=ordered " + "instead of data journaling mode"); + } if (test_opt(sb, DELALLOC)) clear_opt(sb, DELALLOC); } else { @@ -3660,12 +3676,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_blocks_per_group = 
le32_to_cpu(es->s_blocks_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); - if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) - goto cantfind_ext4; sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); if (sbi->s_inodes_per_block == 0) goto cantfind_ext4; + if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || + sbi->s_inodes_per_group > blocksize * 8) { + ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n", + sbi->s_blocks_per_group); + goto failed_mount; + } sbi->s_itb_per_group = sbi->s_inodes_per_group / sbi->s_inodes_per_block; sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); @@ -3748,13 +3768,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } sbi->s_cluster_ratio = clustersize / blocksize; - if (sbi->s_inodes_per_group > blocksize * 8) { - ext4_msg(sb, KERN_ERR, - "#inodes per group too big: %lu", - sbi->s_inodes_per_group); - goto failed_mount; - } - /* Do we have standard group size of clustersize * 8 blocks ? */ if (sbi->s_blocks_per_group == clustersize << 3) set_opt2(sb, STD_GROUP_SIZE); @@ -3814,6 +3827,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); + if (ext4_has_feature_meta_bg(sb)) { + if (le32_to_cpu(es->s_first_meta_bg) >= db_count) { + ext4_msg(sb, KERN_WARNING, + "first meta block group too large: %u " + "(group descriptor block count %u)", + le32_to_cpu(es->s_first_meta_bg), db_count); + goto failed_mount; + } + } sbi->s_group_desc = ext4_kvmalloc(db_count * sizeof(struct buffer_head *), GFP_KERNEL); @@ -4160,7 +4182,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " - "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, + "Opts: %.*s%s%s", descr, + (int) sizeof(sbi->s_es->s_mount_opts), + sbi->s_es->s_mount_opts, *sbi->s_es->s_mount_opts ? "; " : "", orig_data); if (es->s_error_count) @@ -4239,8 +4263,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) out_fail: sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); +out_free_base: kfree(sbi); -out_free_orig: kfree(orig_data); return err ? err : ret; } @@ -4550,7 +4574,8 @@ static int ext4_commit_super(struct super_block *sb, int sync) &EXT4_SB(sb)->s_freeinodes_counter)); BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); - lock_buffer(sbh); + if (sync) + lock_buffer(sbh); if (buffer_write_io_error(sbh)) { /* * Oh, dear. A previous attempt to write the @@ -4566,8 +4591,8 @@ static int ext4_commit_super(struct super_block *sb, int sync) set_buffer_uptodate(sbh); } mark_buffer_dirty(sbh); - unlock_buffer(sbh); if (sync) { + unlock_buffer(sbh); error = __sync_dirty_buffer(sbh, test_opt(sb, BARRIER) ? 
WRITE_FUA : WRITE_SYNC); if (error) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7e9b504bd8b295787b22c8c933747894e42451d2..b4dbc2f59656c18fa6c2e488e11a680c834378d7 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -772,6 +772,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) if (sanity_check_ckpt(sbi)) goto fail_no_cp; + if (cur_page == cp1) + sbi->cur_cp_pack = 1; + else + sbi->cur_cp_pack = 2; + if (cp_blks <= 1) goto done; @@ -1123,7 +1128,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) le32_to_cpu(ckpt->checksum_offset))) = cpu_to_le32(crc32); - start_blk = __start_cp_addr(sbi); + start_blk = __start_cp_next_addr(sbi); /* need to wait for end_io results */ wait_on_all_pages_writeback(sbi); @@ -1187,6 +1192,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) clear_prefree_segments(sbi, cpc); clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); + __set_cp_next_pack(sbi); /* * redirty superblock if metadata like node page or inode cache is diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b3cf04ebeb7a2823f08039e1cf59b1ecb71f9329..aee4a45117aa8be5c501a3f32d1e190710b1897e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -717,7 +717,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, } prealloc = 0; - ofs_in_node = dn.ofs_in_node; + last_ofs_in_node = ofs_in_node = dn.ofs_in_node; end_offset = ADDRS_PER_PAGE(dn.node_page, inode); next_block: @@ -1763,12 +1763,12 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) trace_f2fs_direct_IO_enter(inode, offset, count, rw); if (trace_android_fs_dataread_start_enabled() && - (iov_iter_rw(iter) == READ)) + (rw == READ)) trace_android_fs_dataread_start(inode, offset, count, current->pid, current->comm); if (trace_android_fs_datawrite_start_enabled() && - (iov_iter_rw(iter) == WRITE)) + (rw == WRITE)) trace_android_fs_datawrite_start(inode, offset, count, current->pid, current->comm); @@ -1784,10 +1784,10 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } if (trace_android_fs_dataread_start_enabled() && - (iov_iter_rw(iter) == READ)) + (rw == READ)) trace_android_fs_dataread_end(inode, offset, count); if (trace_android_fs_datawrite_start_enabled() && - (iov_iter_rw(iter) == WRITE)) + (rw == WRITE)) trace_android_fs_datawrite_end(inode, offset, count); trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index fb245bd302e4a975abfc4d256a1c162f389d05ec..687998e9557c67af9ee7910f26b4c5762943c9bb 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -310,17 +310,17 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - inmem: %4lld, wb_bios: %4d\n", + seq_printf(s, " - inmem: %4d, wb_bios: %4d\n", si->inmem_pages, si->wb_bios); - seq_printf(s, " - nodes: %4lld in %4d\n", + seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); - seq_printf(s, " - dents: %4lld in dirs:%4d (%4d)\n", + seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", si->ndirty_dent, si->ndirty_dirs, si->ndirty_all); - seq_printf(s, " - datas: %4lld in files:%4d\n", + seq_printf(s, " - datas: %4d in files:%4d\n", si->ndirty_data, si->ndirty_files); - seq_printf(s, " - meta: %4lld in %4d\n", + seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); - 
seq_printf(s, " - imeta: %4lld\n", + seq_printf(s, " - imeta: %4d\n", si->ndirty_imeta); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", si->dirty_nats, si->nats, si->dirty_sits, si->sits); @@ -373,6 +373,7 @@ static int stat_open(struct inode *inode, struct file *file) } static const struct file_operations stat_fops = { + .owner = THIS_MODULE, .open = stat_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9e8de18a168af67782254796eb13b75068421619..506af456412fccbe5b9a76c45fa4eae14c6fe2df 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -428,7 +428,7 @@ struct f2fs_inode_info { /* Use below internally in f2fs*/ unsigned long flags; /* use to pass per-file flags */ struct rw_semaphore i_sem; /* protect fi info */ - struct percpu_counter dirty_pages; /* # of dirty pages */ + atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ nid_t i_xattr_nid; /* node id that contains xattrs */ @@ -764,6 +764,7 @@ struct f2fs_sb_info { /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ + int cur_cp_pack; /* remain current cp pack */ spinlock_t cp_lock; /* for flag in ckpt */ struct inode *meta_inode; /* cache meta blocks */ struct mutex cp_mutex; /* checkpoint procedure lock */ @@ -818,7 +819,7 @@ struct f2fs_sb_info { atomic_t nr_wb_bios; /* # of writeback bios */ /* # of pages, see count_type */ - struct percpu_counter nr_pages[NR_COUNT_TYPE]; + atomic_t nr_pages[NR_COUNT_TYPE]; /* # of allocated blocks */ struct percpu_counter alloc_valid_block_count; @@ -1232,7 +1233,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) { - percpu_counter_inc(&sbi->nr_pages[count_type]); + atomic_inc(&sbi->nr_pages[count_type]); if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES) return; @@ -1242,14 +1243,14 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) static inline void inode_inc_dirty_pages(struct inode *inode) { - percpu_counter_inc(&F2FS_I(inode)->dirty_pages); + atomic_inc(&F2FS_I(inode)->dirty_pages); inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); } static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) { - percpu_counter_dec(&sbi->nr_pages[count_type]); + atomic_dec(&sbi->nr_pages[count_type]); } static inline void inode_dec_dirty_pages(struct inode *inode) @@ -1258,19 +1259,19 @@ static inline void inode_dec_dirty_pages(struct inode *inode) !S_ISLNK(inode->i_mode)) return; - percpu_counter_dec(&F2FS_I(inode)->dirty_pages); + atomic_dec(&F2FS_I(inode)->dirty_pages); dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? 
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); } static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type) { - return percpu_counter_sum_positive(&sbi->nr_pages[count_type]); + return atomic_read(&sbi->nr_pages[count_type]); } -static inline s64 get_dirty_pages(struct inode *inode) +static inline int get_dirty_pages(struct inode *inode) { - return percpu_counter_sum_positive(&F2FS_I(inode)->dirty_pages); + return atomic_read(&F2FS_I(inode)->dirty_pages); } static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) @@ -1329,22 +1330,27 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) { - block_t start_addr; - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - unsigned long long ckpt_version = cur_cp_version(ckpt); - - start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); + block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); - /* - * odd numbered checkpoint should at cp segment 0 - * and even segment must be at cp segment 1 - */ - if (!(ckpt_version & 1)) + if (sbi->cur_cp_pack == 2) start_addr += sbi->blocks_per_seg; + return start_addr; +} +static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi) +{ + block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); + + if (sbi->cur_cp_pack == 1) + start_addr += sbi->blocks_per_seg; return start_addr; } +static inline void __set_cp_next_pack(struct f2fs_sb_info *sbi) +{ + sbi->cur_cp_pack = (sbi->cur_cp_pack == 1) ? 2 : 1; +} + static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) { return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); @@ -2181,8 +2187,8 @@ struct f2fs_stat_info { unsigned long long hit_largest, hit_cached, hit_rbtree; unsigned long long hit_total, total_ext; int ext_tree, zombie_tree, ext_node; - s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; - s64 inmem_pages; + int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; + int inmem_pages; unsigned int ndirty_dirs, ndirty_files, ndirty_all; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c7865073cd26acc3c3aa697c2ec82f9d2edc3dc5..801111e1f8ef94c7caa9303984f7205450e00d53 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -967,7 +967,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, new_size = (dst + i) << PAGE_SHIFT; if (dst_inode->i_size < new_size) f2fs_i_size_write(dst_inode, new_size); - } while ((do_replace[i] || blkaddr[i] == NULL_ADDR) && --ilen); + } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR)); f2fs_put_dnode(&dn); } else { @@ -1526,7 +1526,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) goto out; f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, - "Unexpected flush for atomic writes: ino=%lu, npages=%lld", + "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6132b4ce4e4ce348eec2c7c33ddce3700cb14b1b..013c6a541d6b9b4a81e5da422c92b488fa8ccceb 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -558,13 +558,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_once((void *) fi); - if (percpu_counter_init(&fi->dirty_pages, 0, GFP_NOFS)) { - kmem_cache_free(f2fs_inode_cachep, fi); - return NULL; - } - /* Initialize f2fs-specific inode info */ 
fi->vfs_inode.i_version = 1; + atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; fi->i_advise = 0; init_rwsem(&fi->i_sem); @@ -687,16 +683,11 @@ static void f2fs_i_callback(struct rcu_head *head) static void f2fs_destroy_inode(struct inode *inode) { - percpu_counter_destroy(&F2FS_I(inode)->dirty_pages); call_rcu(&inode->i_rcu, f2fs_i_callback); } static void destroy_percpu_info(struct f2fs_sb_info *sbi) { - int i; - - for (i = 0; i < NR_COUNT_TYPE; i++) - percpu_counter_destroy(&sbi->nr_pages[i]); percpu_counter_destroy(&sbi->alloc_valid_block_count); percpu_counter_destroy(&sbi->total_valid_inode_count); } @@ -1447,6 +1438,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) static void init_sb_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = sbi->raw_super; + int i; sbi->log_sectors_per_block = le32_to_cpu(raw_super->log_sectors_per_block); @@ -1471,6 +1463,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); + for (i = 0; i < NR_COUNT_TYPE; i++) + atomic_set(&sbi->nr_pages[i], 0); + INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); mutex_init(&sbi->wio_mutex[NODE]); @@ -1486,13 +1481,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) static int init_percpu_info(struct f2fs_sb_info *sbi) { - int i, err; - - for (i = 0; i < NR_COUNT_TYPE; i++) { - err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); - if (err) - return err; - } + int err; err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL); if (err) diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 7dca743b2ce1c8796155a14c3d3a83023eed3de4..940c683561dd377e33ff791ce6b406a45518dd54 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -44,6 +44,7 @@ void set_fs_pwd(struct fs_struct *fs, const struct path *path) if (old_pwd.dentry) path_put(&old_pwd); } +EXPORT_SYMBOL(set_fs_pwd); static inline int replace_path(struct path *p, const struct path *old, const struct path *new) { @@ -89,6 +90,7 @@ void free_fs_struct(struct fs_struct *fs) path_put(&fs->pwd); kmem_cache_free(fs_cachep, fs); } +EXPORT_SYMBOL(free_fs_struct); void exit_fs(struct task_struct *tsk) { @@ -127,6 +129,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) } return fs; } +EXPORT_SYMBOL_GPL(copy_fs_struct); int unshare_fs_struct(void) { diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e920bf06c1fac297694533b1770f459727a8196b..3fd1e2183e7623212b2747193552e783ce84c7fc 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2033,7 +2033,6 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head) struct fuse_req *req; req = list_entry(head->next, struct fuse_req, list); req->out.h.error = -ECONNABORTED; - clear_bit(FR_PENDING, &req->flags); clear_bit(FR_SENT, &req->flags); list_del_init(&req->list); request_end(fc, req); @@ -2111,6 +2110,8 @@ void fuse_abort_conn(struct fuse_conn *fc) spin_lock(&fiq->waitq.lock); fiq->connected = 0; list_splice_init(&fiq->pending, &to_end2); + list_for_each_entry(req, &to_end2, list) + clear_bit(FR_PENDING, &req->flags); while (forget_pending(fiq)) kfree(dequeue_forget(fiq, 1, NULL)); wake_up_all_locked(&fiq->waitq); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 3545ec65950864d1d3338cfee54908492aafb2b1..fc8ba62d5c31f8a857988e6c0b799bdae40883b3 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -68,7 +68,7 @@ static u64 time_to_jiffies(u64 sec, u32 nsec) if (sec || nsec) { struct timespec64 ts = { sec, - max_t(u32, nsec, NSEC_PER_SEC - 1) + min_t(u32, nsec, 
NSEC_PER_SEC - 1) }; return get_jiffies_64() + timespec64_to_jiffies(&ts); @@ -334,6 +334,7 @@ const struct dentry_operations fuse_dentry_operations = { const struct dentry_operations fuse_root_dentry_operations = { .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, + .d_canonical_path = fuse_dentry_canonical_path, }; int fuse_valid_type(int m) diff --git a/fs/inode.c b/fs/inode.c index 88110fd0b282e49246dc9cd93a1d6e173d951d7b..0aaebd1de454e914c677ee51d475243ddc7d8aea 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1780,7 +1780,7 @@ int dentry_needs_remove_privs(struct dentry *dentry) return mask; } -static int __remove_privs(struct dentry *dentry, int kill) +static int __remove_privs(struct vfsmount *mnt, struct dentry *dentry, int kill) { struct iattr newattrs; @@ -1789,7 +1789,7 @@ static int __remove_privs(struct dentry *dentry, int kill) * Note we call this on write, so notify_change will not * encounter any conflicting delegations: */ - return notify_change(dentry, &newattrs, NULL); + return notify_change2(mnt, dentry, &newattrs, NULL); } /* @@ -1811,7 +1811,7 @@ int file_remove_privs(struct file *file) if (kill < 0) return kill; if (kill) - error = __remove_privs(dentry, kill); + error = __remove_privs(file->f_path.mnt, dentry, kill); if (!error) inode_has_no_xattr(inode); diff --git a/fs/internal.h b/fs/internal.h index f4da3341b4a37dd36564db16a8870efaec1f4e48..15fe2aac4e2cb79b488ef4ae998b73176c9995bc 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -88,9 +88,11 @@ extern struct file *get_empty_filp(void); * super.c */ extern int do_remount_sb(struct super_block *, int, void *, int); +extern int do_remount_sb2(struct vfsmount *, struct super_block *, int, + void *, int); extern bool trylock_super(struct super_block *sb); extern struct dentry *mount_fs(struct file_system_type *, - int, const char *, void *); + int, const char *, struct vfsmount *, void *); extern struct super_block *user_get_super(dev_t); /* diff --git a/fs/iomap.c b/fs/iomap.c index a8ee8c33ca782dbe4a4c17f42bf91fda9e83a523..814ae8f9587dd52e6c4d190c5f77fa65fc51344b 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -113,6 +113,9 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, BUG_ON(pos + len > iomap->offset + iomap->length); + if (fatal_signal_pending(current)) + return -EINTR; + page = grab_cache_page_write_begin(inode->i_mapping, index, flags); if (!page) return -ENOMEM; diff --git a/fs/namei.c b/fs/namei.c index 5b4eed2215304a14ac2614058ae2b2002a3f2ae9..8fa2ddc5ed35065f75a009a5eef452effc2a30f7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -375,9 +375,11 @@ EXPORT_SYMBOL(generic_permission); * flag in inode->i_opflags, that says "this has not special * permission function, use the fast case". */ -static inline int do_inode_permission(struct inode *inode, int mask) +static inline int do_inode_permission(struct vfsmount *mnt, struct inode *inode, int mask) { if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { + if (likely(mnt && inode->i_op->permission2)) + return inode->i_op->permission2(mnt, inode, mask); if (likely(inode->i_op->permission)) return inode->i_op->permission(inode, mask); @@ -401,7 +403,7 @@ static inline int do_inode_permission(struct inode *inode, int mask) * This does not check for a read-only file system. You probably want * inode_permission(). 
*/ -int __inode_permission(struct inode *inode, int mask) +int __inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask) { int retval; @@ -421,7 +423,7 @@ int __inode_permission(struct inode *inode, int mask) return -EACCES; } - retval = do_inode_permission(inode, mask); + retval = do_inode_permission(mnt, inode, mask); if (retval) return retval; @@ -429,7 +431,14 @@ int __inode_permission(struct inode *inode, int mask) if (retval) return retval; - return security_inode_permission(inode, mask); + retval = security_inode_permission(inode, mask); + return retval; +} +EXPORT_SYMBOL(__inode_permission2); + +int __inode_permission(struct inode *inode, int mask) +{ + return __inode_permission2(NULL, inode, mask); } EXPORT_SYMBOL(__inode_permission); @@ -465,14 +474,20 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask) * * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. */ -int inode_permission(struct inode *inode, int mask) +int inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask) { int retval; retval = sb_permission(inode->i_sb, inode, mask); if (retval) return retval; - return __inode_permission(inode, mask); + return __inode_permission2(mnt, inode, mask); +} +EXPORT_SYMBOL(inode_permission2); + +int inode_permission(struct inode *inode, int mask) +{ + return inode_permission2(NULL, inode, mask); } EXPORT_SYMBOL(inode_permission); @@ -1669,13 +1684,13 @@ static struct dentry *lookup_slow(const struct qstr *name, static inline int may_lookup(struct nameidata *nd) { if (nd->flags & LOOKUP_RCU) { - int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); + int err = inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC|MAY_NOT_BLOCK); if (err != -ECHILD) return err; if (unlazy_walk(nd, NULL, 0)) return -ECHILD; } - return inode_permission(nd->inode, MAY_EXEC); + return inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC); } static inline int handle_dots(struct nameidata *nd, int type) @@ -2146,11 +2161,12 @@ static const char *path_init(struct nameidata *nd, unsigned flags) nd->depth = 0; if (flags & LOOKUP_ROOT) { struct dentry *root = nd->root.dentry; + struct vfsmount *mnt = nd->root.mnt; struct inode *inode = root->d_inode; if (*s) { if (!d_can_lookup(root)) return ERR_PTR(-ENOTDIR); - retval = inode_permission(inode, MAY_EXEC); + retval = inode_permission2(mnt, inode, MAY_EXEC); if (retval) return ERR_PTR(retval); } @@ -2415,6 +2431,7 @@ EXPORT_SYMBOL(vfs_path_lookup); /** * lookup_one_len - filesystem helper to lookup single pathname component * @name: pathname component to lookup + * @mnt: mount we are looking up on * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * @@ -2423,7 +2440,7 @@ EXPORT_SYMBOL(vfs_path_lookup); * * The caller must hold base->i_mutex. 
*/ -struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) +struct dentry *lookup_one_len2(const char *name, struct vfsmount *mnt, struct dentry *base, int len) { struct qstr this; unsigned int c; @@ -2457,12 +2474,18 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) return ERR_PTR(err); } - err = inode_permission(base->d_inode, MAY_EXEC); + err = inode_permission2(mnt, base->d_inode, MAY_EXEC); if (err) return ERR_PTR(err); return __lookup_hash(&this, base, 0); } +EXPORT_SYMBOL(lookup_one_len2); + +struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) +{ + return lookup_one_len2(name, NULL, base, len); +} EXPORT_SYMBOL(lookup_one_len); /** @@ -2765,7 +2788,7 @@ EXPORT_SYMBOL(__check_sticky); * 11. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ -static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) +static int may_delete(struct vfsmount *mnt, struct inode *dir, struct dentry *victim, bool isdir) { struct inode *inode = d_backing_inode(victim); int error; @@ -2777,7 +2800,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) BUG_ON(victim->d_parent->d_inode != dir); audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); + error = inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (IS_APPEND(dir)) @@ -2809,7 +2832,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) * 4. We should have write and exec permissions on dir * 5. We can't do it if dir is immutable (done in permission()) */ -static inline int may_create(struct inode *dir, struct dentry *child) +static inline int may_create(struct vfsmount *mnt, struct inode *dir, struct dentry *child) { struct user_namespace *s_user_ns; audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); @@ -2821,7 +2844,7 @@ static inline int may_create(struct inode *dir, struct dentry *child) if (!kuid_has_mapping(s_user_ns, current_fsuid()) || !kgid_has_mapping(s_user_ns, current_fsgid())) return -EOVERFLOW; - return inode_permission(dir, MAY_WRITE | MAY_EXEC); + return inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC); } /* @@ -2868,10 +2891,10 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) } EXPORT_SYMBOL(unlock_rename); -int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool want_excl) +int vfs_create2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, + umode_t mode, bool want_excl) { - int error = may_create(dir, dentry); + int error = may_create(mnt, dir, dentry); if (error) return error; @@ -2887,6 +2910,13 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, fsnotify_create(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_create2); + +int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool want_excl) +{ + return vfs_create2(NULL, dir, dentry, mode, want_excl); +} EXPORT_SYMBOL(vfs_create); bool may_open_dev(const struct path *path) @@ -2898,6 +2928,7 @@ bool may_open_dev(const struct path *path) static int may_open(struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; + struct vfsmount *mnt = path->mnt; struct inode *inode = dentry->d_inode; int error; @@ -2922,7 +2953,7 @@ static int may_open(struct path *path, int acc_mode, int flag) break; } - error = inode_permission(inode, MAY_OPEN | acc_mode); + error = inode_permission2(mnt, inode, MAY_OPEN | 
acc_mode); if (error) return error; @@ -2957,7 +2988,7 @@ static int handle_truncate(struct file *filp) if (!error) error = security_path_truncate(path); if (!error) { - error = do_truncate(path->dentry, 0, + error = do_truncate2(path->mnt, path->dentry, 0, ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, filp); } @@ -2978,7 +3009,7 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m if (error) return error; - error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); + error = inode_permission2(dir->mnt, dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); if (error) return error; @@ -3409,7 +3440,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, goto out; dir = path.dentry->d_inode; /* we want directory to be writable */ - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); + error = inode_permission2(nd->path.mnt, dir, MAY_WRITE | MAY_EXEC); if (error) goto out2; if (!dir->i_op->tmpfile) { @@ -3662,9 +3693,9 @@ inline struct dentry *user_path_create(int dfd, const char __user *pathname, } EXPORT_SYMBOL(user_path_create); -int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) +int vfs_mknod2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - int error = may_create(dir, dentry); + int error = may_create(mnt, dir, dentry); if (error) return error; @@ -3688,6 +3719,12 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) fsnotify_create(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_mknod2); + +int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) +{ + return vfs_mknod2(NULL, dir, dentry, mode, dev); +} EXPORT_SYMBOL(vfs_mknod); static int may_mknod(umode_t mode) @@ -3730,12 +3767,12 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, goto out; switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(path.dentry->d_inode,dentry,mode,true); + error = vfs_create2(path.mnt, path.dentry->d_inode,dentry,mode,true); if (!error) ima_post_path_mknod(dentry); break; case S_IFCHR: case S_IFBLK: - error = vfs_mknod(path.dentry->d_inode,dentry,mode, + error = vfs_mknod2(path.mnt, path.dentry->d_inode,dentry,mode, new_decode_dev(dev)); break; case S_IFIFO: case S_IFSOCK: @@ -3756,9 +3793,9 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d return sys_mknodat(AT_FDCWD, filename, mode, dev); } -int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +int vfs_mkdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode) { - int error = may_create(dir, dentry); + int error = may_create(mnt, dir, dentry); unsigned max_links = dir->i_sb->s_max_links; if (error) @@ -3780,6 +3817,12 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fsnotify_mkdir(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_mkdir2); + +int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + return vfs_mkdir2(NULL, dir, dentry, mode); +} EXPORT_SYMBOL(vfs_mkdir); SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) @@ -3798,7 +3841,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) mode &= ~current_umask(); error = security_path_mkdir(&path, dentry, mode); if (!error) - error = vfs_mkdir(path.dentry->d_inode, dentry, mode); + error = vfs_mkdir2(path.mnt, path.dentry->d_inode, dentry, mode); done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { 
lookup_flags |= LOOKUP_REVAL; @@ -3812,9 +3855,9 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) return sys_mkdirat(AT_FDCWD, pathname, mode); } -int vfs_rmdir(struct inode *dir, struct dentry *dentry) +int vfs_rmdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry) { - int error = may_delete(dir, dentry, 1); + int error = may_delete(mnt, dir, dentry, 1); if (error) return error; @@ -3849,6 +3892,12 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) d_delete(dentry); return error; } +EXPORT_SYMBOL(vfs_rmdir2); + +int vfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + return vfs_rmdir2(NULL, dir, dentry); +} EXPORT_SYMBOL(vfs_rmdir); static long do_rmdir(int dfd, const char __user *pathname) @@ -3894,7 +3943,7 @@ static long do_rmdir(int dfd, const char __user *pathname) error = security_path_rmdir(&path, dentry); if (error) goto exit3; - error = vfs_rmdir(path.dentry->d_inode, dentry); + error = vfs_rmdir2(path.mnt, path.dentry->d_inode, dentry); exit3: dput(dentry); exit2: @@ -3933,10 +3982,10 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. */ -int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) +int vfs_unlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) { struct inode *target = dentry->d_inode; - int error = may_delete(dir, dentry, 0); + int error = may_delete(mnt, dir, dentry, 0); if (error) return error; @@ -3971,6 +4020,12 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate return error; } +EXPORT_SYMBOL(vfs_unlink2); + +int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) +{ + return vfs_unlink2(NULL, dir, dentry, delegated_inode); +} EXPORT_SYMBOL(vfs_unlink); /* @@ -4018,7 +4073,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) error = security_path_unlink(&path, dentry); if (error) goto exit2; - error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode); + error = vfs_unlink2(path.mnt, path.dentry->d_inode, dentry, &delegated_inode); exit2: dput(dentry); } @@ -4068,9 +4123,9 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) return do_unlinkat(AT_FDCWD, pathname); } -int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) +int vfs_symlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, const char *oldname) { - int error = may_create(dir, dentry); + int error = may_create(mnt, dir, dentry); if (error) return error; @@ -4087,6 +4142,12 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) fsnotify_create(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_symlink2); + +int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) +{ + return vfs_symlink2(NULL, dir, dentry, oldname); +} EXPORT_SYMBOL(vfs_symlink); SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, @@ -4109,7 +4170,7 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, error = security_path_symlink(&path, dentry, from->name); if (!error) - error = vfs_symlink(path.dentry->d_inode, dentry, from->name); + error = vfs_symlink2(path.mnt, path.dentry->d_inode, dentry, from->name); done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -4144,7 +4205,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn * be 
appropriate for callers that expect the underlying filesystem not * to be NFS exported. */ -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) +int vfs_link2(struct vfsmount *mnt, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) { struct inode *inode = old_dentry->d_inode; unsigned max_links = dir->i_sb->s_max_links; @@ -4153,7 +4214,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de if (!inode) return -ENOENT; - error = may_create(dir, new_dentry); + error = may_create(mnt, dir, new_dentry); if (error) return error; @@ -4203,6 +4264,12 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de fsnotify_link(dir, inode, new_dentry); return error; } +EXPORT_SYMBOL(vfs_link2); + +int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) +{ + return vfs_link2(NULL, old_dentry, dir, new_dentry, delegated_inode); +} EXPORT_SYMBOL(vfs_link); /* @@ -4258,7 +4325,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; - error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode); + error = vfs_link2(old_path.mnt, old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode); out_dput: done_path_create(&new_path, new_dentry); if (delegated_inode) { @@ -4333,7 +4400,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * ->i_mutex on parents, which works but leads to some truly excessive * locking]. */ -int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, +int vfs_rename2(struct vfsmount *mnt, + struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, struct inode **delegated_inode, unsigned int flags) { @@ -4352,19 +4420,19 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (d_real_inode(old_dentry) == d_real_inode(new_dentry)) return 0; - error = may_delete(old_dir, old_dentry, is_dir); + error = may_delete(mnt, old_dir, old_dentry, is_dir); if (error) return error; if (!target) { - error = may_create(new_dir, new_dentry); + error = may_create(mnt, new_dir, new_dentry); } else { new_is_dir = d_is_dir(new_dentry); if (!(flags & RENAME_EXCHANGE)) - error = may_delete(new_dir, new_dentry, is_dir); + error = may_delete(mnt, new_dir, new_dentry, is_dir); else - error = may_delete(new_dir, new_dentry, new_is_dir); + error = may_delete(mnt, new_dir, new_dentry, new_is_dir); } if (error) return error; @@ -4378,12 +4446,12 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, */ if (new_dir != old_dir) { if (is_dir) { - error = inode_permission(source, MAY_WRITE); + error = inode_permission2(mnt, source, MAY_WRITE); if (error) return error; } if ((flags & RENAME_EXCHANGE) && new_is_dir) { - error = inode_permission(target, MAY_WRITE); + error = inode_permission2(mnt, target, MAY_WRITE); if (error) return error; } @@ -4460,6 +4528,14 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, return error; } +EXPORT_SYMBOL(vfs_rename2); + +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + struct inode **delegated_inode, unsigned int flags) +{ + return vfs_rename2(NULL, old_dir, old_dentry, new_dir, new_dentry, delegated_inode, 
flags); +} EXPORT_SYMBOL(vfs_rename); SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, @@ -4573,7 +4649,7 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, &new_path, new_dentry, flags); if (error) goto exit5; - error = vfs_rename(old_path.dentry->d_inode, old_dentry, + error = vfs_rename2(old_path.mnt, old_path.dentry->d_inode, old_dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode, flags); exit5: @@ -4618,7 +4694,7 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna int vfs_whiteout(struct inode *dir, struct dentry *dentry) { - int error = may_create(dir, dentry); + int error = may_create(NULL, dir, dentry); if (error) return error; diff --git a/fs/namespace.c b/fs/namespace.c index e6c234b1a6456c1364bcc06b029f3cc4a155d5bd..4dc014ea14ea1ff56f5299533ec76d833a444dfa 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -580,6 +580,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) static void free_vfsmnt(struct mount *mnt) { + kfree(mnt->mnt.data); kfree_const(mnt->mnt_devname); #ifdef CONFIG_SMP free_percpu(mnt->mnt_pcp); @@ -746,26 +747,50 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry) return NULL; } -static struct mountpoint *new_mountpoint(struct dentry *dentry) +static struct mountpoint *get_mountpoint(struct dentry *dentry) { - struct hlist_head *chain = mp_hash(dentry); - struct mountpoint *mp; + struct mountpoint *mp, *new = NULL; int ret; - mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); - if (!mp) + if (d_mountpoint(dentry)) { +mountpoint: + read_seqlock_excl(&mount_lock); + mp = lookup_mountpoint(dentry); + read_sequnlock_excl(&mount_lock); + if (mp) + goto done; + } + + if (!new) + new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); + if (!new) return ERR_PTR(-ENOMEM); + + /* Exactly one processes may set d_mounted */ ret = d_set_mounted(dentry); - if (ret) { - kfree(mp); - return ERR_PTR(ret); - } - mp->m_dentry = dentry; - mp->m_count = 1; - hlist_add_head(&mp->m_hash, chain); - INIT_HLIST_HEAD(&mp->m_list); + /* Someone else set d_mounted? */ + if (ret == -EBUSY) + goto mountpoint; + + /* The dentry is not available as a mountpoint? 
*/ + mp = ERR_PTR(ret); + if (ret) + goto done; + + /* Add the new mountpoint to the hash table */ + read_seqlock_excl(&mount_lock); + new->m_dentry = dentry; + new->m_count = 1; + hlist_add_head(&new->m_hash, mp_hash(dentry)); + INIT_HLIST_HEAD(&new->m_list); + read_sequnlock_excl(&mount_lock); + + mp = new; + new = NULL; +done: + kfree(new); return mp; } @@ -948,11 +973,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (!mnt) return ERR_PTR(-ENOMEM); + mnt->mnt.data = NULL; + if (type->alloc_mnt_data) { + mnt->mnt.data = type->alloc_mnt_data(); + if (!mnt->mnt.data) { + mnt_free_id(mnt); + free_vfsmnt(mnt); + return ERR_PTR(-ENOMEM); + } + } if (flags & MS_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; - root = mount_fs(type, flags, name, data); + root = mount_fs(type, flags, name, &mnt->mnt, data); if (IS_ERR(root)) { + kfree(mnt->mnt.data); mnt_free_id(mnt); free_vfsmnt(mnt); return ERR_CAST(root); @@ -980,6 +1015,14 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, if (!mnt) return ERR_PTR(-ENOMEM); + if (sb->s_op->clone_mnt_data) { + mnt->mnt.data = sb->s_op->clone_mnt_data(old->mnt.data); + if (!mnt->mnt.data) { + err = -ENOMEM; + goto out_free; + } + } + if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE)) mnt->mnt_group_id = 0; /* not a peer of original */ else @@ -1048,6 +1091,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, return mnt; out_free: + kfree(mnt->mnt.data); mnt_free_id(mnt); free_vfsmnt(mnt); return ERR_PTR(err); @@ -1568,11 +1612,11 @@ void __detach_mounts(struct dentry *dentry) struct mount *mnt; namespace_lock(); + lock_mount_hash(); mp = lookup_mountpoint(dentry); if (IS_ERR_OR_NULL(mp)) goto out_unlock; - lock_mount_hash(); event++; while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); @@ -1582,9 +1626,9 @@ void __detach_mounts(struct dentry *dentry) } else umount_tree(mnt, UMOUNT_CONNECTED); } - unlock_mount_hash(); put_mountpoint(mp); out_unlock: + unlock_mount_hash(); namespace_unlock(); } @@ -2013,9 +2057,7 @@ static struct mountpoint *lock_mount(struct path *path) namespace_lock(); mnt = lookup_mnt(path); if (likely(!mnt)) { - struct mountpoint *mp = lookup_mountpoint(dentry); - if (!mp) - mp = new_mountpoint(dentry); + struct mountpoint *mp = get_mountpoint(dentry); if (IS_ERR(mp)) { namespace_unlock(); inode_unlock(dentry->d_inode); @@ -2034,7 +2076,11 @@ static struct mountpoint *lock_mount(struct path *path) static void unlock_mount(struct mountpoint *where) { struct dentry *dentry = where->m_dentry; + + read_seqlock_excl(&mount_lock); put_mountpoint(where); + read_sequnlock_excl(&mount_lock); + namespace_unlock(); inode_unlock(dentry->d_inode); } @@ -2253,8 +2299,14 @@ static int do_remount(struct path *path, int flags, int mnt_flags, err = change_mount_flags(path->mnt, flags); else if (!capable(CAP_SYS_ADMIN)) err = -EPERM; - else - err = do_remount_sb(sb, flags, data, 0); + else { + err = do_remount_sb2(path->mnt, sb, flags, data, 0); + namespace_lock(); + lock_mount_hash(); + propagate_remount(mnt); + unlock_mount_hash(); + namespace_unlock(); + } if (!err) { lock_mount_hash(); mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; @@ -3110,9 +3162,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, touch_mnt_namespace(current->nsproxy->mnt_ns); /* A moved mount should not expire automatically */ list_del_init(&new_mnt->mnt_expire); + put_mountpoint(root_mp); unlock_mount_hash(); 
chroot_fs_refs(&root, &new); - put_mountpoint(root_mp); error = 0; out4: unlock_mount(old_mp); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5f1af4cd1a336e8f273114f38b165d7e83ab1421..53e02b8bd9bd21df75bfbe2c3fbb0b49de76e4b1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -477,7 +477,7 @@ void nfs_force_use_readdirplus(struct inode *dir) { if (!list_empty(&NFS_I(dir)->open_files)) { nfs_advise_use_readdirplus(dir); - nfs_zap_mapping(dir, dir->i_mapping); + invalidate_mapping_pages(dir->i_mapping, 0, -1); } } @@ -886,17 +886,6 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) goto out; } -static bool nfs_dir_mapping_need_revalidate(struct inode *dir) -{ - struct nfs_inode *nfsi = NFS_I(dir); - - if (nfs_attribute_cache_expired(dir)) - return true; - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - return true; - return false; -} - /* The file offset position represents the dirent entry number. A last cookie cache takes care of the common case of reading the whole directory. @@ -928,7 +917,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; - if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode)) + if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) goto out; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 9ea85ae23c320b2dcb7409c97c3da3fa93a4fc12..a1de8ef63e56992c6e7eeb554c00dfc024111f94 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -374,7 +374,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, */ if (!PageUptodate(page)) { unsigned pglen = nfs_page_length(page); - unsigned end = offset + len; + unsigned end = offset + copied; if (pglen == 0) { zero_user_segments(page, 0, offset, diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 4946ef40ba875e6255857ca3c27b6df352c7a46a..85ef38f9765f10db919dc0bb2748bd320be13cfd 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -283,7 +283,8 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) s->nfs_client->cl_rpcclient->cl_auth->au_flavor); out_test_devid: - if (filelayout_test_devid_unavailable(devid)) + if (ret->ds_clp == NULL || + filelayout_test_devid_unavailable(devid)) ret = NULL; out: return ret; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 98ace127bf861e723647019a0c8ad856f5ee0391..a5c38889e7ae7fb5584ca366972bdc8c3ea2688b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -28,6 +28,9 @@ static struct group_info *ff_zero_group; +static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, + struct nfs_pgio_header *hdr); + static struct pnfs_layout_hdr * ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) { @@ -1293,6 +1296,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task, hdr->pgio_mirror_idx + 1, &hdr->pgio_mirror_idx)) goto out_eagain; + ff_layout_read_record_layoutstats_done(task, hdr); pnfs_read_resend_pnfs(hdr); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 241da19b7da4a54a45b1a2bd6cae4cab8cab4dde..78ff8b63d5f70aae2eb62c99d932c255f47f4d8c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2678,7 +2678,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, sattr->ia_valid |= ATTR_MTIME; 
/* Except MODE, it seems harmless of setting twice. */ - if ((attrset[1] & FATTR4_WORD1_MODE)) + if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE && + attrset[1] & FATTR4_WORD1_MODE) sattr->ia_valid &= ~ATTR_MODE; if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL) @@ -8371,6 +8372,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, goto out; } + nfs4_sequence_free_slot(&lgp->res.seq_res); err = nfs4_handle_exception(server, nfs4err, exception); if (!status) { if (exception->retry) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 259ef85f435aa7f9e0b0e4d06d3ce24a9e7a3ad3..415d7e69bc5ec1ae5d88077cb41164bdd5ad54a4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -299,6 +299,14 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) } } +static void +pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) +{ + lo->plh_return_iomode = 0; + lo->plh_return_seq = 0; + clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); +} + /* * Mark a pnfs_layout_hdr and all associated layout segments as invalid * @@ -317,6 +325,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, }; set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); + pnfs_clear_layoutreturn_info(lo); return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0); } @@ -411,7 +420,9 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); - if (list_empty(&lo->plh_segs)) { + if (list_empty(&lo->plh_segs) && + !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) && + !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { if (atomic_read(&lo->plh_outstanding) == 0) set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); @@ -816,14 +827,6 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) pnfs_destroy_layouts_byclid(clp, false); } -static void -pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) -{ - lo->plh_return_iomode = 0; - lo->plh_return_seq = 0; - clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); -} - /* update lo->plh_stateid with new if is more recent */ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, @@ -944,6 +947,7 @@ static void pnfs_clear_layoutcommit(struct inode *inode, void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) { clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); + clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags); smp_mb__after_atomic(); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); @@ -957,8 +961,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, /* Serialise LAYOUTGET/LAYOUTRETURN */ if (atomic_read(&lo->plh_outstanding) != 0) return false; - if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) return false; + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); pnfs_get_layout_hdr(lo); if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { if (stateid != NULL) { @@ -1252,13 +1257,11 @@ bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) * i_lock */ spin_lock(&ino->i_lock); lo = nfsi->layout; - if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); sleep = true; + } spin_unlock(&ino->i_lock); - - if (sleep) - rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); - return sleep; } @@ -1950,6 +1953,8 @@ 
void pnfs_error_mark_layout_for_return(struct inode *inode, spin_lock(&inode->i_lock); pnfs_set_plh_return_info(lo, range.iomode, 0); + /* Block LAYOUTGET */ + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); /* * mark all matching lsegs so that we are sure to have no live * segments at hand when sending layoutreturn. See pnfs_put_lseg() @@ -2286,6 +2291,10 @@ void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) struct nfs_pageio_descriptor pgio; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + /* Prevent deadlocks with layoutreturn! */ + pnfs_put_lseg(hdr->lseg); + hdr->lseg = NULL; + nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 5c295512c9671e4996136264ec8a848077d835ef..44cad8afda0e4e3b95f64a57d16ec2036a4b0c90 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -96,6 +96,7 @@ enum { NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_RETURN, /* layoutreturn in progress */ + NFS_LAYOUT_RETURN_LOCK, /* Serialise layoutreturn */ NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 001796bcd6c895a49e3dd277a0862545fd2205a5..ddce94ce8142dd5399d8797186730fee91d30292 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2904,7 +2904,7 @@ module_param(max_session_slots, ushort, 0644); MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " "requests the client will negotiate"); module_param(max_session_cb_slots, ushort, 0644); -MODULE_PARM_DESC(max_session_slots, "Maximum number of parallel NFSv4.1 " +MODULE_PARM_DESC(max_session_cb_slots, "Maximum number of parallel NFSv4.1 " "callbacks the client will process for a given server"); module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 42aace4fc4c807ec9f41e590e777f8f8f94d7803..64813697f4c4e26f02b245e83df6008ba7b7d801 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -223,10 +223,11 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, struct nfs4_layout_stateid *ls; struct nfs4_stid *stp; - stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache); + stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache, + nfsd4_free_layout_stateid); if (!stp) return NULL; - stp->sc_free = nfsd4_free_layout_stateid; + get_nfs4_file(fp); stp->sc_file = fp; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4b4beaaa4eaac01233f874c7dfdb8d1a6d7cd3d6..a0dee8ae9f97f16a18e40ba19f8e84a45ad1a02b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -633,8 +633,8 @@ find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new) return co; } -struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, - struct kmem_cache *slab) +struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, + void (*sc_free)(struct nfs4_stid *)) { struct nfs4_stid *stid; int new_id; @@ -650,6 +650,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, idr_preload_end(); if (new_id < 0) goto out_free; + + stid->sc_free = sc_free; stid->sc_client = cl; stid->sc_stateid.si_opaque.so_id = new_id; stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; @@ -675,15 +677,12 @@ struct nfs4_stid 
*nfs4_alloc_stid(struct nfs4_client *cl, static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) { struct nfs4_stid *stid; - struct nfs4_ol_stateid *stp; - stid = nfs4_alloc_stid(clp, stateid_slab); + stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid); if (!stid) return NULL; - stp = openlockstateid(stid); - stp->st_stid.sc_free = nfs4_free_ol_stateid; - return stp; + return openlockstateid(stid); } static void nfs4_free_deleg(struct nfs4_stid *stid) @@ -781,11 +780,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, goto out_dec; if (delegation_blocked(¤t_fh->fh_handle)) goto out_dec; - dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); + dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg)); if (dp == NULL) goto out_dec; - dp->dl_stid.sc_free = nfs4_free_deleg; /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -5580,7 +5578,6 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); get_nfs4_file(fp); stp->st_stid.sc_file = fp; - stp->st_stid.sc_free = nfs4_free_lock_stateid; stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; @@ -5623,7 +5620,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, lst = find_lock_stateid(lo, fi); if (lst == NULL) { spin_unlock(&clp->cl_lock); - ns = nfs4_alloc_stid(clp, stateid_slab); + ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); if (ns == NULL) return NULL; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index c9399366f9dfc73b343d079fbad2dc2127927aae..4516e8b7d776305d94fb89f86256ee3fc54dec27 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -603,8 +603,8 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, struct nfsd_net *nn); -struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, - struct kmem_cache *slab); +struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, + void (*sc_free)(struct nfs4_stid *)); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 7ebfca6a14272e0a8620333ff8f8dd6dd52d92b9..7f99c96014b3b4f489f4a081b8cdf1e9f06d4dd2 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -488,7 +488,7 @@ static int fanotify_find_path(int dfd, const char __user *filename, } /* you can only watch an inode if you have read permissions on it */ - ret = inode_permission(path->dentry->d_inode, MAY_READ); + ret = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ); if (ret) path_put(path); out: diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 741077deef3b5544dd71ae9635facb8ac1a228a9..a3645249f7ecfa4cbdae8434bb87b47ffd02dbb1 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -150,12 +150,10 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, */ void fsnotify_unmount_inodes(struct super_block *sb) { - struct inode *inode, *next_i, *need_iput = NULL; + struct inode *inode, *iput_inode = NULL; spin_lock(&sb->s_inode_list_lock); - list_for_each_entry_safe(inode, next_i, &sb->s_inodes, 
i_sb_list) { - struct inode *need_iput_tmp; - + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We cannot __iget() an inode in state I_FREEING, * I_WILL_FREE, or I_NEW which is fine because by that point @@ -178,49 +176,24 @@ void fsnotify_unmount_inodes(struct super_block *sb) continue; } - need_iput_tmp = need_iput; - need_iput = NULL; - - /* In case fsnotify_inode_delete() drops a reference. */ - if (inode != need_iput_tmp) - __iget(inode); - else - need_iput_tmp = NULL; + __iget(inode); spin_unlock(&inode->i_lock); - - /* In case the dropping of a reference would nuke next_i. */ - while (&next_i->i_sb_list != &sb->s_inodes) { - spin_lock(&next_i->i_lock); - if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) && - atomic_read(&next_i->i_count)) { - __iget(next_i); - need_iput = next_i; - spin_unlock(&next_i->i_lock); - break; - } - spin_unlock(&next_i->i_lock); - next_i = list_next_entry(next_i, i_sb_list); - } - - /* - * We can safely drop s_inode_list_lock here because either - * we actually hold references on both inode and next_i or - * end of list. Also no new inodes will be added since the - * umount has begun. - */ spin_unlock(&sb->s_inode_list_lock); - if (need_iput_tmp) - iput(need_iput_tmp); + if (iput_inode) + iput(iput_inode); /* for each watch, send FS_UNMOUNT and then remove it */ fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); fsnotify_inode_delete(inode); - iput(inode); + iput_inode = inode; spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 4dc09da062c6d57d2604052309e94fb983d105fb..4da5c6a1134fc384bcb5220b1f16b6c64266ae35 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -337,7 +337,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns if (error) return error; /* you can only watch an inode if you have read permissions on it */ - error = inode_permission(path->dentry->d_inode, MAY_READ); + error = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ); if (error) path_put(path); return error; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 83d576f6a287b13939a7dd70622ae4d89b5031f8..77d1632e905d8b1ec249cf86c7ece183c38f2746 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3303,6 +3303,16 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, lockres->l_level, new_level); + /* + * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always + * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that + * we can recover correctly from node failure. Otherwise, we may get + * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. 
+ */ + if (!ocfs2_is_o2cb_active() && + lockres->l_ops->flags & LOCK_TYPE_USES_LVB) + lvb = 1; + if (lvb) dlm_flags |= DLM_LKF_VALBLK; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 52c07346bea3f8960399184c0e6aead7545a25f2..820359096c7aa8df5b76e2a0c1404ce7f9d0337d 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -48,6 +48,12 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; */ static struct ocfs2_stack_plugin *active_stack; +inline int ocfs2_is_o2cb_active(void) +{ + return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB); +} +EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active); + static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) { struct ocfs2_stack_plugin *p; diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index f2dce10fae543c254dcb4e6628d357b60a3ac16c..e3036e1790e86da7b4e13dcb0b8c88e3f19b6d50 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -298,6 +298,9 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); +/* In ocfs2_downconvert_lock(), we need to know which stack we are using */ +int ocfs2_is_o2cb_active(void); + extern struct kset *ocfs2_kset; #endif /* STACKGLUE_H */ diff --git a/fs/open.c b/fs/open.c index d3ed8171e8e09291b44524e6de943bc3e71c91e3..568749b035faf380a88d2f2f38092dfebe9677f4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -34,8 +34,8 @@ #include "internal.h" -int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, - struct file *filp) +int do_truncate2(struct vfsmount *mnt, struct dentry *dentry, loff_t length, + unsigned int time_attrs, struct file *filp) { int ret; struct iattr newattrs; @@ -60,18 +60,25 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, inode_lock(dentry->d_inode); /* Note any delegations or leases have already been broken: */ - ret = notify_change(dentry, &newattrs, NULL); + ret = notify_change2(mnt, dentry, &newattrs, NULL); inode_unlock(dentry->d_inode); return ret; } +int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, + struct file *filp) +{ + return do_truncate2(NULL, dentry, length, time_attrs, filp); +} long vfs_truncate(const struct path *path, loff_t length) { struct inode *inode; + struct vfsmount *mnt; struct dentry *upperdentry; long error; inode = path->dentry->d_inode; + mnt = path->mnt; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ if (S_ISDIR(inode->i_mode)) @@ -83,7 +90,7 @@ long vfs_truncate(const struct path *path, loff_t length) if (error) goto out; - error = inode_permission(inode, MAY_WRITE); + error = inode_permission2(mnt, inode, MAY_WRITE); if (error) goto mnt_drop_write_and_out; @@ -117,7 +124,7 @@ long vfs_truncate(const struct path *path, loff_t length) if (!error) error = security_path_truncate(path); if (!error) - error = do_truncate(path->dentry, length, 0, NULL); + error = do_truncate2(mnt, path->dentry, length, 0, NULL); put_write_and_out: put_write_access(upperdentry->d_inode); @@ -166,6 +173,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct inode *inode; struct dentry *dentry; + struct vfsmount *mnt; struct fd f; int error; @@ -182,6 +190,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) small = 0; dentry = f.file->f_path.dentry; + mnt = f.file->f_path.mnt; inode = dentry->d_inode; error = 
-EINVAL; if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) @@ -201,7 +210,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) if (!error) error = security_path_truncate(&f.file->f_path); if (!error) - error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); + error = do_truncate2(mnt, dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); sb_end_write(inode->i_sb); out_putf: fdput(f); @@ -357,6 +366,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) struct cred *override_cred; struct path path; struct inode *inode; + struct vfsmount *mnt; int res; unsigned int lookup_flags = LOOKUP_FOLLOW; @@ -387,6 +397,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) goto out; inode = d_backing_inode(path.dentry); + mnt = path.mnt; if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { /* @@ -398,7 +409,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) goto out_path_release; } - res = inode_permission(inode, mode | MAY_ACCESS); + res = inode_permission2(mnt, inode, mode | MAY_ACCESS); /* SuS v2 requires we report a read only fs too */ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) goto out_path_release; @@ -442,7 +453,7 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename) if (error) goto out; - error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); + error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; @@ -462,6 +473,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct fd f = fdget_raw(fd); struct inode *inode; + struct vfsmount *mnt; int error = -EBADF; error = -EBADF; @@ -469,12 +481,13 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) goto out; inode = file_inode(f.file); + mnt = f.file->f_path.mnt; error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) goto out_putf; - error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); + error = inode_permission2(mnt, inode, MAY_EXEC | MAY_CHDIR); if (!error) set_fs_pwd(current->fs, &f.file->f_path); out_putf: @@ -493,7 +506,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) if (error) goto out; - error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); + error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; @@ -533,7 +546,7 @@ static int chmod_common(const struct path *path, umode_t mode) goto out_unlock; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(path->dentry, &newattrs, &delegated_inode); + error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); out_unlock: inode_unlock(inode); if (delegated_inode) { @@ -613,7 +626,7 @@ static int chown_common(const struct path *path, uid_t user, gid_t group) inode_lock(inode); error = security_path_chown(path, uid, gid); if (!error) - error = notify_change(path->dentry, &newattrs, &delegated_inode); + error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); diff --git a/fs/pnode.c b/fs/pnode.c index 234a9ac49958ed978f67aad68d81fb75cf5717ce..83b5bb1fdbb8a19974cee6416c75bbfed97aeda6 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -458,3 +458,32 @@ int propagate_umount(struct list_head *list) __propagate_umount(mnt); return 0; } + +/* + * Iterates over all slaves, and slaves of slaves. 
+ */ +static struct mount *next_descendent(struct mount *root, struct mount *cur) +{ + if (!IS_MNT_NEW(cur) && !list_empty(&cur->mnt_slave_list)) + return first_slave(cur); + do { + if (cur->mnt_slave.next != &cur->mnt_master->mnt_slave_list) + return next_slave(cur); + cur = cur->mnt_master; + } while (cur != root); + return NULL; +} + +void propagate_remount(struct mount *mnt) +{ + struct mount *m = mnt; + struct super_block *sb = mnt->mnt.mnt_sb; + + if (sb->s_op->copy_mnt_data) { + m = next_descendent(mnt, m); + while (m) { + sb->s_op->copy_mnt_data(m->mnt.data, mnt->mnt.data); + m = next_descendent(mnt, m); + } + } +} diff --git a/fs/pnode.h b/fs/pnode.h index 550f5a8b4fcf462d12d36fb6e7c646670247c317..03a8001c4af2d56f65caa3aa9eb46a616f0e3c50 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -44,6 +44,7 @@ int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, int propagate_umount(struct list_head *); int propagate_mount_busy(struct mount *, int); void propagate_mount_unlock(struct mount *); +void propagate_remount(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); unsigned int mnt_get_count(struct mount *mnt); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 595522022aca04beeeabd3d2ed4e73992f268296..c9d48dc784953fa4af62f9af58dfbb56ed2faa70 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -922,11 +922,10 @@ int simple_set_acl(struct inode *inode, struct posix_acl *acl, int type) int error; if (type == ACL_TYPE_ACCESS) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) - return 0; - if (error == 0) - acl = NULL; + error = posix_acl_update_mode(inode, + &inode->i_mode, &acl); + if (error) + return error; } inode->i_ctime = current_time(inode); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 55313d9948954bbe4bff5769e487eb78012776db..d4e37acd48217dcb1090a1ee68bb4ec4cb7226e5 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -709,7 +709,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx) ctl_dir = container_of(head, struct ctl_dir, header); if (!dir_emit_dots(file, ctx)) - return 0; + goto out; pos = 2; @@ -719,6 +719,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx) break; } } +out: sysctl_head_finish(head); return 0; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 87cf40b6ed52f432fd2196721d63a39db6e9da78..65d28f9ed232aeb94e12d9ffcb0afca45b696fbc 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -150,10 +150,9 @@ static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma) const char *kaddr; long pages_pinned; struct page *page; - unsigned int gup_flags = 0; - pages_pinned = get_user_pages_remote(current, mm, page_start_vaddr, - 1, gup_flags, &page, NULL); + pages_pinned = get_user_pages_remote(current, mm, + page_start_vaddr, 1, 0, &page, NULL); if (pages_pinned < 1) { seq_puts(m, "]"); return; @@ -396,8 +395,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) goto done; } - if (is_stack(priv, vma)) + if (is_stack(priv, vma)) { name = "[stack]"; + goto done; + } if (vma_get_anon_name(vma)) { seq_pad(m, ' '); diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 3f1190d18991539087ddef41b922b458d16914b7..6863773aff252796e72f1bc3083869cc99b40fb5 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -118,7 +118,9 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt) if (err) goto out; show_mnt_opts(m, 
mnt); - if (sb->s_op->show_options) + if (sb->s_op->show_options2) + err = sb->s_op->show_options2(mnt, m, mnt_path.dentry); + else if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt_path.dentry); seq_puts(m, " 0 0\n"); out: @@ -180,7 +182,9 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) err = show_sb_opts(m, sb); if (err) goto out; - if (sb->s_op->show_options) + if (sb->s_op->show_options2) { + err = sb->s_op->show_options2(mnt, m, mnt->mnt_root); + } else if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt->mnt_root); seq_putc(m, '\n'); out: diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 97b79ccb5923fe984dd9c49362497410566a9150..9408a5477adab9590dfd40ec450445752127887a 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -30,11 +30,12 @@ static void inherit_derived_state(struct inode *parent, struct inode *child) ci->userid = pi->userid; ci->d_uid = pi->d_uid; ci->under_android = pi->under_android; + set_top(ci, pi->top); } /* helper function for derived state */ -void setup_derived_state(struct inode *inode, perm_t perm, - userid_t userid, uid_t uid, bool under_android) +void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, + uid_t uid, bool under_android, struct inode *top) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); @@ -42,14 +43,14 @@ void setup_derived_state(struct inode *inode, perm_t perm, info->userid = userid; info->d_uid = uid; info->under_android = under_android; + set_top(info, top); } /* While renaming, there is a point where we want the path from dentry, but the name from newdentry */ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry) { - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); - struct sdcardfs_inode_info *info = SDCARDFS_I(dentry->d_inode); - struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); + struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry)); + struct sdcardfs_inode_info *parent_info= SDCARDFS_I(d_inode(parent)); appid_t appid; /* By default, each inode inherits from its parent. @@ -60,7 +61,7 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, st * stage of each system call by fix_derived_permission(inode). */ - inherit_derived_state(parent->d_inode, dentry->d_inode); + inherit_derived_state(d_inode(parent), d_inode(dentry)); /* Derive custom permissions based on parent and current node */ switch (parent_info->perm) { @@ -71,6 +72,7 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, st /* Legacy internal layout places users at top level */ info->perm = PERM_ROOT; info->userid = simple_strtoul(newdentry->d_name.name, NULL, 10); + set_top(info, &info->vfs_inode); break; case PERM_ROOT: /* Assume masked off by default. 
*/ @@ -78,28 +80,33 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, st /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID; info->under_android = true; + set_top(info, &info->vfs_inode); } break; case PERM_ANDROID: if (!strcasecmp(newdentry->d_name.name, "data")) { /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID_DATA; + set_top(info, &info->vfs_inode); } else if (!strcasecmp(newdentry->d_name.name, "obb")) { /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID_OBB; + set_top(info, &info->vfs_inode); /* Single OBB directory is always shared */ } else if (!strcasecmp(newdentry->d_name.name, "media")) { /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID_MEDIA; + set_top(info, &info->vfs_inode); } break; case PERM_ANDROID_DATA: case PERM_ANDROID_OBB: case PERM_ANDROID_MEDIA: - appid = get_appid(sbi->pkgl_id, newdentry->d_name.name); + appid = get_appid(newdentry->d_name.name); if (appid != 0) { info->d_uid = multiuser_get_uid(parent_info->userid, appid); } + set_top(info, &info->vfs_inode); break; } } @@ -109,17 +116,72 @@ void get_derived_permission(struct dentry *parent, struct dentry *dentry) get_derived_permission_new(parent, dentry, dentry); } -void get_derive_permissions_recursive(struct dentry *parent) { +static int descendant_may_need_fixup(perm_t perm) { + if (perm == PERM_PRE_ROOT || perm == PERM_ROOT || perm == PERM_ANDROID) + return 1; + return 0; +} + +static int needs_fixup(perm_t perm) { + if (perm == PERM_ANDROID_DATA || perm == PERM_ANDROID_OBB + || perm == PERM_ANDROID_MEDIA) + return 1; + return 0; +} + +void fixup_perms_recursive(struct dentry *dentry, const char* name, size_t len) { + struct dentry *child; + struct sdcardfs_inode_info *info; + if (!dget(dentry)) + return; + if (!d_inode(dentry)) { + dput(dentry); + return; + } + info = SDCARDFS_I(d_inode(dentry)); + + if (needs_fixup(info->perm)) { + spin_lock(&dentry->d_lock); + list_for_each_entry(child, &dentry->d_subdirs, d_child) { + dget(child); + if (!strncasecmp(child->d_name.name, name, len)) { + if (child->d_inode) { + get_derived_permission(dentry, child); + fixup_tmp_permissions(child->d_inode); + dput(child); + break; + } + } + dput(child); + } + spin_unlock(&dentry->d_lock); + } else if (descendant_may_need_fixup(info->perm)) { + spin_lock(&dentry->d_lock); + list_for_each_entry(child, &dentry->d_subdirs, d_child) { + fixup_perms_recursive(child, name, len); + } + spin_unlock(&dentry->d_lock); + } + dput(dentry); +} + +void fixup_top_recursive(struct dentry *parent) { struct dentry *dentry; + struct sdcardfs_inode_info *info; + if (!d_inode(parent)) + return; + info = SDCARDFS_I(d_inode(parent)); + spin_lock(&parent->d_lock); list_for_each_entry(dentry, &parent->d_subdirs, d_child) { - if (dentry->d_inode) { - inode_lock(dentry->d_inode); - get_derived_permission(parent, dentry); - fix_derived_permission(dentry->d_inode); - get_derive_permissions_recursive(dentry); - inode_unlock(dentry->d_inode); + if (d_inode(dentry)) { + if (SDCARDFS_I(d_inode(parent))->top != SDCARDFS_I(d_inode(dentry))->top) { + get_derived_permission(parent, dentry); + fixup_tmp_permissions(d_inode(dentry)); + fixup_top_recursive(dentry); + } } } + spin_unlock(&parent->d_lock); } /* main function for updating derived permission */ @@ -127,7 +189,7 @@ inline void update_derived_permission_lock(struct dentry *dentry) { struct dentry *parent; - if(!dentry || 
!dentry->d_inode) { + if(!dentry || !d_inode(dentry)) { printk(KERN_ERR "sdcardfs: %s: invalid dentry\n", __func__); return; } @@ -135,9 +197,8 @@ inline void update_derived_permission_lock(struct dentry *dentry) * 1. need to check whether the dentry is updated or not * 2. remove the root dentry update */ - inode_lock(dentry->d_inode); if(IS_ROOT(dentry)) { - //setup_default_pre_root_state(dentry->d_inode); + //setup_default_pre_root_state(d_inode(dentry)); } else { parent = dget_parent(dentry); if(parent) { @@ -145,15 +206,14 @@ inline void update_derived_permission_lock(struct dentry *dentry) dput(parent); } } - fix_derived_permission(dentry->d_inode); - inode_unlock(dentry->d_inode); + fixup_tmp_permissions(d_inode(dentry)); } int need_graft_path(struct dentry *dentry) { int ret = 0; struct dentry *parent = dget_parent(dentry); - struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); + struct sdcardfs_inode_info *parent_info= SDCARDFS_I(d_inode(parent)); struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); if(parent_info->perm == PERM_ANDROID && @@ -212,7 +272,7 @@ int is_base_obbpath(struct dentry *dentry) { int ret = 0; struct dentry *parent = dget_parent(dentry); - struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); + struct sdcardfs_inode_info *parent_info= SDCARDFS_I(d_inode(parent)); struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); spin_lock(&SDCARDFS_D(dentry)->lock); diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index c249fa982d3c72706bfb708f56dd3d91409e35af..7750a0472389bef33da10d9db11f91c7430b81dd 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -216,7 +216,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file) goto out_err; } - if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name)) { + if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index f95283ed84477bb2677ff515c582c7f88e7d2486..5b311708195b48bac05951d95ef35c7fda29ae7e 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -19,6 +19,7 @@ */ #include "sdcardfs.h" +#include /* Do not directly use this function. Use OVERRIDE_CRED() instead. 
*/ const struct cred * override_fsids(struct sdcardfs_sb_info* sbi) @@ -53,9 +54,12 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, { int err; struct dentry *lower_dentry; + struct vfsmount *lower_dentry_mnt; struct dentry *lower_parent_dentry = NULL; struct path lower_path; const struct cred *saved_cred = NULL; + struct fs_struct *saved_fs; + struct fs_struct *copied_fs; if(!check_caller_access_to_name(dir, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" @@ -70,11 +74,22 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; + lower_dentry_mnt = lower_path.mnt; lower_parent_dentry = lock_parent(lower_dentry); /* set last 16bytes of mode field to 0664 */ mode = (mode & S_IFMT) | 00664; - err = vfs_create(d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); + + /* temporarily change umask for lower fs write */ + saved_fs = current->fs; + copied_fs = copy_fs_struct(current->fs); + if (!copied_fs) { + err = -ENOMEM; + goto out_unlock; + } + current->fs = copied_fs; + current->fs->umask = 0; + err = vfs_create2(lower_dentry_mnt, d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); if (err) goto out; @@ -85,6 +100,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); out: + current->fs = saved_fs; + free_fs_struct(copied_fs); +out_unlock: unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); REVERT_CRED(saved_cred); @@ -138,6 +156,7 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) { int err; struct dentry *lower_dentry; + struct vfsmount *lower_mnt; struct inode *lower_dir_inode = sdcardfs_lower_inode(dir); struct dentry *lower_dir_dentry; struct path lower_path; @@ -156,10 +175,11 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; + lower_mnt = lower_path.mnt; dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); - err = vfs_unlink(lower_dir_inode, lower_dentry, NULL); + err = vfs_unlink2(lower_mnt, lower_dir_inode, lower_dentry, NULL); /* * Note: unlinking on top of NFS can cause silly-renamed files. 
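(Aside: sdcardfs_create() above, and sdcardfs_mkdir() just below, wrap the lower-filesystem call in the same save/copy/zero/restore dance around current->fs so that the mode handed down (0664 or 0775) is not masked a second time by the calling task's umask. A minimal sketch of that pattern -- illustrative only, assuming copy_fs_struct()/free_fs_struct() are reachable from the caller as they are in this tree; the helper name and callback shape are hypothetical, the patch open-codes the same steps inline:

	#include <linux/fs_struct.h>
	#include <linux/sched.h>

	static int with_zero_umask(int (*op)(void *arg), void *arg)
	{
		struct fs_struct *saved_fs = current->fs;
		struct fs_struct *copied_fs = copy_fs_struct(saved_fs);
		int err;

		if (!copied_fs)
			return -ENOMEM;
		copied_fs->umask = 0;	/* lower fs sees the mode verbatim */
		current->fs = copied_fs;

		err = op(arg);		/* e.g. vfs_create2()/vfs_mkdir2() on the lower path */

		current->fs = saved_fs;	/* restore before freeing the copy */
		free_fs_struct(copied_fs);
		return err;
	}
)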
@@ -240,16 +260,15 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode int err; int make_nomedia_in_obb = 0; struct dentry *lower_dentry; + struct vfsmount *lower_mnt; struct dentry *lower_parent_dentry = NULL; struct path lower_path; struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; struct sdcardfs_inode_info *pi = SDCARDFS_I(dir); - char *page_buf; - char *nomedia_dir_name; - char *nomedia_fullpath; - int fullpath_namelen; int touch_err = 0; + struct fs_struct *saved_fs; + struct fs_struct *copied_fs; if(!check_caller_access_to_name(dir, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" @@ -272,14 +291,28 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* the lower_dentry is negative here */ sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; + lower_mnt = lower_path.mnt; lower_parent_dentry = lock_parent(lower_dentry); /* set last 16bytes of mode field to 0775 */ mode = (mode & S_IFMT) | 00775; - err = vfs_mkdir(d_inode(lower_parent_dentry), lower_dentry, mode); - if (err) + /* temporarily change umask for lower fs write */ + saved_fs = current->fs; + copied_fs = copy_fs_struct(current->fs); + if (!copied_fs) { + err = -ENOMEM; + unlock_dir(lower_parent_dentry); + goto out_unlock; + } + current->fs = copied_fs; + current->fs->umask = 0; + err = vfs_mkdir2(lower_mnt, d_inode(lower_parent_dentry), lower_dentry, mode); + + if (err) { + unlock_dir(lower_parent_dentry); goto out; + } /* if it is a local obb dentry, setup it with the base obbpath */ if(need_graft_path(dentry)) { @@ -301,14 +334,18 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode } err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pi->userid); - if (err) + if (err) { + unlock_dir(lower_parent_dentry); goto out; + } fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); /* update number of links on parent directory */ set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink); + unlock_dir(lower_parent_dentry); + if ((!sbi->options.multiuser) && (!strcasecmp(dentry->d_name.name, "obb")) && (pi->perm == PERM_ANDROID) && (pi->userid == 0)) make_nomedia_in_obb = 1; @@ -316,43 +353,18 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* When creating /Android/data and /Android/obb, mark them as .nomedia */ if (make_nomedia_in_obb || ((pi->perm == PERM_ANDROID) && (!strcasecmp(dentry->d_name.name, "data")))) { - - page_buf = (char *)__get_free_page(GFP_KERNEL); - if (!page_buf) { - printk(KERN_ERR "sdcardfs: failed to allocate page buf\n"); - goto out; - } - - nomedia_dir_name = d_absolute_path(&lower_path, page_buf, PAGE_SIZE); - if (IS_ERR(nomedia_dir_name)) { - free_page((unsigned long)page_buf); - printk(KERN_ERR "sdcardfs: failed to get .nomedia dir name\n"); - goto out; - } - - fullpath_namelen = page_buf + PAGE_SIZE - nomedia_dir_name - 1; - fullpath_namelen += strlen("/.nomedia"); - nomedia_fullpath = kzalloc(fullpath_namelen + 1, GFP_KERNEL); - if (!nomedia_fullpath) { - free_page((unsigned long)page_buf); - printk(KERN_ERR "sdcardfs: failed to allocate .nomedia fullpath buf\n"); - goto out; - } - - strcpy(nomedia_fullpath, nomedia_dir_name); - free_page((unsigned long)page_buf); - strcat(nomedia_fullpath, "/.nomedia"); - touch_err = touch(nomedia_fullpath, 0664); + set_fs_pwd(current->fs, &lower_path); + 
touch_err = touch(".nomedia", 0664); if (touch_err) { - printk(KERN_ERR "sdcardfs: failed to touch(%s): %d\n", - nomedia_fullpath, touch_err); - kfree(nomedia_fullpath); + printk(KERN_ERR "sdcardfs: failed to create .nomedia in %s: %d\n", + lower_path.dentry->d_name.name, touch_err); goto out; } - kfree(nomedia_fullpath); } out: - unlock_dir(lower_parent_dentry); + current->fs = saved_fs; + free_fs_struct(copied_fs); +out_unlock: sdcardfs_put_lower_path(dentry, &lower_path); out_revert: REVERT_CRED(saved_cred); @@ -364,6 +376,7 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) { struct dentry *lower_dentry; struct dentry *lower_dir_dentry; + struct vfsmount *lower_mnt; int err; struct path lower_path; const struct cred *saved_cred = NULL; @@ -384,9 +397,10 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) sdcardfs_get_real_lower(dentry, &lower_path); lower_dentry = lower_path.dentry; + lower_mnt = lower_path.mnt; lower_dir_dentry = lock_parent(lower_dentry); - err = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry); + err = vfs_rmdir2(lower_mnt, d_inode(lower_dir_dentry), lower_dentry); if (err) goto out; @@ -451,6 +465,7 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_new_dentry = NULL; struct dentry *lower_old_dir_dentry = NULL; struct dentry *lower_new_dir_dentry = NULL; + struct vfsmount *lower_mnt = NULL; struct dentry *trap = NULL; struct dentry *new_parent = NULL; struct path lower_old_path, lower_new_path; @@ -475,6 +490,7 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, sdcardfs_get_lower_path(new_dentry, &lower_new_path); lower_old_dentry = lower_old_path.dentry; lower_new_dentry = lower_new_path.dentry; + lower_mnt = lower_old_path.mnt; lower_old_dir_dentry = dget_parent(lower_old_dentry); lower_new_dir_dentry = dget_parent(lower_new_dentry); @@ -490,7 +506,8 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } - err = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry, + err = vfs_rename2(lower_mnt, + d_inode(lower_old_dir_dentry), lower_old_dentry, d_inode(lower_new_dir_dentry), lower_new_dentry, NULL, 0); if (err) @@ -517,11 +534,9 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, } /* At this point, not all dentry information has been moved, so * we pass along new_dentry for the name.*/ - inode_lock(d_inode(old_dentry)); get_derived_permission_new(new_dentry->d_parent, old_dentry, new_dentry); - fix_derived_permission(d_inode(old_dentry)); - get_derive_permissions_recursive(old_dentry); - inode_unlock(d_inode(old_dentry)); + fixup_tmp_permissions(d_inode(old_dentry)); + fixup_top_recursive(old_dentry); out: unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); dput(lower_old_dir_dentry); @@ -590,16 +605,63 @@ static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie) } #endif -static int sdcardfs_permission(struct inode *inode, int mask) +static int sdcardfs_permission_wrn(struct inode *inode, int mask) +{ + WARN(1, "sdcardfs does not support permission. 
Use permission2.\n"); + return -EINVAL; +} + +void copy_attrs(struct inode *dest, const struct inode *src) +{ + dest->i_mode = src->i_mode; + dest->i_uid = src->i_uid; + dest->i_gid = src->i_gid; + dest->i_rdev = src->i_rdev; + dest->i_atime = src->i_atime; + dest->i_mtime = src->i_mtime; + dest->i_ctime = src->i_ctime; + dest->i_blkbits = src->i_blkbits; + dest->i_flags = src->i_flags; +#ifdef CONFIG_FS_POSIX_ACL + dest->i_acl = src->i_acl; +#endif +#ifdef CONFIG_SECURITY + dest->i_security = src->i_security; +#endif +} + +static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int mask) { int err; + struct inode tmp; + struct inode *top = grab_top(SDCARDFS_I(inode)); + + if (!top) { + release_top(SDCARDFS_I(inode)); + WARN(1, "Top value was null!\n"); + return -EINVAL; + } /* * Permission check on sdcardfs inode. * Calling process should have AID_SDCARD_RW permission + * Since generic_permission only needs i_mode, i_uid, + * i_gid, and i_sb, we can create a fake inode to pass + * this information down in. + * + * The underlying code may attempt to take locks in some + * cases for features we're not using, but if that changes, + * locks must be dealt with to avoid undefined behavior. */ - err = generic_permission(inode, mask); - + copy_attrs(&tmp, inode); + tmp.i_uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid); + tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top))); + tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top)); + release_top(SDCARDFS_I(inode)); + tmp.i_sb = inode->i_sb; + if (IS_POSIXACL(inode)) + printk(KERN_WARNING "%s: This may be undefined behavior... \n", __func__); + err = generic_permission(&tmp, mask); /* XXX * Original sdcardfs code calls inode_permission(lower_inode,.. ) * for checking inode permission. But doing such things here seems @@ -628,26 +690,63 @@ static int sdcardfs_permission(struct inode *inode, int mask) } -static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) +static int sdcardfs_setattr_wrn(struct dentry *dentry, struct iattr *ia) +{ + WARN(1, "sdcardfs does not support setattr. User setattr2.\n"); + return -EINVAL; +} + +static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct iattr *ia) { int err; struct dentry *lower_dentry; + struct vfsmount *lower_mnt; struct inode *inode; struct inode *lower_inode; struct path lower_path; struct iattr lower_ia; struct dentry *parent; + struct inode tmp; + struct dentry tmp_d; + struct inode *top; + const struct cred *saved_cred = NULL; inode = d_inode(dentry); + top = grab_top(SDCARDFS_I(inode)); + + if (!top) { + release_top(SDCARDFS_I(inode)); + return -EINVAL; + } + + /* + * Permission check on sdcardfs inode. + * Calling process should have AID_SDCARD_RW permission + * Since generic_permission only needs i_mode, i_uid, + * i_gid, and i_sb, we can create a fake inode to pass + * this information down in. + * + * The underlying code may attempt to take locks in some + * cases for features we're not using, but if that changes, + * locks must be dealt with to avoid undefined behavior. + * + */ + copy_attrs(&tmp, inode); + tmp.i_uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid); + tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top))); + tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top)); + tmp.i_size = i_size_read(inode); + release_top(SDCARDFS_I(inode)); + tmp.i_sb = inode->i_sb; + tmp_d.d_inode = &tmp; /* * Check if user has permission to change dentry. 
We don't check if * this user can change the lower inode: that should happen when * calling notify_change on the lower inode. */ - err = setattr_prepare(dentry, ia); + err = setattr_prepare(&tmp_d, ia); - /* no vfs_XXX operations required, cred overriding will be skipped. wj*/ if (!err) { /* check the Android group ID */ parent = dget_parent(dentry); @@ -663,8 +762,12 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) if (err) goto out_err; + /* save current_cred and override it */ + OVERRIDE_CRED(SDCARDFS_SB(dentry->d_sb), saved_cred); + sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; + lower_mnt = lower_path.mnt; lower_inode = sdcardfs_lower_inode(inode); /* prepare our own lower struct iattr (with the lower file) */ @@ -685,7 +788,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) if (current->mm) down_write(¤t->mm->mmap_sem); if (ia->ia_valid & ATTR_SIZE) { - err = inode_newsize_ok(inode, ia->ia_size); + err = inode_newsize_ok(&tmp, ia->ia_size); if (err) { if (current->mm) up_write(¤t->mm->mmap_sem); @@ -708,7 +811,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) * tries to open(), unlink(), then ftruncate() a file. */ inode_lock(d_inode(lower_dentry)); - err = notify_change(lower_dentry, &lower_ia, /* note: lower_ia */ + err = notify_change2(lower_mnt, lower_dentry, &lower_ia, /* note: lower_ia */ NULL); inode_unlock(d_inode(lower_dentry)); if (current->mm) @@ -727,10 +830,35 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) out: sdcardfs_put_lower_path(dentry, &lower_path); + REVERT_CRED(saved_cred); out_err: return err; } +static int sdcardfs_fillattr(struct vfsmount *mnt, struct inode *inode, struct kstat *stat) +{ + struct sdcardfs_inode_info *info = SDCARDFS_I(inode); + struct inode *top = grab_top(info); + if (!top) + return -EINVAL; + + stat->dev = inode->i_sb->s_dev; + stat->ino = inode->i_ino; + stat->mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top)); + stat->nlink = inode->i_nlink; + stat->uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid); + stat->gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top))); + stat->rdev = inode->i_rdev; + stat->size = i_size_read(inode); + stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; + stat->ctime = inode->i_ctime; + stat->blksize = (1 << inode->i_blkbits); + stat->blocks = inode->i_blocks; + release_top(info); + return 0; +} + static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -739,6 +867,7 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct inode *lower_inode; struct path lower_path; struct dentry *parent; + int err; parent = dget_parent(dentry); if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) { @@ -756,19 +885,17 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, lower_dentry = lower_path.dentry; lower_inode = sdcardfs_lower_inode(inode); - sdcardfs_copy_and_fix_attrs(inode, lower_inode); fsstack_copy_inode_size(inode, lower_inode); - - generic_fillattr(inode, stat); + err = sdcardfs_fillattr(mnt, inode, stat); sdcardfs_put_lower_path(dentry, &lower_path); - return 0; + return err; } const struct inode_operations sdcardfs_symlink_iops = { - .permission = sdcardfs_permission, - .setattr = sdcardfs_setattr, + .permission2 = sdcardfs_permission, + .setattr2 = sdcardfs_setattr, /* XXX Following operations are implemented, * but FUSE(sdcard) or FAT does not 
support them * These methods are *NOT* perfectly tested. @@ -781,14 +908,14 @@ const struct inode_operations sdcardfs_symlink_iops = { const struct inode_operations sdcardfs_dir_iops = { .create = sdcardfs_create, .lookup = sdcardfs_lookup, -#if 0 - .permission = sdcardfs_permission, -#endif + .permission = sdcardfs_permission_wrn, + .permission2 = sdcardfs_permission, .unlink = sdcardfs_unlink, .mkdir = sdcardfs_mkdir, .rmdir = sdcardfs_rmdir, .rename = sdcardfs_rename, - .setattr = sdcardfs_setattr, + .setattr = sdcardfs_setattr_wrn, + .setattr2 = sdcardfs_setattr, .getattr = sdcardfs_getattr, /* XXX Following operations are implemented, * but FUSE(sdcard) or FAT does not support them @@ -800,7 +927,9 @@ const struct inode_operations sdcardfs_dir_iops = { }; const struct inode_operations sdcardfs_main_iops = { - .permission = sdcardfs_permission, - .setattr = sdcardfs_setattr, + .permission = sdcardfs_permission_wrn, + .permission2 = sdcardfs_permission, + .setattr = sdcardfs_setattr_wrn, + .setattr2 = sdcardfs_setattr, .getattr = sdcardfs_getattr, }; diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index 2d94870c09ba705d0c5de7280a0128b081735eb2..d271617290eeb2fc7fe6972490f2836096533dce 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -179,7 +179,7 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, struct inode *lower_inode; struct super_block *lower_sb; - lower_inode = lower_path->dentry->d_inode; + lower_inode = d_inode(lower_path->dentry); lower_sb = sdcardfs_lower_super(sb); /* check that the lower file system didn't cross a mount point */ @@ -240,6 +240,30 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, /* Use vfs_path_lookup to check if the dentry exists or not */ err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, &lower_path); + /* check for other cases */ + if (err == -ENOENT) { + struct dentry *child; + struct dentry *match = NULL; + inode_lock(d_inode(lower_dir_dentry)); + spin_lock(&lower_dir_dentry->d_lock); + list_for_each_entry(child, &lower_dir_dentry->d_subdirs, d_child) { + if (child && d_inode(child)) { + if (strcasecmp(child->d_name.name, name)==0) { + match = dget(child); + break; + } + } + } + spin_unlock(&lower_dir_dentry->d_lock); + inode_unlock(d_inode(lower_dir_dentry)); + if (match) { + err = vfs_path_lookup(lower_dir_dentry, + lower_dir_mnt, + match->d_name.name, 0, + &lower_path); + dput(match); + } + } /* no error: handle positive dentries */ if (!err) { @@ -335,7 +359,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, parent = dget_parent(dentry); - if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name)) { + if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) { ret = ERR_PTR(-EACCES); printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", @@ -362,18 +386,16 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, } if (ret) dentry = ret; - if (dentry->d_inode) { - fsstack_copy_attr_times(dentry->d_inode, - sdcardfs_lower_inode(dentry->d_inode)); - /* get drived permission */ - inode_lock(dentry->d_inode); + if (d_inode(dentry)) { + fsstack_copy_attr_times(d_inode(dentry), + sdcardfs_lower_inode(d_inode(dentry))); + /* get derived permission */ get_derived_permission(parent, dentry); - fix_derived_permission(dentry->d_inode); - inode_unlock(dentry->d_inode); + fixup_tmp_permissions(d_inode(dentry)); } /* update parent directory's atime */ - 
fsstack_copy_attr_atime(parent->d_inode, - sdcardfs_lower_inode(parent->d_inode)); + fsstack_copy_attr_atime(d_inode(parent), + sdcardfs_lower_inode(d_inode(parent))); out: sdcardfs_put_lower_path(parent, &lower_parent_path); diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index a6522286d7314b57e2269e31ab31ee4ec5bcd5dc..7a8eae29e44dcfde56ae2971bc08d07776224cac 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -28,7 +28,6 @@ enum { Opt_fsgid, Opt_gid, Opt_debug, - Opt_lower_fs, Opt_mask, Opt_multiuser, // May need? Opt_userid, @@ -49,7 +48,8 @@ static const match_table_t sdcardfs_tokens = { }; static int parse_options(struct super_block *sb, char *options, int silent, - int *debug, struct sdcardfs_mount_options *opts) + int *debug, struct sdcardfs_vfsmount_options *vfsopts, + struct sdcardfs_mount_options *opts) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -58,10 +58,10 @@ static int parse_options(struct super_block *sb, char *options, int silent, /* by default, we use AID_MEDIA_RW as uid, gid */ opts->fs_low_uid = AID_MEDIA_RW; opts->fs_low_gid = AID_MEDIA_RW; - opts->mask = 0; + vfsopts->mask = 0; opts->multiuser = false; opts->fs_user_id = 0; - opts->gid = 0; + vfsopts->gid = 0; /* by default, 0MB is reserved */ opts->reserved_mb = 0; @@ -94,7 +94,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_gid: if (match_int(&args[0], &option)) return 0; - opts->gid = option; + vfsopts->gid = option; break; case Opt_userid: if (match_int(&args[0], &option)) @@ -104,7 +104,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_mask: if (match_int(&args[0], &option)) return 0; - opts->mask = option; + vfsopts->mask = option; break; case Opt_multiuser: opts->multiuser = true; @@ -135,6 +135,65 @@ static int parse_options(struct super_block *sb, char *options, int silent, return 0; } +int parse_options_remount(struct super_block *sb, char *options, int silent, + struct sdcardfs_vfsmount_options *vfsopts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + int debug; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, sdcardfs_tokens, args); + + switch (token) { + case Opt_debug: + debug = 1; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + vfsopts->gid = option; + + break; + case Opt_mask: + if (match_int(&args[0], &option)) + return 0; + vfsopts->mask = option; + break; + case Opt_multiuser: + case Opt_userid: + case Opt_fsuid: + case Opt_fsgid: + case Opt_reserved_mb: + printk( KERN_WARNING "Option \"%s\" can't be changed during remount\n", p); + break; + /* unknown option */ + default: + if (!silent) { + printk( KERN_ERR "Unrecognized mount option \"%s\" " + "or missing value", p); + } + return -EINVAL; + } + } + + if (debug) { + printk( KERN_INFO "sdcardfs : options - debug:%d\n", debug); + printk( KERN_INFO "sdcardfs : options - gid:%d\n", vfsopts->gid); + printk( KERN_INFO "sdcardfs : options - mask:%d\n", vfsopts->mask); + } + + return 0; +} + #if 0 /* * our custom d_alloc_root work-alike @@ -172,14 +231,15 @@ EXPORT_SYMBOL_GPL(sdcardfs_super_list); * There is no need to lock the sdcardfs_super_info's rwsem as there is no * way anyone can have a reference to the superblock at this point in time. 
*/ -static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, - void *raw_data, int silent) +static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, + const char *dev_name, void *raw_data, int silent) { int err = 0; int debug; struct super_block *lower_sb; struct path lower_path; struct sdcardfs_sb_info *sb_info; + struct sdcardfs_vfsmount_options *mnt_opt = mnt->data; struct inode *inode; printk(KERN_INFO "sdcardfs version 2.0\n"); @@ -193,6 +253,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, printk(KERN_INFO "sdcardfs: dev_name -> %s\n", dev_name); printk(KERN_INFO "sdcardfs: options -> %s\n", (char *)raw_data); + printk(KERN_INFO "sdcardfs: mnt -> %p\n", mnt); /* parse lower path */ err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, @@ -212,7 +273,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, sb_info = sb->s_fs_info; /* parse options */ - err = parse_options(sb, raw_data, silent, &debug, &sb_info->options); + err = parse_options(sb, raw_data, silent, &debug, mnt_opt, &sb_info->options); if (err) { printk(KERN_ERR "sdcardfs: invalid options\n"); goto out_freesbi; @@ -236,7 +297,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, sb->s_op = &sdcardfs_sops; /* get a new inode and allocate our root dentry */ - inode = sdcardfs_iget(sb, lower_path.dentry->d_inode, 0); + inode = sdcardfs_iget(sb, d_inode(lower_path.dentry), 0); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_sput; @@ -268,16 +329,16 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); mutex_lock(&sdcardfs_super_list_lock); if(sb_info->options.multiuser) { - setup_derived_state(sb->s_root->d_inode, PERM_PRE_ROOT, sb_info->options.fs_user_id, AID_ROOT, false); + setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT, sb_info->options.fs_user_id, AID_ROOT, false, d_inode(sb->s_root)); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); /*err = prepare_dir(sb_info->obbpath_s, sb_info->options.fs_low_uid, sb_info->options.fs_low_gid, 00755);*/ } else { - setup_derived_state(sb->s_root->d_inode, PERM_ROOT, sb_info->options.fs_low_uid, AID_ROOT, false); + setup_derived_state(d_inode(sb->s_root), PERM_ROOT, sb_info->options.fs_user_id, AID_ROOT, false, d_inode(sb->s_root)); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); } - fix_derived_permission(sb->s_root->d_inode); + fixup_tmp_permissions(d_inode(sb->s_root)); sb_info->sb = sb; list_add(&sb_info->list, &sdcardfs_super_list); mutex_unlock(&sdcardfs_super_list_lock); @@ -306,9 +367,9 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, } /* A feature which supports mount_nodev() with options */ -static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, const char *, void *, int)) +static struct dentry *mount_nodev_with_options(struct vfsmount *mnt, + struct file_system_type *fs_type, int flags, const char *dev_name, void *data, + int (*fill_super)(struct vfsmount *, struct super_block *, const char *, void *, int)) { int error; @@ -319,7 +380,7 @@ static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type, s->s_flags = flags; - error = fill_super(s, dev_name, data, flags & MS_SILENT ? 1 : 0); + error = fill_super(mnt, s, dev_name, data, flags & MS_SILENT ? 
1 : 0); if (error) { deactivate_locked_super(s); return ERR_PTR(error); @@ -328,15 +389,27 @@ static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type, return dget(s->s_root); } -struct dentry *sdcardfs_mount(struct file_system_type *fs_type, int flags, +static struct dentry *sdcardfs_mount(struct vfsmount *mnt, + struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { /* * dev_name is a lower_path_name, * raw_data is a option string. */ - return mount_nodev_with_options(fs_type, flags, dev_name, - raw_data, sdcardfs_read_super); + return mount_nodev_with_options(mnt, fs_type, flags, dev_name, + raw_data, sdcardfs_read_super); +} + +static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + WARN(1, "sdcardfs does not support mount. Use mount2.\n"); + return ERR_PTR(-EINVAL); +} + +void *sdcardfs_alloc_mnt_data(void) { + return kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL); } void sdcardfs_kill_sb(struct super_block *sb) { @@ -353,7 +426,9 @@ void sdcardfs_kill_sb(struct super_block *sb) { static struct file_system_type sdcardfs_fs_type = { .owner = THIS_MODULE, .name = SDCARDFS_NAME, - .mount = sdcardfs_mount, + .mount = sdcardfs_mount_wrn, + .mount2 = sdcardfs_mount, + .alloc_mnt_data = sdcardfs_alloc_mnt_data, .kill_sb = sdcardfs_kill_sb, .fs_flags = 0, }; diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index 31e2145b8359bba5e5a5fce2186d368a12572798..03776fa5f26c748c58941f469f3ad68f947a7204 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -29,26 +29,13 @@ #include -#define STRING_BUF_SIZE (512) - struct hashtable_entry { struct hlist_node hlist; - void *key; - unsigned int value; -}; - -struct sb_list { - struct super_block *sb; - struct list_head list; + const char *key; + atomic_t value; }; -struct packagelist_data { - DECLARE_HASHTABLE(package_to_appid,8); - struct mutex hashtable_lock; - -}; - -static struct packagelist_data *pkgl_data_all; +static DEFINE_HASHTABLE(package_to_appid, 8); static struct kmem_cache *hashtable_entry_cachep; @@ -64,22 +51,21 @@ static unsigned int str_hash(const char *key) { return h; } -appid_t get_appid(void *pkgl_id, const char *app_name) +appid_t get_appid(const char *app_name) { - struct packagelist_data *pkgl_dat = pkgl_data_all; struct hashtable_entry *hash_cur; unsigned int hash = str_hash(app_name); appid_t ret_id; - mutex_lock(&pkgl_dat->hashtable_lock); - hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { + rcu_read_lock(); + hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) { if (!strcasecmp(app_name, hash_cur->key)) { - ret_id = (appid_t)hash_cur->value; - mutex_unlock(&pkgl_dat->hashtable_lock); + ret_id = atomic_read(&hash_cur->value); + rcu_read_unlock(); return ret_id; } } - mutex_unlock(&pkgl_dat->hashtable_lock); + rcu_read_unlock(); return 0; } @@ -120,116 +106,118 @@ int open_flags_to_access_mode(int open_flags) { } } -static int insert_str_to_int_lock(struct packagelist_data *pkgl_dat, char *key, - unsigned int value) +static struct hashtable_entry *alloc_packagelist_entry(const char *key, + appid_t value) +{ + struct hashtable_entry *ret = kmem_cache_alloc(hashtable_entry_cachep, + GFP_KERNEL); + if (!ret) + return NULL; + + ret->key = kstrdup(key, GFP_KERNEL); + if (!ret->key) { + kmem_cache_free(hashtable_entry_cachep, ret); + return NULL; + } + + atomic_set(&ret->value, value); + return ret; +} + +static int 
insert_packagelist_entry_locked(const char *key, appid_t value) { struct hashtable_entry *hash_cur; struct hashtable_entry *new_entry; unsigned int hash = str_hash(key); - hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { + hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) { if (!strcasecmp(key, hash_cur->key)) { - hash_cur->value = value; + atomic_set(&hash_cur->value, value); return 0; } } - new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL); + new_entry = alloc_packagelist_entry(key, value); if (!new_entry) return -ENOMEM; - new_entry->key = kstrdup(key, GFP_KERNEL); - new_entry->value = value; - hash_add(pkgl_dat->package_to_appid, &new_entry->hlist, hash); + hash_add_rcu(package_to_appid, &new_entry->hlist, hash); return 0; } -static void fixup_perms(struct super_block *sb) { +static void fixup_perms(struct super_block *sb, const char *key) { if (sb && sb->s_magic == SDCARDFS_SUPER_MAGIC) { - inode_lock(sb->s_root->d_inode); - get_derive_permissions_recursive(sb->s_root); - inode_unlock(sb->s_root->d_inode); + fixup_perms_recursive(sb->s_root, key, strlen(key)); } } -static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key, - unsigned int value) { - int ret; +static void fixup_all_perms(const char *key) +{ struct sdcardfs_sb_info *sbinfo; - mutex_lock(&sdcardfs_super_list_lock); - mutex_lock(&pkgl_dat->hashtable_lock); - ret = insert_str_to_int_lock(pkgl_dat, key, value); - mutex_unlock(&pkgl_dat->hashtable_lock); + list_for_each_entry(sbinfo, &sdcardfs_super_list, list) + if (sbinfo) + fixup_perms(sbinfo->sb, key); +} - list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { - if (sbinfo) { - fixup_perms(sbinfo->sb); - } - } +static int insert_packagelist_entry(const char *key, appid_t value) +{ + int err; + + mutex_lock(&sdcardfs_super_list_lock); + err = insert_packagelist_entry_locked(key, value); + if (!err) + fixup_all_perms(key); mutex_unlock(&sdcardfs_super_list_lock); - return ret; + + return err; } -static void remove_str_to_int_lock(struct hashtable_entry *h_entry) { - kfree(h_entry->key); - hash_del(&h_entry->hlist); - kmem_cache_free(hashtable_entry_cachep, h_entry); +static void free_packagelist_entry(struct hashtable_entry *entry) +{ + kfree(entry->key); + hash_del_rcu(&entry->hlist); + kmem_cache_free(hashtable_entry_cachep, entry); } -static void remove_str_to_int(struct packagelist_data *pkgl_dat, const char *key) +static void remove_packagelist_entry_locked(const char *key) { - struct sdcardfs_sb_info *sbinfo; struct hashtable_entry *hash_cur; unsigned int hash = str_hash(key); - mutex_lock(&sdcardfs_super_list_lock); - mutex_lock(&pkgl_dat->hashtable_lock); - hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { + + hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) { if (!strcasecmp(key, hash_cur->key)) { - remove_str_to_int_lock(hash_cur); - break; - } - } - mutex_unlock(&pkgl_dat->hashtable_lock); - list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { - if (sbinfo) { - fixup_perms(sbinfo->sb); + hash_del_rcu(&hash_cur->hlist); + synchronize_rcu(); + free_packagelist_entry(hash_cur); + return; } } +} + +static void remove_packagelist_entry(const char *key) +{ + mutex_lock(&sdcardfs_super_list_lock); + remove_packagelist_entry_locked(key); + fixup_all_perms(key); mutex_unlock(&sdcardfs_super_list_lock); return; } -static void remove_all_hashentrys(struct packagelist_data *pkgl_dat) +static void packagelist_destroy(void) { struct hashtable_entry 
*hash_cur; struct hlist_node *h_t; + HLIST_HEAD(free_list); int i; - mutex_lock(&pkgl_dat->hashtable_lock); - hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist) - remove_str_to_int_lock(hash_cur); - mutex_unlock(&pkgl_dat->hashtable_lock); - hash_init(pkgl_dat->package_to_appid); -} - -static struct packagelist_data * packagelist_create(void) -{ - struct packagelist_data *pkgl_dat; + mutex_lock(&sdcardfs_super_list_lock); + hash_for_each_rcu(package_to_appid, i, hash_cur, hlist) { + hash_del_rcu(&hash_cur->hlist); + hlist_add_head(&hash_cur->hlist, &free_list); - pkgl_dat = kmalloc(sizeof(*pkgl_dat), GFP_KERNEL | __GFP_ZERO); - if (!pkgl_dat) { - printk(KERN_ERR "sdcardfs: Failed to create hash\n"); - return ERR_PTR(-ENOMEM); } - - mutex_init(&pkgl_dat->hashtable_lock); - hash_init(pkgl_dat->package_to_appid); - - return pkgl_dat; -} - -static void packagelist_destroy(struct packagelist_data *pkgl_dat) -{ - remove_all_hashentrys(pkgl_dat); + synchronize_rcu(); + hlist_for_each_entry_safe(hash_cur, h_t, &free_list, hlist) + free_packagelist_entry(hash_cur); + mutex_unlock(&sdcardfs_super_list_lock); printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld\n"); - kfree(pkgl_dat); } struct package_appid { @@ -245,26 +233,21 @@ static inline struct package_appid *to_package_appid(struct config_item *item) static ssize_t package_appid_attr_show(struct config_item *item, char *page) { - ssize_t count; - count = sprintf(page, "%d\n", get_appid(pkgl_data_all, item->ci_name)); - return count; + return scnprintf(page, PAGE_SIZE, "%u\n", get_appid(item->ci_name)); } static ssize_t package_appid_attr_store(struct config_item *item, const char *page, size_t count) { struct package_appid *package_appid = to_package_appid(item); - unsigned long tmp; - char *p = (char *) page; + unsigned int tmp; int ret; - tmp = simple_strtoul(p, &p, 10); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; + ret = kstrtouint(page, 10, &tmp); + if (ret) + return ret; - if (tmp > INT_MAX) - return -ERANGE; - ret = insert_str_to_int(pkgl_data_all, item->ci_name, (unsigned int)tmp); + ret = insert_packagelist_entry(item->ci_name, tmp); package_appid->add_pid = tmp; if (ret) return ret; @@ -289,7 +272,7 @@ static void package_appid_release(struct config_item *item) { printk(KERN_INFO "sdcardfs: removing %s\n", item->ci_dentry->d_name.name); /* item->ci_name is freed already, so we rely on the dentry */ - remove_str_to_int(pkgl_data_all, item->ci_dentry->d_name.name); + remove_packagelist_entry(item->ci_dentry->d_name.name); kfree(to_package_appid(item)); } @@ -333,21 +316,21 @@ static ssize_t packages_attr_show(struct config_item *item, char *page) { struct hashtable_entry *hash_cur; - struct hlist_node *h_t; int i; int count = 0, written = 0; - char errormsg[] = "\n"; + const char errormsg[] = "\n"; - mutex_lock(&pkgl_data_all->hashtable_lock); - hash_for_each_safe(pkgl_data_all->package_to_appid, i, h_t, hash_cur, hlist) { - written = scnprintf(page + count, PAGE_SIZE - sizeof(errormsg) - count, "%s %d\n", (char *)hash_cur->key, hash_cur->value); + rcu_read_lock(); + hash_for_each_rcu(package_to_appid, i, hash_cur, hlist) { + written = scnprintf(page + count, PAGE_SIZE - sizeof(errormsg) - count, "%s %d\n", + (const char *)hash_cur->key, atomic_read(&hash_cur->value)); if (count + written == PAGE_SIZE - sizeof(errormsg)) { count += scnprintf(page + count, PAGE_SIZE - count, errormsg); break; } count += written; } - mutex_unlock(&pkgl_data_all->hashtable_lock); + rcu_read_unlock(); return count; } @@ 
-430,7 +413,6 @@ int packagelist_init(void) return -ENOMEM; } - pkgl_data_all = packagelist_create(); configfs_sdcardfs_init(); return 0; } @@ -438,7 +420,7 @@ int packagelist_init(void) void packagelist_exit(void) { configfs_sdcardfs_exit(); - packagelist_destroy(pkgl_data_all); + packagelist_destroy(); if (hashtable_entry_cachep) kmem_cache_destroy(hashtable_entry_cachep); } diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 7cb14ace29f10ed38f8bdbd84c6aa913022c9fb2..66a97ef8d2615969a1be4f6ffe4c432c0252ae39 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -68,14 +68,20 @@ #define AID_PACKAGE_INFO 1027 -#define fix_derived_permission(x) \ + +/* + * Permissions are handled by our permission function. + * We don't want anyone who happens to look at our inode value to prematurely + * block access, so store more permissive values. These are probably never + * used. + */ +#define fixup_tmp_permissions(x) \ do { \ (x)->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(x)->d_uid); \ - (x)->i_gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(x))); \ - (x)->i_mode = ((x)->i_mode & S_IFMT) | get_mode(SDCARDFS_I(x));\ + (x)->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW); \ + (x)->i_mode = ((x)->i_mode & S_IFMT) | 0775;\ } while (0) - /* OVERRIDE_CRED() and REVERT_CRED() * OVERRID_CRED() * backup original task->cred @@ -169,6 +175,8 @@ struct sdcardfs_inode_info { userid_t userid; uid_t d_uid; bool under_android; + /* top folder for ownership */ + struct inode *top; struct inode vfs_inode; }; @@ -185,12 +193,18 @@ struct sdcardfs_mount_options { uid_t fs_low_uid; gid_t fs_low_gid; userid_t fs_user_id; - gid_t gid; - mode_t mask; bool multiuser; unsigned int reserved_mb; }; +struct sdcardfs_vfsmount_options { + gid_t gid; + mode_t mask; +}; + +extern int parse_options_remount(struct super_block *sb, char *options, int silent, + struct sdcardfs_vfsmount_options *vfsopts); + /* sdcardfs super-block data in memory */ struct sdcardfs_sb_info { struct super_block *sb; @@ -321,9 +335,39 @@ static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \ SDCARDFS_DENT_FUNC(lower_path) SDCARDFS_DENT_FUNC(orig_path) -static inline int get_gid(struct sdcardfs_inode_info *info) { - struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); - if (sb_info->options.gid == AID_SDCARD_RW) { +/* grab a refererence if we aren't linking to ourself */ +static inline void set_top(struct sdcardfs_inode_info *info, struct inode *top) +{ + struct inode *old_top = NULL; + BUG_ON(IS_ERR_OR_NULL(top)); + if (info->top && info->top != &info->vfs_inode) { + old_top = info->top; + } + if (top != &info->vfs_inode) + igrab(top); + info->top = top; + iput(old_top); +} + +static inline struct inode *grab_top(struct sdcardfs_inode_info *info) +{ + struct inode *top = info->top; + if (top) { + return igrab(top); + } else { + return NULL; + } +} + +static inline void release_top(struct sdcardfs_inode_info *info) +{ + iput(info->top); +} + +static inline int get_gid(struct vfsmount *mnt, struct sdcardfs_inode_info *info) { + struct sdcardfs_vfsmount_options *opts = mnt->data; + + if (opts->gid == AID_SDCARD_RW) { /* As an optimization, certain trusted system components only run * as owner but operate across all users. Since we're now handing * out the sdcard_rw GID only to trusted apps, we're okay relaxing @@ -331,14 +375,15 @@ static inline int get_gid(struct sdcardfs_inode_info *info) { * assigned to app directories are still multiuser aware. 
*/ return AID_SDCARD_RW; } else { - return multiuser_get_uid(info->userid, sb_info->options.gid); + return multiuser_get_uid(info->userid, opts->gid); } } -static inline int get_mode(struct sdcardfs_inode_info *info) { +static inline int get_mode(struct vfsmount *mnt, struct sdcardfs_inode_info *info) { int owner_mode; int filtered_mode; - struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); - int visible_mode = 0775 & ~sb_info->options.mask; + struct sdcardfs_vfsmount_options *opts = mnt->data; + int visible_mode = 0775 & ~opts->mask; + if (info->perm == PERM_PRE_ROOT) { /* Top of multi-user view should always be visible to ensure @@ -348,7 +393,7 @@ static inline int get_mode(struct sdcardfs_inode_info *info) { /* Block "other" access to Android directories, since only apps * belonging to a specific user should be in there; we still * leave +x open for the default view. */ - if (sb_info->options.gid == AID_SDCARD_RW) { + if (opts->gid == AID_SDCARD_RW) { visible_mode = visible_mode & ~0006; } else { visible_mode = visible_mode & ~0007; @@ -396,18 +441,19 @@ extern struct mutex sdcardfs_super_list_lock; extern struct list_head sdcardfs_super_list; /* for packagelist.c */ -extern appid_t get_appid(void *pkgl_id, const char *app_name); +extern appid_t get_appid(const char *app_name); extern int check_caller_access_to_name(struct inode *parent_node, const char* name); extern int open_flags_to_access_mode(int open_flags); extern int packagelist_init(void); extern void packagelist_exit(void); /* for derived_perm.c */ -extern void setup_derived_state(struct inode *inode, perm_t perm, - userid_t userid, uid_t uid, bool under_android); +extern void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, + uid_t uid, bool under_android, struct inode *top); extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry); -extern void get_derive_permissions_recursive(struct dentry *parent); +extern void fixup_top_recursive(struct dentry *parent); +extern void fixup_perms_recursive(struct dentry *dentry, const char *name, size_t len); extern void update_derived_permission_lock(struct dentry *dentry); extern int need_graft_path(struct dentry *dentry); @@ -444,7 +490,7 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m goto out_unlock; } - err = vfs_mkdir(d_inode(parent.dentry), dent, mode); + err = vfs_mkdir2(parent.mnt, d_inode(parent.dentry), dent, mode); if (err) { if (err == -EEXIST) err = 0; @@ -455,7 +501,7 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m attrs.ia_gid = make_kgid(&init_user_ns, gid); attrs.ia_valid = ATTR_UID | ATTR_GID; inode_lock(d_inode(dent)); - notify_change(dent, &attrs, NULL); + notify_change2(parent.mnt, dent, &attrs, NULL); inode_unlock(d_inode(dent)); out_dput: @@ -513,12 +559,16 @@ static inline int check_min_free_space(struct dentry *dentry, size_t size, int d return 1; } -/* Copies attrs and maintains sdcardfs managed attrs */ +/* + * Copies attrs and maintains sdcardfs managed attrs + * Since our permission check handles all special permissions, set those to be open + */ static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct inode *src) { - dest->i_mode = (src->i_mode & S_IFMT) | get_mode(SDCARDFS_I(dest)); + dest->i_mode = (src->i_mode & S_IFMT) | S_IRWXU | S_IRWXG | + S_IROTH | S_IXOTH; /* 0775 */ dest->i_uid = 
make_kuid(&init_user_ns, SDCARDFS_I(dest)->d_uid); - dest->i_gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(dest))); + dest->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW); dest->i_rdev = src->i_rdev; dest->i_atime = src->i_atime; dest->i_mtime = src->i_mtime; diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index 1d6490128c9905f36eba30e0e896a4634ee5dc32..edda32b68dc08d5f48213f0c20d75aac4b3fd07b 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -108,6 +108,50 @@ static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options return err; } +/* + * @mnt: mount point we are remounting + * @sb: superblock we are remounting + * @flags: numeric mount options + * @options: mount options string + */ +static int sdcardfs_remount_fs2(struct vfsmount *mnt, struct super_block *sb, + int *flags, char *options) +{ + int err = 0; + + /* + * The VFS will take care of "ro" and "rw" flags among others. We + * can safely accept a few flags (RDONLY, MANDLOCK), and honor + * SILENT, but anything else left over is an error. + */ + if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT | MS_REMOUNT)) != 0) { + printk(KERN_ERR + "sdcardfs: remount flags 0x%x unsupported\n", *flags); + err = -EINVAL; + } + printk(KERN_INFO "Remount options were %s for vfsmnt %p.\n", options, mnt); + err = parse_options_remount(sb, options, *flags & ~MS_SILENT, mnt->data); + + + return err; +} + +static void* sdcardfs_clone_mnt_data(void *data) { + struct sdcardfs_vfsmount_options* opt = kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL); + struct sdcardfs_vfsmount_options* old = data; + if(!opt) return NULL; + opt->gid = old->gid; + opt->mask = old->mask; + return opt; +} + +static void sdcardfs_copy_mnt_data(void *data, void *newdata) { + struct sdcardfs_vfsmount_options* old = data; + struct sdcardfs_vfsmount_options* new = newdata; + old->gid = new->gid; + old->mask = new->mask; +} + /* * Called by iput() when the inode reference count reached zero * and the inode is not hashed anywhere. 
Used to clear anything @@ -126,6 +170,7 @@ static void sdcardfs_evict_inode(struct inode *inode) */ lower_inode = sdcardfs_lower_inode(inode); sdcardfs_set_lower_inode(inode, NULL); + set_top(SDCARDFS_I(inode), inode); iput(lower_inode); } @@ -190,19 +235,24 @@ static void sdcardfs_umount_begin(struct super_block *sb) lower_sb->s_op->umount_begin(lower_sb); } -static int sdcardfs_show_options(struct seq_file *m, struct dentry *root) +static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, struct dentry *root) { struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb); struct sdcardfs_mount_options *opts = &sbi->options; + struct sdcardfs_vfsmount_options *vfsopts = mnt->data; if (opts->fs_low_uid != 0) - seq_printf(m, ",uid=%u", opts->fs_low_uid); + seq_printf(m, ",fsuid=%u", opts->fs_low_uid); if (opts->fs_low_gid != 0) - seq_printf(m, ",gid=%u", opts->fs_low_gid); - + seq_printf(m, ",fsgid=%u", opts->fs_low_gid); + if (vfsopts->gid != 0) + seq_printf(m, ",gid=%u", vfsopts->gid); if (opts->multiuser) seq_printf(m, ",multiuser"); - + if (vfsopts->mask) + seq_printf(m, ",mask=%u", vfsopts->mask); + if (opts->fs_user_id) + seq_printf(m, ",userid=%u", opts->fs_user_id); if (opts->reserved_mb != 0) seq_printf(m, ",reserved=%uMB", opts->reserved_mb); @@ -213,9 +263,12 @@ const struct super_operations sdcardfs_sops = { .put_super = sdcardfs_put_super, .statfs = sdcardfs_statfs, .remount_fs = sdcardfs_remount_fs, + .remount_fs2 = sdcardfs_remount_fs2, + .clone_mnt_data = sdcardfs_clone_mnt_data, + .copy_mnt_data = sdcardfs_copy_mnt_data, .evict_inode = sdcardfs_evict_inode, .umount_begin = sdcardfs_umount_begin, - .show_options = sdcardfs_show_options, + .show_options2 = sdcardfs_show_options, .alloc_inode = sdcardfs_alloc_inode, .destroy_inode = sdcardfs_destroy_inode, .drop_inode = generic_delete_inode, diff --git a/fs/splice.c b/fs/splice.c index 5a7750bd2eea765d06f5e8b76687701b24867c7b..63b8f54485dc01de275c374d3adf0c9ecf6f745b 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1086,7 +1086,13 @@ EXPORT_SYMBOL(do_splice_direct); static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) { - while (pipe->nrbufs == pipe->buffers) { + for (;;) { + if (unlikely(!pipe->readers)) { + send_sig(SIGPIPE, current, 0); + return -EPIPE; + } + if (pipe->nrbufs != pipe->buffers) + return 0; if (flags & SPLICE_F_NONBLOCK) return -EAGAIN; if (signal_pending(current)) @@ -1095,7 +1101,6 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) pipe_wait(pipe); pipe->waiting_writers--; } - return 0; } static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, diff --git a/fs/super.c b/fs/super.c index 0bed501a60befd619ddac10aac0a443c081304e7..719579f5e98e25b0730eb631eeb4c039128c3f61 100644 --- a/fs/super.c +++ b/fs/super.c @@ -750,7 +750,8 @@ struct super_block *user_get_super(dev_t dev) } /** - * do_remount_sb - asks filesystem to change mount options. + * do_remount_sb2 - asks filesystem to change mount options. + * @mnt: mount we are looking at * @sb: superblock in question * @flags: numeric part of options * @data: the rest of options @@ -758,7 +759,7 @@ struct super_block *user_get_super(dev_t dev) * * Alters the mount options of a mounted file system. 
*/ -int do_remount_sb(struct super_block *sb, int flags, void *data, int force) +int do_remount_sb2(struct vfsmount *mnt, struct super_block *sb, int flags, void *data, int force) { int retval; int remount_ro; @@ -800,7 +801,16 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) } } - if (sb->s_op->remount_fs) { + if (mnt && sb->s_op->remount_fs2) { + retval = sb->s_op->remount_fs2(mnt, sb, &flags, data); + if (retval) { + if (!force) + goto cancel_readonly; + /* If forced remount, go ahead despite any errors */ + WARN(1, "forced remount of a %s fs returned %i\n", + sb->s_type->name, retval); + } + } else if (sb->s_op->remount_fs) { retval = sb->s_op->remount_fs(sb, &flags, data); if (retval) { if (!force) @@ -832,6 +842,11 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) return retval; } +int do_remount_sb(struct super_block *sb, int flags, void *data, int force) +{ + return do_remount_sb2(NULL, sb, flags, data, force); +} + static void do_emergency_remount(struct work_struct *work) { struct super_block *sb, *p = NULL; @@ -1157,7 +1172,7 @@ struct dentry *mount_single(struct file_system_type *fs_type, EXPORT_SYMBOL(mount_single); struct dentry * -mount_fs(struct file_system_type *type, int flags, const char *name, void *data) +mount_fs(struct file_system_type *type, int flags, const char *name, struct vfsmount *mnt, void *data) { struct dentry *root; struct super_block *sb; @@ -1174,7 +1189,10 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) goto out_free_secdata; } - root = type->mount(type, flags, name, data); + if (type->mount2) + root = type->mount2(mnt, type, flags, name, data); + else + root = type->mount(type, flags, name, data); if (IS_ERR(root)) { error = PTR_ERR(root); goto out_free_secdata; diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index fa9a20cc60d6744e13ef7cc2f788f0fda0861d58..fe5e8d4970ae1c0420ac05b95feb2fa071285620 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -34,6 +34,11 @@ #include #include "ubifs.h" +static int try_read_node(const struct ubifs_info *c, void *buf, int type, + int len, int lnum, int offs); +static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_zbranch *zbr, void *node); + /* * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions. * @NAME_LESS: name corresponding to the first argument is less than second @@ -402,7 +407,19 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, return 0; } - err = ubifs_tnc_read_node(c, zbr, node); + if (c->replaying) { + err = fallible_read_node(c, &zbr->key, zbr, node); + /* + * When the node was not found, return -ENOENT, 0 otherwise. + * Negative return codes stay as-is. 
+ */ + if (err == 0) + err = -ENOENT; + else if (err == 1) + err = 0; + } else { + err = ubifs_tnc_read_node(c, zbr, node); + } if (err) return err; @@ -2766,7 +2783,11 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, if (nm->name) { if (err) { /* Handle collisions */ - err = resolve_collision(c, key, &znode, &n, nm); + if (c->replaying) + err = fallible_resolve_collision(c, key, &znode, &n, + nm, 0); + else + err = resolve_collision(c, key, &znode, &n, nm); dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); if (unlikely(err < 0)) diff --git a/fs/utimes.c b/fs/utimes.c index 22307cdf7014ba84887a036356deb54fe235d13d..87ce37bcaa84e880b5e5e50ca368fb8ad93710cc 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -91,7 +91,7 @@ static int utimes_common(struct path *path, struct timespec *times) } retry_deleg: inode_lock(inode); - error = notify_change(path->dentry, &newattrs, &delegated_inode); + error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index e5ebc37704608a336dd8690d55f06389cd0173bd..d346d42c54d1590250040f0b36c05367287e7bf5 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -256,6 +256,9 @@ xfs_ag_resv_init( goto out; } + ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + + xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= + pag->pagf_freeblks + pag->pagf_flcount); out: return error; } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index effb64cf714fee3894821a6d6796b3affd583faa..9f06a211e1570549cb8df3cdba2de2d01a79f890 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -95,10 +95,7 @@ unsigned int xfs_alloc_set_aside( struct xfs_mount *mp) { - unsigned int blocks; - - blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE); - return blocks; + return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4); } /* @@ -365,35 +362,11 @@ xfs_alloc_fix_len( return; ASSERT(rlen >= args->minlen && rlen <= args->maxlen); ASSERT(rlen % args->prod == args->mod); + ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >= + rlen + args->minleft); args->len = rlen; } -/* - * Fix up length if there is too little space left in the a.g. - * Return 1 if ok, 0 if too little, should give up. - */ -STATIC int -xfs_alloc_fix_minleft( - xfs_alloc_arg_t *args) /* allocation argument structure */ -{ - xfs_agf_t *agf; /* a.g. freelist header */ - int diff; /* free space difference */ - - if (args->minleft == 0) - return 1; - agf = XFS_BUF_TO_AGF(args->agbp); - diff = be32_to_cpu(agf->agf_freeblks) - - args->len - args->minleft; - if (diff >= 0) - return 1; - args->len += diff; /* shrink the allocated space */ - /* casts to (int) catch length underflows */ - if ((int)args->len >= (int)args->minlen) - return 1; - args->agbno = NULLAGBLOCK; - return 0; -} - /* * Update the two btrees, logically removing from freespace the extent * starting at rbno, rlen blocks. The extent is contained within the @@ -689,8 +662,6 @@ xfs_alloc_ag_vextent( xfs_alloc_arg_t *args) /* argument structure for allocation */ { int error=0; - xfs_extlen_t reservation; - xfs_extlen_t oldmax; ASSERT(args->minlen > 0); ASSERT(args->maxlen > 0); @@ -698,20 +669,6 @@ xfs_alloc_ag_vextent( ASSERT(args->mod < args->prod); ASSERT(args->alignment > 0); - /* - * Clamp maxlen to the amount of free space minus any reservations - * that have been made. 
- */ - oldmax = args->maxlen; - reservation = xfs_ag_resv_needed(args->pag, args->resv); - if (args->maxlen > args->pag->pagf_freeblks - reservation) - args->maxlen = args->pag->pagf_freeblks - reservation; - if (args->maxlen == 0) { - args->agbno = NULLAGBLOCK; - args->maxlen = oldmax; - return 0; - } - /* * Branch to correct routine based on the type. */ @@ -731,8 +688,6 @@ xfs_alloc_ag_vextent( /* NOTREACHED */ } - args->maxlen = oldmax; - if (error || args->agbno == NULLAGBLOCK) return error; @@ -841,9 +796,6 @@ xfs_alloc_ag_vextent_exact( args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) - args->agbno; xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) - goto not_found; - ASSERT(args->agbno + args->len <= tend); /* @@ -1149,12 +1101,7 @@ xfs_alloc_ag_vextent_near( XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->len = blen; - if (!xfs_alloc_fix_minleft(args)) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_near_nominleft(args); - return 0; - } - blen = args->len; + /* * We are allocating starting at bnew for blen blocks. */ @@ -1346,12 +1293,6 @@ xfs_alloc_ag_vextent_near( */ args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) { - trace_xfs_alloc_near_nominleft(args); - xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - return 0; - } rlen = args->len; (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, args->datatype, ltbnoa, ltlena, <new); @@ -1553,8 +1494,6 @@ xfs_alloc_ag_vextent_size( } xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) - goto out_nominleft; rlen = args->len; XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0); /* @@ -2056,7 +1995,7 @@ xfs_alloc_space_available( int flags) { struct xfs_perag *pag = args->pag; - xfs_extlen_t longest; + xfs_extlen_t alloc_len, longest; xfs_extlen_t reservation; /* blocks that are still reserved */ int available; @@ -2066,17 +2005,28 @@ xfs_alloc_space_available( reservation = xfs_ag_resv_needed(pag, args->resv); /* do we have enough contiguous free space for the allocation? */ + alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop; longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free, reservation); - if ((args->minlen + args->alignment + args->minalignslop - 1) > longest) + if (longest < alloc_len) return false; /* do we have enough free space remaining for the allocation? */ available = (int)(pag->pagf_freeblks + pag->pagf_flcount - - reservation - min_free - args->total); - if (available < (int)args->minleft || available <= 0) + reservation - min_free - args->minleft); + if (available < (int)max(args->total, alloc_len)) return false; + /* + * Clamp maxlen to the amount of free space available for the actual + * extent allocation. 
+ */ + if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) { + args->maxlen = available; + ASSERT(args->maxlen > 0); + ASSERT(args->maxlen >= args->minlen); + } + return true; } @@ -2122,7 +2072,8 @@ xfs_alloc_fix_freelist( } need = xfs_alloc_min_freelist(mp, pag); - if (!xfs_alloc_space_available(args, need, flags)) + if (!xfs_alloc_space_available(args, need, flags | + XFS_ALLOC_FLAG_CHECK)) goto out_agbp_relse; /* @@ -2455,12 +2406,15 @@ xfs_agf_verify( be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) return false; - if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || + if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) return false; if (xfs_sb_version_hasrmapbt(&mp->m_sb) && - be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS) + (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) return false; /* @@ -2477,7 +2431,8 @@ xfs_agf_verify( return false; if (xfs_sb_version_hasreflink(&mp->m_sb) && - be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS) + (be32_to_cpu(agf->agf_refcount_level) < 1 || + be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) return false; return true;; @@ -2634,12 +2589,10 @@ xfs_alloc_vextent( xfs_agblock_t agsize; /* allocation group size */ int error; int flags; /* XFS_ALLOC_FLAG_... locking flags */ - xfs_extlen_t minleft;/* minimum left value, temp copy */ xfs_mount_t *mp; /* mount structure pointer */ xfs_agnumber_t sagno; /* starting allocation group number */ xfs_alloctype_t type; /* input allocation type */ int bump_rotor = 0; - int no_min = 0; xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ mp = args->mp; @@ -2668,7 +2621,6 @@ xfs_alloc_vextent( trace_xfs_alloc_vextent_badargs(args); return 0; } - minleft = args->minleft; switch (type) { case XFS_ALLOCTYPE_THIS_AG: @@ -2679,9 +2631,7 @@ xfs_alloc_vextent( */ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); args->pag = xfs_perag_get(mp, args->agno); - args->minleft = 0; error = xfs_alloc_fix_freelist(args, 0); - args->minleft = minleft; if (error) { trace_xfs_alloc_vextent_nofix(args); goto error0; @@ -2746,9 +2696,7 @@ xfs_alloc_vextent( */ for (;;) { args->pag = xfs_perag_get(mp, args->agno); - if (no_min) args->minleft = 0; error = xfs_alloc_fix_freelist(args, flags); - args->minleft = minleft; if (error) { trace_xfs_alloc_vextent_nofix(args); goto error0; @@ -2788,20 +2736,17 @@ xfs_alloc_vextent( * or switch to non-trylock mode. 
*/ if (args->agno == sagno) { - if (no_min == 1) { + if (flags == 0) { args->agbno = NULLAGBLOCK; trace_xfs_alloc_vextent_allfailed(args); break; } - if (flags == 0) { - no_min = 1; - } else { - flags = 0; - if (type == XFS_ALLOCTYPE_START_BNO) { - args->agbno = XFS_FSB_TO_AGBNO(mp, - args->fsbno); - args->type = XFS_ALLOCTYPE_NEAR_BNO; - } + + flags = 0; + if (type == XFS_ALLOCTYPE_START_BNO) { + args->agbno = XFS_FSB_TO_AGBNO(mp, + args->fsbno); + args->type = XFS_ALLOCTYPE_NEAR_BNO; } } xfs_perag_put(args->pag); diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 7c404a6b0ae32292cf9a6eb39efd15026e8e76ed..1d0f48a501a3d6e9575b634ac022f30a91a2f309 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -56,7 +56,7 @@ typedef unsigned int xfs_alloctype_t; #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ #define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */ #define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */ - +#define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */ /* * Argument structure for xfs_alloc routines. diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 5ba2dac5e67c492a1a9fe5047995899290e25220..c06ec77a94182d346a7b96e3f1d47c3bcca0b5e0 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -421,7 +421,7 @@ xfs_allocbt_init_cursor( ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); cur->bc_tp = tp; cur->bc_mp = mp; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index af1ecb19121e9e8569c0ee907652405575d882c8..6622d46ddec3890c0356dab26fbff448436584d7 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -131,9 +131,6 @@ xfs_attr_get( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if (!xfs_inode_hasattr(ip)) - return -ENOATTR; - error = xfs_attr_args_init(&args, ip, name, flags); if (error) return error; @@ -392,9 +389,6 @@ xfs_attr_remove( if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - if (!xfs_inode_hasattr(dp)) - return -ENOATTR; - error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 8ea91f3630938a63523602e5d14c2553a472c015..2852521fc8ecf797be90372a23c3343e9a8039dc 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -253,6 +253,7 @@ xfs_attr3_leaf_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_attr_leafblock *leaf = bp->b_addr; + struct xfs_perag *pag = bp->b_pag; struct xfs_attr3_icleaf_hdr ichdr; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); @@ -273,7 +274,12 @@ xfs_attr3_leaf_verify( if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) return false; } - if (ichdr.count == 0) + /* + * In recovery there is a transient state where count == 0 is valid + * because we may have transitioned an empty shortform attr to a leaf + * if the attr didn't fit in shortform. 
+ */ + if (pag && pag->pagf_init && ichdr.count == 0) return false; /* XXX: need to range check rest of attr header values */ diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index c6eb21940783e4de3c555520eddbba48d0b19be4..f52fd63fce198822fb0b7e2b78e2bebde17c40f8 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -49,6 +49,8 @@ #include "xfs_rmap.h" #include "xfs_ag_resv.h" #include "xfs_refcount.h" +#include "xfs_rmap_btree.h" +#include "xfs_icache.h" kmem_zone_t *xfs_bmap_free_item_zone; @@ -190,8 +192,12 @@ xfs_bmap_worst_indlen( int maxrecs; /* maximum record count at this level */ xfs_mount_t *mp; /* mount structure */ xfs_filblks_t rval; /* return value */ + xfs_filblks_t orig_len; mp = ip->i_mount; + + /* Calculate the worst-case size of the bmbt. */ + orig_len = len; maxrecs = mp->m_bmap_dmxr[0]; for (level = 0, rval = 0; level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); @@ -199,12 +205,20 @@ xfs_bmap_worst_indlen( len += maxrecs - 1; do_div(len, maxrecs); rval += len; - if (len == 1) - return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - + if (len == 1) { + rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - level - 1; + break; + } if (level == 0) maxrecs = mp->m_bmap_dmxr[1]; } + + /* Calculate the worst-case size of the rmapbt. */ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) + + mp->m_rmap_maxlevels; + return rval; } @@ -504,7 +518,7 @@ void xfs_bmap_trace_exlist( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t cnt, /* count of entries in the list */ - int whichfork, /* data or attr fork */ + int whichfork, /* data or attr or cow fork */ unsigned long caller_ip) { xfs_extnum_t idx; /* extent record index */ @@ -513,11 +527,13 @@ xfs_bmap_trace_exlist( if (whichfork == XFS_ATTR_FORK) state |= BMAP_ATTRFORK; + else if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT(cnt == xfs_iext_count(ifp)); for (idx = 0; idx < cnt; idx++) - trace_xfs_extlist(ip, idx, whichfork, caller_ip); + trace_xfs_extlist(ip, idx, state, caller_ip); } /* @@ -811,7 +827,7 @@ xfs_bmap_extents_to_btree( XFS_BTREE_LONG_PTRS); arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); for (cnt = i = 0; i < nextents; i++) { ep = xfs_iext_get_ext(ifp, i); if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) { @@ -1137,6 +1153,10 @@ xfs_bmap_add_attrfork( goto trans_cancel; if (XFS_IFORK_Q(ip)) goto trans_cancel; + if (ip->i_d.di_anextents != 0) { + error = -EFSCORRUPTED; + goto trans_cancel; + } if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { /* * For inodes coming from pre-6.2 filesystems. @@ -1144,7 +1164,6 @@ xfs_bmap_add_attrfork( ASSERT(ip->i_d.di_aformat == 0); ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; } - ASSERT(ip->i_d.di_anextents == 0); xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -1296,7 +1315,7 @@ xfs_bmap_read_extents( /* * Here with bp and block set to the leftmost leaf node in the tree. */ - room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + room = xfs_iext_count(ifp); i = 0; /* * Loop over all leaf nodes. Copy information to the extent records. 
@@ -1361,8 +1380,9 @@ xfs_bmap_read_extents( return error; block = XFS_BUF_TO_BLOCK(bp); } - ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); - ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); + if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) + return -EFSCORRUPTED; + ASSERT(i == xfs_iext_count(ifp)); XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); return 0; error0: @@ -1404,7 +1424,7 @@ xfs_bmap_search_multi_extents( if (lastx > 0) { xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp); } - if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { + if (lastx < xfs_iext_count(ifp)) { xfs_bmbt_get_all(ep, gotp); *eofp = 0; } else { @@ -1497,7 +1517,7 @@ xfs_bmap_first_unused( (error = xfs_iread_extents(tp, ip, whichfork))) return error; lowest = *first_unused; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) { xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); off = xfs_bmbt_get_startoff(ep); @@ -1582,7 +1602,7 @@ xfs_bmap_last_extent( return error; } - nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (nextents == 0) { *is_empty = 1; return 0; @@ -1735,7 +1755,7 @@ xfs_bmap_add_extent_delay_real( &bma->ip->i_d.di_nextents); ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(bma->idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); ASSERT(!bma->cur || (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); @@ -1794,7 +1814,7 @@ xfs_bmap_add_extent_delay_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (bma->idx < xfs_iext_count(ifp) - 1) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); @@ -2300,7 +2320,7 @@ xfs_bmap_add_extent_unwritten_real( ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); ASSERT(*idx >= 0); - ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); XFS_STATS_INC(mp, xs_add_exlist); @@ -2356,7 +2376,7 @@ xfs_bmap_add_extent_unwritten_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (*idx < xfs_iext_count(&ip->i_df) - 1) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); if (isnullstartblock(RIGHT.br_startblock)) @@ -2836,7 +2856,7 @@ xfs_bmap_add_extent_hole_delay( * Check and set flags if the current (right) segment exists. * If it doesn't exist, we're converting the hole at end-of-file. */ - if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (*idx < xfs_iext_count(ifp)) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); @@ -2966,7 +2986,7 @@ xfs_bmap_add_extent_hole_real( ifp = XFS_IFORK_PTR(bma->ip, whichfork); ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(bma->idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); ASSERT(!bma->cur || !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); @@ -2992,7 +3012,7 @@ xfs_bmap_add_extent_hole_real( * Check and set flags if this segment has a current value. * Not true if we're inserting into the "hole" at eof. 
*/ - if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (bma->idx < xfs_iext_count(ifp)) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); if (isnullstartblock(right.br_startblock)) @@ -3700,7 +3720,7 @@ xfs_bmap_btalloc( align = xfs_get_cowextsz_hint(ap->ip); else if (xfs_alloc_is_userdata(ap->datatype)) align = xfs_get_extsz_hint(ap->ip); - if (unlikely(align)) { + if (align) { error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, ap->eof, 0, ap->conv, &ap->offset, &ap->length); @@ -3772,7 +3792,7 @@ xfs_bmap_btalloc( args.minlen = ap->minlen; } /* apply extent size hints if obtained earlier */ - if (unlikely(align)) { + if (align) { args.prod = align; if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); @@ -3883,7 +3903,6 @@ xfs_bmap_btalloc( args.fsbno = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; args.total = ap->minlen; - args.minleft = 0; if ((error = xfs_alloc_vextent(&args))) return error; ap->dfops->dop_low = true; @@ -4221,7 +4240,7 @@ xfs_bmapi_read( break; /* Else go on to the next record. */ - if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) + if (++lastx < xfs_iext_count(ifp)) xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); else eof = 1; @@ -4234,10 +4253,10 @@ int xfs_bmapi_reserve_delalloc( struct xfs_inode *ip, int whichfork, - xfs_fileoff_t aoff, + xfs_fileoff_t off, xfs_filblks_t len, + xfs_filblks_t prealloc, struct xfs_bmbt_irec *got, - struct xfs_bmbt_irec *prev, xfs_extnum_t *lastx, int eof) { @@ -4248,10 +4267,17 @@ xfs_bmapi_reserve_delalloc( char rt = XFS_IS_REALTIME_INODE(ip); xfs_extlen_t extsz; int error; + xfs_fileoff_t aoff = off; - alen = XFS_FILBLKS_MIN(len, MAXEXTLEN); + /* + * Cap the alloc length. Keep track of prealloc so we know whether to + * tag the inode before we return. + */ + alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN); if (!eof) alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); + if (prealloc && alen >= len) + prealloc = alen - len; /* Figure out the extent size, adjust alen */ if (whichfork == XFS_COW_FORK) @@ -4259,7 +4285,12 @@ xfs_bmapi_reserve_delalloc( else extsz = xfs_get_extsz_hint(ip); if (extsz) { - error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, + struct xfs_bmbt_irec prev; + + if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev)) + prev.br_startoff = NULLFILEOFF; + + error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, 1, 0, &aoff, &alen); ASSERT(!error); } @@ -4312,6 +4343,16 @@ xfs_bmapi_reserve_delalloc( */ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); + /* + * Tag the inode if blocks were preallocated. Note that COW fork + * preallocation can occur at the start or end of the extent, even when + * prealloc == 0, so we must also check the aligned offset and length. 
+ */ + if (whichfork == XFS_DATA_FORK && prealloc) + xfs_inode_set_eofblocks_tag(ip); + if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) + xfs_inode_set_cowblocks_tag(ip); + ASSERT(got->br_startoff <= aoff); ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); ASSERT(isnullstartblock(got->br_startblock)); @@ -4395,8 +4436,6 @@ xfs_bmapi_allocate( if (error) return error; - if (bma->dfops->dop_low) - bma->minleft = 0; if (bma->cur) bma->cur->bc_private.b.firstblock = *bma->firstblock; if (bma->blkno == NULLFSBLOCK) @@ -4568,8 +4607,6 @@ xfs_bmapi_write( int n; /* current extent index */ xfs_fileoff_t obno; /* old block number (offset) */ int whichfork; /* data or attr fork */ - char inhole; /* current location is hole in file */ - char wasdelay; /* old extent was delayed */ #ifdef DEBUG xfs_fileoff_t orig_bno; /* original block number value */ @@ -4655,22 +4692,44 @@ xfs_bmapi_write( bma.firstblock = firstblock; while (bno < end && n < *nmap) { - inhole = eof || bma.got.br_startoff > bno; - wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); + bool need_alloc = false, wasdelay = false; - /* - * Make sure we only reflink into a hole. - */ - if (flags & XFS_BMAPI_REMAP) - ASSERT(inhole); - if (flags & XFS_BMAPI_COWFORK) - ASSERT(!inhole); + /* in hole or beyoned EOF? */ + if (eof || bma.got.br_startoff > bno) { + if (flags & XFS_BMAPI_DELALLOC) { + /* + * For the COW fork we can reasonably get a + * request for converting an extent that races + * with other threads already having converted + * part of it, as there converting COW to + * regular blocks is not protected using the + * IOLOCK. + */ + ASSERT(flags & XFS_BMAPI_COWFORK); + if (!(flags & XFS_BMAPI_COWFORK)) { + error = -EIO; + goto error0; + } + + if (eof || bno >= end) + break; + } else { + need_alloc = true; + } + } else { + /* + * Make sure we only reflink into a hole. + */ + ASSERT(!(flags & XFS_BMAPI_REMAP)); + if (isnullstartblock(bma.got.br_startblock)) + wasdelay = true; + } /* * First, deal with the hole before the allocated space * that we found, if any. */ - if (inhole || wasdelay) { + if (need_alloc || wasdelay) { bma.eof = eof; bma.conv = !!(flags & XFS_BMAPI_CONVERT); bma.wasdel = wasdelay; @@ -4733,7 +4792,7 @@ xfs_bmapi_write( /* Else go on to the next record. */ bma.prev = bma.got; - if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) { + if (++bma.idx < xfs_iext_count(ifp)) { xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx), &bma.got); } else @@ -4885,7 +4944,7 @@ xfs_bmap_del_extent_delay( da_new = 0; ASSERT(*idx >= 0); - ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(del->br_blockcount > 0); ASSERT(got->br_startoff <= del->br_startoff); ASSERT(got_endoff >= del_endoff); @@ -4902,8 +4961,11 @@ xfs_bmap_del_extent_delay( * sb counters as we might have to borrow some blocks for the * indirect block accounting. */ - xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0, + error = xfs_trans_reserve_quota_nblks(NULL, ip, + -((long)del->br_blockcount), 0, isrt ? 
XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + if (error) + return error; ip->i_delayed_blks -= del->br_blockcount; if (whichfork == XFS_COW_FORK) @@ -5013,7 +5075,7 @@ xfs_bmap_del_extent_cow( got_endoff = got->br_startoff + got->br_blockcount; ASSERT(*idx >= 0); - ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(del->br_blockcount > 0); ASSERT(got->br_startoff <= del->br_startoff); ASSERT(got_endoff >= del_endoff); @@ -5119,8 +5181,7 @@ xfs_bmap_del_extent( state |= BMAP_COWFORK; ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / - (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp))); ASSERT(del->br_blockcount > 0); ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_get_all(ep, &got); @@ -5445,7 +5506,6 @@ __xfs_bunmapi( int logflags; /* transaction logging flags */ xfs_extlen_t mod; /* rt extent offset */ xfs_mount_t *mp; /* mount structure */ - xfs_extnum_t nextents; /* number of file extents */ xfs_bmbt_irec_t prev; /* previous extent record */ xfs_fileoff_t start; /* first file offset deleted */ int tmp_logflags; /* partial logging flags */ @@ -5477,8 +5537,7 @@ __xfs_bunmapi( if (!(ifp->if_flags & XFS_IFEXTENTS) && (error = xfs_iread_extents(tp, ip, whichfork))) return error; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - if (nextents == 0) { + if (xfs_iext_count(ifp) == 0) { *rlen = 0; return 0; } @@ -5963,7 +6022,7 @@ xfs_bmse_shift_one( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + total_extents = xfs_iext_count(ifp); xfs_bmbt_get_all(gotp, &got); @@ -6140,7 +6199,7 @@ xfs_bmap_shift_extents( * are collapsing out, so we cannot use the count of real extents here. * Instead we have to calculate it from the incore fork. */ - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + total_extents = xfs_iext_count(ifp); if (total_extents == 0) { *done = 1; goto del_cursor; @@ -6200,7 +6259,7 @@ xfs_bmap_shift_extents( * count can change. Update the total and grade the next record. */ if (direction == SHIFT_LEFT) { - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + total_extents = xfs_iext_count(ifp); stop_extent = total_extents; } diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 7cae6ec27fa6b26a84984fddb3dc35d6556e2122..e7d40b39f18f5ade0a3bef4f27697d907c22be2c 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -110,6 +110,9 @@ struct xfs_extent_free_item /* Map something in the CoW fork. 
*/ #define XFS_BMAPI_COWFORK 0x200 +/* Only convert delalloc space, don't allocate entirely new extents */ +#define XFS_BMAPI_DELALLOC 0x400 + #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ @@ -120,7 +123,8 @@ struct xfs_extent_free_item { XFS_BMAPI_CONVERT, "CONVERT" }, \ { XFS_BMAPI_ZERO, "ZERO" }, \ { XFS_BMAPI_REMAP, "REMAP" }, \ - { XFS_BMAPI_COWFORK, "COWFORK" } + { XFS_BMAPI_COWFORK, "COWFORK" }, \ + { XFS_BMAPI_DELALLOC, "DELALLOC" } static inline int xfs_bmapi_aflag(int w) @@ -242,9 +246,8 @@ struct xfs_bmbt_rec_host * int fork, int *eofp, xfs_extnum_t *lastxp, struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp); int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, - xfs_fileoff_t aoff, xfs_filblks_t len, - struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *prev, - xfs_extnum_t *lastx, int eof); + xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, + struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof); enum xfs_bmap_intent_type { XFS_BMAP_MAP = 1, diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 8007d2ba9aef9c1c40e6f7159fc2a75a9d142213..f76c1693ff01062f34fd51a28d2dda694874855b 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -502,12 +502,11 @@ xfs_bmbt_alloc_block( if (args.fsbno == NULLFSBLOCK && args.minleft) { /* * Could not find an AG with enough free space to satisfy - * a full btree split. Try again without minleft and if + * a full btree split. Try again and if * successful activate the lowspace algorithm. */ args.fsbno = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; - args.minleft = 0; error = xfs_alloc_vextent(&args); if (error) goto error0; @@ -796,7 +795,7 @@ xfs_bmbt_init_cursor( struct xfs_btree_cur *cur; ASSERT(whichfork != XFS_COW_FORK); - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); cur->bc_tp = tp; cur->bc_mp = mp; diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 0e80993c8a5914d3dfa1f861b2b459d7a513d67a..21e6a6ab6b9a4001f25313fd32bab0b79ec11ae4 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -1769,8 +1769,28 @@ xfs_btree_lookup_get_block( if (error) return error; + /* Check the inode owner since the verifiers don't. */ + if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) && + (cur->bc_flags & XFS_BTREE_LONG_PTRS) && + be64_to_cpu((*blkp)->bb_u.l.bb_owner) != + cur->bc_private.b.ip->i_ino) + goto out_bad; + + /* Did we get the level we were looking for? */ + if (be16_to_cpu((*blkp)->bb_level) != level) + goto out_bad; + + /* Check that internal nodes have at least one record. */ + if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0) + goto out_bad; + xfs_btree_setbuf(cur, level, bp); return 0; + +out_bad: + *blkp = NULL; + xfs_trans_brelse(cur->bc_tp, bp); + return -EFSCORRUPTED; } /* diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 20a96dd5af7eb6d4ebbaf07a1f6a1e4b10e7f981..7825d78d4587aa18abb2f9d21b4c866e86b89334 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -36,21 +36,29 @@ struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; /* - * @mode, if set, indicates that the type field needs to be set up. - * This uses the transformation from file mode to DT_* as defined in linux/fs.h - * for file type specification. This will be propagated into the directory - * structure if appropriate for the given operation and filesystem config. 
+ * Convert inode mode to directory entry filetype */ -const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { - [0] = XFS_DIR3_FT_UNKNOWN, - [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR, - [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK, - [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK, -}; +unsigned char xfs_mode_to_ftype(int mode) +{ + switch (mode & S_IFMT) { + case S_IFREG: + return XFS_DIR3_FT_REG_FILE; + case S_IFDIR: + return XFS_DIR3_FT_DIR; + case S_IFCHR: + return XFS_DIR3_FT_CHRDEV; + case S_IFBLK: + return XFS_DIR3_FT_BLKDEV; + case S_IFIFO: + return XFS_DIR3_FT_FIFO; + case S_IFSOCK: + return XFS_DIR3_FT_SOCK; + case S_IFLNK: + return XFS_DIR3_FT_SYMLINK; + default: + return XFS_DIR3_FT_UNKNOWN; + } +} /* * ASCII case-insensitive (ie. A-Z) support for directories that was @@ -631,7 +639,8 @@ xfs_dir2_isblock( if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) return rval; rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; - ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize); + if (rval != 0 && args->dp->i_d.di_size != args->geo->blksize) + return -EFSCORRUPTED; *vp = rval; return 0; } diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index becc926c3e3d900db0a021dd091e46e828fa6f09..ae0d55bf6500f55e16c05bd99f0aca4112741755 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -18,6 +18,9 @@ #ifndef __XFS_DIR2_H__ #define __XFS_DIR2_H__ +#include "xfs_da_format.h" +#include "xfs_da_btree.h" + struct xfs_defer_ops; struct xfs_da_args; struct xfs_inode; @@ -32,10 +35,9 @@ struct xfs_dir2_data_unused; extern struct xfs_name xfs_name_dotdot; /* - * directory filetype conversion tables. + * Convert inode mode to directory entry filetype */ -#define S_SHIFT 12 -extern const unsigned char xfs_mode_to_ftype[]; +extern unsigned char xfs_mode_to_ftype(int mode); /* * directory operations vector for encode/decode routines diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 725fc7841fdeb38fdfdba08f1621a83f764baeea..e526f5a5f0beaee2a3377cb16021271ba181ad69 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -329,7 +329,7 @@ xfs_dir3_data_read( err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, XFS_DATA_FORK, &xfs_dir3_data_buf_ops); - if (!err && tp) + if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); return err; } diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 51b4e0de1fdc424e13f039adf98ae2789a27ba74..d45c03779dae39011bdce9a3aa39aac2304f2a21 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2450,8 +2450,6 @@ xfs_ialloc_log_agi( ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); #endif - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); - /* * Compute byte offsets for the first and last fields in the first * region and log the agi buffer. 
This only logs up through @@ -2512,8 +2510,15 @@ xfs_agi_verify( if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) return false; - if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) + if (be32_to_cpu(agi->agi_level) < 1 || + be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) + return false; + + if (xfs_sb_version_hasfinobt(&mp->m_sb) && + (be32_to_cpu(agi->agi_free_level) < 1 || + be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS)) return false; + /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't @@ -2592,6 +2597,8 @@ xfs_read_agi( XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); if (error) return error; + if (tp) + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_AGI_BUF); xfs_buf_set_ref(*bpp, XFS_AGI_REF); return 0; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index eab68ae2e01184c3441eda64f2deb86773645015..6c6b95947e716d9d56f6cccc8ba870bfe673297c 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -357,7 +357,7 @@ xfs_inobt_init_cursor( struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); struct xfs_btree_cur *cur; - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); cur->bc_tp = tp; cur->bc_mp = mp; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 134424fac434fdd7fdd3cecf12d3007712b9734b..37ee7f01a35dbc79fbd4be82f42a8dcf46bd39dc 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -29,6 +29,7 @@ #include "xfs_icache.h" #include "xfs_trans.h" #include "xfs_ialloc.h" +#include "xfs_dir2.h" /* * Check that none of the inode's in the buffer have a next @@ -386,12 +387,25 @@ xfs_dinode_verify( struct xfs_inode *ip, struct xfs_dinode *dip) { + uint16_t mode; uint16_t flags; uint64_t flags2; if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) return false; + /* don't allow invalid i_size */ + if (be64_to_cpu(dip->di_size) & (1ULL << 63)) + return false; + + mode = be16_to_cpu(dip->di_mode); + if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) + return false; + + /* No zero-length symlinks/dirs. 
*/ + if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) + return false; + /* only version 3 or greater inodes are extensively verified here */ if (dip->di_version < 3) return true; diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 5dd56d3dbb3aeb41e7e46b921ca9052dd9066c41..222e103356c6d838742703315a5f558551faf51e 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -775,6 +775,13 @@ xfs_idestroy_fork( } } +/* Count number of incore extents based on if_bytes */ +xfs_extnum_t +xfs_iext_count(struct xfs_ifork *ifp) +{ + return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); +} + /* * Convert in-core extents to on-disk form * @@ -803,7 +810,7 @@ xfs_iextents_copy( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(ifp->if_bytes > 0); - nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nrecs = xfs_iext_count(ifp); XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); ASSERT(nrecs > 0); @@ -941,7 +948,7 @@ xfs_iext_get_ext( xfs_extnum_t idx) /* index of target extent */ { ASSERT(idx >= 0); - ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); + ASSERT(idx < xfs_iext_count(ifp)); if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { return ifp->if_u1.if_ext_irec->er_extbuf; @@ -1017,7 +1024,7 @@ xfs_iext_add( int new_size; /* size of extents after adding */ xfs_extnum_t nextents; /* number of extents in file */ - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT((idx >= 0) && (idx <= nextents)); byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); new_size = ifp->if_bytes + byte_diff; @@ -1241,7 +1248,7 @@ xfs_iext_remove( trace_xfs_iext_remove(ip, idx, state, _RET_IP_); ASSERT(ext_diff > 0); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); if (new_size == 0) { @@ -1270,7 +1277,7 @@ xfs_iext_remove_inline( ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); ASSERT(idx < XFS_INLINE_EXTS); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT(((nextents - ext_diff) > 0) && (nextents - ext_diff) < XFS_INLINE_EXTS); @@ -1309,7 +1316,7 @@ xfs_iext_remove_direct( ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); new_size = ifp->if_bytes - (ext_diff * sizeof(xfs_bmbt_rec_t)); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (new_size == 0) { xfs_iext_destroy(ifp); @@ -1546,7 +1553,7 @@ xfs_iext_indirect_to_direct( int size; /* size of file extents */ ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT(nextents <= XFS_LINEAR_EXTS); size = nextents * sizeof(xfs_bmbt_rec_t); @@ -1620,7 +1627,7 @@ xfs_iext_bno_to_ext( xfs_extnum_t nextents; /* number of file extents */ xfs_fileoff_t startoff = 0; /* start offset of extent */ - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (nextents == 0) { *idxp = 0; return NULL; @@ -1733,8 +1740,8 @@ xfs_iext_idx_to_irec( ASSERT(ifp->if_flags & XFS_IFEXTIREC); ASSERT(page_idx >= 0); - ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); - ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); + ASSERT(page_idx <= xfs_iext_count(ifp)); + ASSERT(page_idx < xfs_iext_count(ifp) || realloc); nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; erp_idx = 0; @@ -1782,7 +1789,7 @@ xfs_iext_irec_init( xfs_extnum_t nextents; /* number of 
extents in file */ ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT(nextents <= XFS_LINEAR_EXTS); erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); @@ -1906,7 +1913,7 @@ xfs_iext_irec_compact( ASSERT(ifp->if_flags & XFS_IFEXTIREC); nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (nextents == 0) { xfs_iext_destroy(ifp); @@ -1996,3 +2003,49 @@ xfs_ifork_init_cow( ip->i_cformat = XFS_DINODE_FMT_EXTENTS; ip->i_cnextents = 0; } + +/* + * Lookup the extent covering bno. + * + * If there is an extent covering bno return the extent index, and store the + * expanded extent structure in *gotp, and the extent index in *idx. + * If there is no extent covering bno, but there is an extent after it (e.g. + * it lies in a hole) return that extent in *gotp and its index in *idx + * instead. + * If bno is beyond the last extent return false, and return the index after + * the last valid index in *idxp. + */ +bool +xfs_iext_lookup_extent( + struct xfs_inode *ip, + struct xfs_ifork *ifp, + xfs_fileoff_t bno, + xfs_extnum_t *idxp, + struct xfs_bmbt_irec *gotp) +{ + struct xfs_bmbt_rec_host *ep; + + XFS_STATS_INC(ip->i_mount, xs_look_exlist); + + ep = xfs_iext_bno_to_ext(ifp, bno, idxp); + if (!ep) + return false; + xfs_bmbt_get_all(ep, gotp); + return true; +} + +/* + * Return true if there is an extent at index idx, and return the expanded + * extent structure at idx in that case. Else return false. + */ +bool +xfs_iext_get_extent( + struct xfs_ifork *ifp, + xfs_extnum_t idx, + struct xfs_bmbt_irec *gotp) +{ + if (idx < 0 || idx >= xfs_iext_count(ifp)) + return false; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp); + return true; +} diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index c9476f50e32d116109d1f71dad3895c5659496b8..7fb8365326d1a745583c4f133bc5a63668316b33 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -152,6 +152,7 @@ void xfs_init_local_fork(struct xfs_inode *, int, const void *, int); struct xfs_bmbt_rec_host * xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t); +xfs_extnum_t xfs_iext_count(struct xfs_ifork *); void xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t, struct xfs_bmbt_irec *, int); void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int); @@ -181,6 +182,12 @@ void xfs_iext_irec_compact_pages(struct xfs_ifork *); void xfs_iext_irec_compact_full(struct xfs_ifork *); void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int); +bool xfs_iext_lookup_extent(struct xfs_inode *ip, + struct xfs_ifork *ifp, xfs_fileoff_t bno, + xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp); +bool xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx, + struct xfs_bmbt_irec *gotp); + extern struct kmem_zone *xfs_ifork_zone; extern void xfs_ifork_init_cow(struct xfs_inode *ip); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 453bb2757ec23f334f351693fcfcdb5b5837ce35..2ba216966002980dcdc3a37bdbd9f3ad53c152bc 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -408,13 +408,14 @@ xfs_refcountbt_calc_size( */ xfs_extlen_t xfs_refcountbt_max_size( - struct xfs_mount *mp) + struct xfs_mount *mp, + xfs_agblock_t agblocks) { /* Bail out if we're uninitialized, which can happen in mkfs. 
*/ if (mp->m_refc_mxr[0] == 0) return 0; - return xfs_refcountbt_calc_size(mp, mp->m_sb.sb_agblocks); + return xfs_refcountbt_calc_size(mp, agblocks); } /* @@ -429,22 +430,24 @@ xfs_refcountbt_calc_reserves( { struct xfs_buf *agbp; struct xfs_agf *agf; + xfs_agblock_t agblocks; xfs_extlen_t tree_len; int error; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - *ask += xfs_refcountbt_max_size(mp); error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); if (error) return error; agf = XFS_BUF_TO_AGF(agbp); + agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_refcount_blocks); xfs_buf_relse(agbp); + *ask += xfs_refcountbt_max_size(mp, agblocks); *used += tree_len; return error; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index 3be7768bd51a1c0ebd8c2ccc6e930a730bd39b54..9db008b955b7ed68bb62d8654073da0302668171 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -66,7 +66,8 @@ extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp, unsigned long long len); -extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp); +extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp, + xfs_agblock_t agblocks); extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 83e672ff7577e9040d22668a308097922b998cab..33a28efc3085b04e3c32c6428558deb183deb882 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -549,13 +549,14 @@ xfs_rmapbt_calc_size( */ xfs_extlen_t xfs_rmapbt_max_size( - struct xfs_mount *mp) + struct xfs_mount *mp, + xfs_agblock_t agblocks) { /* Bail out if we're uninitialized, which can happen in mkfs. */ if (mp->m_rmap_mxr[0] == 0) return 0; - return xfs_rmapbt_calc_size(mp, mp->m_sb.sb_agblocks); + return xfs_rmapbt_calc_size(mp, agblocks); } /* @@ -570,25 +571,24 @@ xfs_rmapbt_calc_reserves( { struct xfs_buf *agbp; struct xfs_agf *agf; - xfs_extlen_t pool_len; + xfs_agblock_t agblocks; xfs_extlen_t tree_len; int error; if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) return 0; - /* Reserve 1% of the AG or enough for 1 block per record. */ - pool_len = max(mp->m_sb.sb_agblocks / 100, xfs_rmapbt_max_size(mp)); - *ask += pool_len; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); if (error) return error; agf = XFS_BUF_TO_AGF(agbp); + agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_rmap_blocks); xfs_buf_relse(agbp); + /* Reserve 1% of the AG or enough for 1 block per record. 
*/ + *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks)); *used += tree_len; return error; diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index 2a9ac472fb15a2408ca6d58addc6a5d439b61fe9..19c08e93304954d62c1c1dcdf35a36a49f38ee71 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -60,7 +60,8 @@ extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp, unsigned long long len); -extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp); +extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp, + xfs_agblock_t agblocks); extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index a70aec9106263f3e45e454e6fa3ef5ebcdac26ea..584ec896a53374f81da05906d517fba717686504 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -242,7 +242,7 @@ xfs_mount_validate_sb( sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_blocksize != (1 << sbp->sb_blocklog) || - sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG || + sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || sbp->sb_inodelog < XFS_DINODE_MIN_LOG || @@ -262,6 +262,12 @@ xfs_mount_validate_sb( return -EFSCORRUPTED; } + if (xfs_sb_version_hascrc(&mp->m_sb) && + sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { + xfs_notice(mp, "v5 SB sanity check failed"); + return -EFSCORRUPTED; + } + /* * Until this is fixed only page-sized or smaller data blocks work. */ @@ -338,13 +344,16 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp) XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD; sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD); - if (sbp->sb_qflags & XFS_PQUOTA_ACCT) { + if (sbp->sb_qflags & XFS_PQUOTA_ACCT && + sbp->sb_gquotino != NULLFSINO) { /* * In older version of superblock, on-disk superblock only * has sb_gquotino, and in-core superblock has both sb_gquotino * and sb_pquotino. But, only one of them is supported at any * point of time. So, if PQUOTA is set in disk superblock, - * copy over sb_gquotino to sb_pquotino. + * copy over sb_gquotino to sb_pquotino. The NULLFSINO test + * above is to make sure we don't do this twice and wipe them + * both out! */ sbp->sb_pquotino = sbp->sb_gquotino; sbp->sb_gquotino = NULLFSINO; diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 8d74870468c24bb5661a0afa0e7ab436f8b4ded0..cf044c0f4d4178ee09e9b3e4c1eba2ceef161a1a 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -75,11 +75,14 @@ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ * Minimum and maximum blocksize and sectorsize. * The blocksize upper limit is pretty much arbitrary. * The sectorsize upper limit is due to sizeof(sb_sectsize). + * CRC enable filesystems use 512 byte inodes, meaning 512 byte block sizes + * cannot be used. */ #define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */ #define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */ #define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG) #define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG) +#define XFS_MIN_CRC_BLOCKSIZE (1 << (XFS_MIN_BLOCKSIZE_LOG + 1)) #define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */ #define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 
32768 bytes */ #define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3e57a56cf8294770475e5edd04d03ee5565516fa..06763f5cc7018662a51a87e5f1b29c5f4e6f5e2b 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1158,19 +1158,22 @@ xfs_vm_releasepage( * block_invalidatepage() can send pages that are still marked dirty * but otherwise have invalidated buffers. * - * We've historically freed buffers on the latter. Instead, quietly - * filter out all dirty pages to avoid spurious buffer state warnings. - * This can likely be removed once shrink_active_list() is fixed. + * We want to release the latter to avoid unnecessary buildup of the + * LRU, skip the former and warn if we've left any lingering + * delalloc/unwritten buffers on clean pages. Skip pages with delalloc + * or unwritten buffers and warn if the page is not dirty. Otherwise + * try to release the buffers. */ - if (PageDirty(page)) - return 0; - xfs_count_page_state(page, &delalloc, &unwritten); - if (WARN_ON_ONCE(delalloc)) + if (delalloc) { + WARN_ON_ONCE(!PageDirty(page)); return 0; - if (WARN_ON_ONCE(unwritten)) + } + if (unwritten) { + WARN_ON_ONCE(!PageDirty(page)); return 0; + } return try_to_free_buffers(page); } @@ -1361,6 +1364,26 @@ __xfs_get_blocks( if (error) goto out_unlock; + /* + * The only time we can ever safely find delalloc blocks on direct I/O + * is a dio write to post-eof speculative preallocation. All other + * scenarios are indicative of a problem or misuse (such as mixing + * direct and mapped I/O). + * + * The file may be unmapped by the time we get here so we cannot + * reliably fail the I/O based on mapping. Instead, fail the I/O if this + * is a read or a write within eof. Otherwise, carry on but warn as a + * precuation if the file happens to be mapped. 
+ */ + if (direct && imap.br_startblock == DELAYSTARTBLOCK) { + if (!create || offset < i_size_read(VFS_I(ip))) { + WARN_ON_ONCE(1); + error = -EIO; + goto out_unlock; + } + WARN_ON_ONCE(mapping_mapped(VFS_I(ip)->i_mapping)); + } + /* for DAX, we convert unwritten extents directly */ if (create && (!nimaps || @@ -1450,8 +1473,6 @@ __xfs_get_blocks( (new || ISUNWRITTEN(&imap)))) set_buffer_new(bh_result); - BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK); - return 0; out_unlock: diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 552465e011ecb0df23aaeae57b94108dcdd24360..efb8ccd6bbf2c3c132befbb90e4928c699fa9195 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -359,9 +359,7 @@ xfs_bmap_count_blocks( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { - xfs_bmap_count_leaves(ifp, 0, - ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), - count); + xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count); return 0; } @@ -426,7 +424,7 @@ xfs_getbmapx_fix_eof_hole( ifp = XFS_IFORK_PTR(ip, whichfork); if (!moretocome && xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && - (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1)) + (lastx == xfs_iext_count(ifp) - 1)) out->bmv_oflags |= BMV_OF_LAST; } @@ -530,7 +528,6 @@ xfs_getbmap( xfs_bmbt_irec_t *map; /* buffer for user's data */ xfs_mount_t *mp; /* file system mount point */ int nex; /* # of user extents can do */ - int nexleft; /* # of user extents left */ int subnex; /* # of bmapi's can do */ int nmap; /* number of map entries */ struct getbmapx *out; /* output structure */ @@ -688,10 +685,8 @@ xfs_getbmap( goto out_free_map; } - nexleft = nex; - do { - nmap = (nexleft > subnex) ? subnex : nexleft; + nmap = (nex> subnex) ? subnex : nex; error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), XFS_BB_TO_FSB(mp, bmv->bmv_length), map, &nmap, bmapi_flags); @@ -699,8 +694,8 @@ xfs_getbmap( goto out_free_map; ASSERT(nmap <= subnex); - for (i = 0; i < nmap && nexleft && bmv->bmv_length && - cur_ext < bmv->bmv_count; i++) { + for (i = 0; i < nmap && bmv->bmv_length && + cur_ext < bmv->bmv_count - 1; i++) { out[cur_ext].bmv_oflags = 0; if (map[i].br_state == XFS_EXT_UNWRITTEN) out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; @@ -762,16 +757,27 @@ xfs_getbmap( continue; } + /* + * In order to report shared extents accurately, + * we report each distinct shared/unshared part + * of a single bmbt record using multiple bmap + * extents. To make that happen, we iterate the + * same map array item multiple times, each + * time trimming out the subextent that we just + * reported. + * + * Because of this, we must check the out array + * index (cur_ext) directly against bmv_count-1 + * to avoid overflows. + */ if (inject_map.br_startblock != NULLFSBLOCK) { map[i] = inject_map; i--; - } else - nexleft--; + } bmv->bmv_entries++; cur_ext++; } - } while (nmap && nexleft && bmv->bmv_length && - cur_ext < bmv->bmv_count); + } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1); out_free_map: kmem_free(map); @@ -1792,6 +1798,7 @@ xfs_swap_extent_forks( struct xfs_ifork tempifp, *ifp, *tifp; int aforkblks = 0; int taforkblks = 0; + xfs_extnum_t nextents; __uint64_t tmp; int error; @@ -1877,14 +1884,13 @@ xfs_swap_extent_forks( switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: - /* If the extents fit in the inode, fix the - * pointer. Otherwise it's already NULL or - * pointing to the extent. 
+ /* + * If the extents fit in the inode, fix the pointer. Otherwise + * it's already NULL or pointing to the extent. */ - if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { - ifp->if_u1.if_extents = - ifp->if_u2.if_inline_ext; - } + nextents = xfs_iext_count(&ip->i_df); + if (nextents <= XFS_INLINE_EXTS) + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; (*src_log_flags) |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: @@ -1896,14 +1902,13 @@ xfs_swap_extent_forks( switch (tip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: - /* If the extents fit in the inode, fix the - * pointer. Otherwise it's already NULL or - * pointing to the extent. + /* + * If the extents fit in the inode, fix the pointer. Otherwise + * it's already NULL or pointing to the extent. */ - if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { - tifp->if_u1.if_extents = - tifp->if_u2.if_inline_ext; - } + nextents = xfs_iext_count(&tip->i_df); + if (nextents <= XFS_INLINE_EXTS) + tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; (*target_log_flags) |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b5b9bffe352074806910a064ae41a941c97678c8..d7a67d7fbc7f9cb6627f859a629568c5afe0bb97 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -423,6 +423,7 @@ xfs_buf_allocate_memory( out_free_pages: for (i = 0; i < bp->b_page_count; i++) __free_page(bp->b_pages[i]); + bp->b_flags &= ~_XBF_PAGES; return error; } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 7a30b8f11db7a26f8a82ded531e8a5170ea03ad5..9d06cc30e875e147a5560bad24e5a55aedb65cf0 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -710,6 +710,10 @@ xfs_dq_get_next_id( /* Simple advance */ next_id = *id + 1; + /* If we'd wrap past the max ID, stop */ + if (next_id < *id) + return -ENOENT; + /* If new ID is within the current chunk, advancing it sufficed */ if (next_id % mp->m_quotainfo->qi_dqperchunk) { *id = next_id; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 6e4f7f900fea4c30f44477dc258db5334b980486..9a5d64b5f35a1d82b2112a7b07998d7063584405 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -939,7 +939,6 @@ xfs_file_clone_range( len, false); } -#define XFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) STATIC ssize_t xfs_file_dedupe_range( struct file *src_file, @@ -950,14 +949,6 @@ xfs_file_dedupe_range( { int error; - /* - * Limit the total length we will dedupe for each operation. - * This is intended to bound the total time spent in this - * ioctl to something sane. - */ - if (len > XFS_MAX_DEDUPE_LEN) - len = XFS_MAX_DEDUPE_LEN; - error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff, len, true); if (error) diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 93d12fa2670d53bf8b6bf23c51325d48467b8ab2..242e8091296daff7a029b4233d53db8241ecbcd7 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -631,6 +631,20 @@ xfs_growfs_data_private( xfs_set_low_space_thresholds(mp); mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); + /* + * If we expanded the last AG, free the per-AG reservation + * so we can reinitialize it with the new size. + */ + if (new) { + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, agno); + error = xfs_ag_resv_free(pag); + xfs_perag_put(pag); + if (error) + goto out; + } + /* Reserve AG metadata blocks. 
*/ error = xfs_fs_reserve_ag_blocks(mp); if (error && error != -ENOSPC) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f295049db68159523ac936727c748e7264f80993..29cc9886a3cb6e7fb14b1f4097f898e322f39c3b 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -123,7 +123,6 @@ __xfs_inode_free( { /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!xfs_isiflocked(ip)); XFS_STATS_DEC(ip->i_mount, vn_active); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); @@ -133,6 +132,8 @@ void xfs_inode_free( struct xfs_inode *ip) { + ASSERT(!xfs_isiflocked(ip)); + /* * Because we use RCU freeing we need to ensure the inode always * appears to be reclaimed with an invalid inode number when in the @@ -981,6 +982,7 @@ xfs_reclaim_inode( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { xfs_iunpin_wait(ip); + /* xfs_iflush_abort() drops the flush lock */ xfs_iflush_abort(ip, false); goto reclaim; } @@ -989,10 +991,10 @@ xfs_reclaim_inode( goto out_ifunlock; xfs_iunpin_wait(ip); } - if (xfs_iflags_test(ip, XFS_ISTALE)) - goto reclaim; - if (xfs_inode_clean(ip)) + if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) { + xfs_ifunlock(ip); goto reclaim; + } /* * Never flush out dirty data during non-blocking reclaim, as it would @@ -1030,25 +1032,24 @@ xfs_reclaim_inode( xfs_buf_relse(bp); } - xfs_iflock(ip); reclaim: + ASSERT(!xfs_isiflocked(ip)); + /* * Because we use RCU freeing we need to ensure the inode always appears * to be reclaimed with an invalid inode number when in the free state. - * We do this as early as possible under the ILOCK and flush lock so - * that xfs_iflush_cluster() can be guaranteed to detect races with us - * here. By doing this, we guarantee that once xfs_iflush_cluster has - * locked both the XFS_ILOCK and the flush lock that it will see either - * a valid, flushable inode that will serialise correctly against the - * locks below, or it will see a clean (and invalid) inode that it can - * skip. + * We do this as early as possible under the ILOCK so that + * xfs_iflush_cluster() can be guaranteed to detect races with us here. + * By doing this, we guarantee that once xfs_iflush_cluster has locked + * XFS_ILOCK that it will see either a valid, flushable inode that will + * serialise correctly, or it will see a clean (and invalid) inode that + * it can skip. */ spin_lock(&ip->i_flags_lock); ip->i_flags = XFS_IRECLAIM; ip->i_ino = 0; spin_unlock(&ip->i_flags_lock); - xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); @@ -1580,10 +1581,15 @@ xfs_inode_free_cowblocks( struct xfs_eofblocks *eofb = args; bool need_iolock = true; int match; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); - if (!xfs_reflink_has_real_cow_blocks(ip)) { + /* + * Just clear the tag if we have an empty cow fork or none at all. It's + * possible the inode was fully unshared since it was originally tagged. + */ + if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { trace_xfs_inode_free_cowblocks_invalid(ip); xfs_inode_clear_cowblocks_tag(ip); return 0; @@ -1593,7 +1599,8 @@ xfs_inode_free_cowblocks( * If the mapping is dirty or under writeback we cannot touch the * CoW fork. Leave it alone if we're in the midst of a directio. 
*/ - if (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || + if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) || + mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || atomic_read(&VFS_I(ip)->i_dio_count)) return 0; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4e560e6a12c1cc2d4b913292c62714bf78e9ae77..512ff13ed66a9568bc67cd82c2720c947855b64d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2041,7 +2041,6 @@ xfs_iunlink( agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket_index); - xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); return 0; @@ -2133,7 +2132,6 @@ xfs_iunlink_remove( agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket_index); - xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); } else { diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f14c1de2549db758f4bed9641fbfa99769570c71..71e8a81c91a371d659dbad0b57099916ad0b2168 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -246,6 +246,11 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) * Synchronize processes attempting to flush the in-core inode back to disk. */ +static inline int xfs_isiflocked(struct xfs_inode *ip) +{ + return xfs_iflags_test(ip, XFS_IFLOCK); +} + extern void __xfs_iflock(struct xfs_inode *ip); static inline int xfs_iflock_nowait(struct xfs_inode *ip) @@ -261,16 +266,12 @@ static inline void xfs_iflock(struct xfs_inode *ip) static inline void xfs_ifunlock(struct xfs_inode *ip) { + ASSERT(xfs_isiflocked(ip)); xfs_iflags_clear(ip, XFS_IFLOCK); smp_mb(); wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); } -static inline int xfs_isiflocked(struct xfs_inode *ip) -{ - return xfs_iflags_test(ip, XFS_IFLOCK); -} - /* * Flags for inode locking. 
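
In the xfs_inode.h change above, xfs_isiflocked() is moved ahead of xfs_ifunlock() so that the unlock helper can assert the flush lock is actually held before clearing the flag. A rough user-space sketch of the same assert-before-unlock pattern (illustrative names and flag bit, not the kernel implementation):

#include <assert.h>
#include <stdbool.h>

#define IFLOCK_BIT	0x1u	/* stand-in for the XFS_IFLOCK flag */

static bool is_flocked(unsigned int flags)
{
	return flags & IFLOCK_BIT;
}

static void funlock(unsigned int *flags)
{
	/* Unlocking a lock that is not held indicates a logic bug. */
	assert(is_flocked(*flags));
	*flags &= ~IFLOCK_BIT;
}

int main(void)
{
	unsigned int flags = IFLOCK_BIT;

	funlock(&flags);
	return 0;
}
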
* Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 9610e9c0095277c7b53feb9ad5e230d6b156327c..d90e7811ccdd9ccec14f9ebb29219bc065d36270 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -164,7 +164,7 @@ xfs_inode_item_format_data_fork( struct xfs_bmbt_rec *p; ASSERT(ip->i_df.if_u1.if_extents != NULL); - ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); + ASSERT(xfs_iext_count(&ip->i_df) > 0); p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK); @@ -261,7 +261,7 @@ xfs_inode_item_format_attr_fork( ip->i_afp->if_bytes > 0) { struct xfs_bmbt_rec *p; - ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) == + ASSERT(xfs_iext_count(ip->i_afp) == ip->i_d.di_anextents); ASSERT(ip->i_afp->if_u1.if_extents != NULL); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index c245bed3249bf1a192f430c7b7c781e84baf93e3..a39197501a7cd8cb597eb0250233c5802b1d8b9d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -910,16 +910,14 @@ xfs_ioc_fsgetxattr( if (attr) { if (ip->i_afp) { if (ip->i_afp->if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = ip->i_afp->if_bytes / - sizeof(xfs_bmbt_rec_t); + fa.fsx_nextents = xfs_iext_count(ip->i_afp); else fa.fsx_nextents = ip->i_d.di_anextents; } else fa.fsx_nextents = 0; } else { if (ip->i_df.if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = ip->i_df.if_bytes / - sizeof(xfs_bmbt_rec_t); + fa.fsx_nextents = xfs_iext_count(&ip->i_df); else fa.fsx_nextents = ip->i_d.di_nextents; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 436e109bb01e59d32bda87e9dd43ab3229cba509..cdc6bdd495be2512ec47048b6a185227d8f7e2e0 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -395,11 +395,12 @@ xfs_iomap_prealloc_size( struct xfs_inode *ip, loff_t offset, loff_t count, - xfs_extnum_t idx, - struct xfs_bmbt_irec *prev) + xfs_extnum_t idx) { struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + struct xfs_bmbt_irec prev; int shift = 0; int64_t freesp; xfs_fsblock_t qblocks; @@ -419,8 +420,8 @@ xfs_iomap_prealloc_size( */ if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) || XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) || - idx == 0 || - prev->br_startoff + prev->br_blockcount < offset_fsb) + !xfs_iext_get_extent(ifp, idx - 1, &prev) || + prev.br_startoff + prev.br_blockcount < offset_fsb) return mp->m_writeio_blocks; /* @@ -439,8 +440,8 @@ xfs_iomap_prealloc_size( * always extends to MAXEXTLEN rather than falling short due to things * like stripe unit/width alignment of real extents. 
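
Several hunks here replace the open-coded "if_bytes / sizeof(xfs_bmbt_rec_t)" expression with xfs_iext_count(). The helper only centralises that arithmetic: the number of in-core extent records is the fork's byte count divided by the record size. A stand-alone illustration of the same calculation (illustrative record type, not the kernel helper):

#include <stddef.h>
#include <stdio.h>

struct rec {
	unsigned long long l0;
	unsigned long long l1;
};	/* stand-in for xfs_bmbt_rec_t (two 64-bit words) */

static size_t ext_count(size_t fork_bytes)
{
	return fork_bytes / sizeof(struct rec);
}

int main(void)
{
	printf("%zu\n", ext_count(3 * sizeof(struct rec)));	/* prints: 3 */
	return 0;
}
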
*/ - if (prev->br_blockcount <= (MAXEXTLEN >> 1)) - alloc_blocks = prev->br_blockcount << 1; + if (prev.br_blockcount <= (MAXEXTLEN >> 1)) + alloc_blocks = prev.br_blockcount << 1; else alloc_blocks = XFS_B_TO_FSB(mp, offset); if (!alloc_blocks) @@ -535,11 +536,11 @@ xfs_file_iomap_begin_delay( xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t maxbytes_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); - xfs_fileoff_t end_fsb, orig_end_fsb; + xfs_fileoff_t end_fsb; int error = 0, eof = 0; struct xfs_bmbt_irec got; - struct xfs_bmbt_irec prev; xfs_extnum_t idx; + xfs_fsblock_t prealloc_blocks = 0; ASSERT(!XFS_IS_REALTIME_INODE(ip)); ASSERT(!xfs_get_extsz_hint(ip)); @@ -563,8 +564,7 @@ xfs_file_iomap_begin_delay( goto out_unlock; } - xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx, - &got, &prev); + eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); if (!eof && got.br_startoff <= offset_fsb) { if (xfs_is_reflink_inode(ip)) { bool shared; @@ -595,35 +595,32 @@ xfs_file_iomap_begin_delay( * the lower level functions are updated. */ count = min_t(loff_t, count, 1024 * PAGE_SIZE); - end_fsb = orig_end_fsb = - min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); if (eof) { - xfs_fsblock_t prealloc_blocks; - - prealloc_blocks = - xfs_iomap_prealloc_size(ip, offset, count, idx, &prev); + prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx); if (prealloc_blocks) { xfs_extlen_t align; xfs_off_t end_offset; + xfs_fileoff_t p_end_fsb; end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1); - end_fsb = XFS_B_TO_FSBT(mp, end_offset) + - prealloc_blocks; + p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) + + prealloc_blocks; align = xfs_eof_alignment(ip, 0); if (align) - end_fsb = roundup_64(end_fsb, align); + p_end_fsb = roundup_64(p_end_fsb, align); - end_fsb = min(end_fsb, maxbytes_fsb); - ASSERT(end_fsb > offset_fsb); + p_end_fsb = min(p_end_fsb, maxbytes_fsb); + ASSERT(p_end_fsb > offset_fsb); + prealloc_blocks = p_end_fsb - end_fsb; } } retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, - end_fsb - offset_fsb, &got, - &prev, &idx, eof); + end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof); switch (error) { case 0: break; @@ -631,8 +628,8 @@ xfs_file_iomap_begin_delay( case -EDQUOT: /* retry without any preallocation */ trace_xfs_delalloc_enospc(ip, offset, count); - if (end_fsb != orig_end_fsb) { - end_fsb = orig_end_fsb; + if (prealloc_blocks) { + prealloc_blocks = 0; goto retry; } /*FALLTHRU*/ @@ -640,13 +637,6 @@ xfs_file_iomap_begin_delay( goto out_unlock; } - /* - * Tag the inode as speculatively preallocated so we can reclaim this - * space on demand, if necessary. 
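
The xfs_iomap_prealloc_size() logic above grows speculative preallocation by doubling the previous extent's block count, but only while the doubled value still fits under the per-extent limit; otherwise it falls back to a size derived from the write offset. A simplified stand-alone sketch of that growth rule (the cap value and names below are illustrative, not the real MAXEXTLEN):

#include <stdio.h>

#define EXT_CAP		(1u << 21)	/* illustrative per-extent cap */

static unsigned int next_prealloc(unsigned int prev_blocks, unsigned int fallback)
{
	/* Double the previous extent unless that would exceed the cap. */
	if (prev_blocks <= (EXT_CAP >> 1))
		return prev_blocks << 1;
	return fallback;
}

int main(void)
{
	printf("%u\n", next_prealloc(64, 16));		/* prints: 128 */
	printf("%u\n", next_prealloc(EXT_CAP, 16));	/* prints: 16 */
	return 0;
}
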
- */ - if (end_fsb != orig_end_fsb) - xfs_inode_set_eofblocks_tag(ip); - trace_xfs_iomap_alloc(ip, offset, count, 0, &got); done: if (isnullstartblock(got.br_startblock)) @@ -691,7 +681,7 @@ xfs_iomap_write_allocate( xfs_trans_t *tp; int nimaps; int error = 0; - int flags = 0; + int flags = XFS_BMAPI_DELALLOC; int nres; if (whichfork == XFS_COW_FORK) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 405a65cd9d6bcf4c7fdf21f502ee267c0af7e970..f5e0f608e2455a1c1cff2dae0bcca878db65cc5e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -97,13 +97,28 @@ xfs_init_security( static void xfs_dentry_to_name( + struct xfs_name *namep, + struct dentry *dentry) +{ + namep->name = dentry->d_name.name; + namep->len = dentry->d_name.len; + namep->type = XFS_DIR3_FT_UNKNOWN; +} + +static int +xfs_dentry_mode_to_name( struct xfs_name *namep, struct dentry *dentry, int mode) { namep->name = dentry->d_name.name; namep->len = dentry->d_name.len; - namep->type = xfs_mode_to_ftype[(mode & S_IFMT) >> S_SHIFT]; + namep->type = xfs_mode_to_ftype(mode); + + if (unlikely(namep->type == XFS_DIR3_FT_UNKNOWN)) + return -EFSCORRUPTED; + + return 0; } STATIC void @@ -119,7 +134,7 @@ xfs_cleanup_inode( * xfs_init_security we must back out. * ENOSPC can hit here, among other things. */ - xfs_dentry_to_name(&teardown, dentry, 0); + xfs_dentry_to_name(&teardown, dentry); xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); } @@ -154,8 +169,12 @@ xfs_generic_create( if (error) return error; + /* Verify mode is valid also for tmpfile case */ + error = xfs_dentry_mode_to_name(&name, dentry, mode); + if (unlikely(error)) + goto out_free_acl; + if (!tmpfile) { - xfs_dentry_to_name(&name, dentry, mode); error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); } else { error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip); @@ -248,7 +267,7 @@ xfs_vn_lookup( if (dentry->d_name.len >= MAXNAMELEN) return ERR_PTR(-ENAMETOOLONG); - xfs_dentry_to_name(&name, dentry, 0); + xfs_dentry_to_name(&name, dentry); error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); if (unlikely(error)) { if (unlikely(error != -ENOENT)) @@ -275,7 +294,7 @@ xfs_vn_ci_lookup( if (dentry->d_name.len >= MAXNAMELEN) return ERR_PTR(-ENAMETOOLONG); - xfs_dentry_to_name(&xname, dentry, 0); + xfs_dentry_to_name(&xname, dentry); error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); if (unlikely(error)) { if (unlikely(error != -ENOENT)) @@ -310,7 +329,9 @@ xfs_vn_link( struct xfs_name name; int error; - xfs_dentry_to_name(&name, dentry, inode->i_mode); + error = xfs_dentry_mode_to_name(&name, dentry, inode->i_mode); + if (unlikely(error)) + return error; error = xfs_link(XFS_I(dir), XFS_I(inode), &name); if (unlikely(error)) @@ -329,7 +350,7 @@ xfs_vn_unlink( struct xfs_name name; int error; - xfs_dentry_to_name(&name, dentry, 0); + xfs_dentry_to_name(&name, dentry); error = xfs_remove(XFS_I(dir), &name, XFS_I(d_inode(dentry))); if (error) @@ -359,7 +380,9 @@ xfs_vn_symlink( mode = S_IFLNK | (irix_symlink_mode ? 
0777 & ~current_umask() : S_IRWXUGO); - xfs_dentry_to_name(&name, dentry, mode); + error = xfs_dentry_mode_to_name(&name, dentry, mode); + if (unlikely(error)) + goto out; error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); if (unlikely(error)) @@ -395,6 +418,7 @@ xfs_vn_rename( { struct inode *new_inode = d_inode(ndentry); int omode = 0; + int error; struct xfs_name oname; struct xfs_name nname; @@ -405,8 +429,14 @@ xfs_vn_rename( if (flags & RENAME_EXCHANGE) omode = d_inode(ndentry)->i_mode; - xfs_dentry_to_name(&oname, odentry, omode); - xfs_dentry_to_name(&nname, ndentry, d_inode(odentry)->i_mode); + error = xfs_dentry_mode_to_name(&oname, odentry, omode); + if (omode && unlikely(error)) + return error; + + error = xfs_dentry_mode_to_name(&nname, ndentry, + d_inode(odentry)->i_mode); + if (unlikely(error)) + return error; return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)), XFS_I(ndir), &nname, diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 68640fb63a542346cac31553dd0aad1a78421a16..1455b25205a8ea88c858bb1206ce2def13255766 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -330,11 +330,11 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) } #define ASSERT_ALWAYS(expr) \ - (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) #ifdef DEBUG #define ASSERT(expr) \ - (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) #ifndef STATIC # define STATIC noinline @@ -345,7 +345,7 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) #ifdef XFS_WARN #define ASSERT(expr) \ - (unlikely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__)) + (likely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__)) #ifndef STATIC # define STATIC static noinline diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 3b74fa011bb1566ce43dedc2fbdbc49336645d84..4017aa967331766233aa217fb2f4e088e6920f79 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3324,12 +3324,8 @@ xfs_log_force( xfs_mount_t *mp, uint flags) { - int error; - trace_xfs_log_force(mp, 0, _RET_IP_); - error = _xfs_log_force(mp, flags, NULL); - if (error) - xfs_warn(mp, "%s: error %d returned.", __func__, error); + _xfs_log_force(mp, flags, NULL); } /* @@ -3473,12 +3469,8 @@ xfs_log_force_lsn( xfs_lsn_t lsn, uint flags) { - int error; - trace_xfs_log_force(mp, lsn, _RET_IP_); - error = _xfs_log_force_lsn(mp, lsn, flags, NULL); - if (error) - xfs_warn(mp, "%s: error %d returned.", __func__, error); + _xfs_log_force_lsn(mp, lsn, flags, NULL); } /* diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index a60d9e2739d14a2ebcff8ee7dcedae7f5177bf3c..b669b123287bb115e561a6ea1c0be8ae4db359db 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1135,7 +1135,7 @@ xfs_qm_get_rtblks( return error; } rtblks = 0; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); for (idx = 0; idx < nextents; idx++) rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx)); *O_rtblks = (xfs_qcnt_t)rtblks; @@ -1177,7 +1177,8 @@ xfs_qm_dqusage_adjust( * the case in all other instances. It's OK that we do this because * quotacheck is done only at mount time. 
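
The xfs_linux.h hunk above flips the assertion macros from unlikely(expr) to likely(expr): assertions are expected to hold almost always, so the branch hint should favour the success path rather than the failure path. A minimal user-space analogue built on the same __builtin_expect primitive (GCC/Clang only; names are illustrative):

#include <stdio.h>
#include <stdlib.h>

#define my_likely(x)	__builtin_expect(!!(x), 1)

#define MY_ASSERT(expr)						\
	(my_likely(expr) ? (void)0 :				\
	 (fprintf(stderr, "assertion failed: %s\n", #expr), abort()))

int main(void)
{
	MY_ASSERT(1 + 1 == 2);	/* normally true, so the hint favours this path */
	return 0;
}
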
*/ - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); + error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, XFS_ILOCK_EXCL, + &ip); if (error) { *res = BULKSTAT_RV_NOTHING; return error; diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index fe86a668a57e70419e7fc6e5f126c0aa97cecb96..6e4c7446c3d4561f85d86686d5b4a40bc4cd0ce6 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -526,13 +526,14 @@ xfs_cui_recover( xfs_refcount_finish_one_cleanup(tp, rcur, error); error = xfs_defer_finish(&tp, &dfops, NULL); if (error) - goto abort_error; + goto abort_defer; set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); error = xfs_trans_commit(tp); return error; abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); +abort_defer: xfs_defer_cancel(&dfops); xfs_trans_cancel(tp); return error; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index a279b4e7f5feaa83a0cf1fbbfaf1c4d8e393ecbc..4d3f74e3c5e1d0bcf145c96b359d9d01bd06295d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -243,12 +243,11 @@ xfs_reflink_reserve_cow( struct xfs_bmbt_irec *imap, bool *shared) { - struct xfs_bmbt_irec got, prev; - xfs_fileoff_t end_fsb, orig_end_fsb; - int eof = 0, error = 0; - bool trimmed; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec got; + int error = 0; + bool eof = false, trimmed; xfs_extnum_t idx; - xfs_extlen_t align; /* * Search the COW fork extent list first. This serves two purposes: @@ -258,8 +257,9 @@ xfs_reflink_reserve_cow( * extent list is generally faster than going out to the shared extent * tree. */ - xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx, - &got, &prev); + + if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got)) + eof = true; if (!eof && got.br_startoff <= imap->br_startoff) { trace_xfs_reflink_cow_found(ip, imap); xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); @@ -285,33 +285,12 @@ xfs_reflink_reserve_cow( if (error) return error; - end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount; - - align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); - if (align) - end_fsb = roundup_64(end_fsb, align); - -retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, - end_fsb - imap->br_startoff, &got, &prev, &idx, eof); - switch (error) { - case 0: - break; - case -ENOSPC: - case -EDQUOT: - /* retry without any preallocation */ + imap->br_blockcount, 0, &got, &idx, eof); + if (error == -ENOSPC || error == -EDQUOT) trace_xfs_reflink_cow_enospc(ip, imap); - if (end_fsb != orig_end_fsb) { - end_fsb = orig_end_fsb; - goto retry; - } - /*FALLTHRU*/ - default: + if (error) return error; - } - - if (end_fsb != orig_end_fsb) - xfs_inode_set_cowblocks_tag(ip); trace_xfs_reflink_cow_alloc(ip, &got); return 0; @@ -486,7 +465,7 @@ xfs_reflink_trim_irec_to_next_cow( /* This is the extent before; try sliding up one. */ if (irec.br_startoff < offset_fsb) { idx++; - if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) + if (idx >= xfs_iext_count(ifp)) return 0; gotp = xfs_iext_get_ext(ifp, idx); xfs_bmbt_get_all(gotp, &irec); @@ -566,7 +545,7 @@ xfs_reflink_cancel_cow_blocks( xfs_bmap_del_extent_cow(ip, &idx, &got, &del); } - if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)) + if (++idx >= xfs_iext_count(ifp)) break; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got); } @@ -1345,8 +1324,14 @@ xfs_reflink_remap_range( goto out_unlock; } - if (len == 0) + /* Zero length dedupe exits immediately; reflink goes to EOF. 
*/ + if (len == 0) { + if (is_dedupe) { + ret = 0; + goto out_unlock; + } len = isize - pos_in; + } /* Ensure offsets don't wrap and the input is inside i_size */ if (pos_in + len < pos_in || pos_out + len < pos_out || @@ -1697,37 +1682,3 @@ xfs_reflink_unshare( trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); return error; } - -/* - * Does this inode have any real CoW reservations? - */ -bool -xfs_reflink_has_real_cow_blocks( - struct xfs_inode *ip) -{ - struct xfs_bmbt_irec irec; - struct xfs_ifork *ifp; - struct xfs_bmbt_rec_host *gotp; - xfs_extnum_t idx; - - if (!xfs_is_reflink_inode(ip)) - return false; - - /* Go find the old extent in the CoW fork. */ - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - gotp = xfs_iext_bno_to_ext(ifp, 0, &idx); - while (gotp) { - xfs_bmbt_get_all(gotp, &irec); - - if (!isnullstartblock(irec.br_startblock)) - return true; - - /* Roll on... */ - idx++; - if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - break; - gotp = xfs_iext_get_ext(ifp, idx); - } - - return false; -} diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index fad11607c9adf3937d6fa739c2ede29c7d0a8bb2..97ea9b487884e3fce14cad42951f1d37c760aeaf 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -50,6 +50,4 @@ extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); -extern bool xfs_reflink_has_real_cow_blocks(struct xfs_inode *ip); - #endif /* __XFS_REFLINK_H */ diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 276d3023d60f8201b635ae1f0c2ccbf26aac74fd..de6195e3891096316e51fd4efcd49ff091024a4a 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -396,7 +396,7 @@ max_retries_show( int retries; struct xfs_error_cfg *cfg = to_error_cfg(kobject); - if (cfg->retry_timeout == XFS_ERR_RETRY_FOREVER) + if (cfg->max_retries == XFS_ERR_RETRY_FOREVER) retries = -1; else retries = cfg->max_retries; @@ -422,7 +422,7 @@ max_retries_store( return -EINVAL; if (val == -1) - cfg->retry_timeout = XFS_ERR_RETRY_FOREVER; + cfg->max_retries = XFS_ERR_RETRY_FOREVER; else cfg->max_retries = val; return count; diff --git a/include/asm-generic/asm-prototypes.h b/include/asm-generic/asm-prototypes.h new file mode 100644 index 0000000000000000000000000000000000000000..939869c772b17a07a9776e323e1a253dba1f92d2 --- /dev/null +++ b/include/asm-generic/asm-prototypes.h @@ -0,0 +1,13 @@ +#include +#undef __memset +extern void *__memset(void *, int, __kernel_size_t); +#undef __memcpy +extern void *__memcpy(void *, const void *, __kernel_size_t); +#undef __memmove +extern void *__memmove(void *, const void *, __kernel_size_t); +#undef memset +extern void *memset(void *, int, __kernel_size_t); +#undef memcpy +extern void *memcpy(void *, const void *, __kernel_size_t); +#undef memmove +extern void *memmove(void *, const void *, __kernel_size_t); diff --git a/include/dt-bindings/clock/r8a7794-clock.h b/include/dt-bindings/clock/r8a7794-clock.h index 9d02f5317c7c315d2a88c9e381a3215416242509..88e64846cf370ff4a32c8423b5d00833c8cb5a81 100644 --- a/include/dt-bindings/clock/r8a7794-clock.h +++ b/include/dt-bindings/clock/r8a7794-clock.h @@ -20,8 +20,7 @@ #define R8A7794_CLK_QSPI 5 #define R8A7794_CLK_SDH 6 #define R8A7794_CLK_SD0 7 -#define R8A7794_CLK_Z 8 -#define R8A7794_CLK_RCAN 9 +#define R8A7794_CLK_RCAN 8 /* MSTP0 */ #define R8A7794_CLK_MSIOF0 0 diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 53d4ea5d496b42da20e42b2fa375a571dd2b436c..e47a7f7025a0900ac7e01d76d50fb0d74b58f2c8 100644 --- 
a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1060,7 +1060,7 @@ static inline int blk_pre_runtime_suspend(struct request_queue *q) static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {} static inline void blk_pre_runtime_resume(struct request_queue *q) {} static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} -extern inline void blk_set_runtime_active(struct request_queue *q) {} +static inline void blk_set_runtime_active(struct request_queue *q) {} #endif /* diff --git a/include/linux/capability.h b/include/linux/capability.h index dbc21c719ce61b4aa45e90af3ec62ed9ced57b26..6ffb67e10c060afbb24ea34d0dbb38ac36043d3c 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -240,8 +240,10 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap) return true; } #endif /* CONFIG_MULTIUSER */ +extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode); extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); +extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index aae03c4077b4640fe7b5d0c786c4f45d8befb91d..40b66f965df19e07bd989283c96098e3fe710ace 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -108,22 +108,16 @@ extern bool cpuhp_tasks_frozen; { .notifier_call = fn, .priority = pri }; \ __register_cpu_notifier(&fn##_nb); \ } -#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ -#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ -#ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); extern int __register_cpu_notifier(struct notifier_block *nb); extern void unregister_cpu_notifier(struct notifier_block *nb); extern void __unregister_cpu_notifier(struct notifier_block *nb); -#else -#ifndef MODULE -extern int register_cpu_notifier(struct notifier_block *nb); -extern int __register_cpu_notifier(struct notifier_block *nb); -#else +#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ +#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) + static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; @@ -133,7 +127,6 @@ static inline int __register_cpu_notifier(struct notifier_block *nb) { return 0; } -#endif static inline void unregister_cpu_notifier(struct notifier_block *nb) { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 32dc0cbd51ca3729bef594f7a84832bc6389e3ea..cc57986d3bfeeb56a2eddc9dc4186a4fff4ac838 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -177,6 +177,7 @@ u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); +bool cpufreq_driver_is_slow(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); void 
cpufreq_disable_fast_switch(struct cpufreq_policy *policy); @@ -359,6 +360,14 @@ struct cpufreq_driver { */ #define CPUFREQ_NEED_INITIAL_FREQ_CHECK (1 << 5) +/* + * Indicates that it is safe to call cpufreq_driver_target from + * non-interruptable context in scheduler hot paths. Drivers must + * opt-in to this flag, as the safe default is that they might sleep + * or be too slow for hot path use. + */ +#define CPUFREQ_DRIVER_FAST (1 << 6) + int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); @@ -553,6 +562,32 @@ struct governor_attr { ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf, size_t count); }; +/* CPUFREQ DEFAULT GOVERNOR */ +/* + * Performance governor is fallback governor if any other gov failed to auto + * load due latency restrictions + */ +#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE +extern struct cpufreq_governor cpufreq_gov_performance; +#endif +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_performance) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE) +extern struct cpufreq_governor cpufreq_gov_powersave; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_powersave) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE) +extern struct cpufreq_governor cpufreq_gov_userspace; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_userspace) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND) +extern struct cpufreq_governor cpufreq_gov_ondemand; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_ondemand) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE) +extern struct cpufreq_governor cpufreq_gov_conservative; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED) +extern struct cpufreq_governor cpufreq_gov_sched; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_sched) +#endif /********************************************************************* * FREQUENCY TABLE HELPERS * @@ -886,4 +921,8 @@ unsigned int cpufreq_generic_get(unsigned int cpu); int cpufreq_generic_init(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table, unsigned int transition_latency); + +struct sched_domain; +unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu); +unsigned long cpufreq_scale_max_freq_capacity(int cpu); #endif /* _LINUX_CPUFREQ_H */ diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index afe641c02dca3320f3bf4255092deafeb77d536f..ba1cad7b97cfa1546c83f2f8c628b5cd74529f2b 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -80,7 +80,6 @@ enum cpuhp_state { CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, CPUHP_AP_ARM_GLOBAL_TIMER_STARTING, - CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_JCORE_TIMER_STARTING, CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_TWD_STARTING, @@ -94,6 +93,8 @@ enum cpuhp_state { CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, CPUHP_AP_KVM_ARM_VGIC_STARTING, CPUHP_AP_KVM_ARM_TIMER_STARTING, + /* Must be the last timer callback */ + CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM_CORESIGHT4_STARTING, diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index bb31373c3478bdb0ed97dcf8856d1b81ccdc76e3..9a8eec9e59b2092a077d84372e0a2f6068b8a2e1 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -207,7 +207,7 @@ static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv, #endif /* kernel/sched/idle.c */ -extern void 
sched_idle_set_state(struct cpuidle_state *idle_state); +extern void sched_idle_set_state(struct cpuidle_state *idle_state, int index); extern void default_idle_call(void); #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 4d3f0d1aec73b856552f5d5e30b706ef5d2e04ec..1b413a9aab81c6b1d8070aa734af967952050fb6 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -62,6 +62,21 @@ static inline const struct file_operations *debugfs_real_fops(struct file *filp) return filp->f_path.dentry->d_fsdata; } +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return simple_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = simple_attr_release, \ + .read = debugfs_attr_read, \ + .write = debugfs_attr_write, \ + .llseek = generic_file_llseek, \ +} + #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_create_file(const char *name, umode_t mode, @@ -99,21 +114,6 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf, ssize_t debugfs_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); -#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ -static int __fops ## _open(struct inode *inode, struct file *file) \ -{ \ - __simple_attr_check_format(__fmt, 0ull); \ - return simple_attr_open(inode, file, __get, __set, __fmt); \ -} \ -static const struct file_operations __fops = { \ - .owner = THIS_MODULE, \ - .open = __fops ## _open, \ - .release = simple_attr_release, \ - .read = debugfs_attr_read, \ - .write = debugfs_attr_write, \ - .llseek = generic_file_llseek, \ -} - struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); @@ -233,8 +233,18 @@ static inline void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) { } -#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ - static const struct file_operations __fops = { 0 } +static inline ssize_t debugfs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + return -ENODEV; +} + +static inline ssize_t debugfs_attr_write(struct file *file, + const char __user *buf, + size_t len, loff_t *ppos) +{ + return -ENODEV; +} static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, char *new_name) diff --git a/include/linux/efi.h b/include/linux/efi.h index 2d089487d2dacc480f7f15686d3feca89aa59d71..cba7177cbec74ec682f70b667d5d133cbe184138 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -103,6 +103,7 @@ typedef struct { #define EFI_PAGE_SHIFT 12 #define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT) +#define EFI_PAGES_MAX (U64_MAX >> EFI_PAGE_SHIFT) typedef struct { u32 type; @@ -930,6 +931,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); +extern phys_addr_t __init efi_memmap_alloc(unsigned int num_entries); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); diff --git a/include/linux/fs.h b/include/linux/fs.h index 
dc0478c07b2abd3887d7f5b1b84818a4ee24162e..bed7a849371d6184c0ba86ec3b11c8106f66d308 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1603,13 +1603,21 @@ extern bool inode_owner_or_capable(const struct inode *inode); * VFS helper functions.. */ extern int vfs_create(struct inode *, struct dentry *, umode_t, bool); +extern int vfs_create2(struct vfsmount *, struct inode *, struct dentry *, umode_t, bool); extern int vfs_mkdir(struct inode *, struct dentry *, umode_t); +extern int vfs_mkdir2(struct vfsmount *, struct inode *, struct dentry *, umode_t); extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); +extern int vfs_mknod2(struct vfsmount *, struct inode *, struct dentry *, umode_t, dev_t); extern int vfs_symlink(struct inode *, struct dentry *, const char *); +extern int vfs_symlink2(struct vfsmount *, struct inode *, struct dentry *, const char *); extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **); +extern int vfs_link2(struct vfsmount *, struct dentry *, struct inode *, struct dentry *, struct inode **); extern int vfs_rmdir(struct inode *, struct dentry *); +extern int vfs_rmdir2(struct vfsmount *, struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); +extern int vfs_unlink2(struct vfsmount *, struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); +extern int vfs_rename2(struct vfsmount *, struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); /* @@ -1737,6 +1745,7 @@ struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct inode *, int); + int (*permission2) (struct vfsmount *, struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); int (*readlink) (struct dentry *, char __user *,int); @@ -1751,6 +1760,7 @@ struct inode_operations { int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); + int (*setattr2) (struct vfsmount *, struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, @@ -1799,9 +1809,13 @@ struct super_operations { int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); + int (*remount_fs2) (struct vfsmount *, struct super_block *, int *, char *); + void *(*clone_mnt_data) (void *); + void (*copy_mnt_data) (void *, void *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct dentry *); + int (*show_options2)(struct vfsmount *,struct seq_file *, struct dentry *); int (*show_devname)(struct seq_file *, struct dentry *); int (*show_path)(struct seq_file *, struct dentry *); int (*show_stats)(struct seq_file *, struct dentry *); @@ -2035,6 +2049,9 @@ struct file_system_type { #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. 
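
The fs.h additions above introduce *2 variants of the VFS helpers (vfs_create2(), vfs_mkdir2(), ...) that carry a struct vfsmount alongside the usual inode/dentry arguments, plus matching permission2/setattr2 hooks. The declarations alone do not show how the two entry points relate; one plausible arrangement (sketched below with made-up names, purely as an assumption about the wiring, not the actual fs/namei.c code) is for the legacy entry point to forward a NULL mount:

#include <stddef.h>
#include <stdio.h>

struct vfsmount;	/* opaque context carried by the *2 variants */

static int do_create2(struct vfsmount *mnt, const char *name)
{
	printf("create %s (mnt=%p)\n", name, (void *)mnt);
	return 0;
}

/* Legacy entry point keeps its old signature and passes no mount context. */
static int do_create(const char *name)
{
	return do_create2(NULL, name);
}

int main(void)
{
	return do_create("example");
}
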
*/ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); + struct dentry *(*mount2) (struct vfsmount *, struct file_system_type *, int, + const char *, void *); + void *(*alloc_mnt_data) (void); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; @@ -2333,6 +2350,8 @@ struct filename { extern long vfs_truncate(const struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); +extern int do_truncate2(struct vfsmount *, struct dentry *, loff_t start, + unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len); extern long do_sys_open(int dfd, const char __user *filename, int flags, @@ -2575,8 +2594,11 @@ extern void emergency_remount(void); extern sector_t bmap(struct inode *, sector_t); #endif extern int notify_change(struct dentry *, struct iattr *, struct inode **); +extern int notify_change2(struct vfsmount *, struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); +extern int inode_permission2(struct vfsmount *, struct inode *, int); extern int __inode_permission(struct inode *, int); +extern int __inode_permission2(struct vfsmount *, struct inode *, int); extern int generic_permission(struct inode *, int); extern int __check_sticky(struct inode *dir, struct inode *inode); diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 228bd44efa4c6efecd3af2e3ec16be8bd02d3d1b..497f2b3a5a62c8da6f87107de16519b176cc9f1f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -115,6 +115,16 @@ struct st_sensor_bdu { u8 mask; }; +/** + * struct st_sensor_das - ST sensor device data alignment selection + * @addr: address of the register. + * @mask: mask to write the das flag for left alignment. + */ +struct st_sensor_das { + u8 addr; + u8 mask; +}; + /** * struct st_sensor_data_ready_irq - ST sensor device data-ready interrupt * @addr: address of the register. @@ -185,6 +195,7 @@ struct st_sensor_transfer_function { * @enable_axis: Enable one or more axis of the sensor. * @fs: Full scale register and full scale list available. * @bdu: Block data update register. + * @das: Data Alignment Selection register. * @drdy_irq: Data ready register of the sensor. * @multi_read_bit: Use or not particular bit for [I2C/SPI] multi-read. * @bootime: samples to discard when sensor passing from power-down to power-up. 
@@ -200,6 +211,7 @@ struct st_sensor_settings { struct st_sensor_axis enable_axis; struct st_sensor_fullscale fs; struct st_sensor_bdu bdu; + struct st_sensor_das das; struct st_sensor_data_ready_irq drdy_irq; bool multi_read_bit; unsigned int bootime; diff --git a/include/linux/irq.h b/include/linux/irq.h index e79875574b393f33ed183fdc8f44277a49c26ba8..39e3254e5769d7da1d0f74cdfdf90bd9b40cfc8a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -184,6 +184,7 @@ struct irq_data { * * IRQD_TRIGGER_MASK - Mask for the trigger type bits * IRQD_SETAFFINITY_PENDING - Affinity setting is pending + * IRQD_ACTIVATED - Interrupt has already been activated * IRQD_NO_BALANCING - Balancing disabled for this IRQ * IRQD_PER_CPU - Interrupt is per cpu * IRQD_AFFINITY_SET - Interrupt affinity was set @@ -202,6 +203,7 @@ struct irq_data { enum { IRQD_TRIGGER_MASK = 0xf, IRQD_SETAFFINITY_PENDING = (1 << 8), + IRQD_ACTIVATED = (1 << 9), IRQD_NO_BALANCING = (1 << 10), IRQD_PER_CPU = (1 << 11), IRQD_AFFINITY_SET = (1 << 12), @@ -312,6 +314,21 @@ static inline bool irqd_affinity_is_managed(struct irq_data *d) return __irqd_to_state(d) & IRQD_AFFINITY_MANAGED; } +static inline bool irqd_is_activated(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_ACTIVATED; +} + +static inline void irqd_set_activated(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_ACTIVATED; +} + +static inline void irqd_clr_activated(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_ACTIVATED; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h index 089f70f83e97c9a1adf1087f10d095a02bd3e152..23da3af459fe7af7b5e5c2eabccbda350122e88b 100644 --- a/include/linux/jump_label_ratelimit.h +++ b/include/linux/jump_label_ratelimit.h @@ -14,6 +14,7 @@ struct static_key_deferred { #ifdef HAVE_JUMP_LABEL extern void static_key_slow_dec_deferred(struct static_key_deferred *key); +extern void static_key_deferred_flush(struct static_key_deferred *key); extern void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); @@ -26,6 +27,10 @@ static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) STATIC_KEY_CHECK_USE(); static_key_slow_dec(&key->key); } +static inline void static_key_deferred_flush(struct static_key_deferred *key) +{ + STATIC_KEY_CHECK_USE(); +} static inline void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 61d20c17f3b7e40ba706a49da463c2f9eabd7d93..254698856b8fc2723dae61e3dd20b6c8176129e7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -120,7 +120,7 @@ struct mem_cgroup_reclaim_iter { */ struct mem_cgroup_per_node { struct lruvec lruvec; - unsigned long lru_size[NR_LRU_LISTS]; + unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1]; @@ -432,7 +432,7 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg) int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, - int nr_pages); + int zid, int nr_pages); unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, int nid, unsigned int lru_mask); @@ -441,9 +441,23 @@ static inline unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { struct mem_cgroup_per_node *mz; + unsigned long nr_pages = 0; + 
int zid; mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - return mz->lru_size[lru]; + for (zid = 0; zid < MAX_NR_ZONES; zid++) + nr_pages += mz->lru_zone_size[zid][lru]; + return nr_pages; +} + +static inline +unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, + enum lru_list lru, int zone_idx) +{ + struct mem_cgroup_per_node *mz; + + mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + return mz->lru_zone_size[zone_idx][lru]; } void mem_cgroup_handle_over_high(void); @@ -671,6 +685,12 @@ mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { return 0; } +static inline +unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, + enum lru_list lru, int zone_idx) +{ + return 0; +} static inline unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 01033fadea4766d5e6efddc78ca595d68c021464..134a2f69c21abf7921181af0adff033bb459edc5 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -85,7 +85,8 @@ extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); /* VM interface that may be used by firmware interface */ extern int online_pages(unsigned long, unsigned long, int); -extern int test_pages_in_a_zone(unsigned long, unsigned long); +extern int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, + unsigned long *valid_start, unsigned long *valid_end); extern void __offline_isolated_pages(unsigned long, unsigned long); typedef void (*online_page_callback_t)(struct page *page); @@ -284,7 +285,7 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long map_offset); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); -extern int zone_can_shift(unsigned long pfn, unsigned long nr_pages, - enum zone_type target); +extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages, + enum zone_type target, int *zone_shift); #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 46c927f1404c83a67ac48843ba6a346c38fc0fca..2d191bf833907c4fc0d7190898c9d690c04664af 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1271,6 +1271,8 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void * unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); +extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, void *buf, int len, unsigned int gup_flags); long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 71613e8a720f99b6870f1e3f8957d2761a6ac1a0..41d376e7116dccae22d9881312cfb660cd9fd58e 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -39,7 +39,7 @@ static __always_inline void update_lru_size(struct lruvec *lruvec, { __update_lru_size(lruvec, lru, zid, nr_pages); #ifdef CONFIG_MEMCG - mem_cgroup_update_lru_size(lruvec, lru, nr_pages); + mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages); #endif } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4d740f2638d6bb6aec8ec060a3465f4470610f3c..3139ea45b8f45e8d31f2cb348d8cbd7e77db38b1 100644 --- 
a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -480,6 +480,7 @@ struct mm_struct { */ struct task_struct __rcu *owner; #endif + struct user_namespace *user_ns; /* store ref to file /proc//exe symlink points to */ struct file __rcu *exe_file; diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index ea4019c5421f1a841ba22f07ab278aebf5e596a5..46a4b798c7cf2ca83c9e7bcd92f2169bd64726e8 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -142,7 +142,6 @@ struct mmc_request { /* Allow other commands during this ongoing data transfer or busy wait */ bool cap_cmd_during_tfr; - ktime_t io_start; #ifdef CONFIG_BLOCK int lat_hist_enabled; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0f088f3a2fed8df6435819a52360940310272462..f99c993dd500838803ee5f6fd03fee2602ca8fe8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -972,12 +972,16 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, * @zonelist - The zonelist to search for a suitable zone * @highest_zoneidx - The zone index of the highest zone to return * @nodes - An optional nodemask to filter the zonelist with - * @zone - The first suitable zone found is returned via this parameter + * @return - Zoneref pointer for the first suitable zone found (see below) * * This function returns the first zone at or below a given zone index that is * within the allowed nodemask. The zoneref returned is a cursor that can be * used to iterate the zonelist with next_zones_zonelist by advancing it by * one before calling. + * + * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is + * never NULL). This may happen either genuinely, or due to concurrent nodemask + * update due to cpuset modification. 
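
The reworked comment above makes the contract explicit: first_zones_zonelist() always returns a valid zoneref cursor, but the cursor's ->zone member is NULL when no eligible zone exists. Callers therefore test the member, not the returned pointer. A minimal sketch of that caller pattern (illustrative stub types, not kernel code):

#include <stddef.h>
#include <stdio.h>

struct zone { const char *name; };
struct zoneref { struct zone *zone; };

/* Stand-in lookup: may return a cursor whose ->zone is NULL, never NULL itself. */
static struct zoneref *first_suitable(struct zoneref *cursor)
{
	return cursor;
}

int main(void)
{
	struct zoneref empty = { NULL };
	struct zoneref *z = first_suitable(&empty);

	if (!z->zone)	/* check the member, not the pointer */
		printf("no eligible zone\n");
	return 0;
}
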
*/ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, enum zone_type highest_zoneidx, diff --git a/include/linux/mount.h b/include/linux/mount.h index 1172cce949a4c36226fea3b1c64c66d6b01c313e..4f333413934a15935bbe31d709a58fc9d5f0ab09 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -67,6 +67,7 @@ struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ int mnt_flags; + void *data; }; struct file; /* forward dec */ diff --git a/include/linux/namei.h b/include/linux/namei.h index a2866f6073e15a6c3fcdd1bce8244bdc7097f418..cf437f56baf4aae9fada164c1d860fb2d68cfc96 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -82,6 +82,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); +extern struct dentry *lookup_one_len2(const char *, struct vfsmount *mnt, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); extern int follow_down_one(struct path *); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c0f6ad1b48b277f9aec83c869033f7b3b9c1c40..d213c7613051dd8aef10b5e0b08ed66e023ccd11 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2502,14 +2502,19 @@ static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) return NAPI_GRO_CB(skb)->frag0_len < hlen; } +static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) +{ + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; +} + static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, unsigned int offset) { if (!pskb_may_pull(skb, hlen)) return NULL; - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; + skb_gro_frag0_invalidate(skb); return skb->data + offset; } diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 9094faf0699d61b7b07bc5b9b0d0e9f76dc67742..039e76e918965ece2d4e5a13d09c012246341674 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -282,7 +282,7 @@ enum nfsstat4 { static inline bool seqid_mutating_err(u32 err) { - /* rfc 3530 section 8.1.5: */ + /* See RFC 7530, section 9.1.7 */ switch (err) { case NFS4ERR_STALE_CLIENTID: case NFS4ERR_STALE_STATEID: @@ -291,6 +291,7 @@ static inline bool seqid_mutating_err(u32 err) case NFS4ERR_BADXDR: case NFS4ERR_RESOURCE: case NFS4ERR_NOFILEHANDLE: + case NFS4ERR_MOVED: return false; }; return true; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c58752fe16c43110b736a0e21fe9a702459d8a34..f020ab4079d3e451bdd274fe7b7c4fd2601c803a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2256,12 +2256,29 @@ #define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 #define PCI_VENDOR_ID_MELLANOX 0x15b3 -#define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007 +#define PCI_DEVICE_ID_MELLANOX_CONNECTIB 0x1011 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4 0x1013 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX 0x1015 +#define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 #define PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE 0x5a46 -#define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278 -#define PCI_DEVICE_ID_MELLANOX_ARBEL 0x6282 -#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c -#define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274 +#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c +#define 
PCI_DEVICE_ID_MELLANOX_SINAI 0x6274 +#define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278 +#define PCI_DEVICE_ID_MELLANOX_ARBEL 0x6282 +#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340 +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354 +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372 +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732 +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746 +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e #define PCI_VENDOR_ID_DFI 0x15bd diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 1c7eec09e5eba7ae8c0cc8e82172791f992bb361..3a481a49546ef1c85d8f88bf7668f8a2c8e8c0f1 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -204,7 +204,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) static inline bool percpu_ref_tryget(struct percpu_ref *ref) { unsigned long __percpu *percpu_count; - int ret; + bool ret; rcu_read_lock_sched(); @@ -238,7 +238,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { unsigned long __percpu *percpu_count; - int ret = false; + bool ret = false; rcu_read_lock_sched(); diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index bca26157f5b695fa1d2b6381800ad660faf972f5..f6bc7650191281650823101bfb3bda175496a4f8 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -19,6 +19,7 @@ struct dev_pm_opp; struct device; +struct opp_table; enum dev_pm_opp_event { OPP_EVENT_ADD, OPP_EVENT_REMOVE, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE, @@ -62,8 +63,8 @@ int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, void dev_pm_opp_put_supported_hw(struct device *dev); int dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct device *dev); -int dev_pm_opp_set_regulator(struct device *dev, const char *name); -void dev_pm_opp_put_regulator(struct device *dev); +struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name); +void dev_pm_opp_put_regulator(struct opp_table *opp_table); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); @@ -170,12 +171,12 @@ static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) static inline void dev_pm_opp_put_prop_name(struct device *dev) {} -static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name) +static inline struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name) { - return -ENOTSUPP; + return ERR_PTR(-ENOTSUPP); } -static inline void dev_pm_opp_put_regulator(struct device *dev) {} +static inline void dev_pm_opp_put_regulator(struct opp_table *opp_table) {} static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) { diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index e30deb04615618e5c424d341b794a8f8274643c5..bed9557b69e74150d1110514a45f50fea8a066d3 100644 --- a/include/linux/power/bq27xxx_battery.h +++ 
b/include/linux/power/bq27xxx_battery.h @@ -4,7 +4,8 @@ enum bq27xxx_chip { BQ27000 = 1, /* bq27000, bq27200 */ BQ27010, /* bq27010, bq27210 */ - BQ27500, /* bq27500, bq27510, bq27520 */ + BQ27500, /* bq27500 */ + BQ27510, /* bq27510, bq27520 */ BQ27530, /* bq27530, bq27531 */ BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ BQ27545, /* bq27545 */ diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 504c98a278d46606d27f09d109589e0a1e2263d8..e0e539321ab95c38adb2ca9f28b358e04253c999 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -8,6 +8,9 @@ #include /* For task_active_pid_ns. */ #include +extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags); + /* * Ptrace flags * @@ -19,7 +22,6 @@ #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ #define PT_PTRACED 0x00000001 #define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ -#define PT_PTRACE_CAP 0x00000004 /* ptracer can follow suid-exec */ #define PT_OPT_FLAG_SHIFT 3 /* PT_TRACE_* event enable flags */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 321f9ed552a995f396696b71343f66cbd2e01d9e..01f71e1d2e941e359fc5fdd07f0645813ef8f845 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -444,6 +444,10 @@ bool __rcu_is_watching(void); #error "Unknown RCU implementation specified to kernel configuration" #endif +#define RCU_SCHEDULER_INACTIVE 0 +#define RCU_SCHEDULER_INIT 1 +#define RCU_SCHEDULER_RUNNING 2 + /* * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic * initialization and destruction of rcu_head on the stack. rcu_head structures diff --git a/include/linux/sched.h b/include/linux/sched.h index 05d342ddf6c2ade15280adb3fbbd399599639bc2..177f952d876e8b9866ea803a4a28624a80e093dd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -173,6 +173,9 @@ extern bool single_task_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); +#ifdef CONFIG_CPU_QUIET +extern u64 nr_running_integral(unsigned int cpu); +#endif extern void sched_update_nr_prod(int cpu, long delta, bool inc); extern void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg); @@ -1044,6 +1047,14 @@ enum cpu_idle_type { #define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT #define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) +struct sched_capacity_reqs { + unsigned long cfs; + unsigned long rt; + unsigned long dl; + + unsigned long total; +}; + /* * Wake-queues are lists of tasks with a pending wakeup, whose * callers have already marked the task as woken internally, @@ -1107,6 +1118,7 @@ extern void wake_up_q(struct wake_q_head *head); #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ #define SD_NUMA 0x4000 /* cross-node balancing */ +#define SD_SHARE_CAP_STATES 0x8000 /* Domain members share capacity state */ #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) @@ -1139,6 +1151,24 @@ struct sched_domain_attr { extern int sched_domain_level_max; +struct capacity_state { + unsigned long cap; /* compute capacity */ + unsigned long power; /* power consumption at this compute capacity */ +}; + +struct idle_state { + unsigned long power; /* power consumption in this idle state */ +}; + +struct sched_group_energy { + unsigned int nr_idle_states; 
/* number of idle states */ + struct idle_state *idle_states; /* ptr to idle state array */ + unsigned int nr_cap_states; /* number of capacity states */ + struct capacity_state *cap_states; /* ptr to capacity state array */ +}; + +unsigned long capacity_curr_of(int cpu); + struct sched_group; struct sched_domain_shared { @@ -1246,6 +1276,8 @@ bool cpus_share_cache(int this_cpu, int that_cpu); typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); typedef int (*sched_domain_flags_f)(void); +typedef +const struct sched_group_energy * const(*sched_domain_energy_f)(int cpu); #define SDTL_OVERLAP 0x01 @@ -1259,6 +1291,7 @@ struct sd_data { struct sched_domain_topology_level { sched_domain_mask_f mask; sched_domain_flags_f sd_flags; + sched_domain_energy_f energy; int flags; int numa_level; struct sd_data data; @@ -1638,6 +1671,7 @@ struct task_struct { struct list_head grp_list; u64 cpu_cycles; #endif + #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif @@ -1789,6 +1823,7 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ + const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 00101b377cdcd7c3b2846f2b6ff30acc1bd2cdbe..ae9032aca964c9c57905a11f71fb2cdaea568f44 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -18,6 +18,16 @@ extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_child_runs_first; +extern unsigned int sysctl_sched_is_big_little; +extern unsigned int sysctl_sched_sync_hint_enable; +extern unsigned int sysctl_sched_initial_task_util; +extern unsigned int sysctl_sched_cstate_aware; +#ifdef CONFIG_SCHED_WALT +extern unsigned int sysctl_sched_use_walt_cpu_util; +extern unsigned int sysctl_sched_use_walt_task_util; +extern unsigned int sysctl_sched_walt_init_task_load_pct; +extern unsigned int sysctl_sched_walt_cpu_high_irqload; +#endif #ifdef CONFIG_SCHED_HMP diff --git a/include/linux/sched_energy.h b/include/linux/sched_energy.h new file mode 100644 index 0000000000000000000000000000000000000000..1daf3e1f98a75ca54ee7f9e465c10842ea2429a6 --- /dev/null +++ b/include/linux/sched_energy.h @@ -0,0 +1,44 @@ +#ifndef _LINUX_SCHED_ENERGY_H +#define _LINUX_SCHED_ENERGY_H + +#include +#include + +/* + * There doesn't seem to be an NR_CPUS style max number of sched domain + * levels so here's an arbitrary constant one for the moment. + * + * The levels alluded to here correspond to entries in struct + * sched_domain_topology_level that are meant to be populated by arch + * specific code (topology.c). + */ +#define NR_SD_LEVELS 8 + +#define SD_LEVEL0 0 +#define SD_LEVEL1 1 +#define SD_LEVEL2 2 +#define SD_LEVEL3 3 +#define SD_LEVEL4 4 +#define SD_LEVEL5 5 +#define SD_LEVEL6 6 +#define SD_LEVEL7 7 + +/* + * Convenience macro for iterating through said sd levels. 
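As a rough illustration of how a capacity/idle state table like the one defined above could be consumed, the sketch below estimates the energy of one CPU over a sampling window as busy time priced at the selected capacity state plus idle time priced at the idle state. This is a stand-alone user-space sketch with made-up table values, not the kernel's actual energy estimator.

    #include <stdio.h>

    struct capacity_state { unsigned long cap; unsigned long power; };
    struct idle_state { unsigned long power; };

    /* Hypothetical table for one CPU: capacity on a 0..1024 scale, power in mW. */
    static const struct capacity_state cap_states[] = {
            { 256, 30 }, { 512, 80 }, { 768, 160 }, { 1024, 300 },
    };
    static const struct idle_state idle = { 5 };
    #define NR_CAP_STATES (sizeof(cap_states) / sizeof(cap_states[0]))

    /* Pick the lowest capacity state that can serve the given utilization. */
    static const struct capacity_state *find_cap_state(unsigned long util)
    {
            size_t i;

            for (i = 0; i < NR_CAP_STATES; i++)
                    if (cap_states[i].cap >= util)
                            return &cap_states[i];
            return &cap_states[NR_CAP_STATES - 1];
    }

    int main(void)
    {
            unsigned long util = 400;        /* average utilization, 0..1024 */
            unsigned long window_us = 10000; /* sampling window */
            const struct capacity_state *cs = find_cap_state(util);
            unsigned long busy_us = window_us * util / cs->cap;
            unsigned long energy = busy_us * cs->power +
                                   (window_us - busy_us) * idle.power;

            printf("cap=%lu power=%lu estimated energy=%lu mW*us\n",
                   cs->cap, cs->power, energy);
            return 0;
    }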
+ */ +#define for_each_possible_sd_level(level) \ + for (level = 0; level < NR_SD_LEVELS; level++) + +#ifdef CONFIG_SMP + +extern struct sched_group_energy *sge_array[NR_CPUS][NR_SD_LEVELS]; + +void init_sched_energy_costs(void); + +#else + +#define init_sched_energy_costs() do { } while (0) + +#endif /* CONFIG_SMP */ + +#endif diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 85cc819676e81d9f0e85331a17f70b271cd680d1..333ad11b3dd9cf11a8ee2fc2f10bece31c543444 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -216,5 +216,6 @@ void rpc_clnt_xprt_switch_put(struct rpc_clnt *); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); +void rpc_cleanup_clids(void); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index e5d19344037491651c6c80f6b310842ee5715126..7440290f64acd3694dfc5c17618c55f6253aae01 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -66,6 +66,7 @@ struct svc_xprt { #define XPT_LISTENER 10 /* listening endpoint */ #define XPT_CACHE_AUTH 11 /* cache auth info */ #define XPT_LOCAL 12 /* connection from loopback interface */ +#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ diff --git a/include/linux/swap.h b/include/linux/swap.h index a56523cefb9b7b9bdda7ee3ae84b6b5f7c10766c..55ff5593c193a33acfca4bc8c9708e06cbddb2ad 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -150,8 +150,9 @@ enum { SWP_FILE = (1 << 7), /* set after swap_activate success */ SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */ SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */ + SWP_STABLE_WRITES = (1 << 10), /* no overwrite PG_writeback pages */ /* add others here before... */ - SWP_SCANNING = (1 << 10), /* refcount in scan_swap_map */ + SWP_SCANNING = (1 << 11), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32UL diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 5f81f8a187f2adf8f83f0b19fdd93ce09b46e714..d2613536fd036ee650e9cd5adf58d79c86c99d7a 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -9,7 +9,13 @@ struct device; struct page; struct scatterlist; -extern int swiotlb_force; +enum swiotlb_force { + SWIOTLB_NORMAL, /* Default - depending on HW DMA mask etc. 
*/ + SWIOTLB_FORCE, /* swiotlb=force */ + SWIOTLB_NO_FORCE, /* swiotlb=noforce */ +}; + +extern enum swiotlb_force swiotlb_force; /* * Maximum allowable number of contiguous slabs to map, diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a17ae7b8521805847aa1c81920cafd863bc4a275..647532b0eb0314217260ca8b3e43992fe9c066d0 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -62,8 +62,13 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) /* TCP Fast Open Cookie as stored in memory */ struct tcp_fastopen_cookie { + union { + u8 val[TCP_FASTOPEN_COOKIE_MAX]; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr addr; +#endif + }; s8 len; - u8 val[TCP_FASTOPEN_COOKIE_MAX]; bool exp; /* In RFC6994 experimental option format */ }; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 09168c52ab6443f86efb14b51c9934725cb5e05c..361f8bf1429dfe28e63ad32d12561fab51c9e959 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -249,6 +249,7 @@ static inline u64 ktime_get_raw_ns(void) extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void); +extern u64 ktime_get_boot_fast_ns(void); /* * Timespec interfaces utilizing the ktime based ones diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 509a8f476cca6ad480d1a0b128883598cc59d158..fd09a1b4fcb8bfc8a0c0bced17e698de67c9bb3c 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -547,7 +547,9 @@ static inline struct usb_gadget *dev_to_usb_gadget(struct device *dev) */ static inline size_t usb_ep_align(struct usb_ep *ep, size_t len) { - return round_up(len, (size_t)le16_to_cpu(ep->desc->wMaxPacketSize)); + int max_packet_size = (size_t)usb_endpoint_maxp(ep->desc) & 0x7ff; + + return round_up(len, max_packet_size); } /** diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 1c912f85e041756476008f9616be4414019a5614..f211c348e592fe2a96082e650fc92dd9d713d5e6 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -56,7 +56,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, - bool little_endian) + bool little_endian, + bool has_data_valid) { memset(hdr, 0, sizeof(*hdr)); @@ -91,7 +92,8 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, skb_checksum_start_offset(skb)); hdr->csum_offset = __cpu_to_virtio16(little_endian, skb->csum_offset); - } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + } else if (has_data_valid && + skb->ip_summed == CHECKSUM_UNNECESSARY) { hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 71d92665d838dddfa980120b05b538f7bbef5ebe..677a04791bb18d362201a6aeb08e213376e91b95 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4585,6 +4585,17 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, */ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss); +/** + * cfg80211_abandon_assoc - notify cfg80211 of abandoned association attempt + * @dev: network device + * @bss: The BSS entry with which association was abandoned. + * + * Call this whenever - for reasons reported through other API, like deauth RX, + * an association attempt was abandoned. + * This function may sleep. The caller must hold the corresponding wdev's mutex. 
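The usb_ep_align() change above masks the endpoint's wMaxPacketSize down to bits 10:0 before rounding, because the upper bits of that descriptor field encode the number of additional transactions per microframe for high-speed, high-bandwidth endpoints rather than the packet size. A stand-alone sketch of the same arithmetic, with the rounding re-implemented in plain C for illustration:

    #include <stdio.h>
    #include <stdint.h>

    /* Same effect as the kernel's round_up() for this purpose, written as
     * plain arithmetic so it also handles non-power-of-two packet sizes. */
    static size_t round_up_to(size_t len, size_t step)
    {
            return ((len + step - 1) / step) * step;
    }

    int main(void)
    {
            /* 1024-byte packets plus one additional transaction per microframe. */
            uint16_t wMaxPacketSize = (1 << 11) | 1024;
            size_t maxp = wMaxPacketSize & 0x7ff;   /* keep only bits 10:0 */
            size_t len = 700;

            printf("maxp=%zu: %zu bytes align up to %zu\n",
                   maxp, len, round_up_to(len, maxp));
            return 0;
    }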
+ */ +void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss); + /** * cfg80211_tx_mlme_mgmt - notification of transmitted deauth/disassoc frame * @dev: network device diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 521885cfc16e9f687129e26469986b19215db4e9..261202f73876a0c8820cd987a4bc1833066ae532 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -8,6 +8,11 @@ #include #include +struct fib_kuid_range { + kuid_t start; + kuid_t end; +}; + struct fib_rule { struct list_head list; int iifindex; @@ -30,8 +35,7 @@ struct fib_rule { int suppress_prefixlen; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; - kuid_t uid_start; - kuid_t uid_end; + struct fib_kuid_range uid_range; struct rcu_head rcu; }; @@ -96,7 +100,8 @@ struct fib_rules_ops { [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ [FRA_GOTO] = { .type = NLA_U32 }, \ - [FRA_L3MDEV] = { .type = NLA_U8 } + [FRA_L3MDEV] = { .type = NLA_U8 }, \ + [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index fc89e35e88402055848f4403fa2b96d7c5e309af..9dc2c182a263218ad63ceb326893b4c86c21ff95 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -142,7 +142,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark, kuid_t uid); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, u32 mark); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index ea3f80f58fd6d57f01d4cd9d7cbe659e7a123150..fc7c0dbdd1ffa3312f80491a038a734813fa9dbc 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -43,13 +43,12 @@ struct lwtunnel_encap_ops { int (*get_encap_size)(struct lwtunnel_state *lwtstate); int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); int (*xmit)(struct sk_buff *skb); + + struct module *owner; }; #ifdef CONFIG_LWTUNNEL -static inline void lwtstate_free(struct lwtunnel_state *lws) -{ - kfree(lws); -} +void lwtstate_free(struct lwtunnel_state *lws); static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) @@ -106,6 +105,8 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num); +int lwtunnel_valid_encap_type(u16 encap_type); +int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len); int lwtunnel_build_state(struct net_device *dev, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, @@ -169,6 +170,15 @@ static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, return -EOPNOTSUPP; } +static inline int lwtunnel_valid_encap_type(u16 encap_type) +{ + return -EOPNOTSUPP; +} +static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len) +{ + return -EOPNOTSUPP; +} + static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, diff --git a/include/net/route.h b/include/net/route.h 
index e56c5a4e04777b57d8a135ec30f26a6a03bd0994..c0874c87c173717f2c13c8af06d2482a76190243 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -153,8 +153,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport, - sk ? sock_i_uid(sk) : GLOBAL_ROOT_UID); + daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -271,7 +270,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, protocol, flow_flags, dst, src, dport, sport, - sock_i_uid(sk)); + sk->sk_uid); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/include/net/sock.h b/include/net/sock.h index 92b269709b9a8a7e5d69c55ac66b834501a2931c..97f8ed2202bff6849f6b826b9a597d95546c7644 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -419,6 +419,7 @@ struct sock { u32 sk_max_ack_backlog; __u32 sk_priority; __u32 sk_mark; + kuid_t sk_uid; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; @@ -1651,6 +1652,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_wq = parent->wq; parent->sk = sk; sk_set_socket(sk, parent); + sk->sk_uid = SOCK_INODE(parent)->i_uid; security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } @@ -1658,6 +1660,11 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) kuid_t sock_i_uid(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); +static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) +{ + return sk ? 
sk->sk_uid : make_kuid(net->user_ns, 0); +} + static inline u32 net_tx_rndhash(void) { u32 v = prandom_u32(); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 931a47ba45718ad5c329b1085c7fac5319e7448f..1beab5532035dc2126405384d44457f183de2a90 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -205,10 +205,12 @@ static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (dev) { - ip4 = (struct in_device *)dev->ip_ptr; - if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) + ip4 = in_dev_get(dev); + if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) { ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address, (struct in6_addr *)gid); + in_dev_put(ip4); + } dev_put(dev); } } diff --git a/include/sound/hdmi-codec.h b/include/sound/hdmi-codec.h index 530c57bdefa06567c3bcaba9249845af1d4e535a..915c4357945c27992aa3b7561ab705dedff082ad 100644 --- a/include/sound/hdmi-codec.h +++ b/include/sound/hdmi-codec.h @@ -36,10 +36,10 @@ struct hdmi_codec_daifmt { HDMI_AC97, HDMI_SPDIF, } fmt; - int bit_clk_inv:1; - int frame_clk_inv:1; - int bit_clk_master:1; - int frame_clk_master:1; + unsigned int bit_clk_inv:1; + unsigned int frame_clk_inv:1; + unsigned int bit_clk_master:1; + unsigned int frame_clk_master:1; }; /* diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index e030d6f6c19acb090348298e664739a8cb06c111..6d7fe1169956bb9711a3ea1828fa260a0b3f362a 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1162,22 +1162,26 @@ DECLARE_EVENT_CLASS(btrfs__work, __entry->func, __entry->ordered_func, __entry->ordered_free) ); -/* For situiations that the work is freed */ +/* + * For situations when the work is freed, we pass fs_info and a tag that + * matches the address of the work structure so it can be paired with the + * scheduling event.
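The rewritten comment above says the done event now carries fs_info plus an opaque tag equal to the address of the (possibly already freed) work item, so the tag can be matched against the earlier scheduling event but must never be dereferenced. A stand-alone sketch of pairing trace records by such a tag; the record layout here is invented for illustration:

    #include <stdio.h>
    #include <string.h>

    struct trace_rec {
            const char *event; /* "queued" or "done" */
            void *wtag;        /* opaque tag: the work item's address */
    };

    int main(void)
    {
            int work1, work2; /* stand-ins for two work items */
            struct trace_rec log[] = {
                    { "queued", &work1 },
                    { "queued", &work2 },
                    { "done",   &work1 },
            };
            size_t i, j, n = sizeof(log) / sizeof(log[0]);

            /* Pair each "done" with its "queued" purely by comparing tag values. */
            for (i = 0; i < n; i++) {
                    if (strcmp(log[i].event, "done") != 0)
                            continue;
                    for (j = 0; j < i; j++)
                            if (log[j].wtag == log[i].wtag)
                                    printf("work %p: queued at record %zu, done at record %zu\n",
                                           log[i].wtag, j, i);
            }
            return 0;
    }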
+ */ DECLARE_EVENT_CLASS(btrfs__work__done, - TP_PROTO(struct btrfs_work *work), + TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag), - TP_ARGS(work), + TP_ARGS(fs_info, wtag), TP_STRUCT__entry_btrfs( - __field( void *, work ) + __field( void *, wtag ) ), - TP_fast_assign_btrfs(btrfs_work_owner(work), - __entry->work = work; + TP_fast_assign_btrfs(fs_info, + __entry->wtag = wtag; ), - TP_printk_btrfs("work->%p", __entry->work) + TP_printk_btrfs("work->%p", __entry->wtag) ); DEFINE_EVENT(btrfs__work, btrfs_work_queued, @@ -1196,9 +1200,9 @@ DEFINE_EVENT(btrfs__work, btrfs_work_sched, DEFINE_EVENT(btrfs__work__done, btrfs_all_work_done, - TP_PROTO(struct btrfs_work *work), + TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag), - TP_ARGS(work) + TP_ARGS(fs_info, wtag) ); DEFINE_EVENT(btrfs__work, btrfs_ordered_sched, diff --git a/include/trace/events/cpufreq_interactive.h b/include/trace/events/cpufreq_interactive.h index 951e6ca12da811fb94289b4e5c3b0f2134b7a2ec..faecc0bfdefffea07502d0c92a4c15f24786f29e 100644 --- a/include/trace/events/cpufreq_interactive.h +++ b/include/trace/events/cpufreq_interactive.h @@ -8,102 +8,102 @@ DECLARE_EVENT_CLASS(set, TP_PROTO(u32 cpu_id, unsigned long targfreq, - unsigned long actualfreq), + unsigned long actualfreq), TP_ARGS(cpu_id, targfreq, actualfreq), TP_STRUCT__entry( - __field( u32, cpu_id ) - __field(unsigned long, targfreq ) - __field(unsigned long, actualfreq ) - ), + __field(u32, cpu_id) + __field(unsigned long, targfreq) + __field(unsigned long, actualfreq) + ), TP_fast_assign( - __entry->cpu_id = (u32) cpu_id; - __entry->targfreq = targfreq; - __entry->actualfreq = actualfreq; + __entry->cpu_id = (u32)cpu_id; + __entry->targfreq = targfreq; + __entry->actualfreq = actualfreq; ), TP_printk("cpu=%u targ=%lu actual=%lu", - __entry->cpu_id, __entry->targfreq, - __entry->actualfreq) + __entry->cpu_id, __entry->targfreq, + __entry->actualfreq) ); DEFINE_EVENT(set, cpufreq_interactive_setspeed, TP_PROTO(u32 cpu_id, unsigned long targfreq, - unsigned long actualfreq), + unsigned long actualfreq), TP_ARGS(cpu_id, targfreq, actualfreq) ); DECLARE_EVENT_CLASS(loadeval, - TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curtarg, unsigned long curactual, - unsigned long newtarg), - TP_ARGS(cpu_id, load, curtarg, curactual, newtarg), - - TP_STRUCT__entry( - __field(unsigned long, cpu_id ) - __field(unsigned long, load ) - __field(unsigned long, curtarg ) - __field(unsigned long, curactual ) - __field(unsigned long, newtarg ) - ), - - TP_fast_assign( - __entry->cpu_id = cpu_id; - __entry->load = load; - __entry->curtarg = curtarg; - __entry->curactual = curactual; - __entry->newtarg = newtarg; - ), - - TP_printk("cpu=%lu load=%lu cur=%lu actual=%lu targ=%lu", - __entry->cpu_id, __entry->load, __entry->curtarg, - __entry->curactual, __entry->newtarg) + TP_PROTO(unsigned long cpu_id, unsigned long load, + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg), + + TP_STRUCT__entry( + __field(unsigned long, cpu_id) + __field(unsigned long, load) + __field(unsigned long, curtarg) + __field(unsigned long, curactual) + __field(unsigned long, newtarg) + ), + + TP_fast_assign( + __entry->cpu_id = cpu_id; + __entry->load = load; + __entry->curtarg = curtarg; + __entry->curactual = curactual; + __entry->newtarg = newtarg; + ), + + TP_printk("cpu=%lu load=%lu cur=%lu actual=%lu targ=%lu", + __entry->cpu_id, __entry->load, __entry->curtarg, + __entry->curactual, __entry->newtarg) ); 
DEFINE_EVENT(loadeval, cpufreq_interactive_target, - TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curtarg, unsigned long curactual, - unsigned long newtarg), - TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) + TP_PROTO(unsigned long cpu_id, unsigned long load, + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) ); DEFINE_EVENT(loadeval, cpufreq_interactive_already, - TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curtarg, unsigned long curactual, - unsigned long newtarg), - TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) + TP_PROTO(unsigned long cpu_id, unsigned long load, + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) ); DEFINE_EVENT(loadeval, cpufreq_interactive_notyet, - TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curtarg, unsigned long curactual, - unsigned long newtarg), - TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) + TP_PROTO(unsigned long cpu_id, unsigned long load, + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) ); TRACE_EVENT(cpufreq_interactive_boost, - TP_PROTO(const char *s), - TP_ARGS(s), - TP_STRUCT__entry( - __string(s, s) - ), - TP_fast_assign( - __assign_str(s, s); - ), - TP_printk("%s", __get_str(s)) + TP_PROTO(const char *s), + TP_ARGS(s), + TP_STRUCT__entry( + __string(s, s) + ), + TP_fast_assign( + __assign_str(s, s); + ), + TP_printk("%s", __get_str(s)) ); TRACE_EVENT(cpufreq_interactive_unboost, - TP_PROTO(const char *s), - TP_ARGS(s), - TP_STRUCT__entry( - __string(s, s) - ), - TP_fast_assign( - __assign_str(s, s); - ), - TP_printk("%s", __get_str(s)) + TP_PROTO(const char *s), + TP_ARGS(s), + TP_STRUCT__entry( + __string(s, s) + ), + TP_fast_assign( + __assign_str(s, s); + ), + TP_printk("%s", __get_str(s)) ); #endif /* _TRACE_CPUFREQ_INTERACTIVE_H */ diff --git a/include/trace/events/cpufreq_sched.h b/include/trace/events/cpufreq_sched.h new file mode 100644 index 0000000000000000000000000000000000000000..a46cd088e96990ae717ceec7e54aa5fe515c1dba --- /dev/null +++ b/include/trace/events/cpufreq_sched.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2015 Steve Muckle + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cpufreq_sched + +#if !defined(_TRACE_CPUFREQ_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_CPUFREQ_SCHED_H + +#include +#include + +TRACE_EVENT(cpufreq_sched_throttled, + TP_PROTO(unsigned int rem), + TP_ARGS(rem), + TP_STRUCT__entry( + __field( unsigned int, rem) + ), + TP_fast_assign( + __entry->rem = rem; + ), + TP_printk("throttled - %d usec remaining", __entry->rem) +); + +TRACE_EVENT(cpufreq_sched_request_opp, + TP_PROTO(int cpu, + unsigned long capacity, + unsigned int freq_new, + unsigned int requested_freq), + TP_ARGS(cpu, capacity, freq_new, requested_freq), + TP_STRUCT__entry( + __field( int, cpu) + __field( unsigned long, capacity) + __field( unsigned int, freq_new) + __field( unsigned int, requested_freq) + ), + TP_fast_assign( + __entry->cpu = cpu; + __entry->capacity = capacity; + __entry->freq_new = freq_new; + __entry->requested_freq = requested_freq; + ), + TP_printk("cpu %d cap change, cluster cap request %ld => OPP %d " + "(cur %d)", + __entry->cpu, __entry->capacity, __entry->freq_new, + __entry->requested_freq) +); + +TRACE_EVENT(cpufreq_sched_update_capacity, + TP_PROTO(int cpu, + bool request, + struct sched_capacity_reqs *scr, + unsigned long new_capacity), + TP_ARGS(cpu, request, scr, new_capacity), + TP_STRUCT__entry( + __field( int, cpu) + __field( bool, request) + __field( unsigned long, cfs) + __field( unsigned long, rt) + __field( unsigned long, dl) + __field( unsigned long, total) + __field( unsigned long, new_total) + ), + TP_fast_assign( + __entry->cpu = cpu; + __entry->request = request; + __entry->cfs = scr->cfs; + __entry->rt = scr->rt; + __entry->dl = scr->dl; + __entry->total = scr->total; + __entry->new_total = new_capacity; + ), + TP_printk("cpu=%d set_cap=%d cfs=%ld rt=%ld dl=%ld old_tot=%ld " + "new_tot=%ld", + __entry->cpu, __entry->request, __entry->cfs, __entry->rt, + __entry->dl, __entry->total, __entry->new_total) +); + +#endif /* _TRACE_CPUFREQ_SCHED_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 49cc7c3de25221f2e014c80f12189bb1edc3b946..89d009e1093876cd9d0bfec3b14592cc4e75dc26 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -57,7 +57,7 @@ TRACE_EVENT(net_dev_start_xmit, __entry->gso_type = skb_shinfo(skb)->gso_type; ), - TP_printk("dev=%s queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x", + TP_printk("dev=%s queue_mapping=%u skbaddr=%pK vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x", __get_str(name), __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, __entry->len, @@ -90,7 +90,7 @@ TRACE_EVENT(net_dev_xmit, __assign_str(name, dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u rc=%d", + TP_printk("dev=%s skbaddr=%pK len=%u rc=%d", __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) ); @@ -112,7 +112,7 @@ DECLARE_EVENT_CLASS(net_dev_template, __assign_str(name, skb->dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u", + TP_printk("dev=%s skbaddr=%pK len=%u", __get_str(name), __entry->skbaddr, __entry->len) ) @@ 
-191,7 +191,7 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __entry->gso_type = skb_shinfo(skb)->gso_type; ), - TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", + TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%pK vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", __get_str(name), __entry->napi_id, __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 070be71b1aacdc89f581fb4a31dd539cea3e1a06..ec6f81561558100b3c8236597b0c6c02cc8a2cde 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -172,6 +172,13 @@ TRACE_EVENT(cpu_frequency_limits, (unsigned long)__entry->cpu_id) ); +DEFINE_EVENT(cpu, cpu_capacity, + + TP_PROTO(unsigned int capacity, unsigned int cpu_id), + + TP_ARGS(capacity, cpu_id) +); + TRACE_EVENT(device_pm_callback_start, TP_PROTO(struct device *dev, const char *pm_ops, int event), diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 3f0b3dfb07463b8f41cc16a920ab0f60f6b9bc9c..904bedbaca7722b6210555db9c1107a253e1c438 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -1304,6 +1304,285 @@ TRACE_EVENT(sched_wake_idle_without_ipi, TP_printk("cpu=%d", __entry->cpu) ); +TRACE_EVENT(sched_contrib_scale_f, + TP_PROTO(int cpu, unsigned long freq_scale_factor, + unsigned long cpu_scale_factor), + TP_ARGS(cpu, freq_scale_factor, cpu_scale_factor), + TP_STRUCT__entry( + __field(int, cpu) + __field(unsigned long, freq_scale_factor) + __field(unsigned long, cpu_scale_factor) + ), + TP_fast_assign( + __entry->cpu = cpu; + __entry->freq_scale_factor = freq_scale_factor; + __entry->cpu_scale_factor = cpu_scale_factor; + ), + TP_printk("cpu=%d freq_scale_factor=%lu cpu_scale_factor=%lu", + __entry->cpu, __entry->freq_scale_factor, + __entry->cpu_scale_factor) +); + +/* + * Tracepoint for accounting sched averages for tasks. + */ +TRACE_EVENT(sched_load_avg_task, + TP_PROTO(struct task_struct *tsk, struct sched_avg *avg), + TP_ARGS(tsk, avg), + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, cpu ) + __field( unsigned long, load_avg ) + __field( unsigned long, util_avg ) + __field( u64, load_sum ) + __field( u32, util_sum ) + __field( u32, period_contrib ) + ), + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->cpu = task_cpu(tsk); + __entry->load_avg = avg->load_avg; + __entry->util_avg = avg->util_avg; + __entry->load_sum = avg->load_sum; + __entry->util_sum = avg->util_sum; + __entry->period_contrib = avg->period_contrib; + ), + TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu util_avg=%lu load_sum=%llu" + " util_sum=%u period_contrib=%u", + __entry->comm, + __entry->pid, + __entry->cpu, + __entry->load_avg, + __entry->util_avg, + (u64)__entry->load_sum, + (u32)__entry->util_sum, + (u32)__entry->period_contrib) +); +/* + * Tracepoint for accounting sched averages for cpus. 
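The sched_load_avg_task event above dumps the per-entity load-tracking sums and averages. As a rough, illustrative model of what such fields track, and explicitly not the scheduler's exact arithmetic, the sketch below accumulates a contribution per roughly-1ms period into a sum that is decayed so a contribution loses half its weight after 32 periods:

    #include <stdio.h>
    #include <math.h>

    int main(void)
    {
            /* Decay factor chosen so weight halves every 32 periods. */
            const double y = pow(0.5, 1.0 / 32.0);
            double load_sum = 0.0;
            int period;

            /* 100 fully busy periods: the decayed sum converges toward a ceiling. */
            for (period = 0; period < 100; period++)
                    load_sum = load_sum * y + 1024.0;

            printf("decay per period=%.5f load_sum after 100 busy periods=%.0f\n",
                   y, load_sum);
            return 0;
    }

(Build with -lm.)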
+ */ +TRACE_EVENT(sched_load_avg_cpu, + TP_PROTO(int cpu, struct cfs_rq *cfs_rq), + TP_ARGS(cpu, cfs_rq), + TP_STRUCT__entry( + __field( int, cpu ) + __field( unsigned long, load_avg ) + __field( unsigned long, util_avg ) + ), + TP_fast_assign( + __entry->cpu = cpu; + __entry->load_avg = cfs_rq->avg.load_avg; + __entry->util_avg = cfs_rq->avg.util_avg; + ), + TP_printk("cpu=%d load_avg=%lu util_avg=%lu", + __entry->cpu, __entry->load_avg, __entry->util_avg) +); +/* + * Tracepoint for sched_tune_config settings + */ +TRACE_EVENT(sched_tune_config, + TP_PROTO(int boost), + TP_ARGS(boost), + TP_STRUCT__entry( + __field( int, boost ) + ), + TP_fast_assign( + __entry->boost = boost; + ), + TP_printk("boost=%d ", __entry->boost) +); +/* + * Tracepoint for accounting CPU boosted utilization + */ +TRACE_EVENT(sched_boost_cpu, + TP_PROTO(int cpu, unsigned long util, long margin), + TP_ARGS(cpu, util, margin), + TP_STRUCT__entry( + __field( int, cpu ) + __field( unsigned long, util ) + __field(long, margin ) + ), + TP_fast_assign( + __entry->cpu = cpu; + __entry->util = util; + __entry->margin = margin; + ), + TP_printk("cpu=%d util=%lu margin=%ld", + __entry->cpu, + __entry->util, + __entry->margin) +); +/* + * Tracepoint for schedtune_tasks_update + */ +TRACE_EVENT(sched_tune_tasks_update, + TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx, + int boost, int max_boost), + TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost), + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, cpu ) + __field( int, tasks ) + __field( int, idx ) + __field( int, boost ) + __field( int, max_boost ) + ), + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->cpu = cpu; + __entry->tasks = tasks; + __entry->idx = idx; + __entry->boost = boost; + __entry->max_boost = max_boost; + ), + TP_printk("pid=%d comm=%s " + "cpu=%d tasks=%d idx=%d boost=%d max_boost=%d", + __entry->pid, __entry->comm, + __entry->cpu, __entry->tasks, __entry->idx, + __entry->boost, __entry->max_boost) +); +/* + * Tracepoint for schedtune_boostgroup_update + */ +TRACE_EVENT(sched_tune_boostgroup_update, + TP_PROTO(int cpu, int variation, int max_boost), + TP_ARGS(cpu, variation, max_boost), + TP_STRUCT__entry( + __field( int, cpu ) + __field( int, variation ) + __field( int, max_boost ) + ), + TP_fast_assign( + __entry->cpu = cpu; + __entry->variation = variation; + __entry->max_boost = max_boost; + ), + TP_printk("cpu=%d variation=%d max_boost=%d", + __entry->cpu, __entry->variation, __entry->max_boost) +); +/* + * Tracepoint for accounting task boosted utilization + */ +TRACE_EVENT(sched_boost_task, + TP_PROTO(struct task_struct *tsk, unsigned long util, long margin), + TP_ARGS(tsk, util, margin), + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( unsigned long, util ) + __field( long, margin ) + ), + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->util = util; + __entry->margin = margin; + ), + TP_printk("comm=%s pid=%d util=%lu margin=%ld", + __entry->comm, __entry->pid, + __entry->util, + __entry->margin) +); +/* + * Tracepoint for accounting sched group energy + */ +TRACE_EVENT(sched_energy_diff, + TP_PROTO(struct task_struct *tsk, int scpu, int dcpu, int udelta, + int nrgb, int nrga, int nrgd, int capb, int capa, int capd, + int nrgn, int nrgp), + TP_ARGS(tsk, scpu, dcpu, udelta, + nrgb, nrga, nrgd, capb, capa, capd, + nrgn, nrgp), + 
TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, scpu ) + __field( int, dcpu ) + __field( int, udelta ) + __field( int, nrgb ) + __field( int, nrga ) + __field( int, nrgd ) + __field( int, capb ) + __field( int, capa ) + __field( int, capd ) + __field( int, nrgn ) + __field( int, nrgp ) + ), + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->scpu = scpu; + __entry->dcpu = dcpu; + __entry->udelta = udelta; + __entry->nrgb = nrgb; + __entry->nrga = nrga; + __entry->nrgd = nrgd; + __entry->capb = capb; + __entry->capa = capa; + __entry->capd = capd; + __entry->nrgn = nrgn; + __entry->nrgp = nrgp; + ), + TP_printk("pid=%d comm=%s " + "src_cpu=%d dst_cpu=%d usage_delta=%d " + "nrg_before=%d nrg_after=%d nrg_diff=%d " + "cap_before=%d cap_after=%d cap_delta=%d " + "nrg_delta=%d nrg_payoff=%d", + __entry->pid, __entry->comm, + __entry->scpu, __entry->dcpu, __entry->udelta, + __entry->nrgb, __entry->nrga, __entry->nrgd, + __entry->capb, __entry->capa, __entry->capd, + __entry->nrgn, __entry->nrgp) +); +/* + * Tracepoint for schedtune_tasks_update + */ +TRACE_EVENT(sched_tune_filter, + TP_PROTO(int nrg_delta, int cap_delta, + int nrg_gain, int cap_gain, + int payoff, int region), + TP_ARGS(nrg_delta, cap_delta, nrg_gain, cap_gain, payoff, region), + TP_STRUCT__entry( + __field( int, nrg_delta ) + __field( int, cap_delta ) + __field( int, nrg_gain ) + __field( int, cap_gain ) + __field( int, payoff ) + __field( int, region ) + ), + TP_fast_assign( + __entry->nrg_delta = nrg_delta; + __entry->cap_delta = cap_delta; + __entry->nrg_gain = nrg_gain; + __entry->cap_gain = cap_gain; + __entry->payoff = payoff; + __entry->region = region; + ), + TP_printk("nrg_delta=%d cap_delta=%d nrg_gain=%d cap_gain=%d payoff=%d region=%d", + __entry->nrg_delta, __entry->cap_delta, + __entry->nrg_gain, __entry->cap_gain, + __entry->payoff, __entry->region) +); +/* + * Tracepoint for system overutilized flag + */ +TRACE_EVENT(sched_overutilized, + TP_PROTO(bool overutilized), + TP_ARGS(overutilized), + TP_STRUCT__entry( + __field( bool, overutilized ) + ), + TP_fast_assign( + __entry->overutilized = overutilized; + ), + TP_printk("overutilized=%d", + __entry->overutilized ? 1 : 0) +); + TRACE_EVENT(sched_get_nr_running_avg, TP_PROTO(int avg, int big_avg, int iowait_avg), diff --git a/include/trace/events/swiotlb.h b/include/trace/events/swiotlb.h index 7ea4c5e7c4487a963acfc75c027514a6cb360169..288c0c54a2b4ace63a94549c136fe255e2982089 100644 --- a/include/trace/events/swiotlb.h +++ b/include/trace/events/swiotlb.h @@ -11,16 +11,16 @@ TRACE_EVENT(swiotlb_bounced, TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size, - int swiotlb_force), + enum swiotlb_force swiotlb_force), TP_ARGS(dev, dev_addr, size, swiotlb_force), TP_STRUCT__entry( - __string( dev_name, dev_name(dev) ) - __field( u64, dma_mask ) - __field( dma_addr_t, dev_addr ) - __field( size_t, size ) - __field( int, swiotlb_force ) + __string( dev_name, dev_name(dev) ) + __field( u64, dma_mask ) + __field( dma_addr_t, dev_addr ) + __field( size_t, size ) + __field( enum swiotlb_force, swiotlb_force ) ), TP_fast_assign( @@ -37,7 +37,10 @@ TRACE_EVENT(swiotlb_bounced, __entry->dma_mask, (unsigned long long)__entry->dev_addr, __entry->size, - __entry->swiotlb_force ? 
"swiotlb_force" : "" ) + __print_symbolic(__entry->swiotlb_force, + { SWIOTLB_NORMAL, "NORMAL" }, + { SWIOTLB_FORCE, "FORCE" }, + { SWIOTLB_NO_FORCE, "NO_FORCE" })) ); #endif /* _TRACE_SWIOTLB_H */ diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 41420e341e75de5f2e46cd4fac5eab9e15f68168..51f891fb1b18ad9b14713fe3a4f2a541c9ca5442 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -33,6 +33,8 @@ enum { BINDER_TYPE_HANDLE = B_PACK_CHARS('s', 'h', '*', B_TYPE_LARGE), BINDER_TYPE_WEAK_HANDLE = B_PACK_CHARS('w', 'h', '*', B_TYPE_LARGE), BINDER_TYPE_FD = B_PACK_CHARS('f', 'd', '*', B_TYPE_LARGE), + BINDER_TYPE_FDA = B_PACK_CHARS('f', 'd', 'a', B_TYPE_LARGE), + BINDER_TYPE_PTR = B_PACK_CHARS('p', 't', '*', B_TYPE_LARGE), }; enum { @@ -48,6 +50,14 @@ typedef __u64 binder_size_t; typedef __u64 binder_uintptr_t; #endif +/** + * struct binder_object_header - header shared by all binder metadata objects. + * @type: type of the object + */ +struct binder_object_header { + __u32 type; +}; + /* * This is the flattened representation of a Binder object for transfer * between processes. The 'offsets' supplied as part of a binder transaction @@ -56,9 +66,8 @@ typedef __u64 binder_uintptr_t; * between processes. */ struct flat_binder_object { - /* 8 bytes for large_flat_header. */ - __u32 type; - __u32 flags; + struct binder_object_header hdr; + __u32 flags; /* 8 bytes of data. */ union { @@ -70,6 +79,84 @@ struct flat_binder_object { binder_uintptr_t cookie; }; +/** + * struct binder_fd_object - describes a filedescriptor to be fixed up. + * @hdr: common header structure + * @pad_flags: padding to remain compatible with old userspace code + * @pad_binder: padding to remain compatible with old userspace code + * @fd: file descriptor + * @cookie: opaque data, used by user-space + */ +struct binder_fd_object { + struct binder_object_header hdr; + __u32 pad_flags; + union { + binder_uintptr_t pad_binder; + __u32 fd; + }; + + binder_uintptr_t cookie; +}; + +/* struct binder_buffer_object - object describing a userspace buffer + * @hdr: common header structure + * @flags: one or more BINDER_BUFFER_* flags + * @buffer: address of the buffer + * @length: length of the buffer + * @parent: index in offset array pointing to parent buffer + * @parent_offset: offset in @parent pointing to this buffer + * + * A binder_buffer object represents an object that the + * binder kernel driver can copy verbatim to the target + * address space. A buffer itself may be pointed to from + * within another buffer, meaning that the pointer inside + * that other buffer needs to be fixed up as well. This + * can be done by setting the BINDER_BUFFER_FLAG_HAS_PARENT + * flag in @flags, by setting @parent buffer to the index + * in the offset array pointing to the parent binder_buffer_object, + * and by setting @parent_offset to the offset in the parent buffer + * at which the pointer to this buffer is located. 
+ */ +struct binder_buffer_object { + struct binder_object_header hdr; + __u32 flags; + binder_uintptr_t buffer; + binder_size_t length; + binder_size_t parent; + binder_size_t parent_offset; +}; + +enum { + BINDER_BUFFER_FLAG_HAS_PARENT = 0x01, +}; + +/* struct binder_fd_array_object - object describing an array of fds in a buffer + * @hdr: common header structure + * @num_fds: number of file descriptors in the buffer + * @parent: index in offset array to buffer holding the fd array + * @parent_offset: start offset of fd array in the buffer + * + * A binder_fd_array object represents an array of file + * descriptors embedded in a binder_buffer_object. It is + * different from a regular binder_buffer_object because it + * describes a list of file descriptors to fix up, not an opaque + * blob of memory, and hence the kernel needs to treat it differently. + * + * An example of how this would be used is with Android's + * native_handle_t object, which is a struct with a list of integers + * and a list of file descriptors. The native_handle_t struct itself + * will be represented by a struct binder_buffer_object, whereas the + * embedded list of file descriptors is represented by a + * struct binder_fd_array_object with that binder_buffer_object as + * a parent. + */ +struct binder_fd_array_object { + struct binder_object_header hdr; + binder_size_t num_fds; + binder_size_t parent; + binder_size_t parent_offset; +}; + /* * On 64-bit platforms where user code may run in 32-bits the driver must * translate the buffer (and local binder) addresses appropriately. @@ -162,6 +249,11 @@ struct binder_transaction_data { } data; }; +struct binder_transaction_data_sg { + struct binder_transaction_data transaction_data; + binder_size_t buffers_size; +}; + struct binder_ptr_cookie { binder_uintptr_t ptr; binder_uintptr_t cookie; @@ -346,6 +438,12 @@ enum binder_driver_command_protocol { /* * void *: cookie */ + + BC_TRANSACTION_SG = _IOW('c', 17, struct binder_transaction_data_sg), + BC_REPLY_SG = _IOW('c', 18, struct binder_transaction_data_sg), + /* + * binder_transaction_data_sg: the sent command.
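The binder_buffer_object and binder_fd_array_object descriptions above explain how a nested object is tied to its enclosing buffer: the child sets BINDER_BUFFER_FLAG_HAS_PARENT (an fd array always has a parent), points parent at the offsets-array entry of the enclosing binder_buffer_object, and puts the byte position of the embedded pointer or fd list in parent_offset. The sketch below fills in that relationship using simplified stand-in types; the field names follow the patch, the values are made up:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    /* Simplified stand-in for the uapi struct binder_buffer_object. */
    struct demo_buffer_object {
            uint32_t flags;
            uint64_t buffer;        /* user address of the data */
            uint64_t length;
            uint64_t parent;        /* index into the offsets array */
            uint64_t parent_offset; /* where the pointer sits inside the parent */
    };

    #define DEMO_FLAG_HAS_PARENT 0x01

    struct parent_payload {
            uint32_t some_field;
            uint64_t child_ptr;     /* pointer the receiver must fix up */
    };

    int main(void)
    {
            struct parent_payload payload = { 42, 0 };
            char child_data[16] = "nested blob";

            struct demo_buffer_object parent = {
                    .flags = 0,
                    .buffer = (uint64_t)(uintptr_t)&payload,
                    .length = sizeof(payload),
            };
            struct demo_buffer_object child = {
                    .flags = DEMO_FLAG_HAS_PARENT,
                    .buffer = (uint64_t)(uintptr_t)child_data,
                    .length = sizeof(child_data),
                    .parent = 0, /* offsets[0] refers to 'parent' above */
                    .parent_offset = offsetof(struct parent_payload, child_ptr),
            };

            printf("parent buffer: %llu bytes; child fixup at offsets[%llu] + %llu bytes\n",
                   (unsigned long long)parent.length,
                   (unsigned long long)child.parent,
                   (unsigned long long)child.parent_offset);
            return 0;
    }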
+ */ }; #endif /* _UAPI_LINUX_BINDER_H */ diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 436f0f3979586206807f650f81b887dd884413e1..a66c4ba51d80683df9faae39cda2028ddb612cba 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -29,6 +29,11 @@ struct fib_rule_hdr { __u32 flags; }; +struct fib_rule_uid_range { + __u32 start; + __u32 end; +}; + enum { FRA_UNSPEC, FRA_DST, /* destination address */ @@ -53,6 +58,7 @@ enum { FRA_UID_END, FRA_PAD, FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE, /* UID range */ __FRA_MAX }; diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 270b764f880efa928a92fac73d20c64d8f635dad..bd017699420e222bbf76f373d5513985cc2fb57a 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -53,7 +53,7 @@ #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" -#define SDCARDFS_SUPER_MAGIC 0xb550ca10 +#define SDCARDFS_SUPER_MAGIC 0x5dca2df5 #define SMB_SUPER_MAGIC 0x517B #define CGROUP_SUPER_MAGIC 0x27e0eb diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 56368e9b462245b6a50a2756abac6233b95650d9..d3cbe48b286d8cffd598305eb3164aa55ba0998e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -323,7 +323,7 @@ * @NL80211_CMD_GET_SCAN: get scan results * @NL80211_CMD_TRIGGER_SCAN: trigger a new scan with the given parameters * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the - * probe requests at CCK rate or not. %NL80211_ATTR_MAC can be used to + * probe requests at CCK rate or not. %NL80211_ATTR_BSSID can be used to * specify a BSSID to scan for; if not included, the wildcard BSSID will * be used. * @NL80211_CMD_NEW_SCAN_RESULTS: scan notification (as a reply to @@ -1937,6 +1937,9 @@ enum nl80211_commands { * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute. * See &enum nl80211_nan_match_attributes. * + * @NL80211_ATTR_BSSID: The BSSID of the AP. Note that %NL80211_ATTR_MAC is also + * used in various commands/events for specifying the BSSID. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2336,6 +2339,8 @@ enum nl80211_attrs { NL80211_ATTR_NAN_FUNC, NL80211_ATTR_NAN_MATCH, + NL80211_ATTR_BSSID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 8f97ca5b8e132ec7e08f80dd6b2fe02cfaf2f41e..e14377f2ec273a183ae3b2e3da887ef2f893f7d1 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -311,7 +311,6 @@ enum rtattr_type_t { RTA_TABLE, RTA_MARK, RTA_MFC_STATS, - RTA_UID, RTA_VIA, RTA_NEWDST, RTA_PREF, @@ -319,6 +318,7 @@ enum rtattr_type_t { RTA_ENCAP, RTA_EXPIRES, RTA_PAD, + RTA_UID, __RTA_MAX }; diff --git a/include/uapi/rdma/cxgb3-abi.h b/include/uapi/rdma/cxgb3-abi.h index 48a19bda071b8db5396991dcd97cd9e0f3872ecf..d24eee12128fc5cb7d1f5612e85a7fb9b194520f 100644 --- a/include/uapi/rdma/cxgb3-abi.h +++ b/include/uapi/rdma/cxgb3-abi.h @@ -30,7 +30,7 @@ * SOFTWARE. 
*/ #ifndef CXGB3_ABI_USER_H -#define CXBG3_ABI_USER_H +#define CXGB3_ABI_USER_H #include diff --git a/init/Kconfig b/init/Kconfig index 262fbd4e17e4ab2e97e3946e3e712819f3c483d6..6a4e13aff5ff2b8ef96ad26e4c3f158886e9b047 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -400,6 +400,15 @@ config IRQ_TIME_ACCOUNTING If in doubt, say N here. +config SCHED_WALT + bool "Support window based load tracking" + depends on SMP + help + This feature will allow the scheduler to maintain a tunable window + based set of metrics for tasks and runqueues. These metrics can be + used to guide task placement as well as task frequency requirements + for cpufreq governors. + config BSD_PROCESS_ACCT bool "BSD Process Accounting" depends on MULTIUSER @@ -971,6 +980,82 @@ menuconfig CGROUPS if CGROUPS +config CGROUP_DEBUG + bool "Example debug cgroup subsystem" + default n + help + This option enables a simple cgroup subsystem that + exports useful debugging information about the cgroups + framework. + + Say N if unsure. + +config CGROUP_FREEZER + bool "Freezer cgroup subsystem" + help + Provides a way to freeze and unfreeze all tasks in a + cgroup. + +config CGROUP_PIDS + bool "PIDs cgroup subsystem" + help + Provides enforcement of process number limits in the scope of a + cgroup. Any attempt to fork more processes than is allowed in the + cgroup will fail. PIDs are fundamentally a global resource because it + is fairly trivial to reach PID exhaustion before you reach even a + conservative kmemcg limit. As a result, it is possible to grind a + system to a halt without being limited by other cgroup policies. The + PIDs cgroup subsystem is designed to stop this from happening. + + It should be noted that organisational operations (such as attaching + to a cgroup hierarchy) will *not* be blocked by the PIDs subsystem, + since the PIDs limit only affects a process's ability to fork, not to + attach to a cgroup. + +config CGROUP_DEVICE + bool "Device controller for cgroups" + help + Provides a cgroup implementing whitelists for devices which + a process in the cgroup can mknod or open. + +config CPUSETS + bool "Cpuset support" + help + This option will let you create and manage CPUSETs which + allow dynamically partitioning a system into sets of CPUs and + Memory Nodes and assigning tasks to run only within those sets. + This is primarily useful on large SMP or NUMA systems. + + Say N if unsure. + +config PROC_PID_CPUSET + bool "Include legacy /proc//cpuset file" + depends on CPUSETS + default y + +config CGROUP_CPUACCT + bool "Simple CPU accounting cgroup subsystem" + help + Provides a simple Resource Controller for monitoring the + total CPU consumed by the tasks in a cgroup. + +config CGROUP_SCHEDTUNE + bool "CFS tasks boosting cgroup subsystem (EXPERIMENTAL)" + depends on SCHED_TUNE + help + This option provides the "schedtune" controller which improves the + flexibility of the task boosting mechanism by introducing the support + to define "per task" boost values. + + This new controller: + 1. allows only a two-layer hierarchy, where the root defines the + system-wide boost value and its direct children each define a + different "class of tasks" to be boosted with a different value + 2. supports up to 16 different task classes, each of which can be + configured with a different boost value + + Say N if unsure.
+ config PAGE_COUNTER bool @@ -1294,6 +1379,7 @@ config SCHED_AUTOGROUP config SCHED_TUNE bool "Boosting for CFS tasks (EXPERIMENTAL)" + depends on SMP help This option enables the system-wide support for task boosting. When this support is enabled a new sysctl interface is exposed to @@ -1318,6 +1404,16 @@ config SCHED_TUNE If unsure, say N. +config DEFAULT_USE_ENERGY_AWARE + bool "Default to enabling the Energy Aware Scheduler feature" + default n + help + This option defaults the ENERGY_AWARE scheduling feature to true, + as without SCHED_DEBUG set this feature can't be enabled or disabled + via sysctl. + + Say N if unsure. + config SYSFS_DEPRECATED bool "Enable deprecated sysfs features to support old userspace tools" depends on SYSFS diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 8cbd6e6894d514d2078c4af3bcb4e77c68a73abf..a37a10bc07ea0fb3f9b0d7e3ba0da59bcd0cc46f 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -745,7 +745,7 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, } mode &= ~current_umask(); - ret = vfs_create(dir, path->dentry, mode, true); + ret = vfs_create2(path->mnt, dir, path->dentry, mode, true); path->dentry->d_fsdata = NULL; if (ret) return ERR_PTR(ret); @@ -761,7 +761,7 @@ static struct file *do_open(struct path *path, int oflag) if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) return ERR_PTR(-EINVAL); acc = oflag2acc[oflag & O_ACCMODE]; - if (inode_permission(d_inode(path->dentry), acc)) + if (inode_permission2(path->mnt, d_inode(path->dentry), acc)) return ERR_PTR(-EACCES); return dentry_open(path, oflag, current_cred()); } @@ -794,7 +794,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, ro = mnt_want_write(mnt); /* we'll drop it in any case */ error = 0; inode_lock(d_inode(root)); - path.dentry = lookup_one_len(name->name, root, strlen(name->name)); + path.dentry = lookup_one_len2(name->name, mnt, root, strlen(name->name)); if (IS_ERR(path.dentry)) { error = PTR_ERR(path.dentry); goto out_putfd; @@ -865,7 +865,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) if (err) goto out_name; inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT); - dentry = lookup_one_len(name->name, mnt->mnt_root, + dentry = lookup_one_len2(name->name, mnt, mnt->mnt_root, strlen(name->name)); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); @@ -877,7 +877,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) err = -ENOENT; } else { ihold(inode); - err = vfs_unlink(d_inode(dentry->d_parent), dentry, NULL); + err = vfs_unlink2(mnt, d_inode(dentry->d_parent), dentry, NULL); } dput(dentry); diff --git a/kernel/capability.c b/kernel/capability.c index 00411c82dac57c4cac0254c823f9adcb2a951549..4984e1f552ebfb97fa460b527b35c67ebd0f324d 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -456,6 +456,19 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns, } EXPORT_SYMBOL(file_ns_capable); +/** + * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode? + * @ns: The user namespace in question + * @inode: The inode in question + * + * Return true if the inode uid and gid are within the namespace. 
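privileged_wrt_inode_uidgid(), whose kernel-doc ends just above, only answers whether the inode's owner and group resolve inside the namespace's ID mappings. The sketch below illustrates that containment test under the simplifying assumption of a single contiguous mapping per namespace (real namespaces can carry several extents); it is not the kernel's kuid_has_mapping()/kgid_has_mapping() implementation:

    #include <stdio.h>
    #include <stdbool.h>

    /* One contiguous ID mapping: [first, first + count) in the parent ID space. */
    struct demo_ns {
            unsigned int uid_first, uid_count;
            unsigned int gid_first, gid_count;
    };

    struct demo_inode { unsigned int i_uid, i_gid; };

    static bool id_has_mapping(unsigned int first, unsigned int count, unsigned int id)
    {
            return id >= first && id - first < count;
    }

    static bool privileged_wrt_inode_uidgid_demo(const struct demo_ns *ns,
                                                 const struct demo_inode *inode)
    {
            return id_has_mapping(ns->uid_first, ns->uid_count, inode->i_uid) &&
                   id_has_mapping(ns->gid_first, ns->gid_count, inode->i_gid);
    }

    int main(void)
    {
            struct demo_ns ns = { 100000, 65536, 100000, 65536 };
            struct demo_inode inside = { 100500, 100500 };
            struct demo_inode outside = { 0, 0 };

            printf("inside mapping:  %d\n", privileged_wrt_inode_uidgid_demo(&ns, &inside));
            printf("outside mapping: %d\n", privileged_wrt_inode_uidgid_demo(&ns, &outside));
            return 0;
    }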
+ */ +bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode) +{ + return kuid_has_mapping(ns, inode->i_uid) && + kgid_has_mapping(ns, inode->i_gid); +} + /** * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped * @inode: The inode in question @@ -469,7 +482,26 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap) { struct user_namespace *ns = current_user_ns(); - return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) && - kgid_has_mapping(ns, inode->i_gid); + return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode); } EXPORT_SYMBOL(capable_wrt_inode_uidgid); + +/** + * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace + * @tsk: The task that may be ptraced + * @ns: The user namespace to search for CAP_SYS_PTRACE in + * + * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE + * in the specified user namespace. + */ +bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns) +{ + int ret = 0; /* An absent tracer adds no restrictions */ + const struct cred *cred; + rcu_read_lock(); + cred = rcu_dereference(tsk->ptracer_cred); + if (cred) + ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE); + rcu_read_unlock(); + return (ret == 0); +} diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b848fe851c91822896c8cdaec88ce59bd1415fc9..eadd9421ac3c9f9460efae7a4c61af29c1beef29 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2862,25 +2862,6 @@ int subsys_cgroup_allow_attach(struct cgroup_taskset *tset) return 0; } -static int cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) -{ - struct cgroup_subsys_state *css; - int i; - int ret; - - for_each_css(css, i, cgrp) { - if (css->ss->allow_attach) { - ret = css->ss->allow_attach(tset); - if (ret) - return ret; - } else { - return -EACCES; - } - } - - return 0; -} - static int cgroup_procs_write_permission(struct task_struct *task, struct cgroup *dst_cgrp, struct kernfs_open_file *of) @@ -2895,24 +2876,9 @@ static int cgroup_procs_write_permission(struct task_struct *task, */ if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->euid, tcred->suid)) { - /* - * if the default permission check fails, give each - * cgroup a chance to extend the permission check - */ - struct cgroup_taskset tset = { - .src_csets = LIST_HEAD_INIT(tset.src_csets), - .dst_csets = LIST_HEAD_INIT(tset.dst_csets), - .csets = &tset.src_csets, - }; - struct css_set *cset; - cset = task_css_set(task); - list_add(&cset->mg_node, &tset.src_csets); - ret = cgroup_allow_attach(dst_cgrp, &tset); - list_del(&tset.src_csets); - if (ret) - ret = -EACCES; - } + !uid_eq(cred->euid, tcred->suid) && + !ns_capable(tcred->user_ns, CAP_SYS_RESOURCE)) + ret = -EACCES; if (!ret && cgroup_on_dfl(dst_cgrp)) { struct super_block *sb = of->file->f_path.dentry->d_sb; @@ -5274,6 +5240,11 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, return ERR_PTR(err); } +/* + * The returned cgroup is fully initialized including its control mask, but + * it isn't associated with its kernfs_node and doesn't have the control + * mask applied. 
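The cgroup_create() comment above describes a create/activate split: the function now returns an object that is initialized but not yet published (no kernfs node, control mask not applied), and the caller either finishes the job or destroys it. A generic stand-alone sketch of that two-phase pattern, not the cgroup code itself:

    #include <stdio.h>
    #include <stdlib.h>

    struct widget {
            int configured; /* set up during create */
            int published;  /* set only once activation succeeds */
    };

    /* Phase 1: allocate and initialize, but do not make the object visible yet. */
    static struct widget *widget_create(void)
    {
            struct widget *w = calloc(1, sizeof(*w));

            if (!w)
                    return NULL;
            w->configured = 1;
            return w;
    }

    /* Phase 2: publish; on failure the caller still owns the object and frees it. */
    static int widget_activate(struct widget *w, int simulate_failure)
    {
            if (simulate_failure)
                    return -1;
            w->published = 1;
            return 0;
    }

    int main(void)
    {
            struct widget *w = widget_create();

            if (!w)
                    return 1;
            if (widget_activate(w, 0) < 0) {
                    free(w); /* cleanup stays with the caller */
                    return 1;
            }
            printf("configured=%d published=%d\n", w->configured, w->published);
            free(w);
            return 0;
    }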
+ */ static struct cgroup *cgroup_create(struct cgroup *parent) { struct cgroup_root *root = parent->root; @@ -5338,11 +5309,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent) cgroup_propagate_control(cgrp); - /* @cgrp doesn't have dir yet so the following will only create csses */ - ret = cgroup_apply_control_enable(cgrp); - if (ret) - goto out_destroy; - return cgrp; out_cancel_ref: @@ -5350,9 +5316,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent) out_free_cgrp: kfree(cgrp); return ERR_PTR(ret); -out_destroy: - cgroup_destroy_locked(cgrp); - return ERR_PTR(ret); } static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config index adc552e1f9b16ae52c690e81e373b0b788d6f727..4732628dcf28bf6c4aa7dc19ba2c1364c6d39fac 100644 --- a/kernel/configs/android-base.config +++ b/kernel/configs/android-base.config @@ -135,7 +135,11 @@ CONFIG_PPP_DEFLATE=y CONFIG_PPP_MPPE=y CONFIG_PREEMPT=y CONFIG_PROFILING=y +CONFIG_QFMT_V2=y CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QUOTA_TREE=y +CONFIG_QUOTACTL=y CONFIG_RANDOMIZE_BASE=y CONFIG_RTC_CLASS=y CONFIG_RT_GROUP_SCHED=y diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config index 437f33732f828059ef125bb67f5b812ab946510f..abec2cadc38b2fe3ebc66fd2e162d86bca28d082 100644 --- a/kernel/configs/android-recommended.config +++ b/kernel/configs/android-recommended.config @@ -1,10 +1,12 @@ # KEEP ALPHABETICALLY SORTED +# CONFIG_AIO is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_LEGACY_PTYS is not set # CONFIG_NF_CONNTRACK_SIP is not set # CONFIG_PM_WAKELOCKS_GC is not set # CONFIG_VT is not set +CONFIG_ARM64_SW_TTBR0_PAN=y CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_BLK_DEV_DM=y CONFIG_BLK_DEV_LOOP=y @@ -92,6 +94,7 @@ CONFIG_LOGIRUMBLEPAD2_FF=y CONFIG_LOGITECH_FF=y CONFIG_MD=y CONFIG_MEDIA_SUPPORT=y +CONFIG_MEMORY_STATE_TIME=y CONFIG_MSDOS_FS=y CONFIG_PANIC_TIMEOUT=5 CONFIG_PANTHERLORD_FF=y diff --git a/kernel/cpu.c b/kernel/cpu.c index 2918a9a6a9570acfda685054f75bff964e40b4e5..7c2314436fab82e2777eeba697ebfff16c7d25e9 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -659,7 +659,6 @@ void __init cpuhp_threads_init(void) kthread_unpark(this_cpu_read(cpuhp_state.thread)); } -#ifdef CONFIG_HOTPLUG_CPU EXPORT_SYMBOL(register_cpu_notifier); EXPORT_SYMBOL(__register_cpu_notifier); void unregister_cpu_notifier(struct notifier_block *nb) @@ -676,6 +675,7 @@ void __unregister_cpu_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(__unregister_cpu_notifier); +#ifdef CONFIG_HOTPLUG_CPU /** * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU * @cpu: a CPU id diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 0874e2edd2756bcbe5542a496fb6b131b092b3ea..79517e5549f119aff663ff5572507c70671a2b41 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -598,11 +598,11 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, /* * Wait for the other CPUs to be notified and be waiting for us: */ - time_left = loops_per_jiffy * HZ; + time_left = MSEC_PER_SEC; while (kgdb_do_roundup && --time_left && (atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) != online_cpus) - cpu_relax(); + udelay(1000); if (!time_left) pr_crit("Timed out waiting for secondary CPUs.\n"); diff --git a/kernel/events/core.c b/kernel/events/core.c index 
90488307055b6cf94491d30dae4c26fcff3723c2..8e901de88d6ca55b7e8328f73f84ba5a653b8e35 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1474,7 +1474,6 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) static void list_add_event(struct perf_event *event, struct perf_event_context *ctx) { - lockdep_assert_held(&ctx->lock); WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); @@ -1629,6 +1628,8 @@ static void perf_group_attach(struct perf_event *event) { struct perf_event *group_leader = event->group_leader, *pos; + lockdep_assert_held(&event->ctx->lock); + /* * We can have double attach due to group movement in perf_event_open. */ @@ -1702,6 +1703,8 @@ static void perf_group_detach(struct perf_event *event) struct perf_event *sibling, *tmp; struct list_head *list = NULL; + lockdep_assert_held(&event->ctx->lock); + /* * We can have double detach due to exit/hot-unplug + close. */ @@ -1900,9 +1903,29 @@ __perf_remove_from_context(struct perf_event *event, */ static void perf_remove_from_context(struct perf_event *event, unsigned long flags) { - lockdep_assert_held(&event->ctx->mutex); + struct perf_event_context *ctx = event->ctx; + + lockdep_assert_held(&ctx->mutex); event_function_call(event, __perf_remove_from_context, (void *)flags); + + /* + * The above event_function_call() can NO-OP when it hits + * TASK_TOMBSTONE. In that case we must already have been detached + * from the context (by perf_event_exit_event()) but the grouping + * might still be in-tact. + */ + WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); + if ((flags & DETACH_GROUP) && + (event->attach_state & PERF_ATTACH_GROUP)) { + /* + * Since in that case we cannot possibly be scheduled, simply + * detach now. + */ + raw_spin_lock_irq(&ctx->lock); + perf_group_detach(event); + raw_spin_unlock_irq(&ctx->lock); + } } /* @@ -6589,6 +6612,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) char *buf = NULL; char *name; + if (vma->vm_flags & VM_READ) + prot |= PROT_READ; + if (vma->vm_flags & VM_WRITE) + prot |= PROT_WRITE; + if (vma->vm_flags & VM_EXEC) + prot |= PROT_EXEC; + + if (vma->vm_flags & VM_MAYSHARE) + flags = MAP_SHARED; + else + flags = MAP_PRIVATE; + + if (vma->vm_flags & VM_DENYWRITE) + flags |= MAP_DENYWRITE; + if (vma->vm_flags & VM_MAYEXEC) + flags |= MAP_EXECUTABLE; + if (vma->vm_flags & VM_LOCKED) + flags |= MAP_LOCKED; + if (vma->vm_flags & VM_HUGETLB) + flags |= MAP_HUGETLB; + if (file) { struct inode *inode; dev_t dev; @@ -6615,27 +6659,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) maj = MAJOR(dev); min = MINOR(dev); - if (vma->vm_flags & VM_READ) - prot |= PROT_READ; - if (vma->vm_flags & VM_WRITE) - prot |= PROT_WRITE; - if (vma->vm_flags & VM_EXEC) - prot |= PROT_EXEC; - - if (vma->vm_flags & VM_MAYSHARE) - flags = MAP_SHARED; - else - flags = MAP_PRIVATE; - - if (vma->vm_flags & VM_DENYWRITE) - flags |= MAP_DENYWRITE; - if (vma->vm_flags & VM_MAYEXEC) - flags |= MAP_EXECUTABLE; - if (vma->vm_flags & VM_LOCKED) - flags |= MAP_LOCKED; - if (vma->vm_flags & VM_HUGETLB) - flags |= MAP_HUGETLB; - goto got_name; } else { if (vma->vm_ops && vma->vm_ops->name) { @@ -9509,6 +9532,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) return 0; } +/* + * Variation on perf_event_ctx_lock_nested(), except we take two context + * mutexes. 
+ */ +static struct perf_event_context * +__perf_event_ctx_lock_double(struct perf_event *group_leader, + struct perf_event_context *ctx) +{ + struct perf_event_context *gctx; + +again: + rcu_read_lock(); + gctx = READ_ONCE(group_leader->ctx); + if (!atomic_inc_not_zero(&gctx->refcount)) { + rcu_read_unlock(); + goto again; + } + rcu_read_unlock(); + + mutex_lock_double(&gctx->mutex, &ctx->mutex); + + if (group_leader->ctx != gctx) { + mutex_unlock(&ctx->mutex); + mutex_unlock(&gctx->mutex); + put_ctx(gctx); + goto again; + } + + return gctx; +} + /** * sys_perf_event_open - open a performance event, associate it to a task/cpu * @@ -9755,12 +9809,31 @@ SYSCALL_DEFINE5(perf_event_open, } if (move_group) { - gctx = group_leader->ctx; - mutex_lock_double(&gctx->mutex, &ctx->mutex); + gctx = __perf_event_ctx_lock_double(group_leader, ctx); + if (gctx->task == TASK_TOMBSTONE) { err = -ESRCH; goto err_locked; } + + /* + * Check if we raced against another sys_perf_event_open() call + * moving the software group underneath us. + */ + if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) { + /* + * If someone moved the group out from under us, check + * if this new event wound up on the same ctx, if so + * its the regular !move_group case, otherwise fail. + */ + if (gctx != ctx) { + err = -EINVAL; + goto err_locked; + } else { + perf_event_ctx_unlock(group_leader, gctx); + move_group = 0; + } + } } else { mutex_lock(&ctx->mutex); } @@ -9862,7 +9935,7 @@ SYSCALL_DEFINE5(perf_event_open, perf_unpin_context(ctx); if (move_group) - mutex_unlock(&gctx->mutex); + perf_event_ctx_unlock(group_leader, gctx); mutex_unlock(&ctx->mutex); if (task) { @@ -9888,7 +9961,7 @@ SYSCALL_DEFINE5(perf_event_open, err_locked: if (move_group) - mutex_unlock(&gctx->mutex); + perf_event_ctx_unlock(group_leader, gctx); mutex_unlock(&ctx->mutex); /* err_file: */ fput(event_file); diff --git a/kernel/exit.c b/kernel/exit.c index 3076f3089919b60697f296ec01126e0af5947bcc..46a7c2bafda25d5b691e53781cb4e6347f0ef3d3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -55,6 +55,8 @@ #include #include +#include "sched/tune.h" + #include #include #include @@ -775,6 +777,9 @@ void __noreturn do_exit(long code) } exit_signals(tsk); /* sets PF_EXITING */ + + schedtune_exit_task(tsk); + /* * Ensure that all new tsk->pi_lock acquisitions must observe * PF_EXITING. Serializes against futex.c:attach_to_pi_owner(). 
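
An illustrative user-space sketch (not part of the patch, and assuming nothing beyond the standard glibc ptrace(2) wrapper): the kernel/fork.c hunk that follows records the creating task's user namespace in mm->user_ns, and the kernel/ptrace.c changes further down consult it via ptracer_capable() before servicing word-sized reads and writes such as PTRACE_PEEKDATA through the new ptrace_access_vm() helper. A minimal tracer that exercises that path could look like the following; the variable and program names here are hypothetical.

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <signal.h>
    #include <errno.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/wait.h>

    static long secret = 0x1234abcd;    /* same address in parent and child after fork() */

    int main(void)
    {
        pid_t child = fork();

        if (child == 0) {
            /* Tracee: let the parent trace us, then stop and wait. */
            ptrace(PTRACE_TRACEME, 0, NULL, NULL);
            raise(SIGSTOP);
            pause();
            _exit(0);
        }

        /* Tracer: wait for the SIGSTOP notification, then peek the child's data. */
        waitpid(child, NULL, 0);

        errno = 0;
        long val = ptrace(PTRACE_PEEKDATA, child, &secret, NULL);
        if (val == -1 && errno)
            perror("PTRACE_PEEKDATA");
        else
            printf("read 0x%lx from child %d\n", val, (int)child);

        kill(child, SIGKILL);
        waitpid(child, NULL, 0);
        return 0;
    }

With the patch applied, this PTRACE_PEEKDATA request is served using the credentials recorded at attach time and checked against the tracee's mm->user_ns, rather than the tracer's credentials at read time.
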
diff --git a/kernel/fork.c b/kernel/fork.c index 23f9d08356b6b0ff752bdf2c3c80493caabd04c2..cb4faae8de55875c60027e2f55a4d4a785ecc1f4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -745,7 +745,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } -static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) +static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, + struct user_namespace *user_ns) { mm->mmap = NULL; mm->mm_rb = RB_ROOT; @@ -785,6 +786,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) if (init_new_context(p, mm)) goto fail_nocontext; + mm->user_ns = get_user_ns(user_ns); return mm; fail_nocontext: @@ -830,7 +832,7 @@ struct mm_struct *mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - return mm_init(mm, current); + return mm_init(mm, current, current_user_ns()); } /* @@ -845,6 +847,7 @@ void __mmdrop(struct mm_struct *mm) destroy_context(mm); mmu_notifier_mm_destroy(mm); check_mm(mm); + put_user_ns(mm->user_ns); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); @@ -1127,7 +1130,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) memcpy(mm, oldmm, sizeof(*mm)); - if (!mm_init(mm, tsk)) + if (!mm_init(mm, tsk, mm->user_ns)) goto fail_nomem; err = dup_mmap(mm, oldmm); diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 17f51d63da5631ca0de1aa1aa6cc4077b220fe97..668f51b861f77365073aed0646804a06050a11de 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -37,10 +37,10 @@ static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk) { - int n, nodes; + int n, nodes = 0; /* Calculate the number of nodes in the supplied affinity mask */ - for (n = 0, nodes = 0; n < num_online_nodes(); n++) { + for_each_online_node(n) { if (cpumask_intersects(mask, cpumask_of_node(n))) { node_set(n, *nodemsk); nodes++; @@ -81,7 +81,7 @@ struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, nodes = get_nodes_in_cpumask(affinity, &nodemsk); /* - * If the number of nodes in the mask is less than or equal the + * If the number of nodes in the mask is greater than or equal the * number of vectors we just spread the vectors across the nodes. 
*/ if (nvec <= nodes) { diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8c0a0ae43521c7f8b9e97964912cc3ab5e10fd15..b59e6768c5e94ad831e78b539437cfd99576e780 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1346,6 +1346,30 @@ void irq_domain_free_irqs_parent(struct irq_domain *domain, } EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); +static void __irq_domain_activate_irq(struct irq_data *irq_data) +{ + if (irq_data && irq_data->domain) { + struct irq_domain *domain = irq_data->domain; + + if (irq_data->parent_data) + __irq_domain_activate_irq(irq_data->parent_data); + if (domain->ops->activate) + domain->ops->activate(domain, irq_data); + } +} + +static void __irq_domain_deactivate_irq(struct irq_data *irq_data) +{ + if (irq_data && irq_data->domain) { + struct irq_domain *domain = irq_data->domain; + + if (domain->ops->deactivate) + domain->ops->deactivate(domain, irq_data); + if (irq_data->parent_data) + __irq_domain_deactivate_irq(irq_data->parent_data); + } +} + /** * irq_domain_activate_irq - Call domain_ops->activate recursively to activate * interrupt @@ -1356,13 +1380,9 @@ EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); */ void irq_domain_activate_irq(struct irq_data *irq_data) { - if (irq_data && irq_data->domain) { - struct irq_domain *domain = irq_data->domain; - - if (irq_data->parent_data) - irq_domain_activate_irq(irq_data->parent_data); - if (domain->ops->activate) - domain->ops->activate(domain, irq_data); + if (!irqd_is_activated(irq_data)) { + __irq_domain_activate_irq(irq_data); + irqd_set_activated(irq_data); } } @@ -1376,13 +1396,9 @@ void irq_domain_activate_irq(struct irq_data *irq_data) */ void irq_domain_deactivate_irq(struct irq_data *irq_data) { - if (irq_data && irq_data->domain) { - struct irq_domain *domain = irq_data->domain; - - if (domain->ops->deactivate) - domain->ops->deactivate(domain, irq_data); - if (irq_data->parent_data) - irq_domain_deactivate_irq(irq_data->parent_data); + if (irqd_is_activated(irq_data)) { + __irq_domain_deactivate_irq(irq_data); + irqd_clr_activated(irq_data); } } diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 93ad6c1fb9b6212e706eb3ae08f7b881192008ec..a9b8cf50059151c17f63d35cf4c622ae8b72f131 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -182,6 +182,13 @@ void static_key_slow_dec_deferred(struct static_key_deferred *key) } EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); +void static_key_deferred_flush(struct static_key_deferred *key) +{ + STATIC_KEY_CHECK_USE(); + flush_delayed_work(&key->work); +} +EXPORT_SYMBOL_GPL(static_key_deferred_flush); + void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { diff --git a/kernel/memremap.c b/kernel/memremap.c index b501e390bb34403c11d6ae09c31ea9fcb769ea8e..9ecedc28b928debb6a5988a5db8b76833133d9e4 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -246,7 +246,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data) /* pages are dead and unused, undo the arch mapping */ align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); + mem_hotplug_begin(); arch_remove_memory(align_start, align_size); + mem_hotplug_done(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, @@ -358,7 +360,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_pfn_remap; + mem_hotplug_begin(); error = arch_add_memory(nid, align_start, 
align_size, true); + mem_hotplug_done(); if (error) goto err_add_memory; diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index df9e8e9e0be7714fa580b547492c940b11221008..eef2ce9686366a72e9dadaa9056bcda8295ae93d 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -151,8 +151,12 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns static void delayed_free_pidns(struct rcu_head *p) { - kmem_cache_free(pid_ns_cachep, - container_of(p, struct pid_namespace, rcu)); + struct pid_namespace *ns = container_of(p, struct pid_namespace, rcu); + + dec_pid_namespaces(ns->ucounts); + put_user_ns(ns->user_ns); + + kmem_cache_free(pid_ns_cachep, ns); } static void destroy_pid_namespace(struct pid_namespace *ns) @@ -162,8 +166,6 @@ static void destroy_pid_namespace(struct pid_namespace *ns) ns_free_inum(&ns->ns); for (i = 0; i < PIDMAP_ENTRIES; i++) kfree(ns->pidmap[i].page); - dec_pid_namespaces(ns->ucounts); - put_user_ns(ns->user_ns); call_rcu(&ns->rcu, delayed_free_pidns); } diff --git a/kernel/power/process.c b/kernel/power/process.c index b911deccb90a5a48085dee8f9c2dae674c526311..68d27ae215a2a1744e80586e5af90c58e7e4e428 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -104,13 +104,13 @@ static int try_to_freeze_tasks(bool user_only) if (wq_busy) show_workqueue_state(); - read_lock(&tasklist_lock); - for_each_process_thread(g, p) { - if (p != current && !freezer_should_skip(p) - && freezing(p) && !frozen(p)) - sched_show_task(p); - } - read_unlock(&tasklist_lock); + read_lock(&tasklist_lock); + for_each_process_thread(g, p) { + if (p != current && !freezer_should_skip(p) + && freezing(p) && !frozen(p)) + sched_show_task(p); + } + read_unlock(&tasklist_lock); } else { pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, elapsed_msecs % 1000); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e6474f7272ec2ce96c95532ea906da113acd5354..49ba7c1ade9d074b814105fb648f33b0ce95f061 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -27,6 +27,35 @@ #include #include +/* + * Access another process' address space via ptrace. 
+ * Source/target buffer must be kernel space, + * Do not walk the page table directly, use get_user_pages + */ +int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags) +{ + struct mm_struct *mm; + int ret; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + if (!tsk->ptrace || + (current != tsk->parent) || + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptracer_capable(tsk, mm->user_ns))) { + mmput(mm); + return 0; + } + + ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); + mmput(mm); + + return ret; +} + /* * ptrace a task: make the debugger its new parent and @@ -39,6 +68,9 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) BUG_ON(!list_empty(&child->ptrace_entry)); list_add(&child->ptrace_entry, &new_parent->ptraced); child->parent = new_parent; + rcu_read_lock(); + child->ptracer_cred = get_cred(__task_cred(new_parent)); + rcu_read_unlock(); } /** @@ -71,12 +103,16 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) */ void __ptrace_unlink(struct task_struct *child) { + const struct cred *old_cred; BUG_ON(!child->ptrace); clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->parent = child->real_parent; list_del_init(&child->ptrace_entry); + old_cred = child->ptracer_cred; + child->ptracer_cred = NULL; + put_cred(old_cred); spin_lock(&child->sighand->siglock); child->ptrace = 0; @@ -220,7 +256,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; - int dumpable = 0; + struct mm_struct *mm; kuid_t caller_uid; kgid_t caller_gid; @@ -271,16 +307,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) return -EPERM; ok: rcu_read_unlock(); - smp_rmb(); - if (task->mm) - dumpable = get_dumpable(task->mm); - rcu_read_lock(); - if (dumpable != SUID_DUMP_USER && - !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { - rcu_read_unlock(); - return -EPERM; - } - rcu_read_unlock(); + mm = task->mm; + if (mm && + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptrace_has_cap(mm->user_ns, mode))) + return -EPERM; return security_ptrace_access_check(task, mode); } @@ -344,10 +375,6 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) flags |= PT_SEIZED; - rcu_read_lock(); - if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE)) - flags |= PT_PTRACE_CAP; - rcu_read_unlock(); task->ptrace = flags; __ptrace_link(task, current); @@ -537,7 +564,8 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst int this_len, retval; this_len = (len > sizeof(buf)) ? sizeof(buf) : len; - retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE); + retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE); + if (!retval) { if (copied) break; @@ -564,7 +592,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds this_len = (len > sizeof(buf)) ? 
sizeof(buf) : len; if (copy_from_user(buf, src, this_len)) return -EFAULT; - retval = access_process_vm(tsk, dst, buf, this_len, + retval = ptrace_access_vm(tsk, dst, buf, this_len, FOLL_FORCE | FOLL_WRITE); if (!retval) { if (copied) @@ -1128,7 +1156,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, unsigned long tmp; int copied; - copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); + copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) return -EIO; return put_user(tmp, (unsigned long __user *)data); @@ -1139,7 +1167,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, { int copied; - copied = access_process_vm(tsk, addr, &data, sizeof(data), + copied = ptrace_access_vm(tsk, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); return (copied == sizeof(data)) ? 0 : -EIO; } @@ -1157,7 +1185,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, switch (request) { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: - ret = access_process_vm(child, addr, &word, sizeof(word), + ret = ptrace_access_vm(child, addr, &word, sizeof(word), FOLL_FORCE); if (ret != sizeof(word)) ret = -EIO; @@ -1167,7 +1195,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, case PTRACE_POKETEXT: case PTRACE_POKEDATA: - ret = access_process_vm(child, addr, &data, sizeof(data), + ret = ptrace_access_vm(child, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); ret = (ret != sizeof(data) ? -EIO : 0); break; diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 80adef7d4c3d01d9ef9ed95c483956d2a858854f..0d6ff3e471be6c1597e0e78fb90d07eb0ce9c546 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -136,6 +136,7 @@ int rcu_jiffies_till_stall_check(void); #define TPS(x) tracepoint_string(x) void rcu_early_boot_tests(void); +void rcu_test_sync_prims(void); /* * This function really isn't for public consumption, but RCU is special in diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 1898559e6b60ddc52884f6977fca21e57c6f1f90..b23a4d076f3d2c64862172c83c18f21605e87159 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -185,9 +185,6 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused * benefits of doing might_sleep() to reduce latency.) * * Cool, huh? (Due to Josh Triplett.) - * - * But we want to make this a static inline later. The cond_resched() - * currently makes this problematic. */ void synchronize_sched(void) { @@ -195,7 +192,6 @@ void synchronize_sched(void) lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_sched() in RCU read-side critical section"); - cond_resched(); } EXPORT_SYMBOL_GPL(synchronize_sched); diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index 196f0302e2f4320ebd25dedc31d0ea8a4ab026c1..c64b827ecbca19656395e873ca06da0c92a6298e 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -60,12 +60,17 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); /* * During boot, we forgive RCU lockdep issues. After this function is - * invoked, we start taking RCU lockdep issues seriously. + * invoked, we start taking RCU lockdep issues seriously. Note that unlike + * Tree RCU, Tiny RCU transitions directly from RCU_SCHEDULER_INACTIVE + * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage. 
+ * The reason for this is that Tiny RCU does not need kthreads, so does + * not have to care about the fact that the scheduler is half-initialized + * at a certain phase of the boot process. */ void __init rcu_scheduler_starting(void) { WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; + rcu_scheduler_active = RCU_SCHEDULER_RUNNING; } #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 69a5611a7e7c03dcf950d94badfcce3445863440..10f62c6f48e77a0b2e415fbdbfd4c2e6bcde7224 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -127,13 +127,16 @@ int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ int sysctl_panic_on_rcu_stall __read_mostly; /* - * The rcu_scheduler_active variable transitions from zero to one just - * before the first task is spawned. So when this variable is zero, RCU - * can assume that there is but one task, allowing RCU to (for example) + * The rcu_scheduler_active variable is initialized to the value + * RCU_SCHEDULER_INACTIVE and transitions RCU_SCHEDULER_INIT just before the + * first task is spawned. So when this variable is RCU_SCHEDULER_INACTIVE, + * RCU can assume that there is but one task, allowing RCU to (for example) * optimize synchronize_rcu() to a simple barrier(). When this variable - * is one, RCU must actually do all the hard work required to detect real - * grace periods. This variable is also used to suppress boot-time false - * positives from lockdep-RCU error checking. + * is RCU_SCHEDULER_INIT, RCU must actually do all the hard work required + * to detect real grace periods. This variable is also used to suppress + * boot-time false positives from lockdep-RCU error checking. Finally, it + * transitions from RCU_SCHEDULER_INIT to RCU_SCHEDULER_RUNNING after RCU + * is fully initialized, including all of its kthreads having been spawned. */ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); @@ -3985,18 +3988,22 @@ static int __init rcu_spawn_gp_kthread(void) early_initcall(rcu_spawn_gp_kthread); /* - * This function is invoked towards the end of the scheduler's initialization - * process. Before this is called, the idle task might contain - * RCU read-side critical sections (during which time, this idle - * task is booting the system). After this function is called, the - * idle tasks are prohibited from containing RCU read-side critical - * sections. This function also enables RCU lockdep checking. + * This function is invoked towards the end of the scheduler's + * initialization process. Before this is called, the idle task might + * contain synchronous grace-period primitives (during which time, this idle + * task is booting the system, and such primitives are no-ops). After this + * function is called, any synchronous grace-period primitives are run as + * expedited, with the requesting task driving the grace period forward. + * A later core_initcall() rcu_exp_runtime_mode() will switch to full + * runtime RCU functionality. 
*/ void rcu_scheduler_starting(void) { WARN_ON(num_online_cpus() != 1); WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; + rcu_test_sync_prims(); + rcu_scheduler_active = RCU_SCHEDULER_INIT; + rcu_test_sync_prims(); } /* diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 24343eb87b582e4f1dd5d436e6d7ef830e935c19..78eba4120d4658137d62d3ede31bd52265270218 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -521,6 +521,20 @@ struct rcu_exp_work { struct work_struct rew_work; }; +/* + * Common code to drive an expedited grace period forward, used by + * workqueues and mid-boot-time tasks. + */ +static void rcu_exp_sel_wait_wake(struct rcu_state *rsp, + smp_call_func_t func, unsigned long s) +{ + /* Initialize the rcu_node tree in preparation for the wait. */ + sync_rcu_exp_select_cpus(rsp, func); + + /* Wait and clean up, including waking everyone. */ + rcu_exp_wait_wake(rsp, s); +} + /* * Work-queue handler to drive an expedited grace period forward. */ @@ -528,12 +542,8 @@ static void wait_rcu_exp_gp(struct work_struct *wp) { struct rcu_exp_work *rewp; - /* Initialize the rcu_node tree in preparation for the wait. */ rewp = container_of(wp, struct rcu_exp_work, rew_work); - sync_rcu_exp_select_cpus(rewp->rew_rsp, rewp->rew_func); - - /* Wait and clean up, including waking everyone. */ - rcu_exp_wait_wake(rewp->rew_rsp, rewp->rew_s); + rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s); } /* @@ -559,12 +569,18 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp, if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. */ - /* Marshall arguments and schedule the expedited grace period. */ - rew.rew_func = func; - rew.rew_rsp = rsp; - rew.rew_s = s; - INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); - schedule_work(&rew.rew_work); + /* Ensure that load happens before action based on it. */ + if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) { + /* Direct call during scheduler init and early_initcalls(). */ + rcu_exp_sel_wait_wake(rsp, func, s); + } else { + /* Marshall arguments & schedule the expedited grace period. */ + rew.rew_func = func; + rew.rew_rsp = rsp; + rew.rew_s = s; + INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); + schedule_work(&rew.rew_work); + } /* Wait for expedited grace period to complete. */ rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); @@ -666,6 +682,8 @@ void synchronize_rcu_expedited(void) { struct rcu_state *rsp = rcu_state_p; + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) + return; _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); @@ -683,3 +701,15 @@ void synchronize_rcu_expedited(void) EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + +/* + * Switch to run-time mode once Tree RCU has fully initialized. 
+ */ +static int __init rcu_exp_runtime_mode(void) +{ + rcu_test_sync_prims(); + rcu_scheduler_active = RCU_SCHEDULER_RUNNING; + rcu_test_sync_prims(); + return 0; +} +core_initcall(rcu_exp_runtime_mode); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 85c5a883c6e31047194a8c74603ce71ab8381f67..56583e764ebf398a7b14f442f63ce6f707f046e4 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -670,7 +670,7 @@ void synchronize_rcu(void) lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_rcu() in RCU read-side critical section"); - if (!rcu_scheduler_active) + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) return; if (rcu_gp_is_expedited()) synchronize_rcu_expedited(); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index f19271dce0a9784709d7bc7860a2de972e67a070..4f6db7e6a1179ee00c99f62d854a39b00c959d2b 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -121,11 +121,14 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held); * Should expedited grace-period primitives always fall back to their * non-expedited counterparts? Intended for use within RCU. Note * that if the user specifies both rcu_expedited and rcu_normal, then - * rcu_normal wins. + * rcu_normal wins. (Except during the time period during boot from + * when the first task is spawned until the rcu_exp_runtime_mode() + * core_initcall() is invoked, at which point everything is expedited.) */ bool rcu_gp_is_normal(void) { - return READ_ONCE(rcu_normal); + return READ_ONCE(rcu_normal) && + rcu_scheduler_active != RCU_SCHEDULER_INIT; } EXPORT_SYMBOL_GPL(rcu_gp_is_normal); @@ -135,13 +138,14 @@ static atomic_t rcu_expedited_nesting = /* * Should normal grace-period primitives be expedited? Intended for * use within RCU. Note that this function takes the rcu_expedited - * sysfs/boot variable into account as well as the rcu_expedite_gp() - * nesting. So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited() - * returns false is a -really- bad idea. + * sysfs/boot variable and rcu_scheduler_active into account as well + * as the rcu_expedite_gp() nesting. So looping on rcu_unexpedite_gp() + * until rcu_gp_is_expedited() returns false is a -really- bad idea. */ bool rcu_gp_is_expedited(void) { - return rcu_expedited || atomic_read(&rcu_expedited_nesting); + return rcu_expedited || atomic_read(&rcu_expedited_nesting) || + rcu_scheduler_active == RCU_SCHEDULER_INIT; } EXPORT_SYMBOL_GPL(rcu_gp_is_expedited); @@ -257,7 +261,7 @@ EXPORT_SYMBOL_GPL(rcu_callback_map); int notrace debug_lockdep_rcu_enabled(void) { - return rcu_scheduler_active && debug_locks && + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks && current->lockdep_recursion == 0; } EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); @@ -591,7 +595,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks); void synchronize_rcu_tasks(void) { /* Complain if the scheduler has not started. */ - RCU_LOCKDEP_WARN(!rcu_scheduler_active, + RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, "synchronize_rcu_tasks called too soon"); /* Wait for the grace period. */ @@ -813,6 +817,23 @@ static void rcu_spawn_tasks_kthread(void) #endif /* #ifdef CONFIG_TASKS_RCU */ +/* + * Test each non-SRCU synchronous grace-period wait API. This is + * useful just after a change in mode for these primitives, and + * during early boot. 
+ */ +void rcu_test_sync_prims(void) +{ + if (!IS_ENABLED(CONFIG_PROVE_RCU)) + return; + synchronize_rcu(); + synchronize_rcu_bh(); + synchronize_sched(); + synchronize_rcu_expedited(); + synchronize_rcu_bh_expedited(); + synchronize_sched_expedited(); +} + #ifdef CONFIG_PROVE_RCU /* @@ -865,6 +886,7 @@ void rcu_early_boot_tests(void) early_boot_test_call_rcu_bh(); if (rcu_self_test_sched) early_boot_test_call_rcu_sched(); + rcu_test_sync_prims(); } static int rcu_verify_early_boot_tests(void) diff --git a/kernel/relay.c b/kernel/relay.c index da79a109dbebc6f7d4cf725182b05365a946614b..8f18d314a96a49a1e9fc6c4cf1c5bead5c96a787 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -809,11 +809,11 @@ void relay_subbufs_consumed(struct rchan *chan, { struct rchan_buf *buf; - if (!chan) + if (!chan || cpu >= NR_CPUS) return; buf = *per_cpu_ptr(chan->buf, cpu); - if (cpu >= NR_CPUS || !buf || subbufs_consumed > chan->n_subbufs) + if (!buf || subbufs_consumed > chan->n_subbufs) return; if (subbufs_consumed > buf->subbufs_produced - buf->subbufs_consumed) diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 11cb1b28a9c7d92daccc8639c5857dca58c1730e..eed4b722708a020beba6376184ee8947a3ae3a69 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -18,8 +18,8 @@ endif obj-y += core.o loadavg.o clock.o cputime.o obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o obj-y += wait.o swait.o completion.o idle.o sched_avg.o -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o obj-$(CONFIG_SCHED_HMP) += hmp.o boost.o +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o energy.o obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o @@ -28,3 +28,4 @@ obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o obj-$(CONFIG_SCHED_CORE_CTL) += core_ctl.o +obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4f43951451a90de278d05d090c5c864b877bb346..5f82983262374565c0298f4af0a760a9b1844dc1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -92,6 +92,7 @@ #define CREATE_TRACE_POINTS #include +#include "walt.h" ATOMIC_NOTIFIER_HEAD(load_alert_notifier_head); @@ -160,6 +161,18 @@ int sysctl_sched_rt_runtime = 950000; /* cpus with isolated domains */ cpumask_var_t cpu_isolated_map; +struct rq * +lock_rq_of(struct task_struct *p, struct rq_flags *flags) +{ + return task_rq_lock(p, flags); +} + +void +unlock_rq_of(struct rq *rq, struct task_struct *p, struct rq_flags *flags) +{ + task_rq_unlock(rq, p, flags); +} + /* * this_rq_lock - lock this runqueue and disable interrupts. 
*/ @@ -2337,6 +2350,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.nr_migrations = 0; p->se.vruntime = 0; INIT_LIST_HEAD(&p->se.group_node); + walt_init_new_task_load(p); #ifdef CONFIG_FAIR_GROUP_SCHED p->se.cfs_rq = NULL; @@ -2700,6 +2714,9 @@ void wake_up_new_task(struct task_struct *p) add_new_task_to_grp(p); raw_spin_lock_irqsave(&p->pi_lock, rf.flags); + + walt_init_new_task_load(p); + p->state = TASK_RUNNING; #ifdef CONFIG_SMP /* @@ -2716,7 +2733,7 @@ void wake_up_new_task(struct task_struct *p) post_init_entity_util_avg(&p->se); mark_task_starting(p); - activate_task(rq, p, 0); + activate_task(rq, p, ENQUEUE_WAKEUP_NEW); p->on_rq = TASK_ON_RQ_QUEUED; trace_sched_wakeup_new(p); check_preempt_curr(rq, p, WF_FORK); @@ -3101,6 +3118,36 @@ unsigned long nr_iowait_cpu(int cpu) return atomic_read(&this->nr_iowait); } +#ifdef CONFIG_CPU_QUIET +u64 nr_running_integral(unsigned int cpu) +{ + unsigned int seqcnt; + u64 integral; + struct rq *q; + + if (cpu >= nr_cpu_ids) + return 0; + + q = cpu_rq(cpu); + + /* + * Update average to avoid reading stalled value if there were + * no run-queue changes for a long time. On the other hand if + * the changes are happening right now, just read current value + * directly. + */ + + seqcnt = read_seqcount_begin(&q->ave_seqcnt); + integral = do_nr_running_integral(q); + if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) { + read_seqcount_begin(&q->ave_seqcnt); + integral = q->nr_running_integral; + } + + return integral; +} +#endif + void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) { struct rq *rq = this_rq(); @@ -3205,6 +3252,93 @@ unsigned long long task_sched_runtime(struct task_struct *p) return ns; } +#ifdef CONFIG_CPU_FREQ_GOV_SCHED + +static inline +unsigned long add_capacity_margin(unsigned long cpu_capacity) +{ + cpu_capacity = cpu_capacity * capacity_margin; + cpu_capacity /= SCHED_CAPACITY_SCALE; + return cpu_capacity; +} + +static inline +unsigned long sum_capacity_reqs(unsigned long cfs_cap, + struct sched_capacity_reqs *scr) +{ + unsigned long total = add_capacity_margin(cfs_cap + scr->rt); + return total += scr->dl; +} + +static void sched_freq_tick_pelt(int cpu) +{ + unsigned long cpu_utilization = capacity_max; + unsigned long capacity_curr = capacity_curr_of(cpu); + struct sched_capacity_reqs *scr; + + scr = &per_cpu(cpu_sched_capacity_reqs, cpu); + if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr) + return; + + /* + * To make free room for a task that is building up its "real" + * utilization and to harm its performance the least, request + * a jump to a higher OPP as soon as the margin of free capacity + * is impacted (specified by capacity_margin). + */ + set_cfs_cpu_capacity(cpu, true, cpu_utilization); +} + +#ifdef CONFIG_SCHED_WALT +static void sched_freq_tick_walt(int cpu) +{ + unsigned long cpu_utilization = cpu_util(cpu); + unsigned long capacity_curr = capacity_curr_of(cpu); + + if (walt_disabled || !sysctl_sched_use_walt_cpu_util) + return sched_freq_tick_pelt(cpu); + + /* + * Add a margin to the WALT utilization. + * NOTE: WALT tracks a single CPU signal for all the scheduling + * classes, thus this margin is going to be added to the DL class as + * well, which is something we do not do in sched_freq_tick_pelt case. + */ + cpu_utilization = add_capacity_margin(cpu_utilization); + if (cpu_utilization <= capacity_curr) + return; + + /* + * It is likely that the load is growing so we + * keep the added margin in our request as an + * extra boost. 
+ */ + set_cfs_cpu_capacity(cpu, true, cpu_utilization); + +} +#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu) +#else +#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu) +#endif /* CONFIG_SCHED_WALT */ + +static void sched_freq_tick(int cpu) +{ + unsigned long capacity_orig, capacity_curr; + + if (!sched_freq()) + return; + + capacity_orig = capacity_orig_of(cpu); + capacity_curr = capacity_curr_of(cpu); + if (capacity_curr == capacity_orig) + return; + + _sched_freq_tick(cpu); +} +#else +static inline void sched_freq_tick(int cpu) { } +#endif /* CONFIG_CPU_FREQ_GOV_SCHED */ + /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. @@ -3229,6 +3363,8 @@ void scheduler_tick(void) update_rq_clock(rq); curr->sched_class->task_tick(rq, curr, 0); cpu_load_update_active(rq); + walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, + walt_ktime_clock(), 0); calc_global_load_tick(rq); wallclock = sched_ktime_clock(); @@ -3258,6 +3394,7 @@ void scheduler_tick(void) check_for_migration(rq, curr); core_ctl_check(wallclock); + sched_freq_tick(cpu); } #ifdef CONFIG_NO_HZ_FULL @@ -3557,7 +3694,6 @@ static void __sched notrace __schedule(bool preempt) update_rq_clock(rq); next = pick_next_task(rq, prev, cookie); - clear_tsk_need_resched(prev); clear_preempt_need_resched(); rq->clock_skip_update = 0; @@ -6187,7 +6323,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, printk(KERN_CONT " %*pbl", cpumask_pr_args(sched_group_cpus(group))); if (group->sgc->capacity != SCHED_CAPACITY_SCALE) { - printk(KERN_CONT " (cpu_capacity = %d)", + printk(KERN_CONT " (cpu_capacity = %lu)", group->sgc->capacity); } @@ -6251,7 +6387,8 @@ static int sd_degenerate(struct sched_domain *sd) SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | - SD_SHARE_POWERDOMAIN)) { + SD_SHARE_POWERDOMAIN | + SD_SHARE_CAP_STATES)) { if (sd->groups != sd->groups->next) return 0; } @@ -6284,7 +6421,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_PREFER_SIBLING | - SD_SHARE_POWERDOMAIN); + SD_SHARE_POWERDOMAIN | + SD_SHARE_CAP_STATES); if (nr_node_ids == 1) pflags &= ~SD_SERIALIZE; } @@ -6363,6 +6501,8 @@ static int init_rootdomain(struct root_domain *rd) if (cpupri_init(&rd->cpupri) != 0) goto free_rto_mask; + + init_max_cpu_capacity(&rd->max_cpu_capacity); return 0; free_rto_mask: @@ -6474,11 +6614,14 @@ DEFINE_PER_CPU(int, sd_llc_id); DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); DEFINE_PER_CPU(struct sched_domain *, sd_numa); DEFINE_PER_CPU(struct sched_domain *, sd_asym); +DEFINE_PER_CPU(struct sched_domain *, sd_ea); +DEFINE_PER_CPU(struct sched_domain *, sd_scs); static void update_top_cache_domain(int cpu) { struct sched_domain_shared *sds = NULL; struct sched_domain *sd; + struct sched_domain *ea_sd = NULL; int id = cpu; int size = 1; @@ -6499,6 +6642,17 @@ static void update_top_cache_domain(int cpu) sd = highest_flag_domain(cpu, SD_ASYM_PACKING); rcu_assign_pointer(per_cpu(sd_asym, cpu), sd); + + for_each_domain(cpu, sd) { + if (sd->groups->sge) + ea_sd = sd; + else + break; + } + rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd); + + sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES); + rcu_assign_pointer(per_cpu(sd_scs, cpu), sd); } /* @@ -6676,6 +6830,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) * die on a /0 trap. 
*/ sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span); + sg->sgc->max_capacity = SCHED_CAPACITY_SCALE; /* * Make sure the first group of this domain contains the @@ -6806,6 +6961,66 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) update_group_capacity(sd, cpu); } +/* + * Check that the per-cpu provided sd energy data is consistent for all cpus + * within the mask. + */ +static inline void check_sched_energy_data(int cpu, sched_domain_energy_f fn, + const struct cpumask *cpumask) +{ + const struct sched_group_energy * const sge = fn(cpu); + struct cpumask mask; + int i; + + if (cpumask_weight(cpumask) <= 1) + return; + + cpumask_xor(&mask, cpumask, get_cpu_mask(cpu)); + + for_each_cpu(i, &mask) { + const struct sched_group_energy * const e = fn(i); + int y; + + BUG_ON(e->nr_idle_states != sge->nr_idle_states); + + for (y = 0; y < (e->nr_idle_states); y++) { + BUG_ON(e->idle_states[y].power != + sge->idle_states[y].power); + } + + BUG_ON(e->nr_cap_states != sge->nr_cap_states); + + for (y = 0; y < (e->nr_cap_states); y++) { + BUG_ON(e->cap_states[y].cap != sge->cap_states[y].cap); + BUG_ON(e->cap_states[y].power != + sge->cap_states[y].power); + } + } +} + +static void init_sched_energy(int cpu, struct sched_domain *sd, + sched_domain_energy_f fn) +{ + if (!(fn && fn(cpu))) + return; + + if (cpu != group_balance_cpu(sd->groups)) + return; + + if (sd->child && !sd->child->groups->sge) { + pr_err("BUG: EAS setup broken for CPU%d\n", cpu); +#ifdef CONFIG_SCHED_DEBUG + pr_err(" energy data on %s but not on %s domain\n", + sd->name, sd->child->name); +#endif + return; + } + + check_sched_energy_data(cpu, fn, sched_group_cpus(sd->groups)); + + sd->groups->sge = fn(cpu); +} + /* * Initializers for schedule domains * Non-inlined to reduce accumulated stack pressure in build_sched_domains() @@ -6922,6 +7137,7 @@ static int sched_domains_curr_level; * SD_NUMA - describes NUMA topologies * SD_SHARE_POWERDOMAIN - describes shared power domain * SD_ASYM_CPUCAPACITY - describes mixed capacity topologies + * SD_SHARE_CAP_STATES - describes shared capacity states * * Odd one out, which beside describing the topology has a quirk also * prescribes the desired behaviour that goes along with it: @@ -6934,7 +7150,8 @@ static int sched_domains_curr_level; SD_NUMA | \ SD_ASYM_PACKING | \ SD_ASYM_CPUCAPACITY | \ - SD_SHARE_POWERDOMAIN) + SD_SHARE_POWERDOMAIN | \ + SD_SHARE_CAP_STATES) static struct sched_domain * sd_init(struct sched_domain_topology_level *tl, @@ -7531,10 +7748,13 @@ static int build_sched_domains(const struct cpumask *cpu_map, /* Calculate CPU capacity for physical packages and nodes */ for (i = nr_cpumask_bits-1; i >= 0; i--) { + struct sched_domain_topology_level *tl = sched_domain_topology; + if (!cpumask_test_cpu(i, cpu_map)) continue; - for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { + for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) { + init_sched_energy(i, sd, tl->energy); claim_allocations(i, sd); init_sched_groups_capacity(i, sd); } @@ -7545,20 +7765,10 @@ static int build_sched_domains(const struct cpumask *cpu_map, for_each_cpu(i, cpu_map) { rq = cpu_rq(i); sd = *per_cpu_ptr(d.sd, i); - - /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */ - if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity)) - WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig); - cpu_attach_domain(sd, d.rd, i); } rcu_read_unlock(); - if (rq && sched_debug_enabled) { - pr_info("span: %*pbl (max cpu_capacity = %lu)\n", - 
cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); - } - ret = 0; error: __free_domain_allocs(&d, alloc_state, cpu_map); @@ -7945,6 +8155,7 @@ void __init sched_init_smp(void) { cpumask_var_t non_isolated_cpus; + walt_init_cpu_efficiency(); alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); alloc_cpumask_var(&fallback_doms, GFP_KERNEL); diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c new file mode 100644 index 0000000000000000000000000000000000000000..1d471d5334149e08dd0a5b0fc5ba1cbb11cf95b1 --- /dev/null +++ b/kernel/sched/cpufreq_sched.c @@ -0,0 +1,485 @@ +/* + * Copyright (C) 2015 Michael Turquette + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +#include "sched.h" + +#define THROTTLE_DOWN_NSEC 50000000 /* 50ms default */ +#define THROTTLE_UP_NSEC 500000 /* 500us default */ + +struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE; +static bool __read_mostly cpufreq_driver_slow; + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED +static struct cpufreq_governor cpufreq_gov_sched; +#endif + +static DEFINE_PER_CPU(unsigned long, enabled); +DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); + +/** + * gov_data - per-policy data internal to the governor + * @up_throttle: next throttling period expiry if increasing OPP + * @down_throttle: next throttling period expiry if decreasing OPP + * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP + * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP + * @task: worker thread for dvfs transition that may block/sleep + * @irq_work: callback used to wake up worker thread + * @requested_freq: last frequency requested by the sched governor + * + * struct gov_data is the per-policy cpufreq_sched-specific data structure. A + * per-policy instance of it is created when the cpufreq_sched governor receives + * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data + * member of struct cpufreq_policy. + * + * Readers of this data must call down_read(policy->rwsem). Writers must + * call down_write(policy->rwsem). + */ +struct gov_data { + ktime_t up_throttle; + ktime_t down_throttle; + unsigned int up_throttle_nsec; + unsigned int down_throttle_nsec; + struct task_struct *task; + struct irq_work irq_work; + unsigned int requested_freq; +}; + +static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy, + unsigned int freq) +{ + struct gov_data *gd = policy->governor_data; + + /* avoid race with cpufreq_sched_stop */ + if (!down_write_trylock(&policy->rwsem)) + return; + + __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); + + gd->up_throttle = ktime_add_ns(ktime_get(), gd->up_throttle_nsec); + gd->down_throttle = ktime_add_ns(ktime_get(), gd->down_throttle_nsec); + up_write(&policy->rwsem); +} + +static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq) +{ + ktime_t now = ktime_get(); + + ktime_t throttle = gd->requested_freq < cur_freq ? 
+ gd->down_throttle : gd->up_throttle; + + if (ktime_after(now, throttle)) + return false; + + while (1) { + int usec_left = ktime_to_ns(ktime_sub(throttle, now)); + + usec_left /= NSEC_PER_USEC; + trace_cpufreq_sched_throttled(usec_left); + usleep_range(usec_left, usec_left + 100); + now = ktime_get(); + if (ktime_after(now, throttle)) + return true; + } +} + +/* + * we pass in struct cpufreq_policy. This is safe because changing out the + * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP), + * which tears down all of the data structures and __cpufreq_governor(policy, + * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the + * new policy pointer + */ +static int cpufreq_sched_thread(void *data) +{ + struct sched_param param; + struct cpufreq_policy *policy; + struct gov_data *gd; + unsigned int new_request = 0; + unsigned int last_request = 0; + int ret; + + policy = (struct cpufreq_policy *) data; + gd = policy->governor_data; + + param.sched_priority = 50; + ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, ¶m); + if (ret) { + pr_warn("%s: failed to set SCHED_FIFO\n", __func__); + do_exit(-EINVAL); + } else { + pr_debug("%s: kthread (%d) set to SCHED_FIFO\n", + __func__, gd->task->pid); + } + + do { + new_request = gd->requested_freq; + if (new_request == last_request) { + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + schedule(); + } else { + /* + * if the frequency thread sleeps while waiting to be + * unthrottled, start over to check for a newer request + */ + if (finish_last_request(gd, policy->cur)) + continue; + last_request = new_request; + cpufreq_sched_try_driver_target(policy, new_request); + } + } while (!kthread_should_stop()); + + return 0; +} + +static void cpufreq_sched_irq_work(struct irq_work *irq_work) +{ + struct gov_data *gd; + + gd = container_of(irq_work, struct gov_data, irq_work); + if (!gd) + return; + + wake_up_process(gd->task); +} + +static void update_fdomain_capacity_request(int cpu) +{ + unsigned int freq_new, index_new, cpu_tmp; + struct cpufreq_policy *policy; + struct gov_data *gd; + unsigned long capacity = 0; + + /* + * Avoid grabbing the policy if possible. A test is still + * required after locking the CPU's policy to avoid racing + * with the governor changing. + */ + if (!per_cpu(enabled, cpu)) + return; + + policy = cpufreq_cpu_get(cpu); + if (IS_ERR_OR_NULL(policy)) + return; + + if (policy->governor != &cpufreq_gov_sched || + !policy->governor_data) + goto out; + + gd = policy->governor_data; + + /* find max capacity requested by cpus in this policy */ + for_each_cpu(cpu_tmp, policy->cpus) { + struct sched_capacity_reqs *scr; + + scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp); + capacity = max(capacity, scr->total); + } + + /* Convert the new maximum capacity request into a cpu frequency */ + freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT; + index_new = cpufreq_frequency_table_target(policy, freq_new, CPUFREQ_RELATION_L); + freq_new = policy->freq_table[index_new].frequency; + + if (freq_new > policy->max) + freq_new = policy->max; + + if (freq_new < policy->min) + freq_new = policy->min; + + trace_cpufreq_sched_request_opp(cpu, capacity, freq_new, + gd->requested_freq); + if (freq_new == gd->requested_freq) + goto out; + + gd->requested_freq = freq_new; + + /* + * Throttling is not yet supported on platforms with fast cpufreq + * drivers. 
+ */ + if (cpufreq_driver_slow) + irq_work_queue_on(&gd->irq_work, cpu); + else + cpufreq_sched_try_driver_target(policy, freq_new); + +out: + cpufreq_cpu_put(policy); +} + +void update_cpu_capacity_request(int cpu, bool request) +{ + unsigned long new_capacity; + struct sched_capacity_reqs *scr; + + /* The rq lock serializes access to the CPU's sched_capacity_reqs. */ + lockdep_assert_held(&cpu_rq(cpu)->lock); + + scr = &per_cpu(cpu_sched_capacity_reqs, cpu); + + new_capacity = scr->cfs + scr->rt; + new_capacity = new_capacity * capacity_margin + / SCHED_CAPACITY_SCALE; + new_capacity += scr->dl; + + if (new_capacity == scr->total) + return; + + trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity); + + scr->total = new_capacity; + if (request) + update_fdomain_capacity_request(cpu); +} + +static inline void set_sched_freq(void) +{ + static_key_slow_inc(&__sched_freq); +} + +static inline void clear_sched_freq(void) +{ + static_key_slow_dec(&__sched_freq); +} + +static struct attribute_group sched_attr_group_gov_pol; +static struct attribute_group *get_sysfs_attr(void) +{ + return &sched_attr_group_gov_pol; +} + +static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) +{ + struct gov_data *gd; + int cpu; + int rc; + + for_each_cpu(cpu, policy->cpus) + memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0, + sizeof(struct sched_capacity_reqs)); + + gd = kzalloc(sizeof(*gd), GFP_KERNEL); + if (!gd) + return -ENOMEM; + + gd->up_throttle_nsec = policy->cpuinfo.transition_latency ? + policy->cpuinfo.transition_latency : + THROTTLE_UP_NSEC; + gd->down_throttle_nsec = THROTTLE_DOWN_NSEC; + pr_debug("%s: throttle threshold = %u [ns]\n", + __func__, gd->up_throttle_nsec); + + rc = sysfs_create_group(&policy->kobj, get_sysfs_attr()); + if (rc) { + pr_err("%s: couldn't create sysfs attributes: %d\n", __func__, rc); + goto err; + } + + policy->governor_data = gd; + if (cpufreq_driver_is_slow()) { + cpufreq_driver_slow = true; + gd->task = kthread_create(cpufreq_sched_thread, policy, + "kschedfreq:%d", + cpumask_first(policy->related_cpus)); + if (IS_ERR_OR_NULL(gd->task)) { + pr_err("%s: failed to create kschedfreq thread\n", + __func__); + goto err; + } + get_task_struct(gd->task); + kthread_bind_mask(gd->task, policy->related_cpus); + wake_up_process(gd->task); + init_irq_work(&gd->irq_work, cpufreq_sched_irq_work); + } + + set_sched_freq(); + + return 0; + +err: + policy->governor_data = NULL; + kfree(gd); + return -ENOMEM; +} + +static void cpufreq_sched_policy_exit(struct cpufreq_policy *policy) +{ + struct gov_data *gd = policy->governor_data; + + clear_sched_freq(); + if (cpufreq_driver_slow) { + kthread_stop(gd->task); + put_task_struct(gd->task); + } + + sysfs_remove_group(&policy->kobj, get_sysfs_attr()); + + policy->governor_data = NULL; + + kfree(gd); +} + +static int cpufreq_sched_start(struct cpufreq_policy *policy) +{ + int cpu; + + for_each_cpu(cpu, policy->cpus) + per_cpu(enabled, cpu) = 1; + + return 0; +} + +static void cpufreq_sched_limits(struct cpufreq_policy *policy) +{ + unsigned int clamp_freq; + struct gov_data *gd = policy->governor_data;; + + pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n", + policy->cpu, policy->min, policy->max, + policy->cur); + + clamp_freq = clamp(gd->requested_freq, policy->min, policy->max); + + if (policy->cur != clamp_freq) + __cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L); +} + +static void cpufreq_sched_stop(struct cpufreq_policy *policy) +{ + int cpu; + + for_each_cpu(cpu, policy->cpus) + 
per_cpu(enabled, cpu) = 0; +} + +/* Tunables */ +static ssize_t show_up_throttle_nsec(struct gov_data *gd, char *buf) +{ + return sprintf(buf, "%u\n", gd->up_throttle_nsec); +} + +static ssize_t store_up_throttle_nsec(struct gov_data *gd, + const char *buf, size_t count) +{ + int ret; + long unsigned int val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + gd->up_throttle_nsec = val; + return count; +} + +static ssize_t show_down_throttle_nsec(struct gov_data *gd, char *buf) +{ + return sprintf(buf, "%u\n", gd->down_throttle_nsec); +} + +static ssize_t store_down_throttle_nsec(struct gov_data *gd, + const char *buf, size_t count) +{ + int ret; + long unsigned int val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + gd->down_throttle_nsec = val; + return count; +} + +/* + * Create show/store routines + * - sys: One governor instance for complete SYSTEM + * - pol: One governor instance per struct cpufreq_policy + */ +#define show_gov_pol_sys(file_name) \ +static ssize_t show_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + return show_##file_name(policy->governor_data, buf); \ +} + +#define store_gov_pol_sys(file_name) \ +static ssize_t store_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, const char *buf, size_t count) \ +{ \ + return store_##file_name(policy->governor_data, buf, count); \ +} + +#define gov_pol_attr_rw(_name) \ + static struct freq_attr _name##_gov_pol = \ + __ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) + +#define show_store_gov_pol_sys(file_name) \ + show_gov_pol_sys(file_name); \ + store_gov_pol_sys(file_name) +#define tunable_handlers(file_name) \ + show_gov_pol_sys(file_name); \ + store_gov_pol_sys(file_name); \ + gov_pol_attr_rw(file_name) + +tunable_handlers(down_throttle_nsec); +tunable_handlers(up_throttle_nsec); + +/* Per policy governor instance */ +static struct attribute *sched_attributes_gov_pol[] = { + &up_throttle_nsec_gov_pol.attr, + &down_throttle_nsec_gov_pol.attr, + NULL, +}; + +static struct attribute_group sched_attr_group_gov_pol = { + .attrs = sched_attributes_gov_pol, + .name = "sched", +}; + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED +static +#endif +struct cpufreq_governor cpufreq_gov_sched = { + .name = "sched", + .init = cpufreq_sched_policy_init, + .exit = cpufreq_sched_policy_exit, + .start = cpufreq_sched_start, + .stop = cpufreq_sched_stop, + .limits = cpufreq_sched_limits, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_sched_init(void) +{ + int cpu; + + for_each_cpu(cpu, cpu_possible_mask) + per_cpu(enabled, cpu) = 0; + return cpufreq_register_governor(&cpufreq_gov_sched); +} + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_sched; +} +#endif + +/* Try to make this the default governor */ +fs_initcall(cpufreq_sched_init); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c deleted file mode 100644 index 69e06898997d393daebef868bcd8f2368b838a99..0000000000000000000000000000000000000000 --- a/kernel/sched/cpufreq_schedutil.c +++ /dev/null @@ -1,576 +0,0 @@ -/* - * CPUFreq governor based on scheduler-provided CPU utilization data. - * - * Copyright (C) 2016, Intel Corporation - * Author: Rafael J. Wysocki - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include - -#include "sched.h" - -struct sugov_tunables { - struct gov_attr_set attr_set; - unsigned int rate_limit_us; -}; - -struct sugov_policy { - struct cpufreq_policy *policy; - - struct sugov_tunables *tunables; - struct list_head tunables_hook; - - raw_spinlock_t update_lock; /* For shared policies */ - u64 last_freq_update_time; - s64 freq_update_delay_ns; - unsigned int next_freq; - - /* The next fields are only needed if fast switch cannot be used. */ - struct irq_work irq_work; - struct work_struct work; - struct mutex work_lock; - bool work_in_progress; - - bool need_freq_update; -}; - -struct sugov_cpu { - struct update_util_data update_util; - struct sugov_policy *sg_policy; - - unsigned int cached_raw_freq; - unsigned long iowait_boost; - unsigned long iowait_boost_max; - u64 last_update; - - /* The fields below are only needed when sharing a policy. */ - unsigned long util; - unsigned long max; - unsigned int flags; -}; - -static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); - -/************************ Governor internals ***********************/ - -static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) -{ - s64 delta_ns; - - if (sg_policy->work_in_progress) - return false; - - if (unlikely(sg_policy->need_freq_update)) { - sg_policy->need_freq_update = false; - /* - * This happens when limits change, so forget the previous - * next_freq value and force an update. - */ - sg_policy->next_freq = UINT_MAX; - return true; - } - - delta_ns = time - sg_policy->last_freq_update_time; - return delta_ns >= sg_policy->freq_update_delay_ns; -} - -static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, - unsigned int next_freq) -{ - struct cpufreq_policy *policy = sg_policy->policy; - - sg_policy->last_freq_update_time = time; - - if (policy->fast_switch_enabled) { - if (sg_policy->next_freq == next_freq) { - trace_cpu_frequency(policy->cur, smp_processor_id()); - return; - } - sg_policy->next_freq = next_freq; - next_freq = cpufreq_driver_fast_switch(policy, next_freq); - if (next_freq == CPUFREQ_ENTRY_INVALID) - return; - - policy->cur = next_freq; - trace_cpu_frequency(next_freq, smp_processor_id()); - } else if (sg_policy->next_freq != next_freq) { - sg_policy->next_freq = next_freq; - sg_policy->work_in_progress = true; - irq_work_queue(&sg_policy->irq_work); - } -} - -/** - * get_next_freq - Compute a new frequency for a given cpufreq policy. - * @sg_cpu: schedutil cpu object to compute the new frequency for. - * @util: Current CPU utilization. - * @max: CPU capacity. - * - * If the utilization is frequency-invariant, choose the new frequency to be - * proportional to it, that is - * - * next_freq = C * max_freq * util / max - * - * Otherwise, approximate the would-be frequency-invariant utilization by - * util_raw * (curr_freq / max_freq) which leads to - * - * next_freq = C * curr_freq * util_raw / max - * - * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8. - * - * The lowest driver-supported frequency which is equal or greater than the raw - * next_freq (as calculated above) is returned, subject to policy min/max and - * cpufreq driver limitations. - */ -static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, - unsigned long max) -{ - struct sugov_policy *sg_policy = sg_cpu->sg_policy; - struct cpufreq_policy *policy = sg_policy->policy; - unsigned int freq = arch_scale_freq_invariant() ? 
- policy->cpuinfo.max_freq : policy->cur; - - freq = (freq + (freq >> 2)) * util / max; - - if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) - return sg_policy->next_freq; - sg_cpu->cached_raw_freq = freq; - return cpufreq_driver_resolve_freq(policy, freq); -} - -static void sugov_get_util(unsigned long *util, unsigned long *max) -{ - struct rq *rq = this_rq(); - unsigned long cfs_max; - - cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id()); - - *util = min(rq->cfs.avg.util_avg, cfs_max); - *max = cfs_max; -} - -static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, - unsigned int flags) -{ - if (flags & SCHED_CPUFREQ_IOWAIT) { - sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; - } else if (sg_cpu->iowait_boost) { - s64 delta_ns = time - sg_cpu->last_update; - - /* Clear iowait_boost if the CPU apprears to have been idle. */ - if (delta_ns > TICK_NSEC) - sg_cpu->iowait_boost = 0; - } -} - -static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, - unsigned long *max) -{ - unsigned long boost_util = sg_cpu->iowait_boost; - unsigned long boost_max = sg_cpu->iowait_boost_max; - - if (!boost_util) - return; - - if (*util * boost_max < *max * boost_util) { - *util = boost_util; - *max = boost_max; - } - sg_cpu->iowait_boost >>= 1; -} - -static void sugov_update_single(struct update_util_data *hook, u64 time, - unsigned int flags) -{ - struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); - struct sugov_policy *sg_policy = sg_cpu->sg_policy; - struct cpufreq_policy *policy = sg_policy->policy; - unsigned long util, max; - unsigned int next_f; - - sugov_set_iowait_boost(sg_cpu, time, flags); - sg_cpu->last_update = time; - - if (!sugov_should_update_freq(sg_policy, time)) - return; - - if (flags & SCHED_CPUFREQ_RT_DL) { - next_f = policy->cpuinfo.max_freq; - } else { - sugov_get_util(&util, &max); - sugov_iowait_boost(sg_cpu, &util, &max); - next_f = get_next_freq(sg_cpu, util, max); - } - sugov_update_commit(sg_policy, time, next_f); -} - -static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, - unsigned long util, unsigned long max, - unsigned int flags) -{ - struct sugov_policy *sg_policy = sg_cpu->sg_policy; - struct cpufreq_policy *policy = sg_policy->policy; - unsigned int max_f = policy->cpuinfo.max_freq; - u64 last_freq_update_time = sg_policy->last_freq_update_time; - unsigned int j; - - if (flags & SCHED_CPUFREQ_RT_DL) - return max_f; - - sugov_iowait_boost(sg_cpu, &util, &max); - - for_each_cpu(j, policy->cpus) { - struct sugov_cpu *j_sg_cpu; - unsigned long j_util, j_max; - s64 delta_ns; - - if (j == smp_processor_id()) - continue; - - j_sg_cpu = &per_cpu(sugov_cpu, j); - /* - * If the CPU utilization was last updated before the previous - * frequency update and the time elapsed between the last update - * of the CPU utilization and the last frequency update is long - * enough, don't take the CPU into account as it probably is - * idle now (and clear iowait_boost for it). 
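In the frequency-invariant case, get_next_freq() above (part of the schedutil code this patch removes) computes next_freq = 1.25 * max_freq * util / max; the 1.25 factor is the freq + (freq >> 2) expression, giving the documented tipping point at util/max = 0.8. A standalone worked example with invented figures (2 GHz max frequency, util 614 of 1024, i.e. ~60% busy):

#include <stdio.h>

int main(void)
{
        unsigned long max_freq = 2000000;       /* kHz, invented */
        unsigned long util = 614, max = 1024;   /* ~60% utilization */
        unsigned long freq;

        /* freq + (freq >> 2) == 1.25 * freq */
        freq = (max_freq + (max_freq >> 2)) * util / max;

        printf("raw next_freq = %lu kHz\n", freq);      /* 1499023 */
        return 0;
}

The raw value is then resolved to the nearest driver-supported frequency via cpufreq_driver_resolve_freq(), as the removed code above does.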
- */ - delta_ns = last_freq_update_time - j_sg_cpu->last_update; - if (delta_ns > TICK_NSEC) { - j_sg_cpu->iowait_boost = 0; - continue; - } - if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) - return max_f; - - j_util = j_sg_cpu->util; - j_max = j_sg_cpu->max; - if (j_util * max > j_max * util) { - util = j_util; - max = j_max; - } - - sugov_iowait_boost(j_sg_cpu, &util, &max); - } - - return get_next_freq(sg_cpu, util, max); -} - -static void sugov_update_shared(struct update_util_data *hook, u64 time, - unsigned int flags) -{ - struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); - struct sugov_policy *sg_policy = sg_cpu->sg_policy; - unsigned long util, max; - unsigned int next_f; - - sugov_get_util(&util, &max); - - raw_spin_lock(&sg_policy->update_lock); - - sg_cpu->util = util; - sg_cpu->max = max; - sg_cpu->flags = flags; - - sugov_set_iowait_boost(sg_cpu, time, flags); - sg_cpu->last_update = time; - - if (sugov_should_update_freq(sg_policy, time)) { - next_f = sugov_next_freq_shared(sg_cpu, util, max, flags); - sugov_update_commit(sg_policy, time, next_f); - } - - raw_spin_unlock(&sg_policy->update_lock); -} - -static void sugov_work(struct work_struct *work) -{ - struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work); - - mutex_lock(&sg_policy->work_lock); - __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq, - CPUFREQ_RELATION_L); - mutex_unlock(&sg_policy->work_lock); - - sg_policy->work_in_progress = false; -} - -static void sugov_irq_work(struct irq_work *irq_work) -{ - struct sugov_policy *sg_policy; - - sg_policy = container_of(irq_work, struct sugov_policy, irq_work); - schedule_work_on(smp_processor_id(), &sg_policy->work); -} - -/************************** sysfs interface ************************/ - -static struct sugov_tunables *global_tunables; -static DEFINE_MUTEX(global_tunables_lock); - -static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set) -{ - return container_of(attr_set, struct sugov_tunables, attr_set); -} - -static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf) -{ - struct sugov_tunables *tunables = to_sugov_tunables(attr_set); - - return sprintf(buf, "%u\n", tunables->rate_limit_us); -} - -static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, - size_t count) -{ - struct sugov_tunables *tunables = to_sugov_tunables(attr_set); - struct sugov_policy *sg_policy; - unsigned int rate_limit_us; - - if (kstrtouint(buf, 10, &rate_limit_us)) - return -EINVAL; - - tunables->rate_limit_us = rate_limit_us; - - list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) - sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC; - - return count; -} - -static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us); - -static struct attribute *sugov_attributes[] = { - &rate_limit_us.attr, - NULL -}; - -static struct kobj_type sugov_tunables_ktype = { - .default_attrs = sugov_attributes, - .sysfs_ops = &governor_sysfs_ops, -}; - -/********************** cpufreq governor interface *********************/ - -static struct cpufreq_governor schedutil_gov; - -static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) -{ - struct sugov_policy *sg_policy; - - sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL); - if (!sg_policy) - return NULL; - - sg_policy->policy = policy; - init_irq_work(&sg_policy->irq_work, sugov_irq_work); - INIT_WORK(&sg_policy->work, sugov_work); - 
mutex_init(&sg_policy->work_lock); - raw_spin_lock_init(&sg_policy->update_lock); - return sg_policy; -} - -static void sugov_policy_free(struct sugov_policy *sg_policy) -{ - mutex_destroy(&sg_policy->work_lock); - kfree(sg_policy); -} - -static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) -{ - struct sugov_tunables *tunables; - - tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); - if (tunables) { - gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook); - if (!have_governor_per_policy()) - global_tunables = tunables; - } - return tunables; -} - -static void sugov_tunables_free(struct sugov_tunables *tunables) -{ - if (!have_governor_per_policy()) - global_tunables = NULL; - - kfree(tunables); -} - -static int sugov_init(struct cpufreq_policy *policy) -{ - struct sugov_policy *sg_policy; - struct sugov_tunables *tunables; - unsigned int lat; - int ret = 0; - - /* State should be equivalent to EXIT */ - if (policy->governor_data) - return -EBUSY; - - sg_policy = sugov_policy_alloc(policy); - if (!sg_policy) - return -ENOMEM; - - mutex_lock(&global_tunables_lock); - - if (global_tunables) { - if (WARN_ON(have_governor_per_policy())) { - ret = -EINVAL; - goto free_sg_policy; - } - policy->governor_data = sg_policy; - sg_policy->tunables = global_tunables; - - gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook); - goto out; - } - - tunables = sugov_tunables_alloc(sg_policy); - if (!tunables) { - ret = -ENOMEM; - goto free_sg_policy; - } - - tunables->rate_limit_us = LATENCY_MULTIPLIER; - lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; - if (lat) - tunables->rate_limit_us *= lat; - - policy->governor_data = sg_policy; - sg_policy->tunables = tunables; - - ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype, - get_governor_parent_kobj(policy), "%s", - schedutil_gov.name); - if (ret) - goto fail; - - out: - mutex_unlock(&global_tunables_lock); - - cpufreq_enable_fast_switch(policy); - return 0; - - fail: - policy->governor_data = NULL; - sugov_tunables_free(tunables); - - free_sg_policy: - mutex_unlock(&global_tunables_lock); - - sugov_policy_free(sg_policy); - pr_err("initialization failed (error %d)\n", ret); - return ret; -} - -static void sugov_exit(struct cpufreq_policy *policy) -{ - struct sugov_policy *sg_policy = policy->governor_data; - struct sugov_tunables *tunables = sg_policy->tunables; - unsigned int count; - - cpufreq_disable_fast_switch(policy); - - mutex_lock(&global_tunables_lock); - - count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); - policy->governor_data = NULL; - if (!count) - sugov_tunables_free(tunables); - - mutex_unlock(&global_tunables_lock); - - sugov_policy_free(sg_policy); -} - -static int sugov_start(struct cpufreq_policy *policy) -{ - struct sugov_policy *sg_policy = policy->governor_data; - unsigned int cpu; - - sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; - sg_policy->last_freq_update_time = 0; - sg_policy->next_freq = UINT_MAX; - sg_policy->work_in_progress = false; - sg_policy->need_freq_update = false; - - for_each_cpu(cpu, policy->cpus) { - struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); - - sg_cpu->sg_policy = sg_policy; - if (policy_is_shared(policy)) { - sg_cpu->util = 0; - sg_cpu->max = 0; - sg_cpu->flags = SCHED_CPUFREQ_RT; - sg_cpu->last_update = 0; - sg_cpu->cached_raw_freq = 0; - sg_cpu->iowait_boost = 0; - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; - 
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_shared); - } else { - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_single); - } - } - return 0; -} - -static void sugov_stop(struct cpufreq_policy *policy) -{ - struct sugov_policy *sg_policy = policy->governor_data; - unsigned int cpu; - - for_each_cpu(cpu, policy->cpus) - cpufreq_remove_update_util_hook(cpu); - - synchronize_sched(); - - irq_work_sync(&sg_policy->irq_work); - cancel_work_sync(&sg_policy->work); -} - -static void sugov_limits(struct cpufreq_policy *policy) -{ - struct sugov_policy *sg_policy = policy->governor_data; - - if (!policy->fast_switch_enabled) { - mutex_lock(&sg_policy->work_lock); - cpufreq_policy_apply_limits(policy); - mutex_unlock(&sg_policy->work_lock); - } - - sg_policy->need_freq_update = true; -} - -static struct cpufreq_governor schedutil_gov = { - .name = "schedutil", - .owner = THIS_MODULE, - .init = sugov_init, - .exit = sugov_exit, - .start = sugov_start, - .stop = sugov_stop, - .limits = sugov_limits, -}; - -#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL -struct cpufreq_governor *cpufreq_default_governor(void) -{ - return &schedutil_gov; -} -#endif - -static int __init sugov_register(void) -{ - return cpufreq_register_governor(&schedutil_gov); -} -fs_initcall(sugov_register); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 6ce492fbedcf8bb7c8d7766f1fb78b0a742b5a9d..0d8cc065de20baca0e62ddafd921cc4db380ce7a 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -8,7 +8,7 @@ #ifdef CONFIG_PARAVIRT #include #endif - +#include "walt.h" #ifdef CONFIG_IRQ_TIME_ACCOUNTING diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 8cc525696737114b2bc0ebb696ddf4081a87c42f..ae8bd29ce44e1d728e205d0d26de236d5e6966ea 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -260,10 +260,61 @@ set_table_entry(struct ctl_table *entry, } } +static struct ctl_table * +sd_alloc_ctl_energy_table(struct sched_group_energy *sge) +{ + struct ctl_table *table = sd_alloc_ctl_entry(5); + + if (table == NULL) + return NULL; + + set_table_entry(&table[0], "nr_idle_states", &sge->nr_idle_states, + sizeof(int), 0644, proc_dointvec_minmax, false); + set_table_entry(&table[1], "idle_states", &sge->idle_states[0].power, + sge->nr_idle_states*sizeof(struct idle_state), 0644, + proc_doulongvec_minmax, false); + set_table_entry(&table[2], "nr_cap_states", &sge->nr_cap_states, + sizeof(int), 0644, proc_dointvec_minmax, false); + set_table_entry(&table[3], "cap_states", &sge->cap_states[0].cap, + sge->nr_cap_states*sizeof(struct capacity_state), 0644, + proc_doulongvec_minmax, false); + + return table; +} + +static struct ctl_table * +sd_alloc_ctl_group_table(struct sched_group *sg) +{ + struct ctl_table *table = sd_alloc_ctl_entry(2); + + if (table == NULL) + return NULL; + + table->procname = kstrdup("energy", GFP_KERNEL); + table->mode = 0555; + table->child = sd_alloc_ctl_energy_table((struct sched_group_energy *)sg->sge); + + return table; +} + static struct ctl_table * sd_alloc_ctl_domain_table(struct sched_domain *sd) { - struct ctl_table *table = sd_alloc_ctl_entry(14); + struct ctl_table *table; + unsigned int nr_entries = 14; + + int i = 0; + struct sched_group *sg = sd->groups; + + if (sg->sge) { + int nr_sgs = 0; + + do {} while (nr_sgs++, sg = sg->next, sg != sd->groups); + + nr_entries += nr_sgs; + } + + table = sd_alloc_ctl_entry(nr_entries); if (table == NULL) return NULL; @@ -296,7 +347,19 @@ sd_alloc_ctl_domain_table(struct 
sched_domain *sd) sizeof(long), 0644, proc_doulongvec_minmax, false); set_table_entry(&table[12], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring, false); - /* &table[13] is terminator */ + sg = sd->groups; + if (sg->sge) { + char buf[32]; + struct ctl_table *entry = &table[13]; + + do { + snprintf(buf, 32, "group%d", i); + entry->procname = kstrdup(buf, GFP_KERNEL); + entry->mode = 0555; + entry->child = sd_alloc_ctl_group_table(sg); + } while (entry++, i++, sg = sg->next, sg != sd->groups); + } + /* &table[nr_entries-1] is terminator */ return table; } diff --git a/kernel/sched/energy.c b/kernel/sched/energy.c new file mode 100644 index 0000000000000000000000000000000000000000..b0656b7a93e3379c978858618e5e09ca831ccf47 --- /dev/null +++ b/kernel/sched/energy.c @@ -0,0 +1,124 @@ +/* + * Obtain energy cost data from DT and populate relevant scheduler data + * structures. + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#define pr_fmt(fmt) "sched-energy: " fmt + +#define DEBUG + +#include +#include +#include +#include +#include +#include + +struct sched_group_energy *sge_array[NR_CPUS][NR_SD_LEVELS]; + +static void free_resources(void) +{ + int cpu, sd_level; + struct sched_group_energy *sge; + + for_each_possible_cpu(cpu) { + for_each_possible_sd_level(sd_level) { + sge = sge_array[cpu][sd_level]; + if (sge) { + kfree(sge->cap_states); + kfree(sge->idle_states); + kfree(sge); + } + } + } +} + +void init_sched_energy_costs(void) +{ + struct device_node *cn, *cp; + struct capacity_state *cap_states; + struct idle_state *idle_states; + struct sched_group_energy *sge; + const struct property *prop; + int sd_level, i, nstates, cpu; + const __be32 *val; + + for_each_possible_cpu(cpu) { + cn = of_get_cpu_node(cpu, NULL); + if (!cn) { + pr_warn("CPU device node missing for CPU %d\n", cpu); + return; + } + + if (!of_find_property(cn, "sched-energy-costs", NULL)) { + pr_warn("CPU device node has no sched-energy-costs\n"); + return; + } + + for_each_possible_sd_level(sd_level) { + cp = of_parse_phandle(cn, "sched-energy-costs", sd_level); + if (!cp) + break; + + prop = of_find_property(cp, "busy-cost-data", NULL); + if (!prop || !prop->value) { + pr_warn("No busy-cost data, skipping sched_energy init\n"); + goto out; + } + + sge = kcalloc(1, sizeof(struct sched_group_energy), + GFP_NOWAIT); + + nstates = (prop->length / sizeof(u32)) / 2; + cap_states = kcalloc(nstates, + sizeof(struct capacity_state), + GFP_NOWAIT); + + for (i = 0, val = prop->value; i < nstates; i++) { + cap_states[i].cap = be32_to_cpup(val++); + cap_states[i].power = be32_to_cpup(val++); + } + + sge->nr_cap_states = nstates; + sge->cap_states = cap_states; + + prop = of_find_property(cp, "idle-cost-data", NULL); + if (!prop || !prop->value) { + pr_warn("No idle-cost data, skipping sched_energy init\n"); + goto out; + } + + nstates = (prop->length / sizeof(u32)); + idle_states = kcalloc(nstates, + sizeof(struct idle_state), + GFP_NOWAIT); + + for (i = 0, val = prop->value; i < 
nstates; i++) + idle_states[i].power = be32_to_cpup(val++); + + sge->nr_idle_states = nstates; + sge->idle_states = idle_states; + + sge_array[cpu][sd_level] = sge; + } + } + + pr_info("Sched-energy-costs installed from DT\n"); + return; + +out: + free_resources(); +} diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7af3c6b2b67b9136472ded9b9ec3ea1cea383220..4eaf13e20da295deae9ae398ad7dc0fb5c5434fc 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -30,11 +30,13 @@ #include #include #include +#include #include "sched.h" +#include "tune.h" #include -/* QHMP forward declarations */ +/* QHMP/Zone forward declarations */ struct lb_env; struct sd_lb_stats; @@ -102,6 +104,7 @@ static inline void inc_throttled_cfs_rq_hmp_stats( static inline void init_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq) { } #ifdef CONFIG_SMP + static inline int bail_inter_cluster_balance(struct lb_env *env, struct sd_lb_stats *sds) { @@ -116,6 +119,7 @@ static inline bool update_sd_pick_busiest_active_balance(struct lb_env *env, return false; } #endif /* CONFIG_SMP */ + #endif /* CONFIG_SCHED_HMP */ /* @@ -133,6 +137,17 @@ static inline bool update_sd_pick_busiest_active_balance(struct lb_env *env, unsigned int sysctl_sched_latency = 6000000ULL; unsigned int normalized_sysctl_sched_latency = 6000000ULL; +unsigned int sysctl_sched_is_big_little = 0; +unsigned int sysctl_sched_sync_hint_enable = 1; +unsigned int sysctl_sched_initial_task_util = 0; +unsigned int sysctl_sched_cstate_aware = 1; + +#ifdef CONFIG_SCHED_WALT +unsigned int sysctl_sched_use_walt_cpu_util = 1; +unsigned int sysctl_sched_use_walt_task_util = 1; +__read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload = + (10 * NSEC_PER_MSEC); +#endif /* * The initial- and re-scaling of tunables is configurable * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) @@ -785,10 +800,13 @@ void init_entity_runnable_average(struct sched_entity *se) if (entity_is_task(se)) sa->load_avg = scale_load_down(se->load.weight); sa->load_sum = sa->load_avg * LOAD_AVG_MAX; + /* * At this point, util_avg won't be used in select_task_rq_fair anyway */ - sa->util_avg = 0; + sa->util_avg = sched_freq() ? + sysctl_sched_initial_task_util : + 0; sa->util_sum = 0; /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */ } @@ -2921,6 +2939,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa, scale_freq = arch_scale_freq_capacity(NULL, cpu); scale_cpu = arch_scale_cpu_capacity(NULL, cpu); + trace_sched_contrib_scale_f(cpu, scale_freq, scale_cpu); /* delta_w is the amount already accumulated against our next period */ delta_w = sa->period_contrib; @@ -3188,6 +3207,10 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg) update_tg_load_avg(cfs_rq, 0); + + if (entity_is_task(se)) + trace_sched_load_avg_task(task_of(se), &se->avg); + trace_sched_load_avg_cpu(cpu, cfs_rq); } /** @@ -4653,6 +4676,30 @@ static inline void hrtick_update(struct rq *rq) } #endif +#ifdef CONFIG_SMP +static bool cpu_overutilized(int cpu); +static unsigned long capacity_orig_of(int cpu); +static unsigned long cpu_util(int cpu); +static inline unsigned long boosted_cpu_util(int cpu); +#else +#define boosted_cpu_util(cpu) cpu_util(cpu) +#endif + +#ifdef CONFIG_SMP +static void update_capacity_of(int cpu) +{ + unsigned long req_cap; + + if (!sched_freq()) + return; + + /* Convert scale-invariant capacity to cpu. 
*/ + req_cap = boosted_cpu_util(cpu); + req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); + set_cfs_cpu_capacity(cpu, true, req_cap); +} +#endif + /* * The enqueue_task method is called before nr_running is * increased. Here we update the fair scheduling stats and @@ -4663,6 +4710,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; +#ifdef CONFIG_SMP + int task_new = flags & ENQUEUE_WAKEUP_NEW; + int task_wakeup = flags & ENQUEUE_WAKEUP; +#endif /* * If in_iowait is set, the code below may not trigger any cpufreq @@ -4709,6 +4760,30 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) inc_rq_hmp_stats(rq, p, 1); } +#ifdef CONFIG_SMP + + if (!se) { + if (!task_new && !rq->rd->overutilized && + cpu_overutilized(rq->cpu)) { + rq->rd->overutilized = true; + trace_sched_overutilized(true); + } + + /* + * We want to potentially trigger a freq switch + * request only for tasks that are waking up; this is + * because we get here also during load balancing, but + * in these cases it seems wise to trigger as single + * request after load balancing is done. + */ + if (task_new || task_wakeup) + update_capacity_of(cpu_of(rq)); + } + + /* Update SchedTune accouting */ + schedtune_enqueue_task(p, cpu_of(rq)); + +#endif /* CONFIG_SMP */ hrtick_update(rq); } @@ -4772,6 +4847,30 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) dec_rq_hmp_stats(rq, p, 1); } +#ifdef CONFIG_SMP + + if (!se) { + /* + * We want to potentially trigger a freq switch + * request only for tasks that are going to sleep; + * this is because we get here also during load + * balancing, but in these cases it seems wise to + * trigger as single request after load balancing is + * done. + */ + if (task_sleep) { + if (rq->cfs.nr_running) + update_capacity_of(cpu_of(rq)); + else if (sched_freq()) + set_cfs_cpu_capacity(cpu_of(rq), false, 0); + } + } + + /* Update SchedTune accouting */ + schedtune_dequeue_task(p, cpu_of(rq)); + +#endif /* CONFIG_SMP */ + hrtick_update(rq); } @@ -5078,15 +5177,6 @@ static unsigned long target_load(int cpu, int type) return max(rq->cpu_load[type-1], total); } -static unsigned long capacity_of(int cpu) -{ - return cpu_rq(cpu)->cpu_capacity; -} - -static unsigned long capacity_orig_of(int cpu) -{ - return cpu_rq(cpu)->cpu_capacity_orig; -} static unsigned long cpu_avg_load_per_task(int cpu) { @@ -5237,6 +5327,397 @@ static void record_wakee(struct task_struct *p) } } +/* + * Returns the current capacity of cpu after applying both + * cpu and freq scaling. + */ +unsigned long capacity_curr_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity_orig * + arch_scale_freq_capacity(NULL, cpu) + >> SCHED_CAPACITY_SHIFT; +} + +static inline bool energy_aware(void) +{ + return sched_feat(ENERGY_AWARE); +} + +struct energy_env { + struct sched_group *sg_top; + struct sched_group *sg_cap; + int cap_idx; + int util_delta; + int src_cpu; + int dst_cpu; + int energy; + int payoff; + struct task_struct *task; + struct { + int before; + int after; + int delta; + int diff; + } nrg; + struct { + int before; + int after; + int delta; + } cap; +}; + +/* + * __cpu_norm_util() returns the cpu util relative to a specific capacity, + * i.e. it's busy ratio, in the range [0..SCHED_LOAD_SCALE] which is useful for + * energy calculations. 
Using the scale-invariant util returned by + * cpu_util() and approximating scale-invariant util by: + * + * util ~ (curr_freq/max_freq)*1024 * capacity_orig/1024 * running_time/time + * + * the normalized util can be found using the specific capacity. + * + * capacity = capacity_orig * curr_freq/max_freq + * + * norm_util = running_time/time ~ util/capacity + */ +static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta) +{ + int util = __cpu_util(cpu, delta); + + if (util >= capacity) + return SCHED_CAPACITY_SCALE; + + return (util << SCHED_CAPACITY_SHIFT)/capacity; +} + +static int calc_util_delta(struct energy_env *eenv, int cpu) +{ + if (cpu == eenv->src_cpu) + return -eenv->util_delta; + if (cpu == eenv->dst_cpu) + return eenv->util_delta; + return 0; +} + +static +unsigned long group_max_util(struct energy_env *eenv) +{ + int i, delta; + unsigned long max_util = 0; + + for_each_cpu(i, sched_group_cpus(eenv->sg_cap)) { + delta = calc_util_delta(eenv, i); + max_util = max(max_util, __cpu_util(i, delta)); + } + + return max_util; +} + +/* + * group_norm_util() returns the approximated group util relative to it's + * current capacity (busy ratio) in the range [0..SCHED_LOAD_SCALE] for use in + * energy calculations. Since task executions may or may not overlap in time in + * the group the true normalized util is between max(cpu_norm_util(i)) and + * sum(cpu_norm_util(i)) when iterating over all cpus in the group, i. The + * latter is used as the estimate as it leads to a more pessimistic energy + * estimate (more busy). + */ +static unsigned +long group_norm_util(struct energy_env *eenv, struct sched_group *sg) +{ + int i, delta; + unsigned long util_sum = 0; + unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap; + + for_each_cpu(i, sched_group_cpus(sg)) { + delta = calc_util_delta(eenv, i); + util_sum += __cpu_norm_util(i, capacity, delta); + } + + if (util_sum > SCHED_CAPACITY_SCALE) + return SCHED_CAPACITY_SCALE; + return util_sum; +} + +static int find_new_capacity(struct energy_env *eenv, + const struct sched_group_energy const *sge) +{ + int idx; + unsigned long util = group_max_util(eenv); + + for (idx = 0; idx < sge->nr_cap_states; idx++) { + if (sge->cap_states[idx].cap >= util) + break; + } + + eenv->cap_idx = idx; + + return idx; +} + +static int group_idle_state(struct sched_group *sg) +{ + int i, state = INT_MAX; + + /* Find the shallowest idle state in the sched group. */ + for_each_cpu(i, sched_group_cpus(sg)) + state = min(state, idle_get_state_idx(cpu_rq(i))); + + /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */ + state++; + + return state; +} + +/* + * sched_group_energy(): Computes the absolute energy consumption of cpus + * belonging to the sched_group including shared resources shared only by + * members of the group. Iterates over all cpus in the hierarchy below the + * sched_group starting from the bottom working it's way up before going to + * the next cpu until all cpus are covered at all levels. The current + * implementation is likely to gather the same util statistics multiple times. + * This can probably be done in a faster but more complex way. + * Note: sched_group_energy() may fail when racing with sched_domain updates. 
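Per group, the energy estimate below boils down to two fixed-point products: the busy ratio times the busy power of the selected capacity state, plus the idle remainder times the idle-state power, both scaled by SCHED_CAPACITY_SCALE (1024). A standalone worked example for an invented two-cpu group (capacity state cap=860/power=400, idle-state power=10, per-cpu utilizations 258 and 86):

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT    10
#define SCHED_CAPACITY_SCALE    (1UL << SCHED_CAPACITY_SHIFT)

/* Busy ratio of one cpu relative to the capacity of the selected
 * capacity state, as in __cpu_norm_util() above. */
static unsigned long cpu_norm_util(unsigned long util, unsigned long cap)
{
        if (util >= cap)
                return SCHED_CAPACITY_SCALE;
        return (util << SCHED_CAPACITY_SHIFT) / cap;
}

int main(void)
{
        unsigned long cap = 860, busy_power = 400, idle_power = 10;
        unsigned long util[2] = { 258, 86 };
        unsigned long group_util, busy_nrg, idle_nrg;

        /* group_norm_util(): sum of per-cpu busy ratios, capped at 1024. */
        group_util = cpu_norm_util(util[0], cap) + cpu_norm_util(util[1], cap);
        if (group_util > SCHED_CAPACITY_SCALE)
                group_util = SCHED_CAPACITY_SCALE;

        /* The same products as sg_busy_energy/sg_idle_energy below. */
        busy_nrg = (group_util * busy_power) >> SCHED_CAPACITY_SHIFT;
        idle_nrg = ((SCHED_CAPACITY_SCALE - group_util) * idle_power)
                        >> SCHED_CAPACITY_SHIFT;

        printf("group_util=%lu busy=%lu idle=%lu total=%lu\n",
               group_util, busy_nrg, idle_nrg, busy_nrg + idle_nrg);
        /* group_util=409 busy=159 idle=6 total=165 */
        return 0;
}

sched_group_energy() repeats this at every level of the hierarchy below the given group and sums the results, which is why energy_diff() further down needs two full evaluations, one before and one after the proposed task move.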
+ */ +static int sched_group_energy(struct energy_env *eenv) +{ + struct sched_domain *sd; + int cpu, total_energy = 0; + struct cpumask visit_cpus; + struct sched_group *sg; + + WARN_ON(!eenv->sg_top->sge); + + cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top)); + + while (!cpumask_empty(&visit_cpus)) { + struct sched_group *sg_shared_cap = NULL; + + cpu = cpumask_first(&visit_cpus); + + /* + * Is the group utilization affected by cpus outside this + * sched_group? + */ + sd = rcu_dereference(per_cpu(sd_scs, cpu)); + + if (!sd) + /* + * We most probably raced with hotplug; returning a + * wrong energy estimation is better than entering an + * infinite loop. + */ + return -EINVAL; + + if (sd->parent) + sg_shared_cap = sd->parent->groups; + + for_each_domain(cpu, sd) { + sg = sd->groups; + + /* Has this sched_domain already been visited? */ + if (sd->child && group_first_cpu(sg) != cpu) + break; + + do { + unsigned long group_util; + int sg_busy_energy, sg_idle_energy; + int cap_idx, idle_idx; + + if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight) + eenv->sg_cap = sg_shared_cap; + else + eenv->sg_cap = sg; + + cap_idx = find_new_capacity(eenv, sg->sge); + + if (sg->group_weight == 1) { + /* Remove capacity of src CPU (before task move) */ + if (eenv->util_delta == 0 && + cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) { + eenv->cap.before = sg->sge->cap_states[cap_idx].cap; + eenv->cap.delta -= eenv->cap.before; + } + /* Add capacity of dst CPU (after task move) */ + if (eenv->util_delta != 0 && + cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) { + eenv->cap.after = sg->sge->cap_states[cap_idx].cap; + eenv->cap.delta += eenv->cap.after; + } + } + + idle_idx = group_idle_state(sg); + group_util = group_norm_util(eenv, sg); + sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power) + >> SCHED_CAPACITY_SHIFT; + sg_idle_energy = ((SCHED_CAPACITY_SCALE-group_util) + * sg->sge->idle_states[idle_idx].power) + >> SCHED_CAPACITY_SHIFT; + + total_energy += sg_busy_energy + sg_idle_energy; + + if (!sd->child) + cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg)); + + if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top))) + goto next_cpu; + + } while (sg = sg->next, sg != sd->groups); + } +next_cpu: + cpumask_clear_cpu(cpu, &visit_cpus); + continue; + } + + eenv->energy = total_energy; + return 0; +} + +static inline bool cpu_in_sg(struct sched_group *sg, int cpu) +{ + return cpu != -1 && cpumask_test_cpu(cpu, sched_group_cpus(sg)); +} + +/* + * energy_diff(): Estimate the energy impact of changing the utilization + * distribution. eenv specifies the change: utilisation amount, source, and + * destination cpu. Source or destination cpu may be -1 in which case the + * utilization is removed from or added to the system (e.g. task wake-up). If + * both are specified, the utilization is migrated. + */ +static inline int __energy_diff(struct energy_env *eenv) +{ + struct sched_domain *sd; + struct sched_group *sg; + int sd_cpu = -1, energy_before = 0, energy_after = 0; + + struct energy_env eenv_before = { + .util_delta = 0, + .src_cpu = eenv->src_cpu, + .dst_cpu = eenv->dst_cpu, + .nrg = { 0, 0, 0, 0}, + .cap = { 0, 0, 0 }, + }; + + if (eenv->src_cpu == eenv->dst_cpu) + return 0; + + sd_cpu = (eenv->src_cpu != -1) ? 
eenv->src_cpu : eenv->dst_cpu; + sd = rcu_dereference(per_cpu(sd_ea, sd_cpu)); + + if (!sd) + return 0; /* Error */ + + sg = sd->groups; + + do { + if (cpu_in_sg(sg, eenv->src_cpu) || cpu_in_sg(sg, eenv->dst_cpu)) { + eenv_before.sg_top = eenv->sg_top = sg; + + if (sched_group_energy(&eenv_before)) + return 0; /* Invalid result abort */ + energy_before += eenv_before.energy; + + /* Keep track of SRC cpu (before) capacity */ + eenv->cap.before = eenv_before.cap.before; + eenv->cap.delta = eenv_before.cap.delta; + + if (sched_group_energy(eenv)) + return 0; /* Invalid result abort */ + energy_after += eenv->energy; + } + } while (sg = sg->next, sg != sd->groups); + + eenv->nrg.before = energy_before; + eenv->nrg.after = energy_after; + eenv->nrg.diff = eenv->nrg.after - eenv->nrg.before; + eenv->payoff = 0; + + trace_sched_energy_diff(eenv->task, + eenv->src_cpu, eenv->dst_cpu, eenv->util_delta, + eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff, + eenv->cap.before, eenv->cap.after, eenv->cap.delta, + eenv->nrg.delta, eenv->payoff); + + return eenv->nrg.diff; +} + +#ifdef CONFIG_SCHED_TUNE + +struct target_nrg schedtune_target_nrg; + +/* + * System energy normalization + * Returns the normalized value, in the range [0..SCHED_LOAD_SCALE], + * corresponding to the specified energy variation. + */ +static inline int +normalize_energy(int energy_diff) +{ + u32 normalized_nrg; +#ifdef CONFIG_SCHED_DEBUG + int max_delta; + + /* Check for boundaries */ + max_delta = schedtune_target_nrg.max_power; + max_delta -= schedtune_target_nrg.min_power; + WARN_ON(abs(energy_diff) >= max_delta); +#endif + + /* Do scaling using positive numbers to increase the range */ + normalized_nrg = (energy_diff < 0) ? -energy_diff : energy_diff; + + /* Scale by energy magnitude */ + normalized_nrg <<= SCHED_CAPACITY_SHIFT; + + /* Normalize on max energy for target platform */ + normalized_nrg = reciprocal_divide( + normalized_nrg, schedtune_target_nrg.rdiv); + + return (energy_diff < 0) ? -normalized_nrg : normalized_nrg; +} + +static inline int +energy_diff(struct energy_env *eenv) +{ + unsigned int boost; + int nrg_delta; + + /* Conpute "absolute" energy diff */ + __energy_diff(eenv); + + /* Return energy diff when boost margin is 0 */ +#ifdef CONFIG_CGROUP_SCHEDTUNE + boost = schedtune_task_boost(eenv->task); +#else + boost = get_sysctl_sched_cfs_boost(); +#endif + if (boost == 0) + return eenv->nrg.diff; + + /* Compute normalized energy diff */ + nrg_delta = normalize_energy(eenv->nrg.diff); + eenv->nrg.delta = nrg_delta; + + eenv->payoff = schedtune_accept_deltas( + eenv->nrg.delta, + eenv->cap.delta, + eenv->task); + + /* + * When SchedTune is enabled, the energy_diff() function will return + * the computed energy payoff value. Since the energy_diff() return + * value is expected to be negative by its callers, this evaluation + * function return a negative value each time the evaluation return a + * positive payoff, which is the condition for the acceptance of + * a scheduling decision + */ + return -eenv->payoff; +} +#else /* CONFIG_SCHED_TUNE */ +#define energy_diff(eenv) __energy_diff(eenv) +#endif + /* * Detect M:N waker/wakee relationships via a switching-frequency heuristic. 
* @@ -5333,6 +5814,169 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, return 1; } +static inline int task_util(struct task_struct *p) +{ +#ifdef CONFIG_SCHED_WALT + if (!walt_disabled && sysctl_sched_use_walt_task_util) { + unsigned long demand = p->ravg.demand; + return (demand << 10) / walt_ravg_window; + } +#endif + return p->se.avg.util_avg; +} + +static inline unsigned long boosted_task_util(struct task_struct *task); + +static inline bool __task_fits(struct task_struct *p, int cpu, int util) +{ + unsigned long capacity = capacity_of(cpu); + + util += boosted_task_util(p); + + return (capacity * 1024) > (util * capacity_margin); +} + +static inline bool task_fits_max(struct task_struct *p, int cpu) +{ + unsigned long capacity = capacity_of(cpu); + unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity.val; + + if (capacity == max_capacity) + return true; + + if (capacity * capacity_margin > max_capacity * 1024) + return true; + + return __task_fits(p, cpu, 0); +} + +static inline bool task_fits_spare(struct task_struct *p, int cpu) +{ + return __task_fits(p, cpu, cpu_util(cpu)); +} + +static bool cpu_overutilized(int cpu) +{ + return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); +} + +#ifdef CONFIG_SCHED_TUNE + +static long +schedtune_margin(unsigned long signal, long boost) +{ + long long margin = 0; + + /* + * Signal proportional compensation (SPC) + * + * The Boost (B) value is used to compute a Margin (M) which is + * proportional to the complement of the original Signal (S): + * M = B * (SCHED_LOAD_SCALE - S), if B is positive + * M = B * S, if B is negative + * The obtained M could be used by the caller to "boost" S. + */ + + if (boost >= 0) { + margin = SCHED_CAPACITY_SCALE - signal; + margin *= boost; + } else + margin = -signal * boost; + /* + * Fast integer division by constant: + * Constant : (C) = 100 + * Precision : 0.1% (P) = 0.1 + * Reference : C * 100 / P (R) = 100000 + * + * Thus: + * Shift bits : ceil(log(R,2)) (S) = 17 + * Mult const : round(2^S/C) (M) = 1311 + * + * + */ + margin *= 1311; + margin >>= 17; + + if (boost < 0) + margin *= -1; + return margin; +} + +static inline int +schedtune_cpu_margin(unsigned long util, int cpu) +{ + int boost; + +#ifdef CONFIG_CGROUP_SCHEDTUNE + boost = schedtune_cpu_boost(cpu); +#else + boost = get_sysctl_sched_cfs_boost(); +#endif + if (boost == 0) + return 0; + + return schedtune_margin(util, boost); +} + +static inline long +schedtune_task_margin(struct task_struct *task) +{ + int boost; + unsigned long util; + long margin; + +#ifdef CONFIG_CGROUP_SCHEDTUNE + boost = schedtune_task_boost(task); +#else + boost = get_sysctl_sched_cfs_boost(); +#endif + if (boost == 0) + return 0; + + util = task_util(task); + margin = schedtune_margin(util, boost); + + return margin; +} + +#else /* CONFIG_SCHED_TUNE */ + +static inline int +schedtune_cpu_margin(unsigned long util, int cpu) +{ + return 0; +} + +static inline int +schedtune_task_margin(struct task_struct *task) +{ + return 0; +} + +#endif /* CONFIG_SCHED_TUNE */ + +static inline unsigned long +boosted_cpu_util(int cpu) +{ + unsigned long util = cpu_util(cpu); + long margin = schedtune_cpu_margin(util, cpu); + + trace_sched_boost_cpu(cpu, util, margin); + + return util + margin; +} + +static inline unsigned long +boosted_task_util(struct task_struct *task) +{ + unsigned long util = task_util(task); + long margin = schedtune_task_margin(task); + + trace_sched_boost_task(task, util, margin); + + return util + margin; +} + /* * 
find_idlest_group finds and returns the least busy CPU group within the * domain. @@ -5342,7 +5986,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int sd_flag) { struct sched_group *idlest = NULL, *group = sd->groups; + struct sched_group *fit_group = NULL, *spare_group = NULL; unsigned long min_load = ULONG_MAX, this_load = 0; + unsigned long fit_capacity = ULONG_MAX; + unsigned long max_spare_capacity = capacity_margin - SCHED_CAPACITY_SCALE; int load_idx = sd->forkexec_idx; int imbalance = 100 + (sd->imbalance_pct-100)/2; @@ -5350,7 +5997,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, load_idx = sd->wake_idx; do { - unsigned long load, avg_load; + unsigned long load, avg_load, spare_capacity; int local_group; int i; @@ -5373,6 +6020,25 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, load = target_load(i, load_idx); avg_load += load; + + /* + * Look for most energy-efficient group that can fit + * that can fit the task. + */ + if (capacity_of(i) < fit_capacity && task_fits_spare(p, i)) { + fit_capacity = capacity_of(i); + fit_group = group; + } + + /* + * Look for group which has most spare capacity on a + * single cpu. + */ + spare_capacity = capacity_of(i) - cpu_util(i); + if (spare_capacity > max_spare_capacity) { + max_spare_capacity = spare_capacity; + spare_group = group; + } } /* Adjust by relative CPU capacity of the group */ @@ -5386,6 +6052,12 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, } } while (group = group->next, group != sd->groups); + if (fit_group) + return fit_group; + + if (spare_group) + return spare_group; + if (!idlest || 100*this_load < imbalance*min_load) return NULL; return idlest; @@ -5410,7 +6082,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) /* Traverse only the allowed CPUs */ for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) { - if (idle_cpu(i)) { + if (task_fits_spare(p, i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); if (idle && idle->exit_latency < min_exit_latency) { @@ -5422,7 +6094,8 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) min_exit_latency = idle->exit_latency; latest_idle_timestamp = rq->idle_stamp; shallowest_idle_cpu = i; - } else if ((!idle || idle->exit_latency == min_exit_latency) && + } else if (idle_cpu(i) && + (!idle || idle->exit_latency == min_exit_latency) && rq->idle_stamp > latest_idle_timestamp) { /* * If equal or no active idle state, then @@ -5431,6 +6104,13 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) */ latest_idle_timestamp = rq->idle_stamp; shallowest_idle_cpu = i; + } else if (shallowest_idle_cpu == -1) { + /* + * If we haven't found an idle CPU yet + * pick a non-idle one that can fit the task as + * fallback. + */ + shallowest_idle_cpu = i; } } else if (shallowest_idle_cpu == -1) { load = weighted_cpuload(i); @@ -5654,94 +6334,305 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t static int select_idle_sibling(struct task_struct *p, int prev, int target) { struct sched_domain *sd; - int i; + struct sched_group *sg; + int i = task_cpu(p); + int best_idle = -1; + int best_idle_cstate = -1; + int best_idle_capacity = INT_MAX; - if (idle_cpu(target)) - return target; + if (!sysctl_sched_cstate_aware) { + if (idle_cpu(target)) + return target; - /* - * If the previous cpu is cache affine and idle, don't be stupid. 
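schedtune_margin() above computes the signal-proportional-compensation margin with an integer divide-by-100 done as multiply-by-1311, shift-right-17, accurate to the documented 0.1%. A standalone check with an invented task (utilization, i.e. signal, of 300 and a 10% boost), covering only the non-negative-boost branch:

#include <stdio.h>

#define SCHED_CAPACITY_SCALE    1024L

/* Non-negative boost branch of schedtune_margin() above:
 * margin = boost% of (SCHED_CAPACITY_SCALE - signal), with the /100
 * approximated by *1311 >> 17. */
static long spc_margin(unsigned long signal, long boost)
{
        long long margin = SCHED_CAPACITY_SCALE - signal;

        margin *= boost;
        margin *= 1311;
        margin >>= 17;
        return margin;
}

int main(void)
{
        unsigned long util = 300;       /* invented task utilization */
        long boost = 10;                /* 10% boost */
        long margin = spc_margin(util, boost);

        /* Exact value: (1024 - 300) * 10 / 100 = 72.4; the multiply-and-
         * shift gives 72, and boosted_task_util() adds it to the signal. */
        printf("margin=%ld boosted_util=%ld\n", margin, (long)util + margin);
        /* margin=72 boosted_util=372 */
        return 0;
}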
- */ - if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev)) - return prev; + /* + * If the prevous cpu is cache affine and idle, don't be stupid. + */ + if (i != target && cpus_share_cache(i, target) && idle_cpu(i)) + return i; - sd = rcu_dereference(per_cpu(sd_llc, target)); - if (!sd) - return target; + sd = rcu_dereference(per_cpu(sd_llc, target)); + if (!sd) + return target; - i = select_idle_core(p, sd, target); - if ((unsigned)i < nr_cpumask_bits) - return i; + i = select_idle_core(p, sd, target); + if ((unsigned)i < nr_cpumask_bits) + return i; - i = select_idle_cpu(p, sd, target); - if ((unsigned)i < nr_cpumask_bits) - return i; + i = select_idle_cpu(p, sd, target); + if ((unsigned)i < nr_cpumask_bits) + return i; + + i = select_idle_smt(p, sd, target); + if ((unsigned)i < nr_cpumask_bits) + return i; + } + + /* + * Otherwise, iterate the domains and find an elegible idle cpu. + */ + sd = rcu_dereference(per_cpu(sd_llc, target)); + for_each_lower_domain(sd) { + sg = sd->groups; + do { + if (!cpumask_intersects(sched_group_cpus(sg), + tsk_cpus_allowed(p))) + goto next; + + + if (sysctl_sched_cstate_aware) { + for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) { + struct rq *rq = cpu_rq(i); + int idle_idx = idle_get_state_idx(rq); + unsigned long new_usage = boosted_task_util(p); + unsigned long capacity_orig = capacity_orig_of(i); + if (new_usage > capacity_orig || !idle_cpu(i)) + goto next; + + if (i == target && new_usage <= capacity_curr_of(target)) + return target; + + if (best_idle < 0 || (idle_idx < best_idle_cstate && capacity_orig <= best_idle_capacity)) { + best_idle = i; + best_idle_cstate = idle_idx; + best_idle_capacity = capacity_orig; + } + } + } else { + for_each_cpu(i, sched_group_cpus(sg)) { + if (i == target || !idle_cpu(i)) + goto next; + } - i = select_idle_smt(p, sd, target); - if ((unsigned)i < nr_cpumask_bits) - return i; + target = cpumask_first_and(sched_group_cpus(sg), + tsk_cpus_allowed(p)); + goto done; + } +next: + sg = sg->next; + } while (sg != sd->groups); + } + if (best_idle > 0) + target = best_idle; +done: return target; } -/* - * cpu_util returns the amount of capacity of a CPU that is used by CFS - * tasks. The unit of the return value must be the one of capacity so we can - * compare the utilization with the capacity of the CPU that is available for - * CFS task (ie cpu_capacity). - * - * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the - * recent utilization of currently non-runnable tasks on a CPU. It represents - * the amount of utilization of a CPU in the range [0..capacity_orig] where - * capacity_orig is the cpu_capacity available at the highest frequency - * (arch_scale_freq_capacity()). - * The utilization of a CPU converges towards a sum equal to or less than the - * current capacity (capacity_curr <= capacity_orig) of the CPU because it is - * the running time on this CPU scaled by capacity_curr. - * - * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even - * higher than capacity_orig because of unfortunate rounding in - * cfs.avg.util_avg or just after migrating tasks and new task wakeups until - * the average stabilizes with the new running time. We need to check that the - * utilization stays within the range of [0..capacity_orig] and cap it if - * necessary. Without utilization capping, a group could be seen as overloaded - * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of - * available capacity. 
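In the cstate-aware path above, an idle cpu is only a candidate if its original capacity can hold the task's boosted utilization, and among candidates the shallowest idle state wins, with ties broken towards the smaller cpu (capacity_orig <= best_idle_capacity). A standalone sketch of that selection, with the sched_group walk flattened into a plain array of invented cpus (idle_idx of -1 meaning busy; the capacities and the task's boosted utilization are made up):

#include <stdio.h>
#include <limits.h>

struct cpu_state {
        int idle_idx;                   /* -1: busy, otherwise idle depth */
        unsigned long capacity_orig;
};

int main(void)
{
        /* Invented big.LITTLE-ish system: caps 430 (little), 1024 (big). */
        struct cpu_state cpus[4] = {
                { .idle_idx = -1, .capacity_orig =  430 },      /* cpu0 */
                { .idle_idx =  2, .capacity_orig =  430 },      /* cpu1 */
                { .idle_idx =  1, .capacity_orig = 1024 },      /* cpu2 */
                { .idle_idx =  1, .capacity_orig =  430 },      /* cpu3 */
        };
        unsigned long new_usage = 372;  /* boosted_task_util() of the task */
        int best_idle = -1, best_idle_cstate = -1, i;
        unsigned long best_idle_capacity = ULONG_MAX;

        for (i = 0; i < 4; i++) {
                if (cpus[i].idle_idx < 0 ||             /* !idle_cpu(i) */
                    new_usage > cpus[i].capacity_orig)  /* does not fit */
                        continue;
                if (best_idle < 0 ||
                    (cpus[i].idle_idx < best_idle_cstate &&
                     cpus[i].capacity_orig <= best_idle_capacity)) {
                        best_idle = i;
                        best_idle_cstate = cpus[i].idle_idx;
                        best_idle_capacity = cpus[i].capacity_orig;
                }
        }

        /* cpu1 (deep idle) is taken first; cpu2 is shallower but bigger,
         * so it is rejected; cpu3 is shallower and no bigger, so it wins. */
        printf("best_idle=cpu%d, cstate %d\n", best_idle, best_idle_cstate);
        return 0;
}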
We allow utilization to overshoot capacity_curr (but not - * capacity_orig) as it useful for predicting the capacity required after task - * migrations (scheduler-driven DVFS). - */ -static int cpu_util(int cpu) +static inline int find_best_target(struct task_struct *p, bool boosted, bool prefer_idle) { - unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; - unsigned long capacity = capacity_orig_of(cpu); + int iter_cpu; + int target_cpu = -1; + int target_util = 0; + int backup_capacity = 0; + int best_idle_cpu = -1; + int best_idle_cstate = INT_MAX; + int backup_cpu = -1; + unsigned long task_util_boosted, new_util; - return (util >= capacity) ? capacity : util; -} + task_util_boosted = boosted_task_util(p); + for (iter_cpu = 0; iter_cpu < NR_CPUS; iter_cpu++) { + int cur_capacity; + struct rq *rq; + int idle_idx; -static inline int task_util(struct task_struct *p) -{ - return p->se.avg.util_avg; + /* + * Iterate from higher cpus for boosted tasks. + */ + int i = boosted ? NR_CPUS-iter_cpu-1 : iter_cpu; + + if (!cpu_online(i) || !cpumask_test_cpu(i, tsk_cpus_allowed(p))) + continue; + + /* + * p's blocked utilization is still accounted for on prev_cpu + * so prev_cpu will receive a negative bias due to the double + * accounting. However, the blocked utilization may be zero. + */ + new_util = cpu_util(i) + task_util_boosted; + + /* + * Ensure minimum capacity to grant the required boost. + * The target CPU can be already at a capacity level higher + * than the one required to boost the task. + */ + if (new_util > capacity_orig_of(i)) + continue; + +#ifdef CONFIG_SCHED_WALT + if (walt_cpu_high_irqload(i)) + continue; +#endif + /* + * Unconditionally favoring tasks that prefer idle cpus to + * improve latency. + */ + if (idle_cpu(i) && prefer_idle) { + if (best_idle_cpu < 0) + best_idle_cpu = i; + continue; + } + + cur_capacity = capacity_curr_of(i); + rq = cpu_rq(i); + idle_idx = idle_get_state_idx(rq); + + if (new_util < cur_capacity) { + if (cpu_rq(i)->nr_running) { + if(prefer_idle) { + // Find a target cpu with lowest + // utilization. + if (target_util == 0 || + target_util < new_util) { + target_cpu = i; + target_util = new_util; + } + } else { + // Find a target cpu with highest + // utilization. + if (target_util == 0 || + target_util > new_util) { + target_cpu = i; + target_util = new_util; + } + } + } else if (!prefer_idle) { + if (best_idle_cpu < 0 || + (sysctl_sched_cstate_aware && + best_idle_cstate > idle_idx)) { + best_idle_cstate = idle_idx; + best_idle_cpu = i; + } + } + } else if (backup_capacity == 0 || + backup_capacity > cur_capacity) { + // Find a backup cpu with least capacity. + backup_capacity = cur_capacity; + backup_cpu = i; + } + } + + if (prefer_idle && best_idle_cpu >= 0) + target_cpu = best_idle_cpu; + else if (target_cpu < 0) + target_cpu = best_idle_cpu >= 0 ? best_idle_cpu : backup_cpu; + + return target_cpu; } -/* - * Disable WAKE_AFFINE in the case where task @p doesn't fit in the - * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu. - * - * In that case WAKE_AFFINE doesn't make sense and we'll let - * BALANCE_WAKE sort things out. 
- */ -static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) +static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync) { - long min_cap, max_cap; + struct sched_domain *sd; + struct sched_group *sg, *sg_target; + int target_max_cap = INT_MAX; + int target_cpu = task_cpu(p); + unsigned long task_util_boosted, new_util; + int i; - min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu)); - max_cap = cpu_rq(cpu)->rd->max_cpu_capacity; + if (sysctl_sched_sync_hint_enable && sync) { + int cpu = smp_processor_id(); + cpumask_t search_cpus; + cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask); + if (cpumask_test_cpu(cpu, &search_cpus)) + return cpu; + } - /* Minimum capacity is close to max, no need to abort wake_affine */ - if (max_cap - min_cap < max_cap >> 3) - return 0; + sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p))); + + if (!sd) + return target; + + sg = sd->groups; + sg_target = sg; + + if (sysctl_sched_is_big_little) { - return min_cap * 1024 < task_util(p) * capacity_margin; + /* + * Find group with sufficient capacity. We only get here if no cpu is + * overutilized. We may end up overutilizing a cpu by adding the task, + * but that should not be any worse than select_idle_sibling(). + * load_balance() should sort it out later as we get above the tipping + * point. + */ + do { + /* Assuming all cpus are the same in group */ + int max_cap_cpu = group_first_cpu(sg); + + /* + * Assume smaller max capacity means more energy-efficient. + * Ideally we should query the energy model for the right + * answer but it easily ends up in an exhaustive search. + */ + if (capacity_of(max_cap_cpu) < target_max_cap && + task_fits_max(p, max_cap_cpu)) { + sg_target = sg; + target_max_cap = capacity_of(max_cap_cpu); + } + } while (sg = sg->next, sg != sd->groups); + + task_util_boosted = boosted_task_util(p); + /* Find cpu with sufficient capacity */ + for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) { + /* + * p's blocked utilization is still accounted for on prev_cpu + * so prev_cpu will receive a negative bias due to the double + * accounting. However, the blocked utilization may be zero. + */ + new_util = cpu_util(i) + task_util_boosted; + + /* + * Ensure minimum capacity to grant the required boost. + * The target CPU can be already at a capacity level higher + * than the one required to boost the task. 
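The checks that follow distinguish capacity_orig_of() (what the cpu offers at its highest OPP) from capacity_curr_of() (what it offers at the current OPP): a cpu is skipped only when the task would not fit even at the top OPP, preferred when it fits without raising the frequency, and kept as a fallback otherwise. A standalone worked example with invented numbers:

#include <stdio.h>

int main(void)
{
        /* Invented candidate cpu: max capacity 1024, current OPP worth
         * 655, already carrying 420 of utilization. */
        unsigned long capacity_orig = 1024, capacity_curr = 655;
        unsigned long cpu_util = 420;
        unsigned long task_util_boosted = 372;  /* boosted_task_util(p) */
        unsigned long new_util = cpu_util + task_util_boosted;

        if (new_util > capacity_orig)
                printf("skip: no fit even at the highest OPP\n");
        else if (new_util < capacity_curr)
                printf("preferred: fits at the current OPP (%lu < %lu)\n",
                       new_util, capacity_curr);
        else
                printf("fallback: fits only after an OPP raise "
                       "(%lu, curr %lu, orig %lu)\n",
                       new_util, capacity_curr, capacity_orig);
        return 0;
}

With these figures the cpu lands in the fallback case (792 fits under 1024 but not under 655), matching the "cpu has capacity at higher OPP" comment below.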
+ */ + if (new_util > capacity_orig_of(i)) + continue; + + if (new_util < capacity_curr_of(i)) { + target_cpu = i; + if (cpu_rq(i)->nr_running) + break; + } + + /* cpu has capacity at higher OPP, keep it as fallback */ + if (target_cpu == task_cpu(p)) + target_cpu = i; + } + } else { + /* + * Find a cpu with sufficient capacity + */ +#ifdef CONFIG_CGROUP_SCHEDTUNE + bool boosted = schedtune_task_boost(p) > 0; + bool prefer_idle = schedtune_prefer_idle(p) > 0; +#else + bool boosted = 0; + bool prefer_idle = 0; +#endif + int tmp_target = find_best_target(p, boosted, prefer_idle); + if (tmp_target >= 0) { + target_cpu = tmp_target; + if ((boosted || prefer_idle) && idle_cpu(target_cpu)) + return target_cpu; + } + } + + if (target_cpu != task_cpu(p)) { + struct energy_env eenv = { + .util_delta = task_util(p), + .src_cpu = task_cpu(p), + .dst_cpu = target_cpu, + .task = p, + }; + + /* Not enough spare capacity on previous cpu */ + if (cpu_overutilized(task_cpu(p))) + return target_cpu; + + if (energy_diff(&eenv) >= 0) + return task_cpu(p); + } + + return target_cpu; } /* @@ -5771,8 +6662,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); - want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) - && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); + want_affine = (!wake_wide(p) && task_fits_max(p, cpu) && + cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) || + energy_aware(); } rcu_read_lock(); @@ -5803,7 +6695,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f } if (!sd) { - if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */ + if (energy_aware() && !cpu_rq(cpu)->rd->overutilized) + new_cpu = energy_aware_wake_cpu(p, prev_cpu, sync); + else if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); } else while (sd) { @@ -5898,6 +6792,8 @@ static void task_dead_fair(struct task_struct *p) { remove_entity_load_avg(&p->se); } +#else +#define task_fits_max(p, cpu) true #endif /* CONFIG_SMP */ static unsigned long @@ -6144,6 +7040,8 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct pin_cookie c if (hrtick_enabled(rq)) hrtick_start_fair(rq, p); + rq->misfit_task = !task_fits_max(p, rq->cpu); + return p; simple: cfs_rq = &rq->cfs; @@ -6165,9 +7063,12 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct pin_cookie c if (hrtick_enabled(rq)) hrtick_start_fair(rq, p); + rq->misfit_task = !task_fits_max(p, rq->cpu); + return p; idle: + rq->misfit_task = 0; /* * This is OK, because current is on_cpu, which avoids it being picked * for load-balance and preemption/IRQs are still disabled avoiding @@ -6380,6 +7281,13 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10; enum fbq_type { regular, remote, all }; +enum group_type { + group_other = 0, + group_misfit_task, + group_imbalanced, + group_overloaded, +}; + #define LBF_ALL_PINNED 0x01 #define LBF_NEED_BREAK 0x02 #define LBF_DST_PINNED 0x04 @@ -6402,6 +7310,7 @@ struct lb_env { int new_dst_cpu; enum cpu_idle_type idle; long imbalance; + unsigned int src_grp_nr_running; /* The set of CPUs under consideration for load-balancing */ struct cpumask *cpus; unsigned int busiest_grp_capacity; @@ -6797,6 +7706,10 @@ static void attach_one_task(struct rq *rq, struct task_struct *p) { raw_spin_lock(&rq->lock); attach_task(rq, p); + /* + * We want to potentially raise target_cpu's OPP. 
+ */ + update_capacity_of(cpu_of(rq)); raw_spin_unlock(&rq->lock); } @@ -6818,6 +7731,11 @@ static void attach_tasks(struct lb_env *env) attach_task(env->dst_rq, p); } + /* + * We want to potentially raise env.dst_cpu's OPP. + */ + update_capacity_of(env->dst_cpu); + raw_spin_unlock(&env->dst_rq->lock); } @@ -6913,12 +7831,6 @@ static unsigned long task_h_load(struct task_struct *p) /********** Helpers for find_busiest_group ************************/ -enum group_type { - group_other = 0, - group_imbalanced, - group_overloaded, -}; - /* * sg_lb_stats - stats of a sched_group required for load_balancing */ @@ -6938,6 +7850,7 @@ struct sg_lb_stats { unsigned int group_weight; enum group_type group_type; int group_no_capacity; + int group_misfit_task; /* A cpu has a task too big for its capacity */ #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; unsigned int nr_preferred_running; @@ -7039,13 +7952,43 @@ static unsigned long scale_rt_capacity(int cpu) return 1; } +void init_max_cpu_capacity(struct max_cpu_capacity *mcc) +{ + raw_spin_lock_init(&mcc->lock); + mcc->val = 0; + mcc->cpu = -1; +} + static void update_cpu_capacity(struct sched_domain *sd, int cpu) { unsigned long capacity = arch_scale_cpu_capacity(sd, cpu); struct sched_group *sdg = sd->groups; + struct max_cpu_capacity *mcc; + unsigned long max_capacity; + int max_cap_cpu; + unsigned long flags; cpu_rq(cpu)->cpu_capacity_orig = capacity; + mcc = &cpu_rq(cpu)->rd->max_cpu_capacity; + + raw_spin_lock_irqsave(&mcc->lock, flags); + max_capacity = mcc->val; + max_cap_cpu = mcc->cpu; + + if ((max_capacity > capacity && max_cap_cpu == cpu) || + (max_capacity < capacity)) { + mcc->val = capacity; + mcc->cpu = cpu; +#ifdef CONFIG_SCHED_DEBUG + raw_spin_unlock_irqrestore(&mcc->lock, flags); + pr_info("CPU%d: update max cpu_capacity %lu\n", cpu, capacity); + goto skip_unlock; +#endif + } + raw_spin_unlock_irqrestore(&mcc->lock, flags); + +skip_unlock: __attribute__ ((unused)); capacity *= scale_rt_capacity(cpu); capacity >>= SCHED_CAPACITY_SHIFT; @@ -7054,13 +7997,14 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) cpu_rq(cpu)->cpu_capacity = capacity; sdg->sgc->capacity = capacity; + sdg->sgc->max_capacity = capacity; } void update_group_capacity(struct sched_domain *sd, int cpu) { struct sched_domain *child = sd->child; struct sched_group *group, *sdg = sd->groups; - unsigned long capacity; + unsigned long capacity, max_capacity; unsigned long interval; interval = msecs_to_jiffies(sd->balance_interval); @@ -7073,6 +8017,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu) } capacity = 0; + max_capacity = 0; if (child->flags & SD_OVERLAP) { /* @@ -7099,11 +8044,12 @@ void update_group_capacity(struct sched_domain *sd, int cpu) */ if (unlikely(!rq->sd)) { capacity += capacity_of(cpu); - continue; + } else { + sgc = rq->sd->groups->sgc; + capacity += sgc->capacity; } - sgc = rq->sd->groups->sgc; - capacity += sgc->capacity; + max_capacity = max(capacity, max_capacity); } } else { /* @@ -7113,16 +8059,20 @@ void update_group_capacity(struct sched_domain *sd, int cpu) group = child->groups; do { + struct sched_group_capacity *sgc = group->sgc; cpumask_t *cpus = sched_group_cpus(group); /* Revisit this later. 
This won't work for MT domain */ - if (!cpu_isolated(cpumask_first(cpus))) - capacity += group->sgc->capacity; + if (!cpu_isolated(cpumask_first(cpus))) { + capacity += sgc->capacity; + max_capacity = max(sgc->max_capacity, max_capacity); + } group = group->next; } while (group != child->groups); } sdg->sgc->capacity = capacity; + sdg->sgc->max_capacity = max_capacity; } /* @@ -7227,6 +8177,9 @@ group_type group_classify(struct sched_group *group, if (sg_imbalanced(group)) return group_imbalanced; + if (sgs->group_misfit_task) + return group_misfit_task; + return group_other; } @@ -7238,11 +8191,12 @@ group_type group_classify(struct sched_group *group, * @local_group: Does group contain this_cpu. * @sgs: variable to hold the statistics for this group. * @overload: Indicate more than one runnable task for any CPU. + * @overutilized: Indicate overutilization for any CPU. */ static inline void update_sg_lb_stats(struct lb_env *env, struct sched_group *group, int load_idx, int local_group, struct sg_lb_stats *sgs, - bool *overload) + bool *overload, bool *overutilized) { unsigned long load; int i, nr_running; @@ -7289,6 +8243,12 @@ static inline void update_sg_lb_stats(struct lb_env *env, */ if (!nr_running && idle_cpu(i)) sgs->idle_cpus++; + + if (cpu_overutilized(i)) { + *overutilized = true; + if (!sgs->group_misfit_task && rq->misfit_task) + sgs->group_misfit_task = capacity_of(i); + } } /* Isolated CPU has no weight */ @@ -7411,7 +8371,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd struct sched_group *sg = env->sd->groups; struct sg_lb_stats tmp_sgs; int load_idx, prefer_sibling = 0; - bool overload = false; + bool overload = false, overutilized = false; if (child && child->flags & SD_PREFER_SIBLING) prefer_sibling = 1; @@ -7433,7 +8393,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd } update_sg_lb_stats(env, sg, load_idx, local_group, sgs, - &overload); + &overload, &overutilized); if (local_group) goto next_group; @@ -7473,10 +8433,23 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd if (env->sd->flags & SD_NUMA) env->fbq_type = fbq_classify_group(&sds->busiest_stat); + env->src_grp_nr_running = sds->busiest_stat.sum_nr_running; + if (!env->sd->parent) { /* update overload indicator if we are at root domain */ if (env->dst_rq->rd->overload != overload) env->dst_rq->rd->overload = overload; + + /* Update over-utilization (tipping point, U >= 0) indicator */ + if (env->dst_rq->rd->overutilized != overutilized) { + env->dst_rq->rd->overutilized = overutilized; + trace_sched_overutilized(overutilized); + } + } else { + if (!env->dst_rq->rd->overutilized && overutilized) { + env->dst_rq->rd->overutilized = true; + trace_sched_overutilized(true); + } } } @@ -7697,6 +8670,10 @@ static struct sched_group *find_busiest_group(struct lb_env *env) * this level. 
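update_sg_lb_stats() above flags a cpu (and, via the root domain, the whole system) as overutilized once cpu_overutilized() trips, i.e. once utilization exceeds capacity * 1024 / capacity_margin. A standalone check; the capacity_margin constant itself is defined elsewhere in fair.c and is not visible in this hunk, so the conventional value of 1280 (an ~80% tipping point) is assumed here, and the capacities and utilizations are invented:

#include <stdio.h>

/* Assumed value: with capacity_margin = 1280, the tipping point sits at
 * 1024/1280 = 80% of the cpu's capacity. */
#define CAPACITY_MARGIN         1280UL

static int cpu_overutilized(unsigned long capacity, unsigned long util)
{
        /* Same comparison as cpu_overutilized() in the patch. */
        return (capacity * 1024) < (util * CAPACITY_MARGIN);
}

int main(void)
{
        /* util 360 overutilizes a little cpu (80% of 430 = 344) but not
         * a big one (80% of 1024 = 819). */
        printf("little cap 430,  util 360: %s\n",
               cpu_overutilized(430, 360) ? "overutilized" : "ok");
        printf("big    cap 1024, util 360: %s\n",
               cpu_overutilized(1024, 360) ? "overutilized" : "ok");
        return 0;
}

Once the root domain is marked overutilized, find_busiest_group() below stops taking the energy-aware early bail-out and conventional load balancing takes over.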
*/ update_sd_lb_stats(env, &sds); + + if (energy_aware() && !env->dst_rq->rd->overutilized) + goto out_balanced; + local = &sds.local_stat; busiest = &sds.busiest_stat; @@ -7890,6 +8867,13 @@ static int need_active_balance(struct lb_env *env) return 1; } + if (energy_aware() && (capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) && + env->src_rq->cfs.h_nr_running == 1 && + cpu_overutilized(env->src_cpu) && + !cpu_overutilized(env->dst_cpu)) { + return 1; + } + return unlikely(sd->nr_balance_failed > sd->cache_nice_tries + NEED_ACTIVE_BALANCE_THRESHOLD); } @@ -8035,6 +9019,11 @@ static int load_balance(int this_cpu, struct rq *this_rq, * ld_moved - cumulative load moved across iterations */ cur_ld_moved = detach_tasks(&env); + /* + * We want to potentially lower env.src_cpu's OPP. + */ + if (cur_ld_moved) + update_capacity_of(env.src_cpu); /* * We've detached some tasks from busiest_rq. Every @@ -8128,8 +9117,10 @@ static int load_balance(int this_cpu, struct rq *this_rq, * excessive cache_hot migrations and active balances. */ if (idle != CPU_NEWLY_IDLE && - !(env.flags & LBF_BIG_TASK_ACTIVE_BALANCE)) - sd->nr_balance_failed++; + !(env.flags & LBF_BIG_TASK_ACTIVE_BALANCE)) { + if (env.src_grp_nr_running > 1) + sd->nr_balance_failed++; + } if (need_active_balance(&env)) { raw_spin_lock_irqsave(&busiest->lock, flags); @@ -8279,6 +9270,7 @@ static int idle_balance(struct rq *this_rq) struct sched_domain *sd; int pulled_task = 0; u64 curr_cost = 0; + long removed_util=0; if (cpu_isolated(this_cpu)) return 0; @@ -8289,8 +9281,9 @@ static int idle_balance(struct rq *this_rq) */ this_rq->idle_stamp = rq_clock(this_rq); - if (this_rq->avg_idle < sysctl_sched_migration_cost || - !this_rq->rd->overload) { + if (!energy_aware() && + (this_rq->avg_idle < sysctl_sched_migration_cost || + !this_rq->rd->overload)) { rcu_read_lock(); sd = rcu_dereference_check_sched_domain(this_rq->sd); if (sd) @@ -8302,6 +9295,17 @@ static int idle_balance(struct rq *this_rq) raw_spin_unlock(&this_rq->lock); + /* + * If removed_util_avg is !0 we most probably migrated some task away + * from this_cpu. In this case we might be willing to trigger an OPP + * update, but we want to do so if we don't find anybody else to pull + * here (we will trigger an OPP update with the pulled task's enqueue + * anyway). + * + * Record removed_util before calling update_blocked_averages, and use + * it below (before returning) to see if an OPP update is required. + */ + removed_util = atomic_long_read(&(this_rq->cfs).removed_util_avg); update_blocked_averages(this_cpu); rcu_read_lock(); for_each_domain(this_cpu, sd) { @@ -8368,6 +9372,13 @@ static int idle_balance(struct rq *this_rq) if (pulled_task) this_rq->idle_stamp = 0; + else if (removed_util) { + /* + * No task pulled and someone has been migrated away. + * Good case to trigger an OPP update. + */ + update_capacity_of(this_cpu); + } return pulled_task; } @@ -8450,6 +9461,10 @@ static int active_load_balance_cpu_stop(void *data) p = detach_one_task(&env); if (p) { schedstat_inc(sd->alb_pushed); + /* + * We want to potentially lower env.src_cpu's OPP. + */ + update_capacity_of(env.src_cpu); /* Active balancing done, reset the failure counter. 
*/ sd->nr_balance_failed = 0; moved = true; @@ -8822,27 +9837,6 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) clear_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)); } -static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type) -{ - unsigned long now = jiffies; - - /* - * None are in tickless mode and hence no need for NOHZ idle load - * balancing. - */ - if (likely(!atomic_read(&nohz.nr_cpus))) - return 0; - -#ifdef CONFIG_SCHED_HMP - return _nohz_kick_needed_hmp(rq, cpu, type); -#endif - - if (time_before(now, nohz.next_balance)) - return 0; - - return (rq->nr_running >= 2); -} - /* * Current heuristic for kicking the idle load balancer in the presence * of an idle cpu in the system. @@ -8856,6 +9850,7 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type) */ static inline bool nohz_kick_needed(struct rq *rq, int *type) { + unsigned long now = jiffies; #ifndef CONFIG_SCHED_HMP struct sched_domain_shared *sds; struct sched_domain *sd; @@ -8874,13 +9869,28 @@ static inline bool nohz_kick_needed(struct rq *rq, int *type) set_cpu_sd_state_busy(); nohz_balance_exit_idle(cpu); - if (_nohz_kick_needed(rq, cpu, type)) + /* + * None are in tickless mode and hence no need for NOHZ idle load + * balancing. + */ + if (likely(!atomic_read(&nohz.nr_cpus))) + return false; + +#ifdef CONFIG_SCHED_HMP + return _nohz_kick_needed_hmp(rq, cpu, type); +#endif + + if (time_before(now, nohz.next_balance)) + return false; + + if (rq->nr_running >= 2 && + (!energy_aware() || cpu_overutilized(cpu))) return true; #ifndef CONFIG_SCHED_HMP rcu_read_lock(); sds = rcu_dereference(per_cpu(sd_llc_shared, cpu)); - if (sds) { + if (sds && !energy_aware()) { /* * XXX: write a coherent comment on why we do this. * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com @@ -8993,6 +10003,16 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) if (static_branch_unlikely(&sched_numa_balancing)) task_tick_numa(rq, curr); + +#ifdef CONFIG_SMP + if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) { + rq->rd->overutilized = true; + trace_sched_overutilized(true); + } + + rq->misfit_task = !task_fits_max(curr, rq->cpu); +#endif + } /* @@ -9502,7 +10522,7 @@ __init void init_sched_fair_class(void) } -/* QHMP sched implementation begins here */ +/* QHMP/Zone sched implementation begins here */ #ifdef CONFIG_SCHED_HMP #ifdef CONFIG_SMP diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 6651a37ba0578ed1a50a3ffc7dcd302f45319bae..c30c48fde7e67582c40e9f56dc0c12a5eaa3d8cb 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -69,3 +69,12 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true) SCHED_FEAT(LB_MIN, false) SCHED_FEAT(ATTACH_AGE_LOAD, true) +/* + * Energy aware scheduling. Use platform energy model to guide scheduling + * decisions optimizing for energy efficiency. + */ +#ifdef CONFIG_DEFAULT_USE_ENERGY_AWARE +SCHED_FEAT(ENERGY_AWARE, true) +#else +SCHED_FEAT(ENERGY_AWARE, false) +#endif diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 1d8718d5300d81b896f66e9c8c3ca9df97b4cbf4..cf75f00f7037701bef7b2d7afadf08295032e5a5 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -23,9 +23,10 @@ extern char __cpuidle_text_start[], __cpuidle_text_end[]; * sched_idle_set_state - Record idle state for the current CPU. * @idle_state: State to record. 
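+ * @index: Index of @idle_state in the cpuidle driver's state table,
+ *	   cached on the runqueue via idle_set_state_idx().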
*/ -void sched_idle_set_state(struct cpuidle_state *idle_state) +void sched_idle_set_state(struct cpuidle_state *idle_state, int index) { idle_set_state(this_rq(), idle_state); + idle_set_state_idx(this_rq(), index); } static int __read_mostly cpu_idle_force_poll; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 67ab0143dbce35cb7d083724213407c004cd8711..bcac7118d23da9b1efbc1af21f9e62a3b87cf686 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -61,6 +61,8 @@ dec_hmp_sched_stats_rt(struct rq *rq, struct task_struct *p) { } #endif /* CONFIG_SCHED_HMP */ +#include "walt.h" + int sched_rr_timeslice = RR_TIMESLICE; static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); @@ -1022,7 +1024,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) * but accrue some time due to boosting. */ if (likely(rt_b->rt_runtime)) { - static bool once; + static bool once = false; rt_rq->rt_throttled = 1; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index eed063916aecb911976ee1564ff7d7ffe462be6f..41622ca57fa732b8cec2960bb477a82f5f2038bd 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -25,17 +25,16 @@ struct rq; struct cpuidle_state; +#ifdef CONFIG_SCHED_HMP +#define NUM_TRACKED_WINDOWS 2 +#define NUM_LOAD_INDICES 1000 + struct hmp_sched_stats { int nr_big_tasks; u64 cumulative_runnable_avg; u64 pred_demands_sum; }; - -#ifdef CONFIG_SCHED_HMP -#define NUM_TRACKED_WINDOWS 2 -#define NUM_LOAD_INDICES 1000 - struct load_subtractions { u64 window_start; u64 subs; @@ -80,7 +79,7 @@ struct cpu_cycle { u64 time; }; -#endif +#endif /* CONFIG_SCHED_HMP */ /* task_struct::on_rq states: */ @@ -502,8 +501,14 @@ struct cfs_rq { struct list_head leaf_cfs_rq_list; struct task_group *tg; /* group that "owns" this runqueue */ +#ifdef CONFIG_SCHED_WALT + u64 cumulative_runnable_avg; +#endif + #ifdef CONFIG_CFS_BANDWIDTH +#ifdef CONFIG_SCHED_HMP struct hmp_sched_stats hmp_stats; +#endif int runtime_enabled; u64 runtime_expires; @@ -605,6 +610,12 @@ struct dl_rq { #ifdef CONFIG_SMP +struct max_cpu_capacity { + raw_spinlock_t lock; + unsigned long val; + int cpu; +}; + /* * We add the notion of a root-domain which will be used to define per-domain * variables. Each exclusive cpuset essentially defines an island domain by @@ -623,6 +634,9 @@ struct root_domain { /* Indicate more than one runnable task for any CPU */ bool overload; + /* Indicate one or more cpus over-utilized (tipping point) */ + bool overutilized; + /* * The bit corresponding to a CPU gets set here if such CPU has more * than one runnable -deadline task (as it is below for RT tasks). @@ -639,7 +653,8 @@ struct root_domain { cpumask_var_t rto_mask; struct cpupri cpupri; - unsigned long max_cpu_capacity; + /* Maximum cpu capacity in the system. 
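+ * Tracked as a (val, cpu) pair under its own raw spinlock so that
+ * update_cpu_capacity() can record both the maximum original capacity
+ * and the CPU that currently provides it.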
*/ + struct max_cpu_capacity max_cpu_capacity; }; extern struct root_domain def_root_domain; @@ -668,6 +683,7 @@ struct rq { #endif #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; + unsigned int misfit_task; #ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_SMP unsigned long last_load_update_tick; @@ -677,6 +693,14 @@ struct rq { #ifdef CONFIG_NO_HZ_FULL unsigned long last_sched_tick; #endif + +#ifdef CONFIG_CPU_QUIET + /* time-based average load */ + u64 nr_last_stamp; + u64 nr_running_integral; + seqcount_t ave_seqcnt; +#endif + /* capture load from *all* tasks on this cpu: */ struct load_weight load; unsigned long nr_load_updates; @@ -739,11 +763,10 @@ struct rq { u64 max_idle_balance_cost; #endif - struct hmp_sched_stats hmp_stats; - #ifdef CONFIG_SCHED_HMP struct sched_cluster *cluster; struct cpumask freq_domain_cpumask; + struct hmp_sched_stats hmp_stats; int cstate, wakeup_latency, wakeup_energy; u64 window_start; @@ -818,6 +841,7 @@ struct rq { #ifdef CONFIG_CPU_IDLE /* Must be inspected within a rcu lock section */ struct cpuidle_state *idle_state; + int idle_state_idx; #endif }; @@ -984,6 +1008,8 @@ DECLARE_PER_CPU(int, sd_llc_id); DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); DECLARE_PER_CPU(struct sched_domain *, sd_numa); DECLARE_PER_CPU(struct sched_domain *, sd_asym); +DECLARE_PER_CPU(struct sched_domain *, sd_ea); +DECLARE_PER_CPU(struct sched_domain *, sd_scs); struct sched_group_capacity { atomic_t ref; @@ -991,7 +1017,8 @@ struct sched_group_capacity { * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity * for a single CPU. */ - unsigned int capacity; + unsigned long capacity; + unsigned long max_capacity; /* Max per-cpu capacity in group */ unsigned long next_update; int imbalance; /* XXX unrelated to capacity but shared group state */ @@ -1004,6 +1031,7 @@ struct sched_group { unsigned int group_weight; struct sched_group_capacity *sgc; + const struct sched_group_energy const *sge; /* * The CPUs this group covers. 
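The const struct sched_group_energy pointer added to struct sched_group above carries the group's platform energy model. Its definition is not part of this section; the accesses made by schedtune_add_cluster_nrg() in tune.c below (sge->idle_states[...].power, sge->cap_states[...].power) assume roughly the following layout from the EAS patch set (a sketch, field names may differ in this tree):

	struct idle_state {
		unsigned long power;		/* power drawn in this idle state */
	};

	struct capacity_state {
		unsigned long cap;		/* compute capacity at this OPP */
		unsigned long power;		/* busy power at this OPP */
	};

	struct sched_group_energy {
		unsigned int nr_idle_states;
		const struct idle_state *idle_states;
		unsigned int nr_cap_states;
		const struct capacity_state *cap_states;
	};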
@@ -1325,6 +1353,7 @@ extern const u32 sched_prio_to_wmult[40]; #else #define ENQUEUE_MIGRATED 0x00 #endif +#define ENQUEUE_WAKEUP_NEW 0x40 #define RETRY_TASK ((void *)-1UL) @@ -1419,6 +1448,7 @@ extern const struct sched_class idle_sched_class; #ifdef CONFIG_SMP +extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc); extern void update_group_capacity(struct sched_domain *sd, int cpu); extern void trigger_load_balance(struct rq *rq); @@ -1440,6 +1470,17 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq) SCHED_WARN_ON(!rcu_read_lock_held()); return rq->idle_state; } + +static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx) +{ + rq->idle_state_idx = idle_state_idx; +} + +static inline int idle_get_state_idx(struct rq *rq) +{ + WARN_ON(!rcu_read_lock_held()); + return rq->idle_state_idx; +} #else static inline void idle_set_state(struct rq *rq, struct cpuidle_state *idle_state) @@ -1450,6 +1491,15 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq) { return NULL; } + +static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx) +{ +} + +static inline int idle_get_state_idx(struct rq *rq) +{ + return -1; +} #endif extern void sysrq_sched_debug_show(void); @@ -1504,7 +1554,7 @@ static inline void sched_update_tick_dependency(struct rq *rq) static inline void sched_update_tick_dependency(struct rq *rq) { } #endif -static inline void add_nr_running(struct rq *rq, unsigned count) +static inline void __add_nr_running(struct rq *rq, unsigned count) { unsigned prev_nr = rq->nr_running; @@ -1521,7 +1571,7 @@ static inline void add_nr_running(struct rq *rq, unsigned count) sched_update_tick_dependency(rq); } -static inline void sub_nr_running(struct rq *rq, unsigned count) +static inline void __sub_nr_running(struct rq *rq, unsigned count) { sched_update_nr_prod(cpu_of(rq), count, false); rq->nr_running -= count; @@ -1529,6 +1579,43 @@ static inline void sub_nr_running(struct rq *rq, unsigned count) sched_update_tick_dependency(rq); } +#ifdef CONFIG_CPU_QUIET +#define NR_AVE_SCALE(x) ((x) << FSHIFT) +static inline u64 do_nr_running_integral(struct rq *rq) +{ + s64 nr, deltax; + u64 nr_running_integral = rq->nr_running_integral; + + deltax = rq->clock_task - rq->nr_last_stamp; + nr = NR_AVE_SCALE(rq->nr_running); + + nr_running_integral += nr * deltax; + + return nr_running_integral; +} + +static inline void add_nr_running(struct rq *rq, unsigned count) +{ + write_seqcount_begin(&rq->ave_seqcnt); + rq->nr_running_integral = do_nr_running_integral(rq); + rq->nr_last_stamp = rq->clock_task; + __add_nr_running(rq, count); + write_seqcount_end(&rq->ave_seqcnt); +} + +static inline void sub_nr_running(struct rq *rq, unsigned count) +{ + write_seqcount_begin(&rq->ave_seqcnt); + rq->nr_running_integral = do_nr_running_integral(rq); + rq->nr_last_stamp = rq->clock_task; + __sub_nr_running(rq, count); + write_seqcount_end(&rq->ave_seqcnt); +} +#else +#define add_nr_running __add_nr_running +#define sub_nr_running __sub_nr_running +#endif + static inline void rq_last_tick_reset(struct rq *rq) { #ifdef CONFIG_NO_HZ_FULL @@ -1601,10 +1688,146 @@ unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) } #endif +#ifdef CONFIG_SMP +static inline unsigned long capacity_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity; +} + +static inline unsigned long capacity_orig_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity_orig; +} + +extern unsigned int sysctl_sched_use_walt_cpu_util; +extern unsigned int walt_ravg_window; +extern 
unsigned int walt_disabled; + +/* + * cpu_util returns the amount of capacity of a CPU that is used by CFS + * tasks. The unit of the return value must be the one of capacity so we can + * compare the utilization with the capacity of the CPU that is available for + * CFS task (ie cpu_capacity). + * + * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the + * recent utilization of currently non-runnable tasks on a CPU. It represents + * the amount of utilization of a CPU in the range [0..capacity_orig] where + * capacity_orig is the cpu_capacity available at the highest frequency + * (arch_scale_freq_capacity()). + * The utilization of a CPU converges towards a sum equal to or less than the + * current capacity (capacity_curr <= capacity_orig) of the CPU because it is + * the running time on this CPU scaled by capacity_curr. + * + * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even + * higher than capacity_orig because of unfortunate rounding in + * cfs.avg.util_avg or just after migrating tasks and new task wakeups until + * the average stabilizes with the new running time. We need to check that the + * utilization stays within the range of [0..capacity_orig] and cap it if + * necessary. Without utilization capping, a group could be seen as overloaded + * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of + * available capacity. We allow utilization to overshoot capacity_curr (but not + * capacity_orig) as it useful for predicting the capacity required after task + * migrations (scheduler-driven DVFS). + */ +static inline unsigned long __cpu_util(int cpu, int delta) +{ + unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; + unsigned long capacity = capacity_orig_of(cpu); + +#ifdef CONFIG_SCHED_WALT + if (!walt_disabled && sysctl_sched_use_walt_cpu_util) { + util = cpu_rq(cpu)->prev_runnable_sum << SCHED_CAPACITY_SHIFT; + do_div(util, walt_ravg_window); + } +#endif + delta += util; + if (delta < 0) + return 0; + + return (delta >= capacity) ? capacity : delta; +} + +static inline unsigned long cpu_util(int cpu) +{ + return __cpu_util(cpu, 0); +} + +#endif + +#ifdef CONFIG_CPU_FREQ_GOV_SCHED +#define capacity_max SCHED_CAPACITY_SCALE +extern unsigned int capacity_margin; +extern struct static_key __sched_freq; + +static inline bool sched_freq(void) +{ + return static_key_false(&__sched_freq); +} + +DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); +void update_cpu_capacity_request(int cpu, bool request); + +static inline void set_cfs_cpu_capacity(int cpu, bool request, + unsigned long capacity) +{ + struct sched_capacity_reqs *scr = &per_cpu(cpu_sched_capacity_reqs, cpu); + +#ifdef CONFIG_SCHED_WALT + if (!walt_disabled && sysctl_sched_use_walt_cpu_util) { + int rtdl = scr->rt + scr->dl; + /* + * WALT tracks the utilization of a CPU considering the load + * generated by all the scheduling classes. + * Since the following call to: + * update_cpu_capacity + * is already adding the RT and DL utilizations let's remove + * these contributions from the WALT signal. 
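+	 * For example, if WALT reports a request of 600 for this CPU while
+	 * scr->rt + scr->dl = 150, the CFS request recorded below becomes
+	 * 450; if the RT+DL share already meets or exceeds the WALT figure,
+	 * the CFS request is clamped to zero.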
+ */ + if (capacity > rtdl) + capacity -= rtdl; + else + capacity = 0; + } +#endif + if (scr->cfs != capacity) { + scr->cfs = capacity; + update_cpu_capacity_request(cpu, request); + } +} + +static inline void set_rt_cpu_capacity(int cpu, bool request, + unsigned long capacity) +{ + if (per_cpu(cpu_sched_capacity_reqs, cpu).rt != capacity) { + per_cpu(cpu_sched_capacity_reqs, cpu).rt = capacity; + update_cpu_capacity_request(cpu, request); + } +} + +static inline void set_dl_cpu_capacity(int cpu, bool request, + unsigned long capacity) +{ + if (per_cpu(cpu_sched_capacity_reqs, cpu).dl != capacity) { + per_cpu(cpu_sched_capacity_reqs, cpu).dl = capacity; + update_cpu_capacity_request(cpu, request); + } +} +#else +static inline bool sched_freq(void) { return false; } +static inline void set_cfs_cpu_capacity(int cpu, bool request, + unsigned long capacity) +{ } +static inline void set_rt_cpu_capacity(int cpu, bool request, + unsigned long capacity) +{ } +static inline void set_dl_cpu_capacity(int cpu, bool request, + unsigned long capacity) +{ } +#endif + static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq)); - sched_avg_update(rq); } #else static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { } @@ -1639,6 +1862,9 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); } +extern struct rq *lock_rq_of(struct task_struct *p, struct rq_flags *flags); +extern void unlock_rq_of(struct rq *rq, struct task_struct *p, struct rq_flags *flags); + #ifdef CONFIG_SMP #ifdef CONFIG_PREEMPT @@ -1711,7 +1937,8 @@ static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest) static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) __releases(busiest->lock) { - raw_spin_unlock(&busiest->lock); + if (this_rq != busiest) + raw_spin_unlock(&busiest->lock); lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); } diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 427bc8f8b88cc8b96ffcec69aa4c43d82817f038..a44076934968b5d2b1ba768dc4e474c6bad2dca1 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -1,4 +1,5 @@ #include "sched.h" +#include "walt.h" /* * stop-task scheduling class. 
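The update_capacity_of() helper called from the fair.c hunks above (after attach_tasks(), detach_tasks() and active balancing) is not itself shown in this section. Built on the CONFIG_CPU_FREQ_GOV_SCHED interface declared in the sched.h hunk above, a minimal sketch of what such a helper amounts to is the following; the in-tree version may additionally fold in SchedTune boosting:

	/* Sketch only -- the real helper lives in fair.c and may differ. */
	static void update_capacity_of(int cpu)
	{
		unsigned long req_cap;

		if (!sched_freq())
			return;

		/* Express current utilization on the 0..SCHED_CAPACITY_SCALE scale. */
		req_cap = cpu_util(cpu) * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);

		/* Ask the schedfreq governor for at least this much CFS capacity. */
		set_cfs_cpu_capacity(cpu, true, req_cap);
	}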
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 5a09d5ae35eb25b8a31eb2b66e7da3a24a296010..5e5811c3c090184f1faafb8211277033d7965073 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -3,30 +3,109 @@ #include #include #include +#include +#include +#include +#include #include "sched.h" +#include "tune.h" unsigned int sysctl_sched_cfs_boost __read_mostly; #ifdef CONFIG_CGROUP_SCHEDTUNE +static bool schedtune_initialized = false; +#endif /* CONFIG_CGROUP_SCHEDTUNE */ -#ifdef CONFIG_SCHED_HMP -struct schedtune; -static inline void init_sched_boost(struct schedtune *st); -static void schedtune_attach(struct cgroup_taskset *tset); -static u64 sched_boost_override_read(struct cgroup_subsys_state *css, - struct cftype *cft); -static int sched_boost_override_write(struct cgroup_subsys_state *css, - struct cftype *cft, u64 override); -static u64 sched_boost_enabled_read(struct cgroup_subsys_state *css, - struct cftype *cft); -static int sched_boost_enabled_write(struct cgroup_subsys_state *css, - struct cftype *cft, u64 enable); -static u64 sched_colocate_read(struct cgroup_subsys_state *css, - struct cftype *cft); -static int sched_colocate_write(struct cgroup_subsys_state *css, - struct cftype *cft, u64 colocate); -#endif /* CONFIG_SCHED_HMP */ +unsigned int sysctl_sched_cfs_boost __read_mostly; + +extern struct target_nrg schedtune_target_nrg; + +/* Performance Boost region (B) threshold params */ +static int perf_boost_idx; + +/* Performance Constraint region (C) threshold params */ +static int perf_constrain_idx; + +/** + * Performance-Energy (P-E) Space thresholds constants + */ +struct threshold_params { + int nrg_gain; + int cap_gain; +}; + +/* + * System specific P-E space thresholds constants + */ +static struct threshold_params +threshold_gains[] = { + { 0, 5 }, /* < 10% */ + { 1, 5 }, /* < 20% */ + { 2, 5 }, /* < 30% */ + { 3, 5 }, /* < 40% */ + { 4, 5 }, /* < 50% */ + { 5, 4 }, /* < 60% */ + { 5, 3 }, /* < 70% */ + { 5, 2 }, /* < 80% */ + { 5, 1 }, /* < 90% */ + { 5, 0 } /* <= 100% */ +}; + +static int +__schedtune_accept_deltas(int nrg_delta, int cap_delta, + int perf_boost_idx, int perf_constrain_idx) +{ + int payoff = -INT_MAX; + int gain_idx = -1; + + /* Performance Boost (B) region */ + if (nrg_delta >= 0 && cap_delta > 0) + gain_idx = perf_boost_idx; + /* Performance Constraint (C) region */ + else if (nrg_delta < 0 && cap_delta <= 0) + gain_idx = perf_constrain_idx; + + /* Default: reject schedule candidate */ + if (gain_idx == -1) + return payoff; + + /* + * Evaluate "Performance Boost" vs "Energy Increase" + * + * - Performance Boost (B) region + * + * Condition: nrg_delta > 0 && cap_delta > 0 + * Payoff criteria: + * cap_gain / nrg_gain < cap_delta / nrg_delta = + * cap_gain * nrg_delta < cap_delta * nrg_gain + * Note that since both nrg_gain and nrg_delta are positive, the + * inequality does not change. Thus: + * + * payoff = (cap_delta * nrg_gain) - (cap_gain * nrg_delta) + * + * - Performance Constraint (C) region + * + * Condition: nrg_delta < 0 && cap_delta < 0 + * payoff criteria: + * cap_gain / nrg_gain > cap_delta / nrg_delta = + * cap_gain * nrg_delta < cap_delta * nrg_gain + * Note that since nrg_gain > 0 while nrg_delta < 0, the + * inequality change. Thus: + * + * payoff = (cap_delta * nrg_gain) - (cap_gain * nrg_delta) + * + * This means that, in case of same positive defined {cap,nrg}_gain + * for both the B and C regions, we can use the same payoff formula + * where a positive value represents the accept condition. 
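+	 * Worked example for the "< 30%" bucket (nrg_gain = 2, cap_gain = 5):
+	 * a candidate with cap_delta = 30 and nrg_delta = 10 yields
+	 * payoff = 30 * 2 - 10 * 5 = 10 and is accepted, while cap_delta = 20
+	 * with the same nrg_delta yields 20 * 2 - 10 * 5 = -10 and is rejected.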
+ */ + payoff = cap_delta * threshold_gains[gain_idx].nrg_gain; + payoff -= nrg_delta * threshold_gains[gain_idx].cap_gain; + + return payoff; +} + +#ifdef CONFIG_CGROUP_SCHEDTUNE /* * EAS scheduler tunables for task groups. @@ -68,8 +147,17 @@ struct schedtune { /* Controls whether further updates are allowed to the colocate flag */ bool colocate_update_disabled; -#endif +#endif /* CONFIG_SCHED_HMP */ + + /* Performance Boost (B) region threshold params */ + int perf_boost_idx; + + /* Performance Constraint (C) region threshold params */ + int perf_constrain_idx; + /* Hint to bias scheduling of tasks on that SchedTune CGroup + * towards idle CPUs */ + int prefer_idle; }; static inline struct schedtune *css_st(struct cgroup_subsys_state *css) @@ -106,8 +194,42 @@ root_schedtune = { .colocate = false, .colocate_update_disabled = false, #endif + .perf_boost_idx = 0, + .perf_constrain_idx = 0, + .prefer_idle = 0, }; +int +schedtune_accept_deltas(int nrg_delta, int cap_delta, + struct task_struct *task) +{ + struct schedtune *ct; + int perf_boost_idx; + int perf_constrain_idx; + + /* Optimal (O) region */ + if (nrg_delta < 0 && cap_delta > 0) { + trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0); + return INT_MAX; + } + + /* Suboptimal (S) region */ + if (nrg_delta > 0 && cap_delta < 0) { + trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5); + return -INT_MAX; + } + + /* Get task specific perf Boost/Constraints indexes */ + rcu_read_lock(); + ct = task_schedtune(task); + perf_boost_idx = ct->perf_boost_idx; + perf_constrain_idx = ct->perf_constrain_idx; + rcu_read_unlock(); + + return __schedtune_accept_deltas(nrg_delta, cap_delta, + perf_boost_idx, perf_constrain_idx); +} + /* * Maximum number of boost groups to support * When per-task boosting is used we still allow only limited number of @@ -136,20 +258,470 @@ static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = { * maximum per-CPU boosting value. 
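 * For example, a CPU with runnable tasks from two cgroups boosted to 10
 * and 50 has boost_max = 50; when the last runnable task of the 50 group
 * dequeues, schedtune_cpu_update() below recomputes the maximum from the
 * groups that still have runnable tasks on that CPU.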
*/ struct boost_groups { + bool idle; /* Maximum boost value for all RUNNABLE tasks on a CPU */ - unsigned boost_max; + int boost_max; struct { /* The boost for tasks on that boost group */ - unsigned boost; + int boost; /* Count of RUNNABLE tasks on that boost group */ unsigned tasks; } group[BOOSTGROUPS_COUNT]; + /* CPU's boost group locking */ + raw_spinlock_t lock; }; /* Boost groups affecting each CPU in the system */ DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups); +#ifdef CONFIG_SCHED_HMP +static inline void init_sched_boost(struct schedtune *st) +{ + st->sched_boost_no_override = false; + st->sched_boost_enabled = true; + st->sched_boost_enabled_backup = st->sched_boost_enabled; + st->colocate = false; + st->colocate_update_disabled = false; +} + +bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2) +{ + return task_schedtune(tsk1) == task_schedtune(tsk2); +} + +void update_cgroup_boost_settings(void) +{ + int i; + + for (i = 0; i < BOOSTGROUPS_COUNT; i++) { + if (!allocated_group[i]) + break; + + if (allocated_group[i]->sched_boost_no_override) + continue; + + allocated_group[i]->sched_boost_enabled = false; + } +} + +void restore_cgroup_boost_settings(void) +{ + int i; + + for (i = 0; i < BOOSTGROUPS_COUNT; i++) { + if (!allocated_group[i]) + break; + + allocated_group[i]->sched_boost_enabled = + allocated_group[i]->sched_boost_enabled_backup; + } +} + +bool task_sched_boost(struct task_struct *p) +{ + struct schedtune *st = task_schedtune(p); + + return st->sched_boost_enabled; +} + +static u64 +sched_boost_override_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->sched_boost_no_override; +} + +static int sched_boost_override_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 override) +{ + struct schedtune *st = css_st(css); + + st->sched_boost_no_override = !!override; + + return 0; +} + +#endif /* CONFIG_SCHED_HMP */ + +static void +schedtune_cpu_update(int cpu) +{ + struct boost_groups *bg; + int boost_max; + int idx; + + bg = &per_cpu(cpu_boost_groups, cpu); + + /* The root boost group is always active */ + boost_max = bg->group[0].boost; + for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) { + /* + * A boost group affects a CPU only if it has + * RUNNABLE tasks on that CPU + */ + if (bg->group[idx].tasks == 0) + continue; + + boost_max = max(boost_max, bg->group[idx].boost); + } + /* Ensures boost_max is non-negative when all cgroup boost values + * are neagtive. 
Avoids under-accounting of cpu capacity which may cause + * task stacking and frequency spikes.*/ + boost_max = max(boost_max, 0); + bg->boost_max = boost_max; +} + +static int +schedtune_boostgroup_update(int idx, int boost) +{ + struct boost_groups *bg; + int cur_boost_max; + int old_boost; + int cpu; + + /* Update per CPU boost groups */ + for_each_possible_cpu(cpu) { + bg = &per_cpu(cpu_boost_groups, cpu); + + /* + * Keep track of current boost values to compute the per CPU + * maximum only when it has been affected by the new value of + * the updated boost group + */ + cur_boost_max = bg->boost_max; + old_boost = bg->group[idx].boost; + + /* Update the boost value of this boost group */ + bg->group[idx].boost = boost; + + /* Check if this update increase current max */ + if (boost > cur_boost_max && bg->group[idx].tasks) { + bg->boost_max = boost; + trace_sched_tune_boostgroup_update(cpu, 1, bg->boost_max); + continue; + } + + /* Check if this update has decreased current max */ + if (cur_boost_max == old_boost && old_boost > boost) { + schedtune_cpu_update(cpu); + trace_sched_tune_boostgroup_update(cpu, -1, bg->boost_max); + continue; + } + + trace_sched_tune_boostgroup_update(cpu, 0, bg->boost_max); + } + + return 0; +} + +#define ENQUEUE_TASK 1 +#define DEQUEUE_TASK -1 + +static inline void +schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count) +{ + struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu); + int tasks = bg->group[idx].tasks + task_count; + + /* Update boosted tasks count while avoiding to make it negative */ + bg->group[idx].tasks = max(0, tasks); + + trace_sched_tune_tasks_update(p, cpu, tasks, idx, + bg->group[idx].boost, bg->boost_max); + + /* Boost group activation or deactivation on that RQ */ + if (tasks == 1 || tasks == 0) + schedtune_cpu_update(cpu); +} + +/* + * NOTE: This function must be called while holding the lock on the CPU RQ + */ +void schedtune_enqueue_task(struct task_struct *p, int cpu) +{ + struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu); + unsigned long irq_flags; + struct schedtune *st; + int idx; + + if (!unlikely(schedtune_initialized)) + return; + + /* + * When a task is marked PF_EXITING by do_exit() it's going to be + * dequeued and enqueued multiple times in the exit path. + * Thus we avoid any further update, since we do not want to change + * CPU boosting while the task is exiting. + */ + if (p->flags & PF_EXITING) + return; + + /* + * Boost group accouting is protected by a per-cpu lock and requires + * interrupt to be disabled to avoid race conditions for example on + * do_exit()::cgroup_exit() and task migration. 
+ */ + raw_spin_lock_irqsave(&bg->lock, irq_flags); + rcu_read_lock(); + + st = task_schedtune(p); + idx = st->idx; + + schedtune_tasks_update(p, cpu, idx, ENQUEUE_TASK); + + rcu_read_unlock(); + raw_spin_unlock_irqrestore(&bg->lock, irq_flags); +} + +int schedtune_can_attach(struct cgroup_taskset *tset) +{ + struct task_struct *task; + struct cgroup_subsys_state *css; + struct boost_groups *bg; + struct rq_flags irq_flags; + unsigned int cpu; + struct rq *rq; + int src_bg; /* Source boost group index */ + int dst_bg; /* Destination boost group index */ + int tasks; + + if (!unlikely(schedtune_initialized)) + return 0; + + + cgroup_taskset_for_each(task, css, tset) { + + /* + * Lock the CPU's RQ the task is enqueued to avoid race + * conditions with migration code while the task is being + * accounted + */ + rq = lock_rq_of(task, &irq_flags); + + if (!task->on_rq) { + unlock_rq_of(rq, task, &irq_flags); + continue; + } + + /* + * Boost group accouting is protected by a per-cpu lock and requires + * interrupt to be disabled to avoid race conditions on... + */ + cpu = cpu_of(rq); + bg = &per_cpu(cpu_boost_groups, cpu); + raw_spin_lock(&bg->lock); + + dst_bg = css_st(css)->idx; + src_bg = task_schedtune(task)->idx; + + /* + * Current task is not changing boostgroup, which can + * happen when the new hierarchy is in use. + */ + if (unlikely(dst_bg == src_bg)) { + raw_spin_unlock(&bg->lock); + unlock_rq_of(rq, task, &irq_flags); + continue; + } + + /* + * This is the case of a RUNNABLE task which is switching its + * current boost group. + */ + + /* Move task from src to dst boost group */ + tasks = bg->group[src_bg].tasks - 1; + bg->group[src_bg].tasks = max(0, tasks); + bg->group[dst_bg].tasks += 1; + + raw_spin_unlock(&bg->lock); + unlock_rq_of(rq, task, &irq_flags); + + /* Update CPU boost group */ + if (bg->group[src_bg].tasks == 0 || bg->group[dst_bg].tasks == 1) + schedtune_cpu_update(task_cpu(task)); + + } + + return 0; +} + +#ifdef CONFIG_SCHED_HMP +static u64 sched_boost_enabled_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->sched_boost_enabled; +} + +static int sched_boost_enabled_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 enable) +{ + struct schedtune *st = css_st(css); + + st->sched_boost_enabled = !!enable; + st->sched_boost_enabled_backup = st->sched_boost_enabled; + + return 0; +} + +static u64 sched_colocate_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->colocate; +} + +static int sched_colocate_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 colocate) +{ + struct schedtune *st = css_st(css); + + if (st->colocate_update_disabled) + return -EPERM; + + st->colocate = !!colocate; + st->colocate_update_disabled = true; + return 0; +} + +#else /* CONFIG_SCHED_HMP */ + +static inline void init_sched_boost(struct schedtune *st) { } + +#endif /* CONFIG_SCHED_HMP */ + +void schedtune_cancel_attach(struct cgroup_taskset *tset) +{ + /* This can happen only if SchedTune controller is mounted with + * other hierarchies ane one of them fails. 
Since usually SchedTune is + * mouted on its own hierarcy, for the time being we do not implement + * a proper rollback mechanism */ + WARN(1, "SchedTune cancel attach not implemented"); +} + +/* + * NOTE: This function must be called while holding the lock on the CPU RQ + */ +void schedtune_dequeue_task(struct task_struct *p, int cpu) +{ + struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu); + unsigned long irq_flags; + struct schedtune *st; + int idx; + + if (!unlikely(schedtune_initialized)) + return; + + /* + * When a task is marked PF_EXITING by do_exit() it's going to be + * dequeued and enqueued multiple times in the exit path. + * Thus we avoid any further update, since we do not want to change + * CPU boosting while the task is exiting. + * The last dequeue is already enforce by the do_exit() code path + * via schedtune_exit_task(). + */ + if (p->flags & PF_EXITING) + return; + + /* + * Boost group accouting is protected by a per-cpu lock and requires + * interrupt to be disabled to avoid race conditions on... + */ + raw_spin_lock_irqsave(&bg->lock, irq_flags); + rcu_read_lock(); + + st = task_schedtune(p); + idx = st->idx; + + schedtune_tasks_update(p, cpu, idx, DEQUEUE_TASK); + + rcu_read_unlock(); + raw_spin_unlock_irqrestore(&bg->lock, irq_flags); +} + +void schedtune_exit_task(struct task_struct *tsk) +{ + struct schedtune *st; + struct rq_flags irq_flags; + unsigned int cpu; + struct rq *rq; + int idx; + + if (!unlikely(schedtune_initialized)) + return; + + rq = lock_rq_of(tsk, &irq_flags); + rcu_read_lock(); + + cpu = cpu_of(rq); + st = task_schedtune(tsk); + idx = st->idx; + schedtune_tasks_update(tsk, cpu, idx, DEQUEUE_TASK); + + rcu_read_unlock(); + unlock_rq_of(rq, tsk, &irq_flags); +} + +int schedtune_cpu_boost(int cpu) +{ + struct boost_groups *bg; + + bg = &per_cpu(cpu_boost_groups, cpu); + return bg->boost_max; +} + +int schedtune_task_boost(struct task_struct *p) +{ + struct schedtune *st; + int task_boost; + + /* Get task boost value */ + rcu_read_lock(); + st = task_schedtune(p); + task_boost = st->boost; + rcu_read_unlock(); + + return task_boost; +} + +int schedtune_prefer_idle(struct task_struct *p) +{ + struct schedtune *st; + int prefer_idle; + + /* Get prefer_idle value */ + rcu_read_lock(); + st = task_schedtune(p); + prefer_idle = st->prefer_idle; + rcu_read_unlock(); + + return prefer_idle; +} + static u64 +prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->prefer_idle; +} + +static int +prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft, + u64 prefer_idle) +{ + struct schedtune *st = css_st(css); + st->prefer_idle = prefer_idle; + + return 0; +} + +static s64 boost_read(struct cgroup_subsys_state *css, struct cftype *cft) { struct schedtune *st = css_st(css); @@ -157,28 +729,63 @@ boost_read(struct cgroup_subsys_state *css, struct cftype *cft) return st->boost; } +#ifdef CONFIG_SCHED_HMP +static void schedtune_attach(struct cgroup_taskset *tset) +{ + struct task_struct *task; + struct cgroup_subsys_state *css; + struct schedtune *st; + bool colocate; + + cgroup_taskset_first(tset, &css); + st = css_st(css); + + colocate = st->colocate; + + cgroup_taskset_for_each(task, css, tset) + sync_cgroup_colocation(task, colocate); + +} +#endif + static int boost_write(struct cgroup_subsys_state *css, struct cftype *cft, - u64 boost) + s64 boost) { struct schedtune *st = css_st(css); + unsigned threshold_idx; + int boost_pct; - if (boost < 0 || boost > 100) + if 
(boost < -100 || boost > 100) return -EINVAL; + boost_pct = boost; + + /* + * Update threshold params for Performance Boost (B) + * and Performance Constraint (C) regions. + * The current implementatio uses the same cuts for both + * B and C regions. + */ + threshold_idx = clamp(boost_pct, 0, 99) / 10; + st->perf_boost_idx = threshold_idx; + st->perf_constrain_idx = threshold_idx; st->boost = boost; - if (css == &root_schedtune.css) + if (css == &root_schedtune.css) { sysctl_sched_cfs_boost = boost; + perf_boost_idx = threshold_idx; + perf_constrain_idx = threshold_idx; + } + + /* Update CPU boost */ + schedtune_boostgroup_update(st->idx, st->boost); + + trace_sched_tune_config(st->boost); return 0; } static struct cftype files[] = { - { - .name = "boost", - .read_u64 = boost_read, - .write_u64 = boost_write, - }, #ifdef CONFIG_SCHED_HMP { .name = "sched_boost_no_override", @@ -196,32 +803,36 @@ static struct cftype files[] = { .write_u64 = sched_colocate_write, }, #endif + { + .name = "boost", + .read_s64 = boost_read, + .write_s64 = boost_write, + }, + { + .name = "prefer_idle", + .read_u64 = prefer_idle_read, + .write_u64 = prefer_idle_write, + }, { } /* terminate */ }; -static int -schedtune_boostgroup_init(struct schedtune *st) -{ - /* Keep track of allocated boost groups */ - allocated_group[st->idx] = st; - - return 0; -} static int -schedtune_init(void) +schedtune_boostgroup_init(struct schedtune *st) { struct boost_groups *bg; int cpu; + /* Keep track of allocated boost groups */ + allocated_group[st->idx] = st; + /* Initialize the per CPU boost groups */ for_each_possible_cpu(cpu) { bg = &per_cpu(cpu_boost_groups, cpu); - memset(bg, 0, sizeof(struct boost_groups)); + bg->group[st->idx].boost = 0; + bg->group[st->idx].tasks = 0; } - pr_info(" schedtune configured to support %d boost groups\n", - BOOSTGROUPS_COUNT); return 0; } @@ -231,10 +842,8 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css) struct schedtune *st; int idx; - if (!parent_css) { - schedtune_init(); + if (!parent_css) return &root_schedtune.css; - } /* Allow only single level hierachies */ if (parent_css != &root_schedtune.css) { @@ -273,6 +882,9 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css) static void schedtune_boostgroup_release(struct schedtune *st) { + /* Reset this boost group */ + schedtune_boostgroup_update(st->idx, 0); + /* Keep track of allocated boost groups */ allocated_group[st->idx] = NULL; } @@ -289,156 +901,239 @@ schedtune_css_free(struct cgroup_subsys_state *css) struct cgroup_subsys schedtune_cgrp_subsys = { .css_alloc = schedtune_css_alloc, .css_free = schedtune_css_free, - .legacy_cftypes = files, - .early_init = 1, .allow_attach = subsys_cgroup_allow_attach, .attach = schedtune_attach, + .can_attach = schedtune_can_attach, + .cancel_attach = schedtune_cancel_attach, + .legacy_cftypes = files, + .early_init = 1, }; -#endif /* CONFIG_CGROUP_SCHEDTUNE */ - -int -sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) +static inline void +schedtune_init_cgroups(void) { - int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + struct boost_groups *bg; + int cpu; - if (ret || !write) - return ret; + /* Initialize the per CPU boost groups */ + for_each_possible_cpu(cpu) { + bg = &per_cpu(cpu_boost_groups, cpu); + memset(bg, 0, sizeof(struct boost_groups)); + raw_spin_lock_init(&bg->lock); + } - return 0; + pr_info("schedtune: configured to support %d boost groups\n", + BOOSTGROUPS_COUNT); + + 
schedtune_initialized = true; } -/* QHMP/Zone implementation s*/ +#else /* CONFIG_CGROUP_SCHEDTUNE */ -static void schedtune_attach(struct cgroup_taskset *tset) +int +schedtune_accept_deltas(int nrg_delta, int cap_delta, + struct task_struct *task) { - struct task_struct *task; - struct cgroup_subsys_state *css; - struct schedtune *st; - bool colocate; - - cgroup_taskset_first(tset, &css); - st = css_st(css); + /* Optimal (O) region */ + if (nrg_delta < 0 && cap_delta > 0) { + trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0); + return INT_MAX; + } - colocate = st->colocate; + /* Suboptimal (S) region */ + if (nrg_delta > 0 && cap_delta < 0) { + trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5); + return -INT_MAX; + } - cgroup_taskset_for_each(task, css, tset) - sync_cgroup_colocation(task, colocate); + return __schedtune_accept_deltas(nrg_delta, cap_delta, + perf_boost_idx, perf_constrain_idx); } -#ifdef CONFIG_SCHED_HMP -static inline void init_sched_boost(struct schedtune *st) -{ - st->sched_boost_no_override = false; - st->sched_boost_enabled = true; - st->sched_boost_enabled_backup = st->sched_boost_enabled; - st->colocate = false; - st->colocate_update_disabled = false; -} +#endif /* CONFIG_CGROUP_SCHEDTUNE */ -bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2) +int +sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) { - return task_schedtune(tsk1) == task_schedtune(tsk2); -} + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + unsigned threshold_idx; + int boost_pct; -void update_cgroup_boost_settings(void) -{ - int i; + if (ret || !write) + return ret; - for (i = 0; i < BOOSTGROUPS_COUNT; i++) { - if (!allocated_group[i]) - break; + if (sysctl_sched_cfs_boost < -100 || sysctl_sched_cfs_boost > 100) + return -EINVAL; + boost_pct = sysctl_sched_cfs_boost; - if (allocated_group[i]->sched_boost_no_override) - continue; + /* + * Update threshold params for Performance Boost (B) + * and Performance Constraint (C) regions. + * The current implementatio uses the same cuts for both + * B and C regions. 
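+ * For example, a boost of 35% maps to threshold_idx = 3, selecting the
+ * "< 40%" row of threshold_gains (nrg_gain = 3, cap_gain = 5), while a
+ * boost of 100% clamps to 99 and selects the last row (5, 0).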
+ */ + threshold_idx = clamp(boost_pct, 0, 99) / 10; + perf_boost_idx = threshold_idx; + perf_constrain_idx = threshold_idx; - allocated_group[i]->sched_boost_enabled = false; - } + return 0; } -void restore_cgroup_boost_settings(void) +#ifdef CONFIG_SCHED_DEBUG +static void +schedtune_test_nrg(unsigned long delta_pwr) { - int i; + unsigned long test_delta_pwr; + unsigned long test_norm_pwr; + int idx; - for (i = 0; i < BOOSTGROUPS_COUNT; i++) { - if (!allocated_group[i]) - break; + /* + * Check normalization constants using some constant system + * energy values + */ + pr_info("schedtune: verify normalization constants...\n"); + for (idx = 0; idx < 6; ++idx) { + test_delta_pwr = delta_pwr >> idx; - allocated_group[i]->sched_boost_enabled = - allocated_group[i]->sched_boost_enabled_backup; + /* Normalize on max energy for target platform */ + test_norm_pwr = reciprocal_divide( + test_delta_pwr << SCHED_CAPACITY_SHIFT, + schedtune_target_nrg.rdiv); + + pr_info("schedtune: max_pwr/2^%d: %4lu => norm_pwr: %5lu\n", + idx, test_delta_pwr, test_norm_pwr); } } +#else +#define schedtune_test_nrg(delta_pwr) +#endif -bool task_sched_boost(struct task_struct *p) +/* + * Compute the min/max power consumption of a cluster and all its CPUs + */ +static void +schedtune_add_cluster_nrg( + struct sched_domain *sd, + struct sched_group *sg, + struct target_nrg *ste) { - struct schedtune *st = task_schedtune(p); + struct sched_domain *sd2; + struct sched_group *sg2; - return st->sched_boost_enabled; -} + struct cpumask *cluster_cpus; + char str[32]; -static u64 -sched_boost_override_read(struct cgroup_subsys_state *css, - struct cftype *cft) -{ - struct schedtune *st = css_st(css); - - return st->sched_boost_no_override; -} + unsigned long min_pwr; + unsigned long max_pwr; + int cpu; -static int sched_boost_override_write(struct cgroup_subsys_state *css, - struct cftype *cft, u64 override) -{ - struct schedtune *st = css_st(css); + /* Get Cluster energy using EM data for the first CPU */ + cluster_cpus = sched_group_cpus(sg); + snprintf(str, 32, "CLUSTER[%*pbl]", + cpumask_pr_args(cluster_cpus)); - st->sched_boost_no_override = !!override; + min_pwr = sg->sge->idle_states[sg->sge->nr_idle_states - 1].power; + max_pwr = sg->sge->cap_states[sg->sge->nr_cap_states - 1].power; + pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n", + str, min_pwr, max_pwr); - return 0; + /* + * Keep track of this cluster's energy in the computation of the + * overall system energy + */ + ste->min_power += min_pwr; + ste->max_power += max_pwr; + + /* Get CPU energy using EM data for each CPU in the group */ + for_each_cpu(cpu, cluster_cpus) { + /* Get a SD view for the specific CPU */ + for_each_domain(cpu, sd2) { + /* Get the CPU group */ + sg2 = sd2->groups; + min_pwr = sg2->sge->idle_states[sg2->sge->nr_idle_states - 1].power; + max_pwr = sg2->sge->cap_states[sg2->sge->nr_cap_states - 1].power; + + ste->min_power += min_pwr; + ste->max_power += max_pwr; + + snprintf(str, 32, "CPU[%d]", cpu); + pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n", + str, min_pwr, max_pwr); + + /* + * Assume we have EM data only at the CPU and + * the upper CLUSTER level + */ + BUG_ON(!cpumask_equal( + sched_group_cpus(sg), + sched_group_cpus(sd2->parent->groups) + )); + break; + } + } } -static u64 sched_boost_enabled_read(struct cgroup_subsys_state *css, - struct cftype *cft) +/* + * Initialize the constants required to compute normalized energy. 
+ * The values of these constants depends on the EM data for the specific + * target system and topology. + * Thus, this function is expected to be called by the code + * that bind the EM to the topology information. + */ +static int +schedtune_init(void) { - struct schedtune *st = css_st(css); - - return st->sched_boost_enabled; -} + struct target_nrg *ste = &schedtune_target_nrg; + unsigned long delta_pwr = 0; + struct sched_domain *sd; + struct sched_group *sg; -static int sched_boost_enabled_write(struct cgroup_subsys_state *css, - struct cftype *cft, u64 enable) -{ - struct schedtune *st = css_st(css); + pr_info("schedtune: init normalization constants...\n"); + ste->max_power = 0; + ste->min_power = 0; - st->sched_boost_enabled = !!enable; - st->sched_boost_enabled_backup = st->sched_boost_enabled; + rcu_read_lock(); - return 0; -} + /* + * When EAS is in use, we always have a pointer to the highest SD + * which provides EM data. + */ + sd = rcu_dereference(per_cpu(sd_ea, cpumask_first(cpu_online_mask))); + if (!sd) { + pr_info("schedtune: no energy model data\n"); + goto nodata; + } -static u64 sched_colocate_read(struct cgroup_subsys_state *css, - struct cftype *cft) -{ - struct schedtune *st = css_st(css); + sg = sd->groups; + do { + schedtune_add_cluster_nrg(sd, sg, ste); + } while (sg = sg->next, sg != sd->groups); - return st->colocate; -} + rcu_read_unlock(); -static int sched_colocate_write(struct cgroup_subsys_state *css, - struct cftype *cft, u64 colocate) -{ - struct schedtune *st = css_st(css); + pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n", + "SYSTEM", ste->min_power, ste->max_power); - if (st->colocate_update_disabled) - return -EPERM; + /* Compute normalization constants */ + delta_pwr = ste->max_power - ste->min_power; + ste->rdiv = reciprocal_value(delta_pwr); + pr_info("schedtune: using normalization constants mul: %u sh1: %u sh2: %u\n", + ste->rdiv.m, ste->rdiv.sh1, ste->rdiv.sh2); - st->colocate = !!colocate; - st->colocate_update_disabled = true; - return 0; -} + schedtune_test_nrg(delta_pwr); -#else /* CONFIG_SCHED_HMP */ +#ifdef CONFIG_CGROUP_SCHEDTUNE + schedtune_init_cgroups(); +#else + pr_info("schedtune: configured to support global boosting only\n"); +#endif /* CONFIG_CGROUP_SCHEDTUNE */ -static inline void init_sched_boost(struct schedtune *st) { } + return 0; -#endif /* CONFIG_SCHED_HMP */ +nodata: + rcu_read_unlock(); + return -EINVAL; +} +postcore_initcall(schedtune_init); diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h new file mode 100644 index 0000000000000000000000000000000000000000..4f6441771e4c9e169380deb8c3b4c53ae0014c47 --- /dev/null +++ b/kernel/sched/tune.h @@ -0,0 +1,55 @@ + +#ifdef CONFIG_SCHED_TUNE + +#include + +/* + * System energy normalization constants + */ +struct target_nrg { + unsigned long min_power; + unsigned long max_power; + struct reciprocal_value rdiv; +}; + +#ifdef CONFIG_CGROUP_SCHEDTUNE + +int schedtune_cpu_boost(int cpu); +int schedtune_task_boost(struct task_struct *tsk); + +int schedtune_prefer_idle(struct task_struct *tsk); + +void schedtune_exit_task(struct task_struct *tsk); + +void schedtune_enqueue_task(struct task_struct *p, int cpu); +void schedtune_dequeue_task(struct task_struct *p, int cpu); + +#else /* CONFIG_CGROUP_SCHEDTUNE */ + +#define schedtune_cpu_boost(cpu) get_sysctl_sched_cfs_boost() +#define schedtune_task_boost(tsk) get_sysctl_sched_cfs_boost() + +#define schedtune_exit_task(task) do { } while (0) + +#define schedtune_enqueue_task(task, cpu) do { } while (0) +#define 
schedtune_dequeue_task(task, cpu) do { } while (0) + +#endif /* CONFIG_CGROUP_SCHEDTUNE */ + +int schedtune_normalize_energy(int energy); +int schedtune_accept_deltas(int nrg_delta, int cap_delta, + struct task_struct *task); + +#else /* CONFIG_SCHED_TUNE */ + +#define schedtune_cpu_boost(cpu) 0 +#define schedtune_task_boost(tsk) 0 + +#define schedtune_exit_task(task) do { } while (0) + +#define schedtune_enqueue_task(task, cpu) do { } while (0) +#define schedtune_dequeue_task(task, cpu) do { } while (0) + +#define schedtune_accept_deltas(nrg_delta, cap_delta, task) nrg_delta + +#endif /* CONFIG_SCHED_TUNE */ diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c new file mode 100644 index 0000000000000000000000000000000000000000..7f686ffccbf072c57c8e8c990f1042476681b4fd --- /dev/null +++ b/kernel/sched/walt.c @@ -0,0 +1,1168 @@ +/* + * Copyright (c) 2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * + * Window Assisted Load Tracking (WALT) implementation credits: + * Srivatsa Vaddagiri, Steve Muckle, Syed Rameez Mustafa, Joonwoo Park, + * Pavan Kumar Kondeti, Olav Haugan + * + * 2016-03-06: Integration with EAS/refactoring by Vikram Mulukutla + * and Todd Kjos + */ + +#include +#include +#include +#include "sched.h" +#include "walt.h" + +#define WINDOW_STATS_RECENT 0 +#define WINDOW_STATS_MAX 1 +#define WINDOW_STATS_MAX_RECENT_AVG 2 +#define WINDOW_STATS_AVG 3 +#define WINDOW_STATS_INVALID_POLICY 4 + +#define EXITING_TASK_MARKER 0xdeaddead + +static __read_mostly unsigned int walt_ravg_hist_size = 5; +static __read_mostly unsigned int walt_window_stats_policy = + WINDOW_STATS_MAX_RECENT_AVG; +static __read_mostly unsigned int walt_account_wait_time = 1; +static __read_mostly unsigned int walt_freq_account_wait_time = 0; +static __read_mostly unsigned int walt_io_is_busy = 0; + +unsigned int sysctl_sched_walt_init_task_load_pct = 15; + +/* 1 -> use PELT based load stats, 0 -> use window-based load stats */ +unsigned int __read_mostly walt_disabled = 0; + +static unsigned int max_possible_efficiency = 1024; +static unsigned int min_possible_efficiency = 1024; + +/* + * Maximum possible frequency across all cpus. Task demand and cpu + * capacity (cpu_power) metrics are scaled in reference to it. + */ +static unsigned int max_possible_freq = 1; + +/* + * Minimum possible max_freq across all cpus. This will be same as + * max_possible_freq on homogeneous systems and could be different from + * max_possible_freq on heterogenous systems. min_max_freq is used to derive + * capacity (cpu_power) of cpus. 
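+ * For example, on a hypothetical two-cluster system whose clusters max
+ * out at 1.5 GHz and 2.0 GHz, max_possible_freq is 2.0 GHz while
+ * min_max_freq is 1.5 GHz; on a homogeneous system the two are equal.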
+ */ +static unsigned int min_max_freq = 1; + +static unsigned int max_capacity = 1024; +static unsigned int min_capacity = 1024; +static unsigned int max_load_scale_factor = 1024; +static unsigned int max_possible_capacity = 1024; + +/* Mask of all CPUs that have max_possible_capacity */ +static cpumask_t mpc_mask = CPU_MASK_ALL; + +/* Window size (in ns) */ +__read_mostly unsigned int walt_ravg_window = 20000000; + +/* Min window size (in ns) = 10ms */ +#define MIN_SCHED_RAVG_WINDOW 10000000 + +/* Max window size (in ns) = 1s */ +#define MAX_SCHED_RAVG_WINDOW 1000000000 + +static unsigned int sync_cpu; +static ktime_t ktime_last; +static bool walt_ktime_suspended; + +static unsigned int task_load(struct task_struct *p) +{ + return p->ravg.demand; +} + +void +walt_inc_cumulative_runnable_avg(struct rq *rq, + struct task_struct *p) +{ + rq->cumulative_runnable_avg += p->ravg.demand; +} + +void +walt_dec_cumulative_runnable_avg(struct rq *rq, + struct task_struct *p) +{ + rq->cumulative_runnable_avg -= p->ravg.demand; + BUG_ON((s64)rq->cumulative_runnable_avg < 0); +} + +static void +fixup_cumulative_runnable_avg(struct rq *rq, + struct task_struct *p, s64 task_load_delta) +{ + rq->cumulative_runnable_avg += task_load_delta; + if ((s64)rq->cumulative_runnable_avg < 0) + panic("cra less than zero: tld: %lld, task_load(p) = %u\n", + task_load_delta, task_load(p)); +} + +u64 walt_ktime_clock(void) +{ + if (unlikely(walt_ktime_suspended)) + return ktime_to_ns(ktime_last); + return ktime_get_ns(); +} + +static void walt_resume(void) +{ + walt_ktime_suspended = false; +} + +static int walt_suspend(void) +{ + ktime_last = ktime_get(); + walt_ktime_suspended = true; + return 0; +} + +static struct syscore_ops walt_syscore_ops = { + .resume = walt_resume, + .suspend = walt_suspend +}; + +static int __init walt_init_ops(void) +{ + register_syscore_ops(&walt_syscore_ops); + return 0; +} +late_initcall(walt_init_ops); + +void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *cfs_rq, + struct task_struct *p) +{ + cfs_rq->cumulative_runnable_avg += p->ravg.demand; +} + +void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *cfs_rq, + struct task_struct *p) +{ + cfs_rq->cumulative_runnable_avg -= p->ravg.demand; +} + +static int exiting_task(struct task_struct *p) +{ + if (p->flags & PF_EXITING) { + if (p->ravg.sum_history[0] != EXITING_TASK_MARKER) { + p->ravg.sum_history[0] = EXITING_TASK_MARKER; + } + return 1; + } + return 0; +} + +static int __init set_walt_ravg_window(char *str) +{ + get_option(&str, &walt_ravg_window); + + walt_disabled = (walt_ravg_window < MIN_SCHED_RAVG_WINDOW || + walt_ravg_window > MAX_SCHED_RAVG_WINDOW); + return 0; +} + +early_param("walt_ravg_window", set_walt_ravg_window); + +static void +update_window_start(struct rq *rq, u64 wallclock) +{ + s64 delta; + int nr_windows; + + delta = wallclock - rq->window_start; + /* If the MPM global timer is cleared, set delta as 0 to avoid kernel BUG happening */ + if (delta < 0) { + delta = 0; + WARN_ONCE(1, "WALT wallclock appears to have gone backwards or reset\n"); + } + + if (delta < walt_ravg_window) + return; + + nr_windows = div64_u64(delta, walt_ravg_window); + rq->window_start += (u64)nr_windows * (u64)walt_ravg_window; +} + +static u64 scale_exec_time(u64 delta, struct rq *rq) +{ + unsigned int cur_freq = rq->cur_freq; + int sf; + + if (unlikely(cur_freq > max_possible_freq)) + cur_freq = rq->max_possible_freq; + + /* round up div64 */ + delta = div64_u64(delta * cur_freq + max_possible_freq - 1, + max_possible_freq); + + 
sf = DIV_ROUND_UP(rq->efficiency * 1024, max_possible_efficiency); + + delta *= sf; + delta >>= 10; + + return delta; +} + +static int cpu_is_waiting_on_io(struct rq *rq) +{ + if (!walt_io_is_busy) + return 0; + + return atomic_read(&rq->nr_iowait); +} + +void walt_account_irqtime(int cpu, struct task_struct *curr, + u64 delta, u64 wallclock) +{ + struct rq *rq = cpu_rq(cpu); + unsigned long flags, nr_windows; + u64 cur_jiffies_ts; + + raw_spin_lock_irqsave(&rq->lock, flags); + + /* + * cputime (wallclock) uses sched_clock so use the same here for + * consistency. + */ + delta += sched_clock() - wallclock; + cur_jiffies_ts = get_jiffies_64(); + + if (is_idle_task(curr)) + walt_update_task_ravg(curr, rq, IRQ_UPDATE, walt_ktime_clock(), + delta); + + nr_windows = cur_jiffies_ts - rq->irqload_ts; + + if (nr_windows) { + if (nr_windows < 10) { + /* Decay CPU's irqload by 3/4 for each window. */ + rq->avg_irqload *= (3 * nr_windows); + rq->avg_irqload = div64_u64(rq->avg_irqload, + 4 * nr_windows); + } else { + rq->avg_irqload = 0; + } + rq->avg_irqload += rq->cur_irqload; + rq->cur_irqload = 0; + } + + rq->cur_irqload += delta; + rq->irqload_ts = cur_jiffies_ts; + raw_spin_unlock_irqrestore(&rq->lock, flags); +} + + +#define WALT_HIGH_IRQ_TIMEOUT 3 + +u64 walt_irqload(int cpu) { + struct rq *rq = cpu_rq(cpu); + s64 delta; + delta = get_jiffies_64() - rq->irqload_ts; + + /* + * Current context can be preempted by irq and rq->irqload_ts can be + * updated by irq context so that delta can be negative. + * But this is okay and we can safely return as this means there + * was recent irq occurrence. + */ + + if (delta < WALT_HIGH_IRQ_TIMEOUT) + return rq->avg_irqload; + else + return 0; +} + +int walt_cpu_high_irqload(int cpu) { + return walt_irqload(cpu) >= sysctl_sched_walt_cpu_high_irqload; +} + +static int account_busy_for_cpu_time(struct rq *rq, struct task_struct *p, + u64 irqtime, int event) +{ + if (is_idle_task(p)) { + /* TASK_WAKE && TASK_MIGRATE is not possible on idle task! */ + if (event == PICK_NEXT_TASK) + return 0; + + /* PUT_PREV_TASK, TASK_UPDATE && IRQ_UPDATE are left */ + return irqtime || cpu_is_waiting_on_io(rq); + } + + if (event == TASK_WAKE) + return 0; + + if (event == PUT_PREV_TASK || event == IRQ_UPDATE || + event == TASK_UPDATE) + return 1; + + /* Only TASK_MIGRATE && PICK_NEXT_TASK left */ + return walt_freq_account_wait_time; +} + +/* + * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum) + */ +static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, + int event, u64 wallclock, u64 irqtime) +{ + int new_window, nr_full_windows = 0; + int p_is_curr_task = (p == rq->curr); + u64 mark_start = p->ravg.mark_start; + u64 window_start = rq->window_start; + u32 window_size = walt_ravg_window; + u64 delta; + + new_window = mark_start < window_start; + if (new_window) { + nr_full_windows = div64_u64((window_start - mark_start), + window_size); + if (p->ravg.active_windows < USHRT_MAX) + p->ravg.active_windows++; + } + + /* Handle per-task window rollover. We don't care about the idle + * task or exiting tasks. */ + if (new_window && !is_idle_task(p) && !exiting_task(p)) { + u32 curr_window = 0; + + if (!nr_full_windows) + curr_window = p->ravg.curr_window; + + p->ravg.prev_window = curr_window; + p->ravg.curr_window = 0; + } + + if (!account_busy_for_cpu_time(rq, p, irqtime, event)) { + /* account_busy_for_cpu_time() = 0, so no update to the + * task's current window needs to be made. 
This could be + * for example + * + * - a wakeup event on a task within the current + * window (!new_window below, no action required), + * - switching to a new task from idle (PICK_NEXT_TASK) + * in a new window where irqtime is 0 and we aren't + * waiting on IO */ + + if (!new_window) + return; + + /* A new window has started. The RQ demand must be rolled + * over if p is the current task. */ + if (p_is_curr_task) { + u64 prev_sum = 0; + + /* p is either idle task or an exiting task */ + if (!nr_full_windows) { + prev_sum = rq->curr_runnable_sum; + } + + rq->prev_runnable_sum = prev_sum; + rq->curr_runnable_sum = 0; + } + + return; + } + + if (!new_window) { + /* account_busy_for_cpu_time() = 1 so busy time needs + * to be accounted to the current window. No rollover + * since we didn't start a new window. An example of this is + * when a task starts execution and then sleeps within the + * same window. */ + + if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq)) + delta = wallclock - mark_start; + else + delta = irqtime; + delta = scale_exec_time(delta, rq); + rq->curr_runnable_sum += delta; + if (!is_idle_task(p) && !exiting_task(p)) + p->ravg.curr_window += delta; + + return; + } + + if (!p_is_curr_task) { + /* account_busy_for_cpu_time() = 1 so busy time needs + * to be accounted to the current window. A new window + * has also started, but p is not the current task, so the + * window is not rolled over - just split up and account + * as necessary into curr and prev. The window is only + * rolled over when a new window is processed for the current + * task. + * + * Irqtime can't be accounted by a task that isn't the + * currently running task. */ + + if (!nr_full_windows) { + /* A full window hasn't elapsed, account partial + * contribution to previous completed window. */ + delta = scale_exec_time(window_start - mark_start, rq); + if (!exiting_task(p)) + p->ravg.prev_window += delta; + } else { + /* Since at least one full window has elapsed, + * the contribution to the previous window is the + * full window (window_size). */ + delta = scale_exec_time(window_size, rq); + if (!exiting_task(p)) + p->ravg.prev_window = delta; + } + rq->prev_runnable_sum += delta; + + /* Account piece of busy time in the current window. */ + delta = scale_exec_time(wallclock - window_start, rq); + rq->curr_runnable_sum += delta; + if (!exiting_task(p)) + p->ravg.curr_window = delta; + + return; + } + + if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq)) { + /* account_busy_for_cpu_time() = 1 so busy time needs + * to be accounted to the current window. A new window + * has started and p is the current task so rollover is + * needed. If any of these three above conditions are true + * then this busy time can't be accounted as irqtime. + * + * Busy time for the idle task or exiting tasks need not + * be accounted. + * + * An example of this would be a task that starts execution + * and then sleeps once a new window has begun. */ + + if (!nr_full_windows) { + /* A full window hasn't elapsed, account partial + * contribution to previous completed window. */ + delta = scale_exec_time(window_start - mark_start, rq); + if (!is_idle_task(p) && !exiting_task(p)) + p->ravg.prev_window += delta; + + delta += rq->curr_runnable_sum; + } else { + /* Since at least one full window has elapsed, + * the contribution to the previous window is the + * full window (window_size). 
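+			 * Older completed windows are simply dropped:
+			 * prev_window only ever carries the one window
+			 * immediately preceding the current one.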
*/ + delta = scale_exec_time(window_size, rq); + if (!is_idle_task(p) && !exiting_task(p)) + p->ravg.prev_window = delta; + + } + /* + * Rollover for normal runnable sum is done here by overwriting + * the values in prev_runnable_sum and curr_runnable_sum. + * Rollover for new task runnable sum has completed by previous + * if-else statement. + */ + rq->prev_runnable_sum = delta; + + /* Account piece of busy time in the current window. */ + delta = scale_exec_time(wallclock - window_start, rq); + rq->curr_runnable_sum = delta; + if (!is_idle_task(p) && !exiting_task(p)) + p->ravg.curr_window = delta; + + return; + } + + if (irqtime) { + /* account_busy_for_cpu_time() = 1 so busy time needs + * to be accounted to the current window. A new window + * has started and p is the current task so rollover is + * needed. The current task must be the idle task because + * irqtime is not accounted for any other task. + * + * Irqtime will be accounted each time we process IRQ activity + * after a period of idleness, so we know the IRQ busy time + * started at wallclock - irqtime. */ + + BUG_ON(!is_idle_task(p)); + mark_start = wallclock - irqtime; + + /* Roll window over. If IRQ busy time was just in the current + * window then that is all that need be accounted. */ + rq->prev_runnable_sum = rq->curr_runnable_sum; + if (mark_start > window_start) { + rq->curr_runnable_sum = scale_exec_time(irqtime, rq); + return; + } + + /* The IRQ busy time spanned multiple windows. Process the + * busy time preceding the current window start first. */ + delta = window_start - mark_start; + if (delta > window_size) + delta = window_size; + delta = scale_exec_time(delta, rq); + rq->prev_runnable_sum += delta; + + /* Process the remaining IRQ busy time in the current window. */ + delta = wallclock - window_start; + rq->curr_runnable_sum = scale_exec_time(delta, rq); + + return; + } + + BUG(); +} + +static int account_busy_for_task_demand(struct task_struct *p, int event) +{ + /* No need to bother updating task demand for exiting tasks + * or the idle task. */ + if (exiting_task(p) || is_idle_task(p)) + return 0; + + /* When a task is waking up it is completing a segment of non-busy + * time. Likewise, if wait time is not treated as busy time, then + * when a task begins to run or is migrated, it is not running and + * is completing a segment of non-busy time. */ + if (event == TASK_WAKE || (!walt_account_wait_time && + (event == PICK_NEXT_TASK || event == TASK_MIGRATE))) + return 0; + + return 1; +} + +/* + * Called when new window is starting for a task, to record cpu usage over + * recently concluded window(s). Normally 'samples' should be 1. It can be > 1 + * when, say, a real-time task runs without preemption for several windows at a + * stretch. 
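+ *
+ * For example, with the default 20ms window, an RT task that ran
+ * undisturbed across three complete windows is folded in via a single
+ * call with runtime = scale_exec_time(window_size) and samples = 3,
+ * rather than via three separate calls.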
+ */ +static void update_history(struct rq *rq, struct task_struct *p, + u32 runtime, int samples, int event) +{ + u32 *hist = &p->ravg.sum_history[0]; + int ridx, widx; + u32 max = 0, avg, demand; + u64 sum = 0; + + /* Ignore windows where task had no activity */ + if (!runtime || is_idle_task(p) || exiting_task(p) || !samples) + goto done; + + /* Push new 'runtime' value onto stack */ + widx = walt_ravg_hist_size - 1; + ridx = widx - samples; + for (; ridx >= 0; --widx, --ridx) { + hist[widx] = hist[ridx]; + sum += hist[widx]; + if (hist[widx] > max) + max = hist[widx]; + } + + for (widx = 0; widx < samples && widx < walt_ravg_hist_size; widx++) { + hist[widx] = runtime; + sum += hist[widx]; + if (hist[widx] > max) + max = hist[widx]; + } + + p->ravg.sum = 0; + + if (walt_window_stats_policy == WINDOW_STATS_RECENT) { + demand = runtime; + } else if (walt_window_stats_policy == WINDOW_STATS_MAX) { + demand = max; + } else { + avg = div64_u64(sum, walt_ravg_hist_size); + if (walt_window_stats_policy == WINDOW_STATS_AVG) + demand = avg; + else + demand = max(avg, runtime); + } + + /* + * A throttled deadline sched class task gets dequeued without + * changing p->on_rq. Since the dequeue decrements hmp stats + * avoid decrementing it here again. + */ + if (task_on_rq_queued(p) && (!task_has_dl_policy(p) || + !p->dl.dl_throttled)) + fixup_cumulative_runnable_avg(rq, p, demand); + + p->ravg.demand = demand; + +done: + trace_walt_update_history(rq, p, runtime, samples, event); + return; +} + +static void add_to_task_demand(struct rq *rq, struct task_struct *p, + u64 delta) +{ + delta = scale_exec_time(delta, rq); + p->ravg.sum += delta; + if (unlikely(p->ravg.sum > walt_ravg_window)) + p->ravg.sum = walt_ravg_window; +} + +/* + * Account cpu demand of task and/or update task's cpu demand history + * + * ms = p->ravg.mark_start; + * wc = wallclock + * ws = rq->window_start + * + * Three possibilities: + * + * a) Task event is contained within one window. + * window_start < mark_start < wallclock + * + * ws ms wc + * | | | + * V V V + * |---------------| + * + * In this case, p->ravg.sum is updated *iff* event is appropriate + * (ex: event == PUT_PREV_TASK) + * + * b) Task event spans two windows. + * mark_start < window_start < wallclock + * + * ms ws wc + * | | | + * V V V + * -----|------------------- + * + * In this case, p->ravg.sum is updated with (ws - ms) *iff* event + * is appropriate, then a new window sample is recorded followed + * by p->ravg.sum being set to (wc - ws) *iff* event is appropriate. + * + * c) Task event spans more than two windows. + * + * ms ws_tmp ws wc + * | | | | + * V V V V + * ---|-------|-------|-------|-------|------ + * | | + * |<------ nr_full_windows ------>| + * + * In this case, p->ravg.sum is updated with (ws_tmp - ms) first *iff* + * event is appropriate, window sample of p->ravg.sum is recorded, + * 'nr_full_window' samples of window_size is also recorded *iff* + * event is appropriate and finally p->ravg.sum is set to (wc - ws) + * *iff* event is appropriate. + * + * IMPORTANT : Leave p->ravg.mark_start unchanged, as update_cpu_busy_time() + * depends on it! 
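+ *
+ * As a rough numeric sketch of case (b), assuming the default 20ms
+ * window: with ms = 15ms, ws = 20ms and wc = 27ms, 5ms (scaled) is
+ * added to the closing window's sum, one history sample is pushed,
+ * and the new window starts with 7ms (scaled).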
+ */ +static void update_task_demand(struct task_struct *p, struct rq *rq, + int event, u64 wallclock) +{ + u64 mark_start = p->ravg.mark_start; + u64 delta, window_start = rq->window_start; + int new_window, nr_full_windows; + u32 window_size = walt_ravg_window; + + new_window = mark_start < window_start; + if (!account_busy_for_task_demand(p, event)) { + if (new_window) + /* If the time accounted isn't being accounted as + * busy time, and a new window started, only the + * previous window need be closed out with the + * pre-existing demand. Multiple windows may have + * elapsed, but since empty windows are dropped, + * it is not necessary to account those. */ + update_history(rq, p, p->ravg.sum, 1, event); + return; + } + + if (!new_window) { + /* The simple case - busy time contained within the existing + * window. */ + add_to_task_demand(rq, p, wallclock - mark_start); + return; + } + + /* Busy time spans at least two windows. Temporarily rewind + * window_start to first window boundary after mark_start. */ + delta = window_start - mark_start; + nr_full_windows = div64_u64(delta, window_size); + window_start -= (u64)nr_full_windows * (u64)window_size; + + /* Process (window_start - mark_start) first */ + add_to_task_demand(rq, p, window_start - mark_start); + + /* Push new sample(s) into task's demand history */ + update_history(rq, p, p->ravg.sum, 1, event); + if (nr_full_windows) + update_history(rq, p, scale_exec_time(window_size, rq), + nr_full_windows, event); + + /* Roll window_start back to current to process any remainder + * in current window. */ + window_start += (u64)nr_full_windows * (u64)window_size; + + /* Process (wallclock - window_start) next */ + mark_start = window_start; + add_to_task_demand(rq, p, wallclock - mark_start); +} + +/* Reflect task activity on its demand and cpu's busy time statistics */ +void walt_update_task_ravg(struct task_struct *p, struct rq *rq, + int event, u64 wallclock, u64 irqtime) +{ + if (walt_disabled || !rq->window_start) + return; + + lockdep_assert_held(&rq->lock); + + update_window_start(rq, wallclock); + + if (!p->ravg.mark_start) + goto done; + + update_task_demand(p, rq, event, wallclock); + update_cpu_busy_time(p, rq, event, wallclock, irqtime); + +done: + trace_walt_update_task_ravg(p, rq, event, wallclock, irqtime); + + p->ravg.mark_start = wallclock; +} + +unsigned long __weak arch_get_cpu_efficiency(int cpu) +{ + return SCHED_CAPACITY_SCALE; +} + +void walt_init_cpu_efficiency(void) +{ + int i, efficiency; + unsigned int max = 0, min = UINT_MAX; + + for_each_possible_cpu(i) { + efficiency = arch_get_cpu_efficiency(i); + cpu_rq(i)->efficiency = efficiency; + + if (efficiency > max) + max = efficiency; + if (efficiency < min) + min = efficiency; + } + + if (max) + max_possible_efficiency = max; + + if (min) + min_possible_efficiency = min; +} + +static void reset_task_stats(struct task_struct *p) +{ + u32 sum = 0; + + if (exiting_task(p)) + sum = EXITING_TASK_MARKER; + + memset(&p->ravg, 0, sizeof(struct ravg)); + /* Retain EXITING_TASK marker */ + p->ravg.sum_history[0] = sum; +} + +void walt_mark_task_starting(struct task_struct *p) +{ + u64 wallclock; + struct rq *rq = task_rq(p); + + if (!rq->window_start) { + reset_task_stats(p); + return; + } + + wallclock = walt_ktime_clock(); + p->ravg.mark_start = wallclock; +} + +void walt_set_window_start(struct rq *rq) +{ + int cpu = cpu_of(rq); + struct rq *sync_rq = cpu_rq(sync_cpu); + + if (rq->window_start) + return; + + if (cpu == sync_cpu) { + rq->window_start = 
walt_ktime_clock(); + } else { + raw_spin_unlock(&rq->lock); + double_rq_lock(rq, sync_rq); + rq->window_start = cpu_rq(sync_cpu)->window_start; + rq->curr_runnable_sum = rq->prev_runnable_sum = 0; + raw_spin_unlock(&sync_rq->lock); + } + + rq->curr->ravg.mark_start = rq->window_start; +} + +void walt_migrate_sync_cpu(int cpu) +{ + if (cpu == sync_cpu) + sync_cpu = smp_processor_id(); +} + +void walt_fixup_busy_time(struct task_struct *p, int new_cpu) +{ + struct rq *src_rq = task_rq(p); + struct rq *dest_rq = cpu_rq(new_cpu); + u64 wallclock; + + if (!p->on_rq && p->state != TASK_WAKING) + return; + + if (exiting_task(p)) { + return; + } + + if (p->state == TASK_WAKING) + double_rq_lock(src_rq, dest_rq); + + wallclock = walt_ktime_clock(); + + walt_update_task_ravg(task_rq(p)->curr, task_rq(p), + TASK_UPDATE, wallclock, 0); + walt_update_task_ravg(dest_rq->curr, dest_rq, + TASK_UPDATE, wallclock, 0); + + walt_update_task_ravg(p, task_rq(p), TASK_MIGRATE, wallclock, 0); + + if (p->ravg.curr_window) { + src_rq->curr_runnable_sum -= p->ravg.curr_window; + dest_rq->curr_runnable_sum += p->ravg.curr_window; + } + + if (p->ravg.prev_window) { + src_rq->prev_runnable_sum -= p->ravg.prev_window; + dest_rq->prev_runnable_sum += p->ravg.prev_window; + } + + if ((s64)src_rq->prev_runnable_sum < 0) { + src_rq->prev_runnable_sum = 0; + WARN_ON(1); + } + if ((s64)src_rq->curr_runnable_sum < 0) { + src_rq->curr_runnable_sum = 0; + WARN_ON(1); + } + + trace_walt_migration_update_sum(src_rq, p); + trace_walt_migration_update_sum(dest_rq, p); + + if (p->state == TASK_WAKING) + double_rq_unlock(src_rq, dest_rq); +} + +/* Keep track of max/min capacity possible across CPUs "currently" */ +static void __update_min_max_capacity(void) +{ + int i; + int max = 0, min = INT_MAX; + + for_each_online_cpu(i) { + if (cpu_rq(i)->capacity > max) + max = cpu_rq(i)->capacity; + if (cpu_rq(i)->capacity < min) + min = cpu_rq(i)->capacity; + } + + max_capacity = max; + min_capacity = min; +} + +static void update_min_max_capacity(void) +{ + unsigned long flags; + int i; + + local_irq_save(flags); + for_each_possible_cpu(i) + raw_spin_lock(&cpu_rq(i)->lock); + + __update_min_max_capacity(); + + for_each_possible_cpu(i) + raw_spin_unlock(&cpu_rq(i)->lock); + local_irq_restore(flags); +} + +/* + * Return 'capacity' of a cpu in reference to "least" efficient cpu, such that + * least efficient cpu gets capacity of 1024 + */ +static unsigned long capacity_scale_cpu_efficiency(int cpu) +{ + return (1024 * cpu_rq(cpu)->efficiency) / min_possible_efficiency; +} + +/* + * Return 'capacity' of a cpu in reference to cpu with lowest max_freq + * (min_max_freq), such that one with lowest max_freq gets capacity of 1024. + */ +static unsigned long capacity_scale_cpu_freq(int cpu) +{ + return (1024 * cpu_rq(cpu)->max_freq) / min_max_freq; +} + +/* + * Return load_scale_factor of a cpu in reference to "most" efficient cpu, so + * that "most" efficient cpu gets a load_scale_factor of 1 + */ +static unsigned long load_scale_cpu_efficiency(int cpu) +{ + return DIV_ROUND_UP(1024 * max_possible_efficiency, + cpu_rq(cpu)->efficiency); +} + +/* + * Return load_scale_factor of a cpu in reference to cpu with best max_freq + * (max_possible_freq), so that one with best max_freq gets a load_scale_factor + * of 1. 
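+ *
+ * All capacity and load_scale_factor values are fixed point, with 1024
+ * representing a factor of 1.0.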
+ */ +static unsigned long load_scale_cpu_freq(int cpu) +{ + return DIV_ROUND_UP(1024 * max_possible_freq, cpu_rq(cpu)->max_freq); +} + +static int compute_capacity(int cpu) +{ + int capacity = 1024; + + capacity *= capacity_scale_cpu_efficiency(cpu); + capacity >>= 10; + + capacity *= capacity_scale_cpu_freq(cpu); + capacity >>= 10; + + return capacity; +} + +static int compute_load_scale_factor(int cpu) +{ + int load_scale = 1024; + + /* + * load_scale_factor accounts for the fact that task load + * is in reference to "best" performing cpu. Task's load will need to be + * scaled (up) by a factor to determine suitability to be placed on a + * (little) cpu. + */ + load_scale *= load_scale_cpu_efficiency(cpu); + load_scale >>= 10; + + load_scale *= load_scale_cpu_freq(cpu); + load_scale >>= 10; + + return load_scale; +} + +static int cpufreq_notifier_policy(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_policy *policy = (struct cpufreq_policy *)data; + int i, update_max = 0; + u64 highest_mpc = 0, highest_mplsf = 0; + const struct cpumask *cpus = policy->related_cpus; + unsigned int orig_min_max_freq = min_max_freq; + unsigned int orig_max_possible_freq = max_possible_freq; + /* Initialized to policy->max in case policy->related_cpus is empty! */ + unsigned int orig_max_freq = policy->max; + + if (val != CPUFREQ_NOTIFY && val != CPUFREQ_REMOVE_POLICY && + val != CPUFREQ_CREATE_POLICY) + return 0; + + if (val == CPUFREQ_REMOVE_POLICY || val == CPUFREQ_CREATE_POLICY) { + update_min_max_capacity(); + return 0; + } + + for_each_cpu(i, policy->related_cpus) { + cpumask_copy(&cpu_rq(i)->freq_domain_cpumask, + policy->related_cpus); + orig_max_freq = cpu_rq(i)->max_freq; + cpu_rq(i)->min_freq = policy->min; + cpu_rq(i)->max_freq = policy->max; + cpu_rq(i)->cur_freq = policy->cur; + cpu_rq(i)->max_possible_freq = policy->cpuinfo.max_freq; + } + + max_possible_freq = max(max_possible_freq, policy->cpuinfo.max_freq); + if (min_max_freq == 1) + min_max_freq = UINT_MAX; + min_max_freq = min(min_max_freq, policy->cpuinfo.max_freq); + BUG_ON(!min_max_freq); + BUG_ON(!policy->max); + + /* Changes to policy other than max_freq don't require any updates */ + if (orig_max_freq == policy->max) + return 0; + + /* + * A changed min_max_freq or max_possible_freq (possible during bootup) + * needs to trigger re-computation of load_scale_factor and capacity for + * all possible cpus (even those offline). It also needs to trigger + * re-computation of nr_big_task count on all online cpus. + * + * A changed rq->max_freq otoh needs to trigger re-computation of + * load_scale_factor and capacity for just the cluster of cpus involved. + * Since small task definition depends on max_load_scale_factor, a + * changed load_scale_factor of one cluster could influence + * classification of tasks in another cluster. Hence a changed + * rq->max_freq will need to trigger re-computation of nr_big_task + * count on all online cpus. + * + * While it should be sufficient for nr_big_tasks to be + * re-computed for only online cpus, we have inadequate context + * information here (in policy notifier) with regard to hotplug-safety + * context in which notification is issued. As a result, we can't use + * get_online_cpus() here, as it can lead to deadlock. Until cpufreq is + * fixed up to issue notification always in hotplug-safe context, + * re-compute nr_big_task for all possible cpus. 
+ */ + + if (orig_min_max_freq != min_max_freq || + orig_max_possible_freq != max_possible_freq) { + cpus = cpu_possible_mask; + update_max = 1; + } + + /* + * Changed load_scale_factor can trigger reclassification of tasks as + * big or small. Make this change "atomic" so that tasks are accounted + * properly due to changed load_scale_factor + */ + for_each_cpu(i, cpus) { + struct rq *rq = cpu_rq(i); + + rq->capacity = compute_capacity(i); + rq->load_scale_factor = compute_load_scale_factor(i); + + if (update_max) { + u64 mpc, mplsf; + + mpc = div_u64(((u64) rq->capacity) * + rq->max_possible_freq, rq->max_freq); + rq->max_possible_capacity = (int) mpc; + + mplsf = div_u64(((u64) rq->load_scale_factor) * + rq->max_possible_freq, rq->max_freq); + + if (mpc > highest_mpc) { + highest_mpc = mpc; + cpumask_clear(&mpc_mask); + cpumask_set_cpu(i, &mpc_mask); + } else if (mpc == highest_mpc) { + cpumask_set_cpu(i, &mpc_mask); + } + + if (mplsf > highest_mplsf) + highest_mplsf = mplsf; + } + } + + if (update_max) { + max_possible_capacity = highest_mpc; + max_load_scale_factor = highest_mplsf; + } + + __update_min_max_capacity(); + + return 0; +} + +static int cpufreq_notifier_trans(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = (struct cpufreq_freqs *)data; + unsigned int cpu = freq->cpu, new_freq = freq->new; + unsigned long flags; + int i; + + if (val != CPUFREQ_POSTCHANGE) + return 0; + + BUG_ON(!new_freq); + + if (cpu_rq(cpu)->cur_freq == new_freq) + return 0; + + for_each_cpu(i, &cpu_rq(cpu)->freq_domain_cpumask) { + struct rq *rq = cpu_rq(i); + + raw_spin_lock_irqsave(&rq->lock, flags); + walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, + walt_ktime_clock(), 0); + rq->cur_freq = new_freq; + raw_spin_unlock_irqrestore(&rq->lock, flags); + } + + return 0; +} + +static struct notifier_block notifier_policy_block = { + .notifier_call = cpufreq_notifier_policy +}; + +static struct notifier_block notifier_trans_block = { + .notifier_call = cpufreq_notifier_trans +}; + +static int register_sched_callback(void) +{ + int ret; + + ret = cpufreq_register_notifier(¬ifier_policy_block, + CPUFREQ_POLICY_NOTIFIER); + + if (!ret) + ret = cpufreq_register_notifier(¬ifier_trans_block, + CPUFREQ_TRANSITION_NOTIFIER); + + return 0; +} + +/* + * cpufreq callbacks can be registered at core_initcall or later time. + * Any registration done prior to that is "forgotten" by cpufreq. See + * initialization of variable init_cpufreq_transition_notifier_list_called + * for further information. + */ +core_initcall(register_sched_callback); + +void walt_init_new_task_load(struct task_struct *p) +{ + int i; + u32 init_load_windows = + div64_u64((u64)sysctl_sched_walt_init_task_load_pct * + (u64)walt_ravg_window, 100); + u32 init_load_pct = current->init_load_pct; + + p->init_load_pct = 0; + memset(&p->ravg, 0, sizeof(struct ravg)); + + if (init_load_pct) { + init_load_windows = div64_u64((u64)init_load_pct * + (u64)walt_ravg_window, 100); + } + + p->ravg.demand = init_load_windows; + for (i = 0; i < RAVG_HIST_SIZE_MAX; ++i) + p->ravg.sum_history[i] = init_load_windows; +} diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h new file mode 100644 index 0000000000000000000000000000000000000000..e181c87a928d89134ea9c6c47d635f36f715f29a --- /dev/null +++ b/kernel/sched/walt.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, The Linux Foundation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __WALT_H +#define __WALT_H + +#ifdef CONFIG_SCHED_WALT + +void walt_update_task_ravg(struct task_struct *p, struct rq *rq, int event, + u64 wallclock, u64 irqtime); +void walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p); +void walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p); +void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *rq, + struct task_struct *p); +void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *rq, + struct task_struct *p); +void walt_fixup_busy_time(struct task_struct *p, int new_cpu); +void walt_init_new_task_load(struct task_struct *p); +void walt_mark_task_starting(struct task_struct *p); +void walt_set_window_start(struct rq *rq); +void walt_migrate_sync_cpu(int cpu); +void walt_init_cpu_efficiency(void); +u64 walt_ktime_clock(void); +void walt_account_irqtime(int cpu, struct task_struct *curr, u64 delta, + u64 wallclock); + +u64 walt_irqload(int cpu); +int walt_cpu_high_irqload(int cpu); + +#else /* CONFIG_SCHED_WALT */ + +static inline void walt_update_task_ravg(struct task_struct *p, struct rq *rq, + int event, u64 wallclock, u64 irqtime) { } +static inline void walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) { } +static inline void walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) { } +static inline void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *rq, + struct task_struct *p) { } +static inline void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *rq, + struct task_struct *p) { } +static inline void walt_fixup_busy_time(struct task_struct *p, int new_cpu) { } +static inline void walt_init_new_task_load(struct task_struct *p) { } +static inline void walt_mark_task_starting(struct task_struct *p) { } +static inline void walt_set_window_start(struct rq *rq) { } +static inline void walt_migrate_sync_cpu(int cpu) { } +static inline void walt_init_cpu_efficiency(void) { } +static inline u64 walt_ktime_clock(void) { return 0; } + +#endif /* CONFIG_SCHED_WALT */ + +extern unsigned int walt_disabled; + +#endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index db14070d0dcd030720939a6d967907f6df2df2d3..d2a397f7331ad919998bb7541b00592a96e5db55 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -529,6 +529,64 @@ static struct ctl_table kern_table[] = { .extra1 = &min_sched_granularity_ns, .extra2 = &max_sched_granularity_ns, }, + { + .procname = "sched_is_big_little", + .data = &sysctl_sched_is_big_little, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_SCHED_WALT + { + .procname = "sched_use_walt_cpu_util", + .data = &sysctl_sched_use_walt_cpu_util, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sched_use_walt_task_util", + .data = &sysctl_sched_use_walt_task_util, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sched_walt_init_task_load_pct", + .data = &sysctl_sched_walt_init_task_load_pct, + .maxlen = sizeof(unsigned 
int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sched_walt_cpu_high_irqload", + .data = &sysctl_sched_walt_cpu_high_irqload, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif + { + .procname = "sched_sync_hint_enable", + .data = &sysctl_sched_sync_hint_enable, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sched_initial_task_util", + .data = &sysctl_sched_initial_task_util, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sched_cstate_aware", + .data = &sysctl_sched_cstate_aware, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "sched_wakeup_granularity_ns", .data = &sysctl_sched_wakeup_granularity, @@ -2742,6 +2800,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int break; if (neg) continue; + val = convmul * val / convdiv; if ((min && val < *min) || (max && val > *max)) continue; *i = val; diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f6aae7977824ab7595950e378c2dd0f0bbe0256d..d2a20e83ebaed372ddb68d00bd8f44a50989b804 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -871,6 +871,9 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { int cpu = smp_processor_id(); + if (!bc) + return; + /* Set it up only once ! */ if (bc->event_handler != tick_handle_oneshot_broadcast) { int was_periodic = clockevent_state_periodic(bc); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 37dec7e3db43e3f60001f4a623b6e4c38b53e793..bfe589e929e86e83464c71e830aa7c079ed49a92 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -299,10 +299,10 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset; static inline u32 arch_gettimeoffset(void) { return 0; } #endif -static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr, +static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr, cycle_t delta) { - s64 nsec; + u64 nsec; nsec = delta * tkr->mult + tkr->xtime_nsec; nsec >>= tkr->shift; @@ -425,6 +425,35 @@ u64 ktime_get_raw_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); +/** + * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock. + * + * To keep it NMI safe since we're accessing from tracing, we're not using a + * separate timekeeper with updates to monotonic clock and boot offset + * protected with seqlocks. This has the following minor side effects: + * + * (1) Its possible that a timestamp be taken after the boot offset is updated + * but before the timekeeper is updated. If this happens, the new boot offset + * is added to the old timekeeping making the clock appear to update slightly + * earlier: + * CPU 0 CPU 1 + * timekeeping_inject_sleeptime64() + * __timekeeping_inject_sleeptime(tk, delta); + * timestamp(); + * timekeeping_update(tk, TK_CLEAR_NTP...); + * + * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be + * partially updated. Since the tk->offs_boot update is a rare event, this + * should be a rare occurrence which postprocessing should be able to handle. + */ +u64 notrace ktime_get_boot_fast_ns(void) +{ + struct timekeeper *tk = &tk_core.timekeeper; + + return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot)); +} +EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); + /* Suspend-time cycles value for halted fast timekeeper. 
*/ static cycle_t cycles_at_suspend; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index df9cde4f88637bc22ac99c48c8d877bd0047aa9f..79beb603e99d96f9d9ecfded01bc8f0d262ae994 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1126,6 +1126,7 @@ static struct { { trace_clock, "perf", 1 }, { ktime_get_mono_fast_ns, "mono", 1 }, { ktime_get_raw_fast_ns, "mono_raw", 1 }, + { ktime_get_boot_fast_ns, "boot", 1 }, ARCH_TRACE_CLOCKS }; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 305f535e24eec1ce3e5a529b8e1b3d9cc4e390df..3cb38f1bcd28117be7c80acbc23adef951e5f39b 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -847,6 +847,10 @@ print_graph_entry_leaf(struct trace_iterator *iter, cpu_data = per_cpu_ptr(data->cpu_data, cpu); + /* If a graph tracer ignored set_graph_notrace */ + if (call->depth < -1) + call->depth += FTRACE_NOTRACE_DEPTH; + /* * Comments display at + 1 to depth. Since * this is a leaf function, keep the comments @@ -855,7 +859,8 @@ print_graph_entry_leaf(struct trace_iterator *iter, cpu_data->depth = call->depth - 1; /* No need to keep this function around for this depth */ - if (call->depth < FTRACE_RETFUNC_DEPTH) + if (call->depth < FTRACE_RETFUNC_DEPTH && + !WARN_ON_ONCE(call->depth < 0)) cpu_data->enter_funcs[call->depth] = 0; } @@ -885,11 +890,16 @@ print_graph_entry_nested(struct trace_iterator *iter, struct fgraph_cpu_data *cpu_data; int cpu = iter->cpu; + /* If a graph tracer ignored set_graph_notrace */ + if (call->depth < -1) + call->depth += FTRACE_NOTRACE_DEPTH; + cpu_data = per_cpu_ptr(data->cpu_data, cpu); cpu_data->depth = call->depth; /* Save this function pointer to see if the exit matches */ - if (call->depth < FTRACE_RETFUNC_DEPTH) + if (call->depth < FTRACE_RETFUNC_DEPTH && + !WARN_ON_ONCE(call->depth < 0)) cpu_data->enter_funcs[call->depth] = call->func; } @@ -1119,7 +1129,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, */ cpu_data->depth = trace->depth - 1; - if (trace->depth < FTRACE_RETFUNC_DEPTH) { + if (trace->depth < FTRACE_RETFUNC_DEPTH && + !WARN_ON_ONCE(trace->depth < 0)) { if (cpu_data->enter_funcs[trace->depth] != trace->func) func_match = 0; cpu_data->enter_funcs[trace->depth] = 0; diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index b97286c48735b898957d7951e0b536f0209cdad2..f00b0131c8f9577ce2efe36ee19fc1a8b7a8283c 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -266,7 +266,7 @@ static int get_sample(void) static struct cpumask save_cpumask; static bool disable_migrate; -static void move_to_next_cpu(void) +static void move_to_next_cpu(bool initmask) { static struct cpumask *current_mask; int next_cpu; @@ -275,7 +275,7 @@ static void move_to_next_cpu(void) return; /* Just pick the first CPU on first iteration */ - if (!current_mask) { + if (initmask) { current_mask = &save_cpumask; get_online_cpus(); cpumask_and(current_mask, cpu_online_mask, tracing_buffer_mask); @@ -330,10 +330,12 @@ static void move_to_next_cpu(void) static int kthread_fn(void *data) { u64 interval; + bool initmask = true; while (!kthread_should_stop()) { - move_to_next_cpu(); + move_to_next_cpu(initmask); + initmask = false; local_irq_disable(); get_sample(); diff --git a/kernel/ucount.c b/kernel/ucount.c index 9d20d5dd298af25d0cd95635e217180601703959..4bbd38ec37886d3d104e3d37dc80d101ab3767ac 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -128,10 +128,10 @@ static struct ucounts 
*get_ucounts(struct user_namespace *ns, kuid_t uid) struct hlist_head *hashent = ucounts_hashentry(ns, uid); struct ucounts *ucounts, *new; - spin_lock(&ucounts_lock); + spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); if (!ucounts) { - spin_unlock(&ucounts_lock); + spin_unlock_irq(&ucounts_lock); new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) @@ -141,7 +141,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) new->uid = uid; atomic_set(&new->count, 0); - spin_lock(&ucounts_lock); + spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); if (ucounts) { kfree(new); @@ -152,16 +152,18 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) } if (!atomic_add_unless(&ucounts->count, 1, INT_MAX)) ucounts = NULL; - spin_unlock(&ucounts_lock); + spin_unlock_irq(&ucounts_lock); return ucounts; } static void put_ucounts(struct ucounts *ucounts) { + unsigned long flags; + if (atomic_dec_and_test(&ucounts->count)) { - spin_lock(&ucounts_lock); + spin_lock_irqsave(&ucounts_lock, flags); hlist_del_init(&ucounts->node); - spin_unlock(&ucounts_lock); + spin_unlock_irqrestore(&ucounts_lock, flags); kfree(ucounts); } diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f3631a3c97e126b7ad50d0a69783450a8e28fb30..bf9885e4417dc6c9fbcf0669d86a90345e61c126 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -421,7 +421,6 @@ static void watchdog_overflow_callback(struct perf_event *event, */ if (is_hardlockup()) { int this_cpu = smp_processor_id(); - struct pt_regs *regs = get_irq_regs(); /* only print hardlockups once */ if (__this_cpu_read(hard_watchdog_warn) == true) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f2bd21b93dfca464ae24bb18c6492bd8c7f1eb88..efb0b4d267a15c9fbfce12a9c3af7fcd8a4cb0d3 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -678,43 +678,50 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, } EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); +static inline void pipe_truncate(struct iov_iter *i) +{ + struct pipe_inode_info *pipe = i->pipe; + if (pipe->nrbufs) { + size_t off = i->iov_offset; + int idx = i->idx; + int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1); + if (off) { + pipe->bufs[idx].len = off - pipe->bufs[idx].offset; + idx = next_idx(idx, pipe); + nrbufs++; + } + while (pipe->nrbufs > nrbufs) { + pipe_buf_release(pipe, &pipe->bufs[idx]); + idx = next_idx(idx, pipe); + pipe->nrbufs--; + } + } +} + static void pipe_advance(struct iov_iter *i, size_t size) { struct pipe_inode_info *pipe = i->pipe; - struct pipe_buffer *buf; - int idx = i->idx; - size_t off = i->iov_offset, orig_sz; - if (unlikely(i->count < size)) size = i->count; - orig_sz = size; - if (size) { + struct pipe_buffer *buf; + size_t off = i->iov_offset, left = size; + int idx = i->idx; if (off) /* make it relative to the beginning of buffer */ - size += off - pipe->bufs[idx].offset; + left += off - pipe->bufs[idx].offset; while (1) { buf = &pipe->bufs[idx]; - if (size <= buf->len) + if (left <= buf->len) break; - size -= buf->len; + left -= buf->len; idx = next_idx(idx, pipe); } - buf->len = size; i->idx = idx; - off = i->iov_offset = buf->offset + size; - } - if (off) - idx = next_idx(idx, pipe); - if (pipe->nrbufs) { - int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); - /* [curbuf,unused) is in use. 
Free [idx,unused) */ - while (idx != unused) { - pipe_buf_release(pipe, &pipe->bufs[idx]); - idx = next_idx(idx, pipe); - pipe->nrbufs--; - } + i->iov_offset = buf->offset + left; } - i->count -= orig_sz; + i->count -= size; + /* ... and discard everything past that point */ + pipe_truncate(i); } void iov_iter_advance(struct iov_iter *i, size_t size) @@ -774,6 +781,7 @@ void iov_iter_pipe(struct iov_iter *i, int direction, size_t count) { BUG_ON(direction != ITER_PIPE); + WARN_ON(pipe->nrbufs == pipe->buffers); i->type = direction; i->pipe = pipe; i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 22e13a0e19d76a2b704edab2464f0c2a0e57783f..ad1d2962d129cb7ea673edc0bb02e753529f9289 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -53,7 +53,7 @@ */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -int swiotlb_force; +enum swiotlb_force swiotlb_force; /* * Used to do a quick range check in swiotlb_tbl_unmap_single and @@ -106,8 +106,12 @@ setup_io_tlb_npages(char *str) } if (*str == ',') ++str; - if (!strcmp(str, "force")) - swiotlb_force = 1; + if (!strcmp(str, "force")) { + swiotlb_force = SWIOTLB_FORCE; + } else if (!strcmp(str, "noforce")) { + swiotlb_force = SWIOTLB_NO_FORCE; + io_tlb_nslabs = 1; + } return 0; } @@ -541,8 +545,15 @@ static phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, enum dma_data_direction dir) { - dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); + dma_addr_t start_dma_addr; + + if (swiotlb_force == SWIOTLB_NO_FORCE) { + dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", + &phys); + return SWIOTLB_MAP_ERROR; + } + start_dma_addr = phys_to_dma(hwdev, io_tlb_start); return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); } @@ -707,6 +718,9 @@ static void swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, int do_panic) { + if (swiotlb_force == SWIOTLB_NO_FORCE) + return; + /* * Ran out of IOMMU space for this operation. This is very bad. * Unfortunately the drivers cannot handle this operation properly. @@ -749,7 +763,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * we can safely return the device addr and not worry about bounce * buffering it. 
*/ - if (dma_capable(dev, dev_addr, size) && !swiotlb_force) + if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) return dev_addr; trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); @@ -888,7 +902,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, phys_addr_t paddr = sg_phys(sg); dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); - if (swiotlb_force || + if (swiotlb_force == SWIOTLB_FORCE || !dma_capable(hwdev, dev_addr, sg->length)) { phys_addr_t map = map_single(hwdev, sg_phys(sg), sg->length, dir); diff --git a/mm/compaction.c b/mm/compaction.c index 0409a4ad6ea1363611d49269ecbb5ef88afe5c87..70e6bec46dc27f90c126c995cb36b8b50eb76ae8 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -634,22 +634,6 @@ isolate_freepages_range(struct compact_control *cc, return pfn; } -/* Update the number of anon and file isolated pages in the zone */ -static void acct_isolated(struct zone *zone, struct compact_control *cc) -{ - struct page *page; - unsigned int count[2] = { 0, }; - - if (list_empty(&cc->migratepages)) - return; - - list_for_each_entry(page, &cc->migratepages, lru) - count[!!page_is_file_cache(page)]++; - - mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, count[0]); - mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, count[1]); -} - /* Similar to reclaim, but different enough that they don't share logic */ static bool too_many_isolated(struct zone *zone) { @@ -866,6 +850,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* Successfully isolated */ del_page_from_lru_list(page, lruvec, page_lru(page)); + inc_node_page_state(page, + NR_ISOLATED_ANON + page_is_file_cache(page)); isolate_success: list_add(&page->lru, &cc->migratepages); @@ -902,7 +888,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, spin_unlock_irqrestore(zone_lru_lock(zone), flags); locked = false; } - acct_isolated(zone, cc); putback_movable_pages(&cc->migratepages); cc->nr_migratepages = 0; cc->last_migrated_pfn = 0; @@ -988,7 +973,6 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) break; } - acct_isolated(cc->zone, cc); return pfn; } @@ -1258,10 +1242,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, low_pfn = isolate_migratepages_block(cc, low_pfn, block_end_pfn, isolate_mode); - if (!low_pfn || cc->contended) { - acct_isolated(zone, cc); + if (!low_pfn || cc->contended) return ISOLATE_ABORT; - } /* * Either we isolated something and proceed with migration. Or @@ -1271,7 +1253,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, break; } - acct_isolated(zone, cc); /* Record where migration scanner will be restarted. 
*/ cc->migrate_pfn = low_pfn; diff --git a/mm/filemap.c b/mm/filemap.c index 50b52fe51937ca70e62a33ab1553aef9b77ad1a0..d8d7df82c69a9acf977e08bd07b3f5d8f7f1a53e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -144,7 +144,7 @@ static int page_cache_tree_insert(struct address_space *mapping, workingset_node_pages_dec(node); /* Wakeup waiters for exceptional entry lock */ dax_wake_mapping_entry_waiter(mapping, page->index, - false); + true); } } radix_tree_replace_slot(slot, page); @@ -1686,7 +1686,7 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, int error = 0; if (unlikely(*ppos >= inode->i_sb->s_maxbytes)) - return -EINVAL; + return 0; iov_iter_truncate(iter, inode->i_sb->s_maxbytes); index = *ppos >> PAGE_SHIFT; @@ -1703,6 +1703,11 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, cond_resched(); find_page: + if (fatal_signal_pending(current)) { + error = -EINTR; + goto out; + } + page = find_get_page(mapping, index); if (!page) { page_cache_sync_readahead(mapping, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d4a6e40015128c626f606e339474d8b23d90607e..917555cf6be064acb584770e90c95dba4a3f1436 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -772,6 +772,12 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, assert_spin_locked(pmd_lockptr(mm, pmd)); + /* + * When we COW a devmap PMD entry, we split it into PTEs, so we should + * not be in this function with `flags & FOLL_COW` set. + */ + WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); + if (flags & FOLL_WRITE && !pmd_write(*pmd)) return NULL; @@ -872,15 +878,17 @@ void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd) { pmd_t entry; unsigned long haddr; + bool write = fe->flags & FAULT_FLAG_WRITE; fe->ptl = pmd_lock(fe->vma->vm_mm, fe->pmd); if (unlikely(!pmd_same(*fe->pmd, orig_pmd))) goto unlock; entry = pmd_mkyoung(orig_pmd); + if (write) + entry = pmd_mkdirty(entry); haddr = fe->address & HPAGE_PMD_MASK; - if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry, - fe->flags & FAULT_FLAG_WRITE)) + if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry, write)) update_mmu_cache_pmd(fe->vma, fe->address, fe->pmd); unlock: @@ -1116,6 +1124,16 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd) return ret; } +/* + * FOLL_FORCE can write to even unwritable pmd's, but only + * after we've gone through a COW cycle and they are dirty. + */ +static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) +{ + return pmd_write(pmd) || + ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); +} + struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, @@ -1126,7 +1144,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, assert_spin_locked(pmd_lockptr(mm, pmd)); - if (flags & FOLL_WRITE && !pmd_write(*pmd)) + if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags)) goto out; /* Avoid dumping huge zero page */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 418bf01a50ed1f9dde0ca6c083aafa86c1869f23..b6adedbafaf59cbfa2d16b5c7a6d6c3fd46463e7 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1773,23 +1773,32 @@ static int gather_surplus_pages(struct hstate *h, int delta) } /* - * When releasing a hugetlb pool reservation, any surplus pages that were - * allocated to satisfy the reservation must be explicitly freed if they were - * never used. - * Called with hugetlb_lock held. 
+ * This routine has two main purposes: + * 1) Decrement the reservation count (resv_huge_pages) by the value passed + * in unused_resv_pages. This corresponds to the prior adjustments made + * to the associated reservation map. + * 2) Free any unused surplus pages that may have been allocated to satisfy + * the reservation. As many as unused_resv_pages may be freed. + * + * Called with hugetlb_lock held. However, the lock could be dropped (and + * reacquired) during calls to cond_resched_lock. Whenever dropping the lock, + * we must make sure nobody else can claim pages we are in the process of + * freeing. Do this by ensuring resv_huge_page always is greater than the + * number of huge pages we plan to free when dropping the lock. */ static void return_unused_surplus_pages(struct hstate *h, unsigned long unused_resv_pages) { unsigned long nr_pages; - /* Uncommit the reservation */ - h->resv_huge_pages -= unused_resv_pages; - /* Cannot return gigantic pages currently */ if (hstate_is_gigantic(h)) - return; + goto out; + /* + * Part (or even all) of the reservation could have been backed + * by pre-allocated pages. Only free surplus pages. + */ nr_pages = min(unused_resv_pages, h->surplus_huge_pages); /* @@ -1799,12 +1808,22 @@ static void return_unused_surplus_pages(struct hstate *h, * when the nodes with surplus pages have no free pages. * free_pool_huge_page() will balance the the freed pages across the * on-line nodes with memory and will handle the hstate accounting. + * + * Note that we decrement resv_huge_pages as we free the pages. If + * we drop the lock, resv_huge_pages will still be sufficiently large + * to cover subsequent pages we may free. */ while (nr_pages--) { + h->resv_huge_pages--; + unused_resv_pages--; if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1)) - break; + goto out; cond_resched_lock(&hugetlb_lock); } + +out: + /* Fully uncommit the reservation */ + h->resv_huge_pages -= unused_resv_pages; } @@ -3450,15 +3469,17 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * Keep the pte_same checks anyway to make transition from the mutex easier. 
*/ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, pte_t pte, - struct page *pagecache_page, spinlock_t *ptl) + unsigned long address, pte_t *ptep, + struct page *pagecache_page, spinlock_t *ptl) { + pte_t pte; struct hstate *h = hstate_vma(vma); struct page *old_page, *new_page; int ret = 0, outside_reserve = 0; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ + pte = huge_ptep_get(ptep); old_page = pte_page(pte); retry_avoidcopy: @@ -3733,7 +3754,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, hugetlb_count_add(pages_per_huge_page(h), mm); if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { /* Optimization, do the COW without a second fault */ - ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl); + ret = hugetlb_cow(mm, vma, address, ptep, page, ptl); } spin_unlock(ptl); @@ -3888,8 +3909,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (flags & FAULT_FLAG_WRITE) { if (!huge_pte_write(entry)) { - ret = hugetlb_cow(mm, vma, address, ptep, entry, - pagecache_page, ptl); + ret = hugetlb_cow(mm, vma, address, ptep, + pagecache_page, ptl); goto out_put_page; } entry = huge_pte_mkdirty(entry); diff --git a/mm/init-mm.c b/mm/init-mm.c index a56a851908d245248803a602f68d5833cf1f8412..975e49f00f34c1b282dee5840740effb9a999bed 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -21,5 +22,6 @@ struct mm_struct init_mm = { .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), + .user_ns = &init_user_ns, INIT_MM_CONTEXT(init_mm) }; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 87e1a7ca38463d0809e54ca8c74d4e2b0eb415eb..5d7c006373d30580f97d336c1cbb60edc7927735 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1403,6 +1403,9 @@ static void collapse_shmem(struct mm_struct *mm, spin_lock_irq(&mapping->tree_lock); + slot = radix_tree_lookup_slot(&mapping->page_tree, index); + VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot, + &mapping->tree_lock), page); VM_BUG_ON_PAGE(page_mapped(page), page); /* @@ -1426,6 +1429,7 @@ static void collapse_shmem(struct mm_struct *mm, radix_tree_replace_slot(slot, new_page + (index % HPAGE_PMD_NR)); + slot = radix_tree_iter_next(&iter); index++; continue; out_lru: @@ -1521,9 +1525,11 @@ static void collapse_shmem(struct mm_struct *mm, if (!page || iter.index < page->index) { if (!nr_none) break; - /* Put holes back where they were */ - radix_tree_replace_slot(slot, NULL); nr_none--; + /* Put holes back where they were */ + radix_tree_delete(&mapping->page_tree, + iter.index); + slot = radix_tree_iter_next(&iter); continue; } @@ -1537,6 +1543,7 @@ static void collapse_shmem(struct mm_struct *mm, putback_lru_page(page); unlock_page(page); spin_lock_irq(&mapping->tree_lock); + slot = radix_tree_iter_next(&iter); } VM_BUG_ON(nr_none); spin_unlock_irq(&mapping->tree_lock); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f60bd046413394af54c672aee8ab5f5623e38fcc..fc59ffb74c7d8f9fc59af9b253f2a5c9d77114e0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -625,8 +625,8 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, int nid, unsigned int lru_mask) { + struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), 
memcg); unsigned long nr = 0; - struct mem_cgroup_per_node *mz; enum lru_list lru; VM_BUG_ON((unsigned)nid >= nr_node_ids); @@ -634,8 +634,7 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, for_each_lru(lru) { if (!(BIT(lru) & lru_mask)) continue; - mz = mem_cgroup_nodeinfo(memcg, nid); - nr += mz->lru_size[lru]; + nr += mem_cgroup_get_lru_size(lruvec, lru); } return nr; } @@ -1002,6 +1001,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd * mem_cgroup_update_lru_size - account for adding or removing an lru page * @lruvec: mem_cgroup per zone lru vector * @lru: index of lru list the page is sitting on + * @zid: zone id of the accounted pages * @nr_pages: positive when adding or negative when removing * * This function must be called under lru_lock, just before a page is added @@ -1009,27 +1009,25 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd * so as to allow it to check that lru_size 0 is consistent with list_empty). */ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, - int nr_pages) + int zid, int nr_pages) { struct mem_cgroup_per_node *mz; unsigned long *lru_size; long size; - bool empty; if (mem_cgroup_disabled()) return; mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - lru_size = mz->lru_size + lru; - empty = list_empty(lruvec->lists + lru); + lru_size = &mz->lru_zone_size[zid][lru]; if (nr_pages < 0) *lru_size += nr_pages; size = *lru_size; - if (WARN_ONCE(size < 0 || empty != !size, - "%s(%p, %d, %d): lru_size %ld but %sempty\n", - __func__, lruvec, lru, nr_pages, size, empty ? "" : "not ")) { + if (WARN_ONCE(size < 0, + "%s(%p, %d, %d): lru_size %ld\n", + __func__, lruvec, lru, nr_pages, size)) { VM_BUG_ON(1); *lru_size = 0; } @@ -4362,9 +4360,9 @@ static int mem_cgroup_do_precharge(unsigned long count) return ret; } - /* Try charges one by one with reclaim */ + /* Try charges one by one with reclaim, but do not retry */ while (count--) { - ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1); + ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1); if (ret) return ret; mc.precharge++; diff --git a/mm/memory.c b/mm/memory.c index e18c57bdc75c4c96e3ef79546afe11fab5a3c07a..cbb1e5e5f79159159d1f90fe607a9e512bdcaf53 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3868,7 +3868,7 @@ EXPORT_SYMBOL_GPL(generic_access_phys); * Access another process' address space as given in mm. If non-NULL, use the * given task for page fault accounting. 
*/ -static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, +int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index cad4b9125695cfbfcc7509e942f6d8f8a37f460c..ede137345a99f4bf20612112117fd79fa658863d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1033,36 +1033,39 @@ static void node_states_set_node(int node, struct memory_notify *arg) node_set_state(node, N_MEMORY); } -int zone_can_shift(unsigned long pfn, unsigned long nr_pages, - enum zone_type target) +bool zone_can_shift(unsigned long pfn, unsigned long nr_pages, + enum zone_type target, int *zone_shift) { struct zone *zone = page_zone(pfn_to_page(pfn)); enum zone_type idx = zone_idx(zone); int i; + *zone_shift = 0; + if (idx < target) { /* pages must be at end of current zone */ if (pfn + nr_pages != zone_end_pfn(zone)) - return 0; + return false; /* no zones in use between current zone and target */ for (i = idx + 1; i < target; i++) if (zone_is_initialized(zone - idx + i)) - return 0; + return false; } if (target < idx) { /* pages must be at beginning of current zone */ if (pfn != zone->zone_start_pfn) - return 0; + return false; /* no zones in use between current zone and target */ for (i = target + 1; i < idx; i++) if (zone_is_initialized(zone - idx + i)) - return 0; + return false; } - return target - idx; + *zone_shift = target - idx; + return true; } /* Must be protected by mem_hotplug_begin() */ @@ -1089,10 +1092,13 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ !can_online_high_movable(zone)) return -EINVAL; - if (online_type == MMOP_ONLINE_KERNEL) - zone_shift = zone_can_shift(pfn, nr_pages, ZONE_NORMAL); - else if (online_type == MMOP_ONLINE_MOVABLE) - zone_shift = zone_can_shift(pfn, nr_pages, ZONE_MOVABLE); + if (online_type == MMOP_ONLINE_KERNEL) { + if (!zone_can_shift(pfn, nr_pages, ZONE_NORMAL, &zone_shift)) + return -EINVAL; + } else if (online_type == MMOP_ONLINE_MOVABLE) { + if (!zone_can_shift(pfn, nr_pages, ZONE_MOVABLE, &zone_shift)) + return -EINVAL; + } zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages); if (!zone) @@ -1477,17 +1483,20 @@ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) } /* - * Confirm all pages in a range [start, end) is belongs to the same zone. + * Confirm all pages in a range [start, end) belong to the same zone. + * When true, return its valid [start, end). 
*/ -int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) +int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, + unsigned long *valid_start, unsigned long *valid_end) { unsigned long pfn, sec_end_pfn; + unsigned long start, end; struct zone *zone = NULL; struct page *page; int i; - for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn); + for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1); pfn < end_pfn; - pfn = sec_end_pfn + 1, sec_end_pfn += PAGES_PER_SECTION) { + pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) { /* Make sure the memory section is present first */ if (!present_section_nr(pfn_to_section_nr(pfn))) continue; @@ -1503,10 +1512,20 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) page = pfn_to_page(pfn + i); if (zone && page_zone(page) != zone) return 0; + if (!zone) + start = pfn + i; zone = page_zone(page); + end = pfn + MAX_ORDER_NR_PAGES; } } - return 1; + + if (zone) { + *valid_start = start; + *valid_end = end; + return 1; + } else { + return 0; + } } /* @@ -1853,6 +1872,7 @@ static int __ref __offline_pages(unsigned long start_pfn, long offlined_pages; int ret, drain, retry_max, node; unsigned long flags; + unsigned long valid_start, valid_end; struct zone *zone; struct memory_notify arg; @@ -1863,10 +1883,10 @@ static int __ref __offline_pages(unsigned long start_pfn, return -EINVAL; /* This makes hotplug much easier...and readable. we assume this for now. .*/ - if (!test_pages_in_a_zone(start_pfn, end_pfn)) + if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) return -EINVAL; - zone = page_zone(pfn_to_page(start_pfn)); + zone = page_zone(pfn_to_page(valid_start)); node = zone_to_nid(zone); nr_pages = end_pfn - start_pfn; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 238c4e8f2654a09cec95ee2808e1e457baaacf49..0adc6f905db224d94fa43da91dc043dbfaf40df2 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2024,8 +2024,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, nmask = policy_nodemask(gfp, pol); zl = policy_zonelist(gfp, pol, node); - mpol_cond_put(pol); page = __alloc_pages_nodemask(gfp, order, zl, nmask); + mpol_cond_put(pol); out: if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; diff --git a/mm/migrate.c b/mm/migrate.c index 99250aee1ac166fd8d5bb03849041b2a07fc3f80..66ce6b490b13a9394411addc61ca43f819d5132b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -168,8 +168,6 @@ void putback_movable_pages(struct list_head *l) continue; } list_del(&page->lru); - dec_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); /* * We isolated non-lru movable page so here we can use * __PageMovable because LRU page's mapping cannot have @@ -186,6 +184,8 @@ void putback_movable_pages(struct list_head *l) put_page(page); } else { putback_lru_page(page); + dec_node_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); } } } @@ -1121,8 +1121,15 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, * restored. */ list_del(&page->lru); - dec_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + + /* + * Compaction can migrate also non-LRU pages which are + * not accounted to NR_ISOLATED_*. 
They can be recognized + * as __PageMovable + */ + if (likely(!__PageMovable(page))) + dec_node_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); } /* diff --git a/mm/nommu.c b/mm/nommu.c index 8b8faaf2a9e95cfc607ff1a5a37c83eadfda59fe..44265e00b701ebe5bc3327c8575f5181aa6a6f99 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1808,7 +1808,7 @@ void filemap_map_pages(struct fault_env *fe, } EXPORT_SYMBOL(filemap_map_pages); -static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, +int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8702d661e48d427012141cb5b57363f569f1a566..04b1c95bacda5e04775aecd5b242bfc195a2450f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2192,7 +2192,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, bool cold) { - int i; + int i, alloced = 0; spin_lock(&zone->lock); for (i = 0; i < count; ++i) { @@ -2217,13 +2217,21 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, else list_add_tail(&page->lru, list); list = &page->lru; + alloced++; if (is_migrate_cma(get_pcppage_migratetype(page))) __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, -(1 << order)); } + + /* + * i pages were removed from the buddy list even if some leak due + * to check_pcp_refill failing so adjust NR_FREE_PAGES based + * on i. Do not confuse with 'alloced' which is the number of + * pages added to the pcp list. + */ __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); spin_unlock(&zone->lock); - return i; + return alloced; } #ifdef CONFIG_NUMA @@ -3494,12 +3502,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct page *page = NULL; unsigned int alloc_flags; unsigned long did_some_progress; - enum compact_priority compact_priority = DEF_COMPACT_PRIORITY; + enum compact_priority compact_priority; enum compact_result compact_result; - int compaction_retries = 0; - int no_progress_loops = 0; + int compaction_retries; + int no_progress_loops; unsigned long alloc_start = jiffies; unsigned int stall_timeout = 10 * HZ; + unsigned int cpuset_mems_cookie; /* * In the slowpath, we sanity check order to avoid ever trying to @@ -3520,6 +3529,23 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) gfp_mask &= ~__GFP_ATOMIC; +retry_cpuset: + compaction_retries = 0; + no_progress_loops = 0; + compact_priority = DEF_COMPACT_PRIORITY; + cpuset_mems_cookie = read_mems_allowed_begin(); + /* + * We need to recalculate the starting point for the zonelist iterator + * because we might have used different nodemask in the fast path, or + * there was a cpuset modification and we are retrying - otherwise we + * could end up iterating over non-eligible zones endlessly. + */ + ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, + ac->high_zoneidx, ac->nodemask); + if (!ac->preferred_zoneref->zone) + goto nopage; + + /* * The fast path uses conservative alloc_flags to succeed only until * kswapd needs to be woken up, and to avoid the cost of setting up @@ -3679,6 +3705,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, &compaction_retries)) goto retry; + /* + * It's possible we raced with cpuset update so the OOM would be + * premature (see below the nopage: label for full explanation). 
+ */ + if (read_mems_allowed_retry(cpuset_mems_cookie)) + goto retry_cpuset; + /* Reclaim has failed us, start killing things */ page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress); if (page) @@ -3691,6 +3724,16 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, } nopage: + /* + * When updating a task's mems_allowed or mempolicy nodemask, it is + * possible to race with parallel threads in such a way that our + * allocation can fail while the mask is being updated. If we are about + * to fail, check if the cpuset changed during allocation and if so, + * retry. + */ + if (read_mems_allowed_retry(cpuset_mems_cookie)) + goto retry_cpuset; + warn_alloc(gfp_mask, "page allocation failure: order:%u", order); got_pg: @@ -3705,7 +3748,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, nodemask_t *nodemask) { struct page *page; - unsigned int cpuset_mems_cookie; unsigned int alloc_flags = ALLOC_WMARK_LOW; gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */ struct alloc_context ac = { @@ -3742,9 +3784,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE) alloc_flags |= ALLOC_CMA; -retry_cpuset: - cpuset_mems_cookie = read_mems_allowed_begin(); - /* Dirty zone balancing only done in the fast path */ ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE); @@ -3755,8 +3794,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, */ ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx, ac.nodemask); - if (!ac.preferred_zoneref) { + if (!ac.preferred_zoneref->zone) { page = NULL; + /* + * This might be due to race with cpuset_current_mems_allowed + * update, so make sure we retry with original nodemask in the + * slow path. + */ goto no_zone; } @@ -3765,6 +3809,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (likely(page)) goto out; +no_zone: /* * Runtime PM, block IO and its error handling path can deadlock * because I/O on the device might not complete. @@ -3776,21 +3821,10 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, * Restore the original nodemask if it was potentially replaced with * &cpuset_current_mems_allowed to optimize the fast-path attempt. */ - if (cpusets_enabled()) + if (unlikely(ac.nodemask != nodemask)) ac.nodemask = nodemask; - page = __alloc_pages_slowpath(alloc_mask, order, &ac); -no_zone: - /* - * When updating a task's mems_allowed, it is possible to race with - * parallel threads in such a way that an allocation can fail while - * the mask is being updated. If a page allocation is about to fail, - * check if the cpuset changed during allocation and if so, retry. 
- */ - if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) { - alloc_mask = gfp_mask; - goto retry_cpuset; - } + page = __alloc_pages_slowpath(alloc_mask, order, &ac); out: if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page && diff --git a/mm/slab.c b/mm/slab.c index 0b0550ca85b40c9108b4085873d67babc200885d..bd878f051a3b9bbdd1d520bc934c134b761bb04e 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2475,7 +2475,6 @@ union freelist_init_state { unsigned int pos; unsigned int *list; unsigned int count; - unsigned int rand; }; struct rnd_state rnd_state; }; @@ -2501,8 +2500,7 @@ static bool freelist_state_initialize(union freelist_init_state *state, } else { state->list = cachep->random_seq; state->count = count; - state->pos = 0; - state->rand = rand; + state->pos = rand % count; ret = true; } return ret; @@ -2511,7 +2509,9 @@ static bool freelist_state_initialize(union freelist_init_state *state, /* Get the next entry on the list and randomize it using a random shift */ static freelist_idx_t next_random_slot(union freelist_init_state *state) { - return (state->list[state->pos++] + state->rand) % state->count; + if (state->pos >= state->count) + state->pos = 0; + return state->list[state->pos++]; } /* Swap two freelist entries */ diff --git a/mm/swapfile.c b/mm/swapfile.c index f30438970cd176e5dde188bc6e05a28c3367e451..d76b2a18f044a07b8428e463528a91041790d37e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -943,11 +943,25 @@ bool reuse_swap_page(struct page *page, int *total_mapcount) count = page_trans_huge_mapcount(page, total_mapcount); if (count <= 1 && PageSwapCache(page)) { count += page_swapcount(page); - if (count == 1 && !PageWriteback(page)) { + if (count != 1) + goto out; + if (!PageWriteback(page)) { delete_from_swap_cache(page); SetPageDirty(page); + } else { + swp_entry_t entry; + struct swap_info_struct *p; + + entry.val = page_private(page); + p = swap_info_get(entry); + if (p->flags & SWP_STABLE_WRITES) { + spin_unlock(&p->lock); + return false; + } + spin_unlock(&p->lock); } } +out: return count <= 1; } @@ -2449,6 +2463,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = -ENOMEM; goto bad_swap; } + + if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) + p->flags |= SWP_STABLE_WRITES; + if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { int cpu; diff --git a/mm/vmscan.c b/mm/vmscan.c index d75cdf360730183e2f3690795b6bac8bcc594d36..fa30010a52773f41444176cfad0047e4a538ce7a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -242,6 +242,16 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru) return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); } +unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru, + int zone_idx) +{ + if (!mem_cgroup_disabled()) + return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx); + + return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx], + NR_ZONE_LRU_BASE + lru); +} + /* * Add a shrinker callback to be called from the vm. */ @@ -291,6 +301,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, int nid = shrinkctl->nid; long batch_size = shrinker->batch ? 
shrinker->batch : SHRINK_BATCH; + long scanned = 0, next_deferred; freeable = shrinker->count_objects(shrinker, shrinkctl); if (freeable == 0) @@ -312,7 +323,9 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n", shrinker->scan_objects, total_scan); total_scan = freeable; - } + next_deferred = nr; + } else + next_deferred = total_scan; /* * We need to avoid excessive windup on filesystem shrinkers @@ -369,17 +382,22 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, count_vm_events(SLABS_SCANNED, nr_to_scan); total_scan -= nr_to_scan; + scanned += nr_to_scan; cond_resched(); } + if (next_deferred >= scanned) + next_deferred -= scanned; + else + next_deferred = 0; /* * move the unused scan count back into the shrinker in a * manner that handles concurrent updates. If we exhausted the * scan, there is no need to do an update. */ - if (total_scan > 0) - new_nr = atomic_long_add_return(total_scan, + if (next_deferred > 0) + new_nr = atomic_long_add_return(next_deferred, &shrinker->nr_deferred[nid]); else new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); @@ -1374,8 +1392,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) * be complete before mem_cgroup_update_lru_size due to a santity check. */ static __always_inline void update_lru_sizes(struct lruvec *lruvec, - enum lru_list lru, unsigned long *nr_zone_taken, - unsigned long nr_taken) + enum lru_list lru, unsigned long *nr_zone_taken) { int zid; @@ -1384,11 +1401,11 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec, continue; __update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]); - } - #ifdef CONFIG_MEMCG - mem_cgroup_update_lru_size(lruvec, lru, -nr_taken); + mem_cgroup_update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]); #endif + } + } /* @@ -1493,7 +1510,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, *nr_scanned = scan; trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan, nr_taken, mode, is_file_lru(lru)); - update_lru_sizes(lruvec, lru, nr_zone_taken, nr_taken); + update_lru_sizes(lruvec, lru, nr_zone_taken); return nr_taken; } @@ -2039,10 +2056,8 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, if (!managed_zone(zone)) continue; - inactive_zone = zone_page_state(zone, - NR_ZONE_LRU_BASE + (file * LRU_FILE)); - active_zone = zone_page_state(zone, - NR_ZONE_LRU_BASE + (file * LRU_FILE) + LRU_ACTIVE); + inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid); + active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid); inactive -= min(inactive, inactive_zone); active -= min(active, active_zone); diff --git a/mm/zswap.c b/mm/zswap.c index 275b22cc8df4352c8210948e4381aa4bce83dc08..dbef27822a987fde3b90de71313f6e479cb44d46 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -78,7 +78,13 @@ static u64 zswap_duplicate_entry; /* Enable/disable zswap (disabled by default) */ static bool zswap_enabled; -module_param_named(enabled, zswap_enabled, bool, 0644); +static int zswap_enabled_param_set(const char *, + const struct kernel_param *); +static struct kernel_param_ops zswap_enabled_param_ops = { + .set = zswap_enabled_param_set, + .get = param_get_bool, +}; +module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644); /* Crypto compressor to use */ #define ZSWAP_COMPRESSOR_DEFAULT "lzo" @@ -176,6 +182,9 @@ static atomic_t zswap_pools_count = ATOMIC_INIT(0); /* used by param callback function */ 
static bool zswap_init_started; +/* fatal error during init */ +static bool zswap_init_failed; + /********************************* * helpers and fwd declarations **********************************/ @@ -706,6 +715,11 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp, char *s = strstrip((char *)val); int ret; + if (zswap_init_failed) { + pr_err("can't set param, initialization failed\n"); + return -ENODEV; + } + /* no change required */ if (!strcmp(s, *(char **)kp->arg)) return 0; @@ -785,6 +799,17 @@ static int zswap_zpool_param_set(const char *val, return __zswap_param_set(val, kp, NULL, zswap_compressor); } +static int zswap_enabled_param_set(const char *val, + const struct kernel_param *kp) +{ + if (zswap_init_failed) { + pr_err("can't enable, initialization failed\n"); + return -ENODEV; + } + + return param_set_bool(val, kp); +} + /********************************* * writeback code **********************************/ @@ -1271,6 +1296,9 @@ static int __init init_zswap(void) dstmem_fail: zswap_entry_cache_destroy(); cache_fail: + /* if built-in, we aren't unloaded on failure; don't allow use */ + zswap_init_failed = true; + zswap_enabled = false; return -ENOMEM; } /* must be late so crypto has time to come up */ diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 655a7d4c96e1e26b7390b8c783cebb2a31ca4ca8..983f0b5e14f1fafa4802f9c2763b363e30dcb659 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -264,7 +264,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - if (!sock_flag(ax25->sk, SOCK_DESTROY)) + if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 2fe9345c1407108bf5a0802671fa419aa9b24e7e..7fbdbae58e65e0d213d330b28bd8df7d92d99104 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -399,7 +399,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish); + br_nf_pre_routing_finish_bridge); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e99037c6f7b7b7e1073375d320f4fb186c4e8b38..04741064a173f4ac75772440e9c0d75d84090fcd 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -781,20 +781,6 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int br_dev_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) -{ - struct net_bridge *br = netdev_priv(dev); - - if (tb[IFLA_ADDRESS]) { - spin_lock_bh(&br->lock); - br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); - spin_unlock_bh(&br->lock); - } - - return register_netdevice(dev); -} - static int br_port_slave_changelink(struct net_device *brdev, struct net_device *dev, struct nlattr *tb[], @@ -1093,6 +1079,25 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return 0; } +static int br_dev_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_bridge *br = netdev_priv(dev); + int err; + + if (tb[IFLA_ADDRESS]) { + spin_lock_bh(&br->lock); + br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); + spin_unlock_bh(&br->lock); + } + + err = br_changelink(dev, tb, data); + if (err) + 
return err; + + return register_netdevice(dev); +} + static size_t br_get_size(const struct net_device *brdev) { return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */ diff --git a/net/can/bcm.c b/net/can/bcm.c index 436a7537e6a9d3ef065ec572568c42fd13331b5e..5e9ed5ec2860bd2471310a255a8e145afae701bd 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -734,14 +734,23 @@ static struct bcm_op *bcm_find_op(struct list_head *ops, static void bcm_remove_op(struct bcm_op *op) { - hrtimer_cancel(&op->timer); - hrtimer_cancel(&op->thrtimer); - - if (op->tsklet.func) - tasklet_kill(&op->tsklet); + if (op->tsklet.func) { + while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) || + test_bit(TASKLET_STATE_RUN, &op->tsklet.state) || + hrtimer_active(&op->timer)) { + hrtimer_cancel(&op->timer); + tasklet_kill(&op->tsklet); + } + } - if (op->thrtsklet.func) - tasklet_kill(&op->thrtsklet); + if (op->thrtsklet.func) { + while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) || + test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) || + hrtimer_active(&op->thrtimer)) { + hrtimer_cancel(&op->thrtimer); + tasklet_kill(&op->thrtsklet); + } + } if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index a0905f04bd13f3250f51de5a7600f4089d3053a4..b216131915e74a2c160623bb045f8ac0e3971927 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -39,56 +39,58 @@ static int ceph_x_should_authenticate(struct ceph_auth_client *ac) return need != 0; } +static int ceph_x_encrypt_offset(void) +{ + return sizeof(u32) + sizeof(struct ceph_x_encrypt_header); +} + static int ceph_x_encrypt_buflen(int ilen) { - return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + - sizeof(u32); + return ceph_x_encrypt_offset() + ilen + 16; } -static int ceph_x_encrypt(struct ceph_crypto_key *secret, - void *ibuf, int ilen, void *obuf, size_t olen) +static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf, + int buf_len, int plaintext_len) { - struct ceph_x_encrypt_header head = { - .struct_v = 1, - .magic = cpu_to_le64(CEPHX_ENC_MAGIC) - }; - size_t len = olen - sizeof(u32); + struct ceph_x_encrypt_header *hdr = buf + sizeof(u32); + int ciphertext_len; int ret; - ret = ceph_encrypt2(secret, obuf + sizeof(u32), &len, - &head, sizeof(head), ibuf, ilen); + hdr->struct_v = 1; + hdr->magic = cpu_to_le64(CEPHX_ENC_MAGIC); + + ret = ceph_crypt(secret, true, buf + sizeof(u32), buf_len - sizeof(u32), + plaintext_len + sizeof(struct ceph_x_encrypt_header), + &ciphertext_len); if (ret) return ret; - ceph_encode_32(&obuf, len); - return len + sizeof(u32); + + ceph_encode_32(&buf, ciphertext_len); + return sizeof(u32) + ciphertext_len; } -static int ceph_x_decrypt(struct ceph_crypto_key *secret, - void **p, void *end, void **obuf, size_t olen) +static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end) { - struct ceph_x_encrypt_header head; - size_t head_len = sizeof(head); - int len, ret; - - len = ceph_decode_32(p); - if (*p + len > end) - return -EINVAL; + struct ceph_x_encrypt_header *hdr = *p + sizeof(u32); + int ciphertext_len, plaintext_len; + int ret; - dout("ceph_x_decrypt len %d\n", len); - if (*obuf == NULL) { - *obuf = kmalloc(len, GFP_NOFS); - if (!*obuf) - return -ENOMEM; - olen = len; - } + ceph_decode_32_safe(p, end, ciphertext_len, e_inval); + ceph_decode_need(p, end, ciphertext_len, e_inval); - ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len); + ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len, + 
&plaintext_len); if (ret) return ret; - if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC) + + if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) return -EPERM; - *p += len; - return olen; + + *p += ciphertext_len; + return plaintext_len - sizeof(struct ceph_x_encrypt_header); + +e_inval: + return -EINVAL; } /* @@ -143,13 +145,10 @@ static int process_one_ticket(struct ceph_auth_client *ac, int type; u8 tkt_struct_v, blob_struct_v; struct ceph_x_ticket_handler *th; - void *dbuf = NULL; void *dp, *dend; int dlen; char is_enc; struct timespec validity; - struct ceph_crypto_key old_key; - void *ticket_buf = NULL; void *tp, *tpend; void **ptp; struct ceph_crypto_key new_session_key; @@ -174,20 +173,17 @@ static int process_one_ticket(struct ceph_auth_client *ac, } /* blob for me */ - dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0); - if (dlen <= 0) { - ret = dlen; + dp = *p + ceph_x_encrypt_offset(); + ret = ceph_x_decrypt(secret, p, end); + if (ret < 0) goto out; - } - dout(" decrypted %d bytes\n", dlen); - dp = dbuf; - dend = dp + dlen; + dout(" decrypted %d bytes\n", ret); + dend = dp + ret; tkt_struct_v = ceph_decode_8(&dp); if (tkt_struct_v != 1) goto bad; - memcpy(&old_key, &th->session_key, sizeof(old_key)); ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); if (ret) goto out; @@ -203,15 +199,13 @@ static int process_one_ticket(struct ceph_auth_client *ac, ceph_decode_8_safe(p, end, is_enc, bad); if (is_enc) { /* encrypted */ - dout(" encrypted ticket\n"); - dlen = ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0); - if (dlen < 0) { - ret = dlen; + tp = *p + ceph_x_encrypt_offset(); + ret = ceph_x_decrypt(&th->session_key, p, end); + if (ret < 0) goto out; - } - tp = ticket_buf; + dout(" encrypted ticket, decrypted %d bytes\n", ret); ptp = &tp; - tpend = *ptp + dlen; + tpend = tp + ret; } else { /* unencrypted */ ptp = p; @@ -242,8 +236,6 @@ static int process_one_ticket(struct ceph_auth_client *ac, xi->have_keys |= th->service; out: - kfree(ticket_buf); - kfree(dbuf); return ret; bad: @@ -294,7 +286,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, { int maxlen; struct ceph_x_authorize_a *msg_a; - struct ceph_x_authorize_b msg_b; + struct ceph_x_authorize_b *msg_b; void *p, *end; int ret; int ticket_blob_len = @@ -308,8 +300,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, if (ret) goto out_au; - maxlen = sizeof(*msg_a) + sizeof(msg_b) + - ceph_x_encrypt_buflen(ticket_blob_len); + maxlen = sizeof(*msg_a) + ticket_blob_len + + ceph_x_encrypt_buflen(sizeof(*msg_b)); dout(" need len %d\n", maxlen); if (au->buf && au->buf->alloc_len < maxlen) { ceph_buffer_put(au->buf); @@ -343,18 +335,19 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, p += ticket_blob_len; end = au->buf->vec.iov_base + au->buf->vec.iov_len; + msg_b = p + ceph_x_encrypt_offset(); + msg_b->struct_v = 1; get_random_bytes(&au->nonce, sizeof(au->nonce)); - msg_b.struct_v = 1; - msg_b.nonce = cpu_to_le64(au->nonce); - ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b), - p, end - p); + msg_b->nonce = cpu_to_le64(au->nonce); + ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); if (ret < 0) goto out_au; + p += ret; + WARN_ON(p > end); au->buf->vec.iov_len = p - au->buf->vec.iov_base; dout(" built authorizer nonce %llx len %d\n", au->nonce, (int)au->buf->vec.iov_len); - BUG_ON(au->buf->vec.iov_len > maxlen); return 0; out_au: @@ -452,8 +445,9 @@ static int ceph_x_build_request(struct ceph_auth_client 
*ac, if (need & CEPH_ENTITY_TYPE_AUTH) { struct ceph_x_authenticate *auth = (void *)(head + 1); void *p = auth + 1; - struct ceph_x_challenge_blob tmp; - char tmp_enc[40]; + void *enc_buf = xi->auth_authorizer.enc_buf; + struct ceph_x_challenge_blob *blob = enc_buf + + ceph_x_encrypt_offset(); u64 *u; if (p > end) @@ -464,16 +458,16 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, /* encrypt and hash */ get_random_bytes(&auth->client_challenge, sizeof(u64)); - tmp.client_challenge = auth->client_challenge; - tmp.server_challenge = cpu_to_le64(xi->server_challenge); - ret = ceph_x_encrypt(&xi->secret, &tmp, sizeof(tmp), - tmp_enc, sizeof(tmp_enc)); + blob->client_challenge = auth->client_challenge; + blob->server_challenge = cpu_to_le64(xi->server_challenge); + ret = ceph_x_encrypt(&xi->secret, enc_buf, CEPHX_AU_ENC_BUF_LEN, + sizeof(*blob)); if (ret < 0) return ret; auth->struct_v = 1; auth->key = 0; - for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) + for (u = (u64 *)enc_buf; u + 1 <= (u64 *)(enc_buf + ret); u++) auth->key ^= *(__le64 *)u; dout(" server_challenge %llx client_challenge %llx key %llx\n", xi->server_challenge, le64_to_cpu(auth->client_challenge), @@ -600,8 +594,8 @@ static int ceph_x_create_authorizer( auth->authorizer = (struct ceph_authorizer *) au; auth->authorizer_buf = au->buf->vec.iov_base; auth->authorizer_buf_len = au->buf->vec.iov_len; - auth->authorizer_reply_buf = au->reply_buf; - auth->authorizer_reply_buf_len = sizeof (au->reply_buf); + auth->authorizer_reply_buf = au->enc_buf; + auth->authorizer_reply_buf_len = CEPHX_AU_ENC_BUF_LEN; auth->sign_message = ac->ops->sign_message; auth->check_message_signature = ac->ops->check_message_signature; @@ -632,24 +626,22 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a, size_t len) { struct ceph_x_authorizer *au = (void *)a; - int ret = 0; - struct ceph_x_authorize_reply reply; - void *preply = &reply; - void *p = au->reply_buf; - void *end = p + sizeof(au->reply_buf); + void *p = au->enc_buf; + struct ceph_x_authorize_reply *reply = p + ceph_x_encrypt_offset(); + int ret; - ret = ceph_x_decrypt(&au->session_key, &p, end, &preply, sizeof(reply)); + ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN); if (ret < 0) return ret; - if (ret != sizeof(reply)) + if (ret != sizeof(*reply)) return -EPERM; - if (au->nonce + 1 != le64_to_cpu(reply.nonce_plus_one)) + if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one)) ret = -EPERM; else ret = 0; dout("verify_authorizer_reply nonce %llx got %llx ret %d\n", - au->nonce, le64_to_cpu(reply.nonce_plus_one), ret); + au->nonce, le64_to_cpu(reply->nonce_plus_one), ret); return ret; } @@ -704,35 +696,48 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH); } -static int calcu_signature(struct ceph_x_authorizer *au, - struct ceph_msg *msg, __le64 *sig) +static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, + __le64 *psig) { + void *enc_buf = au->enc_buf; + struct { + __le32 len; + __le32 header_crc; + __le32 front_crc; + __le32 middle_crc; + __le32 data_crc; + } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); int ret; - char tmp_enc[40]; - __le32 tmp[5] = { - cpu_to_le32(16), msg->hdr.crc, msg->footer.front_crc, - msg->footer.middle_crc, msg->footer.data_crc, - }; - ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp), - tmp_enc, sizeof(tmp_enc)); + + sigblock->len = cpu_to_le32(4*sizeof(u32)); + 
sigblock->header_crc = msg->hdr.crc; + sigblock->front_crc = msg->footer.front_crc; + sigblock->middle_crc = msg->footer.middle_crc; + sigblock->data_crc = msg->footer.data_crc; + ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN, + sizeof(*sigblock)); if (ret < 0) return ret; - *sig = *(__le64*)(tmp_enc + 4); + + *psig = *(__le64 *)(enc_buf + sizeof(u32)); return 0; } static int ceph_x_sign_message(struct ceph_auth_handshake *auth, struct ceph_msg *msg) { + __le64 sig; int ret; if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN)) return 0; - ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, - msg, &msg->footer.sig); - if (ret < 0) + ret = calc_signature((struct ceph_x_authorizer *)auth->authorizer, + msg, &sig); + if (ret) return ret; + + msg->footer.sig = sig; msg->footer.flags |= CEPH_MSG_FOOTER_SIGNED; return 0; } @@ -746,9 +751,9 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth, if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN)) return 0; - ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, - msg, &sig_check); - if (ret < 0) + ret = calc_signature((struct ceph_x_authorizer *)auth->authorizer, + msg, &sig_check); + if (ret) return ret; if (sig_check == msg->footer.sig) return 0; diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index 21a5af904bae751391bf7d508dd75b1d2ee426e2..48e9ad41bd2aa70cdf42922a830f052d39e5ea7d 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -24,6 +24,7 @@ struct ceph_x_ticket_handler { unsigned long renew_after, expires; }; +#define CEPHX_AU_ENC_BUF_LEN 128 /* big enough for encrypted blob */ struct ceph_x_authorizer { struct ceph_authorizer base; @@ -32,7 +33,7 @@ struct ceph_x_authorizer { unsigned int service; u64 nonce; u64 secret_id; - char reply_buf[128]; /* big enough for encrypted blob */ + char enc_buf[CEPHX_AU_ENC_BUF_LEN] __aligned(8); }; struct ceph_x_info { diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index db2847ac5f122988ce1625153fc219e74a3ddb8d..292e33bd916e650c0317ab630a0c60a400d21c7d 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -13,14 +13,60 @@ #include #include "crypto.h" +/* + * Set ->key and ->tfm. The rest of the key should be filled in before + * this function is called. 
+ */ +static int set_secret(struct ceph_crypto_key *key, void *buf) +{ + unsigned int noio_flag; + int ret; + + key->key = NULL; + key->tfm = NULL; + + switch (key->type) { + case CEPH_CRYPTO_NONE: + return 0; /* nothing to do */ + case CEPH_CRYPTO_AES: + break; + default: + return -ENOTSUPP; + } + + WARN_ON(!key->len); + key->key = kmemdup(buf, key->len, GFP_NOIO); + if (!key->key) { + ret = -ENOMEM; + goto fail; + } + + /* crypto_alloc_skcipher() allocates with GFP_KERNEL */ + noio_flag = memalloc_noio_save(); + key->tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); + memalloc_noio_restore(noio_flag); + if (IS_ERR(key->tfm)) { + ret = PTR_ERR(key->tfm); + key->tfm = NULL; + goto fail; + } + + ret = crypto_skcipher_setkey(key->tfm, key->key, key->len); + if (ret) + goto fail; + + return 0; + +fail: + ceph_crypto_key_destroy(key); + return ret; +} + int ceph_crypto_key_clone(struct ceph_crypto_key *dst, const struct ceph_crypto_key *src) { memcpy(dst, src, sizeof(struct ceph_crypto_key)); - dst->key = kmemdup(src->key, src->len, GFP_NOFS); - if (!dst->key) - return -ENOMEM; - return 0; + return set_secret(dst, src->key); } int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) @@ -37,16 +83,16 @@ int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end) { + int ret; + ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad); key->type = ceph_decode_16(p); ceph_decode_copy(p, &key->created, sizeof(key->created)); key->len = ceph_decode_16(p); ceph_decode_need(p, end, key->len, bad); - key->key = kmalloc(key->len, GFP_NOFS); - if (!key->key) - return -ENOMEM; - ceph_decode_copy(p, key->key, key->len); - return 0; + ret = set_secret(key, *p); + *p += key->len; + return ret; bad: dout("failed to decode crypto key\n"); @@ -80,9 +126,14 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey) return 0; } -static struct crypto_skcipher *ceph_crypto_alloc_cipher(void) +void ceph_crypto_key_destroy(struct ceph_crypto_key *key) { - return crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); + if (key) { + kfree(key->key); + key->key = NULL; + crypto_free_skcipher(key->tfm); + key->tfm = NULL; + } } static const u8 *aes_iv = (u8 *)CEPH_AES_IV; @@ -157,372 +208,82 @@ static void teardown_sgtable(struct sg_table *sgt) sg_free_table(sgt); } -static int ceph_aes_encrypt(const void *key, int key_len, - void *dst, size_t *dst_len, - const void *src, size_t src_len) -{ - struct scatterlist sg_in[2], prealloc_sg; - struct sg_table sg_out; - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); - SKCIPHER_REQUEST_ON_STACK(req, tfm); - int ret; - char iv[AES_BLOCK_SIZE]; - size_t zero_padding = (0x10 - (src_len & 0x0f)); - char pad[16]; - - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - memset(pad, zero_padding, zero_padding); - - *dst_len = src_len + zero_padding; - - sg_init_table(sg_in, 2); - sg_set_buf(&sg_in[0], src, src_len); - sg_set_buf(&sg_in[1], pad, zero_padding); - ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); - if (ret) - goto out_tfm; - - crypto_skcipher_setkey((void *)tfm, key, key_len); - memcpy(iv, aes_iv, AES_BLOCK_SIZE); - - skcipher_request_set_tfm(req, tfm); - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg_in, sg_out.sgl, - src_len + zero_padding, iv); - - /* - print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, - key, key_len, 1); - print_hex_dump(KERN_ERR, 
"enc src: ", DUMP_PREFIX_NONE, 16, 1, - src, src_len, 1); - print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, - pad, zero_padding, 1); - */ - ret = crypto_skcipher_encrypt(req); - skcipher_request_zero(req); - if (ret < 0) { - pr_err("ceph_aes_crypt failed %d\n", ret); - goto out_sg; - } - /* - print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, - dst, *dst_len, 1); - */ - -out_sg: - teardown_sgtable(&sg_out); -out_tfm: - crypto_free_skcipher(tfm); - return ret; -} - -static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, - size_t *dst_len, - const void *src1, size_t src1_len, - const void *src2, size_t src2_len) -{ - struct scatterlist sg_in[3], prealloc_sg; - struct sg_table sg_out; - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); - SKCIPHER_REQUEST_ON_STACK(req, tfm); - int ret; - char iv[AES_BLOCK_SIZE]; - size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f)); - char pad[16]; - - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - memset(pad, zero_padding, zero_padding); - - *dst_len = src1_len + src2_len + zero_padding; - - sg_init_table(sg_in, 3); - sg_set_buf(&sg_in[0], src1, src1_len); - sg_set_buf(&sg_in[1], src2, src2_len); - sg_set_buf(&sg_in[2], pad, zero_padding); - ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); - if (ret) - goto out_tfm; - - crypto_skcipher_setkey((void *)tfm, key, key_len); - memcpy(iv, aes_iv, AES_BLOCK_SIZE); - - skcipher_request_set_tfm(req, tfm); - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg_in, sg_out.sgl, - src1_len + src2_len + zero_padding, iv); - - /* - print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, - key, key_len, 1); - print_hex_dump(KERN_ERR, "enc src1: ", DUMP_PREFIX_NONE, 16, 1, - src1, src1_len, 1); - print_hex_dump(KERN_ERR, "enc src2: ", DUMP_PREFIX_NONE, 16, 1, - src2, src2_len, 1); - print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, - pad, zero_padding, 1); - */ - ret = crypto_skcipher_encrypt(req); - skcipher_request_zero(req); - if (ret < 0) { - pr_err("ceph_aes_crypt2 failed %d\n", ret); - goto out_sg; - } - /* - print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, - dst, *dst_len, 1); - */ - -out_sg: - teardown_sgtable(&sg_out); -out_tfm: - crypto_free_skcipher(tfm); - return ret; -} - -static int ceph_aes_decrypt(const void *key, int key_len, - void *dst, size_t *dst_len, - const void *src, size_t src_len) +static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, + void *buf, int buf_len, int in_len, int *pout_len) { - struct sg_table sg_in; - struct scatterlist sg_out[2], prealloc_sg; - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); - SKCIPHER_REQUEST_ON_STACK(req, tfm); - char pad[16]; - char iv[AES_BLOCK_SIZE]; + SKCIPHER_REQUEST_ON_STACK(req, key->tfm); + struct sg_table sgt; + struct scatterlist prealloc_sg; + char iv[AES_BLOCK_SIZE] __aligned(8); + int pad_byte = AES_BLOCK_SIZE - (in_len & (AES_BLOCK_SIZE - 1)); + int crypt_len = encrypt ? 
in_len + pad_byte : in_len; int ret; - int last_byte; - - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - sg_init_table(sg_out, 2); - sg_set_buf(&sg_out[0], dst, *dst_len); - sg_set_buf(&sg_out[1], pad, sizeof(pad)); - ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); + WARN_ON(crypt_len > buf_len); + if (encrypt) + memset(buf + in_len, pad_byte, pad_byte); + ret = setup_sgtable(&sgt, &prealloc_sg, buf, crypt_len); if (ret) - goto out_tfm; + return ret; - crypto_skcipher_setkey((void *)tfm, key, key_len); memcpy(iv, aes_iv, AES_BLOCK_SIZE); - - skcipher_request_set_tfm(req, tfm); + skcipher_request_set_tfm(req, key->tfm); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg_in.sgl, sg_out, - src_len, iv); + skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv); /* - print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, - key, key_len, 1); - print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, - src, src_len, 1); + print_hex_dump(KERN_ERR, "key: ", DUMP_PREFIX_NONE, 16, 1, + key->key, key->len, 1); + print_hex_dump(KERN_ERR, " in: ", DUMP_PREFIX_NONE, 16, 1, + buf, crypt_len, 1); */ - ret = crypto_skcipher_decrypt(req); - skcipher_request_zero(req); - if (ret < 0) { - pr_err("ceph_aes_decrypt failed %d\n", ret); - goto out_sg; - } - - if (src_len <= *dst_len) - last_byte = ((char *)dst)[src_len - 1]; + if (encrypt) + ret = crypto_skcipher_encrypt(req); else - last_byte = pad[src_len - *dst_len - 1]; - if (last_byte <= 16 && src_len >= last_byte) { - *dst_len = src_len - last_byte; - } else { - pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n", - last_byte, (int)src_len); - return -EPERM; /* bad padding */ - } - /* - print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1, - dst, *dst_len, 1); - */ - -out_sg: - teardown_sgtable(&sg_in); -out_tfm: - crypto_free_skcipher(tfm); - return ret; -} - -static int ceph_aes_decrypt2(const void *key, int key_len, - void *dst1, size_t *dst1_len, - void *dst2, size_t *dst2_len, - const void *src, size_t src_len) -{ - struct sg_table sg_in; - struct scatterlist sg_out[3], prealloc_sg; - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); - SKCIPHER_REQUEST_ON_STACK(req, tfm); - char pad[16]; - char iv[AES_BLOCK_SIZE]; - int ret; - int last_byte; - - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - sg_init_table(sg_out, 3); - sg_set_buf(&sg_out[0], dst1, *dst1_len); - sg_set_buf(&sg_out[1], dst2, *dst2_len); - sg_set_buf(&sg_out[2], pad, sizeof(pad)); - ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); - if (ret) - goto out_tfm; - - crypto_skcipher_setkey((void *)tfm, key, key_len); - memcpy(iv, aes_iv, AES_BLOCK_SIZE); - - skcipher_request_set_tfm(req, tfm); - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg_in.sgl, sg_out, - src_len, iv); - - /* - print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, - key, key_len, 1); - print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, - src, src_len, 1); - */ - ret = crypto_skcipher_decrypt(req); + ret = crypto_skcipher_decrypt(req); skcipher_request_zero(req); - if (ret < 0) { - pr_err("ceph_aes_decrypt failed %d\n", ret); - goto out_sg; - } - - if (src_len <= *dst1_len) - last_byte = ((char *)dst1)[src_len - 1]; - else if (src_len <= *dst1_len + *dst2_len) - last_byte = ((char *)dst2)[src_len - *dst1_len - 1]; - else - last_byte = pad[src_len - *dst1_len - *dst2_len - 1]; - if (last_byte <= 16 && src_len >= last_byte) { - src_len -= last_byte; - } else { - 
pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n", - last_byte, (int)src_len); - return -EPERM; /* bad padding */ - } - - if (src_len < *dst1_len) { - *dst1_len = src_len; - *dst2_len = 0; - } else { - *dst2_len = src_len - *dst1_len; + if (ret) { + pr_err("%s %scrypt failed: %d\n", __func__, + encrypt ? "en" : "de", ret); + goto out_sgt; } /* - print_hex_dump(KERN_ERR, "dec out1: ", DUMP_PREFIX_NONE, 16, 1, - dst1, *dst1_len, 1); - print_hex_dump(KERN_ERR, "dec out2: ", DUMP_PREFIX_NONE, 16, 1, - dst2, *dst2_len, 1); + print_hex_dump(KERN_ERR, "out: ", DUMP_PREFIX_NONE, 16, 1, + buf, crypt_len, 1); */ -out_sg: - teardown_sgtable(&sg_in); -out_tfm: - crypto_free_skcipher(tfm); - return ret; -} - - -int ceph_decrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, - const void *src, size_t src_len) -{ - switch (secret->type) { - case CEPH_CRYPTO_NONE: - if (*dst_len < src_len) - return -ERANGE; - memcpy(dst, src, src_len); - *dst_len = src_len; - return 0; - - case CEPH_CRYPTO_AES: - return ceph_aes_decrypt(secret->key, secret->len, dst, - dst_len, src, src_len); - - default: - return -EINVAL; - } -} - -int ceph_decrypt2(struct ceph_crypto_key *secret, - void *dst1, size_t *dst1_len, - void *dst2, size_t *dst2_len, - const void *src, size_t src_len) -{ - size_t t; - - switch (secret->type) { - case CEPH_CRYPTO_NONE: - if (*dst1_len + *dst2_len < src_len) - return -ERANGE; - t = min(*dst1_len, src_len); - memcpy(dst1, src, t); - *dst1_len = t; - src += t; - src_len -= t; - if (src_len) { - t = min(*dst2_len, src_len); - memcpy(dst2, src, t); - *dst2_len = t; + if (encrypt) { + *pout_len = crypt_len; + } else { + pad_byte = *(char *)(buf + in_len - 1); + if (pad_byte > 0 && pad_byte <= AES_BLOCK_SIZE && + in_len >= pad_byte) { + *pout_len = in_len - pad_byte; + } else { + pr_err("%s got bad padding %d on in_len %d\n", + __func__, pad_byte, in_len); + ret = -EPERM; + goto out_sgt; } - return 0; - - case CEPH_CRYPTO_AES: - return ceph_aes_decrypt2(secret->key, secret->len, - dst1, dst1_len, dst2, dst2_len, - src, src_len); - - default: - return -EINVAL; } -} - -int ceph_encrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, - const void *src, size_t src_len) -{ - switch (secret->type) { - case CEPH_CRYPTO_NONE: - if (*dst_len < src_len) - return -ERANGE; - memcpy(dst, src, src_len); - *dst_len = src_len; - return 0; - case CEPH_CRYPTO_AES: - return ceph_aes_encrypt(secret->key, secret->len, dst, - dst_len, src, src_len); - - default: - return -EINVAL; - } +out_sgt: + teardown_sgtable(&sgt); + return ret; } -int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, - const void *src1, size_t src1_len, - const void *src2, size_t src2_len) +int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt, + void *buf, int buf_len, int in_len, int *pout_len) { - switch (secret->type) { + switch (key->type) { case CEPH_CRYPTO_NONE: - if (*dst_len < src1_len + src2_len) - return -ERANGE; - memcpy(dst, src1, src1_len); - memcpy(dst + src1_len, src2, src2_len); - *dst_len = src1_len + src2_len; + *pout_len = in_len; return 0; - case CEPH_CRYPTO_AES: - return ceph_aes_encrypt2(secret->key, secret->len, dst, dst_len, - src1, src1_len, src2, src2_len); - + return ceph_aes_crypt(key, encrypt, buf, buf_len, in_len, + pout_len); default: - return -EINVAL; + return -ENOTSUPP; } } diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index 2e9cab09f37ba65c9c5961c12e10d2aded02413f..58d83aa7740f64a68fc25dcbde06791f5ce17496 100644 --- a/net/ceph/crypto.h +++ 
b/net/ceph/crypto.h @@ -12,37 +12,19 @@ struct ceph_crypto_key { struct ceph_timespec created; int len; void *key; + struct crypto_skcipher *tfm; }; -static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) -{ - if (key) { - kfree(key->key); - key->key = NULL; - } -} - int ceph_crypto_key_clone(struct ceph_crypto_key *dst, const struct ceph_crypto_key *src); int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end); int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end); int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in); +void ceph_crypto_key_destroy(struct ceph_crypto_key *key); /* crypto.c */ -int ceph_decrypt(struct ceph_crypto_key *secret, - void *dst, size_t *dst_len, - const void *src, size_t src_len); -int ceph_encrypt(struct ceph_crypto_key *secret, - void *dst, size_t *dst_len, - const void *src, size_t src_len); -int ceph_decrypt2(struct ceph_crypto_key *secret, - void *dst1, size_t *dst1_len, - void *dst2, size_t *dst2_len, - const void *src, size_t src_len); -int ceph_encrypt2(struct ceph_crypto_key *secret, - void *dst, size_t *dst_len, - const void *src1, size_t src1_len, - const void *src2, size_t src2_len); +int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt, + void *buf, int buf_len, int in_len, int *pout_len); int ceph_crypto_init(void); void ceph_crypto_shutdown(void); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index a5502898ea33b0e4d74cfa8f12947c1d9466013c..2efb335deada6048be8c8f3b2c6fe30131fb330d 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2027,6 +2027,19 @@ static int process_connect(struct ceph_connection *con) dout("process_connect on %p tag %d\n", con, (int)con->in_tag); + if (con->auth_reply_buf) { + /* + * Any connection that defines ->get_authorizer() + * should also define ->verify_authorizer_reply(). + * See get_connect_authorizer(). 
+ */ + ret = con->ops->verify_authorizer_reply(con, 0); + if (ret < 0) { + con->error_msg = "bad authorize reply"; + return ret; + } + } + switch (con->in_reply.tag) { case CEPH_MSGR_TAG_FEATURES: pr_err("%s%lld %s feature set mismatch," diff --git a/net/core/dev.c b/net/core/dev.c index 46a4830c42ebf0bd9f58a4e7d5a2c4ef1319c535..ab6dc94dfb4a72cb77d36efc1f67a0d66652f32f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2815,9 +2815,9 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); - } else if (illegal_highdma(skb->dev, skb)) { - features &= ~NETIF_F_SG; } + if (illegal_highdma(skb->dev, skb)) + features &= ~NETIF_F_SG; return features; } @@ -4453,7 +4453,9 @@ static void skb_gro_reset_offset(struct sk_buff *skb) pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); + NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, + skb_frag_size(frag0), + skb->end - skb->tail); } } diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 72cfb0c611254cbe48885e4d3a7b497e91d94fed..ca2c9c8b9a3e9897ecba1b32c869a6be75068950 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -80,6 +80,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) struct nlattr *nla; struct sk_buff *skb; unsigned long flags; + void *msg_header; al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); @@ -87,21 +88,41 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) skb = genlmsg_new(al, GFP_KERNEL); - if (skb) { - genlmsg_put(skb, 0, 0, &net_drop_monitor_family, - 0, NET_DM_CMD_ALERT); - nla = nla_reserve(skb, NLA_UNSPEC, - sizeof(struct net_dm_alert_msg)); - msg = nla_data(nla); - memset(msg, 0, al); - } else { - mod_timer(&data->send_timer, jiffies + HZ / 10); + if (!skb) + goto err; + + msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + if (!msg_header) { + nlmsg_free(skb); + skb = NULL; + goto err; + } + nla = nla_reserve(skb, NLA_UNSPEC, + sizeof(struct net_dm_alert_msg)); + if (!nla) { + nlmsg_free(skb); + skb = NULL; + goto err; } + msg = nla_data(nla); + memset(msg, 0, al); + goto out; +err: + mod_timer(&data->send_timer, jiffies + HZ / 10); +out: spin_lock_irqsave(&data->lock, flags); swap(data->skb, skb); spin_unlock_irqrestore(&data->lock, flags); + if (skb) { + struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; + struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh); + + genlmsg_end(skb, genlmsg_data(gnlh)); + } + return skb; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index a384a10beb4fa5d0f75fcceb4ac14dcdee35f080..5de436a73be254325d678735ee279331e96ef148 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -18,6 +18,11 @@ #include #include +static const struct fib_kuid_range fib_kuid_range_unset = { + KUIDT_INIT(0), + KUIDT_INIT(~0), +}; + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -33,8 +38,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->table = table; r->flags = flags; r->fr_net = ops->fro_net; - r->uid_start = INVALID_UID; - r->uid_end = INVALID_UID; + r->uid_range = fib_kuid_range_unset; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; @@ -174,21 +178,32 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } 
EXPORT_SYMBOL_GPL(fib_rules_unregister); -static inline kuid_t fib_nl_uid(struct nlattr *nla) +static int uid_range_set(struct fib_kuid_range *range) { - return make_kuid(current_user_ns(), nla_get_u32(nla)); + return uid_valid(range->start) && uid_valid(range->end); } -static int nla_put_uid(struct sk_buff *skb, int idx, kuid_t uid) +static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb) { - return nla_put_u32(skb, idx, from_kuid_munged(current_user_ns(), uid)); + struct fib_rule_uid_range *in; + struct fib_kuid_range out; + + in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]); + + out.start = make_kuid(current_user_ns(), in->start); + out.end = make_kuid(current_user_ns(), in->end); + + return out; } -static int fib_uid_range_match(struct flowi *fl, struct fib_rule *rule) +static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) { - return (!uid_valid(rule->uid_start) && !uid_valid(rule->uid_end)) || - (uid_gte(fl->flowi_uid, rule->uid_start) && - uid_lte(fl->flowi_uid, rule->uid_end)); + struct fib_rule_uid_range out = { + from_kuid_munged(current_user_ns(), range->start), + from_kuid_munged(current_user_ns(), range->end) + }; + + return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); } static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, @@ -212,7 +227,8 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg)) goto out; - if (!fib_uid_range_match(fl, rule)) + if (uid_lt(fl->flowi_uid, rule->uid_range.start) || + uid_gt(fl->flowi_uid, rule->uid_range.end)) goto out; ret = ops->match(rule, fl, flags); @@ -451,25 +467,27 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (rule->l3mdev && rule->table) goto errout_free; + if (tb[FRA_UID_RANGE]) { + if (current_user_ns() != net->user_ns) { + err = -EPERM; + goto errout_free; + } + + rule->uid_range = nla_get_kuid_range(tb); + + if (!uid_range_set(&rule->uid_range) || + !uid_lte(rule->uid_range.start, rule->uid_range.end)) + goto errout_free; + } else { + rule->uid_range = fib_kuid_range_unset; + } + if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; goto errout_free; } - /* UID start and end must either both be valid or both unspecified. 
*/ - rule->uid_start = rule->uid_end = INVALID_UID; - if (tb[FRA_UID_START] || tb[FRA_UID_END]) { - if (tb[FRA_UID_START] && tb[FRA_UID_END]) { - rule->uid_start = fib_nl_uid(tb[FRA_UID_START]); - rule->uid_end = fib_nl_uid(tb[FRA_UID_END]); - } - if (!uid_valid(rule->uid_start) || - !uid_valid(rule->uid_end) || - !uid_lte(rule->uid_start, rule->uid_end)) - goto errout_free; - } - err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; @@ -532,6 +550,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; struct nlattr *tb[FRA_MAX+1]; + struct fib_kuid_range range; int err = -EINVAL; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) @@ -551,6 +570,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (err < 0) goto errout; + if (tb[FRA_UID_RANGE]) { + range = nla_get_kuid_range(tb); + if (!uid_range_set(&range)) + goto errout; + } else { + range = fib_kuid_range_unset; + } + list_for_each_entry(rule, &ops->rules_list, list) { if (frh->action && (frh->action != rule->action)) continue; @@ -587,12 +614,9 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV]))) continue; - if (tb[FRA_UID_START] && - !uid_eq(rule->uid_start, fib_nl_uid(tb[FRA_UID_START]))) - continue; - - if (tb[FRA_UID_END] && - !uid_eq(rule->uid_end, fib_nl_uid(tb[FRA_UID_END]))) + if (uid_range_set(&range) && + (!uid_eq(rule->uid_range.start, range.start) || + !uid_eq(rule->uid_range.end, range.end))) continue; if (!ops->compare(rule, frh, tb)) @@ -663,8 +687,7 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ + nla_total_size_64bit(8) /* FRA_TUN_ID */ - + nla_total_size(4) /* FRA_UID_START */ - + nla_total_size(4); /* FRA_UID_END */ + + nla_total_size(sizeof(struct fib_kuid_range)); if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -725,10 +748,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) || (rule->l3mdev && nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) || - (uid_valid(rule->uid_start) && - nla_put_uid(skb, FRA_UID_START, rule->uid_start)) || - (uid_valid(rule->uid_end) && - nla_put_uid(skb, FRA_UID_END, rule->uid_end))) + (uid_range_set(&rule->uid_range) && + nla_put_uid_range(skb, &rule->uid_range))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c6d8207ffa7e9ec6247a608bbbfbb5fc6a1076f7..32e4e0158846b18bdbd7f482847d987a105ef1cf 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -445,8 +445,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb, if (hdr->flags & GRE_ACK) offset += sizeof(((struct pptp_gre_header *)0)->ack); - ppp_hdr = skb_header_pointer(skb, nhoff + offset, - sizeof(_ppp_hdr), _ppp_hdr); + ppp_hdr = __skb_header_pointer(skb, nhoff + offset, + sizeof(_ppp_hdr), + data, hlen, _ppp_hdr); if (!ppp_hdr) goto out_bad; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index e5f84c26ba1a6fd89d5651d0408c8d6ef123db7b..afa64f086d8720ac13e7302fed043f9c6bf49d47 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef CONFIG_MODULES @@ -65,6 +66,15 @@ EXPORT_SYMBOL(lwtunnel_state_alloc); static const struct lwtunnel_encap_ops __rcu * lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; +void 
lwtstate_free(struct lwtunnel_state *lws) +{ + const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; + + kfree(lws); + module_put(ops->owner); +} +EXPORT_SYMBOL(lwtstate_free); + int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, unsigned int num) { @@ -110,25 +120,77 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type, ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); + if (likely(ops && ops->build_state && try_module_get(ops->owner))) { + ret = ops->build_state(dev, encap, family, cfg, lws); + if (ret) + module_put(ops->owner); + } + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_build_state); + +int lwtunnel_valid_encap_type(u16 encap_type) +{ + const struct lwtunnel_encap_ops *ops; + int ret = -EINVAL; + + if (encap_type == LWTUNNEL_ENCAP_NONE || + encap_type > LWTUNNEL_ENCAP_MAX) + return ret; + + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[encap_type]); + rcu_read_unlock(); #ifdef CONFIG_MODULES if (!ops) { const char *encap_type_str = lwtunnel_encap_str(encap_type); if (encap_type_str) { - rcu_read_unlock(); + __rtnl_unlock(); request_module("rtnl-lwt-%s", encap_type_str); + rtnl_lock(); + rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); + rcu_read_unlock(); } } #endif - if (likely(ops && ops->build_state)) - ret = ops->build_state(dev, encap, family, cfg, lws); - rcu_read_unlock(); + return ops ? 0 : -EOPNOTSUPP; +} +EXPORT_SYMBOL(lwtunnel_valid_encap_type); - return ret; +int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) +{ + struct rtnexthop *rtnh = (struct rtnexthop *)attr; + struct nlattr *nla_entype; + struct nlattr *attrs; + struct nlattr *nla; + u16 encap_type; + int attrlen; + + while (rtnh_ok(rtnh, remaining)) { + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + attrs = rtnh_attrs(rtnh); + nla = nla_find(attrs, attrlen, RTA_ENCAP); + nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); + + if (nla_entype) { + encap_type = nla_get_u16(nla_entype); + + if (lwtunnel_valid_encap_type(encap_type) != 0) + return -EOPNOTSUPP; + } + } + rtnh = rtnh_next(rtnh, &remaining); + } + + return 0; } -EXPORT_SYMBOL(lwtunnel_build_state); +EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr); int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a6196cf844f6644782fa8c3a743f35db7b9af33e..b7f9ae7b1c5f9702b3766f28554a1d8a75a5af7d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3886,6 +3886,9 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) u32 filter_mask; int err; + if (nlmsg_len(nlh) < sizeof(*ifsm)) + return -EINVAL; + ifsm = nlmsg_data(nlh); if (ifsm->ifindex > 0) dev = __dev_get_by_index(net, ifsm->ifindex); @@ -3935,6 +3938,9 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = net->dev_base_seq; + if (nlmsg_len(cb->nlh) < sizeof(*ifsm)) + return -EINVAL; + ifsm = nlmsg_data(cb->nlh); filter_mask = ifsm->filter_mask; if (!filter_mask) diff --git a/net/core/sock.c b/net/core/sock.c index 00a074dbfe9bf169c2b81498e6ae265199745b22..87a740b48f49dfb5ae3a3121f2c7fc363b6e375d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -222,7 +222,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , - 
"sk_lock-AF_MAX" + "sk_lock-AF_QIPCRTR", "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , @@ -239,7 +239,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , - "slock-AF_MAX" + "slock-AF_QIPCRTR", "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , @@ -256,7 +256,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , - "clock-AF_MAX" + "clock-AF_QIPCRTR", "clock-AF_MAX" }; /* @@ -2436,8 +2436,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_type = sock->type; sk->sk_wq = sock->wq; sock->sk = sk; - } else + sk->sk_uid = SOCK_INODE(sock)->i_uid; + } else { sk->sk_wq = NULL; + sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); + } rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_callback_lock, diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 5fff951a0a4928ccf28fb681be86c7df6a05e94f..da38621245458bae2506b0c030d92315f1be5775 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -394,9 +394,11 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) return err; } - err = dsa_cpu_port_ethtool_setup(dst->ds[0]); - if (err) - return err; + if (dst->ds[0]) { + err = dsa_cpu_port_ethtool_setup(dst->ds[0]); + if (err) + return err; + } /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get @@ -433,7 +435,8 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dsa_ds_unapply(dst, ds); } - dsa_cpu_port_ethtool_restore(dst->ds[0]); + if (dst->ds[0]) + dsa_cpu_port_ethtool_restore(dst->ds[0]); pr_info("DSA: tree %d unapplied\n", dst->tree); dst->applied = false; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 30e2e21d76196fdfc404ab036a3e6efb9905d3b8..3ff9d97cf56b381ca670532c2003066420dac08e 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1201,6 +1201,8 @@ int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); + netif_device_detach(slave_dev); + if (p->phy) { phy_stop(p->phy); p->old_pause = -1; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 121384bbb40ba24163c5aca2b84c8bd6bfb38f93..18412f989b32a690d28a06c4acaf9a6612e6ca84 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -85,7 +86,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (tb) return tb; - if (id == RT_TABLE_LOCAL) + if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules) alias = fib_new_table(net, RT_TABLE_MAIN); tb = fib_trie_table(id, alias); @@ -677,6 +678,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_mx_len = nla_len(attr); break; case RTA_MULTIPATH: + err = lwtunnel_valid_encap_type_attr(nla_data(attr), + nla_len(attr)); + if (err < 0) + goto errout; cfg->fc_mp = nla_data(attr); cfg->fc_mp_len = nla_len(attr); break; @@ -691,6 +696,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, break; case RTA_ENCAP_TYPE: 
cfg->fc_encap_type = nla_get_u16(attr); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type); + if (err < 0) + goto errout; break; } } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 388d3e21629b17b9a212234352ac7644da4fb7d6..6a4068031aaa418a19f2ecf24915b19a1c43b0b9 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1278,8 +1278,9 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) goto nla_put_failure; #endif - if (fi->fib_nh->nh_lwtstate) - lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate); + if (fi->fib_nh->nh_lwtstate && + lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0) + goto nla_put_failure; } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fi->fib_nhs > 1) { @@ -1315,8 +1316,10 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) goto nla_put_failure; #endif - if (nh->nh_lwtstate) - lwtunnel_fill_encap(skb, nh->nh_lwtstate); + if (nh->nh_lwtstate && + lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0) + goto nla_put_failure; + /* length of rtnetlink header + attributes */ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; } endfor_nexthops(fi); @@ -1617,8 +1620,13 @@ void fib_select_multipath(struct fib_result *res, int hash) void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, int mp_hash) { + bool oif_check; + + oif_check = (fl4->flowi4_oif == 0 || + fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF); + #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { + if (res->fi->fib_nhs > 1 && oif_check) { if (mp_hash < 0) mp_hash = get_hash_from_flowi4(fl4) >> 1; @@ -1628,7 +1636,7 @@ void fib_select_path(struct net *net, struct fib_result *res, #endif if (!res->prefixlen && res->table->tb_num_default > 1 && - res->type == RTN_UNICAST && !fl4->flowi4_oif) + res->type == RTN_UNICAST && oif_check) fib_select_default(fl4, res); if (!fl4->saddr) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 48734ee6293f3383e064ba18734abc485b1c3639..691146abde2df65345165516b4aad819d51968f2 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -425,6 +425,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_mark = mark; + fl4.flowi4_uid = sock_net_uid(net, NULL); fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); @@ -473,6 +474,7 @@ static struct rtable *icmp_route_lookup(struct net *net, param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; fl4->flowi4_mark = mark; + fl4->flowi4_uid = sock_net_uid(net, NULL); fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 15db786d50ed28c7d31855a9582ab111752b3641..32a08bc010bfc0c885efb320a5e67ea19e5292f4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -219,9 +219,14 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) static void igmp_gq_start_timer(struct in_device *in_dev) { int tv = prandom_u32() % in_dev->mr_maxdelay; + unsigned long exp = jiffies + tv + 2; + + if (in_dev->mr_gq_running && + time_after_eq(exp, (in_dev->mr_gq_timer).expires)) + return; in_dev->mr_gq_running = 1; - if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2)) + if (!mod_timer(&in_dev->mr_gq_timer, exp)) in_dev_hold(in_dev); } diff --git a/net/ipv4/inet_connection_sock.c 
b/net/ipv4/inet_connection_sock.c index a16b4399049570554db03bfd325bc61394d4bb62..d5d3ead0a6c31e42e8843d30f8c643324a91b8e9 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -415,7 +415,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num), sock_i_uid((struct sock *)sk)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -452,7 +452,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num), sock_i_uid((struct sock *)sk)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b8a2d63d1fb82f5084a0d98911b8110816dee963..f226f4086e05f0493b28024753676a72018e02a9 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -137,7 +137,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); __be16 *ports = (__be16 *)skb_transport_header(skb); - if (skb_transport_offset(skb) + 4 > skb->len) + if (skb_transport_offset(skb) + 4 > (int)skb->len) return; /* All current transport protocols have the port numbers in the @@ -1202,8 +1202,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) * which has interface index (iif) as the first member of the * underlying inet{6}_skb_parm struct. This code then overlays * PKTINFO_SKB_CB and in_pktinfo also has iif as the first - * element so the iif is picked up from the prior IPCB + * element so the iif is picked up from the prior IPCB. If iif + * is the loopback interface, then return the sending interface + * (e.g., process binds socket to eth0 for Tx which is + * redirected to loopback in the rtable/dst). 
*/ + if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) + pktinfo->ipi_ifindex = inet_iif(skb); + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index fed3d29f9eb3b716664b8d9eba052695cbb867bd..0fd1976ab63bbd9be357e41b9acd71dcfa507665 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -313,6 +313,7 @@ static const struct lwtunnel_encap_ops ip_tun_lwt_ops = { .fill_encap = ip_tun_fill_encap_info, .get_encap_size = ip_tun_encap_nlsize, .cmp_encap = ip_tun_cmp_encap, + .owner = THIS_MODULE, }; static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { @@ -403,6 +404,7 @@ static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = { .fill_encap = ip6_tun_fill_encap_info, .get_encap_size = ip6_tun_encap_nlsize, .cmp_encap = ip_tun_cmp_encap, + .owner = THIS_MODULE, }; void __init ip_tunnel_core_init(void) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2dcd16ef26382f3eed045b3de90d7e6053958659..5b2635e69a926899f05e2dad4c0fdb10a95acc76 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -794,7 +794,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, - sock_i_uid(sk)); + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 65c3bc947eca853e866a59d9538ecf68ed779421..7525f5eab622b95e34ece02f821feddb4a3e1d9c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -604,8 +604,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), - daddr, saddr, 0, 0, - sock_i_uid(sk)); + daddr, saddr, 0, 0, sk->sk_uid); if (!inet->hdrincl) { rfv.msg = msg; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8cb3f524ac4f0efe7acfeb591b598117fbfb4ae8..5ba912da4dc51bcf9b757bdb7a1e8cb457665725 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -507,7 +507,8 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); -static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, +static void __build_flow_key(const struct net *net, struct flowi4 *fl4, + const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, u8 prot, u32 mark, int flow_flags) @@ -524,19 +525,20 @@ static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, RT_SCOPE_UNIVERSE, prot, flow_flags, iph->daddr, iph->saddr, 0, 0, - sk ? sock_i_uid(sk) : GLOBAL_ROOT_UID); + sock_net_uid(net, sk)); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, struct sock *sk) { + const struct net *net = dev_net(skb->dev); const struct iphdr *iph = ip_hdr(skb); int oif = skb->dev->ifindex; u8 tos = RT_TOS(iph->tos); u8 prot = iph->protocol; u32 mark = skb->mark; - __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0); } static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk) @@ -553,8 +555,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, inet->hdrincl ? 
IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), - daddr, inet->inet_saddr, 0, 0, - sock_i_uid(sk)); + daddr, inet->inet_saddr, 0, 0, sk->sk_uid); rcu_read_unlock(); } @@ -804,7 +805,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; - __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1022,7 +1023,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, if (!mark) mark = IP4_REPLY_MARK(net, skb->mark); - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1038,7 +1039,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); if (!fl4.flowi4_mark) fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); @@ -1057,6 +1058,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct rtable *rt; struct dst_entry *odst = NULL; bool new = false; + struct net *net = sock_net(sk); bh_lock_sock(sk); @@ -1070,7 +1072,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; } - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); rt = (struct rtable *)odst; if (odst->obsolete && !odst->ops->check(odst, 0)) { @@ -1110,7 +1112,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1125,9 +1127,10 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; + struct net *net = sock_net(sk); - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); - rt = __ip_route_output_key(sock_net(sk), &fl4); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); @@ -1904,7 +1907,8 @@ out: return err; } } - rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type, + rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, + flags | RTCF_LOCAL, res.type, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; @@ -2441,7 +2445,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = fl4->flowi4_tos; - r->rtm_table = table_id; + r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table_id)) goto nla_put_failure; r->rtm_type = rt->rt_type; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 63d07b8094461823624a05fdbbc017cfa8389fce..0dc6286272aae651874851cd4e40d81f1d27866a 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -371,9 +371,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), - (opt && opt->srr) ? 
opt->faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, th->source, th->dest, - sock_i_uid(sk)); + opt->srr ? opt->faddr : ireq->ir_rmt_addr, + ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 4e777a3243f94457d9928e3967bb83947da563f6..dd2560c83a8592359af70919bddb4a628630b0b3 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -113,7 +113,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, struct tcp_fastopen_cookie tmp; if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { - struct in6_addr *buf = (struct in6_addr *) tmp.val; + struct in6_addr *buf = &tmp.addr; int i; for (i = 0; i < 4; i++) @@ -205,6 +205,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, * scaled. So correct it appropriately. */ tp->snd_wnd = ntohs(tcp_hdr(skb)->window); + tp->max_window = tp->snd_wnd; /* Activate the retrans timer so that SYNACK can be retransmitted. * The request socket is not added to the ehash diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2259114c7242c72cadc149073c8101101c198483..eb5a0e1fb187500a7726f7b708920657e5f9094c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -691,6 +691,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) offsetof(struct inet_timewait_sock, tw_bound_dev_if)); arg.tos = ip_hdr(skb)->tos; + arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -711,7 +712,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct net *net, +static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, @@ -726,6 +727,7 @@ static void tcp_v4_send_ack(struct net *net, #endif ]; } rep; + struct net *net = sock_net(sk); struct ip_reply_arg arg; memset(&rep.th, 0, sizeof(struct tcphdr)); @@ -775,6 +777,7 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + arg.uid = sock_net_uid(net, sk_fullsock(sk) ? 
sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -790,7 +793,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(sock_net(sk), skb, + tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, @@ -818,7 +821,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, * exception of segments, MUST be right-shifted by * Rcv.Wind.Shift bits: */ - tcp_v4_send_ack(sock_net(sk), skb, seq, + tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3e6daef27af903f86895d614f803bc526eecb1a5..5093bb842056690f616d169ce39a3bb9cc56ce24 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1020,7 +1020,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) RT_SCOPE_UNIVERSE, sk->sk_protocol, flow_flags, faddr, saddr, dport, inet->inet_sport, - sock_i_uid(sk)); + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e03efc6db3b1e21de10b972e650cc912d976f062..965ca8615b5b797ba632ba3565fe02409b7fd851 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5554,8 +5554,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); @@ -5564,7 +5563,6 @@ static void addrconf_disable_change(struct net *net, __s32 newf) dev_disable_change(idev); } } - rcu_read_unlock(); } static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) @@ -5949,6 +5947,13 @@ static const struct ctl_table addrconf_sysctl[] = { }, #endif #endif + { + .procname = "accept_ra_rt_table", + .data = &ipv6_devconf.accept_ra_rt_table, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "proxy_ndp", .data = &ipv6_devconf.proxy_ndp, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e29162d32b62539ae849971bc8389a921cae1c8e..02816456a53ecd79e8e933fd7adec4a49b5c93e0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -694,7 +694,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; - fl6.flowi6_uid = sock_i_uid(sk); + fl6.flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); rcu_read_lock(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index c52b8fc904c978344f9aaafd7d6de3b43108cf2f..189eb10b742d02fa5b39ac7206703e31e30c3cf7 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -662,9 +662,10 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 
50dacc84270507548dd11b0ccde5b086c9cfb32a..1529833f085c49de5d29e0fbfe44c4782bcc1a0b 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -54,7 +54,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; fl6->flowlabel = np->flow_label; - fl6->flowi6_uid = sock_i_uid(sk); + fl6->flowi6_uid = sk->sk_uid; if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; @@ -701,7 +701,7 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, struct sockaddr_in6 sin6; __be16 *ports = (__be16 *) skb_transport_header(skb); - if (skb_transport_offset(skb) + 4 <= skb->len) { + if (skb_transport_offset(skb) + 4 <= (int)skb->len) { /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 17df6bbebcffbecc1e8d883997c33766f6f30e33..cbcdd5db31f473f75011c2346345dd752c9a7424 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -474,9 +474,10 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 15db3758ff55026b65676f48391bd540a43cf6a4..17fa28f7a0ffe85f489ab5000b7936c1330922de 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -92,9 +92,10 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) - ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); else if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) @@ -486,6 +487,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); @@ -660,6 +662,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index e50c27a93e17d8e44260d89bbbdf70c046e665cb..f3db364fc85360c00e3a8acabf6d88f0985e51d7 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -164,6 +164,7 @@ static const struct lwtunnel_encap_ops ila_encap_ops = { .fill_encap = ila_fill_encap_info, .get_encap_size = ila_encap_nlsize, .cmp_encap = ila_encap_cmp, + .owner = THIS_MODULE, }; int ila_lwt_init(void) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index dd6f8aa35a00bfcb14c3741107dda4c34fa889f6..1c86c478f578b49373e61a4c397f23f3dc7f3fc6 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -88,7 +88,7 @@ struct dst_entry 
*inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); - fl6->flowi6_uid = sock_i_uid((struct sock *)sk); + fl6->flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p); @@ -137,7 +137,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; - fl6->flowi6_uid = sock_i_uid(sk); + fl6->flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); rcu_read_lock(); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d7d6d3ae0b3b62423c4c41317ad22fbe491eac75..710bc79f91131be75dc70c49c4b9459fc29cf502 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -548,6 +548,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) return -1; @@ -602,6 +604,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) return -1; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 89c59e656f44939863ceada610d3442d2de666ba..fc7b4017ba241f9dd39d49bd6258ecd4a16e3a3a 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -191,6 +191,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, ops = rcu_dereference(inet6_offloads[proto]); if (!ops || !ops->callbacks.gro_receive) { __pskb_pull(skb, skb_gro_offset(skb)); + skb_gro_frag0_invalidate(skb); proto = ipv6_gso_pull_exthdrs(skb, proto); skb_gro_pull(skb, -skb_transport_offset(skb)); skb_reset_transport_header(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index d76674efe523cf041bf2ef57f5b861b3707dd05e..c1f497bb398fafdfe07b7e57b402a36043dc1ebd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1108,7 +1108,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - psh_hlen; + mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen; if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1117,7 +1117,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, mtu = IPV6_MIN_MTU; if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu && !skb_is_gso(skb)) { + if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; goto tx_err_dst_release; @@ -1248,6 +1248,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1326,6 +1328,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d58480a9215e3694c07075fb23b1f67f3bf90600..3bce120cf5a4b3b85f42adc86f41bb63cc17c9da 100644 --- 
a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -608,9 +608,10 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index b247baceb797d004b0b911366fff75498c8a9379..54d165b9845a02c24f735477f48abdb0c944bae2 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -74,9 +74,10 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d11c46833d615b394797e193008f1cc8e4592935..39970e212ad574ae36406b5354e0550698249387 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -26,6 +26,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct flowi6 fl6 = { .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .flowi6_mark = skb->mark, + .flowi6_uid = sock_net_uid(net, skb->sk), .daddr = iph->daddr, .saddr = iph->saddr, }; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 505c44239d40e4999b63396b193ac21d213dd3d0..e1f8b34d7a2ef8fb232826747d2ffe6652c1cb06 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -113,7 +113,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.daddr = *daddr; fl6.flowi6_oif = oif; fl6.flowi6_mark = sk->sk_mark; - fl6.flowi6_uid = sock_i_uid(sk); + fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index f7b0b5985f8f160b115179be3258377ef13d193c..566537510c26488d889a95d4608ab0ae706a70c2 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -589,7 +589,11 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, } offset += skb_transport_offset(skb); - BUG_ON(skb_copy_bits(skb, offset, &csum, 2)); + err = skb_copy_bits(skb, offset, &csum, 2); + if (err < 0) { + ip6_flush_pending_frames(sk); + goto out; + } /* in case cksum was not initialized */ if (unlikely(csum)) @@ -774,7 +778,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; - fl6.flowi6_uid = sock_i_uid(sk); + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 77e65471d3198108315fa435ad7185a7b0da3dbe..477600fea8761a6a505cbcd8b3dbc3773461ab22 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1428,7 +1428,7 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) struct dst_entry *dst; ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark, sock_i_uid(sk)); + sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || @@ -1520,7 +1520,8 @@ static struct dst_entry *ip6_route_redirect(struct net *net, flags, __ip6_route_redirect); } -void ip6_redirect(struct sk_buff *skb, 
struct net *net, int oif, u32 mark) +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1533,6 +1534,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1554,6 +1556,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, fl6.flowi6_mark = mark; fl6.daddr = msg->dest; fl6.saddr = iph->daddr; + fl6.flowi6_uid = sock_net_uid(net, NULL); dst = ip6_route_redirect(net, &fl6, &iph->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1562,7 +1565,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { - ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, + sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); @@ -2329,12 +2333,12 @@ static struct rt6_info *rt6_get_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr) { + u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO); struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), - addrconf_rt_table(dev, RT6_TABLE_INFO)); + table = fib6_get_table(dev_net(dev), tb_id); if (!table) return NULL; @@ -2373,7 +2377,7 @@ static struct rt6_info *rt6_add_route_info(struct net_device *dev, .fc_nlinfo.nl_net = dev_net(dev), }; - cfg.fc_table = l3mdev_fib_table_by_index(dev_net(dev), dev->ifindex) ? : addrconf_rt_table(dev, RT6_TABLE_INFO); + cfg.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO), cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; @@ -2389,11 +2393,11 @@ static struct rt6_info *rt6_add_route_info(struct net_device *dev, struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) { + u32 tb_id = l3mdev_fib_table(dev) ? 
: addrconf_rt_table(dev, RT6_TABLE_MAIN); struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), - addrconf_rt_table(dev, RT6_TABLE_MAIN)); + table = fib6_get_table(dev_net(dev), tb_id); if (!table) return NULL; @@ -2438,7 +2442,6 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, return rt6_get_dflt_router(gwaddr, dev); } - int rt6_addrconf_purge(struct rt6_info *rt, void *arg) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) @@ -2851,6 +2854,11 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_MULTIPATH]) { cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); + + err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, + cfg->fc_mp_len); + if (err < 0) + goto errout; } if (tb[RTA_PREF]) { @@ -2864,9 +2872,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_ENCAP]) cfg->fc_encap = tb[RTA_ENCAP]; - if (tb[RTA_ENCAP_TYPE]) + if (tb[RTA_ENCAP_TYPE]) { cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type); + if (err < 0) + goto errout; + } + if (tb[RTA_EXPIRES]) { unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); @@ -3272,7 +3285,8 @@ static int rt6_fill_node(struct net *net, if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) goto nla_put_failure; - lwtunnel_fill_encap(skb, rt->dst.lwtstate); + if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -3346,6 +3360,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) nla_get_u32(tb[RTA_UID])); else fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); + if (iif) { struct net_device *dev; int flags = 0; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7042046983826597a66109e515105c502eb51697..97830a6a9cbb25fc805a3c6a07afb47e5e6d7388 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -227,7 +227,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; - fl6.flowi6_uid = sock_i_uid(sk); + fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6e24ed22ae34830b611572556096f3a7c14f6784..28ec0a2e7b729517ed33b9f9f7ab426fec0522fb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -233,7 +233,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - fl6.flowi6_uid = sock_i_uid(sk); + fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); @@ -829,6 +829,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; + fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? 
sk : NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d541bf1d1540aff41a3a4393b1aced5d987ea3f6..f6fbd257cf7ec8314f7588dc745d369c2ebeca62 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1156,7 +1156,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; - fl6.flowi6_uid = sock_i_uid(sk); + fl6.flowi6_uid = sk->sk_uid; sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 02b45a8e8b35729a65c6d796136a846aa2df648b..91cbbf1c3f82daff512009be723ed507bf19c9d2 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1036,7 +1036,8 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); - size_t headroom, linear; + size_t headroom = 0; + size_t linear; struct sk_buff *skb; struct iucv_message txmsg = {0}; struct cmsghdr *cmsg; @@ -1114,18 +1115,20 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, * this is fine for SOCK_SEQPACKET (unless we want to support * segmented records using the MSG_EOR flag), but * for SOCK_STREAM we might want to improve it in future */ - headroom = (iucv->transport == AF_IUCV_TRANS_HIPER) - ? sizeof(struct af_iucv_trans_hdr) + ETH_HLEN : 0; - if (headroom + len < PAGE_SIZE) { + if (iucv->transport == AF_IUCV_TRANS_HIPER) { + headroom = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; linear = len; } else { - /* In nonlinear "classic" iucv skb, - * reserve space for iucv_array - */ - if (iucv->transport != AF_IUCV_TRANS_HIPER) - headroom += sizeof(struct iucv_array) * - (MAX_SKB_FRAGS + 1); - linear = PAGE_SIZE - headroom; + if (len < PAGE_SIZE) { + linear = len; + } else { + /* In nonlinear "classic" iucv skb, + * reserve space for iucv_array + */ + headroom = sizeof(struct iucv_array) * + (MAX_SKB_FRAGS + 1); + linear = PAGE_SIZE - headroom; + } } skb = sock_alloc_send_pskb(sk, headroom + linear, len - linear, noblock, &err, 0); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index aa821cb639e541fa85a062deeee43516e2d4da8d..f092ac441fdda5cdaf294005d28d586c216d2f26 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -525,6 +525,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index f6749dced021bc8176c3a8ae1b4fe263bd6730f6..3b5fd4188f2ac7c67c269ad425812221294c823e 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -315,11 +315,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, mutex_lock(&sta->ampdu_mlme.mtx); if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) { - tid_agg_rx = rcu_dereference_protected( - sta->ampdu_mlme.tid_rx[tid], - lockdep_is_held(&sta->ampdu_mlme.mtx)); - - if (tid_agg_rx->dialog_token == dialog_token) { + if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) { ht_dbg_ratelimited(sta->sdata, "updated AddBA Req from %pM on tid %u\n", sta->sta.addr, tid); @@ -396,7 +392,6 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, } /* update data */ - tid_agg_rx->dialog_token = dialog_token; tid_agg_rx->ssn = start_seq_num; tid_agg_rx->head_seq_num = start_seq_num; tid_agg_rx->buf_size = buf_size; @@ 
-418,6 +413,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, if (status == WLAN_STATUS_SUCCESS) { __set_bit(tid, sta->ampdu_mlme.agg_session_valid); __clear_bit(tid, sta->ampdu_mlme.unexpected_agg); + sta->ampdu_mlme.tid_rx_token[tid] = dialog_token; } mutex_unlock(&sta->ampdu_mlme.mtx); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index a2fcdb47a0e68443baecfc805bfc33e617dd8146..14ec63a026693c7bcd0711605d41450b034d1a74 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -205,7 +205,7 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_rx); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - tid_rx ? tid_rx->dialog_token : 0); + tid_rx ? sta->ampdu_mlme.tid_rx_token[i] : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", tid_rx ? tid_rx->ssn : 0); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7486f2dab4ba70ade0b2faf48ffa7f3880ac2ad4..1118c61f835dde97b801531163f22ede2177592d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2510,7 +2510,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, } static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, - bool assoc) + bool assoc, bool abandon) { struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; @@ -2533,6 +2533,9 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); mutex_unlock(&sdata->local->mtx); + + if (abandon) + cfg80211_abandon_assoc(sdata->dev, assoc_data->bss); } kfree(assoc_data); @@ -2762,7 +2765,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, bssid, reason_code, ieee80211_get_reason_code_string(reason_code)); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); return; @@ -3167,14 +3170,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied association (code=%d)\n", mgmt->sa, status_code); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); event.u.mlme.status = MLME_DENIED; event.u.mlme.reason = status_code; drv_event_callback(sdata->local, sdata, &event); } else { if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); return; } @@ -3187,7 +3190,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, * recalc after assoc_data is NULL but before associated * is set can cause the interface to go idle */ - ieee80211_destroy_assoc_data(sdata, true); + ieee80211_destroy_assoc_data(sdata, true, false); /* get uapsd queues configuration */ uapsd_queues = 0; @@ -3886,7 +3889,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) .u.mlme.status = MLME_TIMEOUT, }; - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); drv_event_callback(sdata->local, sdata, &event); } @@ -4025,7 +4028,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) 
WLAN_REASON_DEAUTH_LEAVING, false, frame_buf); if (ifmgd->assoc_data) - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); if (ifmgd->auth_data) ieee80211_destroy_auth_data(sdata, false); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, @@ -4907,7 +4910,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code); @@ -4982,7 +4985,7 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) sdata_lock(sdata); if (ifmgd->assoc_data) { struct cfg80211_bss *bss = ifmgd->assoc_data->bss; - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); } if (ifmgd->auth_data) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a47bbc973f2dbc629aa8ab6ed91c928784cd26c6..2384b4aae0648f6f9f78f9a8861b55a979116e26 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3939,21 +3939,31 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, u64_stats_update_end(&stats->syncp); if (fast_rx->internal_forward) { - struct sta_info *dsta = sta_info_get(rx->sdata, skb->data); + struct sk_buff *xmit_skb = NULL; + bool multicast = is_multicast_ether_addr(skb->data); - if (dsta) { + if (multicast) { + xmit_skb = skb_copy(skb, GFP_ATOMIC); + } else if (sta_info_get(rx->sdata, skb->data)) { + xmit_skb = skb; + skb = NULL; + } + + if (xmit_skb) { /* * Send to wireless media and increase priority by 256 * to keep the received priority instead of * reclassifying the frame (see cfg80211_classify8021d). */ - skb->priority += 256; - skb->protocol = htons(ETH_P_802_3); - skb_reset_network_header(skb); - skb_reset_mac_header(skb); - dev_queue_xmit(skb); - return true; + xmit_skb->priority += 256; + xmit_skb->protocol = htons(ETH_P_802_3); + skb_reset_network_header(xmit_skb); + skb_reset_mac_header(xmit_skb); + dev_queue_xmit(xmit_skb); } + + if (!skb) + return true; } /* deliver to local stack */ diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index ed5fcb984a01a51ce08caa32101b50265509eb3f..dd06ef0b88614566ea4eb751a314856d38fb0e39 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -184,7 +184,6 @@ struct tid_ampdu_tx { * @ssn: Starting Sequence Number expected to be aggregated. * @buf_size: buffer size for incoming A-MPDUs * @timeout: reset timer value (in TUs). - * @dialog_token: dialog token for aggregation session * @rcu_head: RCU head used for freeing this struct * @reorder_lock: serializes access to reorder buffer, see below. * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and @@ -213,7 +212,6 @@ struct tid_ampdu_rx { u16 ssn; u16 buf_size; u16 timeout; - u8 dialog_token; bool auto_seq; bool removed; }; @@ -225,6 +223,7 @@ struct tid_ampdu_rx { * to tid_tx[idx], which are protected by the sta spinlock) * tid_start_tx is also protected by sta->lock. 
* @tid_rx: aggregation info for Rx per TID -- RCU protected + * @tid_rx_token: dialog tokens for valid aggregation sessions * @tid_rx_timer_expired: bitmap indicating on which TIDs the * RX timer expired until the work for it runs * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the @@ -243,6 +242,7 @@ struct sta_ampdu_mlme { struct mutex mtx; /* rx */ struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS]; + u8 tid_rx_token[IEEE80211_NUM_TIDS]; unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index bd5f4be89435eb6ef20b7103c7627f7bc10e9cce..dd190ff3daea27a78265fb1bcbd6aba989d60e10 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3262,7 +3262,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2); int hw_headroom = sdata->local->hw.extra_tx_headroom; struct ethhdr eth; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *info; struct ieee80211_hdr *hdr = (void *)fast_tx->hdr; struct ieee80211_tx_data tx; ieee80211_tx_result r; @@ -3326,6 +3326,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN); memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN); + info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); info->band = fast_tx->band; info->control.vif = &sdata->vif; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 15fe97644ffe048c9b1d5818c0b1aec56eb988f4..5b77377e5a15474e39037be5e6e873ebceb33555 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -98,18 +98,19 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); -static u32 mpls_multipath_hash(struct mpls_route *rt, - struct sk_buff *skb, bool bos) +static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb) { struct mpls_entry_decoded dec; + unsigned int mpls_hdr_len = 0; struct mpls_shim_hdr *hdr; bool eli_seen = false; int label_index; u32 hash = 0; - for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; + for (label_index = 0; label_index < MAX_MP_SELECT_LABELS; label_index++) { - if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) + mpls_hdr_len += sizeof(*hdr); + if (!pskb_may_pull(skb, mpls_hdr_len)) break; /* Read and decode the current label */ @@ -134,37 +135,38 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, eli_seen = true; } - bos = dec.bos; - if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index + - sizeof(struct iphdr))) { + if (!dec.bos) + continue; + + /* found bottom label; does skb have room for a header? 
*/ + if (pskb_may_pull(skb, mpls_hdr_len + sizeof(struct iphdr))) { const struct iphdr *v4hdr; - v4hdr = (const struct iphdr *)(mpls_hdr(skb) + - label_index); + v4hdr = (const struct iphdr *)(hdr + 1); if (v4hdr->version == 4) { hash = jhash_3words(ntohl(v4hdr->saddr), ntohl(v4hdr->daddr), v4hdr->protocol, hash); } else if (v4hdr->version == 6 && - pskb_may_pull(skb, sizeof(*hdr) * label_index + - sizeof(struct ipv6hdr))) { + pskb_may_pull(skb, mpls_hdr_len + + sizeof(struct ipv6hdr))) { const struct ipv6hdr *v6hdr; - v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) + - label_index); - + v6hdr = (const struct ipv6hdr *)(hdr + 1); hash = __ipv6_addr_jhash(&v6hdr->saddr, hash); hash = __ipv6_addr_jhash(&v6hdr->daddr, hash); hash = jhash_1word(v6hdr->nexthdr, hash); } } + + break; } return hash; } static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, - struct sk_buff *skb, bool bos) + struct sk_buff *skb) { int alive = ACCESS_ONCE(rt->rt_nhn_alive); u32 hash = 0; @@ -180,7 +182,7 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, if (alive <= 0) return NULL; - hash = mpls_multipath_hash(rt, skb, bos); + hash = mpls_multipath_hash(rt, skb); nh_index = hash % alive; if (alive == rt->rt_nhn) goto out; @@ -278,17 +280,11 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, hdr = mpls_hdr(skb); dec = mpls_entry_decode(hdr); - /* Pop the label */ - skb_pull(skb, sizeof(*hdr)); - skb_reset_network_header(skb); - - skb_orphan(skb); - rt = mpls_route_input_rcu(net, dec.label); if (!rt) goto drop; - nh = mpls_select_multipath(rt, skb, dec.bos); + nh = mpls_select_multipath(rt, skb); if (!nh) goto drop; @@ -297,6 +293,12 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, if (!mpls_output_possible(out_dev)) goto drop; + /* Pop the label */ + skb_pull(skb, sizeof(*hdr)); + skb_reset_network_header(skb); + + skb_orphan(skb); + if (skb_warn_if_lro(skb)) goto drop; diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index cf52cf30ac4b505bf1b10545de2fb9d588943731..bc9aaf58c7ccfd9578d115fa7ff97e7e7ed750ad 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -218,6 +218,7 @@ static const struct lwtunnel_encap_ops mpls_iptun_ops = { .fill_encap = mpls_fill_encap_info, .get_encap_size = mpls_encap_nlsize, .cmp_encap = mpls_encap_cmp, + .owner = THIS_MODULE, }; static int __init mpls_iptunnel_init(void) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index fecefa2dc94e129935bcf9b5d1ff117e58566b9f..eab210bb1ef0f5ad6b93f8a2e597f47d789b57b9 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -514,7 +514,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, int hooknum, nh_off, err = NF_ACCEPT; nh_off = skb_network_offset(skb); - skb_pull(skb, nh_off); + skb_pull_rcsum(skb, nh_off); /* See HOOK2MANIP(). */ if (maniptype == NF_NAT_MANIP_SRC) @@ -579,6 +579,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, err = nf_nat_packet(ct, ctinfo, hooknum, skb); push: skb_push(skb, nh_off); + skb_postpush_rcsum(skb, skb->data, nh_off); return err; } @@ -890,7 +891,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, /* The conntrack module expects to be working at L3. 
*/ nh_ofs = skb_network_offset(skb); - skb_pull(skb, nh_ofs); + skb_pull_rcsum(skb, nh_ofs); if (key->ip.frag != OVS_FRAG_TYPE_NONE) { err = handle_fragments(net, key, info->zone.id, skb); @@ -904,6 +905,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, err = ovs_ct_lookup(net, key, info, skb); skb_push(skb, nh_ofs); + skb_postpush_rcsum(skb, skb->data, nh_ofs); if (err) kfree_skb(skb); return err; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index dd2332390c45bbff7c3fc5d259453f2e1ca352bf..94e4a5941d89e8e06194cce44eb35025fb7c3cda 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1972,7 +1972,7 @@ static int __packet_rcv_vnet(const struct sk_buff *skb, { *vnet_hdr = (const struct virtio_net_hdr) { 0 }; - if (virtio_net_hdr_from_skb(skb, vnet_hdr, vio_le())) + if (virtio_net_hdr_from_skb(skb, vnet_hdr, vio_le(), true)) BUG(); return 0; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f893d180da1caa3b6dd1cc8773920beb1885f9b0..c6c2a93cc2a23ab2b572d9ece58c8c3c6990638d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -903,8 +903,6 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; - if (event == RTM_GETACTION) - act->tcfa_refcnt++; list_add_tail(&act->list, &actions); } @@ -917,7 +915,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, return ret; } err: - tcf_action_destroy(&actions, 0); + if (event != RTM_GETACTION) + tcf_action_destroy(&actions, 0); return ret; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b05d4a2155b0b08ae7bb57c5bda2be2ee234679a..c1a4b5d30814af521bc7f0102304e62b9d9db06c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) unsigned long cl; unsigned long fh; int err; - int tp_created = 0; + int tp_created; if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: + tp_created = 0; + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); if (err < 0) return err; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 904442421db3afae405548a9f5c316ce65e22de3..eee299bb6bcffbea0a20c2be595f3080e53dee1e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -149,10 +149,14 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, switch (ip_tunnel_info_af(info)) { case AF_INET: + skb_key.enc_control.addr_type = + FLOW_DISSECTOR_KEY_IPV4_ADDRS; skb_key.enc_ipv4.src = key->u.ipv4.src; skb_key.enc_ipv4.dst = key->u.ipv4.dst; break; case AF_INET6: + skb_key.enc_control.addr_type = + FLOW_DISSECTOR_KEY_IPV6_ADDRS; skb_key.enc_ipv6.src = key->u.ipv6.src; skb_key.enc_ipv6.dst = key->u.ipv6.dst; break; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f23ad913dc7a070407813b44acb500a5f7c148e9..ca12aa346c0d05aeb7e2468de6ffc8051a87c093 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4479,9 +4479,10 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), rcu_read_lock(); transport = sctp_addrs_lookup_transport(net, laddr, paddr); - if (!transport || !sctp_transport_hold(transport)) + if (!transport || !sctp_transport_hold(transport)) { + rcu_read_unlock(); goto out; - + } rcu_read_unlock(); err = cb(transport, p); sctp_transport_put(transport); diff --git a/net/socket.c b/net/socket.c index 73dc69f9681e7b63d3916d561812cf8419764f31..5f4ec66570e1fb4b959733db63fd69cbcf08b922 100644 --- a/net/socket.c 
+++ b/net/socket.c @@ -533,8 +533,22 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, return used; } +int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) +{ + int err = simple_setattr(dentry, iattr); + + if (!err && (iattr->ia_valid & ATTR_UID)) { + struct socket *sock = SOCKET_I(d_inode(dentry)); + + sock->sk->sk_uid = iattr->ia_uid; + } + + return err; +} + static const struct inode_operations sockfs_inode_ops = { .listxattr = sockfs_listxattr, + .setattr = sockfs_setattr, }; /** diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 3dfd769dc5b51a724f20273eb0c52e5d6e25e9f1..16cea00c959b65e43cc52c1a2d6712ff970cf682 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -541,9 +541,13 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred) return gss_new; gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { - int res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); + int res; + atomic_inc(&gss_msg->count); + res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); if (res) { gss_unhash_msg(gss_new); + atomic_dec(&gss_msg->count); + gss_release_msg(gss_new); gss_msg = ERR_PTR(res); } } else @@ -836,6 +840,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) warn_gssd(); gss_release_msg(gss_msg); } + gss_release_msg(gss_msg); } static void gss_pipe_dentry_destroy(struct dentry *dir, diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index dc6fb79a361f1ca3ab9869fc02ba05c1a533ad9b..25d9a9cf7b66b7f4e501d38d91f6a1908830972e 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -260,7 +260,7 @@ static int gssx_dec_option_array(struct xdr_stream *xdr, if (!oa->data) return -ENOMEM; - creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL); + creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL); if (!creds) { kfree(oa->data); return -ENOMEM; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 45662d7f0943c671297955e0c44e7632460c2c68..6fdffde28733dfabd50a4f8d2a27b4bc0a4ce36d 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1489,7 +1489,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_PROC_DESTROY: if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; - rsci->h.expiry_time = get_seconds(); + rsci->h.expiry_time = seconds_since_boot(); set_bit(CACHE_NEGATIVE, &rsci->h.flags); if (resv->iov_len + 4 > PAGE_SIZE) goto drop; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 62a482790937b54a5d486bc932289b0fb14da248..b2ae4f150ec639a2c962725e0b134c4ca264ba50 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -336,6 +336,11 @@ static int rpc_client_register(struct rpc_clnt *clnt, static DEFINE_IDA(rpc_clids); +void rpc_cleanup_clids(void) +{ + ida_destroy(&rpc_clids); +} + static int rpc_alloc_clid(struct rpc_clnt *clnt) { int clid; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index ee5d3d253102bf5d81a39f953248a6a6ca7a38d6..3142f38d11046207dba951fb5f2133535e6c2a6d 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -119,6 +119,7 @@ init_sunrpc(void) static void __exit cleanup_sunrpc(void) { + rpc_cleanup_clids(); rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 3bc1d61694cbbbf7a094a1849b747b65760550b2..9c9db55a0c1e1735e522e406797c4efb25930b07 100644 --- a/net/sunrpc/svc_xprt.c +++ 
b/net/sunrpc/svc_xprt.c @@ -799,6 +799,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { dprintk("svc_recv: found XPT_CLOSE\n"); + if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags)) + xprt->xpt_ops->xpo_kill_temp_xprt(xprt); svc_delete_xprt(xprt); /* Leave XPT_BUSY set on the dead xprt: */ goto out; @@ -1020,9 +1022,11 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) le = to_be_closed.next; list_del_init(le); xprt = list_entry(le, struct svc_xprt, xpt_list); - dprintk("svc_age_temp_xprts_now: closing %p\n", xprt); - xprt->xpt_ops->xpo_kill_temp_xprt(xprt); - svc_close_xprt(xprt); + set_bit(XPT_CLOSE, &xprt->xpt_flags); + set_bit(XPT_KILL_TEMP, &xprt->xpt_flags); + dprintk("svc_age_temp_xprts_now: queuing xprt %p for closing\n", + xprt); + svc_xprt_enqueue(xprt); } } EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now); diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 26b26beef2d4a6dd7ef9d31f09de7fe51504d841..adbf52c6df833b01548e1e08a5d1565c0ec9c5af 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -421,7 +421,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; - DECR_CQCOUNT(&r_xprt->rx_ep); + rpcrdma_set_signaled(&r_xprt->rx_ep, &reg_wr->wr); rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr); if (rc) goto out_senderr; @@ -486,7 +486,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mw *mw, *tmp; struct rpcrdma_frmr *f; - int rc; + int count, rc; dprintk("RPC: %s: req %p\n", __func__, req); @@ -496,6 +496,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * a single ib_post_send() call. */ f = NULL; + count = 0; invalidate_wrs = pos = prev = NULL; list_for_each_entry(mw, &req->rl_registered, mw_list) { if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) && @@ -505,6 +506,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) } pos = __frwr_prepare_linv_wr(mw); + count++; if (!invalidate_wrs) invalidate_wrs = pos; @@ -523,7 +525,12 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) f->fr_invwr.send_flags = IB_SEND_SIGNALED; f->fr_cqe.done = frwr_wc_localinv_wake; reinit_completion(&f->fr_linv_done); - INIT_CQCOUNT(&r_xprt->rx_ep); + + /* Initialize CQ count, since there is always a signaled + * WR being posted here. The new cqcount depends on how + * many SQEs are about to be consumed. + */ + rpcrdma_init_cqcount(&r_xprt->rx_ep, count); /* Transport disconnect drains the receive CQ before it * replaces the QP.
The RPC reply handler won't call us diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 20027f8de129e61faba9037d552eb826ecc34ae4..6035c5a380a6b10198a01eada0732f46ca2e93fc 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -359,6 +359,7 @@ xprt_setup_rdma_bc(struct xprt_create *args) out_fail: xprt_rdma_free_addresses(xprt); args->bc_xprt->xpt_bc_xprt = NULL; + args->bc_xprt->xpt_bc_xps = NULL; xprt_put(xprt); xprt_free(xprt); return ERR_PTR(-EINVAL); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index ad1df979b3f07224457d5723d53946659adcae18..a47c9bdef5fa959f29b586820c717ab8483e9cd0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -348,8 +348,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, atomic_inc(&rdma_stat_read); return ret; err: - ib_dma_unmap_sg(xprt->sc_cm_id->device, - frmr->sg, frmr->sg_nents, frmr->direction); svc_rdma_put_context(ctxt, 0); svc_rdma_put_frmr(xprt, frmr); return ret; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index ec74289af7ec90b7f33a2ec2e2157c588ed0eeb3..8da7f6a4dfc38788e942b3f142b9a7ba86725a59 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -223,8 +223,8 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, cdata->inline_rsize = rsize; if (wsize < cdata->inline_wsize) cdata->inline_wsize = wsize; - pr_info("rpcrdma: max send %u, max recv %u\n", - cdata->inline_wsize, cdata->inline_rsize); + dprintk("RPC: %s: max send %u, max recv %u\n", + __func__, cdata->inline_wsize, cdata->inline_rsize); rpcrdma_set_max_header_sizes(r_xprt); } @@ -532,7 +532,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; /* always signal? */ - INIT_CQCOUNT(ep); + rpcrdma_init_cqcount(ep, 0); init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); @@ -1311,13 +1311,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, dprintk("RPC: %s: posting %d s/g entries\n", __func__, send_wr->num_sge); - if (DECR_CQCOUNT(ep) > 0) - send_wr->send_flags = 0; - else { /* Provider must take a send completion every now and then */ - INIT_CQCOUNT(ep); - send_wr->send_flags = IB_SEND_SIGNALED; - } - + rpcrdma_set_signaled(ep, send_wr); rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); if (rc) goto out_postsend_err; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 6e1bba358203694e79cbc9074554b12053fa45c7..f6ae1b22da476b48d3f9736e7eeaf0625a913b5d 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -95,8 +95,24 @@ struct rpcrdma_ep { struct delayed_work rep_connect_worker; }; -#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) -#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) +static inline void +rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count) +{ + atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count); +} + +/* To update send queue accounting, provider must take a + * send completion every now and then. 
+ */ +static inline void +rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr) +{ + send_wr->send_flags = 0; + if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) { + rpcrdma_init_cqcount(ep, 0); + send_wr->send_flags = IB_SEND_SIGNALED; + } +} /* Pre-allocate extra Work Requests for handling backward receives * and sends. This is a fixed value because the Work Queues are diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2358f2690ec58c20aeb77fb60ac45b3db27ecac6..2d03d5bcb5b93259a181baa88dbef21ec25cc697 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -995,6 +995,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) unsigned int hash; struct unix_address *addr; struct hlist_head *list; + struct path path = { NULL, NULL }; err = -EINVAL; if (sunaddr->sun_family != AF_UNIX) @@ -1010,9 +1011,20 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; + if (sun_path[0]) { + umode_t mode = S_IFSOCK | + (SOCK_INODE(sock)->i_mode & ~current_umask()); + err = unix_mknod(sun_path, mode, &path); + if (err) { + if (err == -EEXIST) + err = -EADDRINUSE; + goto out; + } + } + err = mutex_lock_interruptible(&u->bindlock); if (err) - goto out; + goto out_put; err = -EINVAL; if (u->addr) @@ -1029,16 +1041,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) atomic_set(&addr->refcnt, 1); if (sun_path[0]) { - struct path path; - umode_t mode = S_IFSOCK | - (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = unix_mknod(sun_path, mode, &path); - if (err) { - if (err == -EEXIST) - err = -EADDRINUSE; - unix_release_addr(addr); - goto out_up; - } addr->hash = UNIX_HASH_SIZE; hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); @@ -1065,6 +1067,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) spin_unlock(&unix_table_lock); out_up: mutex_unlock(&u->bindlock); +out_put: + if (err) + path_put(&path); out: return err; } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a53b3a16b4f1f79554bcbc066a49d6a0e848f093..62c056ea403bc1ad3eaaa0efd81dca2353472fae 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -606,9 +606,9 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) return 0; pkt = virtio_transport_alloc_pkt(&info, 0, - le32_to_cpu(pkt->hdr.dst_cid), + le64_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port), - le32_to_cpu(pkt->hdr.src_cid), + le64_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); if (!pkt) return -ENOMEM; @@ -823,7 +823,7 @@ virtio_transport_send_response(struct vsock_sock *vsk, struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_RESPONSE, .type = VIRTIO_VSOCK_TYPE_STREAM, - .remote_cid = le32_to_cpu(pkt->hdr.src_cid), + .remote_cid = le64_to_cpu(pkt->hdr.src_cid), .remote_port = le32_to_cpu(pkt->hdr.src_port), .reply = true, }; @@ -863,9 +863,9 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) child->sk_state = SS_CONNECTED; vchild = vsock_sk(child); - vsock_addr_init(&vchild->local_addr, le32_to_cpu(pkt->hdr.dst_cid), + vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); - vsock_addr_init(&vchild->remote_addr, le32_to_cpu(pkt->hdr.src_cid), + vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); 
vsock_insert_connected(vchild); @@ -904,9 +904,9 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) struct sock *sk; bool space_available; - vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), + vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); - vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), + vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, diff --git a/net/wireless/core.h b/net/wireless/core.h index f0c0c8a48c920887cadbc714f3e55015e9b9761d..5f5867f90fed5e61992baf06fbf330333889a390 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -410,6 +410,7 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev); void cfg80211_sme_deauth(struct wireless_dev *wdev); void cfg80211_sme_auth_timeout(struct wireless_dev *wdev); void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev); +void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index cbb48e26a8715b8964333bf9b69c52010289bafb..76775a2b421db31c33a323f4a671f470099051db 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -149,6 +149,18 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss) } EXPORT_SYMBOL(cfg80211_assoc_timeout); +void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + + cfg80211_sme_abandon_assoc(wdev); + + cfg80211_unhold_bss(bss_from_pub(bss)); + cfg80211_put_bss(wiphy, bss); +} +EXPORT_SYMBOL(cfg80211_abandon_assoc); + void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 57a332f5124eb28853c5fe1824d22d7f1038dd7e..92db80dc651705ba49527aa076bee0710c12f174 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -414,6 +414,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, + [NL80211_ATTR_BSSID] = { .len = ETH_ALEN }, }; /* policy for the key attributes */ @@ -6677,7 +6678,20 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); - if (info->attrs[NL80211_ATTR_MAC]) + /* Initial implementation used NL80211_ATTR_MAC to set the specific + * BSSID to scan for. This was problematic because that same attribute + * was already used for another purpose (local random MAC address). The + * NL80211_ATTR_BSSID attribute was added to fix this. For backwards + * compatibility with older userspace components, also use the + * NL80211_ATTR_MAC value here if it can be determined to be used for + * the specific BSSID use case instead of the random MAC address + * (NL80211_ATTR_SCAN_FLAGS is used to enable random MAC address use). 
+ */ + if (info->attrs[NL80211_ATTR_BSSID]) + memcpy(request->bssid, + nla_data(info->attrs[NL80211_ATTR_BSSID]), ETH_ALEN); + else if (!(request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) && + info->attrs[NL80211_ATTR_MAC]) memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_MAC]), ETH_ALEN); else @@ -14391,13 +14405,17 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { bool schedule_destroy_work = false; - bool schedule_scan_stop = false; struct cfg80211_sched_scan_request *sched_scan_req = rcu_dereference(rdev->sched_scan_req); if (sched_scan_req && notify->portid && - sched_scan_req->owner_nlportid == notify->portid) - schedule_scan_stop = true; + sched_scan_req->owner_nlportid == notify->portid) { + sched_scan_req->owner_nlportid = 0; + + if (rdev->ops->sched_scan_stop && + rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + schedule_work(&rdev->sched_scan_stop_wk); + } list_for_each_entry_rcu(wdev, &rdev->wiphy.wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); @@ -14428,12 +14446,6 @@ static int nl80211_netlink_notify(struct notifier_block * nb, spin_unlock(&rdev->destroy_list_lock); schedule_work(&rdev->destroy_work); } - } else if (schedule_scan_stop) { - sched_scan_req->owner_nlportid = 0; - - if (rdev->ops->sched_scan_stop && - rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) - schedule_work(&rdev->sched_scan_stop_wk); } } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 5cf182ee42478f12b33eed713ed910d2dce944a5..8ae2e203021666fad66f0b8bc36bfa49675b46a4 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -39,6 +39,7 @@ struct cfg80211_conn { CFG80211_CONN_ASSOCIATING, CFG80211_CONN_ASSOC_FAILED, CFG80211_CONN_DEAUTH, + CFG80211_CONN_ABANDON, CFG80211_CONN_CONNECTED, } state; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; @@ -229,6 +230,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); + /* fall through */ + case CFG80211_CONN_ABANDON: /* free directly, disconnected event already sent */ cfg80211_sme_free(wdev); return 0; @@ -446,6 +449,17 @@ void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) schedule_work(&rdev->conn_work); } +void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = CFG80211_CONN_ABANDON; + schedule_work(&rdev->conn_work); +} + static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev, const u8 *ies, size_t ies_len, const u8 **out_ies, size_t *out_ies_len) diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index 950fd2e64bb73b9f261188bba272ea7e7ec10249..12262c0cc6914e6a5eebac1b887b129a15fb7466 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -39,6 +39,9 @@ #include "hash-map.h" #endif +#if BUILDING_GCC_VERSION >= 7000 +#include "memmodel.h" +#endif #include "emit-rtl.h" #include "debug.h" #include "target.h" @@ -91,6 +94,9 @@ #include "tree-ssa-alias.h" #include "tree-ssa.h" #include "stringpool.h" +#if BUILDING_GCC_VERSION >= 7000 +#include "tree-vrp.h" +#endif #include "tree-ssanames.h" #include "print-tree.h" #include "tree-eh.h" @@ -287,6 +293,22 @@ static inline struct cgraph_node *cgraph_next_function_with_gimple_body(struct c return NULL; } +static inline bool cgraph_for_node_and_aliases(cgraph_node_ptr node, bool 
(*callback)(cgraph_node_ptr, void *), void *data, bool include_overwritable) +{ + cgraph_node_ptr alias; + + if (callback(node, data)) + return true; + + for (alias = node->same_body; alias; alias = alias->next) { + if (include_overwritable || cgraph_function_body_availability(alias) > AVAIL_OVERWRITABLE) + if (cgraph_for_node_and_aliases(alias, callback, data, include_overwritable)) + return true; + } + + return false; +} + #define FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node) \ for ((node) = cgraph_first_function_with_gimple_body(); (node); \ (node) = cgraph_next_function_with_gimple_body(node)) @@ -399,6 +421,7 @@ typedef union gimple_statement_d gassign; typedef union gimple_statement_d gcall; typedef union gimple_statement_d gcond; typedef union gimple_statement_d gdebug; +typedef union gimple_statement_d ggoto; typedef union gimple_statement_d gphi; typedef union gimple_statement_d greturn; @@ -452,6 +475,16 @@ static inline const gdebug *as_a_const_gdebug(const_gimple stmt) return stmt; } +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return stmt; +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return stmt; +} + static inline gphi *as_a_gphi(gimple stmt) { return stmt; @@ -496,6 +529,14 @@ static inline const greturn *as_a_const_greturn(const_gimple stmt) typedef struct rtx_def rtx_insn; +static inline const char *get_decl_section_name(const_tree decl) +{ + if (DECL_SECTION_NAME(decl) == NULL_TREE) + return NULL; + + return TREE_STRING_POINTER(DECL_SECTION_NAME(decl)); +} + static inline void set_decl_section_name(tree node, const char *value) { if (value) @@ -511,6 +552,7 @@ typedef struct gimple_statement_base gassign; typedef struct gimple_statement_call gcall; typedef struct gimple_statement_base gcond; typedef struct gimple_statement_base gdebug; +typedef struct gimple_statement_base ggoto; typedef struct gimple_statement_phi gphi; typedef struct gimple_statement_base greturn; @@ -564,6 +606,16 @@ static inline const gdebug *as_a_const_gdebug(const_gimple stmt) return stmt; } +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return stmt; +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return stmt; +} + static inline gphi *as_a_gphi(gimple stmt) { return as_a(stmt); @@ -611,6 +663,11 @@ inline bool is_a_helper::test(const_gimple gs) #define INSN_DELETED_P(insn) (insn)->deleted() +static inline const char *get_decl_section_name(const_tree decl) +{ + return DECL_SECTION_NAME(decl); +} + /* symtab/cgraph related */ #define debug_cgraph_node(node) (node)->debug() #define cgraph_get_node(decl) cgraph_node::get(decl) @@ -619,6 +676,7 @@ inline bool is_a_helper::test(const_gimple gs) #define cgraph_n_nodes symtab->cgraph_count #define cgraph_max_uid symtab->cgraph_max_uid #define varpool_get_node(decl) varpool_node::get(decl) +#define dump_varpool_node(file, node) (node)->dump(file) #define cgraph_create_edge(caller, callee, call_stmt, count, freq, nest) \ (caller)->create_edge((callee), (call_stmt), (count), (freq)) @@ -674,6 +732,11 @@ static inline cgraph_node_ptr cgraph_alias_target(cgraph_node_ptr node) return node->get_alias_target(); } +static inline bool cgraph_for_node_and_aliases(cgraph_node_ptr node, bool (*callback)(cgraph_node_ptr, void *), void *data, bool include_overwritable) +{ + return node->call_for_symbol_thunks_and_aliases(callback, data, include_overwritable); +} + static inline struct cgraph_node_hook_list *cgraph_add_function_insertion_hook(cgraph_node_hook hook, void *data) { return 
symtab->add_cgraph_insertion_hook(hook, data); @@ -729,6 +792,13 @@ static inline gimple gimple_build_assign_with_ops(enum tree_code subcode, tree l return gimple_build_assign(lhs, subcode, op1, op2 PASS_MEM_STAT); } +template <> +template <> +inline bool is_a_helper::test(const_gimple gs) +{ + return gs->code == GIMPLE_GOTO; +} + template <> template <> inline bool is_a_helper::test(const_gimple gs) @@ -766,6 +836,16 @@ static inline const gcall *as_a_const_gcall(const_gimple stmt) return as_a(stmt); } +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return as_a(stmt); +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return as_a(stmt); +} + static inline gphi *as_a_gphi(gimple stmt) { return as_a(stmt); @@ -828,4 +908,9 @@ static inline void debug_gimple_stmt(const_gimple s) #define debug_gimple_stmt(s) debug_gimple_stmt(CONST_CAST_GIMPLE(s)) #endif +#if BUILDING_GCC_VERSION >= 7000 +#define get_inner_reference(exp, pbitsize, pbitpos, poffset, pmode, punsignedp, preversep, pvolatilep, keep_aligning) \ + get_inner_reference(exp, pbitsize, pbitpos, poffset, pmode, punsignedp, preversep, pvolatilep) +#endif + #endif diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 8160f1c1b56ed941a6e15f7c906f8fb990edcdd3..dff390f692a2c61e3117d2075dca40c13bef66b0 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -328,9 +328,9 @@ static enum tree_code get_op(tree *rhs) op = LROTATE_EXPR; /* * This code limits the value of random_const to - * the size of a wide int for the rotation + * the size of a long for the rotation */ - random_const &= HOST_BITS_PER_WIDE_INT - 1; + random_const %= TYPE_PRECISION(long_unsigned_type_node); break; } diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c index 8275f0e55106bc0055d0a1adfe2a169ad1b54193..4b2f44c20caf8941d150f261074b40d589a10376 100644 --- a/scripts/kconfig/nconf.gui.c +++ b/scripts/kconfig/nconf.gui.c @@ -364,12 +364,14 @@ int dialog_inputbox(WINDOW *main_window, WINDOW *prompt_win; WINDOW *form_win; PANEL *panel; - int i, x, y; + int i, x, y, lines, columns, win_lines, win_cols; int res = -1; int cursor_position = strlen(init); int cursor_form_win; char *result = *resultp; + getmaxyx(stdscr, lines, columns); + if (strlen(init)+1 > *result_len) { *result_len = strlen(init)+1; *resultp = result = realloc(result, *result_len); @@ -386,14 +388,19 @@ int dialog_inputbox(WINDOW *main_window, if (title) prompt_width = max(prompt_width, strlen(title)); + win_lines = min(prompt_lines+6, lines-2); + win_cols = min(prompt_width+7, columns-2); + prompt_lines = max(win_lines-6, 0); + prompt_width = max(win_cols-7, 0); + /* place dialog in middle of screen */ - y = (getmaxy(stdscr)-(prompt_lines+4))/2; - x = (getmaxx(stdscr)-(prompt_width+4))/2; + y = (lines-win_lines)/2; + x = (columns-win_cols)/2; strncpy(result, init, *result_len); /* create the windows */ - win = newwin(prompt_lines+6, prompt_width+7, y, x); + win = newwin(win_lines, win_cols, y, x); prompt_win = derwin(win, prompt_lines+1, prompt_width, 2, 2); form_win = derwin(win, 1, prompt_width, prompt_lines+3, 2); keypad(form_win, TRUE); diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 8ea9fd2b65736c42b055791ee88b9151a573a02d..3c575cd07888807d14693ad8475b6b5a953a1ca4 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -51,7 +51,7 @@ set_debarch() { debarch=hppa ;; mips*) debarch=mips$(grep -q CPU_LITTLE_ENDIAN=y 
$KCONFIG_CONFIG && echo el || true) ;; - arm64) + aarch64|arm64) debarch=arm64 ;; arm*) if grep -q CONFIG_AEABI=y $KCONFIG_CONFIG; then diff --git a/security/inode.c b/security/inode.c index c83db05c15aba95801c49418d51728d5774bfdf0..b4531f2be0f1bb17af411b519ea94e5f8433111e 100644 --- a/security/inode.c +++ b/security/inode.c @@ -100,7 +100,7 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode, dir = d_inode(parent); inode_lock(dir); - dentry = lookup_one_len(name, parent, strlen(name)); + dentry = lookup_one_len2(name, mount, parent, strlen(name)); if (IS_ERR(dentry)) goto out; diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index c07a3844ea0a290efcee652b3212483e4e58f067..3df46906492dec72a6381aa2a6aa67500e6d7883 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -401,7 +401,7 @@ static int ima_release_policy(struct inode *inode, struct file *file) const char *cause = valid_policy ? "completed" : "failed"; if ((file->f_flags & O_ACCMODE) == O_RDONLY) - return 0; + return seq_release(inode, file); if (valid_policy && ima_check_policy() < 0) { cause = "failed"; diff --git a/sound/firewire/tascam/tascam-stream.c b/sound/firewire/tascam/tascam-stream.c index 4ad3bd7fd4453e3a64fc4cd95001165510292559..f1657a4e0621ef4999349477ce6e71fdbcd7f411 100644 --- a/sound/firewire/tascam/tascam-stream.c +++ b/sound/firewire/tascam/tascam-stream.c @@ -343,7 +343,7 @@ int snd_tscm_stream_init_duplex(struct snd_tscm *tscm) if (err < 0) amdtp_stream_destroy(&tscm->rx_stream); - return 0; + return err; } /* At bus reset, streaming is stopped and some registers are clear. */ diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 7f57a145a47e1fe8b5a949511fdfa99c9c55c221..a03cf68d0bcd6ce350b658e96de7dc9b9561dd1a 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -884,6 +884,8 @@ void snd_hda_apply_fixup(struct hda_codec *codec, int action) } EXPORT_SYMBOL_GPL(snd_hda_apply_fixup); +#define IGNORE_SEQ_ASSOC (~(AC_DEFCFG_SEQUENCE | AC_DEFCFG_DEF_ASSOC)) + static bool pin_config_match(struct hda_codec *codec, const struct hda_pintbl *pins) { @@ -901,7 +903,7 @@ static bool pin_config_match(struct hda_codec *codec, for (; t_pins->nid; t_pins++) { if (t_pins->nid == nid) { found = 1; - if (t_pins->val == cfg) + if ((t_pins->val & IGNORE_SEQ_ASSOC) == (cfg & IGNORE_SEQ_ASSOC)) break; else if ((cfg & 0xf0000000) == 0x40000000 && (t_pins->val & 0xf0000000) == 0x40000000) break; diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index ad06866d7c69e46c61f20860be8d111fc7a54228..11b9b2f17a2ef66b7555c0bbbde8d31f6ca2a070 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -780,6 +780,7 @@ static const struct hda_pintbl alienware_pincfgs[] = { static const struct snd_pci_quirk ca0132_quirks[] = { SND_PCI_QUIRK(0x1028, 0x0685, "Alienware 15 2015", QUIRK_ALIENWARE), SND_PCI_QUIRK(0x1028, 0x0688, "Alienware 17 2015", QUIRK_ALIENWARE), + SND_PCI_QUIRK(0x1028, 0x0708, "Alienware 15 R2 2016", QUIRK_ALIENWARE), {} }; diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index ed62748a6d55a815527ad2c84b2b1e8ce839deee..c15c51bea26d0afdcc6d8c806993754eaaa2e031 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -262,6 +262,7 @@ enum { CXT_FIXUP_CAP_MIX_AMP_5047, CXT_FIXUP_MUTE_LED_EAPD, CXT_FIXUP_HP_SPECTRE, + CXT_FIXUP_HP_GATE_MIC, }; /* for hda_fixup_thinkpad_acpi() */ @@ -633,6 +634,17 
@@ static void cxt_fixup_cap_mix_amp_5047(struct hda_codec *codec, (1 << AC_AMPCAP_MUTE_SHIFT)); } +static void cxt_fixup_hp_gate_mic_jack(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* the mic pin (0x19) doesn't give an unsolicited event; + * probe the mic pin together with the headphone pin (0x16) + */ + if (action == HDA_FIXUP_ACT_PROBE) + snd_hda_jack_set_gating_jack(codec, 0x19, 0x16); +} + /* ThinkPad X200 & co with cxt5051 */ static const struct hda_pintbl cxt_pincfg_lenovo_x200[] = { { 0x16, 0x042140ff }, /* HP (seq# overridden) */ @@ -774,6 +786,10 @@ static const struct hda_fixup cxt_fixups[] = { { } } }, + [CXT_FIXUP_HP_GATE_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_hp_gate_mic_jack, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -824,6 +840,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC), SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), + SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ea81c08ddc7acf77dc7c4144a5b093b0d72ef867..758ac86a1d3a2167f3cb7205e7f8e36427dbed8b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2230,6 +2230,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1971, "Asus W2JC", ALC882_FIXUP_ASUS_W2JC), SND_PCI_QUIRK(0x1043, 0x835f, "Asus Eee 1601", ALC888_FIXUP_EEE1601), SND_PCI_QUIRK(0x1043, 0x84bc, "ASUS ET2700", ALC887_FIXUP_ASUS_BASS), + SND_PCI_QUIRK(0x1043, 0x8691, "ASUS ROG Ranger VIII", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT), SND_PCI_QUIRK(0x104d, 0x905a, "Sony Vaio Z", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP), @@ -5917,6 +5918,9 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60180}, {0x14, 0x90170120}, {0x21, 0x02211030}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x1b, 0x01011020}, + {0x21, 0x02211010}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, @@ -6940,6 +6944,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51), SND_PCI_QUIRK(0x1043, 0x17bd, "ASUS N751", ALC668_FIXUP_ASUS_Nx51), + SND_PCI_QUIRK(0x1043, 0x1963, "ASUS X71SL", ALC662_FIXUP_ASUS_MODE8), SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 25c6d87c818e7caf4bdd904b7b72d17887f58b25..f5a8050351b53a0fdfc60af61a176cf3e60b08a2 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -771,6 +771,9 @@ static int sst_soc_prepare(struct device 
*dev) struct sst_data *drv = dev_get_drvdata(dev); struct snd_soc_pcm_runtime *rtd; + if (!drv->soc_card) + return 0; + /* suspend all pcms first */ snd_soc_suspend(drv->soc_card->dev); snd_soc_poweroff(drv->soc_card->dev); @@ -793,6 +796,9 @@ static void sst_soc_complete(struct device *dev) struct sst_data *drv = dev_get_drvdata(dev); struct snd_soc_pcm_runtime *rtd; + if (!drv->soc_card) + return; + /* restart SSPs */ list_for_each_entry(rtd, &drv->soc_card->rtd_list, list) { struct snd_soc_dai *dai = rtd->cpu_dai; diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index 56056ed7fcfd1e90b48f96316e1750fd92e113df..16c94c45ce50a8672aa5c96703d255d9890f2051 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -44,6 +44,7 @@ struct cht_acpi_card { struct cht_mc_private { struct snd_soc_jack jack; struct cht_acpi_card *acpi_card; + char codec_name[16]; }; static inline struct snd_soc_dai *cht_get_codec_dai(struct snd_soc_card *card) @@ -354,7 +355,6 @@ static int snd_cht_mc_probe(struct platform_device *pdev) int i; struct cht_mc_private *drv; struct snd_soc_card *card = snd_soc_cards[0].soc_card; - char codec_name[16]; struct sst_acpi_mach *mach; const char *i2c_name = NULL; int dai_index = 0; @@ -374,12 +374,12 @@ static int snd_cht_mc_probe(struct platform_device *pdev) } card->dev = &pdev->dev; mach = card->dev->platform_data; - sprintf(codec_name, "i2c-%s:00", drv->acpi_card->codec_id); + sprintf(drv->codec_name, "i2c-%s:00", drv->acpi_card->codec_id); /* set correct codec name */ for (i = 0; i < ARRAY_SIZE(cht_dailink); i++) if (!strcmp(card->dai_link[i].codec_name, "i2c-10EC5645:00")) { - card->dai_link[i].codec_name = kstrdup(codec_name, GFP_KERNEL); + card->dai_link[i].codec_name = drv->codec_name; dai_index = i; } diff --git a/sound/soc/intel/skylake/skl-sst-utils.c b/sound/soc/intel/skylake/skl-sst-utils.c index 8dc03039b3113fa691626021ec4917a0155cf7e0..ea162fbf68e5b3038f99f1c7ef0ba24590e22c41 100644 --- a/sound/soc/intel/skylake/skl-sst-utils.c +++ b/sound/soc/intel/skylake/skl-sst-utils.c @@ -179,7 +179,7 @@ static inline int skl_getid_32(struct uuid_module *module, u64 *val, index = ffz(mask_val); pvt_id = index + word1_mask + word2_mask; if (pvt_id <= (max_inst - 1)) { - *val |= 1 << (index + word1_mask); + *val |= 1ULL << (index + word1_mask); return pvt_id; } } diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index b392e51de94d173a20b130ad663b6af7c1e25f19..420d200f9a0533bd335224784c2e7933a0c8c3b1 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -78,6 +78,9 @@ static int lpass_platform_pcmops_open(struct snd_pcm_substream *substream) dma_ch = 0; if (v->alloc_dma_channel) dma_ch = v->alloc_dma_channel(drvdata, dir); + else + dma_ch = 0; + if (dma_ch < 0) return dma_ch; diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index 7825bff45ae3a523450e6732b94056a288b032d3..85324e61cbd56da5c07a55ae02151516ce4a8f00 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -1029,12 +1029,13 @@ static int samsung_i2s_dai_probe(struct snd_soc_dai *dai) static int samsung_i2s_dai_remove(struct snd_soc_dai *dai) { struct i2s_dai *i2s = snd_soc_dai_get_drvdata(dai); + unsigned long flags; if (!is_secondary(i2s)) { if (i2s->quirks & QUIRK_NEED_RSTCLR) { - spin_lock(i2s->lock); + spin_lock_irqsave(i2s->lock, flags); writel(0, i2s->addr + I2SCON); - spin_unlock(i2s->lock); + spin_unlock_irqrestore(i2s->lock, flags); 
} } diff --git a/sound/usb/card.c b/sound/usb/card.c index 0d6aa4933ba69e304c16897bf1428e72d48116fd..90a4e687304a0e8411aeaa16427484b28c91024c 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -206,7 +206,6 @@ static int snd_usb_create_stream(struct snd_usb_audio *chip, int ctrlif, int int if (! snd_usb_parse_audio_interface(chip, interface)) { usb_set_interface(dev, interface, 0); /* reset the current interface */ usb_driver_claim_interface(&usb_audio_driver, iface, (void *)-1L); - return -EINVAL; } return 0; diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index c470251cea4b251269bcd3822f27d0e09e3cd1b2..c5251aaad8442ae3692048f649e8eeeed267d189 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -534,6 +534,11 @@ static int wait_clear_urbs(struct snd_usb_endpoint *ep) alive, ep->ep_num); clear_bit(EP_FLAG_STOPPING, &ep->flags); + ep->data_subs = NULL; + ep->sync_slave = NULL; + ep->retire_data_urb = NULL; + ep->prepare_data_urb = NULL; + return 0; } @@ -898,9 +903,7 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep, /** * snd_usb_endpoint_start: start an snd_usb_endpoint * - * @ep: the endpoint to start - * @can_sleep: flag indicating whether the operation is executed in - * non-atomic context + * @ep: the endpoint to start * * A call to this function will increment the use count of the endpoint. * In case it is not already running, the URBs for this endpoint will be @@ -910,7 +913,7 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep, * * Returns an error if the URB submission failed, 0 in all other cases. */ -int snd_usb_endpoint_start(struct snd_usb_endpoint *ep, bool can_sleep) +int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) { int err; unsigned int i; @@ -924,8 +927,6 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep, bool can_sleep) /* just to be sure */ deactivate_urbs(ep, false); - if (can_sleep) - wait_clear_urbs(ep); ep->active_mask = 0; ep->unlink_mask = 0; @@ -1006,10 +1007,6 @@ void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep) if (--ep->use_count == 0) { deactivate_urbs(ep, false); - ep->data_subs = NULL; - ep->sync_slave = NULL; - ep->retire_data_urb = NULL; - ep->prepare_data_urb = NULL; set_bit(EP_FLAG_STOPPING, &ep->flags); } } diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h index 6428392d8f6244688e65d8be3d04bc9c30c68ce5..584f295d7c7738a6b91753c8c48eed0bf101a200 100644 --- a/sound/usb/endpoint.h +++ b/sound/usb/endpoint.h @@ -18,7 +18,7 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep, struct audioformat *fmt, struct snd_usb_endpoint *sync_ep); -int snd_usb_endpoint_start(struct snd_usb_endpoint *ep, bool can_sleep); +int snd_usb_endpoint_start(struct snd_usb_endpoint *ep); void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep); void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep); int snd_usb_endpoint_activate(struct snd_usb_endpoint *ep); diff --git a/sound/usb/hiface/pcm.c b/sound/usb/hiface/pcm.c index 2c44139b404188607fc47e863aa9b33419ae5fcc..33db205dd12b57d8b53436d5fd2eb9ff6cc2a22a 100644 --- a/sound/usb/hiface/pcm.c +++ b/sound/usb/hiface/pcm.c @@ -445,6 +445,8 @@ static int hiface_pcm_prepare(struct snd_pcm_substream *alsa_sub) mutex_lock(&rt->stream_mutex); + hiface_pcm_stream_stop(rt); + sub->dma_off = 0; sub->period_off = 0; diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 4df74b36c8dc62d8ea53399ed7e4864ebf65a977..932ce3e6bfb94a751ba0a5021c99834f7dbfb72b 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -932,9 +932,10 @@ static void 
volume_control_quirks(struct usb_mixer_elem_info *cval, case USB_ID(0x046d, 0x0826): /* HD Webcam c525 */ case USB_ID(0x046d, 0x08ca): /* Logitech Quickcam Fusion */ case USB_ID(0x046d, 0x0991): + case USB_ID(0x046d, 0x09a2): /* QuickCam Communicate Deluxe/S7500 */ /* Most audio usb devices lie about volume resolution. * Most Logitech webcams have res = 384. - * Proboly there is some logitech magic behind this number --fishor + * Probably there is some logitech magic behind this number --fishor */ if (!strcmp(kctl->id.name, "Mic Capture Volume")) { usb_audio_info(chip, diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 44d178ee9177565bba3fde9d2ac2b7308e8dd7ae..48afae053c56f9ff75b72aa83a73a7d1ab267565 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -218,7 +218,7 @@ int snd_usb_init_pitch(struct snd_usb_audio *chip, int iface, } } -static int start_endpoints(struct snd_usb_substream *subs, bool can_sleep) +static int start_endpoints(struct snd_usb_substream *subs) { int err; @@ -231,7 +231,7 @@ static int start_endpoints(struct snd_usb_substream *subs, bool can_sleep) dev_dbg(&subs->dev->dev, "Starting data EP @%p\n", ep); ep->data_subs = subs; - err = snd_usb_endpoint_start(ep, can_sleep); + err = snd_usb_endpoint_start(ep); if (err < 0) { clear_bit(SUBSTREAM_FLAG_DATA_EP_STARTED, &subs->flags); return err; @@ -260,7 +260,7 @@ static int start_endpoints(struct snd_usb_substream *subs, bool can_sleep) dev_dbg(&subs->dev->dev, "Starting sync EP @%p\n", ep); ep->sync_slave = subs->data_endpoint; - err = snd_usb_endpoint_start(ep, can_sleep); + err = snd_usb_endpoint_start(ep); if (err < 0) { clear_bit(SUBSTREAM_FLAG_SYNC_EP_STARTED, &subs->flags); return err; @@ -839,7 +839,7 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) /* for playback, submit the URBs now; otherwise, the first hwptr_done * updates for all URBs would happen at the same time when starting */ if (subs->direction == SNDRV_PCM_STREAM_PLAYBACK) - ret = start_endpoints(subs, true); + ret = start_endpoints(subs); unlock: snd_usb_unlock_shutdown(subs->stream->chip); @@ -1655,7 +1655,7 @@ static int snd_usb_substream_capture_trigger(struct snd_pcm_substream *substream switch (cmd) { case SNDRV_PCM_TRIGGER_START: - err = start_endpoints(subs, false); + err = start_endpoints(subs); if (err < 0) return err; diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 2782155ae3ce02f2d7e31ebabd96245df479952c..93bb14e7e0f728afbbdfbffcb28d23fec744c1a3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1135,6 +1135,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */ case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ + case USB_ID(0x047F, 0x02F7): /* Plantronics BT-600 */ case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 72edf83d76b72188b193fc5f28934fa26dab2b1d..cffdd9cf3ebf764a1d4fbed2421045e4ee148c7b 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -366,7 +366,7 @@ ifndef NO_SDT endif ifdef PERF_HAVE_JITDUMP - ifndef NO_DWARF + ifndef NO_LIBELF $(call detected,CONFIG_JITDUMP) CFLAGS += -DHAVE_JITDUMP endif diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index d1ce29be560e5e7dad2a2faca9a68c2246510a11..cd7bc4d104e27e878e1ed694bf6be1d9b89d0a9d 100644 --- 
a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -70,8 +70,8 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), - OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"), - OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"), + OPT_BOOLEAN('U', "all-user", &all_user, "collect only user level data"), + OPT_BOOLEAN('K', "all-kernel", &all_kernel, "collect only kernel level data"), OPT_END() }; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c298bd3e1d909cfd808f911af1df053e608f55fe..21f8a81797a04e32b9fc496851e9332f58e5e6d9 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1452,7 +1452,7 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp duration = sample->time - ttrace->entry_time; - printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); + printed = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output); printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); ttrace->entry_pending = false; @@ -1499,7 +1499,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (sc->is_exit) { if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { - trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); + trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output); fprintf(trace->output, "%-70s)\n", ttrace->entry_str); } } else { @@ -1592,7 +1592,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (trace->summary_only) goto out; - trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); + trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output); if (ttrace->entry_pending) { fprintf(trace->output, "%-70s", ttrace->entry_str); diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index fd710ab33684e65e6b62acc62ac51b5e12d02b35..af1cfde6b97b12962045d9bc66126c217335ad21 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -42,7 +42,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, P_MMAP_FLAG(SHARED); P_MMAP_FLAG(PRIVATE); +#ifdef MAP_32BIT P_MMAP_FLAG(32BIT); +#endif P_MMAP_FLAG(ANONYMOUS); P_MMAP_FLAG(DENYWRITE); P_MMAP_FLAG(EXECUTABLE); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index eb60e613d7954319e8d2ee16f9a94eb1481160e1..1dc67efad6340f7aef55f1727f00f7f20d8baa2c 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -120,7 +120,7 @@ libperf-y += demangle-rust.o ifdef CONFIG_JITDUMP libperf-$(CONFIG_LIBELF) += jitdump.o libperf-$(CONFIG_LIBELF) += genelf.o -libperf-$(CONFIG_LIBELF) += genelf_debug.o +libperf-$(CONFIG_DWARF) += genelf_debug.o endif CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index aeb5a441bd7416744598f7dfa6d18809cf4fa883..430d039d00798e452d9f9996934d72a36eb6224e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -593,7 +593,8 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); - 
if (addr < sym->start || addr >= sym->end) { + if ((addr < sym->start || addr >= sym->end) && + (addr != sym->end || sym->start != sym->end)) { pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, __LINE__, sym->name, sym->start, addr, sym->end); return -ERANGE; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 07fd30bc2f816feeda146fe98343f5ce408884e8..ae58b493af4549d07558b9b730b9cc7f05759a7e 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -193,7 +193,6 @@ int perf_callchain_config(const char *var, const char *value) if (!strcmp(var, "record-mode")) return parse_callchain_record_opt(value, &callchain_param); -#ifdef HAVE_DWARF_UNWIND_SUPPORT if (!strcmp(var, "dump-size")) { unsigned long size = 0; int ret; @@ -203,7 +202,6 @@ int perf_callchain_config(const char *var, const char *value) return ret; } -#endif if (!strcmp(var, "print-type")) return parse_callchain_mode(value); if (!strcmp(var, "order")) diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 13e75549c4407dcf750c52e123ce3d48cc57ed2c..47cfd10809755f9f2ed3dfd9fdd5b13b46ad9330 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -11,11 +11,7 @@ #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n" -#ifdef HAVE_DWARF_UNWIND_SUPPORT # define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n" -#else -# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|lbr)\n" -#endif #define RECORD_SIZE_HELP \ HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording ()\n" \ diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index c1ef805c6a8f4decc9f5c5c613b6b590e8c31581..14a73acc549c9db4f64bcb324d5923a2bac5da2a 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -19,7 +19,9 @@ #include #include #include +#ifdef HAVE_DWARF_SUPPORT #include +#endif #include "perf.h" #include "genelf.h" @@ -157,7 +159,7 @@ gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *cod int jit_write_elf(int fd, uint64_t load_addr, const char *sym, const void *code, int csize, - void *debug, int nr_debug_entries) + void *debug __maybe_unused, int nr_debug_entries __maybe_unused) { Elf *e; Elf_Data *d; @@ -386,11 +388,14 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_size = sizeof(bnote); shdr->sh_entsize = 0; +#ifdef HAVE_DWARF_SUPPORT if (debug && nr_debug_entries) { retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); if (retval) goto error; - } else { + } else +#endif + { if (elf_update(e, ELF_C_WRITE) < 0) { warnx("elf_update 4 failed"); goto error; diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index 2fbeb59c4bdd91b2b9de943d153c3674c46ca125..5c933ac71451f778d4e9b6f9fef10b5c1d095f5b 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -4,8 +4,10 @@ /* genelf.c */ int jit_write_elf(int fd, uint64_t code_addr, const char *sym, const void *code, int csize, void *debug, int nr_debug_entries); +#ifdef HAVE_DWARF_SUPPORT /* genelf_debug.c */ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries); +#endif #if defined(__arm__) #define GEN_ELF_ARCH EM_ARM diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index aecff69a510df064975e8b4d25d0aba0e64879c1..f7b35e178582b6d427498638fb4744854c8e8b21 100644 --- a/tools/perf/util/symbol.c +++ 
b/tools/perf/util/symbol.c @@ -1459,7 +1459,8 @@ int dso__load(struct dso *dso, struct map *map) * Read the build id if possible. This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work */ - if (is_regular_file(dso->long_name) && + if (!dso->has_build_id && + is_regular_file(dso->long_name) && filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) dso__set_build_id(dso, build_id); diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 9df61059a85d2188f9d0deb5dbf2e8f1f60aed4b..a2fd6e79d5a5c5bb8e35f587e3e695e4f611879d 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -95,7 +95,8 @@ static void register_python_scripting(struct scripting_ops *scripting_ops) if (err) die("error registering py script extension"); - scripting_context = malloc(sizeof(struct scripting_context)); + if (scripting_context == NULL) + scripting_context = malloc(sizeof(*scripting_context)); } #ifdef NO_LIBPYTHON @@ -159,7 +160,8 @@ static void register_perl_scripting(struct scripting_ops *scripting_ops) if (err) die("error registering pl script extension"); - scripting_context = malloc(sizeof(struct scripting_context)); + if (scripting_context == NULL) + scripting_context = malloc(sizeof(*scripting_context)); } #ifdef NO_LIBPERL diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f770dba2a6f636024c5254b5f9a6d747e8abff3a..a899ef81c705bc130df005d23c53453b23d16462 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -87,7 +87,7 @@ ifdef INSTALL_PATH done; @# Ask all targets to emit their test scripts - echo "#!/bin/bash" > $(ALL_SCRIPT) + echo "#!/bin/sh" > $(ALL_SCRIPT) echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT) echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index c09a682df56ae9fb3cc91214ae9d79da19714633..16058bbea7a8501324ce6c9c639f5219d69979ba 100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh echo "--------------------" echo "running socket test" diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c index c22860ab973378f76417d2bc85f1daf2c828e0c7..30e1ac62e8cb4249350c89aa64163e3d4ee3bedd 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c @@ -66,7 +66,7 @@ int pmc56_overflow(void) FAIL_IF(ebb_event_enable(&event)); - mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + mtspr(SPRN_PMC2, pmc_sample_period(sample_period)); mtspr(SPRN_PMC5, 0); mtspr(SPRN_PMC6, 0); diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h index 845960e1cbf2f4c961518a6181e51e0078b967d0..c9ccfd42ec137d593913bffcca846e197d1cb9f4 100644 --- a/tools/virtio/linux/compiler.h +++ b/tools/virtio/linux/compiler.h @@ -4,6 +4,6 @@ #define WRITE_ONCE(var, val) \ (*((volatile typeof(val) *)(&(var))) = (val)) -#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var)))) +#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var)))) #endif diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh index 2e69ca812b4cf4b39d653cd774286587fcc87bc7..29b0d3920bfc412a049b1478a7074869cbd113df 100755 --- a/tools/virtio/ringtest/run-on-all.sh +++ 
b/tools/virtio/ringtest/run-on-all.sh @@ -1,12 +1,13 @@ #!/bin/sh +CPUS_ONLINE=$(lscpu --online -p=cpu|grep -v -e '#') #use last CPU for host. Why not the first? #many devices tend to use cpu0 by default so #it tends to be busier -HOST_AFFINITY=$(lscpu -p=cpu | tail -1) +HOST_AFFINITY=$(echo "${CPUS_ONLINE}"|tail -n 1) #run command on all cpus -for cpu in $(seq 0 $HOST_AFFINITY) +for cpu in $CPUS_ONLINE do #Don't run guest and host on same CPU #It actually works ok if using signalling diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 8cebfbc19e90ef053ec325f7d7d75ab7e626a049..539d3f5cb61934eed59ceb38f201da8e6fb53935 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -268,15 +268,11 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - mutex_lock(&kvm->lock); - dist->ready = false; dist->initialized = false; kfree(dist->spis); dist->nr_spis = 0; - - mutex_unlock(&kvm->lock); } void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -286,7 +282,8 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) INIT_LIST_HEAD(&vgic_cpu->ap_list_head); } -void kvm_vgic_destroy(struct kvm *kvm) +/* To be called with kvm->lock held */ +static void __kvm_vgic_destroy(struct kvm *kvm) { struct kvm_vcpu *vcpu; int i; @@ -297,6 +294,13 @@ void kvm_vgic_destroy(struct kvm *kvm) kvm_vgic_vcpu_destroy(vcpu); } +void kvm_vgic_destroy(struct kvm *kvm) +{ + mutex_lock(&kvm->lock); + __kvm_vgic_destroy(kvm); + mutex_unlock(&kvm->lock); +} + /** * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest * is a GICv2. A GICv3 must be explicitly initialized by the guest using the @@ -348,6 +352,10 @@ int kvm_vgic_map_resources(struct kvm *kvm) ret = vgic_v2_map_resources(kvm); else ret = vgic_v3_map_resources(kvm); + + if (ret) + __kvm_vgic_destroy(kvm); + out: mutex_unlock(&kvm->lock); return ret; diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 9bab86757fa4f3613c372fbc0250c146284306ff..834137e7b83ff0c37515a1c36300c24aeadb9925 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -293,8 +293,6 @@ int vgic_v2_map_resources(struct kvm *kvm) dist->ready = true; out: - if (ret) - kvm_vgic_destroy(kvm); return ret; } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 5c9f9745e6cab8284161397c3d810df65304fae8..e6b03fd8c374ca7a4dcb1e272141504f66c697d6 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -302,8 +302,6 @@ int vgic_v3_map_resources(struct kvm *kvm) dist->ready = true; out: - if (ret) - kvm_vgic_destroy(kvm); return ret; } diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c index 52abac4bb6a2532ad55f4ec690530a6a7b593df0..6d2fcd6fcb2509e801c84b88e6c18e3e7a1d1fa0 100644 --- a/virt/lib/irqbypass.c +++ b/virt/lib/irqbypass.c @@ -195,7 +195,7 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) mutex_lock(&lock); list_for_each_entry(tmp, &consumers, node) { - if (tmp->token == consumer->token) { + if (tmp->token == consumer->token || tmp == consumer) { mutex_unlock(&lock); module_put(THIS_MODULE); return -EBUSY; @@ -245,7 +245,7 @@ void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer) mutex_lock(&lock); list_for_each_entry(tmp, &consumers, node) { - if (tmp->token != consumer->token) + if (tmp != consumer) continue; list_for_each_entry(producer, &producers, node) {