diff --git a/.mailmap b/.mailmap
index 1469ff0d3f4d55dde07c676e4c4fc29f432d20f3..e18cab73e209a7b3057cfd672356dab6d9846483 100644
--- a/.mailmap
+++ b/.mailmap
@@ -107,6 +107,7 @@ Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
 Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
 Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
 Mark Brown <broonie@sirena.org.uk>
+Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
 Matthieu CASTET <castet.matthieu@free.fr>
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index d6d862db3b5d65fe09c26783298bba21ceec5558..bfd29bc8d37af1bbc4913deee9d941b1692bedda 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -375,3 +375,19 @@ Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	information about CPUs heterogeneity.
 
 		cpu_capacity: capacity of cpu#.
+
+What:		/sys/devices/system/cpu/vulnerabilities
+		/sys/devices/system/cpu/vulnerabilities/meltdown
+		/sys/devices/system/cpu/vulnerabilities/spectre_v1
+		/sys/devices/system/cpu/vulnerabilities/spectre_v2
+Date:		January 2018
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Information about CPU vulnerabilities
+
+		The files are named after the code names of CPU
+		vulnerabilities. The output of those files reflects the
+		state of the CPUs in the system. Possible output values:
+
+		"Not affected"	  CPU is not affected by the vulnerability
+		"Vulnerable"	  CPU is affected and no mitigation in effect
+		"Mitigation: $M"  CPU is affected and mitigation $M is in effect
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index b2598cc9834c3cbacfd9b3b1d885ed69c4c4c133..7242cbda15dd3bf8c4debea387ff2c8e376aeb60 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -109,6 +109,7 @@ parameter is applicable::
 	IPV6	IPv6 support is enabled.
 	ISAPNP	ISA PnP code is enabled.
 	ISDN	Appropriate ISDN support is enabled.
+	ISOL	CPU Isolation is enabled.
 	JOY	Appropriate joystick support is enabled.
 	KGDB	Kernel debugger support is enabled.
 	KVM	Kernel Virtual Machine support is enabled.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6571fbfdb2a1527c25b3a01e9c4228c84adce639..46b26bfee27ba81267703a1ceac7c0e082f1e5ec 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -328,11 +328,15 @@
 			not play well with APC CPU idle - disable it if you have
 			APC and your system crashes randomly.
 
-	apic=		[APIC,X86-32] Advanced Programmable Interrupt Controller
+	apic=		[APIC,X86] Advanced Programmable Interrupt Controller
 			Change the output verbosity whilst booting
 			Format: { quiet (default) | verbose | debug }
 			Change the amount of debugging information output
 			when initialising the APIC and IO-APIC components.
+			For X86-32, this can also be used to specify an APIC
+			driver name.
+			Format: apic=driver_name
+			Examples: apic=bigsmp
 
 	apic_extnmi=	[APIC,X86] External NMI delivery setting
 			Format: { bsp (default) | all | none }
@@ -709,9 +713,6 @@
 			It will be ignored when crashkernel=X,high is not used
 			or memory reserved is below 4G.
 
-	crossrelease_fullstack
-			[KNL] Allow to record full stack trace in cross-release
-
 	cryptomgr.notests
                         [KNL] Disable crypto self-tests
 
@@ -1737,7 +1738,7 @@
 	isapnp=		[ISAPNP]
 			Format: <RDP>,<reset>,<pci_scan>,<verbosity>
 
-	isolcpus=	[KNL,SMP] Isolate a given set of CPUs from disturbance.
+	isolcpus=	[KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance.
 			[Deprecated - use cpusets instead]
 			Format: [flag-list,]<cpu-list>
 
@@ -2622,6 +2623,11 @@
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
 
+	nospectre_v2	[X86] Disable all mitigations for the Spectre variant 2
+			(indirect branch prediction) vulnerability. System may
+			allow data leaks with this option, which is equivalent
+			to spectre_v2=off.
+
 	noxsave		[BUGS=X86] Disables x86 extended register state save
 			and restore using xsave. The kernel will fallback to
 			enabling legacy floating-point and sse state.
@@ -2662,7 +2668,7 @@
 			Valid arguments: on, off
 			Default: on
 
-	nohz_full=	[KNL,BOOT]
+	nohz_full=	[KNL,BOOT,SMP,ISOL]
 			The argument is a cpu list, as described above.
 			In kernels built with CONFIG_NO_HZ_FULL=y, set
 			the specified list of CPUs whose tick will be stopped
@@ -3094,6 +3100,12 @@
 		pcie_scan_all	Scan all possible PCIe devices.  Otherwise we
 				only look for one device below a PCIe downstream
 				port.
+		big_root_window	Try to add a big 64bit memory window to the PCIe
+				root complex on AMD CPUs. Some GFX hardware
+				can resize a BAR to allow access to all VRAM.
+				Adding the window is slightly risky (it may
+				conflict with unreported devices), so this
+				taints the kernel.
 
 	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
 			Management.
@@ -3282,6 +3294,21 @@
 	pt.		[PARIDE]
 			See Documentation/blockdev/paride.txt.
 
+	pti=		[X86_64] Control Page Table Isolation of user and
+			kernel address spaces.  Disabling this feature
+			removes hardening, but improves performance of
+			system calls and interrupts.
+
+			on   - unconditionally enable
+			off  - unconditionally disable
+			auto - kernel detects whether your CPU model is
+			       vulnerable to issues that PTI mitigates
+
+			Not specifying this option is equivalent to pti=auto.
+
+	nopti		[X86_64]
+			Equivalent to pti=off
+
 	pty.legacy_count=
 			[KNL] Number of legacy pty's. Overwrites compiled-in
 			default number.
@@ -3931,6 +3958,29 @@
 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
 			See Documentation/laptops/sonypi.txt
 
+	spectre_v2=	[X86] Control mitigation of Spectre variant 2
+			(indirect branch speculation) vulnerability.
+
+			on   - unconditionally enable
+			off  - unconditionally disable
+			auto - kernel detects whether your CPU model is
+			       vulnerable
+
+			Selecting 'on' will, and 'auto' may, choose a
+			mitigation method at run time according to the
+			CPU, the available microcode, the setting of the
+			CONFIG_RETPOLINE configuration option, and the
+			compiler with which the kernel was built.
+
+			Specific mitigations can also be selected manually:
+
+			retpoline	  - replace indirect branches
+			retpoline,generic - google's original retpoline
+			retpoline,amd     - AMD-specific minimal thunk
+
+			Not specifying this option is equivalent to
+			spectre_v2=auto.
+
 	spia_io_base=	[HW,MTD]
 	spia_fio_base=
 	spia_pedr=
diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst
index de50a8561774249351515662404e2a1f8328aba6..9b55952039a692d8119ce62502af9a4dd071b8e6 100644
--- a/Documentation/admin-guide/thunderbolt.rst
+++ b/Documentation/admin-guide/thunderbolt.rst
@@ -230,7 +230,7 @@ If supported by your machine this will be exposed by the WMI bus with
 a sysfs attribute called "force_power".
 
 For example the intel-wmi-thunderbolt driver exposes this attribute in:
-  /sys/devices/platform/PNP0C14:00/wmi_bus/wmi_bus-PNP0C14:00/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
+  /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
 
   To force the power to on, write 1 to this attribute file.
   To disable force power, write 0 to this attribute file.
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 304bf22bb83cc0ec8dfbbcf2a48b206ecb781afb..fc1c884fea10497357f889b11e33c6d323fecf55 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -75,3 +75,4 @@ stable kernels.
 | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
 | Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
+| Qualcomm Tech. | Falkor v{1,2}   | E1041           | QCOM_FALKOR_ERRATUM_1041    |
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 779211fbb69ffac450f22b0ad6864c7c6c2bd98f..2cddab7efb20df0dcf07c4d7d64a5611138a8d7c 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -898,6 +898,13 @@ controller implements weight and absolute bandwidth limit models for
 normal scheduling policy and absolute bandwidth allocation model for
 realtime scheduling policy.
 
+WARNING: cgroup2 doesn't yet support control of realtime processes and
+the cpu controller can only be enabled when all RT processes are in
+the root cgroup.  Be aware that system management software may already
+have placed RT processes into nonroot cgroups during the system boot
+process, and these processes may need to be moved to the root cgroup
+before the cpu controller can be enabled.
+
 
 CPU Interface Files
 ~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/core-api/genericirq.rst b/Documentation/core-api/genericirq.rst
index 0054bd48be849035146239cb6efc0d7dba0c0a62..4da67b65cecfa68e536efe7a6905cc0757df0417 100644
--- a/Documentation/core-api/genericirq.rst
+++ b/Documentation/core-api/genericirq.rst
@@ -225,9 +225,9 @@ interrupts.
 
 The following control flow is implemented (simplified excerpt)::
 
-    :c:func:`desc->irq_data.chip->irq_mask_ack`;
+    desc->irq_data.chip->irq_mask_ack();
     handle_irq_event(desc->action);
-    :c:func:`desc->irq_data.chip->irq_unmask`;
+    desc->irq_data.chip->irq_unmask();
 
 
 Default Fast EOI IRQ flow handler
@@ -239,7 +239,7 @@ which only need an EOI at the end of the handler.
 The following control flow is implemented (simplified excerpt)::
 
     handle_irq_event(desc->action);
-    :c:func:`desc->irq_data.chip->irq_eoi`;
+    desc->irq_data.chip->irq_eoi();
 
 
 Default Edge IRQ flow handler
@@ -251,15 +251,15 @@ interrupts.
 The following control flow is implemented (simplified excerpt)::
 
     if (desc->status & running) {
-        :c:func:`desc->irq_data.chip->irq_mask_ack`;
+        desc->irq_data.chip->irq_mask_ack();
         desc->status |= pending | masked;
         return;
     }
-    :c:func:`desc->irq_data.chip->irq_ack`;
+    desc->irq_data.chip->irq_ack();
     desc->status |= running;
     do {
         if (desc->status & masked)
-            :c:func:`desc->irq_data.chip->irq_unmask`;
+            desc->irq_data.chip->irq_unmask();
         desc->status &= ~pending;
         handle_irq_event(desc->action);
     } while (status & pending);
@@ -293,10 +293,10 @@ simplified version without locking.
 The following control flow is implemented (simplified excerpt)::
 
     if (desc->irq_data.chip->irq_ack)
-        :c:func:`desc->irq_data.chip->irq_ack`;
+        desc->irq_data.chip->irq_ack();
     handle_irq_event(desc->action);
     if (desc->irq_data.chip->irq_eoi)
-            :c:func:`desc->irq_data.chip->irq_eoi`;
+        desc->irq_data.chip->irq_eoi();
 
 
 EOI Edge IRQ flow handler
diff --git a/Documentation/devicetree/bindings/arm/ccn.txt b/Documentation/devicetree/bindings/arm/ccn.txt
index 29801456c9ee9a3701481d8c57cc149df2c312d2..43b5a71a5a9dde70aeebc38510e5a478e1d8d742 100644
--- a/Documentation/devicetree/bindings/arm/ccn.txt
+++ b/Documentation/devicetree/bindings/arm/ccn.txt
@@ -15,7 +15,7 @@ Required properties:
 
 Example:
 
-	ccn@0x2000000000 {
+	ccn@2000000000 {
 		compatible = "arm,ccn-504";
 		reg = <0x20 0x00000000 0 0x1000000>;
 		interrupts = <0 181 4>;
diff --git a/Documentation/devicetree/bindings/arm/omap/crossbar.txt b/Documentation/devicetree/bindings/arm/omap/crossbar.txt
index bb5727ae004ac287c6725d53ef6a4b90a9e41480..ecb360ed0e332b70a84cb139eba8fd16ca847145 100644
--- a/Documentation/devicetree/bindings/arm/omap/crossbar.txt
+++ b/Documentation/devicetree/bindings/arm/omap/crossbar.txt
@@ -49,7 +49,7 @@ An interrupt consumer on an SoC using crossbar will use:
 	interrupts = <GIC_SPI request_number interrupt_level>
 
 Example:
-	device_x@0x4a023000 {
+	device_x@4a023000 {
 		/* Crossbar 8 used */
 		interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
 		...
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt
index 866d93421eba20ba81fa66e905ecd58213636d95..f9632bacbd04aebd4ad68c46012777eb0c263204 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt
@@ -8,7 +8,7 @@ Required properties:
 - interrupts : Should contain MC General interrupt.
 
 Example:
-	memory-controller@0x7000f000 {
+	memory-controller@7000f000 {
 		compatible = "nvidia,tegra20-mc";
 		reg = <0x7000f000 0x024
 		       0x7000f03c 0x3c4>;
diff --git a/Documentation/devicetree/bindings/clock/axi-clkgen.txt b/Documentation/devicetree/bindings/clock/axi-clkgen.txt
index fb40da303d25c8f13093aafe43df83eaf2f88f47..aca94fe9416f009ef387e9f7b7880bc5b6b5127e 100644
--- a/Documentation/devicetree/bindings/clock/axi-clkgen.txt
+++ b/Documentation/devicetree/bindings/clock/axi-clkgen.txt
@@ -17,7 +17,7 @@ Optional properties:
 - clock-output-names : From common clock binding.
 
 Example:
-	clock@0xff000000 {
+	clock@ff000000 {
 		compatible = "adi,axi-clkgen";
 		#clock-cells = <0>;
 		reg = <0xff000000 0x1000>;
diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt b/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt
index 7a837d2182acf74e0104b4da227cc2a4715d7504..4acfc8f641b63c83e6ad9c5e2efb510b43022213 100644
--- a/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt
+++ b/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt
@@ -23,7 +23,7 @@ Example:
 		clocks = <&clk_osc>;
 	};
 
-	aux: aux@0x7e215004 {
+	aux: aux@7e215004 {
 		compatible = "brcm,bcm2835-aux";
 		#clock-cells = <1>;
 		reg = <0x7e215000 0x8>;
diff --git a/Documentation/devicetree/bindings/clock/exynos4-clock.txt b/Documentation/devicetree/bindings/clock/exynos4-clock.txt
index bc61c952cb0b7221ccd47c099199d06598daf91e..17bb11365354d6d6126446874d562da2dfe7e45d 100644
--- a/Documentation/devicetree/bindings/clock/exynos4-clock.txt
+++ b/Documentation/devicetree/bindings/clock/exynos4-clock.txt
@@ -24,7 +24,7 @@ tree sources.
 
 Example 1: An example of a clock controller node is listed below.
 
-	clock: clock-controller@0x10030000 {
+	clock: clock-controller@10030000 {
 		compatible = "samsung,exynos4210-clock";
 		reg = <0x10030000 0x20000>;
 		#clock-cells = <1>;
diff --git a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
index 536eacd1063f88ccf2e9ef0e0bdcf06137558cf8..aff266a12eeb71bf7cf05a8cbeecbe4390f21ef4 100644
--- a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
+++ b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
@@ -22,7 +22,7 @@ tree sources.
 
 Example 1: An example of a clock controller node is listed below.
 
-	clock: clock-controller@0x10010000 {
+	clock: clock-controller@10010000 {
 		compatible = "samsung,exynos5250-clock";
 		reg = <0x10010000 0x30000>;
 		#clock-cells = <1>;
diff --git a/Documentation/devicetree/bindings/clock/exynos5410-clock.txt b/Documentation/devicetree/bindings/clock/exynos5410-clock.txt
index 4527de3ea205d176f0425f9512e6bf560a2d3ce3..c68b0d29b3d031636f827ef904a60a6794270275 100644
--- a/Documentation/devicetree/bindings/clock/exynos5410-clock.txt
+++ b/Documentation/devicetree/bindings/clock/exynos5410-clock.txt
@@ -30,7 +30,7 @@ Example 1: An example of a clock controller node is listed below.
 		#clock-cells = <0>;
 	};
 
-	clock: clock-controller@0x10010000 {
+	clock: clock-controller@10010000 {
 		compatible = "samsung,exynos5410-clock";
 		reg = <0x10010000 0x30000>;
 		#clock-cells = <1>;
diff --git a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
index d54f42cf0440945396e039e3e37053bef04bbc9b..717a7b1531c78278e606c16f545627651dce2ccd 100644
--- a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
+++ b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
@@ -23,7 +23,7 @@ tree sources.
 
 Example 1: An example of a clock controller node is listed below.
 
-	clock: clock-controller@0x10010000 {
+	clock: clock-controller@10010000 {
 		compatible = "samsung,exynos5420-clock";
 		reg = <0x10010000 0x30000>;
 		#clock-cells = <1>;
diff --git a/Documentation/devicetree/bindings/clock/exynos5440-clock.txt b/Documentation/devicetree/bindings/clock/exynos5440-clock.txt
index 5f7005f73058f74a1fe308814eb11f75d8100435..c7d227c31e95bd944f351dd977682d3337b4a2e5 100644
--- a/Documentation/devicetree/bindings/clock/exynos5440-clock.txt
+++ b/Documentation/devicetree/bindings/clock/exynos5440-clock.txt
@@ -21,7 +21,7 @@ tree sources.
 
 Example: An example of a clock controller node is listed below.
 
-	clock: clock-controller@0x10010000 {
+	clock: clock-controller@10010000 {
 		compatible = "samsung,exynos5440-clock";
 		reg = <0x160000 0x10000>;
 		#clock-cells = <1>;
diff --git a/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt b/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt
index 3e6a81e99804435c003560e9f2145a6a5e6e74c9..c35cb6c4af4d9fd7e681de3c9d6a5bdbe629e034 100644
--- a/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt
+++ b/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt
@@ -14,7 +14,7 @@ Required properties:
 
 Example:
 
-pllctrl: pll-controller@0x02310000 {
+pllctrl: pll-controller@02310000 {
 	compatible = "ti,keystone-pllctrl", "syscon";
 	reg = <0x02310000 0x200>;
 };
diff --git a/Documentation/devicetree/bindings/clock/zx296702-clk.txt b/Documentation/devicetree/bindings/clock/zx296702-clk.txt
index e85ecb510d56daad83ffbbe3572a96dc110a43a3..5c91c9e4f1beb47f35feb64361f3de23bcc35389 100644
--- a/Documentation/devicetree/bindings/clock/zx296702-clk.txt
+++ b/Documentation/devicetree/bindings/clock/zx296702-clk.txt
@@ -20,13 +20,13 @@ ID in its "clocks" phandle cell. See include/dt-bindings/clock/zx296702-clock.h
 for the full list of zx296702 clock IDs.
 
 
-topclk: topcrm@0x09800000 {
+topclk: topcrm@09800000 {
         compatible = "zte,zx296702-topcrm-clk";
         reg = <0x09800000 0x1000>;
         #clock-cells = <1>;
 };
 
-uart0: serial@0x09405000 {
+uart0: serial@09405000 {
         compatible = "zte,zx296702-uart";
         reg = <0x09405000 0x1000>;
         interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
index 7aef0eae58d43cce4a2d94aa6e20fc5c40343ffa..76aec8a3724d62120392be98cd093acd0f4015b5 100644
--- a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
+++ b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
@@ -456,7 +456,7 @@ System ON/OFF key driver
       Definition: this is phandle to the register map node.
 
 EXAMPLE:
-	snvs-pwrkey@0x020cc000 {
+	snvs-pwrkey@020cc000 {
 		compatible = "fsl,sec-v4.0-pwrkey";
 		regmap = <&snvs>;
 		interrupts = <0 4 0x4>
@@ -545,7 +545,7 @@ FULL EXAMPLE
 			interrupts = <93 2>;
 		};
 
-		snvs-pwrkey@0x020cc000 {
+		snvs-pwrkey@020cc000 {
 			compatible = "fsl,sec-v4.0-pwrkey";
 			regmap = <&sec_mon>;
 			interrupts = <0 4 0x4>;
diff --git a/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt b/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt
index 001dd63979a974802fe69b6e72b12fe79c7c1c6b..148191b0fc15864725da6907a0571b64337024d6 100644
--- a/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt
+++ b/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt
@@ -9,7 +9,7 @@ Required properties:
 - clock-names : the name of clock used by the DFI, must be "pclk_ddr_mon";
 
 Example:
-	dfi: dfi@0xff630000 {
+	dfi: dfi@ff630000 {
 		compatible = "rockchip,rk3399-dfi";
 		reg = <0x00 0xff630000 0x00 0x4000>;
 		rockchip,pmu = <&pmugrf>;
diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt
index 7f040edc16fe6325e22e1c8f46081c758225c348..bf4a18047309a5f806bff4f8d5bf72459cf347c7 100644
--- a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt
+++ b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt
@@ -48,6 +48,10 @@ Required properties:
   Documentation/devicetree/bindings/reset/reset.txt,
   the reset-names should be "hdmitx_apb", "hdmitx", "hdmitx_phy"
 
+Optional properties:
+- hdmi-supply: Optional phandle to an external 5V regulator to power the HDMI
+  logic, as described in the file ../regulator/regulator.txt
+
 Required nodes:
 
 The connections to the HDMI ports are modeled using the OF graph
diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt
index 00f74bad1e957c780fbb6f5b174aa356076e7ca0..057b81335775e7526805710324eb497fe7c04bcb 100644
--- a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt
+++ b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt
@@ -64,6 +64,10 @@ Required properties:
 - reg-names: should contain the names of the previous memory regions
 - interrupts: should contain the VENC Vsync interrupt number
 
+Optional properties:
+- power-domains: Optional phandle to associated power domain as described in
+	the file ../power/power_domain.txt
+
 Required nodes:
 
 The connections to the VPU output video ports are modeled using the OF graph
diff --git a/Documentation/devicetree/bindings/display/atmel,lcdc.txt b/Documentation/devicetree/bindings/display/atmel,lcdc.txt
index 1a21202778ee354af7e5fc1f3d9ce65b0a7a964a..acb5a01321279e13d0c0c2780be8018b330152d8 100644
--- a/Documentation/devicetree/bindings/display/atmel,lcdc.txt
+++ b/Documentation/devicetree/bindings/display/atmel,lcdc.txt
@@ -27,7 +27,7 @@ Optional properties:
 
 Example:
 
-	fb0: fb@0x00500000 {
+	fb0: fb@00500000 {
 		compatible = "atmel,at91sam9g45-lcdc";
 		reg = <0x00500000 0x1000>;
 		interrupts = <23 3 0>;
@@ -41,7 +41,7 @@ Example:
 
 Example for fixed framebuffer memory:
 
-	fb0: fb@0x00500000 {
+	fb0: fb@00500000 {
 		compatible = "atmel,at91sam9263-lcdc";
 		reg = <0x00700000 0x1000 0x70000000 0x200000>;
 		[...]
diff --git a/Documentation/devicetree/bindings/display/ilitek,ili9225.txt b/Documentation/devicetree/bindings/display/ilitek,ili9225.txt
index 21607a541c332057bf7805fe90a52104b26c8473..a59feb52015be1329140529bc9befc5685abee8d 100644
--- a/Documentation/devicetree/bindings/display/ilitek,ili9225.txt
+++ b/Documentation/devicetree/bindings/display/ilitek,ili9225.txt
@@ -4,7 +4,7 @@ This binding is for display panels using an Ilitek ILI9225 controller in SPI
 mode.
 
 Required properties:
-- compatible:	"ilitek,ili9225-2.2in-176x220"
+- compatible:	"vot,v220hf01a-t", "ilitek,ili9225"
 - rs-gpios:	Register select signal
 - reset-gpios:	Reset pin
 
@@ -16,7 +16,7 @@ Optional properties:
 
 Example:
 	display@0{
-		compatible = "ilitek,ili9225-2.2in-176x220";
+		compatible = "vot,v220hf01a-t", "ilitek,ili9225";
 		reg = <0>;
 		spi-max-frequency = <12000000>;
 		rs-gpios = <&gpio0 9 GPIO_ACTIVE_HIGH>;
diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.txt b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3d5ce6ad6ec71f48d8e67a36014101da708d3f7b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.txt
@@ -0,0 +1,49 @@
+Ilitek ILI9322 TFT panel driver with SPI control bus
+
+This is a driver for 320x240 TFT panels, accepting a variety of input
+streams that get adapted and scaled to the panel. The panel output has
+960 TFT source driver pins and 240 TFT gate driver pins, VCOM, VCOML and
+VCOMH outputs.
+
+Required properties:
+  - compatible: "dlink,dir-685-panel", "ilitek,ili9322"
+    (full system-specific compatible is always required to look up configuration)
+  - reg: address of the panel on the SPI bus
+
+Optional properties:
+  - vcc-supply: core voltage supply, see regulator/regulator.txt
+  - iovcc-supply: voltage supply for the interface input/output signals,
+    see regulator/regulator.txt
+  - vci-supply: voltage supply for analog parts, see regulator/regulator.txt
+  - reset-gpios: a GPIO spec for the reset pin, see gpio/gpio.txt
+
+  The following optional properties only apply to RGB and YUV input modes and
+  can be omitted for BT.656 input modes:
+
+  - pixelclk-active: see display/panel/display-timing.txt
+  - de-active: see display/panel/display-timing.txt
+  - hsync-active: see display/panel/display-timing.txt
+  - vsync-active: see display/panel/display-timing.txt
+
+The panel must obey the rules for a SPI slave device as specified in
+spi/spi-bus.txt
+
+The device node can contain one 'port' child node with one child
+'endpoint' node, according to the bindings defined in
+media/video-interfaces.txt. This node should describe panel's video bus.
+
+Example:
+
+panel: display@0 {
+	compatible = "dlink,dir-685-panel", "ilitek,ili9322";
+	reg = <0>;
+	vcc-supply = <&vdisp>;
+	iovcc-supply = <&vdisp>;
+	vci-supply = <&vdisp>;
+
+	port {
+		panel_in: endpoint {
+			remote-endpoint = <&display_out>;
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/display/panel/panel-common.txt b/Documentation/devicetree/bindings/display/panel/panel-common.txt
index ec52c472c8459b920643af0592a30c385dd27a8e..557fa765adcb9450c4003555d1211978e73a9703 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-common.txt
+++ b/Documentation/devicetree/bindings/display/panel/panel-common.txt
@@ -78,6 +78,16 @@ used for panels that implement compatible control signals.
   while active. Active high reset signals can be supported by inverting the
   GPIO specifier polarity flag.
 
+Power
+-----
+
+- power-supply: display panels require power to be supplied. While several
+  panels need more than one power supply with panel-specific constraints
+  governing the order and timings of the power supplies, in many cases a single
+  power supply is sufficient, either because the panel has a single power rail,
+  or because all its power rails can be driven by the same supply. In that case
+  the power-supply property specifies the supply powering the panel as a phandle
+  to a regulator.
 
 Backlight
 ---------
diff --git a/Documentation/devicetree/bindings/display/panel/panel-lvds.txt b/Documentation/devicetree/bindings/display/panel/panel-lvds.txt
index b938269f841e5d6b55d8336a0d1be3ddb3127e45..250850a2150b8c9805997de6cfa153a8ea403aae 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-lvds.txt
+++ b/Documentation/devicetree/bindings/display/panel/panel-lvds.txt
@@ -32,6 +32,7 @@ Optional properties:
 - label: See panel-common.txt.
 - gpios: See panel-common.txt.
 - backlight: See panel-common.txt.
+- power-supply: See panel-common.txt.
 - data-mirror: If set, reverse the bit order described in the data mappings
   below on all data lanes, transmitting bits for slots 6 to 0 instead of
   0 to 6.
diff --git a/Documentation/devicetree/bindings/display/panel/simple-panel.txt b/Documentation/devicetree/bindings/display/panel/simple-panel.txt
index 1341bbf4aa3d13147de6465d5219989338b9d3b7..16d8ff088b7d15504afd58984b291ded9a4b7f15 100644
--- a/Documentation/devicetree/bindings/display/panel/simple-panel.txt
+++ b/Documentation/devicetree/bindings/display/panel/simple-panel.txt
@@ -1,7 +1,7 @@
 Simple display panel
 
 Required properties:
-- power-supply: regulator to provide the supply voltage
+- power-supply: See panel-common.txt
 
 Optional properties:
 - ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
diff --git a/Documentation/devicetree/bindings/display/panel/toppoly,td028ttec1.txt b/Documentation/devicetree/bindings/display/panel/tpo,td028ttec1.txt
similarity index 84%
rename from Documentation/devicetree/bindings/display/panel/toppoly,td028ttec1.txt
rename to Documentation/devicetree/bindings/display/panel/tpo,td028ttec1.txt
index 7175dc3740acf12ddddd673a13d543eecb9c6440..ed34253d9fb129916f25db38440faf13645721c3 100644
--- a/Documentation/devicetree/bindings/display/panel/toppoly,td028ttec1.txt
+++ b/Documentation/devicetree/bindings/display/panel/tpo,td028ttec1.txt
@@ -2,7 +2,7 @@ Toppoly TD028TTEC1 Panel
 ========================
 
 Required properties:
-- compatible: "toppoly,td028ttec1"
+- compatible: "tpo,td028ttec1"
 
 Optional properties:
 - label: a symbolic name for the panel
@@ -14,7 +14,7 @@ Example
 -------
 
 lcd-panel: td028ttec1@0 {
-	compatible = "toppoly,td028ttec1";
+	compatible = "tpo,td028ttec1";
 	reg = <0>;
 	spi-max-frequency = <100000>;
 	spi-cpol;
diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.txt b/Documentation/devicetree/bindings/display/sitronix,st7735r.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f0a5090a3326b86c1df8a42eb65e8884a77cd521
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/sitronix,st7735r.txt
@@ -0,0 +1,35 @@
+Sitronix ST7735R display panels
+
+This binding is for display panels using a Sitronix ST7735R controller in SPI
+mode.
+
+Required properties:
+- compatible:	"jianda,jd-t18003-t01", "sitronix,st7735r"
+- dc-gpios:	Display data/command selection (D/CX)
+- reset-gpios:	Reset signal (RSTX)
+
+The node for this driver must be a child node of a SPI controller, hence
+all mandatory properties described in ../spi/spi-bus.txt must be specified.
+
+Optional properties:
+- rotation:	panel rotation in degrees counter clockwise (0,90,180,270)
+- backlight:	phandle of the backlight device attached to the panel
+
+Example:
+
+	backlight: backlight {
+		compatible = "gpio-backlight";
+		gpios = <&gpio 44 GPIO_ACTIVE_HIGH>;
+	}
+
+	...
+
+	display@0{
+		compatible = "jianda,jd-t18003-t01", "sitronix,st7735r";
+		reg = <0>;
+		spi-max-frequency = <32000000>;
+		dc-gpios = <&gpio 43 GPIO_ACTIVE_HIGH>;
+		reset-gpios = <&gpio 80 GPIO_ACTIVE_HIGH>;
+		rotation = <270>;
+		backlight = &backlight;
+	};
diff --git a/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt b/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt
index 50cc72ee11689ccc99f54252b0c1a442362b9569..cd626ee1147a2c92da8592dbd6a48728b54181e0 100644
--- a/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt
+++ b/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt
@@ -93,6 +93,7 @@ Required properties:
    * allwinner,sun6i-a31s-tcon
    * allwinner,sun7i-a20-tcon
    * allwinner,sun8i-a33-tcon
+   * allwinner,sun8i-a83t-tcon-lcd
    * allwinner,sun8i-v3s-tcon
  - reg: base address and size of memory-mapped region
  - interrupts: interrupt associated to this IP
@@ -121,6 +122,14 @@ Required properties:
 On SoCs other than the A33 and V3s, there is one more clock required:
    - 'tcon-ch1': The clock driving the TCON channel 1
 
+On SoCs that support LVDS (all SoCs but the A13, H3, H5 and V3s), you
+need one more reset line:
+   - 'lvds': The reset line driving the LVDS logic
+
+And on the A23, A31, A31s and A33, you need one more clock line:
+   - 'lvds-alt': An alternative clock source, separate from the TCON channel 0
+                 clock, that can be used to drive the LVDS clock
+
 DRC
 ---
 
@@ -216,6 +225,7 @@ supported.
 
 Required properties:
   - compatible: value must be one of:
+    * allwinner,sun8i-a83t-de2-mixer-0
     * allwinner,sun8i-v3s-de2-mixer
   - reg: base address and size of the memory-mapped region.
   - clocks: phandles to the clocks feeding the mixer
@@ -245,6 +255,7 @@ Required properties:
     * allwinner,sun6i-a31s-display-engine
     * allwinner,sun7i-a20-display-engine
     * allwinner,sun8i-a33-display-engine
+    * allwinner,sun8i-a83t-display-engine
     * allwinner,sun8i-v3s-display-engine
 
   - allwinner,pipelines: list of phandle to the display engine
diff --git a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
index 844e0103fb0dac15220794cfde75f6d58b2c4d73..593be44a53c9527774c35ca9331f9b25e23d97cd 100644
--- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
+++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
@@ -206,21 +206,33 @@ of the following host1x client modules:
     - "nvidia,tegra132-sor": for Tegra132
     - "nvidia,tegra210-sor": for Tegra210
     - "nvidia,tegra210-sor1": for Tegra210
+    - "nvidia,tegra186-sor": for Tegra186
+    - "nvidia,tegra186-sor1": for Tegra186
   - reg: Physical base address and length of the controller's registers.
   - interrupts: The interrupt outputs from the controller.
   - clocks: Must contain an entry for each entry in clock-names.
     See ../clocks/clock-bindings.txt for details.
   - clock-names: Must include the following entries:
     - sor: clock input for the SOR hardware
-    - source: source clock for the SOR clock
+    - out: SOR output clock
     - parent: input for the pixel clock
     - dp: reference clock for the SOR clock
     - safe: safe reference for the SOR clock during power up
+
+    For Tegra186 and later:
+    - pad: SOR pad output clock (on Tegra186 and later)
+
+    Obsolete:
+    - source: source clock for the SOR clock (obsolete, use "out" instead)
+
   - resets: Must contain an entry for each entry in reset-names.
     See ../reset/reset.txt for details.
   - reset-names: Must include the following entries:
     - sor
 
+  Required properties on Tegra186 and later:
+  - nvidia,interface: index of the SOR interface
+
   Optional properties:
   - nvidia,ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
   - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection
diff --git a/Documentation/devicetree/bindings/display/ti/ti,dra7-dss.txt b/Documentation/devicetree/bindings/display/ti/ti,dra7-dss.txt
index c30f9ec189ed612cf1ab79de9e7a4ea4c427f1b8..91279f1060fe1a4f58ae0b4908dee632e98e6b11 100644
--- a/Documentation/devicetree/bindings/display/ti/ti,dra7-dss.txt
+++ b/Documentation/devicetree/bindings/display/ti/ti,dra7-dss.txt
@@ -47,6 +47,11 @@ Required properties:
 - clocks: handle to fclk
 - clock-names: "fck"
 
+Optional properties:
+- max-memory-bandwidth: Input memory (from main memory to dispc) bandwidth limit
+			in bytes per second
+
+
 HDMI
 ----
 
diff --git a/Documentation/devicetree/bindings/display/ti/ti,omap2-dss.txt b/Documentation/devicetree/bindings/display/ti/ti,omap2-dss.txt
index afcd5a86c6a45a8a1d9bac21e566ad93a47f95c2..ee867c4d1152e4a62b41f266b6944b943203e34c 100644
--- a/Documentation/devicetree/bindings/display/ti/ti,omap2-dss.txt
+++ b/Documentation/devicetree/bindings/display/ti/ti,omap2-dss.txt
@@ -28,6 +28,10 @@ Required properties:
 - ti,hwmods: "dss_dispc"
 - interrupts: the DISPC interrupt
 
+Optional properties:
+- max-memory-bandwidth: Input memory (from main memory to dispc) bandwidth limit
+			in bytes per second
+
 
 RFBI
 ----
diff --git a/Documentation/devicetree/bindings/display/ti/ti,omap3-dss.txt b/Documentation/devicetree/bindings/display/ti/ti,omap3-dss.txt
index dc66e1447c318139b333b4d5338b5090b0fffa5b..cd02516a40b6f0a2974a08d307ba66ec265dfc70 100644
--- a/Documentation/devicetree/bindings/display/ti/ti,omap3-dss.txt
+++ b/Documentation/devicetree/bindings/display/ti/ti,omap3-dss.txt
@@ -37,6 +37,10 @@ Required properties:
 - clocks: handle to fclk
 - clock-names: "fck"
 
+Optional properties:
+- max-memory-bandwidth: Input memory (from main memory to dispc) bandwidth limit
+			in bytes per second
+
 
 RFBI
 ----
diff --git a/Documentation/devicetree/bindings/display/ti/ti,omap4-dss.txt b/Documentation/devicetree/bindings/display/ti/ti,omap4-dss.txt
index bc624db8888d761e814ff1df5fa4d36a6a23ae13..0f85f6b3a5a8b4609c7c1fe6a056d210dfc25b7c 100644
--- a/Documentation/devicetree/bindings/display/ti/ti,omap4-dss.txt
+++ b/Documentation/devicetree/bindings/display/ti/ti,omap4-dss.txt
@@ -36,6 +36,10 @@ Required properties:
 - clocks: handle to fclk
 - clock-names: "fck"
 
+Optional properties:
+- max-memory-bandwidth: Input memory (from main memory to dispc) bandwidth limit
+			in bytes per second
+
 
 RFBI
 ----
diff --git a/Documentation/devicetree/bindings/display/ti/ti,omap5-dss.txt b/Documentation/devicetree/bindings/display/ti/ti,omap5-dss.txt
index 118a486c47bb71740511a87e661446a7df299580..20861218649f14412c3b280e533e6426d7e93053 100644
--- a/Documentation/devicetree/bindings/display/ti/ti,omap5-dss.txt
+++ b/Documentation/devicetree/bindings/display/ti/ti,omap5-dss.txt
@@ -36,6 +36,10 @@ Required properties:
 - clocks: handle to fclk
 - clock-names: "fck"
 
+Optional properties:
+- max-memory-bandwidth: Input memory (from main memory to dispc) bandwidth limit
+			in bytes per second
+
 
 RFBI
 ----
diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
index 55492c264d1779539ffb838f3e2f020c07e45709..b3408cc57be6d398f2c206f5d599d7134004dd0b 100644
--- a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
+++ b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
@@ -73,7 +73,7 @@ Hypervisor OS configuration:
 		max-read-transactions = <31>;
 		channel-reset-timeout-cycles = <0x500>;
 
-		hidma_24: dma-controller@0x5c050000 {
+		hidma_24: dma-controller@5c050000 {
 			compatible = "qcom,hidma-1.0";
 			reg = <0 0x5c050000 0x0 0x1000>,
 			      <0 0x5c0b0000 0x0 0x1000>;
@@ -85,7 +85,7 @@ Hypervisor OS configuration:
 
 Guest OS configuration:
 
-	hidma_24: dma-controller@0x5c050000 {
+	hidma_24: dma-controller@5c050000 {
 		compatible = "qcom,hidma-1.0";
 		reg = <0 0x5c050000 0x0 0x1000>,
 		      <0 0x5c0b0000 0x0 0x1000>;
diff --git a/Documentation/devicetree/bindings/dma/zxdma.txt b/Documentation/devicetree/bindings/dma/zxdma.txt
index abec59f35fde949baf60210017407e45e8cffce6..0ab80f69e566ef7c66001486a7cd5acea5685df8 100644
--- a/Documentation/devicetree/bindings/dma/zxdma.txt
+++ b/Documentation/devicetree/bindings/dma/zxdma.txt
@@ -13,7 +13,7 @@ Required properties:
 Example:
 
 Controller:
-	dma: dma-controller@0x09c00000{
+	dma: dma-controller@09c00000{
 		compatible = "zte,zx296702-dma";
 		reg = <0x09c00000 0x1000>;
 		clocks = <&topclk ZX296702_DMA_ACLK>;
diff --git a/Documentation/devicetree/bindings/eeprom/at25.txt b/Documentation/devicetree/bindings/eeprom/at25.txt
index 1d3447165c374f673aa9a717f94f2387cfd852f1..e823d90b802f7f8f293f9c0ba4f06533361824bd 100644
--- a/Documentation/devicetree/bindings/eeprom/at25.txt
+++ b/Documentation/devicetree/bindings/eeprom/at25.txt
@@ -1,7 +1,12 @@
 EEPROMs (SPI) compatible with Atmel at25.
 
 Required properties:
-- compatible : "atmel,at25".
+- compatible : Should be "<vendor>,<type>", and generic value "atmel,at25".
+  Example "<vendor>,<type>" values:
+    "microchip,25lc040"
+    "st,m95m02"
+    "st,m95256"
+
 - reg : chip select number
 - spi-max-frequency : max spi frequency to use
 - pagesize : size of the eeprom page
@@ -13,7 +18,7 @@ Optional properties:
 - spi-cpol : SPI inverse clock polarity, as per spi-bus bindings.
 - read-only : this parameter-less property disables writes to the eeprom
 
-Obsolete legacy properties are can be used in place of "size", "pagesize",
+Obsolete legacy properties can be used in place of "size", "pagesize",
 "address-width", and "read-only":
 - at25,byte-len : total eeprom size in bytes
 - at25,addr-mode : addr-mode flags, as defined in include/linux/spi/eeprom.h
@@ -22,8 +27,8 @@ Obsolete legacy properties are can be used in place of "size", "pagesize",
 Additional compatible properties are also allowed.
 
 Example:
-	at25@0 {
-		compatible = "atmel,at25", "st,m95256";
+	eeprom@0 {
+		compatible = "st,m95256", "atmel,at25";
 		reg = <0>
 		spi-max-frequency = <5000000>;
 		spi-cpha;
diff --git a/Documentation/devicetree/bindings/gpio/gpio-altera.txt b/Documentation/devicetree/bindings/gpio/gpio-altera.txt
index 826a7208ca93a8f5cd31f4a94c96b36ce6c7dab1..146e554b3c6769cfe10238acc1e9b48d9a5e3d28 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-altera.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-altera.txt
@@ -30,7 +30,7 @@ Optional properties:
 
 Example:
 
-gpio_altr: gpio@0xff200000 {
+gpio_altr: gpio@ff200000 {
 	compatible = "altr,pio-1.0";
 	reg = <0xff200000 0x10>;
 	interrupts = <0 45 4>;
diff --git a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt
index 7f57271df2bc96b41168d337c911511ccb726e33..0d0158728f897bd9fbeaa41e884955445ca1f062 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt
@@ -27,7 +27,7 @@ Required properties:
 	ti,tca6424
 	ti,tca9539
 	ti,tca9554
-	onsemi,pca9654
+	onnn,pca9654
 	exar,xra1202
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt b/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt
index 231e4cc4008cbcf0783c6ab438fa3eca1815c4fb..d4a082acf92f0f7b98cdd0702c47de3bbed30725 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt
@@ -18,7 +18,7 @@ Optional properties:
 Example
 
 / {
-	i2c4: i2c4@0x10054000 {
+	i2c4: i2c4@10054000 {
 		compatible = "ingenic,jz4780-i2c";
 		reg = <0x10054000 0x1000>;
 
diff --git a/Documentation/devicetree/bindings/iio/pressure/hp03.txt b/Documentation/devicetree/bindings/iio/pressure/hp03.txt
index 54e7e70bcea52da324caf4f6c277573cca91f823..831dbee7a5c35abee4e2d64e5c2c30de653fbe7a 100644
--- a/Documentation/devicetree/bindings/iio/pressure/hp03.txt
+++ b/Documentation/devicetree/bindings/iio/pressure/hp03.txt
@@ -10,7 +10,7 @@ Required properties:
 
 Example:
 
-hp03@0x77 {
+hp03@77 {
 	compatible = "hoperf,hp03";
 	reg = <0x77>;
 	xclr-gpio = <&portc 0 0x0>;
diff --git a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
index ca5a2c86480cff1ec8f68168a9f7c94c1a29ef01..56d835242af2d834f440e3d149011b38e5a7a65f 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
@@ -15,7 +15,7 @@ Optional properties:
 Example:
 
 	i2c@80110000 {
-		bu21013_tp@0x5c {
+		bu21013_tp@5c {
 			compatible = "rohm,bu21013_tp";
 			reg = <0x5c>;
 			touch-gpio = <&gpio2 20 0x4>;
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
index 560d8a727b8f84e951b227d8fcc838579ddc94b1..2f324464864658a80a13b1cdab0395282f280b8e 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
@@ -155,7 +155,7 @@ Example:
 		      <0x0 0xe112f000 0 0x02000>,
 		      <0x0 0xe1140000 0 0x10000>,
 		      <0x0 0xe1160000 0 0x10000>;
-		v2m0: v2m@0x8000 {
+		v2m0: v2m@8000 {
 			compatible = "arm,gic-v2m-frame";
 			msi-controller;
 			reg = <0x0 0x80000 0 0x1000>;
@@ -163,7 +163,7 @@ Example:
 
 		....
 
-		v2mN: v2m@0x9000 {
+		v2mN: v2m@9000 {
 			compatible = "arm,gic-v2m-frame";
 			msi-controller;
 			reg = <0x0 0x90000 0 0x1000>;
diff --git a/Documentation/devicetree/bindings/interrupt-controller/img,meta-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/img,meta-intc.txt
index 80994adab3928905de7c44bbc725a1f6f345ca74..42431f44697fb22cee723d176ef4ce9d1cdc4ebd 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/img,meta-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/img,meta-intc.txt
@@ -71,7 +71,7 @@ Example 2:
 	 * An interrupt generating device that is wired to a Meta external
 	 * trigger block.
 	 */
-	uart1: uart@0x02004c00 {
+	uart1: uart@02004c00 {
 		// Interrupt source '5' that is level-sensitive.
 		// Note that there are only two cells as specified in the
 		// interrupt parent's '#interrupt-cells' property.
diff --git a/Documentation/devicetree/bindings/interrupt-controller/img,pdc-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/img,pdc-intc.txt
index a691185503441ec2ed37e292c8412af12a116f8e..5dc2a55ad81143d86d8e781f147342ad6a21848e 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/img,pdc-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/img,pdc-intc.txt
@@ -51,7 +51,7 @@ Example 1:
 	/*
 	 * TZ1090 PDC block
 	 */
-	pdc: pdc@0x02006000 {
+	pdc: pdc@02006000 {
 		// This is an interrupt controller node.
 		interrupt-controller;
 
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,spear3xx-shirq.txt b/Documentation/devicetree/bindings/interrupt-controller/st,spear3xx-shirq.txt
index 715a013ed4bdef1ada6217e5f5540df142d385ce..2ab0ea39867b516dd9462e8b71142ddf8d8a5f8d 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/st,spear3xx-shirq.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/st,spear3xx-shirq.txt
@@ -39,7 +39,7 @@ Example:
 
 The following is an example from the SPEAr320 SoC dtsi file.
 
-shirq: interrupt-controller@0xb3000000 {
+shirq: interrupt-controller@b3000000 {
 	compatible = "st,spear320-shirq";
 	reg = <0xb3000000 0x1000>;
 	interrupts = <28 29 30 1>;
diff --git a/Documentation/devicetree/bindings/mailbox/altera-mailbox.txt b/Documentation/devicetree/bindings/mailbox/altera-mailbox.txt
index c2619797ce0c92f06b41117940209f4c9e37ca1c..49cfc8c337c4644a5b7cd271660fe88b5ec28412 100644
--- a/Documentation/devicetree/bindings/mailbox/altera-mailbox.txt
+++ b/Documentation/devicetree/bindings/mailbox/altera-mailbox.txt
@@ -14,7 +14,7 @@ Optional properties:
 			depends on the interrupt controller parent.
 
 Example:
-	mbox_tx: mailbox@0x100 {
+	mbox_tx: mailbox@100 {
 		compatible = "altr,mailbox-1.0";
 		reg = <0x100 0x8>;
 		interrupt-parent = < &gic_0 >;
@@ -22,7 +22,7 @@ Example:
 		#mbox-cells = <1>;
 	};
 
-	mbox_rx: mailbox@0x200 {
+	mbox_rx: mailbox@200 {
 		compatible = "altr,mailbox-1.0";
 		reg = <0x200 0x8>;
 		interrupt-parent = < &gic_0 >;
@@ -40,7 +40,7 @@ support only one channel).The equivalent "mbox-names" property value can be
 used to give a name to the communication channel to be used by the client user.
 
 Example:
-	mclient0: mclient0@0x400 {
+	mclient0: mclient0@400 {
 		compatible = "client-1.0";
 		reg = <0x400 0x10>;
 		mbox-names = "mbox-tx", "mbox-rx";
diff --git a/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt b/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt
index 0f3ee81d92c297022adcdbb1f9689f4b924ea143..9bcdf2087625c108a8180060f3dd9ebb4c111a46 100644
--- a/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt
+++ b/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt
@@ -15,7 +15,7 @@ Optional properties:
 - brcm,use-bcm-hdr:  present if a BCM header precedes each frame.
 
 Example:
-	pdc0: iproc-pdc0@0x612c0000 {
+	pdc0: iproc-pdc0@612c0000 {
 		compatible = "brcm,iproc-pdc-mbox";
 		reg = <0 0x612c0000 0 0x445>;  /* PDC FS0 regs */
 		interrupts = <GIC_SPI 187 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Documentation/devicetree/bindings/media/exynos5-gsc.txt b/Documentation/devicetree/bindings/media/exynos5-gsc.txt
index 0d4fdaedc6f1e82aec78f8080b6c312104b3cb0e..bc963a6d305a8bbc0e0ca2a231eea89d3247087c 100644
--- a/Documentation/devicetree/bindings/media/exynos5-gsc.txt
+++ b/Documentation/devicetree/bindings/media/exynos5-gsc.txt
@@ -17,7 +17,7 @@ Optional properties:
 
 Example:
 
-gsc_0:  gsc@0x13e00000 {
+gsc_0:  gsc@13e00000 {
 	compatible = "samsung,exynos5250-gsc";
 	reg = <0x13e00000 0x1000>;
 	interrupts = <0 85 0>;
diff --git a/Documentation/devicetree/bindings/media/mediatek-vcodec.txt b/Documentation/devicetree/bindings/media/mediatek-vcodec.txt
index 46c15c54175d87cca499ceb81c33fdc6560df5b0..2a615d84a682563d28499b9a1f26a5ca2abcd984 100644
--- a/Documentation/devicetree/bindings/media/mediatek-vcodec.txt
+++ b/Documentation/devicetree/bindings/media/mediatek-vcodec.txt
@@ -68,7 +68,7 @@ vcodec_dec: vcodec@16000000 {
                   "vdec_bus_clk_src";
   };
 
-  vcodec_enc: vcodec@0x18002000 {
+  vcodec_enc: vcodec@18002000 {
     compatible = "mediatek,mt8173-vcodec-enc";
     reg = <0 0x18002000 0 0x1000>,    /*VENC_SYS*/
           <0 0x19002000 0 0x1000>;    /*VENC_LT_SYS*/
diff --git a/Documentation/devicetree/bindings/media/rcar_vin.txt b/Documentation/devicetree/bindings/media/rcar_vin.txt
index 6e4ef8caf759e5d31179c00120eccec349f3f8cd..19357d0bbe6539b3fbb7c73a4adbe4509fc905de 100644
--- a/Documentation/devicetree/bindings/media/rcar_vin.txt
+++ b/Documentation/devicetree/bindings/media/rcar_vin.txt
@@ -44,7 +44,7 @@ Device node example
 	       vin0 = &vin0;
 	};
 
-        vin0: vin@0xe6ef0000 {
+        vin0: vin@e6ef0000 {
                 compatible = "renesas,vin-r8a7790", "renesas,rcar-gen2-vin";
                 clocks = <&mstp8_clks R8A7790_CLK_VIN0>;
                 reg = <0 0xe6ef0000 0 0x1000>;
diff --git a/Documentation/devicetree/bindings/media/samsung-fimc.txt b/Documentation/devicetree/bindings/media/samsung-fimc.txt
index e4e15d8d752157909e03344ee04ce7621ba5b967..48c599dacbdf7baafbc2cc3a64c170a44ba95313 100644
--- a/Documentation/devicetree/bindings/media/samsung-fimc.txt
+++ b/Documentation/devicetree/bindings/media/samsung-fimc.txt
@@ -138,7 +138,7 @@ Example:
 		};
 
 		/* MIPI CSI-2 bus IF sensor */
-		s5c73m3: sensor@0x1a {
+		s5c73m3: sensor@1a {
 			compatible = "samsung,s5c73m3";
 			reg = <0x1a>;
 			vddio-supply = <...>;
diff --git a/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt b/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt
index 1ce4e46bcbb768a91ba16e27f19fa415769ee802..17a8e81ca0cc0ecd3749bc65a32abd7a4ce58bfb 100644
--- a/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt
+++ b/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt
@@ -8,7 +8,7 @@ Bindings, specific for the sh_mobile_ceu_camera.c driver:
 
 Example:
 
-ceu0: ceu@0xfe910000 {
+ceu0: ceu@fe910000 {
 	compatible = "renesas,sh-mobile-ceu";
 	reg = <0xfe910000 0xa0>;
 	interrupt-parent = <&intcs>;
diff --git a/Documentation/devicetree/bindings/media/video-interfaces.txt b/Documentation/devicetree/bindings/media/video-interfaces.txt
index 3994b0143dd1ef37c9ce28462f686e51ccdadc4e..258b8dfddf48c1e218b5e3e1a8a41b70b4baeeec 100644
--- a/Documentation/devicetree/bindings/media/video-interfaces.txt
+++ b/Documentation/devicetree/bindings/media/video-interfaces.txt
@@ -154,7 +154,7 @@ imx074 is linked to ceu0 through the MIPI CSI-2 receiver (csi2). ceu0 has a
 'port' node which may indicate that at any time only one of the following data
 pipelines can be active: ov772x -> ceu0 or imx074 -> csi2 -> ceu0.
 
-	ceu0: ceu@0xfe910000 {
+	ceu0: ceu@fe910000 {
 		compatible = "renesas,sh-mobile-ceu";
 		reg = <0xfe910000 0xa0>;
 		interrupts = <0x880>;
@@ -193,9 +193,9 @@ pipelines can be active: ov772x -> ceu0 or imx074 -> csi2 -> ceu0.
 		};
 	};
 
-	i2c0: i2c@0xfff20000 {
+	i2c0: i2c@fff20000 {
 		...
-		ov772x_1: camera@0x21 {
+		ov772x_1: camera@21 {
 			compatible = "ovti,ov772x";
 			reg = <0x21>;
 			vddio-supply = <&regulator1>;
@@ -219,7 +219,7 @@ pipelines can be active: ov772x -> ceu0 or imx074 -> csi2 -> ceu0.
 			};
 		};
 
-		imx074: camera@0x1a {
+		imx074: camera@1a {
 			compatible = "sony,imx074";
 			reg = <0x1a>;
 			vddio-supply = <&regulator1>;
@@ -239,7 +239,7 @@ pipelines can be active: ov772x -> ceu0 or imx074 -> csi2 -> ceu0.
 		};
 	};
 
-	csi2: csi2@0xffc90000 {
+	csi2: csi2@ffc90000 {
 		compatible = "renesas,sh-mobile-csi2";
 		reg = <0xffc90000 0x1000>;
 		interrupts = <0x17a0>;
diff --git a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt
index fd823d6091b29514e0db29a3b73b1c47edbf064f..152eeccbde1ccd7f3e7686867404733c3f0f8a29 100644
--- a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt
@@ -46,7 +46,7 @@ Optional properties:
 
 Example:
 
-emif1: emif@0x4c000000 {
+emif1: emif@4c000000 {
 	compatible	= "ti,emif-4d";
 	ti,hwmods	= "emif2";
 	phy-type	= <1>;
diff --git a/Documentation/devicetree/bindings/mfd/ti-keystone-devctrl.txt b/Documentation/devicetree/bindings/mfd/ti-keystone-devctrl.txt
index 20963c76b4bcbd30d8d5081ddab600d1c78ef39a..71a1f5963936b855d2ec7fc195e754cf5ad6ec02 100644
--- a/Documentation/devicetree/bindings/mfd/ti-keystone-devctrl.txt
+++ b/Documentation/devicetree/bindings/mfd/ti-keystone-devctrl.txt
@@ -13,7 +13,7 @@ Required properties:
 
 Example:
 
-devctrl: device-state-control@0x02620000 {
+devctrl: device-state-control@02620000 {
 	compatible = "ti,keystone-devctrl", "syscon";
 	reg = <0x02620000 0x1000>;
 };
diff --git a/Documentation/devicetree/bindings/misc/brcm,kona-smc.txt b/Documentation/devicetree/bindings/misc/brcm,kona-smc.txt
index 6c9f176f35717acf1a2fd3dcd7ca2cee86daeb88..05b47232ed9ed647a97df99e7b4c175352748fb6 100644
--- a/Documentation/devicetree/bindings/misc/brcm,kona-smc.txt
+++ b/Documentation/devicetree/bindings/misc/brcm,kona-smc.txt
@@ -9,7 +9,7 @@ Required properties:
 - reg : Location and size of bounce buffer
 
 Example:
-	smc@0x3404c000 {
+	smc@3404c000 {
 		compatible = "brcm,bcm11351-smc", "brcm,kona-smc";
 		reg = <0x3404c000 0x400>; //1 KiB in SRAM
 	};
diff --git a/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt b/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt
index aaba2483b4ff8c79f34030b28b3f546c03084032..7f5dd83f5bd95afd1b696e186681caae75c268b1 100644
--- a/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt
+++ b/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt
@@ -12,7 +12,7 @@ Refer to clocks/clock-bindings.txt for generic clock consumer properties.
 
 Example:
 
-sdio2: sdio@0x3f1a0000 {
+sdio2: sdio@3f1a0000 {
 	compatible = "brcm,kona-sdhci";
 	reg = <0x3f1a0000 0x10000>;
 	clocks = <&sdio3_clk>;
diff --git a/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt b/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt
index 954561d09a8e6acca1cdfcd433c07da29027c1ca..fa90d253dc7ea00f79de2b7cb99477a87f1682b7 100644
--- a/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt
+++ b/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt
@@ -24,7 +24,7 @@ Optional properties:
 
 Example:
 
-sdhci0: sdhci@0x18041000 {
+sdhci0: sdhci@18041000 {
 	compatible = "brcm,sdhci-iproc-cygnus";
 	reg = <0x18041000 0x100>;
 	interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
index 3a4ac401e6f93a9d8ce3becd4d75410f95f1e50d..19f5508a75696b722624c550700ee74f72b2362f 100644
--- a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
+++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
@@ -55,7 +55,7 @@ Examples:
 
 [hwmod populated DMA resources]
 
-	mmc1: mmc@0x4809c000 {
+	mmc1: mmc@4809c000 {
 		compatible = "ti,omap4-hsmmc";
 		reg = <0x4809c000 0x400>;
 		ti,hwmods = "mmc1";
@@ -67,7 +67,7 @@ Examples:
 
 [generic DMA request binding]
 
-	mmc1: mmc@0x4809c000 {
+	mmc1: mmc@4809c000 {
 		compatible = "ti,omap4-hsmmc";
 		reg = <0x4809c000 0x400>;
 		ti,hwmods = "mmc1";
diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nor.txt b/Documentation/devicetree/bindings/mtd/gpmc-nor.txt
index 131d3a74d0bd453f48c3f31e5487db4e3c71c6e1..c8567b40fe13a02abaee0ffd2f037d0a4282a950 100644
--- a/Documentation/devicetree/bindings/mtd/gpmc-nor.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmc-nor.txt
@@ -82,15 +82,15 @@ gpmc: gpmc@6e000000 {
 			label = "bootloader-nor";
 			reg = <0 0x40000>;
 		};
-		partition@0x40000 {
+		partition@40000 {
 			label = "params-nor";
 			reg = <0x40000 0x40000>;
 		};
-		partition@0x80000 {
+		partition@80000 {
 			label = "kernel-nor";
 			reg = <0x80000 0x200000>;
 		};
-		partition@0x280000 {
+		partition@280000 {
 			label = "filesystem-nor";
 			reg = <0x240000 0x7d80000>;
 		};
diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
index 376fa2f50e6bc9b41052928037acd4b3a382d380..956bb046e599d576e3f881b2901e0d369a3c9802 100644
--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
@@ -13,7 +13,6 @@ Required properties:
                  at25df321a
                  at25df641
                  at26df081a
-                 en25s64
                  mr25h128
                  mr25h256
                  mr25h10
@@ -33,7 +32,6 @@ Required properties:
                  s25fl008k
                  s25fl064k
                  sst25vf040b
-                 sst25wf040b
                  m25p40
                  m25p80
                  m25p16
diff --git a/Documentation/devicetree/bindings/mtd/mtk-nand.txt b/Documentation/devicetree/bindings/mtd/mtk-nand.txt
index dbf9e054c11c0f3a68ba2783a042ca25e12d6b78..0431841de781334ce38aab9877ccd7a8d3698079 100644
--- a/Documentation/devicetree/bindings/mtd/mtk-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/mtk-nand.txt
@@ -131,7 +131,7 @@ Example:
 				read-only;
 				reg = <0x00000000 0x00400000>;
 			};
-			android@0x00400000 {
+			android@00400000 {
 				label = "android";
 				reg = <0x00400000 0x12c00000>;
 			};
diff --git a/Documentation/devicetree/bindings/net/altera_tse.txt b/Documentation/devicetree/bindings/net/altera_tse.txt
index a706297998e9b48a5eb8924b0e622ae807532757..0e21df94a53ffa221e6262f4323dc2ff60068a00 100644
--- a/Documentation/devicetree/bindings/net/altera_tse.txt
+++ b/Documentation/devicetree/bindings/net/altera_tse.txt
@@ -52,7 +52,7 @@ Optional properties:
 
 Example:
 
-	tse_sub_0_eth_tse_0: ethernet@0x1,00000000 {
+	tse_sub_0_eth_tse_0: ethernet@1,00000000 {
 		compatible = "altr,tse-msgdma-1.0";
 		reg =	<0x00000001 0x00000000 0x00000400>,
 			<0x00000001 0x00000460 0x00000020>,
@@ -90,7 +90,7 @@ Example:
 		};
 	};
 
-	tse_sub_1_eth_tse_0: ethernet@0x1,00001000 {
+	tse_sub_1_eth_tse_0: ethernet@1,00001000 {
 		compatible = "altr,tse-msgdma-1.0";
 		reg = 	<0x00000001 0x00001000 0x00000400>,
 			<0x00000001 0x00001460 0x00000020>,
diff --git a/Documentation/devicetree/bindings/net/mdio.txt b/Documentation/devicetree/bindings/net/mdio.txt
index 96a53f89aa6e2fa7f9a0d4c6c3b18d03a3b75994..e3e1603f256c1a55ce956a403fce855e7bf58e0e 100644
--- a/Documentation/devicetree/bindings/net/mdio.txt
+++ b/Documentation/devicetree/bindings/net/mdio.txt
@@ -18,7 +18,7 @@ Example :
 This example shows these optional properties, plus other properties
 required for the TI Davinci MDIO driver.
 
-	davinci_mdio: ethernet@0x5c030000 {
+	davinci_mdio: ethernet@5c030000 {
 		compatible = "ti,davinci_mdio";
 		reg = <0x5c030000 0x1000>;
 		#address-cells = <1>;
diff --git a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
index b30d04b54ee94f93910421caca3badda8fcae01e..17d6819669c8ccee0186f4dc797be638a1828097 100644
--- a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
@@ -28,7 +28,7 @@ Required properties:
 
 Example:
 
-gmii_to_sgmii_converter: phy@0x100000240 {
+gmii_to_sgmii_converter: phy@100000240 {
 	compatible = "altr,gmii-to-sgmii-2.0";
 	reg = <0x00000001 0x00000240 0x00000008>,
 		<0x00000001 0x00000200 0x00000040>;
diff --git a/Documentation/devicetree/bindings/nios2/nios2.txt b/Documentation/devicetree/bindings/nios2/nios2.txt
index d6d0a94cb3bbba6634a1c365df92670c84499a94..b95e831bcba3f9824bebf3ce75ee11c2a0b4f203 100644
--- a/Documentation/devicetree/bindings/nios2/nios2.txt
+++ b/Documentation/devicetree/bindings/nios2/nios2.txt
@@ -36,7 +36,7 @@ Optional properties:
 
 Example:
 
-cpu@0x0 {
+cpu@0 {
 	device_type = "cpu";
 	compatible = "altr,nios2-1.0";
 	reg = <0>;
diff --git a/Documentation/devicetree/bindings/pci/altera-pcie.txt b/Documentation/devicetree/bindings/pci/altera-pcie.txt
index 495880193adc8c9e9403b5fda893df580718630c..a1dc9366a8fcac6683c99d4d1aeb961b836d44cc 100644
--- a/Documentation/devicetree/bindings/pci/altera-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/altera-pcie.txt
@@ -25,7 +25,7 @@ Optional properties:
 - bus-range:	PCI bus numbers covered
 
 Example
-	pcie_0: pcie@0xc00000000 {
+	pcie_0: pcie@c00000000 {
 		compatible = "altr,pcie-root-port-1.0";
 		reg = <0xc0000000 0x20000000>,
 			<0xff220000 0x00004000>;
diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
index 7b1e48bf172b74ba51755409cf8510dc0a15fb6d..149d8f7f86b06ed943b7c0c1331d777a593db55f 100644
--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
@@ -52,7 +52,7 @@ Additional required properties for imx7d-pcie:
 
 Example:
 
-	pcie@0x01000000 {
+	pcie@01000000 {
 		compatible = "fsl,imx6q-pcie", "snps,dw-pcie";
 		reg = <0x01ffc000 0x04000>,
 		      <0x01f00000 0x80000>;
diff --git a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
index bdb7ab39d2d7effafede0803adef4eeef1d9bf9a..7bf9df047a1ee4992ee7f8b9f8e3f742b26bc8a0 100644
--- a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
@@ -21,7 +21,7 @@ Optional properties:
 - dma-coherent: Present if DMA operations are coherent.
 
 Hip05 Example (note that Hip06 is the same except compatible):
-	pcie@0xb0080000 {
+	pcie@b0080000 {
 		compatible = "hisilicon,hip05-pcie", "snps,dw-pcie";
 		reg = <0 0xb0080000 0 0x10000>, <0x220 0x00000000 0 0x2000>;
 		reg-names = "rc_dbi", "config";
diff --git a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt
index cbc7847dbf6c59ffeb845f95682d9b9a427d81b8..c1ce5a0a652ecbbbb9e61b251b6d41eaad52433f 100644
--- a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt
+++ b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt
@@ -45,7 +45,7 @@ Optional properties:
 - usb3_vbus-supply : regulator phandle for controller usb3 vbus
 
 Example:
-	usbphy: phy@0x01c13400 {
+	usbphy: phy@01c13400 {
 		#phy-cells = <1>;
 		compatible = "allwinner,sun4i-a10-usb-phy";
 		/* phy base regs, phy1 pmu reg, phy2 pmu reg */
diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-pinmux.txt
index 3600d5c6c4d7d5372f405fbd4c146e41dfa24071..3914529a3214b61b6dbc8c2fa6e49cd90bc8d6a5 100644
--- a/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-pinmux.txt
+++ b/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-pinmux.txt
@@ -25,7 +25,7 @@ Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
 
 For example:
 
-	pinmux: pinmux@0x0301d0c8 {
+	pinmux: pinmux@0301d0c8 {
 		compatible = "brcm,cygnus-pinmux";
 		reg = <0x0301d0c8 0x1b0>;
 
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-atlas7.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-atlas7.txt
index eecf028ff4854bbe994b6232a7f88890e759d3cf..bf9b07016c8730f29a199e100065d19e90e9200a 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-atlas7.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-atlas7.txt
@@ -96,14 +96,14 @@ For example, pinctrl might have subnodes like the following:
 
 For a specific board, if it wants to use sd1,
 it can add the following to its board-specific .dts file.
-sd1: sd@0x12340000 {
+sd1: sd@12340000 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&sd1_pmx0>;
 }
 
 or
 
-sd1: sd@0x12340000 {
+sd1: sd@12340000 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&sd1_pmx1>;
 }
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt
index 5f55be59d914a33aa71f4f432237782fd45f46ad..f8420520e14bf6d48467585c8260455f12da1540 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt
@@ -41,7 +41,7 @@ For example, pinctrl might have subnodes like the following:
 
 For a specific board, if it wants to use uart2 without hardware flow control,
 it can add the following to its board-specific .dts file.
-uart2: uart@0xb0070000 {
+uart2: uart@b0070000 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart2_noflow_pins_a>;
 }
diff --git a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
index 4864e3a74de311ff02e6f3c54a04ae2fbba5ae2d..a01a3b8a23632a8707ff9e528c6c010500d8da9a 100644
--- a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
@@ -136,7 +136,7 @@ Example for rk3188:
 		#size-cells = <1>;
 		ranges;
 
-		gpio0: gpio0@0x2000a000 {
+		gpio0: gpio0@2000a000 {
 			compatible = "rockchip,rk3188-gpio-bank0";
 			reg = <0x2000a000 0x100>;
 			interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
@@ -149,7 +149,7 @@ Example for rk3188:
 			#interrupt-cells = <2>;
 		};
 
-		gpio1: gpio1@0x2003c000 {
+		gpio1: gpio1@2003c000 {
 			compatible = "rockchip,gpio-bank";
 			reg = <0x2003c000 0x100>;
 			interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt
index 378f6dc8b8bd102b64cbf477aa203c41115d9b00..3cbf56ce66ea9871ad2e41068baaef6171ae7ca3 100644
--- a/Documentation/devicetree/bindings/regulator/regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/regulator.txt
@@ -107,7 +107,7 @@ regulators (twl_reg1 and twl_reg2),
 		...
 	};
 
-	mmc: mmc@0x0 {
+	mmc: mmc@0 {
 		...
 		...
 		vmmc-supply = <&twl_reg1>;
diff --git a/Documentation/devicetree/bindings/serial/efm32-uart.txt b/Documentation/devicetree/bindings/serial/efm32-uart.txt
index 8adbab268ca3997f50c134081d2f84409d8d10e3..4f8d8fde0c1c2a0728ac3c331d694e7a1bdbd423 100644
--- a/Documentation/devicetree/bindings/serial/efm32-uart.txt
+++ b/Documentation/devicetree/bindings/serial/efm32-uart.txt
@@ -12,7 +12,7 @@ Optional properties:
 
 Example:
 
-uart@0x4000c400 {
+uart@4000c400 {
 	compatible = "energymicro,efm32-uart";
 	reg = <0x4000c400 0x400>;
 	interrupts = <15>;
diff --git a/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt b/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt
index f311472990a7e5f081dad3cdba160e2d5edc7abc..75996b6111bb7d95f5f6effb28d27679fc1a5400 100644
--- a/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt
+++ b/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt
@@ -14,7 +14,7 @@ Required properties:
 
 
 Example:
-	ps20: ps2@0x01c2a000 {
+	ps20: ps2@01c2a000 {
 		compatible = "allwinner,sun4i-a10-ps2";
 		reg = <0x01c2a000 0x400>;
 		interrupts = <0 62 4>;
diff --git a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
index 64c66a5644e7551c2637b4aebff3d81f12cdf48f..77cd42cc5f54897b60371af222d62f847e97326f 100644
--- a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
+++ b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
@@ -220,7 +220,7 @@ qmss: qmss@2a40000 {
 		#address-cells = <1>;
 		#size-cells = <1>;
 		ranges;
-		pdsp0@0x2a10000 {
+		pdsp0@2a10000 {
 			reg = <0x2a10000 0x1000>,
 			      <0x2a0f000 0x100>,
 			      <0x2a0c000 0x3c8>,
diff --git a/Documentation/devicetree/bindings/sound/adi,axi-i2s.txt b/Documentation/devicetree/bindings/sound/adi,axi-i2s.txt
index 5875ca459ed11355feccc4c106e04e5df2a71ce3..4248b662deff04e4fbbbc3effa487e09fa140bee 100644
--- a/Documentation/devicetree/bindings/sound/adi,axi-i2s.txt
+++ b/Documentation/devicetree/bindings/sound/adi,axi-i2s.txt
@@ -21,7 +21,7 @@ please check:
 
 Example:
 
-	i2s: i2s@0x77600000 {
+	i2s: i2s@77600000 {
 		compatible = "adi,axi-i2s-1.00.a";
 		reg = <0x77600000 0x1000>;
 		clocks = <&clk 15>, <&audio_clock>;
diff --git a/Documentation/devicetree/bindings/sound/adi,axi-spdif-tx.txt b/Documentation/devicetree/bindings/sound/adi,axi-spdif-tx.txt
index 4eb7997674a09006dfa0a991b634082ddc64974d..7b664e7cb4ae99dfae7f1beb52bba140f6ee1dd7 100644
--- a/Documentation/devicetree/bindings/sound/adi,axi-spdif-tx.txt
+++ b/Documentation/devicetree/bindings/sound/adi,axi-spdif-tx.txt
@@ -20,7 +20,7 @@ please check:
 
 Example:
 
-	spdif: spdif@0x77400000 {
+	spdif: spdif@77400000 {
 		compatible = "adi,axi-spdif-tx-1.00.a";
 		reg = <0x77600000 0x1000>;
 		clocks = <&clk 15>, <&audio_clock>;
diff --git a/Documentation/devicetree/bindings/sound/ak4613.txt b/Documentation/devicetree/bindings/sound/ak4613.txt
index 1783f9ef093096a0f06c1daf8ba854bba91e397d..49a2e74fd9cb1489b145b1132f2f892209c8b448 100644
--- a/Documentation/devicetree/bindings/sound/ak4613.txt
+++ b/Documentation/devicetree/bindings/sound/ak4613.txt
@@ -20,7 +20,7 @@ Optional properties:
 Example:
 
 &i2c {
-	ak4613: ak4613@0x10 {
+	ak4613: ak4613@10 {
 		compatible = "asahi-kasei,ak4613";
 		reg = <0x10>;
 	};
diff --git a/Documentation/devicetree/bindings/sound/ak4642.txt b/Documentation/devicetree/bindings/sound/ak4642.txt
index 340784db6808809eecf4ae0e310023550713e2c6..58e48ee9717547f2766a2c0a89fec19536431634 100644
--- a/Documentation/devicetree/bindings/sound/ak4642.txt
+++ b/Documentation/devicetree/bindings/sound/ak4642.txt
@@ -17,7 +17,7 @@ Optional properties:
 Example 1:
 
 &i2c {
-	ak4648: ak4648@0x12 {
+	ak4648: ak4648@12 {
 		compatible = "asahi-kasei,ak4642";
 		reg = <0x12>;
 	};
diff --git a/Documentation/devicetree/bindings/sound/da7218.txt b/Documentation/devicetree/bindings/sound/da7218.txt
index 5ca5a709b6aa1989901eff6b9239f0bc71d0d272..3ab9dfef38d113523549e66cee20ad8db6e56842 100644
--- a/Documentation/devicetree/bindings/sound/da7218.txt
+++ b/Documentation/devicetree/bindings/sound/da7218.txt
@@ -73,7 +73,7 @@ Example:
 		compatible = "dlg,da7218";
 		reg = <0x1a>;
 		interrupt-parent = <&gpio6>;
-		interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
 		wakeup-source;
 
 		VDD-supply = <&reg_audio>;
diff --git a/Documentation/devicetree/bindings/sound/da7219.txt b/Documentation/devicetree/bindings/sound/da7219.txt
index cf61681826b675ad984dc17f6c1dbcc2e1b6fc35..5b54d2d045c355808bf0f58afba9ec7539ff4472 100644
--- a/Documentation/devicetree/bindings/sound/da7219.txt
+++ b/Documentation/devicetree/bindings/sound/da7219.txt
@@ -77,7 +77,7 @@ Example:
 		reg = <0x1a>;
 
 		interrupt-parent = <&gpio6>;
-		interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
 
 		VDD-supply = <&reg_audio>;
 		VDDMIC-supply = <&reg_audio>;
diff --git a/Documentation/devicetree/bindings/sound/max98371.txt b/Documentation/devicetree/bindings/sound/max98371.txt
index 6c285235e64be59e0cf55bfee5b54920a58dea36..8b2b2704b574e248c426565514076e2a42aa0631 100644
--- a/Documentation/devicetree/bindings/sound/max98371.txt
+++ b/Documentation/devicetree/bindings/sound/max98371.txt
@@ -10,7 +10,7 @@ Required properties:
 Example:
 
 &i2c {
-	max98371: max98371@0x31 {
+	max98371: max98371@31 {
 		compatible = "maxim,max98371";
 		reg = <0x31>;
 	};
diff --git a/Documentation/devicetree/bindings/sound/max9867.txt b/Documentation/devicetree/bindings/sound/max9867.txt
index 394cd4eb17ec47531edabc161f93506925775805..b8bd914ee697b1435f0ebb47e4d3062961c61859 100644
--- a/Documentation/devicetree/bindings/sound/max9867.txt
+++ b/Documentation/devicetree/bindings/sound/max9867.txt
@@ -10,7 +10,7 @@ Required properties:
 Example:
 
 &i2c {
-	max9867: max9867@0x18 {
+	max9867: max9867@18 {
 		compatible = "maxim,max9867";
 		reg = <0x18>;
 	};
diff --git a/Documentation/devicetree/bindings/sound/renesas,fsi.txt b/Documentation/devicetree/bindings/sound/renesas,fsi.txt
index 0d0ab51105b01ece9c0dfbe6571e6d2086550f42..0cf0f819b8236e250fd0f0db1bc0dba879e9dcd4 100644
--- a/Documentation/devicetree/bindings/sound/renesas,fsi.txt
+++ b/Documentation/devicetree/bindings/sound/renesas,fsi.txt
@@ -20,7 +20,7 @@ Required properties:
 
 Example:
 
-sh_fsi2: sh_fsi2@0xec230000 {
+sh_fsi2: sh_fsi2@ec230000 {
 	compatible = "renesas,sh_fsi2";
 	reg = <0xec230000 0x400>;
 	interrupts = <0 146 0x4>;
diff --git a/Documentation/devicetree/bindings/sound/rockchip-spdif.txt b/Documentation/devicetree/bindings/sound/rockchip-spdif.txt
index 0a1dc4e1815cb524faee4c228a14ddeeea626578..ec20c1271e929c17e8c24e736989cdbbd4ec9ce9 100644
--- a/Documentation/devicetree/bindings/sound/rockchip-spdif.txt
+++ b/Documentation/devicetree/bindings/sound/rockchip-spdif.txt
@@ -33,7 +33,7 @@ Required properties on RK3288:
 
 Example for the rk3188 SPDIF controller:
 
-spdif: spdif@0x1011e000 {
+spdif: spdif@1011e000 {
 	compatible = "rockchip,rk3188-spdif", "rockchip,rk3066-spdif";
 	reg = <0x1011e000 0x2000>;
 	interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt b/Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt
index 40068ec0e9a561db3128fe37d581b1a4499b5ddf..9c1ee52fed5bff6300b5863b0c3fc07aeceb5dc8 100644
--- a/Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt
+++ b/Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt
@@ -51,7 +51,7 @@ Optional properties:
 
 Example:
 
-	sti_uni_player1: sti-uni-player@0x8D81000 {
+	sti_uni_player1: sti-uni-player@8D81000 {
 		compatible = "st,stih407-uni-player-hdmi";
 		#sound-dai-cells = <0>;
 		st,syscfg = <&syscfg_core>;
@@ -63,7 +63,7 @@ Example:
 		st,tdm-mode = <1>;
 	};
 
-	sti_uni_player2: sti-uni-player@0x8D82000 {
+	sti_uni_player2: sti-uni-player@8D82000 {
 		compatible = "st,stih407-uni-player-pcm-out";
 		#sound-dai-cells = <0>;
 		st,syscfg = <&syscfg_core>;
@@ -74,7 +74,7 @@ Example:
 		dma-names = "tx";
 	};
 
-	sti_uni_player3: sti-uni-player@0x8D85000 {
+	sti_uni_player3: sti-uni-player@8D85000 {
 		compatible = "st,stih407-uni-player-spdif";
 		#sound-dai-cells = <0>;
 		st,syscfg = <&syscfg_core>;
@@ -85,7 +85,7 @@ Example:
 		dma-names = "tx";
 	};
 
-	sti_uni_reader1: sti-uni-reader@0x8D84000 {
+	sti_uni_reader1: sti-uni-reader@8D84000 {
 		compatible = "st,stih407-uni-reader-hdmi";
 		#sound-dai-cells = <0>;
 		st,syscfg = <&syscfg_core>;
diff --git a/Documentation/devicetree/bindings/spi/efm32-spi.txt b/Documentation/devicetree/bindings/spi/efm32-spi.txt
index 2c1e6a43930baa88426aa4822f6ec79ee8bbc10e..e0fa61a1be0c864258574787d8fa496db2118916 100644
--- a/Documentation/devicetree/bindings/spi/efm32-spi.txt
+++ b/Documentation/devicetree/bindings/spi/efm32-spi.txt
@@ -19,7 +19,7 @@ Recommended properties :
 
 Example:
 
-spi1: spi@0x4000c400 { /* USART1 */
+spi1: spi@4000c400 { /* USART1 */
 	#address-cells = <1>;
 	#size-cells = <0>;
 	compatible = "energymicro,efm32-spi";
diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
index 5bf13960f7f4a3c826c10b1e15a618df82d82403..e3c48b20b1a691b37d0b425251a257c682a38eca 100644
--- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
+++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
@@ -12,24 +12,30 @@ Required properties:
   - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
 - reg : Offset and length of the register set for the device
 - interrupts : Should contain CSPI/eCSPI interrupt
-- cs-gpios : Specifies the gpio pins to be used for chipselects.
 - clocks : Clock specifiers for both ipg and per clocks.
 - clock-names : Clock names should include both "ipg" and "per"
 See the clock consumer binding,
 	Documentation/devicetree/bindings/clock/clock-bindings.txt
-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
-		Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request names should include "tx" and "rx" if present.
 
-Obsolete properties:
-- fsl,spi-num-chipselects : Contains the number of the chipselect
+Recommended properties:
+- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt.  While the native chip
+select lines can be used, they appear to always generate a pulse between each
+word of a transfer.  Most use cases will require GPIO based chip selects to
+generate a valid transaction.
 
 Optional properties:
+- num-cs :  Number of total chip selects, see spi-bus.txt.
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+Documentation/devicetree/bindings/dma/dma.txt.
+- dma-names: DMA request names, if present, should include "tx" and "rx".
 - fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
 controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
 the SPI_READY mode-flag needs to be set too.
 Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).
 
+Obsolete properties:
+- fsl,spi-num-chipselects : Contains the number of the chipselect
+
 Example:
 
 ecspi@70010000 {
diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt
index 88b6ea1ad2903cadd73650ff6dbfa0eb54aac39f..44d7cb2cb2c023ec8f1bbcaa90c26f7d406f9abf 100644
--- a/Documentation/devicetree/bindings/thermal/thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/thermal.txt
@@ -239,7 +239,7 @@ cpus {
 	 * A simple fan controller which supports 10 speeds of operation
 	 * (represented as 0-9).
 	 */
-	fan0: fan@0x48 {
+	fan0: fan@48 {
 		...
 		cooling-min-level = <0>;
 		cooling-max-level = <9>;
@@ -252,7 +252,7 @@ ocp {
 	/*
 	 * A simple IC with a single bandgap temperature sensor.
 	 */
-	bandgap0: bandgap@0x0000ED00 {
+	bandgap0: bandgap@0000ED00 {
 		...
 		#thermal-sensor-cells = <0>;
 	};
@@ -330,7 +330,7 @@ ocp {
 	/*
 	 * A simple IC with several bandgap temperature sensors.
 	 */
-	bandgap0: bandgap@0x0000ED00 {
+	bandgap0: bandgap@0000ED00 {
 		...
 		#thermal-sensor-cells = <1>;
 	};
@@ -447,7 +447,7 @@ one thermal zone.
 	/*
 	 * A simple IC with a single temperature sensor.
 	 */
-	adc: sensor@0x49 {
+	adc: sensor@49 {
 		...
 		#thermal-sensor-cells = <0>;
 	};
@@ -458,7 +458,7 @@ ocp {
 	/*
 	 * A simple IC with a single bandgap temperature sensor.
 	 */
-	bandgap0: bandgap@0x0000ED00 {
+	bandgap0: bandgap@0000ED00 {
 		...
 		#thermal-sensor-cells = <0>;
 	};
@@ -516,7 +516,7 @@ with many sensors and many cooling devices.
 	/*
 	 * An IC with several temperature sensor.
 	 */
-	adc_dummy: sensor@0x50 {
+	adc_dummy: sensor@50 {
 		...
 		#thermal-sensor-cells = <1>; /* sensor internal ID */
 	};
diff --git a/Documentation/devicetree/bindings/ufs/ufs-qcom.txt b/Documentation/devicetree/bindings/ufs/ufs-qcom.txt
index 1f69ee1a61ea25b776671be1becb8e8f63a29eb1..21d9a93db2e970cde487151f667c93063a8ae5f9 100644
--- a/Documentation/devicetree/bindings/ufs/ufs-qcom.txt
+++ b/Documentation/devicetree/bindings/ufs/ufs-qcom.txt
@@ -32,7 +32,7 @@ Optional properties:
 
 Example:
 
-	ufsphy1: ufsphy@0xfc597000 {
+	ufsphy1: ufsphy@fc597000 {
 		compatible = "qcom,ufs-phy-qmp-20nm";
 		reg = <0xfc597000 0x800>;
 		reg-names = "phy_mem";
@@ -53,7 +53,7 @@ Example:
 			<&clock_gcc clk_gcc_ufs_rx_cfg_clk>;
 	};
 
-	ufshc@0xfc598000 {
+	ufshc@fc598000 {
 		...
 		phys = <&ufsphy1>;
 		phy-names = "ufsphy";
diff --git a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
index a99ed5565b26c406ef6e643c99d6c1eb08fedeac..c39dfef76a183ad80eb27b36a820b3ee6c8107b6 100644
--- a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
+++ b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
@@ -46,7 +46,7 @@ Note: If above properties are not defined it can be assumed that the supply
 regulators or clocks are always on.
 
 Example:
-	ufshc@0xfc598000 {
+	ufshc@fc598000 {
 		compatible = "jedec,ufs-1.1";
 		reg = <0xfc598000 0x800>;
 		interrupts = <0 28 0>;
diff --git a/Documentation/devicetree/bindings/usb/am33xx-usb.txt b/Documentation/devicetree/bindings/usb/am33xx-usb.txt
index 7a33f22c815a76ca534bf16f5d158e6d9ae07033..7a198a30408abf3f8a164189d60248f1dfcddc8b 100644
--- a/Documentation/devicetree/bindings/usb/am33xx-usb.txt
+++ b/Documentation/devicetree/bindings/usb/am33xx-usb.txt
@@ -95,6 +95,7 @@ usb: usb@47400000 {
 		reg = <0x47401300 0x100>;
 		reg-names = "phy";
 		ti,ctrl_mod = <&ctrl_mod>;
+		#phy-cells = <0>;
 	};
 
 	usb0: usb@47401000 {
@@ -141,6 +142,7 @@ usb: usb@47400000 {
 		reg = <0x47401b00 0x100>;
 		reg-names = "phy";
 		ti,ctrl_mod = <&ctrl_mod>;
+		#phy-cells = <0>;
 	};
 
 	usb1: usb@47401800 {
diff --git a/Documentation/devicetree/bindings/usb/ehci-st.txt b/Documentation/devicetree/bindings/usb/ehci-st.txt
index 9feea6c3e4d998cba087de49f608cba8ce445242..065c91d955ad159a1ff4cbd75d26ff86f0e52c0e 100644
--- a/Documentation/devicetree/bindings/usb/ehci-st.txt
+++ b/Documentation/devicetree/bindings/usb/ehci-st.txt
@@ -22,7 +22,7 @@ See: Documentation/devicetree/bindings/reset/reset.txt
 
 Example:
 
-	ehci1: usb@0xfe203e00 {
+	ehci1: usb@fe203e00 {
 		compatible = "st,st-ehci-300x";
 		reg = <0xfe203e00 0x100>;
 		interrupts = <GIC_SPI 148 IRQ_TYPE_NONE>;
diff --git a/Documentation/devicetree/bindings/usb/ohci-st.txt b/Documentation/devicetree/bindings/usb/ohci-st.txt
index d893ec9131c3bdec4cd71e632b932ffd56dfbc9f..44c998c16f85bfbfa4814fffb8dc3d344c14a3de 100644
--- a/Documentation/devicetree/bindings/usb/ohci-st.txt
+++ b/Documentation/devicetree/bindings/usb/ohci-st.txt
@@ -20,7 +20,7 @@ See: Documentation/devicetree/bindings/reset/reset.txt
 
 Example:
 
-	ohci0: usb@0xfe1ffc00 {
+	ohci0: usb@fe1ffc00 {
 		compatible = "st,st-ohci-300x";
 		reg = <0xfe1ffc00 0x100>;
 		interrupts = <GIC_SPI 149 IRQ_TYPE_NONE>;
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 41cb1ff0715072a132f54b6b33421032c59e5399..159dc5c075c2bcfbd3ace6257d59737555189abd 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -173,6 +173,7 @@ itead	ITEAD Intelligent Systems Co.Ltd
 iwave  iWave Systems Technologies Pvt. Ltd.
 jdi	Japan Display Inc.
 jedec	JEDEC Solid State Technology Association
+jianda	Jiandangjing Technology Co., Ltd.
 karo	Ka-Ro electronics GmbH
 keithkoep	Keith & Koep GmbH
 keymile	Keymile GmbH
@@ -380,6 +381,7 @@ virtio	Virtual I/O Device Specification, developed by the OASIS consortium
 vivante	Vivante Corporation
 vocore VoCore Studio
 voipac	Voipac Technologies s.r.o.
+vot	Vision Optical Technology Co., Ltd.
 wd	Western Digital Corp.
 wetek	WeTek Electronics, limited.
 wexler	Wexler
diff --git a/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt b/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt
index e27763ef00497d8645463d084a04e3f07d0c939f..3c7a1cd13b1011f37753f67905561843ce984370 100644
--- a/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt
@@ -6,7 +6,7 @@ reg: Register address and length for watchdog registers
 
 Example:
 
-watchdog: jz4740-watchdog@0x10002000 {
+watchdog: jz4740-watchdog@10002000 {
 	compatible = "ingenic,jz4740-watchdog";
 	reg = <0x10002000 0x100>;
 };
diff --git a/Documentation/driver-api/dmaengine/client.rst b/Documentation/driver-api/dmaengine/client.rst
index 6245c99af8c1157176fd1d22681bc221917d3d7f..fbbb2831f29f8c7f50675238289478c98f472606 100644
--- a/Documentation/driver-api/dmaengine/client.rst
+++ b/Documentation/driver-api/dmaengine/client.rst
@@ -185,7 +185,7 @@ The details of these operations are:
       void dma_async_issue_pending(struct dma_chan *chan);
 
 Further APIs:
-------------
+-------------
 
 1. Terminate APIs
 
diff --git a/Documentation/driver-api/pci.rst b/Documentation/driver-api/pci.rst
index 01a6c8b7d3a7b59dcee9a11d828968e464af9c1c..ca85e5e78b2c439b3279fbe797b67eeaea84b141 100644
--- a/Documentation/driver-api/pci.rst
+++ b/Documentation/driver-api/pci.rst
@@ -25,9 +25,6 @@ PCI Support Library
 .. kernel-doc:: drivers/pci/irq.c
    :export:
 
-.. kernel-doc:: drivers/pci/htirq.c
-   :export:
-
 .. kernel-doc:: drivers/pci/probe.c
    :export:
 
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
index c0727dc36271e99eaf844c205fff6d901c3d057b..f2f3f8592a6f5e12bdcd2d56005deaf52030d0f7 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.txt
@@ -25,8 +25,8 @@ available from the following download page.  At least "mkfs.nilfs2",
 cleaner or garbage collector) are required.  Details on the tools are
 described in the man pages included in the package.
 
-Project web page:    http://nilfs.sourceforge.net/
-Download page:       http://nilfs.sourceforge.net/en/download.html
+Project web page:    https://nilfs.sourceforge.io/
+Download page:       https://nilfs.sourceforge.io/en/download.html
 List info:           http://vger.kernel.org/vger-lists.html#linux-nilfs
 
 Caveats
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index 8caa60734647f70a777b8a568ba9f2dd99182fb8..e6a5f4912b6d4a4910ed1d2fc494fff304ab47ff 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -156,6 +156,40 @@ handle it in two different ways:
    root of the overlay.  Finally the directory is moved to the new
    location.
 
+There are several ways to tune the "redirect_dir" feature.
+
+Kernel config options:
+
+- OVERLAY_FS_REDIRECT_DIR:
+    If this is enabled, then redirect_dir is turned on by  default.
+- OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW:
+    If this is enabled, then redirects are always followed by default. Enabling
+    this results in a less secure configuration.  Enable this option only when
+    worried about backward compatibility with kernels that have the redirect_dir
+    feature and follow redirects even if turned off.
+
+Module options (can also be changed through /sys/module/overlay/parameters/*):
+
+- "redirect_dir=BOOL":
+    See OVERLAY_FS_REDIRECT_DIR kernel config option above.
+- "redirect_always_follow=BOOL":
+    See OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW kernel config option above.
+- "redirect_max=NUM":
+    The maximum number of bytes in an absolute redirect (default is 256).
+
+Mount options:
+
+- "redirect_dir=on":
+    Redirects are enabled.
+- "redirect_dir=follow":
+    Redirects are not created, but followed.
+- "redirect_dir=off":
+    Redirects are not created and only followed if "redirect_always_follow"
+    feature is enabled in the kernel/module config.
+- "redirect_dir=nofollow":
+    Redirects are not created and not followed (equivalent to "redirect_dir=off"
+    if "redirect_always_follow" feature is not enabled).
+
 Non-directories
 ---------------
 
diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst
index 3ea622876b67ac1388bf2c58aeb71d9ca598b4b4..e37557b30f620604dfddffa2f00e1d303e147f7d 100644
--- a/Documentation/gpu/drm-kms-helpers.rst
+++ b/Documentation/gpu/drm-kms-helpers.rst
@@ -74,15 +74,6 @@ Helper Functions Reference
 .. kernel-doc:: drivers/gpu/drm/drm_atomic_helper.c
    :export:
 
-Legacy CRTC/Modeset Helper Functions Reference
-==============================================
-
-.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c
-   :doc: overview
-
-.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c
-   :export:
-
 Simple KMS Helper Reference
 ===========================
 
@@ -282,15 +273,6 @@ Flip-work Helper Reference
 .. kernel-doc:: drivers/gpu/drm/drm_flip_work.c
    :export:
 
-Plane Helper Reference
-======================
-
-.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c
-   :doc: overview
-
-.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c
-   :export:
-
 Auxiliary Modeset Helpers
 =========================
 
@@ -308,3 +290,21 @@ Framebuffer GEM Helper Reference
 
 .. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c
    :export:
+
+Legacy Plane Helper Reference
+=============================
+
+.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c
+   :doc: overview
+
+.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c
+   :export:
+
+Legacy CRTC/Modeset Helper Functions Reference
+==============================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c
+   :doc: overview
+
+.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c
+   :export:
diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst
index 307284125d7aa964522f85ea6fb34247e67c9de2..2dcf5b42015debbe9c047c61f2a0c35df7d02d65 100644
--- a/Documentation/gpu/drm-kms.rst
+++ b/Documentation/gpu/drm-kms.rst
@@ -263,14 +263,20 @@ Taken all together there's two consequences for the atomic design:
 
 - An atomic update is assembled and validated as an entirely free-standing pile
   of structures within the :c:type:`drm_atomic_state <drm_atomic_state>`
-  container. Again drivers can subclass that container for their own state
-  structure tracking needs. Only when a state is committed is it applied to the
-  driver and modeset objects. This way rolling back an update boils down to
-  releasing memory and unreferencing objects like framebuffers.
+  container. Driver private state structures are also tracked in the same
+  structure; see the next chapter.  Only when a state is committed is it applied
+  to the driver and modeset objects. This way rolling back an update boils down
+  to releasing memory and unreferencing objects like framebuffers.
 
 Read on in this chapter, and also in :ref:`drm_atomic_helper` for more detailed
 coverage of specific topics.
 
+Handling Driver Private State
+-----------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_atomic.c
+   :doc: handling driver private state
+
 Atomic Mode Setting Function Reference
 --------------------------------------
 
diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index af614746d9c53d67eacdef9b3ea36932294208c7..1e593370f64f061b550e273da46e1cd96645f244 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -194,6 +194,24 @@ drm_mode_config_helper_suspend/resume().
 
 Contact: Maintainer of the driver you plan to convert
 
+Convert drivers to use drm_fb_helper_fbdev_setup/teardown()
+-----------------------------------------------------------
+
+Most drivers can use drm_fb_helper_fbdev_setup() except maybe:
+
+- amdgpu which has special logic to decide whether to call
+  drm_helper_disable_unused_functions()
+
+- armada which isn't atomic and doesn't call
+  drm_helper_disable_unused_functions()
+
+- i915 which calls drm_fb_helper_initial_config() in a worker
+
+Drivers that use drm_framebuffer_remove() to clean up the fbdev framebuffer can
+probably use drm_fb_helper_fbdev_teardown().
+
+Contact: Maintainer of the driver you plan to convert
+
 Core refactorings
 =================
 
@@ -395,11 +413,6 @@ those drivers as simple as possible, so lots of room for refactoring:
   one of the ideas for having a shared dsi/dbi helper, abstracting away the
   transport details more.
 
-- tinydrm_lastclose could be drm_fb_helper_lastclose. Only thing we need
-  for that is to store the drm_fb_helper pointer somewhere in
-  drm_device->mode_config. And then we could roll that out to all the
-  drivers.
-
 - tinydrm_gem_cma_prime_import_sg_table should probably go into the cma
   helpers, as a _vmapped variant (since not every driver needs the vmap).
   And tinydrm_gem_cma_free_object could the be merged into
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index 262722d8867b2aa9f70f137529dd298e5e36af60..c4a293a03c337f5080d9cf682f64428fc74a1d79 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -200,10 +200,14 @@ module state. Dependency expressions have the following syntax:
 <expr> ::= <symbol>                             (1)
            <symbol> '=' <symbol>                (2)
            <symbol> '!=' <symbol>               (3)
-           '(' <expr> ')'                       (4)
-           '!' <expr>                           (5)
-           <expr> '&&' <expr>                   (6)
-           <expr> '||' <expr>                   (7)
+           <symbol1> '<' <symbol2>              (4)
+           <symbol1> '>' <symbol2>              (4)
+           <symbol1> '<=' <symbol2>             (4)
+           <symbol1> '>=' <symbol2>             (4)
+           '(' <expr> ')'                       (5)
+           '!' <expr>                           (6)
+           <expr> '&&' <expr>                   (7)
+           <expr> '||' <expr>                   (8)
 
 Expressions are listed in decreasing order of precedence. 
 
@@ -214,10 +218,13 @@ Expressions are listed in decreasing order of precedence.
     otherwise 'n'.
 (3) If the values of both symbols are equal, it returns 'n',
     otherwise 'y'.
-(4) Returns the value of the expression. Used to override precedence.
-(5) Returns the result of (2-/expr/).
-(6) Returns the result of min(/expr/, /expr/).
-(7) Returns the result of max(/expr/, /expr/).
+(4) If value of <symbol1> is respectively lower, greater, lower-or-equal,
+    or greater-or-equal than value of <symbol2>, it returns 'y',
+    otherwise 'n'.
+(5) Returns the value of the expression. Used to override precedence.
+(6) Returns the result of (2-/expr/).
+(7) Returns the result of min(/expr/, /expr/).
+(8) Returns the result of max(/expr/, /expr/).
 
 An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
 respectively for calculations). A menu entry becomes visible when its
diff --git a/Documentation/locking/crossrelease.txt b/Documentation/locking/crossrelease.txt
deleted file mode 100644
index bdf1423d5f99e9b5a65e293049fdf0af7675f3b7..0000000000000000000000000000000000000000
--- a/Documentation/locking/crossrelease.txt
+++ /dev/null
@@ -1,874 +0,0 @@
-Crossrelease
-============
-
-Started by Byungchul Park <byungchul.park@lge.com>
-
-Contents:
-
- (*) Background
-
-     - What causes deadlock
-     - How lockdep works
-
- (*) Limitation
-
-     - Limit lockdep
-     - Pros from the limitation
-     - Cons from the limitation
-     - Relax the limitation
-
- (*) Crossrelease
-
-     - Introduce crossrelease
-     - Introduce commit
-
- (*) Implementation
-
-     - Data structures
-     - How crossrelease works
-
- (*) Optimizations
-
-     - Avoid duplication
-     - Lockless for hot paths
-
- (*) APPENDIX A: What lockdep does to work aggresively
-
- (*) APPENDIX B: How to avoid adding false dependencies
-
-
-==========
-Background
-==========
-
-What causes deadlock
---------------------
-
-A deadlock occurs when a context is waiting for an event to happen,
-which is impossible because another (or the) context who can trigger the
-event is also waiting for another (or the) event to happen, which is
-also impossible due to the same reason.
-
-For example:
-
-   A context going to trigger event C is waiting for event A to happen.
-   A context going to trigger event A is waiting for event B to happen.
-   A context going to trigger event B is waiting for event C to happen.
-
-A deadlock occurs when these three wait operations run at the same time,
-because event C cannot be triggered if event A does not happen, which in
-turn cannot be triggered if event B does not happen, which in turn
-cannot be triggered if event C does not happen. After all, no event can
-be triggered since any of them never meets its condition to wake up.
-
-A dependency might exist between two waiters and a deadlock might happen
-due to an incorrect releationship between dependencies. Thus, we must
-define what a dependency is first. A dependency exists between them if:
-
-   1. There are two waiters waiting for each event at a given time.
-   2. The only way to wake up each waiter is to trigger its event.
-   3. Whether one can be woken up depends on whether the other can.
-
-Each wait in the example creates its dependency like:
-
-   Event C depends on event A.
-   Event A depends on event B.
-   Event B depends on event C.
-
-   NOTE: Precisely speaking, a dependency is one between whether a
-   waiter for an event can be woken up and whether another waiter for
-   another event can be woken up. However from now on, we will describe
-   a dependency as if it's one between an event and another event for
-   simplicity.
-
-And they form circular dependencies like:
-
-    -> C -> A -> B -
-   /                \
-   \                /
-    ----------------
-
-   where 'A -> B' means that event A depends on event B.
-
-Such circular dependencies lead to a deadlock since no waiter can meet
-its condition to wake up as described.
-
-CONCLUSION
-
-Circular dependencies cause a deadlock.
-
-
-How lockdep works
------------------
-
-Lockdep tries to detect a deadlock by checking dependencies created by
-lock operations, acquire and release. Waiting for a lock corresponds to
-waiting for an event, and releasing a lock corresponds to triggering an
-event in the previous section.
-
-In short, lockdep does:
-
-   1. Detect a new dependency.
-   2. Add the dependency into a global graph.
-   3. Check if that makes dependencies circular.
-   4. Report a deadlock or its possibility if so.
-
-For example, consider a graph built by lockdep that looks like:
-
-   A -> B -
-           \
-            -> E
-           /
-   C -> D -
-
-   where A, B,..., E are different lock classes.
-
-Lockdep will add a dependency into the graph on detection of a new
-dependency. For example, it will add a dependency 'E -> C' when a new
-dependency between lock E and lock C is detected. Then the graph will be:
-
-       A -> B -
-               \
-                -> E -
-               /      \
-    -> C -> D -        \
-   /                   /
-   \                  /
-    ------------------
-
-   where A, B,..., E are different lock classes.
-
-This graph contains a subgraph which demonstrates circular dependencies:
-
-                -> E -
-               /      \
-    -> C -> D -        \
-   /                   /
-   \                  /
-    ------------------
-
-   where C, D and E are different lock classes.
-
-This is the condition under which a deadlock might occur. Lockdep
-reports it on detection after adding a new dependency. This is the way
-how lockdep works.
-
-CONCLUSION
-
-Lockdep detects a deadlock or its possibility by checking if circular
-dependencies were created after adding each new dependency.
-
-
-==========
-Limitation
-==========
-
-Limit lockdep
--------------
-
-Limiting lockdep to work on only typical locks e.g. spin locks and
-mutexes, which are released within the acquire context, the
-implementation becomes simple but its capacity for detection becomes
-limited. Let's check pros and cons in next section.
-
-
-Pros from the limitation
-------------------------
-
-Given the limitation, when acquiring a lock, locks in a held_locks
-cannot be released if the context cannot acquire it so has to wait to
-acquire it, which means all waiters for the locks in the held_locks are
-stuck. It's an exact case to create dependencies between each lock in
-the held_locks and the lock to acquire.
-
-For example:
-
-   CONTEXT X
-   ---------
-   acquire A
-   acquire B /* Add a dependency 'A -> B' */
-   release B
-   release A
-
-   where A and B are different lock classes.
-
-When acquiring lock A, the held_locks of CONTEXT X is empty thus no
-dependency is added. But when acquiring lock B, lockdep detects and adds
-a new dependency 'A -> B' between lock A in the held_locks and lock B.
-They can be simply added whenever acquiring each lock.
-
-And data required by lockdep exists in a local structure, held_locks
-embedded in task_struct. Forcing to access the data within the context,
-lockdep can avoid racy problems without explicit locks while handling
-the local data.
-
-Lastly, lockdep only needs to keep locks currently being held, to build
-a dependency graph. However, relaxing the limitation, it needs to keep
-even locks already released, because a decision whether they created
-dependencies might be long-deferred.
-
-To sum up, we can expect several advantages from the limitation:
-
-   1. Lockdep can easily identify a dependency when acquiring a lock.
-   2. Races are avoidable while accessing local locks in a held_locks.
-   3. Lockdep only needs to keep locks currently being held.
-
-CONCLUSION
-
-Given the limitation, the implementation becomes simple and efficient.
-
-
-Cons from the limitation
-------------------------
-
-Given the limitation, lockdep is applicable only to typical locks. For
-example, page locks for page access or completions for synchronization
-cannot work with lockdep.
-
-Can we detect deadlocks below, under the limitation?
-
-Example 1:
-
-   CONTEXT X	   CONTEXT Y	   CONTEXT Z
-   ---------	   ---------	   ----------
-		   mutex_lock A
-   lock_page B
-		   lock_page B
-				   mutex_lock A /* DEADLOCK */
-				   unlock_page B held by X
-		   unlock_page B
-		   mutex_unlock A
-				   mutex_unlock A
-
-   where A and B are different lock classes.
-
-No, we cannot.
-
-Example 2:
-
-   CONTEXT X		   CONTEXT Y
-   ---------		   ---------
-			   mutex_lock A
-   mutex_lock A
-			   wait_for_complete B /* DEADLOCK */
-   complete B
-			   mutex_unlock A
-   mutex_unlock A
-
-   where A is a lock class and B is a completion variable.
-
-No, we cannot.
-
-CONCLUSION
-
-Given the limitation, lockdep cannot detect a deadlock or its
-possibility caused by page locks or completions.
-
-
-Relax the limitation
---------------------
-
-Under the limitation, things to create dependencies are limited to
-typical locks. However, synchronization primitives like page locks and
-completions, which are allowed to be released in any context, also
-create dependencies and can cause a deadlock. So lockdep should track
-these locks to do a better job. We have to relax the limitation for
-these locks to work with lockdep.
-
-Detecting dependencies is very important for lockdep to work because
-adding a dependency means adding an opportunity to check whether it
-causes a deadlock. The more lockdep adds dependencies, the more it
-thoroughly works. Thus Lockdep has to do its best to detect and add as
-many true dependencies into a graph as possible.
-
-For example, considering only typical locks, lockdep builds a graph like:
-
-   A -> B -
-           \
-            -> E
-           /
-   C -> D -
-
-   where A, B,..., E are different lock classes.
-
-On the other hand, under the relaxation, additional dependencies might
-be created and added. Assuming additional 'FX -> C' and 'E -> GX' are
-added thanks to the relaxation, the graph will be:
-
-         A -> B -
-                 \
-                  -> E -> GX
-                 /
-   FX -> C -> D -
-
-   where A, B,..., E, FX and GX are different lock classes, and a suffix
-   'X' is added on non-typical locks.
-
-The latter graph gives us more chances to check circular dependencies
-than the former. However, it might suffer performance degradation since
-relaxing the limitation, with which design and implementation of lockdep
-can be efficient, might introduce inefficiency inevitably. So lockdep
-should provide two options, strong detection and efficient detection.
-
-Choosing efficient detection:
-
-   Lockdep works with only locks restricted to be released within the
-   acquire context. However, lockdep works efficiently.
-
-Choosing strong detection:
-
-   Lockdep works with all synchronization primitives. However, lockdep
-   suffers performance degradation.
-
-CONCLUSION
-
-Relaxing the limitation, lockdep can add additional dependencies giving
-additional opportunities to check circular dependencies.
-
-
-============
-Crossrelease
-============
-
-Introduce crossrelease
-----------------------
-
-In order to allow lockdep to handle additional dependencies by what
-might be released in any context, namely 'crosslock', we have to be able
-to identify those created by crosslocks. The proposed 'crossrelease'
-feature provoides a way to do that.
-
-Crossrelease feature has to do:
-
-   1. Identify dependencies created by crosslocks.
-   2. Add the dependencies into a dependency graph.
-
-That's all. Once a meaningful dependency is added into graph, then
-lockdep would work with the graph as it did. The most important thing
-crossrelease feature has to do is to correctly identify and add true
-dependencies into the global graph.
-
-A dependency e.g. 'A -> B' can be identified only in the A's release
-context because a decision required to identify the dependency can be
-made only in the release context. That is to decide whether A can be
-released so that a waiter for A can be woken up. It cannot be made in
-other than the A's release context.
-
-It's no matter for typical locks because each acquire context is same as
-its release context, thus lockdep can decide whether a lock can be
-released in the acquire context. However for crosslocks, lockdep cannot
-make the decision in the acquire context but has to wait until the
-release context is identified.
-
-Therefore, deadlocks by crosslocks cannot be detected just when it
-happens, because those cannot be identified until the crosslocks are
-released. However, deadlock possibilities can be detected and it's very
-worth. See 'APPENDIX A' section to check why.
-
-CONCLUSION
-
-Using crossrelease feature, lockdep can work with what might be released
-in any context, namely crosslock.
-
-
-Introduce commit
-----------------
-
-Since crossrelease defers the work adding true dependencies of
-crosslocks until they are actually released, crossrelease has to queue
-all acquisitions which might create dependencies with the crosslocks.
-Then it identifies dependencies using the queued data in batches at a
-proper time. We call it 'commit'.
-
-There are four types of dependencies:
-
-1. TT type: 'typical lock A -> typical lock B'
-
-   Just when acquiring B, lockdep can see it's in the A's release
-   context. So the dependency between A and B can be identified
-   immediately. Commit is unnecessary.
-
-2. TC type: 'typical lock A -> crosslock BX'
-
-   Just when acquiring BX, lockdep can see it's in the A's release
-   context. So the dependency between A and BX can be identified
-   immediately. Commit is unnecessary, too.
-
-3. CT type: 'crosslock AX -> typical lock B'
-
-   When acquiring B, lockdep cannot identify the dependency because
-   there's no way to know if it's in the AX's release context. It has
-   to wait until the decision can be made. Commit is necessary.
-
-4. CC type: 'crosslock AX -> crosslock BX'
-
-   When acquiring BX, lockdep cannot identify the dependency because
-   there's no way to know if it's in the AX's release context. It has
-   to wait until the decision can be made. Commit is necessary.
-   But, handling CC type is not implemented yet. It's a future work.
-
-Lockdep can work without commit for typical locks, but commit step is
-necessary once crosslocks are involved. Introducing commit, lockdep
-performs three steps. What lockdep does in each step is:
-
-1. Acquisition: For typical locks, lockdep does what it originally did
-   and queues the lock so that CT type dependencies can be checked using
-   it at the commit step. For crosslocks, it saves data which will be
-   used at the commit step and increases a reference count for it.
-
-2. Commit: No action is reauired for typical locks. For crosslocks,
-   lockdep adds CT type dependencies using the data saved at the
-   acquisition step.
-
-3. Release: No changes are required for typical locks. When a crosslock
-   is released, it decreases a reference count for it.
-
-CONCLUSION
-
-Crossrelease introduces commit step to handle dependencies of crosslocks
-in batches at a proper time.
-
-
-==============
-Implementation
-==============
-
-Data structures
----------------
-
-Crossrelease introduces two main data structures.
-
-1. hist_lock
-
-   This is an array embedded in task_struct, for keeping lock history so
-   that dependencies can be added using them at the commit step. Since
-   it's local data, it can be accessed locklessly in the owner context.
-   The array is filled at the acquisition step and consumed at the
-   commit step. And it's managed in circular manner.
-
-2. cross_lock
-
-   One per lockdep_map exists. This is for keeping data of crosslocks
-   and used at the commit step.
-
-
-How crossrelease works
-----------------------
-
-It's the key of how crossrelease works, to defer necessary works to an
-appropriate point in time and perform in at once at the commit step.
-Let's take a look with examples step by step, starting from how lockdep
-works without crossrelease for typical locks.
-
-   acquire A /* Push A onto held_locks */
-   acquire B /* Push B onto held_locks and add 'A -> B' */
-   acquire C /* Push C onto held_locks and add 'B -> C' */
-   release C /* Pop C from held_locks */
-   release B /* Pop B from held_locks */
-   release A /* Pop A from held_locks */
-
-   where A, B and C are different lock classes.
-
-   NOTE: This document assumes that readers already understand how
-   lockdep works without crossrelease thus omits details. But there's
-   one thing to note. Lockdep pretends to pop a lock from held_locks
-   when releasing it. But it's subtly different from the original pop
-   operation because lockdep allows other than the top to be poped.
-
-In this case, lockdep adds 'the top of held_locks -> the lock to acquire'
-dependency every time acquiring a lock.
-
-After adding 'A -> B', a dependency graph will be:
-
-   A -> B
-
-   where A and B are different lock classes.
-
-And after adding 'B -> C', the graph will be:
-
-   A -> B -> C
-
-   where A, B and C are different lock classes.
-
-Let's performs commit step even for typical locks to add dependencies.
-Of course, commit step is not necessary for them, however, it would work
-well because this is a more general way.
-
-   acquire A
-   /*
-    * Queue A into hist_locks
-    *
-    * In hist_locks: A
-    * In graph: Empty
-    */
-
-   acquire B
-   /*
-    * Queue B into hist_locks
-    *
-    * In hist_locks: A, B
-    * In graph: Empty
-    */
-
-   acquire C
-   /*
-    * Queue C into hist_locks
-    *
-    * In hist_locks: A, B, C
-    * In graph: Empty
-    */
-
-   commit C
-   /*
-    * Add 'C -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire C: Nothing
-    *
-    * In hist_locks: A, B, C
-    * In graph: Empty
-    */
-
-   release C
-
-   commit B
-   /*
-    * Add 'B -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire B: C
-    *
-    * In hist_locks: A, B, C
-    * In graph: 'B -> C'
-    */
-
-   release B
-
-   commit A
-   /*
-    * Add 'A -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire A: B, C
-    *
-    * In hist_locks: A, B, C
-    * In graph: 'B -> C', 'A -> B', 'A -> C'
-    */
-
-   release A
-
-   where A, B and C are different lock classes.
-
-In this case, dependencies are added at the commit step as described.
-
-After commits for A, B and C, the graph will be:
-
-   A -> B -> C
-
-   where A, B and C are different lock classes.
-
-   NOTE: A dependency 'A -> C' is optimized out.
-
-We can see the former graph built without commit step is same as the
-latter graph built using commit steps. Of course the former way leads to
-earlier finish for building the graph, which means we can detect a
-deadlock or its possibility sooner. So the former way would be prefered
-when possible. But we cannot avoid using the latter way for crosslocks.
-
-Let's look at how commit steps work for crosslocks. In this case, the
-commit step is performed only on crosslock AX as real. And it assumes
-that the AX release context is different from the AX acquire context.
-
-   BX RELEASE CONTEXT		   BX ACQUIRE CONTEXT
-   ------------------		   ------------------
-				   acquire A
-				   /*
-				    * Push A onto held_locks
-				    * Queue A into hist_locks
-				    *
-				    * In held_locks: A
-				    * In hist_locks: A
-				    * In graph: Empty
-				    */
-
-				   acquire BX
-				   /*
-				    * Add 'the top of held_locks -> BX'
-				    *
-				    * In held_locks: A
-				    * In hist_locks: A
-				    * In graph: 'A -> BX'
-				    */
-
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-   It must be guaranteed that the following operations are seen after
-   acquiring BX globally. It can be done by things like barrier.
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-   acquire C
-   /*
-    * Push C onto held_locks
-    * Queue C into hist_locks
-    *
-    * In held_locks: C
-    * In hist_locks: C
-    * In graph: 'A -> BX'
-    */
-
-   release C
-   /*
-    * Pop C from held_locks
-    *
-    * In held_locks: Empty
-    * In hist_locks: C
-    * In graph: 'A -> BX'
-    */
-				   acquire D
-				   /*
-				    * Push D onto held_locks
-				    * Queue D into hist_locks
-				    * Add 'the top of held_locks -> D'
-				    *
-				    * In held_locks: A, D
-				    * In hist_locks: A, D
-				    * In graph: 'A -> BX', 'A -> D'
-				    */
-   acquire E
-   /*
-    * Push E onto held_locks
-    * Queue E into hist_locks
-    *
-    * In held_locks: E
-    * In hist_locks: C, E
-    * In graph: 'A -> BX', 'A -> D'
-    */
-
-   release E
-   /*
-    * Pop E from held_locks
-    *
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D'
-    */
-				   release D
-				   /*
-				    * Pop D from held_locks
-				    *
-				    * In held_locks: A
-				    * In hist_locks: A, D
-				    * In graph: 'A -> BX', 'A -> D'
-				    */
-   commit BX
-   /*
-    * Add 'BX -> ?'
-    * What has been queued since acquire BX: C, E
-    *
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D',
-    *           'BX -> C', 'BX -> E'
-    */
-
-   release BX
-   /*
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D',
-    *           'BX -> C', 'BX -> E'
-    */
-				   release A
-				   /*
-				    * Pop A from held_locks
-				    *
-				    * In held_locks: Empty
-				    * In hist_locks: A, D
-				    * In graph: 'A -> BX', 'A -> D',
-				    *           'BX -> C', 'BX -> E'
-				    */
-
-   where A, BX, C,..., E are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-Crossrelease considers all acquisitions after acqiuring BX are
-candidates which might create dependencies with BX. True dependencies
-will be determined when identifying the release context of BX. Meanwhile,
-all typical locks are queued so that they can be used at the commit step.
-And then two dependencies 'BX -> C' and 'BX -> E' are added at the
-commit step when identifying the release context.
-
-The final graph will be, with crossrelease:
-
-               -> C
-              /
-       -> BX -
-      /       \
-   A -         -> E
-      \
-       -> D
-
-   where A, BX, C,..., E are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-However, the final graph will be, without crossrelease:
-
-   A -> D
-
-   where A and D are different lock classes.
-
-The former graph has three more dependencies, 'A -> BX', 'BX -> C' and
-'BX -> E' giving additional opportunities to check if they cause
-deadlocks. This way lockdep can detect a deadlock or its possibility
-caused by crosslocks.
-
-CONCLUSION
-
-We checked how crossrelease works with several examples.
-
-
-=============
-Optimizations
-=============
-
-Avoid duplication
------------------
-
-Crossrelease feature uses a cache like what lockdep already uses for
-dependency chains, but this time it's for caching CT type dependencies.
-Once that dependency is cached, the same will never be added again.
-
-
-Lockless for hot paths
-----------------------
-
-To keep all locks for later use at the commit step, crossrelease adopts
-a local array embedded in task_struct, which makes access to the data
-lockless by forcing it to happen only within the owner context. It's
-like how lockdep handles held_locks. Lockless implmentation is important
-since typical locks are very frequently acquired and released.
-
-
-=================================================
-APPENDIX A: What lockdep does to work aggresively
-=================================================
-
-A deadlock actually occurs when all wait operations creating circular
-dependencies run at the same time. Even though they don't, a potential
-deadlock exists if the problematic dependencies exist. Thus it's
-meaningful to detect not only an actual deadlock but also its potential
-possibility. The latter is rather valuable. When a deadlock occurs
-actually, we can identify what happens in the system by some means or
-other even without lockdep. However, there's no way to detect possiblity
-without lockdep unless the whole code is parsed in head. It's terrible.
-Lockdep does the both, and crossrelease only focuses on the latter.
-
-Whether or not a deadlock actually occurs depends on several factors.
-For example, what order contexts are switched in is a factor. Assuming
-circular dependencies exist, a deadlock would occur when contexts are
-switched so that all wait operations creating the dependencies run
-simultaneously. Thus to detect a deadlock possibility even in the case
-that it has not occured yet, lockdep should consider all possible
-combinations of dependencies, trying to:
-
-1. Use a global dependency graph.
-
-   Lockdep combines all dependencies into one global graph and uses them,
-   regardless of which context generates them or what order contexts are
-   switched in. Aggregated dependencies are only considered so they are
-   prone to be circular if a problem exists.
-
-2. Check dependencies between classes instead of instances.
-
-   What actually causes a deadlock are instances of lock. However,
-   lockdep checks dependencies between classes instead of instances.
-   This way lockdep can detect a deadlock which has not happened but
-   might happen in future by others but the same class.
-
-3. Assume all acquisitions lead to waiting.
-
-   Although locks might be acquired without waiting which is essential
-   to create dependencies, lockdep assumes all acquisitions lead to
-   waiting since it might be true some time or another.
-
-CONCLUSION
-
-Lockdep detects not only an actual deadlock but also its possibility,
-and the latter is more valuable.
-
-
-==================================================
-APPENDIX B: How to avoid adding false dependencies
-==================================================
-
-Remind what a dependency is. A dependency exists if:
-
-   1. There are two waiters waiting for each event at a given time.
-   2. The only way to wake up each waiter is to trigger its event.
-   3. Whether one can be woken up depends on whether the other can.
-
-For example:
-
-   acquire A
-   acquire B /* A dependency 'A -> B' exists */
-   release B
-   release A
-
-   where A and B are different lock classes.
-
-A depedency 'A -> B' exists since:
-
-   1. A waiter for A and a waiter for B might exist when acquiring B.
-   2. Only way to wake up each is to release what it waits for.
-   3. Whether the waiter for A can be woken up depends on whether the
-      other can. IOW, TASK X cannot release A if it fails to acquire B.
-
-For another example:
-
-   TASK X			   TASK Y
-   ------			   ------
-				   acquire AX
-   acquire B /* A dependency 'AX -> B' exists */
-   release B
-   release AX held by Y
-
-   where AX and B are different lock classes, and a suffix 'X' is added
-   on crosslocks.
-
-Even in this case involving crosslocks, the same rule can be applied. A
-depedency 'AX -> B' exists since:
-
-   1. A waiter for AX and a waiter for B might exist when acquiring B.
-   2. Only way to wake up each is to release what it waits for.
-   3. Whether the waiter for AX can be woken up depends on whether the
-      other can. IOW, TASK X cannot release AX if it fails to acquire B.
-
-Let's take a look at more complicated example:
-
-   TASK X			   TASK Y
-   ------			   ------
-   acquire B
-   release B
-   fork Y
-				   acquire AX
-   acquire C /* A dependency 'AX -> C' exists */
-   release C
-   release AX held by Y
-
-   where AX, B and C are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-Does a dependency 'AX -> B' exist? Nope.
-
-Two waiters are essential to create a dependency. However, waiters for
-AX and B to create 'AX -> B' cannot exist at the same time in this
-example. Thus the dependency 'AX -> B' cannot be created.
-
-It would be ideal if the full set of true ones can be considered. But
-we can ensure nothing but what actually happened. Relying on what
-actually happens at runtime, we can anyway add only true ones, though
-they might be a subset of true ones. It's similar to how lockdep works
-for typical locks. There might be more true dependencies than what
-lockdep has detected in runtime. Lockdep has no choice but to rely on
-what actually happens. Crossrelease also relies on it.
-
-CONCLUSION
-
-Relying on what actually happens, lockdep can avoid adding false
-dependencies.
diff --git a/Documentation/media/dvb-drivers/frontends.rst b/Documentation/media/dvb-drivers/frontends.rst
new file mode 100644
index 0000000000000000000000000000000000000000..1f5f5798919627411ec88a2a96097d5fa6444c8a
--- /dev/null
+++ b/Documentation/media/dvb-drivers/frontends.rst
@@ -0,0 +1,30 @@
+****************
+Frontend drivers
+****************
+
+Frontend attach headers
+***********************
+
+.. Keep it on alphabetic order
+
+.. kernel-doc:: drivers/media/dvb-frontends/a8293.h
+.. kernel-doc:: drivers/media/dvb-frontends/af9013.h
+.. kernel-doc:: drivers/media/dvb-frontends/ascot2e.h
+.. kernel-doc:: drivers/media/dvb-frontends/cxd2820r.h
+.. kernel-doc:: drivers/media/dvb-frontends/drxk.h
+.. kernel-doc:: drivers/media/dvb-frontends/dvb-pll.h
+.. kernel-doc:: drivers/media/dvb-frontends/helene.h
+.. kernel-doc:: drivers/media/dvb-frontends/horus3a.h
+.. kernel-doc:: drivers/media/dvb-frontends/ix2505v.h
+.. kernel-doc:: drivers/media/dvb-frontends/m88ds3103.h
+.. kernel-doc:: drivers/media/dvb-frontends/mb86a20s.h
+.. kernel-doc:: drivers/media/dvb-frontends/mn88472.h
+.. kernel-doc:: drivers/media/dvb-frontends/rtl2830.h
+.. kernel-doc:: drivers/media/dvb-frontends/rtl2832.h
+.. kernel-doc:: drivers/media/dvb-frontends/rtl2832_sdr.h
+.. kernel-doc:: drivers/media/dvb-frontends/stb6000.h
+.. kernel-doc:: drivers/media/dvb-frontends/tda10071.h
+.. kernel-doc:: drivers/media/dvb-frontends/tda826x.h
+.. kernel-doc:: drivers/media/dvb-frontends/zd1301_demod.h
+.. kernel-doc:: drivers/media/dvb-frontends/zl10036.h
+
diff --git a/Documentation/media/dvb-drivers/index.rst b/Documentation/media/dvb-drivers/index.rst
index 376141143ae91d46a953b49bc9338fd7c9d9b203..314e127d82e310e42f97a96a21dbf566db6b43ab 100644
--- a/Documentation/media/dvb-drivers/index.rst
+++ b/Documentation/media/dvb-drivers/index.rst
@@ -41,4 +41,5 @@ For more details see the file COPYING in the source distribution of Linux.
 	technisat
 	ttusb-dec
 	udev
+	frontends
 	contributors
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 66e62086624501fe76c170f77efc1cbc0844032f..7d4b15977d61201eb8b265293d3d95a6b042c7e6 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -9,6 +9,7 @@ Contents:
    batman-adv
    kapi
    z8530book
+   msg_zerocopy
 
 .. only::  subproject
 
@@ -16,4 +17,3 @@ Contents:
    =======
 
    * :ref:`genindex`
-
diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst
index 77f6d7e25cfda65ac36e0c47bb2f8bdacc2fee2a..291a012649678035b5d0fdc306904093ceedf610 100644
--- a/Documentation/networking/msg_zerocopy.rst
+++ b/Documentation/networking/msg_zerocopy.rst
@@ -72,6 +72,10 @@ this flag, a process must first signal intent by setting a socket option:
 	if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
 		error(1, errno, "setsockopt zerocopy");
 
+Setting the socket option only works when the socket is in its initial
+(TCP_CLOSED) state.  Trying to set the option for a socket returned by accept(),
+for example, will lead to an EBUSY error. In this case, the option should be set
+to the listening socket and it will be inherited by the accepted sockets.
 
 Transmission
 ------------
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
index 6338400eed73d7e2f8b90807186e15d54cd16cde..2c31d9ee6776ea7fef125d8fd00e7f2ef8614e36 100644
--- a/Documentation/scsi/scsi_mid_low_api.txt
+++ b/Documentation/scsi/scsi_mid_low_api.txt
@@ -319,12 +319,12 @@ struct Scsi_Host:
         instance. If the reference count reaches 0 then the given instance
         is freed
 
-The Scsi_device structure has had reference counting infrastructure added.
-This effectively spreads the ownership of struct Scsi_device instances
+The scsi_device structure has had reference counting infrastructure added.
+This effectively spreads the ownership of struct scsi_device instances
 across the various SCSI layers which use them. Previously such instances
 were exclusively owned by the mid level. See the access functions declared
 towards the end of include/scsi/scsi_device.h . If an LLD wants to keep
-a copy of a pointer to a Scsi_device instance it should use scsi_device_get()
+a copy of a pointer to a scsi_device instance it should use scsi_device_get()
 to bump its reference count. When it is finished with the pointer it can
 use scsi_device_put() to decrement its reference count (and potentially
 delete it).
diff --git a/Documentation/usb/gadget-testing.txt b/Documentation/usb/gadget-testing.txt
index 441a4b9b666fbb2b2aace2cbc2f5ab542635d553..5908a21fddb6035cd25bb759bfd01ee405371163 100644
--- a/Documentation/usb/gadget-testing.txt
+++ b/Documentation/usb/gadget-testing.txt
@@ -693,7 +693,7 @@ such specification consists of a number of lines with an inverval value
 in each line. The rules stated above are best illustrated with an example:
 
 # mkdir functions/uvc.usb0/control/header/h
-# cd functions/uvc.usb0/control/header/h
+# cd functions/uvc.usb0/control/
 # ln -s header/h class/fs
 # ln -s header/h class/ss
 # mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index f670e4b9e7f33fdbb186d9b1c9186c6a2fbf7324..57d3ee9e4bde2a799715ca75871fd61b27858b0a 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2901,14 +2901,19 @@ userspace buffer and its length:
 
 struct kvm_s390_irq_state {
 	__u64 buf;
-	__u32 flags;
+	__u32 flags;        /* will stay unused for compatibility reasons */
 	__u32 len;
-	__u32 reserved[4];
+	__u32 reserved[4];  /* will stay unused for compatibility reasons */
 };
 
 Userspace passes in the above struct and for each pending interrupt a
 struct kvm_s390_irq is copied to the provided buffer.
 
+The structure contains a flags and a reserved field for future extensions. As
+the kernel never checked for flags == 0 and QEMU never pre-zeroed flags and
+reserved, these fields can not be used in the future without breaking
+compatibility.
+
 If -ENOBUFS is returned the buffer provided was too small and userspace
 may retry with a bigger buffer.
 
@@ -2932,10 +2937,14 @@ containing a struct kvm_s390_irq_state:
 
 struct kvm_s390_irq_state {
 	__u64 buf;
+	__u32 flags;        /* will stay unused for compatibility reasons */
 	__u32 len;
-	__u32 pad;
+	__u32 reserved[4];  /* will stay unused for compatibility reasons */
 };
 
+The restrictions for flags and reserved apply as well.
+(see KVM_S390_GET_IRQ_STATE)
+
 The userspace memory referenced by buf contains a struct kvm_s390_irq
 for each interrupt to be injected into the guest.
 If one of the interrupts could not be injected for some reason the
diff --git a/Documentation/vm/zswap.txt b/Documentation/vm/zswap.txt
index 89fff7d611ccb533a5c3d375bc94fecf3c2e0687..0b3a1148f9f0414558ed0537b4219225162ccc3a 100644
--- a/Documentation/vm/zswap.txt
+++ b/Documentation/vm/zswap.txt
@@ -98,5 +98,25 @@ request is made for a page in an old zpool, it is uncompressed using its
 original compressor.  Once all pages are removed from an old zpool, the zpool
 and its compressor are freed.
 
+Some of the pages in zswap are same-value filled pages (i.e. contents of the
+page have same value or repetitive pattern). These pages include zero-filled
+pages and they are handled differently. During store operation, a page is
+checked if it is a same-value filled page before compressing it. If true, the
+compressed length of the page is set to zero and the pattern or same-filled
+value is stored.
+
+Same-value filled pages identification feature is enabled by default and can be
+disabled at boot time by setting the "same_filled_pages_enabled" attribute to 0,
+e.g. zswap.same_filled_pages_enabled=0. It can also be enabled and disabled at
+runtime using the sysfs "same_filled_pages_enabled" attribute, e.g.
+
+echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
+
+When zswap same-filled page identification is disabled at runtime, it will stop
+checking for the same-value filled pages during store operation. However, the
+existing pages which are marked as same-value filled pages remain stored
+unchanged in zswap until they are either loaded or invalidated.
+
 A debugfs interface is provided for various statistic about pool size, number
-of pages stored, and various counters for the reasons pages are rejected.
+of pages stored, same-value filled pages and various counters for the reasons
+pages are rejected.
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d11eff61fc9addf6cd0ad54db5b30bac73783503
--- /dev/null
+++ b/Documentation/x86/pti.txt
@@ -0,0 +1,186 @@
+Overview
+========
+
+Page Table Isolation (pti, previously known as KAISER[1]) is a
+countermeasure against attacks on the shared user/kernel address
+space such as the "Meltdown" approach[2].
+
+To mitigate this class of attacks, we create an independent set of
+page tables for use only when running userspace applications.  When
+the kernel is entered via syscalls, interrupts or exceptions, the
+page tables are switched to the full "kernel" copy.  When the system
+switches back to user mode, the user copy is used again.
+
+The userspace page tables contain only a minimal amount of kernel
+data: only what is needed to enter/exit the kernel such as the
+entry/exit functions themselves and the interrupt descriptor table
+(IDT).  There are a few strictly unnecessary things that get mapped
+such as the first C function when entering an interrupt (see
+comments in pti.c).
+
+This approach helps to ensure that side-channel attacks leveraging
+the paging structures do not function when PTI is enabled.  It can be
+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
+Once enabled at compile-time, it can be disabled at boot with the
+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+
+Page Table Management
+=====================
+
+When PTI is enabled, the kernel manages two sets of page tables.
+The first set is very similar to the single set which is present in
+kernels without PTI.  This includes a complete mapping of userspace
+that the kernel can use for things like copy_to_user().
+
+Although _complete_, the user portion of the kernel page tables is
+crippled by setting the NX bit in the top level.  This ensures
+that any missed kernel->user CR3 switch will immediately crash
+userspace upon executing its first instruction.
+
+The userspace page tables map only the kernel data needed to enter
+and exit the kernel.  This data is entirely contained in the 'struct
+cpu_entry_area' structure which is placed in the fixmap which gives
+each CPU's copy of the area a compile-time-fixed virtual address.
+
+For new userspace mappings, the kernel makes the entries in its
+page tables like normal.  The only difference is when the kernel
+makes entries in the top (PGD) level.  In addition to setting the
+entry in the main kernel PGD, a copy of the entry is made in the
+userspace page tables' PGD.
+
+This sharing at the PGD level also inherently shares all the lower
+layers of the page tables.  This leaves a single, shared set of
+userspace page tables to manage.  One PTE to lock, one set of
+accessed bits, dirty bits, etc...
+
+Overhead
+========
+
+Protection against side-channel attacks is important.  But,
+this protection comes at a cost:
+
+1. Increased Memory Use
+  a. Each process now needs an order-1 PGD instead of order-0.
+     (Consumes an additional 4k per process).
+  b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
+     aligned so that it can be mapped by setting a single PMD
+     entry.  This consumes nearly 2MB of RAM once the kernel
+     is decompressed, but no space in the kernel image itself.
+
+2. Runtime Cost
+  a. CR3 manipulation to switch between the page table copies
+     must be done at interrupt, syscall, and exception entry
+     and exit (it can be skipped when the kernel is interrupted,
+     though.)  Moves to CR3 are on the order of a hundred
+     cycles, and are required at every entry and exit.
+  b. A "trampoline" must be used for SYSCALL entry.  This
+     trampoline depends on a smaller set of resources than the
+     non-PTI SYSCALL entry code, so requires mapping fewer
+     things into the userspace page tables.  The downside is
+     that stacks must be switched at entry time.
+  d. Global pages are disabled for all kernel structures not
+     mapped into both kernel and userspace page tables.  This
+     feature of the MMU allows different processes to share TLB
+     entries mapping the kernel.  Losing the feature means more
+     TLB misses after a context switch.  The actual loss of
+     performance is very small, however, never exceeding 1%.
+  d. Process Context IDentifiers (PCID) is a CPU feature that
+     allows us to skip flushing the entire TLB when switching page
+     tables by setting a special bit in CR3 when the page tables
+     are changed.  This makes switching the page tables (at context
+     switch, or kernel entry/exit) cheaper.  But, on systems with
+     PCID support, the context switch code must flush both the user
+     and kernel entries out of the TLB.  The user PCID TLB flush is
+     deferred until the exit to userspace, minimizing the cost.
+     See intel.com/sdm for the gory PCID/INVPCID details.
+  e. The userspace page tables must be populated for each new
+     process.  Even without PTI, the shared kernel mappings
+     are created by copying top-level (PGD) entries into each
+     new process.  But, with PTI, there are now *two* kernel
+     mappings: one in the kernel page tables that maps everything
+     and one for the entry/exit structures.  At fork(), we need to
+     copy both.
+  f. In addition to the fork()-time copying, there must also
+     be an update to the userspace PGD any time a set_pgd() is done
+     on a PGD used to map userspace.  This ensures that the kernel
+     and userspace copies always map the same userspace
+     memory.
+  g. On systems without PCID support, each CR3 write flushes
+     the entire TLB.  That means that each syscall, interrupt
+     or exception flushes the TLB.
+  h. INVPCID is a TLB-flushing instruction which allows flushing
+     of TLB entries for non-current PCIDs.  Some systems support
+     PCIDs, but do not support INVPCID.  On these systems, addresses
+     can only be flushed from the TLB for the current PCID.  When
+     flushing a kernel address, we need to flush all PCIDs, so a
+     single kernel address flush will require a TLB-flushing CR3
+     write upon the next use of every PCID.
+
+Possible Future Work
+====================
+1. We can be more careful about not actually writing to CR3
+   unless its value is actually changed.
+2. Allow PTI to be enabled/disabled at runtime in addition to the
+   boot-time switching.
+
+Testing
+========
+
+To test stability of PTI, the following test procedure is recommended,
+ideally doing all of these in parallel:
+
+1. Set CONFIG_DEBUG_ENTRY=y
+2. Run several copies of all of the tools/testing/selftests/x86/ tests
+   (excluding MPX and protection_keys) in a loop on multiple CPUs for
+   several minutes.  These tests frequently uncover corner cases in the
+   kernel entry code.  In general, old kernels might cause these tests
+   themselves to crash, but they should never crash the kernel.
+3. Run the 'perf' tool in a mode (top or record) that generates many
+   frequent performance monitoring non-maskable interrupts (see "NMI"
+   in /proc/interrupts).  This exercises the NMI entry/exit code which
+   is known to trigger bugs in code paths that did not expect to be
+   interrupted, including nested NMIs.  Using "-c" boosts the rate of
+   NMIs, and using two -c with separate counters encourages nested NMIs
+   and less deterministic behavior.
+
+	while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
+
+4. Launch a KVM virtual machine.
+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
+   This has been a lightly-tested code path and needs extra scrutiny.
+
+Debugging
+=========
+
+Bugs in PTI cause a few different signatures of crashes
+that are worth noting here.
+
+ * Failures of the selftests/x86 code.  Usually a bug in one of the
+   more obscure corners of entry_64.S
+ * Crashes in early boot, especially around CPU bringup.  Bugs
+   in the trampoline code or mappings cause these.
+ * Crashes at the first interrupt.  Caused by bugs in entry_64.S,
+   like screwing up a page table switch.  Also caused by
+   incorrectly mapping the IRQ handler entry code.
+ * Crashes at the first NMI.  The NMI code is separate from main
+   interrupt handlers and can have bugs that do not affect
+   normal interrupts.  Also caused by incorrectly mapping NMI
+   code.  NMIs that interrupt the entry code must be very
+   careful and can be the cause of crashes that show up when
+   running perf.
+ * Kernel crashes at the first exit to userspace.  entry_64.S
+   bugs, or failing to map some of the exit code.
+ * Crashes at first interrupt that interrupts userspace. The paths
+   in entry_64.S that return to userspace are sometimes separate
+   from the ones that return to the kernel.
+ * Double faults: overflowing the kernel stack because of page
+   faults upon page faults.  Caused by touching non-pti-mapped
+   data in the entry code, or forgetting to switch to kernel
+   CR3 before calling into C functions which are not pti-mapped.
+ * Userspace segfaults early in boot, sometimes manifesting
+   as mount(8) failing to mount the rootfs.  These have
+   tended to be TLB invalidation issues.  Usually invalidating
+   the wrong PCID, or otherwise missing an invalidation.
+
+1. https://gruss.cc/files/kaiser.pdf
+2. https://meltdownattack.com/meltdown.pdf
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 3448e675b4623ce81b5e0bc1116c52a12c411801..ea91cb61a60297ac658a4160cee200da75e6f00d 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -1,6 +1,4 @@
 
-<previous description obsolete, deleted>
-
 Virtual memory map with 4 level page tables:
 
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
@@ -14,13 +12,17 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
 ... unused hole ...
+				    vaddr_end for KASLR
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space (variable)
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Virtual memory map with 5 level page tables:
@@ -29,26 +31,31 @@ Virtual memory map with 5 level page tables:
 hole caused by [56:63] sign extension
 ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
 ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
-ff90000000000000 - ff91ffffffffffff (=49 bits) hole
-ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
+ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
+ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
 ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
 ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
 ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
+... unused hole ...
+				    vaddr_end for KASLR
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
 ... unused hole ...
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Architecture defines a 64-bit virtual address. Implementations can support
 less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
-through to the most-significant implemented bit are set to either all ones
-or all zero. This causes hole between user space and kernel addresses.
+through to the most-significant implemented bit are sign extended.
+This causes hole between user space and kernel addresses if you interpret them
+as unsigned.
 
 The direct mapping covers all memory in the system up to the highest
 memory address (this means in some cases it can also include PCI memory
@@ -58,19 +65,15 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
 the processes using the page fault handler, with init_top_pgt as
 reference.
 
-Current X86-64 implementations support up to 46 bits of address space (64 TB),
-which is our current limit. This expands into MBZ space in the page tables.
-
 We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
 memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available
 during EFI runtime calls.
 
-The module mapping space size changes based on the CONFIG requirements for the
-following fixmap section.
-
 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
 Their order is preserved but their base will be offset early at boot time.
 
--Andi Kleen, Jul 2004
+Be very careful vs. KASLR when changing anything here. The KASLR address
+range must not overlap with anything except the KASAN shadow area, which is
+correct as KASAN disables KASLR.
diff --git a/MAINTAINERS b/MAINTAINERS
index 8110df7acfeab0632270ddeb033c114b10526e91..5bc088f27c83b761e9c3b6766e7c86e71c11c7e6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -554,13 +554,13 @@ S:	Orphan
 F:	Documentation/filesystems/affs.txt
 F:	fs/affs/
 
-AFS FILESYSTEM & AF_RXRPC SOCKET DOMAIN
+AFS FILESYSTEM
 M:	David Howells <dhowells@redhat.com>
 L:	linux-afs@lists.infradead.org
 S:	Supported
 F:	fs/afs/
-F:	include/net/af_rxrpc.h
-F:	net/rxrpc/af_rxrpc.c
+F:	include/trace/events/afs.h
+F:	Documentation/filesystems/afs.txt
 W:	https://www.infradead.org/~dhowells/kafs/
 
 AGPGART DRIVER
@@ -859,7 +859,8 @@ F:	kernel/configs/android*
 ANDROID DRIVERS
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 M:	Arve Hjønnevåg <arve@android.com>
-M:	Riley Andrews <riandrews@android.com>
+M:	Todd Kjos <tkjos@android.com>
+M:	Martijn Coenen <maco@android.com>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
 L:	devel@driverdev.osuosl.org
 S:	Supported
@@ -2046,7 +2047,7 @@ F:	arch/arm/boot/dts/uniphier*
 F:	arch/arm/include/asm/hardware/cache-uniphier.h
 F:	arch/arm/mach-uniphier/
 F:	arch/arm/mm/cache-uniphier.c
-F:	arch/arm64/boot/dts/socionext/
+F:	arch/arm64/boot/dts/socionext/uniphier*
 F:	drivers/bus/uniphier-system-bus.c
 F:	drivers/clk/uniphier/
 F:	drivers/gpio/gpio-uniphier.c
@@ -2620,24 +2621,22 @@ F:	fs/bfs/
 F:	include/uapi/linux/bfs_fs.h
 
 BLACKFIN ARCHITECTURE
-M:	Steven Miao <realmz6@gmail.com>
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 T:	git git://git.code.sf.net/p/adi-linux/code
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	arch/blackfin/
 
 BLACKFIN EMAC DRIVER
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	drivers/net/ethernet/adi/
 
 BLACKFIN MEDIA DRIVER
-M:	Scott Jiang <scott.jiang.linux@gmail.com>
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org/
-S:	Supported
+S:	Orphan
 F:	drivers/media/platform/blackfin/
 F:	drivers/media/i2c/adv7183*
 F:	drivers/media/i2c/vs6624*
@@ -2645,25 +2644,25 @@ F:	drivers/media/i2c/vs6624*
 BLACKFIN RTC DRIVER
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	drivers/rtc/rtc-bfin.c
 
 BLACKFIN SDH DRIVER
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	drivers/mmc/host/bfin_sdh.c
 
 BLACKFIN SERIAL DRIVER
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	drivers/tty/serial/bfin_uart.c
 
 BLACKFIN WATCHDOG DRIVER
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	drivers/watchdog/bfin_wdt.c
 
 BLINKM RGB LED DRIVER
@@ -4553,6 +4552,12 @@ S:	Maintained
 F:	drivers/gpu/drm/tinydrm/st7586.c
 F:	Documentation/devicetree/bindings/display/st7586.txt
 
+DRM DRIVER FOR SITRONIX ST7735R PANELS
+M:	David Lechner <david@lechnology.com>
+S:	Maintained
+F:	drivers/gpu/drm/tinydrm/st7735r.c
+F:	Documentation/devicetree/bindings/display/st7735r.txt
+
 DRM DRIVER FOR TDFX VIDEO CARDS
 S:	Orphan / Obsolete
 F:	drivers/gpu/drm/tdfx/
@@ -4592,7 +4597,6 @@ F:	include/linux/vga*
 
 DRM DRIVERS AND MISC GPU PATCHES
 M:	Daniel Vetter <daniel.vetter@intel.com>
-M:	Jani Nikula <jani.nikula@linux.intel.com>
 M:	Gustavo Padovan <gustavo@padovan.org>
 M:	Sean Paul <seanpaul@chromium.org>
 W:	https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
@@ -4811,6 +4815,15 @@ S:	Maintained
 F:	drivers/gpu/drm/tinydrm/
 F:	include/drm/tinydrm/
 
+DRM TTM SUBSYSTEM
+M:	Christian Koenig <christian.koenig@amd.com>
+M:	Roger He <Hongbo.He@amd.com>
+T:	git git://people.freedesktop.org/~agd5f/linux
+S:	Maintained
+L:	dri-devel@lists.freedesktop.org
+F:	include/drm/ttm/
+F:	drivers/gpu/drm/ttm/
+
 DSBR100 USB FM RADIO DRIVER
 M:	Alexey Klimov <klimov.linux@gmail.com>
 L:	linux-media@vger.kernel.org
@@ -5158,15 +5171,15 @@ F:	sound/usb/misc/ua101.c
 EFI TEST DRIVER
 L:	linux-efi@vger.kernel.org
 M:	Ivan Hu <ivan.hu@canonical.com>
-M:	Matt Fleming <matt@codeblueprint.co.uk>
+M:	Ard Biesheuvel <ard.biesheuvel@linaro.org>
 S:	Maintained
 F:	drivers/firmware/efi/test/
 
 EFI VARIABLE FILESYSTEM
 M:	Matthew Garrett <matthew.garrett@nebula.com>
 M:	Jeremy Kerr <jk@ozlabs.org>
-M:	Matt Fleming <matt@codeblueprint.co.uk>
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git
+M:	Ard Biesheuvel <ard.biesheuvel@linaro.org>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
 L:	linux-efi@vger.kernel.org
 S:	Maintained
 F:	fs/efivarfs/
@@ -5327,7 +5340,6 @@ S:	Supported
 F:	security/integrity/evm/
 
 EXTENSIBLE FIRMWARE INTERFACE (EFI)
-M:	Matt Fleming <matt@codeblueprint.co.uk>
 M:	Ard Biesheuvel <ard.biesheuvel@linaro.org>
 L:	linux-efi@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
@@ -5438,7 +5450,7 @@ F:	drivers/media/tuners/fc2580*
 
 FCOE SUBSYSTEM (libfc, libfcoe, fcoe)
 M:	Johannes Thumshirn <jth@kernel.org>
-L:	fcoe-devel@open-fcoe.org
+L:	linux-scsi@vger.kernel.org
 W:	www.Open-FCoE.org
 S:	Supported
 F:	drivers/scsi/libfc/
@@ -7774,6 +7786,7 @@ F:	security/keys/
 
 KGDB / KDB /debug_core
 M:	Jason Wessel <jason.wessel@windriver.com>
+M:	Daniel Thompson <daniel.thompson@linaro.org>
 W:	http://kgdb.wiki.kernel.org/
 L:	kgdb-bugreport@lists.sourceforge.net
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jwessel/kgdb.git
@@ -9647,8 +9660,8 @@ F:	include/uapi/linux/sunrpc/
 NILFS2 FILESYSTEM
 M:	Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
 L:	linux-nilfs@vger.kernel.org
-W:	http://nilfs.sourceforge.net/
-W:	http://nilfs.osdn.jp/
+W:	https://nilfs.sourceforge.io/
+W:	https://nilfs.osdn.jp/
 T:	git git://github.com/konis/nilfs2.git
 S:	Supported
 F:	Documentation/filesystems/nilfs2.txt
@@ -10143,7 +10156,7 @@ F:	drivers/irqchip/irq-ompic.c
 F:	drivers/irqchip/irq-or1k-*
 
 OPENVSWITCH
-M:	Pravin Shelar <pshelar@nicira.com>
+M:	Pravin B Shelar <pshelar@ovn.org>
 L:	netdev@vger.kernel.org
 L:	dev@openvswitch.org
 W:	http://openvswitch.org
@@ -11366,6 +11379,7 @@ F:	drivers/net/wireless/quantenna
 RADEON and AMDGPU DRM DRIVERS
 M:	Alex Deucher <alexander.deucher@amd.com>
 M:	Christian König <christian.koenig@amd.com>
+M:	David (ChunMing) Zhou <David1.Zhou@amd.com>
 L:	amd-gfx@lists.freedesktop.org
 T:	git git://people.freedesktop.org/~agd5f/linux
 S:	Supported
@@ -11784,6 +11798,18 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jes/linux.git rtl8xxxu-deve
 S:	Maintained
 F:	drivers/net/wireless/realtek/rtl8xxxu/
 
+RXRPC SOCKETS (AF_RXRPC)
+M:	David Howells <dhowells@redhat.com>
+L:	linux-afs@lists.infradead.org
+S:	Supported
+F:	net/rxrpc/
+F:	include/keys/rxrpc-type.h
+F:	include/net/af_rxrpc.h
+F:	include/trace/events/rxrpc.h
+F:	include/uapi/linux/rxrpc.h
+F:	Documentation/networking/rxrpc.txt
+W:	https://www.infradead.org/~dhowells/kafs/
+
 S3 SAVAGE FRAMEBUFFER DRIVER
 M:	Antonino Daplas <adaplas@gmail.com>
 L:	linux-fbdev@vger.kernel.org
@@ -12637,6 +12663,14 @@ S:	Maintained
 F:	drivers/ssb/
 F:	include/linux/ssb/
 
+SONY IMX274 SENSOR DRIVER
+M:	Leon Luo <leonl@leopardimaging.com>
+L:	linux-media@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+S:	Maintained
+F:	drivers/media/i2c/imx274.c
+F:	Documentation/devicetree/bindings/media/i2c/imx274.txt
+
 SONY MEMORYSTICK CARD SUPPORT
 M:	Alex Dubov <oakad@yahoo.com>
 W:	http://tifmxx.berlios.de/
@@ -13103,6 +13137,7 @@ F:	drivers/dma/dw/
 
 SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER
 M:	Jie Deng <jiedeng@synopsys.com>
+M:	Jose Abreu <Jose.Abreu@synopsys.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/synopsys/
@@ -13478,6 +13513,7 @@ M:	Mika Westerberg <mika.westerberg@linux.intel.com>
 M:	Yehezkel Bernat <yehezkel.bernat@intel.com>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
 S:	Maintained
+F:	Documentation/admin-guide/thunderbolt.rst
 F:	drivers/thunderbolt/
 F:	include/linux/thunderbolt.h
 
@@ -13655,10 +13691,8 @@ F:	drivers/net/wireless/ti/
 F:	include/linux/wl12xx.h
 
 TILE ARCHITECTURE
-M:	Chris Metcalf <cmetcalf@mellanox.com>
 W:	http://www.mellanox.com/repository/solutions/tile-scm/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile.git
-S:	Supported
+S:	Orphan
 F:	arch/tile/
 F:	drivers/char/tile-srom.c
 F:	drivers/edac/tile_edac.c
diff --git a/Makefile b/Makefile
index c988e46a53cd78cfa3c8759fada57c1ef1ffb3ec..bf5b8cbb9469db3ab4a42e636746cab82a093044 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc8
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -484,26 +484,6 @@ CLANG_GCC_TC	:= --gcc-toolchain=$(GCC_TOOLCHAIN)
 endif
 KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
-KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
-KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
-KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
-# Quiet clang warning: comparison of unsigned expression < 0 is always false
-KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
-# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
-# source of a reference will be _MergedGlobals and not on of the whitelisted names.
-# See modpost pattern 2
-KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
-KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
-KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
-KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
-else
-
-# These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
 endif
 
 ifeq ($(config-targets),1)
@@ -716,6 +696,29 @@ ifdef CONFIG_CC_STACKPROTECTOR
 endif
 KBUILD_CFLAGS += $(stackp-flag)
 
+ifeq ($(cc-name),clang)
+KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
+KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+# Quiet clang warning: comparison of unsigned expression < 0 is always false
+KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
+# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
+# source of a reference will be _MergedGlobals and not on of the whitelisted names.
+# See modpost pattern 2
+KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
+KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
+KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
+KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
+else
+
+# These warnings generated too much noise in a regular build.
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+endif
+
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS	+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else
@@ -789,6 +792,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS	+= $(call cc-option,-fno-strict-overflow)
 
+# Make sure -fstack-check isn't enabled (like gentoo apparently did)
+KBUILD_CFLAGS  += $(call cc-option,-fno-stack-check,)
+
 # conserve stack if available
 KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)
 
diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild
index b15bf6bc0e94f46f035e8781ffa921060341fe91..14a2e9af97e9992d87821e8f11276ecfef8e57cf 100644
--- a/arch/alpha/include/uapi/asm/Kbuild
+++ b/arch/alpha/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += bpf_perf_event.h
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index 4e6e9f57e790ac0cefcade55664124c9351e830e..dc91c663bcc02e2cdc116f40ad31757d711ac485 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -35,6 +35,14 @@
 			reg = <0x80 0x10>, <0x100 0x10>;
 			#clock-cells = <0>;
 			clocks = <&input_clk>;
+
+			/*
+			 * Set initial core pll output frequency to 90MHz.
+			 * It will be applied at the core pll driver probing
+			 * on early boot.
+			 */
+			assigned-clocks = <&core_clk>;
+			assigned-clock-rates = <90000000>;
 		};
 
 		core_intc: archs-intc@cpu {
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 63954a8b0100ebf5746394fedb62b4825048d903..69ff4895f2ba4b558f2bdfed547ef0ec27288174 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -35,6 +35,14 @@
 			reg = <0x80 0x10>, <0x100 0x10>;
 			#clock-cells = <0>;
 			clocks = <&input_clk>;
+
+			/*
+			 * Set initial core pll output frequency to 100MHz.
+			 * It will be applied at the core pll driver probing
+			 * on early boot.
+			 */
+			assigned-clocks = <&core_clk>;
+			assigned-clock-rates = <100000000>;
 		};
 
 		core_intc: archs-intc@cpu {
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index 8f627c200d609148c55731aac99b2a353e72f126..006aa3de5348f31c7462f52f173ad2e74434d062 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -114,6 +114,14 @@
 			reg = <0x00 0x10>, <0x14B8 0x4>;
 			#clock-cells = <0>;
 			clocks = <&input_clk>;
+
+			/*
+			 * Set initial core pll output frequency to 1GHz.
+			 * It will be applied at the core pll driver probing
+			 * on early boot.
+			 */
+			assigned-clocks = <&core_clk>;
+			assigned-clock-rates = <1000000000>;
 		};
 
 		serial: serial@5000 {
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
index 7b8f8faf8a24315d3379d189cab69506539e04a8..ac6b0ed8341eeff9afdef2544fa48aaac5641deb 100644
--- a/arch/arc/configs/hsdk_defconfig
+++ b/arch/arc/configs/hsdk_defconfig
@@ -49,10 +49,11 @@ CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
+CONFIG_DRM=y
+# CONFIG_DRM_FBDEV_EMULATION is not set
+CONFIG_DRM_UDL=y
 CONFIG_FB=y
-CONFIG_FB_UDL=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_OHCI_HCD=y
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index f35974ee7264a1e13c3fb82b19dc2cc84b9fcc99..c9173c02081c0c3c81e136e3ae226562f5505b8e 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -668,6 +668,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
 		return 0;
 
 	__asm__ __volatile__(
+	"	mov	lp_count, %5		\n"
 	"	lp	3f			\n"
 	"1:	ldb.ab  %3, [%2, 1]		\n"
 	"	breq.d	%3, 0, 3f               \n"
@@ -684,8 +685,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
 	"	.word   1b, 4b			\n"
 	"	.previous			\n"
 	: "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
-	: "g"(-EFAULT), "l"(count)
-	: "memory");
+	: "g"(-EFAULT), "r"(count)
+	: "lp_count", "lp_start", "lp_end", "memory");
 
 	return res;
 }
diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index fa6d0ff4ff894be699616eefad77cd6a2347a3b7..170b5db64afeb7f74fb8279887a7cb75e0205c7b 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 7ef7d9a8ff89231811e73a241a3a3c6d248e720b..9d27331fe69a0eb441b34e51324138ef375070b0 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -199,7 +199,7 @@ static void read_arc_build_cfg_regs(void)
 			unsigned int exec_ctrl;
 
 			READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
-			cpu->extn.dual_enb = exec_ctrl & 1;
+			cpu->extn.dual_enb = !(exec_ctrl & 1);
 
 			/* dual issue always present for this core */
 			cpu->extn.dual = 1;
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index 74315f302971b1989b305b5c8a3ad14517a97cc1..bf40e06f3fb84fec42cefe779d92b18829524af6 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -163,7 +163,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
  */
 static int __print_sym(unsigned int address, void *unused)
 {
-	__print_symbol("  %s\n", address);
+	printk("  %pS\n", (void *)address);
 	return 0;
 }
 
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index bcd7c9fc5d0fc9868c04ff98d08b2e1492d50bb8..133a4dae41fe7d73b07b0d2365a899b943bc4779 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -83,6 +83,7 @@ DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC)
 DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR)
 DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
 DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
+DO_ERROR_INFO(SIGSEGV, "gcc generated __builtin_trap", do_trap5_error, 0)
 
 /*
  * Entry Point for Misaligned Data access Exception, for emulating in software
@@ -115,6 +116,8 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
  * Thus TRAP_S <n> can be used for specific purpose
  *  -1 used for software breakpointing (gdb)
  *  -2 used by kprobes
+ *  -5 __builtin_trap() generated by gcc (2018.03 onwards) for toggle such as
+ *     -fno-isolate-erroneous-paths-dereference
  */
 void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
 {
@@ -134,6 +137,9 @@ void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
 		kgdb_trap(regs);
 		break;
 
+	case 5:
+		do_trap5_error(address, regs);
+		break;
 	default:
 		break;
 	}
@@ -155,3 +161,11 @@ void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs)
 
 	insterror_is_error(address, regs);
 }
+
+/*
+ * abort() call generated by older gcc for __builtin_trap()
+ */
+void abort(void)
+{
+	__asm__ __volatile__("trap_s  5\n");
+}
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 7d8c1d6c2f60f918e95f2cafc81c90c028684b9a..6e9a0a9a6a04e1794ca0bbe6ec75dc5ab0617352 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -163,6 +163,9 @@ static void show_ecr_verbose(struct pt_regs *regs)
 		else
 			pr_cont("Bus Error, check PRM\n");
 #endif
+	} else if (vec == ECR_V_TRAP) {
+		if (regs->ecr_param == 5)
+			pr_cont("gcc generated __builtin_trap\n");
 	} else {
 		pr_cont("Check Programmer's Manual\n");
 	}
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index f1ac6790da5fe64782b59b720bf3ea80d999bff1..46544e88492d5273de4c6a121538fd4a6418ec65 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -317,25 +317,23 @@ static void __init axs103_early_init(void)
 	 * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack
 	 * of fudging the freq in DT
 	 */
+#define AXS103_QUAD_CORE_CPU_FREQ_HZ	50000000
+
 	unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
 	if (num_cores > 2) {
-		u32 freq = 50, orig;
-		/*
-		 * TODO: use cpu node "cpu-freq" param instead of platform-specific
-		 * "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu.
-		 */
+		u32 freq;
 		int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk");
 		const struct fdt_property *prop;
 
 		prop = fdt_get_property(initial_boot_params, off,
-					"clock-frequency", NULL);
-		orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000;
+					"assigned-clock-rates", NULL);
+		freq = be32_to_cpu(*(u32 *)(prop->data));
 
 		/* Patching .dtb in-place with new core clock value */
-		if (freq != orig ) {
-			freq = cpu_to_be32(freq * 1000000);
+		if (freq != AXS103_QUAD_CORE_CPU_FREQ_HZ) {
+			freq = cpu_to_be32(AXS103_QUAD_CORE_CPU_FREQ_HZ);
 			fdt_setprop_inplace(initial_boot_params, off,
-					    "clock-frequency", &freq, sizeof(freq));
+					    "assigned-clock-rates", &freq, sizeof(freq));
 		}
 	}
 #endif
diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c
index fd0ae5e38639a8756c86d7882c6e74c88f0e07e5..2958aedb649ab183edcce1ca858006f67fd8ff21 100644
--- a/arch/arc/plat-hsdk/platform.c
+++ b/arch/arc/plat-hsdk/platform.c
@@ -38,42 +38,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu)
 #define CREG_PAE		(CREG_BASE + 0x180)
 #define CREG_PAE_UPDATE		(CREG_BASE + 0x194)
 
-#define CREG_CORE_IF_CLK_DIV	(CREG_BASE + 0x4B8)
-#define CREG_CORE_IF_CLK_DIV_2	0x1
-#define CGU_BASE		ARC_PERIPHERAL_BASE
-#define CGU_PLL_STATUS		(ARC_PERIPHERAL_BASE + 0x4)
-#define CGU_PLL_CTRL		(ARC_PERIPHERAL_BASE + 0x0)
-#define CGU_PLL_STATUS_LOCK	BIT(0)
-#define CGU_PLL_STATUS_ERR	BIT(1)
-#define CGU_PLL_CTRL_1GHZ	0x3A10
-#define HSDK_PLL_LOCK_TIMEOUT	500
-
-#define HSDK_PLL_LOCKED() \
-	!!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_LOCK)
-
-#define HSDK_PLL_ERR() \
-	!!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_ERR)
-
-static void __init hsdk_set_cpu_freq_1ghz(void)
-{
-	u32 timeout = HSDK_PLL_LOCK_TIMEOUT;
-
-	/*
-	 * As we set cpu clock which exceeds 500MHz, the divider for the interface
-	 * clock must be programmed to div-by-2.
-	 */
-	iowrite32(CREG_CORE_IF_CLK_DIV_2, (void __iomem *) CREG_CORE_IF_CLK_DIV);
-
-	/* Set cpu clock to 1GHz */
-	iowrite32(CGU_PLL_CTRL_1GHZ, (void __iomem *) CGU_PLL_CTRL);
-
-	while (!HSDK_PLL_LOCKED() && timeout--)
-		cpu_relax();
-
-	if (!HSDK_PLL_LOCKED() || HSDK_PLL_ERR())
-		pr_err("Failed to setup CPU frequency to 1GHz!");
-}
-
 #define SDIO_BASE		(ARC_PERIPHERAL_BASE + 0xA000)
 #define SDIO_UHS_REG_EXT	(SDIO_BASE + 0x108)
 #define SDIO_UHS_REG_EXT_DIV_2	(2 << 30)
@@ -98,12 +62,6 @@ static void __init hsdk_init_early(void)
 	 * minimum possible div-by-2.
 	 */
 	iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT);
-
-	/*
-	 * Setup CPU frequency to 1GHz.
-	 * TODO: remove it after smart hsdk pll driver will be introduced.
-	 */
-	hsdk_set_cpu_freq_1ghz();
 }
 
 static const char *hsdk_compat[] __initconst = {
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index 1b81c4e757727d2abcc798fefaaa55e0b0fecfec..d37f95025807708a9c486d1e6f6365f9960286b5 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -630,6 +630,7 @@
 				reg-names = "phy";
 				status = "disabled";
 				ti,ctrl_mod = <&usb_ctrl_mod>;
+				#phy-cells = <0>;
 			};
 
 			usb0: usb@47401000 {
@@ -678,6 +679,7 @@
 				reg-names = "phy";
 				status = "disabled";
 				ti,ctrl_mod = <&usb_ctrl_mod>;
+				#phy-cells = <0>;
 			};
 
 			usb1: usb@47401800 {
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index e5b061469bf88a234ef2367ecb94a143afca793f..4714a59fd86df05a8715ee400f18e6a71041eac7 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -927,7 +927,8 @@
 			reg = <0x48038000 0x2000>,
 			      <0x46000000 0x400000>;
 			reg-names = "mpu", "dat";
-			interrupts = <80>, <81>;
+			interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
 			interrupt-names = "tx", "rx";
 			status = "disabled";
 			dmas = <&edma 8 2>,
@@ -941,7 +942,8 @@
 			reg = <0x4803C000 0x2000>,
 			      <0x46400000 0x400000>;
 			reg-names = "mpu", "dat";
-			interrupts = <82>, <83>;
+			interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
 			interrupt-names = "tx", "rx";
 			status = "disabled";
 			dmas = <&edma 10 2>,
diff --git a/arch/arm/boot/dts/am437x-cm-t43.dts b/arch/arm/boot/dts/am437x-cm-t43.dts
index 9e92d480576b04ae89f3e5aefc7c57f3a522070c..3b9a94c274a7b01ed29ead402a4b3538c122138d 100644
--- a/arch/arm/boot/dts/am437x-cm-t43.dts
+++ b/arch/arm/boot/dts/am437x-cm-t43.dts
@@ -301,8 +301,8 @@
 	status = "okay";
 	pinctrl-names = "default";
 	pinctrl-0 = <&spi0_pins>;
-	dmas = <&edma 16
-		&edma 17>;
+	dmas = <&edma 16 0
+		&edma 17 0>;
 	dma-names = "tx0", "rx0";
 
 	flash: w25q64cvzpig@0 {
diff --git a/arch/arm/boot/dts/armada-385-db-ap.dts b/arch/arm/boot/dts/armada-385-db-ap.dts
index 25d2d720dc0e2cacc98b402e609e0fe78eb01acb..678aa023335d885279d6ba3bcbc67f774c3b1408 100644
--- a/arch/arm/boot/dts/armada-385-db-ap.dts
+++ b/arch/arm/boot/dts/armada-385-db-ap.dts
@@ -236,6 +236,7 @@
 	usb3_phy: usb3_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&reg_xhci0_vbus>;
+		#phy-cells = <0>;
 	};
 
 	reg_xhci0_vbus: xhci0-vbus {
diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi
index e1f355ffc8f7e07cbbc3912b13e7436f9ac4f407..434dc9aaa5e4e2b46429e221201bf7839ae00a80 100644
--- a/arch/arm/boot/dts/armada-385-linksys.dtsi
+++ b/arch/arm/boot/dts/armada-385-linksys.dtsi
@@ -66,6 +66,7 @@
 	usb3_1_phy: usb3_1-phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&usb3_1_vbus>;
+		#phy-cells = <0>;
 	};
 
 	usb3_1_vbus: usb3_1-vbus {
diff --git a/arch/arm/boot/dts/armada-385-synology-ds116.dts b/arch/arm/boot/dts/armada-385-synology-ds116.dts
index 36ad571e76f31c85aff97acc42d7991ddf184e50..0a3552ebda3b80c84588a3cdfd45fb1ae9286fd2 100644
--- a/arch/arm/boot/dts/armada-385-synology-ds116.dts
+++ b/arch/arm/boot/dts/armada-385-synology-ds116.dts
@@ -191,11 +191,13 @@
 	usb3_0_phy: usb3_0_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&reg_usb3_0_vbus>;
+		#phy-cells = <0>;
 	};
 
 	usb3_1_phy: usb3_1_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&reg_usb3_1_vbus>;
+		#phy-cells = <0>;
 	};
 
 	reg_usb3_0_vbus: usb3-vbus0 {
diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts
index f503955dbd3b810db157344e3f9cf48dadd4b1ca..51b4ee6df130188cb0fe839ce78aa5f84d5e3607 100644
--- a/arch/arm/boot/dts/armada-388-gp.dts
+++ b/arch/arm/boot/dts/armada-388-gp.dts
@@ -276,11 +276,13 @@
 	usb2_1_phy: usb2_1_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&reg_usb2_1_vbus>;
+		#phy-cells = <0>;
 	};
 
 	usb3_phy: usb3_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&reg_usb3_vbus>;
+		#phy-cells = <0>;
 	};
 
 	reg_usb3_vbus: usb3-vbus {
diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi
index 45d815a86d420b9f8f108919bb846f865f9b5231..de08d9045cb85bd83884b8a7c05d78296034a94b 100644
--- a/arch/arm/boot/dts/aspeed-g4.dtsi
+++ b/arch/arm/boot/dts/aspeed-g4.dtsi
@@ -219,7 +219,7 @@
 				compatible = "aspeed,ast2400-vuart";
 				reg = <0x1e787000 0x40>;
 				reg-shift = <2>;
-				interrupts = <10>;
+				interrupts = <8>;
 				clocks = <&clk_uart>;
 				no-loopback-test;
 				status = "disabled";
diff --git a/arch/arm/boot/dts/at91-tse850-3.dts b/arch/arm/boot/dts/at91-tse850-3.dts
index 5f29010cdbd8129d57bd2141164f4fdd4e58cc5b..9b82cc8843e1af8df22ee12d4146cd87c725c563 100644
--- a/arch/arm/boot/dts/at91-tse850-3.dts
+++ b/arch/arm/boot/dts/at91-tse850-3.dts
@@ -221,6 +221,7 @@
 	jc42@18 {
 		compatible = "nxp,se97b", "jedec,jc-42.4-temp";
 		reg = <0x18>;
+		smbus-timeout-disable;
 	};
 
 	dpot: mcp4651-104@28 {
diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi
index 528b9e3bc1da146fd188b4e2c93ec75b7abb4fdd..dcc55aa84583cdd18f7ef6ecd780eb947be1ef1f 100644
--- a/arch/arm/boot/dts/bcm-nsp.dtsi
+++ b/arch/arm/boot/dts/bcm-nsp.dtsi
@@ -85,7 +85,7 @@
 		timer@20200 {
 			compatible = "arm,cortex-a9-global-timer";
 			reg = <0x20200 0x100>;
-			interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
 			clocks = <&periph_clk>;
 		};
 
@@ -93,7 +93,7 @@
 			compatible = "arm,cortex-a9-twd-timer";
 			reg = <0x20600 0x20>;
 			interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) |
-						  IRQ_TYPE_LEVEL_HIGH)>;
+						  IRQ_TYPE_EDGE_RISING)>;
 			clocks = <&periph_clk>;
 		};
 
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 013431e3d7c3140d3a0645bdf4f130e9a860f984..dcde93c85c2d38e0f230ca21c700cb7755d9ec55 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -639,5 +639,6 @@
 
 	usbphy: phy {
 		compatible = "usb-nop-xceiv";
+		#phy-cells = <0>;
 	};
 };
diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts
index 3bc50849d013ff0442f559dce24274972202f5f2..b8bde13de90a571ea71aeec0515c7c8290ecb7ca 100644
--- a/arch/arm/boot/dts/bcm958623hr.dts
+++ b/arch/arm/boot/dts/bcm958623hr.dts
@@ -141,10 +141,6 @@
 	status = "okay";
 };
 
-&sata {
-	status = "okay";
-};
-
 &qspi {
 	bspi-sel = <0>;
 	flash: m25p80@0 {
diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts
index d94d14b3c745a0d01c031b6d9b9e6426e0e7b2ec..6a44b8021702176c63d09e55925ffd3d7e02994e 100644
--- a/arch/arm/boot/dts/bcm958625hr.dts
+++ b/arch/arm/boot/dts/bcm958625hr.dts
@@ -177,10 +177,6 @@
 	status = "okay";
 };
 
-&sata {
-	status = "okay";
-};
-
 &srab {
 	compatible = "brcm,bcm58625-srab", "brcm,nsp-srab";
 	status = "okay";
diff --git a/arch/arm/boot/dts/da850-lego-ev3.dts b/arch/arm/boot/dts/da850-lego-ev3.dts
index 413dbd5d9f6442b8dae4b7f132b35e5f816cf798..81942ae83e1f9c9f717dd0fd565a28966c44c1da 100644
--- a/arch/arm/boot/dts/da850-lego-ev3.dts
+++ b/arch/arm/boot/dts/da850-lego-ev3.dts
@@ -178,7 +178,7 @@
 	 */
 	battery {
 		pinctrl-names = "default";
-		pintctrl-0 = <&battery_pins>;
+		pinctrl-0 = <&battery_pins>;
 		compatible = "lego,ev3-battery";
 		io-channels = <&adc 4>, <&adc 3>;
 		io-channel-names = "voltage", "current";
@@ -392,7 +392,7 @@
 	batt_volt_en {
 		gpio-hog;
 		gpios = <6 GPIO_ACTIVE_HIGH>;
-		output-low;
+		output-high;
 	};
 };
 
diff --git a/arch/arm/boot/dts/dm814x.dtsi b/arch/arm/boot/dts/dm814x.dtsi
index 9708157f5daf6f7c84a76e521aae8d27e5684d68..681f5487406e39f7bdad2c793754149c0ab189d3 100644
--- a/arch/arm/boot/dts/dm814x.dtsi
+++ b/arch/arm/boot/dts/dm814x.dtsi
@@ -75,6 +75,7 @@
 				reg = <0x47401300 0x100>;
 				reg-names = "phy";
 				ti,ctrl_mod = <&usb_ctrl_mod>;
+				#phy-cells = <0>;
 			};
 
 			usb0: usb@47401000 {
@@ -385,6 +386,7 @@
 					reg = <0x1b00 0x100>;
 					reg-names = "phy";
 					ti,ctrl_mod = <&usb_ctrl_mod>;
+					#phy-cells = <0>;
 				};
 			};
 
diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts
index b2b95ff205e81ba9248f3f29416001459e3c3d77..0029ec27819ca361024bfdaccb43b1faededa600 100644
--- a/arch/arm/boot/dts/exynos5800-peach-pi.dts
+++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts
@@ -664,6 +664,10 @@
 	status = "okay";
 };
 
+&mixer {
+	status = "okay";
+};
+
 /* eMMC flash */
 &mmc_0 {
 	status = "okay";
diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi
index 589a67c5f7969fb15b59e1910ea96db528dbf9e4..84f17f7abb7136104cf2265bb34c7e0bba0ebd12 100644
--- a/arch/arm/boot/dts/imx53.dtsi
+++ b/arch/arm/boot/dts/imx53.dtsi
@@ -433,15 +433,6 @@
 				clock-names = "ipg", "per";
 			};
 
-			srtc: srtc@53fa4000 {
-				compatible = "fsl,imx53-rtc", "fsl,imx25-rtc";
-				reg = <0x53fa4000 0x4000>;
-				interrupts = <24>;
-				interrupt-parent = <&tzic>;
-				clocks = <&clks IMX5_CLK_SRTC_GATE>;
-				clock-names = "ipg";
-			};
-
 			iomuxc: iomuxc@53fa8000 {
 				compatible = "fsl,imx53-iomuxc";
 				reg = <0x53fa8000 0x4000>;
diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
index 38faa90007d7f0c7e3042a90aef486321cc4bfa0..2fa5eb4bd4029facce1217e34f266b02daeb987f 100644
--- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
@@ -72,7 +72,8 @@
 };
 
 &gpmc {
-	ranges = <1 0 0x08000000 0x1000000>;	/* CS1: 16MB for LAN9221 */
+	ranges = <0 0 0x30000000 0x1000000	/* CS0: 16MB for NAND */
+		  1 0 0x2c000000 0x1000000>;	/* CS1: 16MB for LAN9221 */
 
 	ethernet@gpmc {
 		pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
index 26cce4d18405d5c993377ed7a246d7c80b43dcea..29cb804d10cc7d8ad9c37b13296d31bdf0f83ffe 100644
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -33,11 +33,12 @@
 	hsusb2_phy: hsusb2_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; /* gpio_4 */
+		#phy-cells = <0>;
 	};
 };
 
 &gpmc {
-	ranges = <0 0 0x00000000 0x1000000>;	/* CS0: 16MB for NAND */
+	ranges = <0 0 0x30000000 0x1000000>;	/* CS0: 16MB for NAND */
 
 	nand@0,0 {
 		compatible = "ti,omap2-nand";
@@ -121,7 +122,7 @@
 
 &mmc3 {
 	interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>;
-	pinctrl-0 = <&mmc3_pins>;
+	pinctrl-0 = <&mmc3_pins &wl127x_gpio>;
 	pinctrl-names = "default";
 	vmmc-supply = <&wl12xx_vmmc>;
 	non-removable;
@@ -132,8 +133,8 @@
 	wlcore: wlcore@2 {
 		compatible = "ti,wl1273";
 		reg = <2>;
-		interrupt-parent = <&gpio5>;
-		interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; /* gpio 152 */
+		interrupt-parent = <&gpio1>;
+		interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; /* gpio 2 */
 		ref-clock-frequency = <26000000>;
 	};
 };
@@ -157,8 +158,6 @@
 			OMAP3_CORE1_IOPAD(0x2166, PIN_INPUT_PULLUP | MUX_MODE3)	/* sdmmc2_dat5.sdmmc3_dat1 */
 			OMAP3_CORE1_IOPAD(0x2168, PIN_INPUT_PULLUP | MUX_MODE3)	/* sdmmc2_dat6.sdmmc3_dat2 */
 			OMAP3_CORE1_IOPAD(0x216a, PIN_INPUT_PULLUP | MUX_MODE3)	/* sdmmc2_dat6.sdmmc3_dat3 */
-			OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT_PULLUP | MUX_MODE4)	/* mcbsp4_clkx.gpio_152 */
-			OMAP3_CORE1_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4)	/* sys_boot1.gpio_3 */
 			OMAP3_CORE1_IOPAD(0x21d0, PIN_INPUT_PULLUP | MUX_MODE3) /* mcspi1_cs1.sdmmc3_cmd */
 			OMAP3_CORE1_IOPAD(0x21d2, PIN_INPUT_PULLUP | MUX_MODE3)	/* mcspi1_cs2.sdmmc_clk */
 		>;
@@ -228,6 +227,12 @@
 			OMAP3_WKUP_IOPAD(0x2a0e, PIN_OUTPUT | MUX_MODE4)	/* sys_boot2.gpio_4 */
 		>;
 	};
+	wl127x_gpio: pinmux_wl127x_gpio_pin {
+		pinctrl-single,pins = <
+			OMAP3_WKUP_IOPAD(0x2a0c, PIN_INPUT | MUX_MODE4)		/* sys_boot0.gpio_2 */
+			OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4)	/* sys_boot1.gpio_3 */
+		>;
+	};
 };
 
 &omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/ls1021a-qds.dts b/arch/arm/boot/dts/ls1021a-qds.dts
index 940875316d0f3926b39ea0717e560288f8594084..67b4de0e343921fda48d7e94b228732fe2d0131d 100644
--- a/arch/arm/boot/dts/ls1021a-qds.dts
+++ b/arch/arm/boot/dts/ls1021a-qds.dts
@@ -215,7 +215,7 @@
 				reg = <0x2a>;
 				VDDA-supply = <&reg_3p3v>;
 				VDDIO-supply = <&reg_3p3v>;
-				clocks = <&sys_mclk 1>;
+				clocks = <&sys_mclk>;
 			};
 		};
 	};
diff --git a/arch/arm/boot/dts/ls1021a-twr.dts b/arch/arm/boot/dts/ls1021a-twr.dts
index a8b148ad1dd2c37076a6a89d7c02498064fabc15..44715c8ef756b9f44bdc7a7c69b0cdc12c26d93d 100644
--- a/arch/arm/boot/dts/ls1021a-twr.dts
+++ b/arch/arm/boot/dts/ls1021a-twr.dts
@@ -187,7 +187,7 @@
 		reg = <0x0a>;
 		VDDA-supply = <&reg_3p3v>;
 		VDDIO-supply = <&reg_3p3v>;
-		clocks = <&sys_mclk 1>;
+		clocks = <&sys_mclk>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi
index 4926133077b3541165ccf989af0834a8362411b3..0d9faf1a51eac0cca63b54cb272bb1c57e8261d2 100644
--- a/arch/arm/boot/dts/meson.dtsi
+++ b/arch/arm/boot/dts/meson.dtsi
@@ -85,15 +85,6 @@
 				reg = <0x7c00 0x200>;
 			};
 
-			gpio_intc: interrupt-controller@9880 {
-				compatible = "amlogic,meson-gpio-intc";
-				reg = <0xc1109880 0x10>;
-				interrupt-controller;
-				#interrupt-cells = <2>;
-				amlogic,channel-interrupts = <64 65 66 67 68 69 70 71>;
-				status = "disabled";
-			};
-
 			hwrng: rng@8100 {
 				compatible = "amlogic,meson-rng";
 				reg = <0x8100 0x8>;
@@ -191,6 +182,15 @@
 				status = "disabled";
 			};
 
+			gpio_intc: interrupt-controller@9880 {
+				compatible = "amlogic,meson-gpio-intc";
+				reg = <0x9880 0x10>;
+				interrupt-controller;
+				#interrupt-cells = <2>;
+				amlogic,channel-interrupts = <64 65 66 67 68 69 70 71>;
+				status = "disabled";
+			};
+
 			wdt: watchdog@9900 {
 				compatible = "amlogic,meson6-wdt";
 				reg = <0x9900 0x8>;
diff --git a/arch/arm/boot/dts/nspire.dtsi b/arch/arm/boot/dts/nspire.dtsi
index ec2283b1a638e028d28e57d53e8a7dd9fc2c2778..1a5ae4cd107f08294c4bddce41d7369a76fa9ecd 100644
--- a/arch/arm/boot/dts/nspire.dtsi
+++ b/arch/arm/boot/dts/nspire.dtsi
@@ -56,6 +56,7 @@
 
 	usb_phy: usb_phy {
 		compatible = "usb-nop-xceiv";
+		#phy-cells = <0>;
 	};
 
 	vbus_reg: vbus_reg {
diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts
index 683b96a8f73e01248349440d1caab8fd7ffeb7cc..0349fcc9dc26ab8d80f568b1d5c8528a78a8a588 100644
--- a/arch/arm/boot/dts/omap3-beagle-xm.dts
+++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
@@ -90,6 +90,7 @@
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio5 19 GPIO_ACTIVE_LOW>; /* gpio_147 */
 		vcc-supply = <&hsusb2_power>;
+		#phy-cells = <0>;
 	};
 
 	tfp410: encoder0 {
diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts
index 4d2eaf843fa960190514dffb06b24d46056adb3e..3ca8991a6c3e977e2a16bb30444db4478a61ca0d 100644
--- a/arch/arm/boot/dts/omap3-beagle.dts
+++ b/arch/arm/boot/dts/omap3-beagle.dts
@@ -64,6 +64,7 @@
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio5 19 GPIO_ACTIVE_LOW>;	/* gpio_147 */
 		vcc-supply = <&hsusb2_power>;
+		#phy-cells = <0>;
 	};
 
 	sound {
diff --git a/arch/arm/boot/dts/omap3-cm-t3x.dtsi b/arch/arm/boot/dts/omap3-cm-t3x.dtsi
index 31d5ebf38892e77d04fc53fcd37922ffaca282e2..ab6003fe5a4341ded0b7668861e7ff1553a9ef57 100644
--- a/arch/arm/boot/dts/omap3-cm-t3x.dtsi
+++ b/arch/arm/boot/dts/omap3-cm-t3x.dtsi
@@ -43,12 +43,14 @@
 	hsusb1_phy: hsusb1_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&hsusb1_power>;
+		#phy-cells = <0>;
 	};
 
 	/* HS USB Host PHY on PORT 2 */
 	hsusb2_phy: hsusb2_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&hsusb2_power>;
+		#phy-cells = <0>;
 	};
 
 	ads7846reg: ads7846-reg {
diff --git a/arch/arm/boot/dts/omap3-evm-common.dtsi b/arch/arm/boot/dts/omap3-evm-common.dtsi
index dbc3f030a16c00cf8baf4e75cbd53fa5d21dc2a0..ee64191e41ca187b60eda4e36dc7aeb641c34896 100644
--- a/arch/arm/boot/dts/omap3-evm-common.dtsi
+++ b/arch/arm/boot/dts/omap3-evm-common.dtsi
@@ -29,6 +29,7 @@
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio1 21 GPIO_ACTIVE_LOW>; /* gpio_21 */
 		vcc-supply = <&hsusb2_power>;
+		#phy-cells = <0>;
 	};
 
 	leds {
diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
index 4504908c23fe991ef2805548550ea4b091977522..3dc56fb156b7e88d9503cfea220a3c9067b326c2 100644
--- a/arch/arm/boot/dts/omap3-gta04.dtsi
+++ b/arch/arm/boot/dts/omap3-gta04.dtsi
@@ -120,6 +120,7 @@
 	hsusb2_phy: hsusb2_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio6 14 GPIO_ACTIVE_LOW>;
+		#phy-cells = <0>;
 	};
 
 	tv0: connector {
diff --git a/arch/arm/boot/dts/omap3-igep0020-common.dtsi b/arch/arm/boot/dts/omap3-igep0020-common.dtsi
index 667f96245729cd7ef76c2a27dbce6fa2f6410f52..ecbec23af49f74a6ecccdd0ec99fc434422c9de3 100644
--- a/arch/arm/boot/dts/omap3-igep0020-common.dtsi
+++ b/arch/arm/boot/dts/omap3-igep0020-common.dtsi
@@ -58,6 +58,7 @@
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio1 24 GPIO_ACTIVE_LOW>; /* gpio_24 */
 		vcc-supply = <&hsusb1_power>;
+		#phy-cells = <0>;
 	};
 
 	tfp410: encoder {
diff --git a/arch/arm/boot/dts/omap3-igep0030-common.dtsi b/arch/arm/boot/dts/omap3-igep0030-common.dtsi
index e94d9427450cafa9457be8f5b50d8bfa9a07d8b5..443f717074374274f7d82ffc385cb47e3d9cf8e7 100644
--- a/arch/arm/boot/dts/omap3-igep0030-common.dtsi
+++ b/arch/arm/boot/dts/omap3-igep0030-common.dtsi
@@ -37,6 +37,7 @@
 	hsusb2_phy: hsusb2_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio2 22 GPIO_ACTIVE_LOW>;		/* gpio_54 */
+		#phy-cells = <0>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
index 343a36d8031d8a2207d9b411ae71000b50232fa0..7ada1e93e166389eb4472cc39d61003b4cc68360 100644
--- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
+++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
@@ -51,6 +51,7 @@
 	hsusb1_phy: hsusb1_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&reg_vcc3>;
+		#phy-cells = <0>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/omap3-overo-base.dtsi b/arch/arm/boot/dts/omap3-overo-base.dtsi
index f25e158e7163b23201ca8e643f557b3965fc959d..ac141fcd1742e3e29a79c4eaacbf69884b171718 100644
--- a/arch/arm/boot/dts/omap3-overo-base.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-base.dtsi
@@ -51,6 +51,7 @@
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio6 23 GPIO_ACTIVE_LOW>;	/* gpio_183 */
 		vcc-supply = <&hsusb2_power>;
+		#phy-cells = <0>;
 	};
 
 	/* Regulator to trigger the nPoweron signal of the Wifi module */
diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi
index 53e007abdc7159ee69fd35a120ba30edb0704e3c..cd53dc6c00516b4d9be3aa5f3023a7ae1a3ffb48 100644
--- a/arch/arm/boot/dts/omap3-pandora-common.dtsi
+++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi
@@ -205,6 +205,7 @@
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio1 16 GPIO_ACTIVE_LOW>; /* GPIO_16 */
 		vcc-supply = <&vaux2>;
+		#phy-cells = <0>;
 	};
 
 	/* HS USB Host VBUS supply
diff --git a/arch/arm/boot/dts/omap3-tao3530.dtsi b/arch/arm/boot/dts/omap3-tao3530.dtsi
index 9a601d15247bef5da1519db88b1680b119381b6d..6f5bd027b71753c8508acc93a56f211a95cd12be 100644
--- a/arch/arm/boot/dts/omap3-tao3530.dtsi
+++ b/arch/arm/boot/dts/omap3-tao3530.dtsi
@@ -46,6 +46,7 @@
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio6 2 GPIO_ACTIVE_LOW>;	/* gpio_162 */
 		vcc-supply = <&hsusb2_power>;
+		#phy-cells = <0>;
 	};
 
 	sound {
diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi
index 90b5c7148feb5a6c20763741628fd1e4c41e3909..bb33935df7b057eef7a9b237fc2d2f951f6ab375 100644
--- a/arch/arm/boot/dts/omap3.dtsi
+++ b/arch/arm/boot/dts/omap3.dtsi
@@ -715,6 +715,7 @@
 				compatible = "ti,ohci-omap3";
 				reg = <0x48064400 0x400>;
 				interrupts = <76>;
+				remote-wakeup-connected;
 			};
 
 			usbhsehci: ehci@48064800 {
diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts
index 8b93d37310f28ba4c10ad81d35f8419914ef4633..24a463f8641fe5b968cdb5d62bf15ed4e100b128 100644
--- a/arch/arm/boot/dts/omap4-droid4-xt894.dts
+++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts
@@ -73,6 +73,7 @@
 	/* HS USB Host PHY on PORT 1 */
 	hsusb1_phy: hsusb1_phy {
 		compatible = "usb-nop-xceiv";
+		#phy-cells = <0>;
 	};
 
 	/* LCD regulator from sw5 source */
diff --git a/arch/arm/boot/dts/omap4-duovero.dtsi b/arch/arm/boot/dts/omap4-duovero.dtsi
index 6e6810c258eb29c15ff8b4e87a6ce1a6eac93f5e..eb123b24c8e330141b7ece42c6dea74f0cc2a837 100644
--- a/arch/arm/boot/dts/omap4-duovero.dtsi
+++ b/arch/arm/boot/dts/omap4-duovero.dtsi
@@ -43,6 +43,7 @@
 	hsusb1_phy: hsusb1_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio2 30 GPIO_ACTIVE_LOW>;	/* gpio_62 */
+		#phy-cells = <0>;
 
 		pinctrl-names = "default";
 		pinctrl-0 = <&hsusb1phy_pins>;
diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi
index 22c1eee9b07a28e27cd1a2d66e89100065a3ce7b..5501d1b4e6cdfa152804b7cbb5b277fcdc0417f6 100644
--- a/arch/arm/boot/dts/omap4-panda-common.dtsi
+++ b/arch/arm/boot/dts/omap4-panda-common.dtsi
@@ -89,6 +89,7 @@
 	hsusb1_phy: hsusb1_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio2 30 GPIO_ACTIVE_LOW>;   /* gpio_62 */
+		#phy-cells = <0>;
 		vcc-supply = <&hsusb1_power>;
 		clocks = <&auxclk3_ck>;
 		clock-names = "main_clk";
diff --git a/arch/arm/boot/dts/omap4-var-som-om44.dtsi b/arch/arm/boot/dts/omap4-var-som-om44.dtsi
index 6500bfc8d1309a26198893583bb00861d5a69a1e..10fce28ceb5b7dc38477e60f0d87a86b6a384638 100644
--- a/arch/arm/boot/dts/omap4-var-som-om44.dtsi
+++ b/arch/arm/boot/dts/omap4-var-som-om44.dtsi
@@ -44,6 +44,7 @@
 
 		reset-gpios = <&gpio6 17 GPIO_ACTIVE_LOW>; /* gpio 177 */
 		vcc-supply = <&vbat>;
+		#phy-cells = <0>;
 
 		clocks = <&auxclk3_ck>;
 		clock-names = "main_clk";
diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index 1dc5a76b3c7106c9532f47c71c8052813b2492f9..cc1a07a3620ff5daa9c32770fee918c45e9439cd 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi
@@ -398,7 +398,7 @@
 		elm: elm@48078000 {
 			compatible = "ti,am3352-elm";
 			reg = <0x48078000 0x2000>;
-			interrupts = <4>;
+			interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "elm";
 			status = "disabled";
 		};
@@ -1081,14 +1081,13 @@
 			usbhsohci: ohci@4a064800 {
 				compatible = "ti,ohci-omap3";
 				reg = <0x4a064800 0x400>;
-				interrupt-parent = <&gic>;
 				interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+				remote-wakeup-connected;
 			};
 
 			usbhsehci: ehci@4a064c00 {
 				compatible = "ti,ehci-omap";
 				reg = <0x4a064c00 0x400>;
-				interrupt-parent = <&gic>;
 				interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
 			};
 		};
diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi
index 575ecffb0e9e47cfda8373c22071de12e8789302..1b20838bb9a42ed64b80b0943dd59acf8be0d06d 100644
--- a/arch/arm/boot/dts/omap5-board-common.dtsi
+++ b/arch/arm/boot/dts/omap5-board-common.dtsi
@@ -73,12 +73,14 @@
 		clocks = <&auxclk1_ck>;
 		clock-names = "main_clk";
 		clock-frequency = <19200000>;
+		#phy-cells = <0>;
 	};
 
 	/* HS USB Host PHY on PORT 3 */
 	hsusb3_phy: hsusb3_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio3 15 GPIO_ACTIVE_LOW>; /* gpio3_79 ETH_NRESET */
+		#phy-cells = <0>;
 	};
 
 	tpd12s015: encoder {
diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts
index 5b172a04b6f1b14193f280228432adf19b00296d..5e21fb430a65daa8e29a1ca90a404389d9dc99a9 100644
--- a/arch/arm/boot/dts/omap5-cm-t54.dts
+++ b/arch/arm/boot/dts/omap5-cm-t54.dts
@@ -63,12 +63,14 @@
 	hsusb2_phy: hsusb2_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio3 12 GPIO_ACTIVE_LOW>; /* gpio3_76 HUB_RESET */
+		#phy-cells = <0>;
 	};
 
 	/* HS USB Host PHY on PORT 3 */
 	hsusb3_phy: hsusb3_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio3 19 GPIO_ACTIVE_LOW>; /* gpio3_83 ETH_RESET */
+		#phy-cells = <0>;
 	};
 
 	leds {
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index 4cd0005e462f7a0b88577a578d0baff8f2b13714..51a7fb3d7b9a019f5b76d5f72afaa0c0c857e050 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -940,6 +940,7 @@
 				compatible = "ti,ohci-omap3";
 				reg = <0x4a064800 0x400>;
 				interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+				remote-wakeup-connected;
 			};
 
 			usbhsehci: ehci@4a064c00 {
diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi
index 2f017fee4009a2242c3a808c5aa07e29eac36ee9..62baabd757b6ba010693b9b7d4cfc37a367819d8 100644
--- a/arch/arm/boot/dts/r8a7790.dtsi
+++ b/arch/arm/boot/dts/r8a7790.dtsi
@@ -1201,6 +1201,7 @@
 		clock-names = "extal", "usb_extal";
 		#clock-cells = <2>;
 		#power-domain-cells = <0>;
+		#reset-cells = <1>;
 	};
 
 	prr: chipid@ff000044 {
diff --git a/arch/arm/boot/dts/r8a7792.dtsi b/arch/arm/boot/dts/r8a7792.dtsi
index 131f65b0426ea317ccfa4fa8fed0b015611bd48c..3d080e07374ca609d367d09642595ff11f9c6629 100644
--- a/arch/arm/boot/dts/r8a7792.dtsi
+++ b/arch/arm/boot/dts/r8a7792.dtsi
@@ -829,6 +829,7 @@
 			clock-names = "extal";
 			#clock-cells = <2>;
 			#power-domain-cells = <0>;
+			#reset-cells = <1>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi
index 58eae569b4e0e3f0920f0e8d0cb4a19cd752d1fd..0cd1035de1a4bd4906eafae6586a596a184a3b84 100644
--- a/arch/arm/boot/dts/r8a7793.dtsi
+++ b/arch/arm/boot/dts/r8a7793.dtsi
@@ -1088,6 +1088,7 @@
 		clock-names = "extal", "usb_extal";
 		#clock-cells = <2>;
 		#power-domain-cells = <0>;
+		#reset-cells = <1>;
 	};
 
 	rst: reset-controller@e6160000 {
diff --git a/arch/arm/boot/dts/r8a7794.dtsi b/arch/arm/boot/dts/r8a7794.dtsi
index 905e50c9b524d2c4ac40e516f8238e15f48a2a50..5643976c13569541b079dd5f9c9a4545dcf2ffc3 100644
--- a/arch/arm/boot/dts/r8a7794.dtsi
+++ b/arch/arm/boot/dts/r8a7794.dtsi
@@ -1099,6 +1099,7 @@
 		clock-names = "extal", "usb_extal";
 		#clock-cells = <2>;
 		#power-domain-cells = <0>;
+		#reset-cells = <1>;
 	};
 
 	rst: reset-controller@e6160000 {
diff --git a/arch/arm/boot/dts/rk3066a-marsboard.dts b/arch/arm/boot/dts/rk3066a-marsboard.dts
index c6d92c25df42d5a673e991df58f86f566df73679..d23ee6d911acf55cb9330b66330966c33816ac7f 100644
--- a/arch/arm/boot/dts/rk3066a-marsboard.dts
+++ b/arch/arm/boot/dts/rk3066a-marsboard.dts
@@ -83,6 +83,10 @@
 	};
 };
 
+&cpu0 {
+	cpu0-supply = <&vdd_arm>;
+};
+
 &i2c1 {
 	status = "okay";
 	clock-frequency = <400000>;
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index cd24894ee5c6b14deab21c543bad7c62ec34c93d..6102e4e7f35c15bcb809152453ef3ed82dd579d7 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -956,7 +956,7 @@
 	iep_mmu: iommu@ff900800 {
 		compatible = "rockchip,iommu";
 		reg = <0x0 0xff900800 0x0 0x40>;
-		interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH 0>;
+		interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
 		interrupt-names = "iep_mmu";
 		#iommu-cells = <0>;
 		status = "disabled";
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index b91300d49a31081269789a6e7437e6d707a8a6f4..5840f5c75c3b388d47242c53aa0c0f7fbe3e3a68 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -502,8 +502,8 @@
 			reg = <0x01c16000 0x1000>;
 			interrupts = <58>;
 			clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
-				 <&ccu 9>,
-				 <&ccu 18>;
+				 <&ccu CLK_PLL_VIDEO0_2X>,
+				 <&ccu CLK_PLL_VIDEO1_2X>;
 			clock-names = "ahb", "mod", "pll-0", "pll-1";
 			dmas = <&dma SUN4I_DMA_NORMAL 16>,
 			       <&dma SUN4I_DMA_NORMAL 16>,
diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index 6ae4d95e230e58a468c908d85c2cfaf8d87894a0..316cb8b2945b114224d2c75ffa65a3d6a07f3300 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -82,8 +82,8 @@
 			reg = <0x01c16000 0x1000>;
 			interrupts = <58>;
 			clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>,
-				 <&ccu 9>,
-				 <&ccu 16>;
+				 <&ccu CLK_PLL_VIDEO0_2X>,
+				 <&ccu CLK_PLL_VIDEO1_2X>;
 			clock-names = "ahb", "mod", "pll-0", "pll-1";
 			dmas = <&dma SUN4I_DMA_NORMAL 16>,
 			       <&dma SUN4I_DMA_NORMAL 16>,
diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi
index 8bfa12b548e0a2acf8f285505cd3699e27519e86..72d3fe44ecaf0d53f16b28b6433e9853d1a13c2f 100644
--- a/arch/arm/boot/dts/sun6i-a31.dtsi
+++ b/arch/arm/boot/dts/sun6i-a31.dtsi
@@ -429,8 +429,8 @@
 			interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&ccu CLK_AHB1_HDMI>, <&ccu CLK_HDMI>,
 				 <&ccu CLK_HDMI_DDC>,
-				 <&ccu 7>,
-				 <&ccu 13>;
+				 <&ccu CLK_PLL_VIDEO0_2X>,
+				 <&ccu CLK_PLL_VIDEO1_2X>;
 			clock-names = "ahb", "mod", "ddc", "pll-0", "pll-1";
 			resets = <&ccu RST_AHB1_HDMI>;
 			reset-names = "ahb";
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 68dfa82544fc4c574916c3d60c17a60097ddb2b1..59655e42e4b09a75edc0b285e2c18fd4926e21aa 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -581,8 +581,8 @@
 			reg = <0x01c16000 0x1000>;
 			interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
-				 <&ccu 9>,
-				 <&ccu 18>;
+				 <&ccu CLK_PLL_VIDEO0_2X>,
+				 <&ccu CLK_PLL_VIDEO1_2X>;
 			clock-names = "ahb", "mod", "pll-0", "pll-1";
 			dmas = <&dma SUN4I_DMA_NORMAL 16>,
 			       <&dma SUN4I_DMA_NORMAL 16>,
diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
index 98715538932f10bd048b355b4f7bb5bfc183a7cd..a021ee6da3968c252a02aefb29c3b99a2e13a096 100644
--- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
+++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
@@ -146,6 +146,7 @@
 	status = "okay";
 
 	axp81x: pmic@3a3 {
+		compatible = "x-powers,axp813";
 		reg = <0x3a3>;
 		interrupt-parent = <&r_intc>;
 		interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
diff --git a/arch/arm/boot/dts/tango4-common.dtsi b/arch/arm/boot/dts/tango4-common.dtsi
index 0ec1b0a317b4c4631d4cfe5d9d451e9d16c697de..ff72a8efb73d05e3d219587654b96987ab1b31e5 100644
--- a/arch/arm/boot/dts/tango4-common.dtsi
+++ b/arch/arm/boot/dts/tango4-common.dtsi
@@ -156,7 +156,6 @@
 			reg = <0x6e000 0x400>;
 			ranges = <0 0x6e000 0x400>;
 			interrupt-parent = <&gic>;
-			interrupt-controller;
 			#address-cells = <1>;
 			#size-cells = <1>;
 
diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
index 02a6227c717ca6ce0bd515804887b74778a06ded..4b8edc8982cf156931177b0ecce0f6b9329afce3 100644
--- a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
+++ b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
@@ -121,7 +121,7 @@
 					switch0port10: port@10 {
 						reg = <10>;
 						label = "dsa";
-						phy-mode = "xgmii";
+						phy-mode = "xaui";
 						link = <&switch1port10>;
 					};
 				};
@@ -208,7 +208,7 @@
 					switch1port10: port@10 {
 						reg = <10>;
 						label = "dsa";
-						phy-mode = "xgmii";
+						phy-mode = "xaui";
 						link = <&switch0port10>;
 					};
 				};
@@ -359,7 +359,7 @@
 };
 
 &i2c1 {
-	at24mac602@0 {
+	at24mac602@50 {
 		compatible = "atmel,24c02";
 		reg = <0x50>;
 		read-only;
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index c8781450905be94995e0df72aeb3cc722afd1b60..3ab8b3781bfeca7264989b813209a99d115d35f2 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -161,8 +161,7 @@
 #else
 #define VTTBR_X		(5 - KVM_T0SZ)
 #endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK  (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X)
 #define VTTBR_VMID_SHIFT  _AC(48, ULL)
 #define VTTBR_VMID_MASK(size)	(_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 242151ea69087a4ec8c4b5fd963c210ff30a89fe..a9f7d3f47134a96536480275168247e309d78388 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -285,6 +285,11 @@ static inline void kvm_arm_init_debug(void) {}
 static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
+static inline bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu,
+					     struct kvm_run *run)
+{
+	return false;
+}
 
 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 4d53de308ee089a7b745926ab8da16caa825806e..4d1cc1847edf076dfb3ea03db6712803a851d28b 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -7,6 +7,7 @@ generated-y += unistd-oabi.h
 generated-y += unistd-eabi.h
 
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 5cf04888c581df4a21053a826f8a2fdd58204435..3e26c6f7a191a9621a5234598920bb43f260bbbb 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -793,7 +793,6 @@ void abort(void)
 	/* if that doesn't kill us, halt */
 	panic("Oops failed to kill thread");
 }
-EXPORT_SYMBOL(abort);
 
 void __init trap_init(void)
 {
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 1712f132b80d2402d94d72ea974a0c3326fa2f52..b83fdc06286a64ece150fb7e419bc587e47c3e34 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -85,7 +85,11 @@
 		.pushsection .text.fixup,"ax"
 		.align	4
 9001:		mov	r4, #-EFAULT
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+		ldr	r5, [sp, #9*4]		@ *err_ptr
+#else
 		ldr	r5, [sp, #8*4]		@ *err_ptr
+#endif
 		str	r4, [r5]
 		ldmia	sp, {r1, r2}		@ retrieve dst, len
 		add	r2, r2, r1
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index 8be04ec95adf5e4d0969c9c1cc1460e4e31c9caf..5ace9380626a0cc34f074e71a2981c8c42e87c50 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -868,10 +868,10 @@ static const struct dma_slave_map dm365_edma_map[] = {
 	{ "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 17) },
 	{ "spi_davinci.3", "tx", EDMA_FILTER_PARAM(0, 18) },
 	{ "spi_davinci.3", "rx", EDMA_FILTER_PARAM(0, 19) },
-	{ "dm6441-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) },
-	{ "dm6441-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) },
-	{ "dm6441-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) },
-	{ "dm6441-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) },
+	{ "da830-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) },
+	{ "da830-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) },
+	{ "da830-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) },
+	{ "da830-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) },
 };
 
 static struct edma_soc_info dm365_edma_pdata = {
@@ -925,12 +925,14 @@ static struct resource edma_resources[] = {
 	/* not using TC*_ERR */
 };
 
-static struct platform_device dm365_edma_device = {
-	.name			= "edma",
-	.id			= 0,
-	.dev.platform_data	= &dm365_edma_pdata,
-	.num_resources		= ARRAY_SIZE(edma_resources),
-	.resource		= edma_resources,
+static const struct platform_device_info dm365_edma_device __initconst = {
+	.name		= "edma",
+	.id		= 0,
+	.dma_mask	= DMA_BIT_MASK(32),
+	.res		= edma_resources,
+	.num_res	= ARRAY_SIZE(edma_resources),
+	.data		= &dm365_edma_pdata,
+	.size_data	= sizeof(dm365_edma_pdata),
 };
 
 static struct resource dm365_asp_resources[] = {
@@ -1428,13 +1430,18 @@ int __init dm365_init_video(struct vpfe_config *vpfe_cfg,
 
 static int __init dm365_init_devices(void)
 {
+	struct platform_device *edma_pdev;
 	int ret = 0;
 
 	if (!cpu_is_davinci_dm365())
 		return 0;
 
 	davinci_cfg_reg(DM365_INT_EDMA_CC);
-	platform_device_register(&dm365_edma_device);
+	edma_pdev = platform_device_register_full(&dm365_edma_device);
+	if (IS_ERR(edma_pdev)) {
+		pr_warn("%s: Failed to register eDMA\n", __func__);
+		return PTR_ERR(edma_pdev);
+	}
 
 	platform_device_register(&dm365_mdio_device);
 	platform_device_register(&dm365_emac_device);
diff --git a/arch/arm/mach-meson/platsmp.c b/arch/arm/mach-meson/platsmp.c
index 2555f9056a339b491cdea8bf9305bf8614690c2b..cad7ee8f0d6b49e10fa2cebf094d754e88524e7d 100644
--- a/arch/arm/mach-meson/platsmp.c
+++ b/arch/arm/mach-meson/platsmp.c
@@ -102,7 +102,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible,
 
 	scu_base = of_iomap(node, 0);
 	if (!scu_base) {
-		pr_err("Couln't map SCU registers\n");
+		pr_err("Couldn't map SCU registers\n");
 		return;
 	}
 
diff --git a/arch/arm/mach-omap2/cm_common.c b/arch/arm/mach-omap2/cm_common.c
index d555791cf349dd160f49be49e58088e16900466b..83c6fa74cc31e41616f06495bc5b210decd6ac6e 100644
--- a/arch/arm/mach-omap2/cm_common.c
+++ b/arch/arm/mach-omap2/cm_common.c
@@ -68,14 +68,17 @@ void __init omap2_set_globals_cm(void __iomem *cm, void __iomem *cm2)
 int cm_split_idlest_reg(struct clk_omap_reg *idlest_reg, s16 *prcm_inst,
 			u8 *idlest_reg_id)
 {
+	int ret;
 	if (!cm_ll_data->split_idlest_reg) {
 		WARN_ONCE(1, "cm: %s: no low-level function defined\n",
 			  __func__);
 		return -EINVAL;
 	}
 
-	return cm_ll_data->split_idlest_reg(idlest_reg, prcm_inst,
+	ret = cm_ll_data->split_idlest_reg(idlest_reg, prcm_inst,
 					   idlest_reg_id);
+	*prcm_inst -= cm_base.offset;
+	return ret;
 }
 
 /**
@@ -337,6 +340,7 @@ int __init omap2_cm_base_init(void)
 		if (mem) {
 			mem->pa = res.start + data->offset;
 			mem->va = data->mem + data->offset;
+			mem->offset = data->offset;
 		}
 
 		data->np = np;
diff --git a/arch/arm/mach-omap2/omap-secure.c b/arch/arm/mach-omap2/omap-secure.c
index 5ac122e88f678b75d6c1060783738321d8b9c579..fa7f308c90279eb20ef89fdcdaafef31a1f8ec2c 100644
--- a/arch/arm/mach-omap2/omap-secure.c
+++ b/arch/arm/mach-omap2/omap-secure.c
@@ -73,6 +73,27 @@ phys_addr_t omap_secure_ram_mempool_base(void)
 	return omap_secure_memblock_base;
 }
 
+#if defined(CONFIG_ARCH_OMAP3) && defined(CONFIG_PM)
+u32 omap3_save_secure_ram(void __iomem *addr, int size)
+{
+	u32 ret;
+	u32 param[5];
+
+	if (size != OMAP3_SAVE_SECURE_RAM_SZ)
+		return OMAP3_SAVE_SECURE_RAM_SZ;
+
+	param[0] = 4;		/* Number of arguments */
+	param[1] = __pa(addr);	/* Physical address for saving */
+	param[2] = 0;
+	param[3] = 1;
+	param[4] = 1;
+
+	ret = save_secure_ram_context(__pa(param));
+
+	return ret;
+}
+#endif
+
 /**
  * rx51_secure_dispatcher: Routine to dispatch secure PPA API calls
  * @idx: The PPA API index
diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h
index bae263fba640af8c1ff7a3b1b98e71a35486e94c..c509cde71f931ab959cc751462d924a04fc0778f 100644
--- a/arch/arm/mach-omap2/omap-secure.h
+++ b/arch/arm/mach-omap2/omap-secure.h
@@ -31,6 +31,8 @@
 /* Maximum Secure memory storage size */
 #define OMAP_SECURE_RAM_STORAGE	(88 * SZ_1K)
 
+#define OMAP3_SAVE_SECURE_RAM_SZ	0x803F
+
 /* Secure low power HAL API index */
 #define OMAP4_HAL_SAVESECURERAM_INDEX	0x1a
 #define OMAP4_HAL_SAVEHW_INDEX		0x1b
@@ -65,6 +67,8 @@ extern u32 omap_smc2(u32 id, u32 falg, u32 pargs);
 extern u32 omap_smc3(u32 id, u32 process, u32 flag, u32 pargs);
 extern phys_addr_t omap_secure_ram_mempool_base(void);
 extern int omap_secure_ram_reserve_memblock(void);
+extern u32 save_secure_ram_context(u32 args_pa);
+extern u32 omap3_save_secure_ram(void __iomem *save_regs, int size);
 
 extern u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs,
 				  u32 arg1, u32 arg2, u32 arg3, u32 arg4);
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index d45cbfdb4be6838b7f698886a84f4ceb48fe35f8..f0388058b7da399d28d3819ab268d5d0a2a61073 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -391,10 +391,8 @@ omap_device_copy_resources(struct omap_hwmod *oh,
 	const char *name;
 	int error, irq = 0;
 
-	if (!oh || !oh->od || !oh->od->pdev) {
-		error = -EINVAL;
-		goto error;
-	}
+	if (!oh || !oh->od || !oh->od->pdev)
+		return -EINVAL;
 
 	np = oh->od->pdev->dev.of_node;
 	if (!np) {
@@ -516,8 +514,10 @@ struct platform_device __init *omap_device_build(const char *pdev_name,
 		goto odbs_exit1;
 
 	od = omap_device_alloc(pdev, &oh, 1);
-	if (IS_ERR(od))
+	if (IS_ERR(od)) {
+		ret = PTR_ERR(od);
 		goto odbs_exit1;
+	}
 
 	ret = platform_device_add_data(pdev, pdata, pdata_len);
 	if (ret)
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index d2106ae4410a23fb76c8593b6d123b3a94800ade..52c9d585b44d2607f4d0a4c5d0b1b1d01cb4bac3 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -1646,6 +1646,7 @@ static struct omap_hwmod omap3xxx_mmc3_hwmod = {
 	.main_clk	= "mmchs3_fck",
 	.prcm		= {
 		.omap2 = {
+			.module_offs = CORE_MOD,
 			.prcm_reg_id = 1,
 			.module_bit = OMAP3430_EN_MMC3_SHIFT,
 			.idlest_reg_id = 1,
diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h
index b668719b9b25a7ada81229ba18ad2645777cf487..8e30772cfe325a35e52f2feaaa53c9fcc59357cd 100644
--- a/arch/arm/mach-omap2/pm.h
+++ b/arch/arm/mach-omap2/pm.h
@@ -81,10 +81,6 @@ extern unsigned int omap3_do_wfi_sz;
 /* ... and its pointer from SRAM after copy */
 extern void (*omap3_do_wfi_sram)(void);
 
-/* save_secure_ram_context function pointer and size, for copy to SRAM */
-extern int save_secure_ram_context(u32 *addr);
-extern unsigned int save_secure_ram_context_sz;
-
 extern void omap3_save_scratchpad_contents(void);
 
 #define PM_RTA_ERRATUM_i608		(1 << 0)
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 841ba19d64a69b153a38ef594220d1a4054d7e59..36c55547137cb08b9b1b662cb5477e591590598d 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -48,6 +48,7 @@
 #include "prm3xxx.h"
 #include "pm.h"
 #include "sdrc.h"
+#include "omap-secure.h"
 #include "sram.h"
 #include "control.h"
 #include "vc.h"
@@ -66,7 +67,6 @@ struct power_state {
 
 static LIST_HEAD(pwrst_list);
 
-static int (*_omap_save_secure_sram)(u32 *addr);
 void (*omap3_do_wfi_sram)(void);
 
 static struct powerdomain *mpu_pwrdm, *neon_pwrdm;
@@ -121,8 +121,8 @@ static void omap3_save_secure_ram_context(void)
 		 * will hang the system.
 		 */
 		pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_ON);
-		ret = _omap_save_secure_sram((u32 *)(unsigned long)
-				__pa(omap3_secure_ram_storage));
+		ret = omap3_save_secure_ram(omap3_secure_ram_storage,
+					    OMAP3_SAVE_SECURE_RAM_SZ);
 		pwrdm_set_next_pwrst(mpu_pwrdm, mpu_next_state);
 		/* Following is for error tracking, it should not happen */
 		if (ret) {
@@ -434,15 +434,10 @@ static int __init pwrdms_setup(struct powerdomain *pwrdm, void *unused)
  *
  * The minimum set of functions is pushed to SRAM for execution:
  * - omap3_do_wfi for erratum i581 WA,
- * - save_secure_ram_context for security extensions.
  */
 void omap_push_sram_idle(void)
 {
 	omap3_do_wfi_sram = omap_sram_push(omap3_do_wfi, omap3_do_wfi_sz);
-
-	if (omap_type() != OMAP2_DEVICE_TYPE_GP)
-		_omap_save_secure_sram = omap_sram_push(save_secure_ram_context,
-				save_secure_ram_context_sz);
 }
 
 static void __init pm_errata_configure(void)
@@ -553,7 +548,7 @@ int __init omap3_pm_init(void)
 	clkdm_add_wkdep(neon_clkdm, mpu_clkdm);
 	if (omap_type() != OMAP2_DEVICE_TYPE_GP) {
 		omap3_secure_ram_storage =
-			kmalloc(0x803F, GFP_KERNEL);
+			kmalloc(OMAP3_SAVE_SECURE_RAM_SZ, GFP_KERNEL);
 		if (!omap3_secure_ram_storage)
 			pr_err("Memory allocation failed when allocating for secure sram context\n");
 
diff --git a/arch/arm/mach-omap2/prcm-common.h b/arch/arm/mach-omap2/prcm-common.h
index 0592b23902c6885b03985085ec5c4d7415966d01..0977da0dab76077ee663ffa0dc0ba3f3c32f3a26 100644
--- a/arch/arm/mach-omap2/prcm-common.h
+++ b/arch/arm/mach-omap2/prcm-common.h
@@ -528,6 +528,7 @@ struct omap_prcm_irq_setup {
 struct omap_domain_base {
 	u32 pa;
 	void __iomem *va;
+	s16 offset;
 };
 
 /**
diff --git a/arch/arm/mach-omap2/prm33xx.c b/arch/arm/mach-omap2/prm33xx.c
index d2c5bcabdbebe03d9e05e4e551815ce85b9c4207..ebaf80d72a109fdaabb1fbeb21da6d401119ce8d 100644
--- a/arch/arm/mach-omap2/prm33xx.c
+++ b/arch/arm/mach-omap2/prm33xx.c
@@ -176,17 +176,6 @@ static int am33xx_pwrdm_read_pwrst(struct powerdomain *pwrdm)
 	return v;
 }
 
-static int am33xx_pwrdm_read_prev_pwrst(struct powerdomain *pwrdm)
-{
-	u32 v;
-
-	v = am33xx_prm_read_reg(pwrdm->prcm_offs, pwrdm->pwrstst_offs);
-	v &= AM33XX_LASTPOWERSTATEENTERED_MASK;
-	v >>= AM33XX_LASTPOWERSTATEENTERED_SHIFT;
-
-	return v;
-}
-
 static int am33xx_pwrdm_set_lowpwrstchange(struct powerdomain *pwrdm)
 {
 	am33xx_prm_rmw_reg_bits(AM33XX_LOWPOWERSTATECHANGE_MASK,
@@ -357,7 +346,6 @@ struct pwrdm_ops am33xx_pwrdm_operations = {
 	.pwrdm_set_next_pwrst		= am33xx_pwrdm_set_next_pwrst,
 	.pwrdm_read_next_pwrst		= am33xx_pwrdm_read_next_pwrst,
 	.pwrdm_read_pwrst		= am33xx_pwrdm_read_pwrst,
-	.pwrdm_read_prev_pwrst		= am33xx_pwrdm_read_prev_pwrst,
 	.pwrdm_set_logic_retst		= am33xx_pwrdm_set_logic_retst,
 	.pwrdm_read_logic_pwrst		= am33xx_pwrdm_read_logic_pwrst,
 	.pwrdm_read_logic_retst		= am33xx_pwrdm_read_logic_retst,
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S
index fa5fd24f524c5cb233dfb72d73c23929084d7caa..22daf4efed68b2b4b34ea4e5f1c6788f1033c562 100644
--- a/arch/arm/mach-omap2/sleep34xx.S
+++ b/arch/arm/mach-omap2/sleep34xx.S
@@ -93,20 +93,13 @@ ENTRY(enable_omap3630_toggle_l2_on_restore)
 ENDPROC(enable_omap3630_toggle_l2_on_restore)
 
 /*
- * Function to call rom code to save secure ram context. This gets
- * relocated to SRAM, so it can be all in .data section. Otherwise
- * we need to initialize api_params separately.
+ * Function to call rom code to save secure ram context.
+ *
+ * r0 = physical address of the parameters
  */
-	.data
-	.align	3
 ENTRY(save_secure_ram_context)
 	stmfd	sp!, {r4 - r11, lr}	@ save registers on stack
-	adr	r3, api_params		@ r3 points to parameters
-	str	r0, [r3,#0x4]		@ r0 has sdram address
-	ldr	r12, high_mask
-	and	r3, r3, r12
-	ldr	r12, sram_phy_addr_mask
-	orr	r3, r3, r12
+	mov	r3, r0			@ physical address of parameters
 	mov	r0, #25			@ set service ID for PPA
 	mov	r12, r0			@ copy secure service ID in r12
 	mov	r1, #0			@ set task id for ROM code in r1
@@ -120,18 +113,7 @@ ENTRY(save_secure_ram_context)
 	nop
 	nop
 	ldmfd	sp!, {r4 - r11, pc}
-	.align
-sram_phy_addr_mask:
-	.word	SRAM_BASE_P
-high_mask:
-	.word	0xffff
-api_params:
-	.word	0x4, 0x0, 0x0, 0x1, 0x1
 ENDPROC(save_secure_ram_context)
-ENTRY(save_secure_ram_context_sz)
-	.word	. - save_secure_ram_context
-
-	.text
 
 /*
  * ======================
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a93339f5178f2eff247144eb9244c077225094bc..c9a7e9e1414f344c9dfd515600e3e4378bf61d81 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -557,7 +557,6 @@ config QCOM_QDF2400_ERRATUM_0065
 
 	  If unsure, say Y.
 
-
 config SOCIONEXT_SYNQUACER_PREITS
 	bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"
 	default y
@@ -576,6 +575,17 @@ config HISILICON_ERRATUM_161600802
 	  a 128kB offset to be applied to the target address in this commands.
 
 	  If unsure, say Y.
+
+config QCOM_FALKOR_ERRATUM_E1041
+	bool "Falkor E1041: Speculative instruction fetches might cause errant memory access"
+	default y
+	help
+	  Falkor CPU may speculatively fetch instructions from an improper
+	  memory location when MMU translation is changed from SCTLR_ELn[M]=1
+	  to SCTLR_ELn[M]=0. Prefix an ISB instruction to fix the problem.
+
+	  If unsure, say Y.
+
 endmenu
 
 
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index d7c22d51bc5073c13b17143d314e49728bcd3220..4aa50b9b26bca7b904ab3e2921dc844f0f46f9b9 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -12,6 +12,7 @@ subdir-y += cavium
 subdir-y += exynos
 subdir-y += freescale
 subdir-y += hisilicon
+subdir-y += lg
 subdir-y += marvell
 subdir-y += mediatek
 subdir-y += nvidia
@@ -22,5 +23,4 @@ subdir-y += rockchip
 subdir-y += socionext
 subdir-y += sprd
 subdir-y += xilinx
-subdir-y += lg
 subdir-y += zte
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
index 45bdbfb961261becf2fb940fb9da19ee0f99560a..4a8d3f83a36eabc50134577306c06febffb9bed8 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
@@ -75,6 +75,7 @@
 	pinctrl-0 = <&rgmii_pins>;
 	phy-mode = "rgmii";
 	phy-handle = <&ext_rgmii_phy>;
+	phy-supply = <&reg_dc1sw>;
 	status = "okay";
 };
 
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
index 806442d3e846881d01e3c971da41af1db869b14f..604cdaedac38eed479a8f24a88efbc4fddc2676e 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
@@ -77,6 +77,7 @@
 	pinctrl-0 = <&rmii_pins>;
 	phy-mode = "rmii";
 	phy-handle = <&ext_rmii_phy1>;
+	phy-supply = <&reg_dc1sw>;
 	status = "okay";
 
 };
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
index 0eb2acedf8c3bc5ff2b4bbde4cddfde4d8c204ad..abe179de35d780cc1d3691394d465817dad30549 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
@@ -82,6 +82,7 @@
 	pinctrl-0 = <&rgmii_pins>;
 	phy-mode = "rgmii";
 	phy-handle = <&ext_rgmii_phy>;
+	phy-supply = <&reg_dc1sw>;
 	status = "okay";
 };
 
@@ -95,7 +96,7 @@
 &mmc2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc2_pins>;
-	vmmc-supply = <&reg_vcc3v3>;
+	vmmc-supply = <&reg_dcdc1>;
 	vqmmc-supply = <&reg_vcc1v8>;
 	bus-width = <8>;
 	non-removable;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi
index a5da18a6f2866d34537c55dcc5d3288daf6c8331..43418bd881d81e73da7634fa6ff62c11d64b65ef 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi
@@ -45,19 +45,10 @@
 
 #include "sun50i-a64.dtsi"
 
-/ {
-	reg_vcc3v3: vcc3v3 {
-		compatible = "regulator-fixed";
-		regulator-name = "vcc3v3";
-		regulator-min-microvolt = <3300000>;
-		regulator-max-microvolt = <3300000>;
-	};
-};
-
 &mmc0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc0_pins>;
-	vmmc-supply = <&reg_vcc3v3>;
+	vmmc-supply = <&reg_dcdc1>;
 	non-removable;
 	disable-wp;
 	bus-width = <4>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts
index b6b7a561df8c91d90c57bd2d36cbaba37283bb02..a42fd79a62a30695221375b9a18d0209c29d73dd 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts
@@ -71,7 +71,7 @@
 	pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
 	vmmc-supply = <&reg_vcc3v3>;
 	bus-width = <4>;
-	cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };
 
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
index ead895a4e9a5c9fb6f663092288e8178b95cd6a1..1fb8b9d6cb4ea07c105d088d2ccc5c25ca1ea128 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
@@ -753,12 +753,12 @@
 
 &uart_B {
 	clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>;
-	clock-names = "xtal", "core", "baud";
+	clock-names = "xtal", "pclk", "baud";
 };
 
 &uart_C {
 	clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>;
-	clock-names = "xtal", "core", "baud";
+	clock-names = "xtal", "pclk", "baud";
 };
 
 &vpu {
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
index 8ed981f59e5ae5804da97c887193a32a73b53983..6524b89e7115b5e313834b5ab8565768eb06bd13 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
@@ -688,7 +688,7 @@
 
 &uart_A {
 	clocks = <&xtal>, <&clkc CLKID_UART0>, <&xtal>;
-	clock-names = "xtal", "core", "baud";
+	clock-names = "xtal", "pclk", "baud";
 };
 
 &uart_AO {
@@ -703,12 +703,12 @@
 
 &uart_B {
 	clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>;
-	clock-names = "xtal", "core", "baud";
+	clock-names = "xtal", "pclk", "baud";
 };
 
 &uart_C {
 	clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>;
-	clock-names = "xtal", "core", "baud";
+	clock-names = "xtal", "pclk", "baud";
 };
 
 &vpu {
diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
index a298df74ca6c037f373f78f5fd310a4d8e8c0e64..dbe2648649db1e1f74bfb7b2bc97f9765658f7d9 100644
--- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi
+++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
@@ -255,7 +255,6 @@
 &avb {
 	pinctrl-0 = <&avb_pins>;
 	pinctrl-names = "default";
-	renesas,no-ether-link;
 	phy-handle = <&phy0>;
 	status = "okay";
 
diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi
index 0d85b315ce711c8f0d896d7861204036ce9b0ec3..73439cf4865964016c14d2237c2f504d3ddfa430 100644
--- a/arch/arm64/boot/dts/renesas/ulcb.dtsi
+++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi
@@ -145,7 +145,6 @@
 &avb {
 	pinctrl-0 = <&avb_pins>;
 	pinctrl-names = "default";
-	renesas,no-ether-link;
 	phy-handle = <&phy0>;
 	status = "okay";
 
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
index d4f80786e7c20c0a46bacb9636f8defc7ef137b4..3890468678ce1caa78a411aeb0a4a9cdedfc1302 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
@@ -132,6 +132,8 @@
 	assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;
 	assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;
 	clock_in_out = "input";
+	/* shows instability at 1GBit right now */
+	max-speed = <100>;
 	phy-supply = <&vcc_io>;
 	phy-mode = "rgmii";
 	pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 41d61840fb99ce52ec553c94e119ab63bb79cdbe..2426da6319382dbcab52f286860aabd8651087cd 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -514,7 +514,7 @@
 	tsadc: tsadc@ff250000 {
 		compatible = "rockchip,rk3328-tsadc";
 		reg = <0x0 0xff250000 0x0 0x100>;
-		interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>;
+		interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
 		assigned-clocks = <&cru SCLK_TSADC>;
 		assigned-clock-rates = <50000>;
 		clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
index 910628d18add07d9a39974bc6ce2ac4a403adb81..1fc5060d7027e6e4b852c6cd71d430d2e7ab3990 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
@@ -155,17 +155,6 @@
 		regulator-min-microvolt = <5000000>;
 		regulator-max-microvolt = <5000000>;
 	};
-
-	vdd_log: vdd-log {
-		compatible = "pwm-regulator";
-		pwms = <&pwm2 0 25000 0>;
-		regulator-name = "vdd_log";
-		regulator-min-microvolt = <800000>;
-		regulator-max-microvolt = <1400000>;
-		regulator-always-on;
-		regulator-boot-on;
-		status = "okay";
-	};
 };
 
 &cpu_b0 {
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts
index dd7193acc7dfa54aebf712fe7bbb3264159565f0..6bdefb26b3296ae2dda562472bd26a8a8d8b23bd 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts
@@ -40,7 +40,6 @@
 };
 
 &ethsc {
-	interrupt-parent = <&gpio>;
 	interrupts = <0 8>;
 };
 
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
index d99e3731358c4aed8f7933fb49a8ba4db7a290b0..254d6795c67e94802789038ad3b9ccc872ffb3f5 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
@@ -40,7 +40,6 @@
 };
 
 &ethsc {
-	interrupt-parent = <&gpio>;
 	interrupts = <0 8>;
 };
 
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts
index 864feeb3518014f2f9670e3f82c5815c624d038a..f9f06fcfb94aa5e2c8552afc11ae2df8bf3c2f10 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts
@@ -38,8 +38,7 @@
 };
 
 &ethsc {
-	interrupt-parent = <&gpio>;
-	interrupts = <0 8>;
+	interrupts = <4 8>;
 };
 
 &serial0 {
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
index 48e733136db4580ffd06963f622b7ce7ada92d61..0ac2ace824350185c0e53369b6702875f1274ea0 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
@@ -198,8 +198,8 @@
 			gpio-controller;
 			#gpio-cells = <2>;
 			gpio-ranges = <&pinctrl 0 0 0>,
-				      <&pinctrl 96 0 0>,
-				      <&pinctrl 160 0 0>;
+				      <&pinctrl 104 0 0>,
+				      <&pinctrl 168 0 0>;
 			gpio-ranges-group-names = "gpio_range0",
 						  "gpio_range1",
 						  "gpio_range2";
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index aef72d886677758c76d6b932c863893df7c67b53..8b168280976f25de43539ed1b4dbed9b952fcfde 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -512,4 +512,14 @@ alternative_else_nop_endif
 #endif
 	.endm
 
+/**
+ * Errata workaround prior to disable MMU. Insert an ISB immediately prior
+ * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
+ */
+	.macro pre_disable_mmu_workaround
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
+	isb
+#endif
+	.endm
+
 #endif	/* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index ac67cfc2585a8af417405958779b792d60b06e6a..060e3a4008abd18e4a5aa48bb5b6fa1674a735c3 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -60,6 +60,9 @@ enum ftr_type {
 #define FTR_VISIBLE	true	/* Feature visible to the user space */
 #define FTR_HIDDEN	false	/* Feature is hidden from the user */
 
+#define FTR_VISIBLE_IF_IS_ENABLED(config)		\
+	(IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN)
+
 struct arm64_ftr_bits {
 	bool		sign;	/* Value is signed ? */
 	bool		visible;
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 235e77d982610a0114f62cc833278994cc2e76b2..cbf08d7cbf3089949bb4ada755a36383e6e5db2e 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -91,6 +91,7 @@
 #define BRCM_CPU_PART_VULCAN		0x516
 
 #define QCOM_CPU_PART_FALKOR_V1		0x800
+#define QCOM_CPU_PART_FALKOR		0xC00
 
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
@@ -99,6 +100,7 @@
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
 #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
+#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 650344d01124971d98819116b7e657c7fce7c6f4..c4cd5081d78bc4b66e34f3c283d9b1f6ba48518c 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -132,11 +132,9 @@ static inline void efi_set_pgd(struct mm_struct *mm)
 			 * Defer the switch to the current thread's TTBR0_EL1
 			 * until uaccess_enable(). Restore the current
 			 * thread's saved ttbr0 corresponding to its active_mm
-			 * (if different from init_mm).
 			 */
 			cpu_set_reserved_ttbr0();
-			if (current->active_mm != &init_mm)
-				update_saved_ttbr0(current, current->active_mm);
+			update_saved_ttbr0(current, current->active_mm);
 		}
 	}
 }
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 7f069ff37f06cff3a4e032bc5916be1b155ee1f8..715d395ef45bb23f072b34f88116b36940346e95 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -170,8 +170,7 @@
 #define VTCR_EL2_FLAGS			(VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
 #define VTTBR_X				(VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
 
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
 #define VTTBR_VMID_SHIFT  (UL(48))
 #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 674912d7a571942b956c980aa2096efff32e85f9..ea6cb5b24258be29f39507c39d526ea7e52c7c30 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -370,6 +370,7 @@ void kvm_arm_init_debug(void);
 void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
 int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 3257895a9b5e413c7c69c9d3cdb2fa23ec030592..9d155fa9a50791af293916cfdc1ede087f850c6d 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -156,29 +156,21 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
 
 #define init_new_context(tsk,mm)	({ atomic64_set(&(mm)->context.id, 0); 0; })
 
-/*
- * This is called when "tsk" is about to enter lazy TLB mode.
- *
- * mm:  describes the currently active mm context
- * tsk: task which is entering lazy tlb
- * cpu: cpu number which is entering lazy tlb
- *
- * tsk->mm will be NULL
- */
-static inline void
-enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-}
-
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
 static inline void update_saved_ttbr0(struct task_struct *tsk,
 				      struct mm_struct *mm)
 {
-	if (system_uses_ttbr0_pan()) {
-		BUG_ON(mm->pgd == swapper_pg_dir);
-		task_thread_info(tsk)->ttbr0 =
-			virt_to_phys(mm->pgd) | ASID(mm) << 48;
-	}
+	u64 ttbr;
+
+	if (!system_uses_ttbr0_pan())
+		return;
+
+	if (mm == &init_mm)
+		ttbr = __pa_symbol(empty_zero_page);
+	else
+		ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
+
+	task_thread_info(tsk)->ttbr0 = ttbr;
 }
 #else
 static inline void update_saved_ttbr0(struct task_struct *tsk,
@@ -187,6 +179,16 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
 }
 #endif
 
+static inline void
+enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+	/*
+	 * We don't actually care about the ttbr0 mapping, so point it at the
+	 * zero page.
+	 */
+	update_saved_ttbr0(tsk, &init_mm);
+}
+
 static inline void __switch_mm(struct mm_struct *next)
 {
 	unsigned int cpu = smp_processor_id();
@@ -214,11 +216,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 * Update the saved TTBR0_EL1 of the scheduled-in task as the previous
 	 * value may have not been initialised yet (activate_mm caller) or the
 	 * ASID has changed since the last run (following the context switch
-	 * of another thread of the same process). Avoid setting the reserved
-	 * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit).
+	 * of another thread of the same process).
 	 */
-	if (next != &init_mm)
-		update_saved_ttbr0(tsk, next);
+	update_saved_ttbr0(tsk, next);
 }
 
 #define deactivate_mm(tsk,mm)	do { } while (0)
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 8d5cbec17d803e37556b5f4a7b25e7b4f1391b35..f9ccc36d3dc3cb2e29ad2cd47f7cdd5eed4cef8a 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -18,6 +18,7 @@
 #define __ASM_PERF_EVENT_H
 
 #include <asm/stack_pointer.h>
+#include <asm/ptrace.h>
 
 #define	ARMV8_PMU_MAX_COUNTERS	32
 #define	ARMV8_PMU_COUNTER_MASK	(ARMV8_PMU_MAX_COUNTERS - 1)
@@ -79,6 +80,7 @@ struct pt_regs;
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)	perf_misc_flags(regs)
+#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
 #endif
 
 #define perf_arch_fetch_caller_regs(regs, __ip) { \
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 149d05fb9421520bd659b62627941ed36ce46bb3..bdcc7f1c9d069df3d95c6884b8071cdba530cfc8 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -42,6 +42,8 @@
 #include <asm/cmpxchg.h>
 #include <asm/fixmap.h>
 #include <linux/mmdebug.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
 
 extern void __pte_error(const char *file, int line, unsigned long val);
 extern void __pmd_error(const char *file, int line, unsigned long val);
@@ -149,12 +151,20 @@ static inline pte_t pte_mkwrite(pte_t pte)
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	return clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+	return pte;
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	return set_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
+	if (pte_write(pte))
+		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+	return pte;
 }
 
 static inline pte_t pte_mkold(pte_t pte)
@@ -207,9 +217,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
 	}
 }
 
-struct mm_struct;
-struct vm_area_struct;
-
 extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 
 /*
@@ -238,7 +245,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	 * hardware updates of the pte (ptep_set_access_flags safely changes
 	 * valid ptes without going through an invalid entry).
 	 */
-	if (pte_valid(*ptep) && pte_valid(pte)) {
+	if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+	   (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
 		VM_WARN_ONCE(!pte_young(pte),
 			     "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
 			     __func__, pte_val(*ptep), pte_val(pte));
@@ -641,28 +649,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /*
- * ptep_set_wrprotect - mark read-only while preserving the hardware update of
- * the Access Flag.
+ * ptep_set_wrprotect - mark read-only while trasferring potential hardware
+ * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
  */
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 {
 	pte_t old_pte, pte;
 
-	/*
-	 * ptep_set_wrprotect() is only called on CoW mappings which are
-	 * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
-	 * PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
-	 * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
-	 * protection_map[]. There is no race with the hardware update of the
-	 * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
-	 * is set.
-	 */
-	VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
-		     "%s: potential race with hardware DBM", __func__);
 	pte = READ_ONCE(*ptep);
 	do {
 		old_pte = pte;
+		/*
+		 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+		 * clear), set the PTE_DIRTY bit.
+		 */
+		if (pte_hw_dirty(pte))
+			pte = pte_mkdirty(pte);
 		pte = pte_wrprotect(pte);
 		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
 					       pte_val(old_pte), pte_val(pte));
diff --git a/arch/arm64/include/uapi/asm/bpf_perf_event.h b/arch/arm64/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..b551b741653d251d6155a214023b2215ae02e871
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 65f42d2574142d4b37bebf49ed1fc3cdccbb56ae..2a752cb2a0f35a82f2a60e744d160af9b5f6c6a1 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart)
 	mrs	x12, sctlr_el1
 	ldr	x13, =SCTLR_ELx_FLAGS
 	bic	x12, x12, x13
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x12
 	isb
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c5ba0097887f93e9d30b37355b84e5750d74d04e..a73a5928f09b26ae7b2de7b3c2217e5c975de4a0 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -145,7 +145,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+				   FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 4e6ad355bd058e6a4ab73a0f94832a7b1fe719a6..6b9736c3fb5630ab31c17b662b5c5cfe2b7d0832 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -96,6 +96,7 @@ ENTRY(entry)
 	mrs	x0, sctlr_el2
 	bic	x0, x0, #1 << 0	// clear SCTLR.M
 	bic	x0, x0, #1 << 2	// clear SCTLR.C
+	pre_disable_mmu_workaround
 	msr	sctlr_el2, x0
 	isb
 	b	2f
@@ -103,6 +104,7 @@ ENTRY(entry)
 	mrs	x0, sctlr_el1
 	bic	x0, x0, #1 << 0	// clear SCTLR.M
 	bic	x0, x0, #1 << 2	// clear SCTLR.C
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x0
 	isb
 2:
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 5084e699447a4d011742ad964448203c1d93337a..fae81f7964b4f226242961607cb087a20710e22b 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -114,7 +114,12 @@
  *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
  *   whatever is in the FPSIMD registers is not saved to memory, but discarded.
  */
-static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+struct fpsimd_last_state_struct {
+	struct fpsimd_state *st;
+	bool sve_in_use;
+};
+
+static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
 
 /* Default VL for tasks that don't set it explicitly: */
 static int sve_default_vl = -1;
@@ -905,7 +910,7 @@ void fpsimd_thread_switch(struct task_struct *next)
 		 */
 		struct fpsimd_state *st = &next->thread.fpsimd_state;
 
-		if (__this_cpu_read(fpsimd_last_state) == st
+		if (__this_cpu_read(fpsimd_last_state.st) == st
 		    && st->cpu == smp_processor_id())
 			clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
 		else
@@ -991,6 +996,21 @@ void fpsimd_signal_preserve_current_state(void)
 		sve_to_fpsimd(current);
 }
 
+/*
+ * Associate current's FPSIMD context with this cpu
+ * Preemption must be disabled when calling this function.
+ */
+static void fpsimd_bind_to_cpu(void)
+{
+	struct fpsimd_last_state_struct *last =
+		this_cpu_ptr(&fpsimd_last_state);
+	struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+	last->st = st;
+	last->sve_in_use = test_thread_flag(TIF_SVE);
+	st->cpu = smp_processor_id();
+}
+
 /*
  * Load the userland FPSIMD state of 'current' from memory, but only if the
  * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
@@ -1004,11 +1024,8 @@ void fpsimd_restore_current_state(void)
 	local_bh_disable();
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
-		struct fpsimd_state *st = &current->thread.fpsimd_state;
-
 		task_fpsimd_load();
-		__this_cpu_write(fpsimd_last_state, st);
-		st->cpu = smp_processor_id();
+		fpsimd_bind_to_cpu();
 	}
 
 	local_bh_enable();
@@ -1026,18 +1043,14 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 
 	local_bh_disable();
 
-	current->thread.fpsimd_state = *state;
+	current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd;
 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
 		fpsimd_to_sve(current);
 
 	task_fpsimd_load();
 
-	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
-		struct fpsimd_state *st = &current->thread.fpsimd_state;
-
-		__this_cpu_write(fpsimd_last_state, st);
-		st->cpu = smp_processor_id();
-	}
+	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
+		fpsimd_bind_to_cpu();
 
 	local_bh_enable();
 }
@@ -1052,7 +1065,7 @@ void fpsimd_flush_task_state(struct task_struct *t)
 
 static inline void fpsimd_flush_cpu_state(void)
 {
-	__this_cpu_write(fpsimd_last_state, NULL);
+	__this_cpu_write(fpsimd_last_state.st, NULL);
 }
 
 /*
@@ -1065,14 +1078,10 @@ static inline void fpsimd_flush_cpu_state(void)
 #ifdef CONFIG_ARM64_SVE
 void sve_flush_cpu_state(void)
 {
-	struct fpsimd_state *const fpstate = __this_cpu_read(fpsimd_last_state);
-	struct task_struct *tsk;
-
-	if (!fpstate)
-		return;
+	struct fpsimd_last_state_struct const *last =
+		this_cpu_ptr(&fpsimd_last_state);
 
-	tsk = container_of(fpstate, struct task_struct, thread.fpsimd_state);
-	if (test_tsk_thread_flag(tsk, TIF_SVE))
+	if (last->st && last->sve_in_use)
 		fpsimd_flush_cpu_state();
 }
 #endif /* CONFIG_ARM64_SVE */
@@ -1267,7 +1276,7 @@ static inline void fpsimd_pm_init(void) { }
 #ifdef CONFIG_HOTPLUG_CPU
 static int fpsimd_cpu_dead(unsigned int cpu)
 {
-	per_cpu(fpsimd_last_state, cpu) = NULL;
+	per_cpu(fpsimd_last_state.st, cpu) = NULL;
 	return 0;
 }
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 67e86a0f57ac43edcee10d89bd5db2e050ae1621..e3cb9fbf96b66c3ba2d4327d4c1a4b3ca734ef1f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -750,6 +750,7 @@ __primary_switch:
 	 * to take into account by discarding the current kernel mapping and
 	 * creating a new one.
 	 */
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x20			// disable the MMU
 	isb
 	bl	__create_page_tables		// recreate kernel mapping
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 749f81779420c7bab2ead0d8f5b9a6cf108e6a45..74bb56f656eff024839df19897ba06512128e9bb 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -28,6 +28,7 @@
 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/smp.h>
+#include <linux/uaccess.h>
 
 #include <asm/compat.h>
 #include <asm/current.h>
@@ -36,7 +37,6 @@
 #include <asm/traps.h>
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
-#include <asm/uaccess.h>
 
 /* Breakpoint currently in use for each BRP. */
 static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b2adcce7bc18e628c924d009ba38190cb1bff951..6b7dcf4310acf6fc04ff40cca4ccd10e27fabec9 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -314,6 +314,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_tsk_thread_flag(p, TIF_SVE);
 	p->thread.sve_state = NULL;
 
+	/*
+	 * In case p was allocated the same task_struct pointer as some
+	 * other recently-exited task, make sure p is disassociated from
+	 * any cpu that may have run that now-exited task recently.
+	 * Otherwise we could erroneously skip reloading the FPSIMD
+	 * registers for p.
+	 */
+	fpsimd_flush_task_state(p);
+
 	if (likely(!(p->flags & PF_KTHREAD))) {
 		*childregs = *current_pt_regs();
 		childregs->regs[0] = 0;
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index ce704a4aeadd438bf637472bb7037b89fba15087..f407e422a7200b86072349cc70e1e6d5e7e1753b 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel)
 	mrs	x0, sctlr_el2
 	ldr	x1, =SCTLR_ELx_FLAGS
 	bic	x0, x0, x1
+	pre_disable_mmu_workaround
 	msr	sctlr_el2, x0
 	isb
 1:
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index dbadfaf850a7d07e202684f293c072e5ed4ec722..fa63b28c65e08a37e269b8d55fa6197e77b42b40 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -221,3 +221,24 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
 		}
 	}
 }
+
+
+/*
+ * After successfully emulating an instruction, we might want to
+ * return to user space with a KVM_EXIT_DEBUG. We can only do this
+ * once the emulation is complete, though, so for userspace emulations
+ * we have to wait until we have re-entered KVM before calling this
+ * helper.
+ *
+ * Return true (and set exit_reason) to return to userspace or false
+ * if no further action is required.
+ */
+bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		run->exit_reason = KVM_EXIT_DEBUG;
+		run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT;
+		return true;
+	}
+	return false;
+}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index b712479954692f3efd3844e490b412f71c3a96aa..304203fa9e3307583748de10c6c1f7425a231c7b 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -28,6 +28,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_psci.h>
+#include <asm/debug-monitors.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -186,6 +187,40 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 	return arm_exit_handlers[hsr_ec];
 }
 
+/*
+ * We may be single-stepping an emulated instruction. If the emulation
+ * has been completed in the kernel, we can return to userspace with a
+ * KVM_EXIT_DEBUG, otherwise userspace needs to complete its
+ * emulation first.
+ */
+static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	int handled;
+
+	/*
+	 * See ARM ARM B1.14.1: "Hyp traps on instructions
+	 * that fail their condition code check"
+	 */
+	if (!kvm_condition_valid(vcpu)) {
+		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+		handled = 1;
+	} else {
+		exit_handle_fn exit_handler;
+
+		exit_handler = kvm_get_exit_handler(vcpu);
+		handled = exit_handler(vcpu, run);
+	}
+
+	/*
+	 * kvm_arm_handle_step_debug() sets the exit_reason on the kvm_run
+	 * structure if we need to return to userspace.
+	 */
+	if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run))
+		handled = 0;
+
+	return handled;
+}
+
 /*
  * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
  * proper exit to userspace.
@@ -193,8 +228,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		       int exception_index)
 {
-	exit_handle_fn exit_handler;
-
 	if (ARM_SERROR_PENDING(exception_index)) {
 		u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
 
@@ -220,20 +253,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		return 1;
 	case ARM_EXCEPTION_EL1_SERROR:
 		kvm_inject_vabt(vcpu);
-		return 1;
-	case ARM_EXCEPTION_TRAP:
-		/*
-		 * See ARM ARM B1.14.1: "Hyp traps on instructions
-		 * that fail their condition code check"
-		 */
-		if (!kvm_condition_valid(vcpu)) {
-			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+		/* We may still need to return for single-step */
+		if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)
+			&& kvm_arm_handle_step_debug(vcpu, run))
+			return 0;
+		else
 			return 1;
-		}
-
-		exit_handler = kvm_get_exit_handler(vcpu);
-
-		return exit_handler(vcpu, run);
+	case ARM_EXCEPTION_TRAP:
+		return handle_trap_exceptions(vcpu, run);
 	case ARM_EXCEPTION_HYP_GONE:
 		/*
 		 * EL2 has been reset to the hyp-stub. This happens when a guest
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 3f9615582377661a88fab8be6a12365d625d830a..870828c364c508f825eacc1c49c17886dc9c8cb2 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -151,6 +151,7 @@ reset:
 	mrs	x5, sctlr_el2
 	ldr	x6, =SCTLR_ELx_FLAGS
 	bic	x5, x5, x6		// Clear SCTL_M and etc
+	pre_disable_mmu_workaround
 	msr	sctlr_el2, x5
 	isb
 
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 321c9c05dd9e09fc0c745a4543a286b7628f00a4..f4363d40e2cd7fd62d40d826d5296c95f15cde9f 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 {
 	u64 reg;
 
+	/* Clear pmscr in case of early return */
+	*pmscr_el1 = 0;
+
 	/* SPE present on this CPU? */
 	if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
 						  ID_AA64DFR0_PMSVER_SHIFT))
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 525c01f48867808b6efa257063daaa4c8207252e..f7c651f3a8c0e8001bb11b1a45216ac6d4a6b342 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -22,6 +22,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 #include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
 
 static bool __hyp_text __fpsimd_enabled_nvhe(void)
 {
@@ -269,7 +270,11 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
 	return true;
 }
 
-static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
+/* Skip an instruction which has been emulated. Returns true if
+ * execution can continue or false if we need to exit hyp mode because
+ * single-step was in effect.
+ */
+static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 {
 	*vcpu_pc(vcpu) = read_sysreg_el2(elr);
 
@@ -282,6 +287,14 @@ static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 	}
 
 	write_sysreg_el2(*vcpu_pc(vcpu), elr);
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		vcpu->arch.fault.esr_el2 =
+			(ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT) | 0x22;
+		return false;
+	} else {
+		return true;
+	}
 }
 
 int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
@@ -342,13 +355,21 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 			int ret = __vgic_v2_perform_cpuif_access(vcpu);
 
 			if (ret == 1) {
-				__skip_instr(vcpu);
-				goto again;
+				if (__skip_instr(vcpu))
+					goto again;
+				else
+					exit_code = ARM_EXCEPTION_TRAP;
 			}
 
 			if (ret == -1) {
-				/* Promote an illegal access to an SError */
-				__skip_instr(vcpu);
+				/* Promote an illegal access to an
+				 * SError. If we would be returning
+				 * due to single-step clear the SS
+				 * bit so handle_exit knows what to
+				 * do after dealing with the error.
+				 */
+				if (!__skip_instr(vcpu))
+					*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
 				exit_code = ARM_EXCEPTION_EL1_SERROR;
 			}
 
@@ -363,8 +384,10 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 		int ret = __vgic_v3_perform_cpuif_access(vcpu);
 
 		if (ret == 1) {
-			__skip_instr(vcpu);
-			goto again;
+			if (__skip_instr(vcpu))
+				goto again;
+			else
+				exit_code = ARM_EXCEPTION_TRAP;
 		}
 
 		/* 0 falls through to be handled out of EL2 */
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index ca74a2aace425b95ed95ecf0e70a78188621004e..7b60d62ac5939e83c8e153ec1c3a0447565f23eb 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -389,7 +389,7 @@ void ptdump_check_wx(void)
 		.check_wx = true,
 	};
 
-	walk_pgd(&st, &init_mm, 0);
+	walk_pgd(&st, &init_mm, VA_START);
 	note_page(&st, 0, 0, 0);
 	if (st.wx_pages || st.uxn_pages)
 		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 22168cd0dde73e06698bc40b166867df17a00134..9b7f89df49dbfe108da2eadc59421ce99a4432b4 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -574,7 +574,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
 	struct siginfo info;
 	const struct fault_info *inf;
-	int ret = 0;
 
 	inf = esr_to_fault_info(esr);
 	pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
@@ -589,7 +588,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 		if (interrupts_enabled(regs))
 			nmi_enter();
 
-		ret = ghes_notify_sea();
+		ghes_notify_sea();
 
 		if (interrupts_enabled(regs))
 			nmi_exit();
@@ -604,7 +603,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 		info.si_addr  = (void __user *)addr;
 	arm64_notify_die("", regs, &info, esr);
 
-	return ret;
+	return 0;
 }
 
 static const struct fault_info fault_info[] = {
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 5960bef0170df85916d0c1ac3b65f570f0af67ea..00e7b900ca4193e83dfa7de7dd506984afe90bce 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -476,6 +476,8 @@ void __init arm64_memblock_init(void)
 
 	reserve_elfcorehdr();
 
+	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
+
 	dma_contiguous_reserve(arm64_dma_phys_limit);
 
 	memblock_allow_resize();
@@ -502,7 +504,6 @@ void __init bootmem_init(void)
 	sparse_init();
 	zone_sizes_init(min, max);
 
-	high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
 	memblock_dump_all();
 }
 
diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild
index aa624b4ab6557c3e22d3660819f25688d605e3ed..2240b38c2915fa725cf2c5d1afc322edd1bb47c0 100644
--- a/arch/blackfin/include/uapi/asm/Kbuild
+++ b/arch/blackfin/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index 67ee896a76a7f2f0837436e054b8146dbe7dbac3..26644e15d8540fa43cf70e47acf9ce837dd18fd5 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index 3687b54bb18ed1a0f36d512af627b8085f15987b..3470c6e9c7b9ba1ca3b5962d276b42a4c2d2e35f 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/frv/include/uapi/asm/Kbuild b/arch/frv/include/uapi/asm/Kbuild
index b15bf6bc0e94f46f035e8781ffa921060341fe91..14a2e9af97e9992d87821e8f11276ecfef8e57cf 100644
--- a/arch/frv/include/uapi/asm/Kbuild
+++ b/arch/frv/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += bpf_perf_event.h
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index 187aed820e71feac3ffd03e021387bc892bda5e9..2f65f78792cbe5cf7219bb2228fb84e05a0b9204 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
index cb5df3aad3a848e27fdccfd2c36e848b1217bc27..41a176dbb53e4f16524157bae122180d572dd4f5 100644
--- a/arch/hexagon/include/uapi/asm/Kbuild
+++ b/arch/hexagon/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
index 13a97aa2285f7418d18f1396f03bf6281b580a0f..f5c6967a93bb204ff41bfdee837d3ec4bdaa94dd 100644
--- a/arch/ia64/include/uapi/asm/Kbuild
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += bpf_perf_event.h
 generic-y += kvm_para.h
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index c6ecb97151a25774bf75ce5a1663210db4f6bce3..9025699049ca63b3834ed917d935224009d2d7a8 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -88,7 +88,7 @@ void vtime_flush(struct task_struct *tsk)
 	}
 
 	if (ti->softirq_time) {
-		delta = cycle_to_nsec(ti->softirq_time));
+		delta = cycle_to_nsec(ti->softirq_time);
 		account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ);
 	}
 
diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild
index 1c44d3b3eba03bac62b9a69e6e560aac61a0a3ff..451bf6071c6e28036f0da81dedb35c8b184f3b5a 100644
--- a/arch/m32r/include/uapi/asm/Kbuild
+++ b/arch/m32r/include/uapi/asm/Kbuild
@@ -1,5 +1,6 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += bpf_perf_event.h
 generic-y += kvm_para.h
 generic-y += siginfo.h
diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c
index cb79fba79d4391dbe07cd517309f77cbc5b05506..b88a8dd149333d0a0227c28762ca904fbe268ed2 100644
--- a/arch/m32r/kernel/traps.c
+++ b/arch/m32r/kernel/traps.c
@@ -122,7 +122,6 @@ void abort(void)
 	/* if that doesn't kill us, halt */
 	panic("Oops failed to kill thread");
 }
-EXPORT_SYMBOL(abort);
 
 void __init trap_init(void)
 {
diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig
index 55e55dbc2fb66410f757f635a7cc46a7c28b1287..3d07b1de7eb0aa807d9aa637b3b0cec07e4ca1e2 100644
--- a/arch/m68k/configs/stmark2_defconfig
+++ b/arch/m68k/configs/stmark2_defconfig
@@ -5,7 +5,6 @@ CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_NAMESPACES=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../uClinux-dist/romfs"
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
 # CONFIG_RD_XZ is not set
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 3717b64a620df54a46495f07a3597188b9f727ec..c2e26a44c482da3a6d87d9b26173d64e7c9ca78f 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds
index 3aa571a513b5dfa1f48bf2e17c120289f1a0d246..cf6edda389719535e25d505dfdc31d885995cffb 100644
--- a/arch/m68k/kernel/vmlinux-nommu.lds
+++ b/arch/m68k/kernel/vmlinux-nommu.lds
@@ -45,6 +45,8 @@ SECTIONS {
 	.text : {
 		HEAD_TEXT
 		TEXT_TEXT
+		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		SCHED_TEXT
 		CPUIDLE_TEXT
 		LOCK_TEXT
diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds
index 89172b8974b95444f4def01ce4104b664391eaea..625a5785804faf8d706442150e6cdcac0ee117c3 100644
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -16,6 +16,8 @@ SECTIONS
   .text : {
 	HEAD_TEXT
 	TEXT_TEXT
+	IRQENTRY_TEXT
+	SOFTIRQENTRY_TEXT
 	SCHED_TEXT
 	CPUIDLE_TEXT
 	LOCK_TEXT
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds
index 293990efc9173288d38313e22c81e2c93aea0909..9868270b0984487c5a83100514cd88e8ddd743cd 100644
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -16,6 +16,8 @@ SECTIONS
   .text : {
 	HEAD_TEXT
 	TEXT_TEXT
+	IRQENTRY_TEXT
+	SOFTIRQENTRY_TEXT
 	SCHED_TEXT
 	CPUIDLE_TEXT
 	LOCK_TEXT
diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild
index 6ac763d9a3e34e3cfb0caaf0072f78c26ec8b03f..f9eaf07d29f84ab1871d49b89c11a107fb5e1149 100644
--- a/arch/metag/include/uapi/asm/Kbuild
+++ b/arch/metag/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index 06609ca361150ab77529bf0999d8e258ad25d62c..2c6a6bffea3265d3f3ef1b4d04f6c64347e395e4 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 7c8aab23bce8da59f727ff8725250a8f0d385b12..b1f66699677dbaa31931e84f702c88ad0d2feb8f 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += qrwlock.h
 generic-y += qspinlock.h
 generic-y += sections.h
 generic-y += segment.h
-generic-y += serial.h
 generic-y += trace_clock.h
 generic-y += unaligned.h
 generic-y += user.h
diff --git a/arch/mips/include/asm/serial.h b/arch/mips/include/asm/serial.h
new file mode 100644
index 0000000000000000000000000000000000000000..1d830c6666c2731d008388d3b1e44951a94fa639
--- /dev/null
+++ b/arch/mips/include/asm/serial.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2017 MIPS Tech, LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#ifndef __ASM__SERIAL_H
+#define __ASM__SERIAL_H
+
+#ifdef CONFIG_MIPS_GENERIC
+/*
+ * Generic kernels cannot know a correct value for all platforms at
+ * compile time. Set it to 0 to prevent 8250_early using it
+ */
+#define BASE_BAUD 0
+#else
+#include <asm-generic/serial.h>
+#endif
+
+#endif /* __ASM__SERIAL_H */
diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild
index a0266feba9e6d996d5469ed18fd23df081a2ab38..7a4becd8963a219331632203849998a0a3f56e03 100644
--- a/arch/mips/include/uapi/asm/Kbuild
+++ b/arch/mips/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += bpf_perf_event.h
 generic-y += ipcbuf.h
diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index c7ed26029cbbcf0ed7458a145d490957a59abb4e..e68e6e04063a7e0d9f30c50ebac3b082ac7b8b88 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -235,6 +235,7 @@ LEAF(mips_cps_core_init)
 	has_mt	t0, 3f
 
 	.set	push
+	.set	MIPS_ISA_LEVEL_RAW
 	.set	mt
 
 	/* Only allow 1 TC per VPE to execute... */
@@ -388,6 +389,7 @@ LEAF(mips_cps_boot_vpes)
 #elif defined(CONFIG_MIPS_MT)
 
 	.set	push
+	.set	MIPS_ISA_LEVEL_RAW
 	.set	mt
 
 	/* If the core doesn't support MT then return */
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 45d0b6b037eeb6a985318df16df6f73a7f1dad9c..57028d49c202aeab53a1b4ccd5d21b3b3052cedb 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -705,6 +705,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
 	struct task_struct *t;
 	int max_users;
 
+	/* If nothing to change, return right away, successfully.  */
+	if (value == mips_get_process_fp_mode(task))
+		return 0;
+
+	/* Only accept a mode change if 64-bit FP enabled for o32.  */
+	if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
+		return -EOPNOTSUPP;
+
+	/* And only for o32 tasks.  */
+	if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
+		return -EOPNOTSUPP;
+
 	/* Check the value is valid */
 	if (value & ~known_bits)
 		return -EOPNOTSUPP;
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index efbd8df8b6652e7a81baed64134858fd8b3d733a..0b23b1ad99e65f1e21d1810340f9dd306483b8d3 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -419,63 +419,160 @@ static int gpr64_set(struct task_struct *target,
 
 #endif /* CONFIG_64BIT */
 
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * !CONFIG_CPU_HAS_MSA variant.  FP context's general register slots
+ * correspond 1:1 to buffer slots.  Only general registers are copied.
+ */
+static int fpr_get_fpa(struct task_struct *target,
+		       unsigned int *pos, unsigned int *count,
+		       void **kbuf, void __user **ubuf)
+{
+	return user_regset_copyout(pos, count, kbuf, ubuf,
+				   &target->thread.fpu,
+				   0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * CONFIG_CPU_HAS_MSA variant.  Only lower 64 bits of FP context's
+ * general register slots are copied to buffer slots.  Only general
+ * registers are copied.
+ */
+static int fpr_get_msa(struct task_struct *target,
+		       unsigned int *pos, unsigned int *count,
+		       void **kbuf, void __user **ubuf)
+{
+	unsigned int i;
+	u64 fpr_val;
+	int err;
+
+	BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+	for (i = 0; i < NUM_FPU_REGS; i++) {
+		fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
+		err = user_regset_copyout(pos, count, kbuf, ubuf,
+					  &fpr_val, i * sizeof(elf_fpreg_t),
+					  (i + 1) * sizeof(elf_fpreg_t));
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ */
 static int fpr_get(struct task_struct *target,
 		   const struct user_regset *regset,
 		   unsigned int pos, unsigned int count,
 		   void *kbuf, void __user *ubuf)
 {
-	unsigned i;
+	const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
 	int err;
-	u64 fpr_val;
 
-	/* XXX fcr31  */
+	if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+		err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
+	else
+		err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
+	if (err)
+		return err;
 
-	if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
-		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-					   &target->thread.fpu,
-					   0, sizeof(elf_fpregset_t));
+	err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.fpu.fcr31,
+				  fcr31_pos, fcr31_pos + sizeof(u32));
 
-	for (i = 0; i < NUM_FPU_REGS; i++) {
-		fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
-		err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-					  &fpr_val, i * sizeof(elf_fpreg_t),
-					  (i + 1) * sizeof(elf_fpreg_t));
+	return err;
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * !CONFIG_CPU_HAS_MSA variant.   Buffer slots correspond 1:1 to FP
+ * context's general register slots.  Only general registers are copied.
+ */
+static int fpr_set_fpa(struct task_struct *target,
+		       unsigned int *pos, unsigned int *count,
+		       const void **kbuf, const void __user **ubuf)
+{
+	return user_regset_copyin(pos, count, kbuf, ubuf,
+				  &target->thread.fpu,
+				  0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * CONFIG_CPU_HAS_MSA variant.  Buffer slots are copied to lower 64
+ * bits only of FP context's general register slots.  Only general
+ * registers are copied.
+ */
+static int fpr_set_msa(struct task_struct *target,
+		       unsigned int *pos, unsigned int *count,
+		       const void **kbuf, const void __user **ubuf)
+{
+	unsigned int i;
+	u64 fpr_val;
+	int err;
+
+	BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+	for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
+		err = user_regset_copyin(pos, count, kbuf, ubuf,
+					 &fpr_val, i * sizeof(elf_fpreg_t),
+					 (i + 1) * sizeof(elf_fpreg_t));
 		if (err)
 			return err;
+		set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
 	}
 
 	return 0;
 }
 
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ *
+ * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
+ * which is supposed to have been guaranteed by the kernel before
+ * calling us, e.g. in `ptrace_regset'.  We enforce that requirement,
+ * so that we can safely avoid preinitializing temporaries for
+ * partial register writes.
+ */
 static int fpr_set(struct task_struct *target,
 		   const struct user_regset *regset,
 		   unsigned int pos, unsigned int count,
 		   const void *kbuf, const void __user *ubuf)
 {
-	unsigned i;
+	const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+	u32 fcr31;
 	int err;
-	u64 fpr_val;
 
-	/* XXX fcr31  */
+	BUG_ON(count % sizeof(elf_fpreg_t));
+
+	if (pos + count > sizeof(elf_fpregset_t))
+		return -EIO;
 
 	init_fp_ctx(target);
 
-	if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
-		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-					  &target->thread.fpu,
-					  0, sizeof(elf_fpregset_t));
+	if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+		err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
+	else
+		err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
+	if (err)
+		return err;
 
-	BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
-	for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
+	if (count > 0) {
 		err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-					 &fpr_val, i * sizeof(elf_fpreg_t),
-					 (i + 1) * sizeof(elf_fpreg_t));
+					 &fcr31,
+					 fcr31_pos, fcr31_pos + sizeof(u32));
 		if (err)
 			return err;
-		set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
+
+		ptrace_setfcr31(target, fcr31);
 	}
 
-	return 0;
+	return err;
 }
 
 enum mips_regset {
diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild
index c94ee54210bc489efd469493800596cd2b7061a3..81271d3af47cb1000ebfab2539efa92080a1eccc 100644
--- a/arch/mn10300/include/uapi/asm/Kbuild
+++ b/arch/mn10300/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y	+= bpf_perf_event.h
 generic-y	+= siginfo.h
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index ffca24da7647b80e0f45728dff8da4e9474dc65f..13a3d77b4d7bdc487b814ae2933940638b62c759 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index 62286dbeb9043c6ff6eecbd5859c23fd21ce1baa..130c16ccba0a0abb135e31275eb34c564d6cd700 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 9345b44b86f036572e33721eb80e9bbbe4493aa4..f57118e1f6b4265257799ae2cf8ea356077e20b9 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -123,8 +123,8 @@ int puts(const char *s)
 	while ((nuline = strchr(s, '\n')) != NULL) {
 		if (nuline != s)
 			pdc_iodc_print(s, nuline - s);
-			pdc_iodc_print("\r\n", 2);
-			s = nuline + 1;
+		pdc_iodc_print("\r\n", 2);
+		s = nuline + 1;
 	}
 	if (*s != '\0')
 		pdc_iodc_print(s, strlen(s));
diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h
index dd5a08aaa4da746ff09b19cf5f93e9d58489ff14..3eb4bfc1fb365478f11c9c87bfb3b3124fab567a 100644
--- a/arch/parisc/include/asm/ldcw.h
+++ b/arch/parisc/include/asm/ldcw.h
@@ -12,6 +12,7 @@
    for the semaphore.  */
 
 #define __PA_LDCW_ALIGNMENT	16
+#define __PA_LDCW_ALIGN_ORDER	4
 #define __ldcw_align(a) ({					\
 	unsigned long __ret = (unsigned long) &(a)->lock[0];	\
 	__ret = (__ret + __PA_LDCW_ALIGNMENT - 1)		\
@@ -29,6 +30,7 @@
    ldcd). */
 
 #define __PA_LDCW_ALIGNMENT	4
+#define __PA_LDCW_ALIGN_ORDER	2
 #define __ldcw_align(a) (&(a)->slock)
 #define __LDCW	"ldcw,co"
 
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index c980a02a52bc0dda0a23b205f59d1d86438553f2..598c8d60fa5e602cc9303e1986ada9680d64feb3 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -35,7 +35,12 @@ struct thread_info {
 
 /* thread information allocation */
 
+#ifdef CONFIG_IRQSTACKS
+#define THREAD_SIZE_ORDER	2 /* PA-RISC requires at least 16k stack */
+#else
 #define THREAD_SIZE_ORDER	3 /* PA-RISC requires at least 32k stack */
+#endif
+
 /* Be sure to hunt all references to this down when you change the size of
  * the kernel stack */
 #define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
index 196d2a4efb312be6d830fe2de80538d1f7aaf8f4..286ef5a5904b02d5f346dd783dbea9fdb6b35e70 100644
--- a/arch/parisc/include/uapi/asm/Kbuild
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
+generic-y += bpf_perf_event.h
 generic-y += kvm_para.h
 generic-y += param.h
 generic-y += poll.h
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index d8f77358e2ba29746730f34ba652db327bb2f1d5..29b99b8964aa6c3171d7633ab9863014510330f5 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -870,7 +870,7 @@ static void print_parisc_device(struct parisc_device *dev)
 	static int count;
 
 	print_pa_hwpath(dev, hw_path);
-	printk(KERN_INFO "%d. %s at 0x%p [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
+	printk(KERN_INFO "%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
 		++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type,
 		dev->id.hversion_rev, dev->id.hversion, dev->id.sversion);
 
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index a4fd296c958e8e14f13a913aca50510b11eb49b7..e95207c0565eb12308e12d48c45bd5309ae6e2ae 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm/signal.h>
 #include <asm/unistd.h>
+#include <asm/ldcw.h>
 #include <asm/thread_info.h>
 
 #include <linux/linkage.h>
@@ -46,6 +47,14 @@
 #endif
 
 	.import		pa_tlb_lock,data
+	.macro  load_pa_tlb_lock reg
+#if __PA_LDCW_ALIGNMENT > 4
+	load32	PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg
+	depi	0,31,__PA_LDCW_ALIGN_ORDER, \reg
+#else
+	load32	PA(pa_tlb_lock), \reg
+#endif
+	.endm
 
 	/* space_to_prot macro creates a prot id from a space id */
 
@@ -457,7 +466,7 @@
 	.macro		tlb_lock	spc,ptp,pte,tmp,tmp1,fault
 #ifdef CONFIG_SMP
 	cmpib,COND(=),n	0,\spc,2f
-	load32		PA(pa_tlb_lock),\tmp
+	load_pa_tlb_lock \tmp
 1:	LDCW		0(\tmp),\tmp1
 	cmpib,COND(=)	0,\tmp1,1b
 	nop
@@ -480,7 +489,7 @@
 	/* Release pa_tlb_lock lock. */
 	.macro		tlb_unlock1	spc,tmp
 #ifdef CONFIG_SMP
-	load32		PA(pa_tlb_lock),\tmp
+	load_pa_tlb_lock \tmp
 	tlb_unlock0	\spc,\tmp
 #endif
 	.endm
@@ -878,9 +887,6 @@ ENTRY_CFI(syscall_exit_rfi)
 	STREG   %r19,PT_SR7(%r16)
 
 intr_return:
-	/* NOTE: Need to enable interrupts incase we schedule. */
-	ssm     PSW_SM_I, %r0
-
 	/* check for reschedule */
 	mfctl   %cr30,%r1
 	LDREG   TI_FLAGS(%r1),%r19	/* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +913,11 @@ intr_check_sig:
 	LDREG	PT_IASQ1(%r16), %r20
 	cmpib,COND(=),n 0,%r20,intr_restore /* backward */
 
+	/* NOTE: We need to enable interrupts if we have to deliver
+	 * signals. We used to do this earlier but it caused kernel
+	 * stack overflows. */
+	ssm     PSW_SM_I, %r0
+
 	copy	%r0, %r25			/* long in_syscall = 0 */
 #ifdef CONFIG_64BIT
 	ldo	-16(%r30),%r29			/* Reference param save area */
@@ -958,6 +969,10 @@ intr_do_resched:
 	cmpib,COND(=)	0, %r20, intr_do_preempt
 	nop
 
+	/* NOTE: We need to enable interrupts if we schedule.  We used
+	 * to do this earlier but it caused kernel stack overflows. */
+	ssm     PSW_SM_I, %r0
+
 #ifdef CONFIG_64BIT
 	ldo	-16(%r30),%r29		/* Reference param save area */
 #endif
diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S
index e3a8e5e4d5de75897adcea4134f87c7246f60646..8d072c44f300c16d45ba8f4ee0c2eee6435e4ddd 100644
--- a/arch/parisc/kernel/hpmc.S
+++ b/arch/parisc/kernel/hpmc.S
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
 
 
 	__INITRODATA
+	.align 4
 	.export os_hpmc_size
 os_hpmc_size:
 	.word .os_hpmc_end-.os_hpmc
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index adf7187f89515ec69b19461f60ce379e552397dc..2d40c4ff3f6918ae9b2e2c6af71e20658a9850e1 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -36,6 +36,7 @@
 #include <asm/assembly.h>
 #include <asm/pgtable.h>
 #include <asm/cache.h>
+#include <asm/ldcw.h>
 #include <linux/linkage.h>
 
 	.text
@@ -333,8 +334,12 @@ ENDPROC_CFI(flush_data_cache_local)
 
 	.macro	tlb_lock	la,flags,tmp
 #ifdef CONFIG_SMP
-	ldil		L%pa_tlb_lock,%r1
-	ldo		R%pa_tlb_lock(%r1),\la
+#if __PA_LDCW_ALIGNMENT > 4
+	load32		pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
+	depi		0,31,__PA_LDCW_ALIGN_ORDER, \la
+#else
+	load32		pa_tlb_lock, \la
+#endif
 	rsm		PSW_SM_I,\flags
 1:	LDCW		0(\la),\tmp
 	cmpib,<>,n	0,\tmp,3f
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 30f92391a93ef6d5a90970b81921a2133d5e2eb0..cad3e8661cd6cf1895c8b69605dbdad730a4c8e6 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -39,6 +39,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/personality.h>
 #include <linux/ptrace.h>
@@ -183,6 +184,44 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r)
 	return 1;
 }
 
+/*
+ * Idle thread support
+ *
+ * Detect when running on QEMU with SeaBIOS PDC Firmware and let
+ * QEMU idle the host too.
+ */
+
+int running_on_qemu __read_mostly;
+
+void __cpuidle arch_cpu_idle_dead(void)
+{
+	/* nop on real hardware, qemu will offline CPU. */
+	asm volatile("or %%r31,%%r31,%%r31\n":::);
+}
+
+void __cpuidle arch_cpu_idle(void)
+{
+	local_irq_enable();
+
+	/* nop on real hardware, qemu will idle sleep. */
+	asm volatile("or %%r10,%%r10,%%r10\n":::);
+}
+
+static int __init parisc_idle_init(void)
+{
+	const char *marker;
+
+	/* check QEMU/SeaBIOS marker in PAGE0 */
+	marker = (char *) &PAGE0->pad0;
+	running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0);
+
+	if (!running_on_qemu)
+		cpu_idle_poll_ctrl(1);
+
+	return 0;
+}
+arch_initcall(parisc_idle_init);
+
 /*
  * Copy architecture-specific thread state
  */
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 5a657986ebbf4bef7beff4e8c8d20f1343872347..143f90e2f9f3c631616d4af52f0fe3fa08f44af9 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include <linux/kallsyms.h>
 #include <linux/sort.h>
-#include <linux/sched.h>
 
 #include <linux/uaccess.h>
 #include <asm/assembly.h>
diff --git a/arch/parisc/lib/delay.c b/arch/parisc/lib/delay.c
index 7eab4bb8abe630b14c54c3b457285b4228607dc6..66e506520505d8a3245d49d492831df5e3bbb42a 100644
--- a/arch/parisc/lib/delay.c
+++ b/arch/parisc/lib/delay.c
@@ -16,9 +16,7 @@
 #include <linux/preempt.h>
 #include <linux/init.h>
 
-#include <asm/processor.h>
 #include <asm/delay.h>
-
 #include <asm/special_insns.h>    /* for mfctl() */
 #include <asm/processor.h> /* for boot_cpu_data */
 
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 13f7854e0d49cc796d98a438cad64d76916d6bc9..48f41399fc0b8b63acd84d774a09ad2d0aba5086 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -631,11 +631,11 @@ void __init mem_init(void)
 	mem_init_print_info(NULL);
 #ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */
 	printk("virtual kernel memory layout:\n"
-	       "    vmalloc : 0x%p - 0x%p   (%4ld MB)\n"
-	       "    memory  : 0x%p - 0x%p   (%4ld MB)\n"
-	       "      .init : 0x%p - 0x%p   (%4ld kB)\n"
-	       "      .data : 0x%p - 0x%p   (%4ld kB)\n"
-	       "      .text : 0x%p - 0x%p   (%4ld kB)\n",
+	       "    vmalloc : 0x%px - 0x%px   (%4ld MB)\n"
+	       "    memory  : 0x%px - 0x%px   (%4ld MB)\n"
+	       "      .init : 0x%px - 0x%px   (%4ld kB)\n"
+	       "      .data : 0x%px - 0x%px   (%4ld kB)\n"
+	       "      .text : 0x%px - 0x%px   (%4ld kB)\n",
 
 	       (void*)VMALLOC_START, (void*)VMALLOC_END,
 	       (VMALLOC_END - VMALLOC_START) >> 20,
diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index a703452d67b62f63b455098e88988fcc0b3776c6..555e22d5e07f9e21c322af718df72bdf2da0dfa4 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -209,5 +209,11 @@ exc_##label##_book3e:
 	ori	r3,r3,vector_offset@l;		\
 	mtspr	SPRN_IVOR##vector_number,r3;
 
+#define RFI_TO_KERNEL							\
+	rfi
+
+#define RFI_TO_USER							\
+	rfi
+
 #endif /* _ASM_POWERPC_EXCEPTION_64E_H */
 
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index b27205297e1d9ca5ca46db9e7189bf9187385803..7197b179c1b150ba819f13a1f7feb6c9a45da1f9 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -74,6 +74,59 @@
  */
 #define EX_R3		EX_DAR
 
+/*
+ * Macros for annotating the expected destination of (h)rfid
+ *
+ * The nop instructions allow us to insert one or more instructions to flush the
+ * L1-D cache when returning to userspace or a guest.
+ */
+#define RFI_FLUSH_SLOT							\
+	RFI_FLUSH_FIXUP_SECTION;					\
+	nop;								\
+	nop;								\
+	nop
+
+#define RFI_TO_KERNEL							\
+	rfid
+
+#define RFI_TO_USER							\
+	RFI_FLUSH_SLOT;							\
+	rfid;								\
+	b	rfi_flush_fallback
+
+#define RFI_TO_USER_OR_KERNEL						\
+	RFI_FLUSH_SLOT;							\
+	rfid;								\
+	b	rfi_flush_fallback
+
+#define RFI_TO_GUEST							\
+	RFI_FLUSH_SLOT;							\
+	rfid;								\
+	b	rfi_flush_fallback
+
+#define HRFI_TO_KERNEL							\
+	hrfid
+
+#define HRFI_TO_USER							\
+	RFI_FLUSH_SLOT;							\
+	hrfid;								\
+	b	hrfi_flush_fallback
+
+#define HRFI_TO_USER_OR_KERNEL						\
+	RFI_FLUSH_SLOT;							\
+	hrfid;								\
+	b	hrfi_flush_fallback
+
+#define HRFI_TO_GUEST							\
+	RFI_FLUSH_SLOT;							\
+	hrfid;								\
+	b	hrfi_flush_fallback
+
+#define HRFI_TO_UNKNOWN							\
+	RFI_FLUSH_SLOT;							\
+	hrfid;								\
+	b	hrfi_flush_fallback
+
 #ifdef CONFIG_RELOCATABLE
 #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)			\
 	mfspr	r11,SPRN_##h##SRR0;	/* save SRR0 */			\
@@ -218,7 +271,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	mtspr	SPRN_##h##SRR0,r12;					\
 	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
 	mtspr	SPRN_##h##SRR1,r10;					\
-	h##rfid;							\
+	h##RFI_TO_KERNEL;						\
 	b	.	/* prevent speculative execution */
 #define EXCEPTION_PROLOG_PSERIES_1(label, h)				\
 	__EXCEPTION_PROLOG_PSERIES_1(label, h)
@@ -232,7 +285,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	mtspr	SPRN_##h##SRR0,r12;					\
 	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
 	mtspr	SPRN_##h##SRR1,r10;					\
-	h##rfid;							\
+	h##RFI_TO_KERNEL;						\
 	b	.	/* prevent speculative execution */
 
 #define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h)			\
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 8f88f771cc55ce982c11599b273e584babd38600..1e82eb3caabd19c69289957da188b563d0bcd0d6 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -187,7 +187,20 @@ label##3:					       	\
 	FTR_ENTRY_OFFSET label##1b-label##3b;		\
 	.popsection;
 
+#define RFI_FLUSH_FIXUP_SECTION				\
+951:							\
+	.pushsection __rfi_flush_fixup,"a";		\
+	.align 2;					\
+952:							\
+	FTR_ENTRY_OFFSET 951b-952b;			\
+	.popsection;
+
+
 #ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+
 void apply_feature_fixups(void);
 void setup_feature_keys(void);
 #endif
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a409177be8bdfd5f717af6111700d088fb91e61b..f0461618bf7bee95ffd27874e33501bd41ef80a4 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -241,6 +241,7 @@
 #define H_GET_HCA_INFO          0x1B8
 #define H_GET_PERF_COUNT        0x1BC
 #define H_MANAGE_TRACE          0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
 #define H_QUERY_INT_STATE       0x1E4
 #define H_POLL_PENDING		0x1D8
@@ -330,6 +331,17 @@
 #define H_SIGNAL_SYS_RESET_ALL_OTHERS		-2
 /* >= 0 values are CPU number */
 
+/* H_GET_CPU_CHARACTERISTICS return values */
+#define H_CPU_CHAR_SPEC_BAR_ORI31	(1ull << 63) // IBM bit 0
+#define H_CPU_CHAR_BCCTRL_SERIALISED	(1ull << 62) // IBM bit 1
+#define H_CPU_CHAR_L1D_FLUSH_ORI30	(1ull << 61) // IBM bit 2
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2	(1ull << 60) // IBM bit 3
+#define H_CPU_CHAR_L1D_THREAD_PRIV	(1ull << 59) // IBM bit 4
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY	(1ull << 63) // IBM bit 0
+#define H_CPU_BEHAV_L1D_FLUSH_PR	(1ull << 62) // IBM bit 1
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ull << 61) // IBM bit 2
+
 /* Flag values used in H_REGISTER_PROC_TBL hcall */
 #define PROC_TABLE_OP_MASK	0x18
 #define PROC_TABLE_DEREG	0x10
@@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
 	}
 }
 
+struct h_cpu_char_result {
+	u64 character;
+	u64 behaviour;
+};
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 73b92017b6d7b5231cd8c9d46b7986bb9131f12c..cd2fc1cc1cc7c056255b6f49effefb1e6d0cc1e6 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -76,6 +76,7 @@ struct machdep_calls {
 
 	void __noreturn	(*restart)(char *cmd);
 	void __noreturn (*halt)(void);
+	void		(*panic)(char *str);
 	void		(*cpu_die)(void);
 
 	long		(*time_init)(void); /* Optional, may be NULL */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 6177d43f0ce8afa9c1f6a1101e92ba161e47d97a..e2a2b8400490049143edee40316313a906ca6db7 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
 #endif
 }
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-				 struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+				struct mm_struct *mm)
 {
+	return 0;
 }
 
 #ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 3892db93b8374e1f1256a6b497d384f3faedc06e..23ac7fc0af23b6cae8f4706665102dd7e2050667 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -232,6 +232,16 @@ struct paca_struct {
 	struct sibling_subcore_state *sibling_subcore_state;
 #endif
 #endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * rfi fallback flush must be in its own cacheline to prevent
+	 * other paca data leaking into the L1d
+	 */
+	u64 exrfi[EX_SIZE] __aligned(0x80);
+	void *rfi_flush_fallback_area;
+	u64 l1d_flush_congruence;
+	u64 l1d_flush_sets;
+#endif
 };
 
 extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 7f01b22fa6cb0d0895c914423a98cb2361fb2a16..55eddf50d1498020ba7f17216c689ab89b84f3c7 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
 	return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
 }
 
+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
+	if (rc == H_SUCCESS) {
+		p->character = retbuf[0];
+		p->behaviour = retbuf[1];
+	}
+
+	return rc;
+}
+
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 257d23dbf55dce902644334280e9d72b3ff91ba8..469b7fdc9be41cd9ab0ceb7f50c2b633c19f01a4 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -24,6 +24,7 @@ extern void reloc_got2(unsigned long);
 
 void check_for_initrd(void);
 void initmem_init(void);
+void setup_panic(void);
 #define ARCH_PANIC_TIMEOUT 180
 
 #ifdef CONFIG_PPC_PSERIES
@@ -38,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
 static inline void pseries_little_endian_exceptions(void) {}
 #endif /* CONFIG_PPC_PSERIES */
 
+void rfi_flush_enable(bool enable);
+
+/* These are bit flags */
+enum l1d_flush_type {
+	L1D_FLUSH_NONE		= 0x1,
+	L1D_FLUSH_FALLBACK	= 0x2,
+	L1D_FLUSH_ORI		= 0x4,
+	L1D_FLUSH_MTTRIG	= 0x8,
+};
+
+void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void do_rfi_flush_fixups(enum l1d_flush_type types);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif	/* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 0d960ef78a9a95a682fe17d8e5050e3803a57323..1a6ed5919ffdb13878ab7f4a8c2f958c8a41f166 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += bpf_perf_event.h
 generic-y += param.h
 generic-y += poll.h
 generic-y += resource.h
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6b958414b4e036ac1e4c97bceb61277ffab65e76..f390d57cf2e1a711335bbd66cf7819e9dcd8442f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -237,6 +237,11 @@ int main(void)
 	OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
 	OFFSET(PACA_IN_MCE, paca_struct, in_mce);
 	OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
+	OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
+	OFFSET(PACA_EXRFI, paca_struct, exrfi);
+	OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
+	OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+
 #endif
 	OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
 	OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 610955fe8b81c528420cc43fec3c92cf80924763..679bbe714e8561b8c259f37bbfc9db8264555554 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -102,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
 	li	r0,0
 	mtspr	SPRN_PSSCR,r0
 	mtspr	SPRN_LPID,r0
+	mtspr	SPRN_PID,r0
 	mfspr	r3,SPRN_LPCR
 	LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE  | LPCR_HEIC)
 	or	r3, r3, r4
@@ -126,6 +127,7 @@ _GLOBAL(__restore_cpu_power9)
 	li	r0,0
 	mtspr	SPRN_PSSCR,r0
 	mtspr	SPRN_LPID,r0
+	mtspr	SPRN_PID,r0
 	mfspr   r3,SPRN_LPCR
 	LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
 	or	r3, r3, r4
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 3320bcac71928eeaf85b884076e90767be4d264f..2748584b767da3f5d788c0be27b27662053853e4 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -37,6 +37,11 @@
 #include <asm/tm.h>
 #include <asm/ppc-opcode.h>
 #include <asm/export.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
 
 /*
  * System calls.
@@ -262,13 +267,23 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	ld	r13,GPR13(r1)	/* only restore r13 if returning to usermode */
+	ld	r2,GPR2(r1)
+	ld	r1,GPR1(r1)
+	mtlr	r4
+	mtcr	r5
+	mtspr	SPRN_SRR0,r7
+	mtspr	SPRN_SRR1,r8
+	RFI_TO_USER
+	b	.	/* prevent speculative execution */
+
+	/* exit to kernel */
 1:	ld	r2,GPR2(r1)
 	ld	r1,GPR1(r1)
 	mtlr	r4
 	mtcr	r5
 	mtspr	SPRN_SRR0,r7
 	mtspr	SPRN_SRR1,r8
-	RFI
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 
 .Lsyscall_error:
@@ -397,8 +412,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	mtmsrd	r10, 1
 	mtspr	SPRN_SRR0, r11
 	mtspr	SPRN_SRR1, r12
-
-	rfid
+	RFI_TO_USER
 	b	.	/* prevent speculative execution */
 #endif
 _ASM_NOKPROBE_SYMBOL(system_call_common);
@@ -878,7 +892,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
 	REST_GPR(13, r1)
-1:
+
 	mtspr	SPRN_SRR1,r3
 
 	ld	r2,_CCR(r1)
@@ -891,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ld	r3,GPR3(r1)
 	ld	r4,GPR4(r1)
 	ld	r1,GPR1(r1)
+	RFI_TO_USER
+	b	.	/* prevent speculative execution */
 
-	rfid
+1:	mtspr	SPRN_SRR1,r3
+
+	ld	r2,_CCR(r1)
+	mtcrf	0xFF,r2
+	ld	r2,_NIP(r1)
+	mtspr	SPRN_SRR0,r2
+
+	ld	r0,GPR0(r1)
+	ld	r2,GPR2(r1)
+	ld	r3,GPR3(r1)
+	ld	r4,GPR4(r1)
+	ld	r1,GPR1(r1)
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 
 #endif /* CONFIG_PPC_BOOK3E */
@@ -1073,7 +1101,7 @@ __enter_rtas:
 	
 	mtspr	SPRN_SRR0,r5
 	mtspr	SPRN_SRR1,r6
-	rfid
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 
 rtas_return_loc:
@@ -1098,7 +1126,7 @@ rtas_return_loc:
 
 	mtspr	SPRN_SRR0,r3
 	mtspr	SPRN_SRR1,r4
-	rfid
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 _ASM_NOKPROBE_SYMBOL(__enter_rtas)
 _ASM_NOKPROBE_SYMBOL(rtas_return_loc)
@@ -1171,7 +1199,7 @@ _GLOBAL(enter_prom)
 	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
 	andc	r11,r11,r12
 	mtsrr1	r11
-	rfid
+	RFI_TO_KERNEL
 #endif /* CONFIG_PPC_BOOK3E */
 
 1:	/* Return from OF */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e441b469dc8f61f6381c7c95b086b677004d401b..2dc10bf646b887b51dc2dc28feeb4aac6d9fc00d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -256,7 +256,7 @@ BEGIN_FTR_SECTION
 	LOAD_HANDLER(r12, machine_check_handle_early)
 1:	mtspr	SPRN_SRR0,r12
 	mtspr	SPRN_SRR1,r11
-	rfid
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 2:
 	/* Stack overflow. Stay on emergency stack and panic.
@@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	li	r3,MSR_ME
 	andc	r10,r10,r3		/* Turn off MSR_ME */
 	mtspr	SPRN_SRR1,r10
-	rfid
+	RFI_TO_KERNEL
 	b	.
 2:
 	/*
@@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 */
 	bl	machine_check_queue_event
 	MACHINE_CHECK_HANDLER_WINDUP
-	rfid
+	RFI_TO_USER_OR_KERNEL
 9:
 	/* Deliver the machine check to host kernel in V mode. */
 	MACHINE_CHECK_HANDLER_WINDUP
@@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common)
 	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
 	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */
 
+	andi.	r9,r11,MSR_PR	// Check for exception from userspace
+	cmpdi	cr4,r9,MSR_PR	// And save the result in CR4 for later
+
 	/*
 	 * Test MSR_RI before calling slb_allocate_realmode, because the
 	 * MSR in r11 gets clobbered. However we still want to allocate
@@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
 	/* All done -- return from exception. */
 
+	bne	cr4,1f		/* returning to kernel */
+
 .machine	push
 .machine	"power4"
 	mtcrf	0x80,r9
+	mtcrf	0x08,r9		/* MSR[PR] indication is in cr4 */
 	mtcrf	0x04,r9		/* MSR[RI] indication is in cr5 */
 	mtcrf	0x02,r9		/* I/D indication is in cr6 */
 	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
@@ -640,9 +646,30 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 	ld	r11,PACA_EXSLB+EX_R11(r13)
 	ld	r12,PACA_EXSLB+EX_R12(r13)
 	ld	r13,PACA_EXSLB+EX_R13(r13)
-	rfid
+	RFI_TO_USER
+	b	.	/* prevent speculative execution */
+1:
+.machine	push
+.machine	"power4"
+	mtcrf	0x80,r9
+	mtcrf	0x08,r9		/* MSR[PR] indication is in cr4 */
+	mtcrf	0x04,r9		/* MSR[RI] indication is in cr5 */
+	mtcrf	0x02,r9		/* I/D indication is in cr6 */
+	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
+.machine	pop
+
+	RESTORE_CTR(r9, PACA_EXSLB)
+	RESTORE_PPR_PACA(PACA_EXSLB, r9)
+	mr	r3,r12
+	ld	r9,PACA_EXSLB+EX_R9(r13)
+	ld	r10,PACA_EXSLB+EX_R10(r13)
+	ld	r11,PACA_EXSLB+EX_R11(r13)
+	ld	r12,PACA_EXSLB+EX_R12(r13)
+	ld	r13,PACA_EXSLB+EX_R13(r13)
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 
+
 2:	std     r3,PACA_EXSLB+EX_DAR(r13)
 	mr	r3,r12
 	mfspr	r11,SPRN_SRR0
@@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 	mtspr	SPRN_SRR0,r10
 	ld	r10,PACAKMSR(r13)
 	mtspr	SPRN_SRR1,r10
-	rfid
+	RFI_TO_KERNEL
 	b	.
 
 8:	std     r3,PACA_EXSLB+EX_DAR(r13)
@@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 	mtspr	SPRN_SRR0,r10
 	ld	r10,PACAKMSR(r13)
 	mtspr	SPRN_SRR1,r10
-	rfid
+	RFI_TO_KERNEL
 	b	.
 
 EXC_COMMON_BEGIN(unrecov_slb)
@@ -901,7 +928,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 	mtspr	SPRN_SRR0,r10 ; 				\
 	ld	r10,PACAKMSR(r13) ;				\
 	mtspr	SPRN_SRR1,r10 ; 				\
-	rfid ; 							\
+	RFI_TO_KERNEL ;						\
 	b	. ;	/* prevent speculative execution */
 
 #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
@@ -917,7 +944,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)				\
 	xori	r12,r12,MSR_LE ;				\
 	mtspr	SPRN_SRR1,r12 ;					\
 	mr	r13,r9 ;					\
-	rfid ;		/* return to userspace */		\
+	RFI_TO_USER ;	/* return to userspace */		\
 	b	. ;	/* prevent speculative execution */
 #else
 #define SYSCALL_FASTENDIAN_TEST
@@ -1063,7 +1090,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
 	mtcr	r11
 	REST_GPR(11, r1)
 	ld	r1,GPR1(r1)
-	hrfid
+	HRFI_TO_USER_OR_KERNEL
 
 1:	mtcr	r11
 	REST_GPR(11, r1)
@@ -1314,7 +1341,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	ld	r11,PACA_EXGEN+EX_R11(r13)
 	ld	r12,PACA_EXGEN+EX_R12(r13)
 	ld	r13,PACA_EXGEN+EX_R13(r13)
-	HRFID
+	HRFI_TO_UNKNOWN
 	b	.
 #endif
 
@@ -1418,10 +1445,94 @@ masked_##_H##interrupt:					\
 	ld	r10,PACA_EXGEN+EX_R10(r13);		\
 	ld	r11,PACA_EXGEN+EX_R11(r13);		\
 	/* returns to kernel where r13 must be set up, so don't restore it */ \
-	##_H##rfid;					\
+	##_H##RFI_TO_KERNEL;				\
 	b	.;					\
 	MASKED_DEC_HANDLER(_H)
 
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+	SET_SCRATCH0(r13);
+	GET_PACA(r13);
+	std	r9,PACA_EXRFI+EX_R9(r13)
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	std	r11,PACA_EXRFI+EX_R11(r13)
+	std	r12,PACA_EXRFI+EX_R12(r13)
+	std	r8,PACA_EXRFI+EX_R13(r13)
+	mfctr	r9
+	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+	ld	r11,PACA_L1D_FLUSH_SETS(r13)
+	ld	r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+	/*
+	 * The load adresses are at staggered offsets within cachelines,
+	 * which suits some pipelines better (on others it should not
+	 * hurt).
+	 */
+	addi	r12,r12,8
+	mtctr	r11
+	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+	/* order ld/st prior to dcbt stop all streams with flushing */
+	sync
+1:	li	r8,0
+	.rept	8 /* 8-way set associative */
+	ldx	r11,r10,r8
+	add	r8,r8,r12
+	xor	r11,r11,r11	// Ensure r11 is 0 even if fallback area is not
+	add	r8,r8,r11	// Add 0, this creates a dependency on the ldx
+	.endr
+	addi	r10,r10,128 /* 128 byte cache line */
+	bdnz	1b
+
+	mtctr	r9
+	ld	r9,PACA_EXRFI+EX_R9(r13)
+	ld	r10,PACA_EXRFI+EX_R10(r13)
+	ld	r11,PACA_EXRFI+EX_R11(r13)
+	ld	r12,PACA_EXRFI+EX_R12(r13)
+	ld	r8,PACA_EXRFI+EX_R13(r13)
+	GET_SCRATCH0(r13);
+	rfid
+
+TRAMP_REAL_BEGIN(hrfi_flush_fallback)
+	SET_SCRATCH0(r13);
+	GET_PACA(r13);
+	std	r9,PACA_EXRFI+EX_R9(r13)
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	std	r11,PACA_EXRFI+EX_R11(r13)
+	std	r12,PACA_EXRFI+EX_R12(r13)
+	std	r8,PACA_EXRFI+EX_R13(r13)
+	mfctr	r9
+	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+	ld	r11,PACA_L1D_FLUSH_SETS(r13)
+	ld	r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+	/*
+	 * The load adresses are at staggered offsets within cachelines,
+	 * which suits some pipelines better (on others it should not
+	 * hurt).
+	 */
+	addi	r12,r12,8
+	mtctr	r11
+	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+	/* order ld/st prior to dcbt stop all streams with flushing */
+	sync
+1:	li	r8,0
+	.rept	8 /* 8-way set associative */
+	ldx	r11,r10,r8
+	add	r8,r8,r12
+	xor	r11,r11,r11	// Ensure r11 is 0 even if fallback area is not
+	add	r8,r8,r11	// Add 0, this creates a dependency on the ldx
+	.endr
+	addi	r10,r10,128 /* 128 byte cache line */
+	bdnz	1b
+
+	mtctr	r9
+	ld	r9,PACA_EXRFI+EX_R9(r13)
+	ld	r10,PACA_EXRFI+EX_R10(r13)
+	ld	r11,PACA_EXRFI+EX_R11(r13)
+	ld	r12,PACA_EXRFI+EX_R12(r13)
+	ld	r8,PACA_EXRFI+EX_R13(r13)
+	GET_SCRATCH0(r13);
+	hrfid
+
 /*
  * Real mode exceptions actually use this too, but alternate
  * instruction code patches (which end up in the common .text area)
@@ -1441,7 +1552,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
 	addi	r13, r13, 4
 	mtspr	SPRN_SRR0, r13
 	GET_SCRATCH0(r13)
-	rfid
+	RFI_TO_KERNEL
 	b	.
 
 TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
@@ -1453,7 +1564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
 	addi	r13, r13, 4
 	mtspr	SPRN_HSRR0, r13
 	GET_SCRATCH0(r13)
-	hrfid
+	HRFI_TO_KERNEL
 	b	.
 #endif
 
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 04ea5c04fd24825a6c6affc4d7d6956b30321b84..3c2c2688918fffdcfbdbb64e2ec2f1b0dfe1ccf9 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1462,25 +1462,6 @@ static void fadump_init_files(void)
 	return;
 }
 
-static int fadump_panic_event(struct notifier_block *this,
-			      unsigned long event, void *ptr)
-{
-	/*
-	 * If firmware-assisted dump has been registered then trigger
-	 * firmware-assisted dump and let firmware handle everything
-	 * else. If this returns, then fadump was not registered, so
-	 * go through the rest of the panic path.
-	 */
-	crash_fadump(NULL, ptr);
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block fadump_panic_block = {
-	.notifier_call = fadump_panic_event,
-	.priority = INT_MIN /* may not return; must be done last */
-};
-
 /*
  * Prepare for firmware-assisted dump.
  */
@@ -1513,9 +1494,6 @@ int __init setup_fadump(void)
 		init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
 	fadump_init_files();
 
-	atomic_notifier_chain_register(&panic_notifier_list,
-					&fadump_panic_block);
-
 	return 1;
 }
 subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 5acb5a176dbe5c8bffe6ddb7458b7d3ac2b7019f..72be0c32e902a35fa45e5ed02036df91999dda58 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
 
 	printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
 	       regs->nip, regs->link, regs->ctr);
-	printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
+	printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
 	       regs, regs->trap, print_tainted(), init_utsname()->release);
 	printk("MSR:  "REG" ", regs->msr);
 	print_msr_bits(regs->msr);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2075322cd22522edf7de78e32ef2679fae8e9913..9d213542a48bb91360b7b540ef429473494ebe98 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -704,6 +704,30 @@ int check_legacy_ioport(unsigned long base_port)
 }
 EXPORT_SYMBOL(check_legacy_ioport);
 
+static int ppc_panic_event(struct notifier_block *this,
+                             unsigned long event, void *ptr)
+{
+	/*
+	 * If firmware-assisted dump has been registered then trigger
+	 * firmware-assisted dump and let firmware handle everything else.
+	 */
+	crash_fadump(NULL, ptr);
+	ppc_md.panic(ptr);  /* May not return */
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_panic_block = {
+	.notifier_call = ppc_panic_event,
+	.priority = INT_MIN /* may not return; must be done last */
+};
+
+void __init setup_panic(void)
+{
+	if (!ppc_md.panic)
+		return;
+	atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
+}
+
 #ifdef CONFIG_CHECK_CACHE_COHERENCY
 /*
  * For platforms that have configurable cache-coherency.  This function
@@ -848,6 +872,9 @@ void __init setup_arch(char **cmdline_p)
 	/* Probe the machine type, establish ppc_md. */
 	probe_machine();
 
+	/* Setup panic notifier if requested by the platform. */
+	setup_panic();
+
 	/*
 	 * Configure ppc_md.power_save (ppc32 only, 64-bit machines do
 	 * it from their respective probe() function.
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 8956a9856604e72634dd11ef416ec9788660bc37..491be4179ddd989fed6cba1e63963bf47653f4ba 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -801,3 +801,104 @@ static int __init disable_hardlockup_detector(void)
 	return 0;
 }
 early_initcall(disable_hardlockup_detector);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+bool rfi_flush;
+
+static int __init handle_no_rfi_flush(char *p)
+{
+	pr_info("rfi-flush: disabled on command line.");
+	no_rfi_flush = true;
+	return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see doco that says to use
+ * nopti we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+	pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+	handle_no_rfi_flush(NULL);
+	return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+	/*
+	 * We don't need to do the flush explicitly, just enter+exit kernel is
+	 * sufficient, the RFI exit handlers will do the right thing.
+	 */
+}
+
+void rfi_flush_enable(bool enable)
+{
+	if (rfi_flush == enable)
+		return;
+
+	if (enable) {
+		do_rfi_flush_fixups(enabled_flush_types);
+		on_each_cpu(do_nothing, NULL, 1);
+	} else
+		do_rfi_flush_fixups(L1D_FLUSH_NONE);
+
+	rfi_flush = enable;
+}
+
+static void init_fallback_flush(void)
+{
+	u64 l1d_size, limit;
+	int cpu;
+
+	l1d_size = ppc64_caches.l1d.size;
+	limit = min(safe_stack_limit(), ppc64_rma_size);
+
+	/*
+	 * Align to L1d size, and size it at 2x L1d size, to catch possible
+	 * hardware prefetch runoff. We don't have a recipe for load patterns to
+	 * reliably avoid the prefetcher.
+	 */
+	l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
+	memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+
+	for_each_possible_cpu(cpu) {
+		/*
+		 * The fallback flush is currently coded for 8-way
+		 * associativity. Different associativity is possible, but it
+		 * will be treated as 8-way and may not evict the lines as
+		 * effectively.
+		 *
+		 * 128 byte lines are mandatory.
+		 */
+		u64 c = l1d_size / 8;
+
+		paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
+		paca[cpu].l1d_flush_congruence = c;
+		paca[cpu].l1d_flush_sets = c / 128;
+	}
+}
+
+void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+	if (types & L1D_FLUSH_FALLBACK) {
+		pr_info("rfi-flush: Using fallback displacement flush\n");
+		init_fallback_flush();
+	}
+
+	if (types & L1D_FLUSH_ORI)
+		pr_info("rfi-flush: Using ori type flush\n");
+
+	if (types & L1D_FLUSH_MTTRIG)
+		pr_info("rfi-flush: Using mttrig type flush\n");
+
+	enabled_flush_types = types;
+
+	if (!no_rfi_flush)
+		rfi_flush_enable(enable);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 0494e1566ee2ab9b976cb0d9edb8c195a4490a23..307843d23682a79f0e4d9ae0cdb86a219e0b6e6a 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -132,6 +132,15 @@ SECTIONS
 	/* Read-only data */
 	RO_DATA(PAGE_SIZE)
 
+#ifdef CONFIG_PPC64
+	. = ALIGN(8);
+	__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
+		__start___rfi_flush_fixup = .;
+		*(__rfi_flush_fixup)
+		__stop___rfi_flush_fixup = .;
+	}
+#endif
+
 	EXCEPTION_TABLE(0)
 
 	NOTES :kernel :notes
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 29ebe2fd58674c59f27803a5426e4d2414f39418..a93d719edc906887b0f4bf412b2b76ac04babfec 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 		gpte->may_read = true;
 		gpte->may_write = true;
 		gpte->page_size = MMU_PAGE_4K;
+		gpte->wimg = HPTE_R_M;
 
 		return 0;
 	}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 966097232d2147bbcd79df41354a5f973fb9b7c1..b73dbc9e797da76bf4305a73972c4af925fbd70d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -65,11 +65,17 @@ struct kvm_resize_hpt {
 	u32 order;
 
 	/* These fields protected by kvm->lock */
+
+	/* Possible values and their usage:
+	 *  <0     an error occurred during allocation,
+	 *  -EBUSY allocation is in the progress,
+	 *  0      allocation made successfuly.
+	 */
 	int error;
-	bool prepare_done;
 
-	/* Private to the work thread, until prepare_done is true,
-	 * then protected by kvm->resize_hpt_sem */
+	/* Private to the work thread, until error != -EBUSY,
+	 * then protected by kvm->lock.
+	 */
 	struct kvm_hpt_info hpt;
 };
 
@@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
 		 * Reset all the reverse-mapping chains for all memslots
 		 */
 		kvmppc_rmap_reset(kvm);
-		/* Ensure that each vcpu will flush its TLB on next entry. */
-		cpumask_setall(&kvm->arch.need_tlb_flush);
 		err = 0;
 		goto out;
 	}
@@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
 	kvmppc_set_hpt(kvm, &info);
 
 out:
+	if (err == 0)
+		/* Ensure that each vcpu will flush its TLB on next entry. */
+		cpumask_setall(&kvm->arch.need_tlb_flush);
+
 	mutex_unlock(&kvm->lock);
 	return err;
 }
@@ -1413,16 +1421,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
 
 static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
 {
-	BUG_ON(kvm->arch.resize_hpt != resize);
+	if (WARN_ON(!mutex_is_locked(&kvm->lock)))
+		return;
 
 	if (!resize)
 		return;
 
-	if (resize->hpt.virt)
-		kvmppc_free_hpt(&resize->hpt);
+	if (resize->error != -EBUSY) {
+		if (resize->hpt.virt)
+			kvmppc_free_hpt(&resize->hpt);
+		kfree(resize);
+	}
 
-	kvm->arch.resize_hpt = NULL;
-	kfree(resize);
+	if (kvm->arch.resize_hpt == resize)
+		kvm->arch.resize_hpt = NULL;
 }
 
 static void resize_hpt_prepare_work(struct work_struct *work)
@@ -1431,17 +1443,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)
 						     struct kvm_resize_hpt,
 						     work);
 	struct kvm *kvm = resize->kvm;
-	int err;
+	int err = 0;
 
-	resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
-			 resize->order);
-
-	err = resize_hpt_allocate(resize);
+	if (WARN_ON(resize->error != -EBUSY))
+		return;
 
 	mutex_lock(&kvm->lock);
 
+	/* Request is still current? */
+	if (kvm->arch.resize_hpt == resize) {
+		/* We may request large allocations here:
+		 * do not sleep with kvm->lock held for a while.
+		 */
+		mutex_unlock(&kvm->lock);
+
+		resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
+				 resize->order);
+
+		err = resize_hpt_allocate(resize);
+
+		/* We have strict assumption about -EBUSY
+		 * when preparing for HPT resize.
+		 */
+		if (WARN_ON(err == -EBUSY))
+			err = -EINPROGRESS;
+
+		mutex_lock(&kvm->lock);
+		/* It is possible that kvm->arch.resize_hpt != resize
+		 * after we grab kvm->lock again.
+		 */
+	}
+
 	resize->error = err;
-	resize->prepare_done = true;
+
+	if (kvm->arch.resize_hpt != resize)
+		resize_hpt_release(kvm, resize);
 
 	mutex_unlock(&kvm->lock);
 }
@@ -1466,14 +1502,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
 
 	if (resize) {
 		if (resize->order == shift) {
-			/* Suitable resize in progress */
-			if (resize->prepare_done) {
-				ret = resize->error;
-				if (ret != 0)
-					resize_hpt_release(kvm, resize);
-			} else {
+			/* Suitable resize in progress? */
+			ret = resize->error;
+			if (ret == -EBUSY)
 				ret = 100; /* estimated time in ms */
-			}
+			else if (ret)
+				resize_hpt_release(kvm, resize);
 
 			goto out;
 		}
@@ -1493,6 +1527,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
 		ret = -ENOMEM;
 		goto out;
 	}
+
+	resize->error = -EBUSY;
 	resize->order = shift;
 	resize->kvm = kvm;
 	INIT_WORK(&resize->work, resize_hpt_prepare_work);
@@ -1547,16 +1583,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
 	if (!resize || (resize->order != shift))
 		goto out;
 
-	ret = -EBUSY;
-	if (!resize->prepare_done)
-		goto out;
-
 	ret = resize->error;
-	if (ret != 0)
+	if (ret)
 		goto out;
 
 	ret = resize_hpt_rehash(resize);
-	if (ret != 0)
+	if (ret)
 		goto out;
 
 	resize_hpt_pivot(resize);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2659844784b817d7ca77e6e95ecb9418f430e62d..9c61f736c75b2d0761ec4f9d2e385df75b6dc882 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
 	mtmsrd	r0,1		/* clear RI in MSR */
 	mtsrr0	r5
 	mtsrr1	r6
-	RFI
+	RFI_TO_KERNEL
 
 kvmppc_call_hv_entry:
 BEGIN_FTR_SECTION
@@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mtmsrd	r6, 1			/* Clear RI in MSR */
 	mtsrr0	r8
 	mtsrr1	r7
-	RFI
+	RFI_TO_KERNEL
 
 	/* Virtual-mode return */
 .Lvirt_return:
@@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
 	ld	r0, VCPU_GPR(R0)(r4)
 	ld	r4, VCPU_GPR(R4)(r4)
-
-	hrfid
+	HRFI_TO_GUEST
 	b	.
 
 secondary_too_late:
@@ -3320,7 +3319,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	ld	r4, PACAKMSR(r13)
 	mtspr	SPRN_SRR0, r3
 	mtspr	SPRN_SRR1, r4
-	rfid
+	RFI_TO_KERNEL
 9:	addi	r3, r1, STACK_FRAME_OVERHEAD
 	bl	kvmppc_bad_interrupt
 	b	9b
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d0dc8624198f8e4663800c4ca603aea98d4ba128..7deaeeb14b9358d8a4e6f1107cd42bba0605a264 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 #define MSR_USER32 MSR_USER
 #define MSR_USER64 MSR_USER
 #define HW_PAGE_SIZE PAGE_SIZE
+#define HPTE_R_M   _PAGE_COHERENT
 #endif
 
 static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
@@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		pte.eaddr = eaddr;
 		pte.vpage = eaddr >> 12;
 		pte.page_size = MMU_PAGE_64K;
+		pte.wimg = HPTE_R_M;
 	}
 
 	switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 42a4b237df5f5731a4f6964feafc4db2835ffdee..34a5adeff0840662e8eae4553b008069bfd9426d 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -46,6 +46,9 @@
 
 #define FUNC(name)		name
 
+#define RFI_TO_KERNEL	RFI
+#define RFI_TO_GUEST	RFI
+
 .macro INTERRUPT_TRAMPOLINE intno
 
 .global kvmppc_trampoline_\intno
@@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:
 	GET_SCRATCH0(r13)
 
 	/* And get back into the code */
-	RFI
+	RFI_TO_KERNEL
 #endif
 
 /*
@@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
 	ori	r5, r5, MSR_EE
 	mtsrr0	r7
 	mtsrr1	r6
-	RFI
+	RFI_TO_KERNEL
 
 #include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 2a2b96d5399917398312dacb7c7b2846483a7fd4..93a180ceefad03343d1f9064420ee00ca54973d7 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -156,7 +156,7 @@ no_dcbz32_on:
 	PPC_LL	r9, SVCPU_R9(r3)
 	PPC_LL	r3, (SVCPU_R3)(r3)
 
-	RFI
+	RFI_TO_GUEST
 kvmppc_handler_trampoline_enter_end:
 
 
@@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	cmpwi	r12, BOOK3S_INTERRUPT_DOORBELL
 	beqa	BOOK3S_INTERRUPT_DOORBELL
 
-	RFI
+	RFI_TO_KERNEL
 kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index bf457843e03217b9aa02815d7791f0fce72aea2b..0d750d274c4e21a3324eb3505bbd73c86a58cdc9 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
 
 	/* Return the per-cpu state for state saving/migration */
 	return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
-	       (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+	       (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
+	       (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
 }
 
 int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 
 	/*
 	 * Restore P and Q. If the interrupt was pending, we
-	 * force both P and Q, which will trigger a resend.
+	 * force Q and !P, which will trigger a resend.
 	 *
 	 * That means that a guest that had both an interrupt
 	 * pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 	 * is perfectly fine as coalescing interrupts that haven't
 	 * been presented yet is always allowed.
 	 */
-	if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+	if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
 		state->old_p = true;
 	if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
 		state->old_q = true;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 41cf5ae273cf74a2747d13b9da2274b8eb2054cb..a95ea007d654d5db2b78d811a4ef21a750a95609 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 	}
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+	unsigned int instrs[3], *dest;
+	long *start, *end;
+	int i;
+
+	start = PTRRELOC(&__start___rfi_flush_fixup),
+	end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+	instrs[0] = 0x60000000; /* nop */
+	instrs[1] = 0x60000000; /* nop */
+	instrs[2] = 0x60000000; /* nop */
+
+	if (types & L1D_FLUSH_FALLBACK)
+		/* b .+16 to fallback flush */
+		instrs[0] = 0x48000010;
+
+	i = 0;
+	if (types & L1D_FLUSH_ORI) {
+		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+		instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+	}
+
+	if (types & L1D_FLUSH_MTTRIG)
+		instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+	for (i = 0; start < end; start++, i++) {
+		dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		patch_instruction(dest, instrs[0]);
+		patch_instruction(dest + 1, instrs[1]);
+		patch_instruction(dest + 2, instrs[2]);
+	}
+
+	printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 {
 	long *start, *end;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 4797d08581cec347b6cf34a316c1ef585209653c..6e1e3903538065becbab2ad47febad597f5d6d49 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -145,6 +145,11 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)
 	return __bad_area(regs, address, SEGV_MAPERR);
 }
 
+static noinline int bad_access(struct pt_regs *regs, unsigned long address)
+{
+	return __bad_area(regs, address, SEGV_ACCERR);
+}
+
 static int do_sigbus(struct pt_regs *regs, unsigned long address,
 		     unsigned int fault)
 {
@@ -490,7 +495,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 
 good_area:
 	if (unlikely(access_error(is_write, is_exec, vma)))
-		return bad_area(regs, address);
+		return bad_access(regs, address);
 
 	/*
 	 * If for any reason at all we couldn't handle the fault,
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 46d74e81aff1b4caad4769e7686fa0a800695cd4..d183b4801bdbded832b90d2aa1a18e713f70695b 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -763,7 +763,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			func = (u8 *) __bpf_call_base + imm;
 
 			/* Save skb pointer if we need to re-cache skb data */
-			if (bpf_helper_changes_pkt_data(func))
+			if ((ctx->seen & SEEN_SKB) &&
+			    bpf_helper_changes_pkt_data(func))
 				PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
 
 			bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			PPC_MR(b2p[BPF_REG_0], 3);
 
 			/* refresh skb cache */
-			if (bpf_helper_changes_pkt_data(func)) {
+			if ((ctx->seen & SEEN_SKB) &&
+			    bpf_helper_changes_pkt_data(func)) {
 				/* reload skb pointer to r3 */
 				PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
 				bpf_jit_emit_skb_loads(image, ctx);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 9e3da168d54cdcd36e3911ff040ff2ad187c92b7..fce545774d50afc6093c28ad2f4127c24ed5331c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
 	int ret;
 	__u64 target;
 
-	if (is_kernel_addr(addr))
-		return branch_target((unsigned int *)addr);
+	if (is_kernel_addr(addr)) {
+		if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+			return 0;
+
+		return branch_target(&instr);
+	}
 
 	/* Userspace: need copy instruction here then translate it */
 	pagefault_disable();
@@ -1415,7 +1419,7 @@ static int collect_events(struct perf_event *group, int max_count,
 	int n = 0;
 	struct perf_event *event;
 
-	if (!is_software_event(group)) {
+	if (group->pmu->task_ctx_nr == perf_hw_context) {
 		if (n >= max_count)
 			return -1;
 		ctrs[n] = group;
@@ -1423,7 +1427,7 @@ static int collect_events(struct perf_event *group, int max_count,
 		events[n++] = group->hw.config;
 	}
 	list_for_each_entry(event, &group->sibling_list, group_entry) {
-		if (!is_software_event(event) &&
+		if (event->pmu->task_ctx_nr == perf_hw_context &&
 		    event->state != PERF_EVENT_STATE_OFF) {
 			if (n >= max_count)
 				return -1;
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 0ead3cd73caa2f8816e8c04f47cca691efba0560..be4e7f84f70a59db60e92a9bfe845678f71cc608 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -309,6 +309,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
 	if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
 		return 0;
 
+	/*
+	 * Check whether nest_imc is registered. We could end up here if the
+	 * cpuhotplug callback registration fails. i.e, callback invokes the
+	 * offline path for all successfully registered nodes. At this stage,
+	 * nest_imc pmu will not be registered and we should return here.
+	 *
+	 * We return with a zero since this is not an offline failure. And
+	 * cpuhp_setup_state() returns the actual failure reason to the caller,
+	 * which in turn will call the cleanup routine.
+	 */
+	if (!nest_pmus)
+		return 0;
+
 	/*
 	 * Now that this cpu is one of the designated,
 	 * find a next cpu a) which is online and b) in same chip.
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
 		if (nest_pmus == 1) {
 			cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
 			kfree(nest_imc_refc);
+			kfree(per_nest_pmu_arr);
 		}
 
 		if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
 		kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
 	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
 	kfree(pmu_ptr);
-	kfree(per_nest_pmu_arr);
 	return;
 }
 
@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
 			ret = nest_pmu_cpumask_init();
 			if (ret) {
 				mutex_unlock(&nest_init_lock);
+				kfree(nest_imc_refc);
+				kfree(per_nest_pmu_arr);
 				goto err_free;
 			}
 		}
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 1edfbc1e40f4387b30dd5c0a9491935460feaf92..4fb21e17504aad72295a9e1cdffe54c5547379bd 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -37,13 +37,62 @@
 #include <asm/kexec.h>
 #include <asm/smp.h>
 #include <asm/tm.h>
+#include <asm/setup.h>
 
 #include "powernv.h"
 
+static void pnv_setup_rfi_flush(void)
+{
+	struct device_node *np, *fw_features;
+	enum l1d_flush_type type;
+	int enable;
+
+	/* Default to fallback in case fw-features are not available */
+	type = L1D_FLUSH_FALLBACK;
+	enable = 1;
+
+	np = of_find_node_by_name(NULL, "ibm,opal");
+	fw_features = of_get_child_by_name(np, "fw-features");
+	of_node_put(np);
+
+	if (fw_features) {
+		np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
+		if (np && of_property_read_bool(np, "enabled"))
+			type = L1D_FLUSH_MTTRIG;
+
+		of_node_put(np);
+
+		np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
+		if (np && of_property_read_bool(np, "enabled"))
+			type = L1D_FLUSH_ORI;
+
+		of_node_put(np);
+
+		/* Enable unless firmware says NOT to */
+		enable = 2;
+		np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
+		if (np && of_property_read_bool(np, "disabled"))
+			enable--;
+
+		of_node_put(np);
+
+		np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
+		if (np && of_property_read_bool(np, "disabled"))
+			enable--;
+
+		of_node_put(np);
+		of_node_put(fw_features);
+	}
+
+	setup_rfi_flush(type, enable > 0);
+}
+
 static void __init pnv_setup_arch(void)
 {
 	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 
+	pnv_setup_rfi_flush();
+
 	/* Initialize SMP */
 	pnv_smp_init();
 
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 9dabea6e14439647b726d007a63de63a853ac3d0..6244bc849469e33af7dcc5a4ac2b21dd970ac1c6 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -104,6 +104,20 @@ static void __noreturn ps3_halt(void)
 	ps3_sys_manager_halt(); /* never returns */
 }
 
+static void ps3_panic(char *str)
+{
+	DBG("%s:%d %s\n", __func__, __LINE__, str);
+
+	smp_send_stop();
+	printk("\n");
+	printk("   System does not reboot automatically.\n");
+	printk("   Please press POWER button.\n");
+	printk("\n");
+
+	while(1)
+		lv1_pause(1);
+}
+
 #if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \
     defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE)
 static void __init prealloc(struct ps3_prealloc *p)
@@ -255,6 +269,7 @@ define_machine(ps3) {
 	.probe				= ps3_probe,
 	.setup_arch			= ps3_setup_arch,
 	.init_IRQ			= ps3_init_IRQ,
+	.panic				= ps3_panic,
 	.get_boot_time			= ps3_get_boot_time,
 	.set_dabr			= ps3_set_dabr,
 	.calibrate_decr			= ps3_calibrate_decr,
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 6e35780c5962f25144b0181414c03a017840478f..a0b20c03f078cc41b9ad126fbf2603a4ffb68463 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
 
 static CLASS_ATTR_RW(dlpar);
 
-static int __init pseries_dlpar_init(void)
+int __init dlpar_workqueue_init(void)
 {
+	if (pseries_hp_wq)
+		return 0;
+
 	pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
-					WQ_UNBOUND, 1);
+			WQ_UNBOUND, 1);
+
+	return pseries_hp_wq ? 0 : -ENOMEM;
+}
+
+static int __init dlpar_sysfs_init(void)
+{
+	int rc;
+
+	rc = dlpar_workqueue_init();
+	if (rc)
+		return rc;
+
 	return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
 }
-machine_device_initcall(pseries, pseries_dlpar_init);
+machine_device_initcall(pseries, dlpar_sysfs_init);
 
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 4470a3194311e06e040c624b8b1491eb0e071298..1ae1d9f4dbe99935130971cdc390d8f581ad788e 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void)
 	return CMO_PageSize;
 }
 
+int dlpar_workqueue_init(void);
+
 #endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 4923ffe230cf926565c0ed4c9a339a822b073c15..81d8614e73790b1923a3c8cfd336a2531fee76ce 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void)
 	/* Hotplug Events */
 	np = of_find_node_by_path("/event-sources/hot-plug-events");
 	if (np != NULL) {
-		request_event_sources_irqs(np, ras_hotplug_interrupt,
+		if (dlpar_workqueue_init() == 0)
+			request_event_sources_irqs(np, ras_hotplug_interrupt,
 					   "RAS_HOTPLUG");
 		of_node_put(np);
 	}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 5f1beb8367acaa4562dbc647ad06a1f4c12a8f72..ae4f596273b51a836e5d27307f81bfe6438590bf 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void)
 	of_pci_check_probe_only();
 }
 
+static void pseries_setup_rfi_flush(void)
+{
+	struct h_cpu_char_result result;
+	enum l1d_flush_type types;
+	bool enable;
+	long rc;
+
+	/* Enable by default */
+	enable = true;
+
+	rc = plpar_get_cpu_characteristics(&result);
+	if (rc == H_SUCCESS) {
+		types = L1D_FLUSH_NONE;
+
+		if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+			types |= L1D_FLUSH_MTTRIG;
+		if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+			types |= L1D_FLUSH_ORI;
+
+		/* Use fallback if nothing set in hcall */
+		if (types == L1D_FLUSH_NONE)
+			types = L1D_FLUSH_FALLBACK;
+
+		if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+			enable = false;
+	} else {
+		/* Default to fallback if case hcall is not available */
+		types = L1D_FLUSH_FALLBACK;
+	}
+
+	setup_rfi_flush(types, enable);
+}
+
 static void __init pSeries_setup_arch(void)
 {
 	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void)
 
 	fwnmi_init();
 
+	pseries_setup_rfi_flush();
+
 	/* By default, only probe PCI (can be overridden by rtas_pci) */
 	pci_add_flags(PCI_PROBE_ONLY);
 
@@ -726,6 +761,7 @@ define_machine(pseries) {
 	.pcibios_fixup		= pSeries_final_fixup,
 	.restart		= rtas_restart,
 	.halt			= rtas_halt,
+	.panic			= rtas_os_term,
 	.get_boot_time		= rtas_get_boot_time,
 	.get_rtc_time		= rtas_get_rtc_time,
 	.set_rtc_time		= rtas_set_rtc_time,
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 44cbf4c12ea137562f02758aab4e2e604f54e96f..df95102e732cb466911b32632fd53e3f3369bf54 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
 }
 
 static struct lock_class_key fsl_msi_irq_class;
+static struct lock_class_key fsl_msi_irq_request_class;
 
 static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
 			       int offset, int irq_index)
@@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
 		dev_err(&dev->dev, "No memory for MSI cascade data\n");
 		return -ENOMEM;
 	}
-	irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class);
+	irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
+			      &fsl_msi_irq_request_class);
 	cascade_data->index = offset;
 	cascade_data->msi_data = msi;
 	cascade_data->virq = virt_msir;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1b2d8cb49abb2e8ea209a511bd239dd58e710661..cab24f549e7cbdc8786c5fa5b7450f0dcda8d87c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1590,7 +1590,7 @@ static void print_bug_trap(struct pt_regs *regs)
 	printf("kernel BUG at %s:%u!\n",
 	       bug->file, bug->line);
 #else
-	printf("kernel BUG at %p!\n", (void *)bug->bug_addr);
+	printf("kernel BUG at %px!\n", (void *)bug->bug_addr);
 #endif
 #endif /* CONFIG_BUG */
 }
@@ -2329,7 +2329,7 @@ static void dump_one_paca(int cpu)
 
 	p = &paca[cpu];
 
-	printf("paca for cpu 0x%x @ %p:\n", cpu, p);
+	printf("paca for cpu 0x%x @ %px:\n", cpu, p);
 
 	printf(" %-*s = %s\n", 20, "possible", cpu_possible(cpu) ? "yes" : "no");
 	printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? "yes" : "no");
@@ -2945,7 +2945,7 @@ static void show_task(struct task_struct *tsk)
 		(tsk->exit_state & EXIT_DEAD) ? 'E' :
 		(tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
 
-	printf("%p %016lx %6d %6d %c %2d %s\n", tsk,
+	printf("%px %016lx %6d %6d %c %2d %s\n", tsk,
 		tsk->thread.ksp,
 		tsk->pid, tsk->parent->pid,
 		state, task_thread_info(tsk)->cpu,
@@ -2988,7 +2988,7 @@ static void show_pte(unsigned long addr)
 
 	if (setjmp(bus_error_jmp) != 0) {
 		catch_memory_errors = 0;
-		printf("*** Error dumping pte for task %p\n", tsk);
+		printf("*** Error dumping pte for task %px\n", tsk);
 		return;
 	}
 
@@ -3074,7 +3074,7 @@ static void show_tasks(void)
 
 	if (setjmp(bus_error_jmp) != 0) {
 		catch_memory_errors = 0;
-		printf("*** Error dumping task %p\n", tsk);
+		printf("*** Error dumping task %px\n", tsk);
 		return;
 	}
 
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..47dacf06c679f3eff22c0a6ab0f619d9b4019ab5 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -0,0 +1,75 @@
+CONFIG_SMP=y
+CONFIG_PCI=y
+CONFIG_PCIE_XILINX=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NETLINK_DIAG=y
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
+CONFIG_MACB=y
+CONFIG_E1000E=y
+CONFIG_R8169=y
+CONFIG_MICROSEMI_PHY=y
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_XHCI_PLATFORM=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_UAS=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_RAS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
+# CONFIG_RCU_TRACE is not set
+CONFIG_CRYPTO_USER_API_HASH=y
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 773c4e039cd7288bcd25ed53ce831db84c766f26..c0319cbf1eec58d7ea8960259838b865c23f49a1 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -38,6 +38,25 @@
 #define smp_rmb()	RISCV_FENCE(r,r)
 #define smp_wmb()	RISCV_FENCE(w,w)
 
+/*
+ * This is a very specific barrier: it's currently only used in two places in
+ * the kernel, both in the scheduler.  See include/linux/spinlock.h for the two
+ * orderings it guarantees, but the "critical section is RCsc" guarantee
+ * mandates a barrier on RISC-V.  The sequence looks like:
+ *
+ *    lr.aq lock
+ *    sc    lock <= LOCKED
+ *    smp_mb__after_spinlock()
+ *    // critical section
+ *    lr    lock
+ *    sc.rl lock <= UNLOCKED
+ *
+ * The AQ/RL pair provides a RCpc critical section, but there's not really any
+ * way we can take advantage of that here because the ordering is only enforced
+ * on that one lock.  Thus, we're just doing a full fence.
+ */
+#define smp_mb__after_spinlock()	RISCV_FENCE(rw,rw)
+
 #include <asm-generic/barrier.h>
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 0d64bc9f4f91562872b36231e5b593887498cde2..3c7a2c97e377a5af2923c8e7d96cf04e57e66892 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -17,10 +17,10 @@
 #include <linux/const.h>
 
 /* Status register flags */
-#define SR_IE   _AC(0x00000002, UL) /* Interrupt Enable */
-#define SR_PIE  _AC(0x00000020, UL) /* Previous IE */
-#define SR_PS   _AC(0x00000100, UL) /* Previously Supervisor */
-#define SR_SUM  _AC(0x00040000, UL) /* Supervisor may access User Memory */
+#define SR_SIE	_AC(0x00000002, UL) /* Supervisor Interrupt Enable */
+#define SR_SPIE	_AC(0x00000020, UL) /* Previous Supervisor IE */
+#define SR_SPP	_AC(0x00000100, UL) /* Previously Supervisor */
+#define SR_SUM	_AC(0x00040000, UL) /* Supervisor may access User Memory */
 
 #define SR_FS           _AC(0x00006000, UL) /* Floating-point Status */
 #define SR_FS_OFF       _AC(0x00000000, UL)
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index a82ce599b639813c9ed3ad697f217cb09a6538e1..b269451e7e85577c76cb718283806fdfb6f76532 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -21,8 +21,6 @@
 
 #include <linux/types.h>
 
-#ifdef CONFIG_MMU
-
 extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
 
 /*
@@ -36,8 +34,6 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
 
 extern void iounmap(volatile void __iomem *addr);
 
-#endif /* CONFIG_MMU */
-
 /* Generic IO read/write.  These perform native-endian accesses. */
 #define __raw_writeb __raw_writeb
 static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
diff --git a/arch/riscv/include/asm/irqflags.h b/arch/riscv/include/asm/irqflags.h
index 6fdc860d7f84fd69648af547ff8984946eac93fd..07a3c6d5706ff8fd8f34acbe6c5832fc6e638676 100644
--- a/arch/riscv/include/asm/irqflags.h
+++ b/arch/riscv/include/asm/irqflags.h
@@ -27,25 +27,25 @@ static inline unsigned long arch_local_save_flags(void)
 /* unconditionally enable interrupts */
 static inline void arch_local_irq_enable(void)
 {
-	csr_set(sstatus, SR_IE);
+	csr_set(sstatus, SR_SIE);
 }
 
 /* unconditionally disable interrupts */
 static inline void arch_local_irq_disable(void)
 {
-	csr_clear(sstatus, SR_IE);
+	csr_clear(sstatus, SR_SIE);
 }
 
 /* get status and disable interrupts */
 static inline unsigned long arch_local_irq_save(void)
 {
-	return csr_read_clear(sstatus, SR_IE);
+	return csr_read_clear(sstatus, SR_SIE);
 }
 
 /* test flags */
 static inline int arch_irqs_disabled_flags(unsigned long flags)
 {
-	return !(flags & SR_IE);
+	return !(flags & SR_SIE);
 }
 
 /* test hardware interrupt enable bit */
@@ -57,7 +57,7 @@ static inline int arch_irqs_disabled(void)
 /* set interrupt enabled status */
 static inline void arch_local_irq_restore(unsigned long flags)
 {
-	csr_set(sstatus, flags & SR_IE);
+	csr_set(sstatus, flags & SR_SIE);
 }
 
 #endif /* _ASM_RISCV_IRQFLAGS_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 2cbd92ed1629c00df42b8ceaffd2250b6de7413a..16301966d65b6fd8a54614d12f0815866d19d948 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -20,8 +20,6 @@
 
 #ifndef __ASSEMBLY__
 
-#ifdef CONFIG_MMU
-
 /* Page Upper Directory not used in RISC-V */
 #include <asm-generic/pgtable-nopud.h>
 #include <asm/page.h>
@@ -413,8 +411,6 @@ static inline void pgtable_cache_init(void)
 	/* No page table caches to initialize */
 }
 
-#endif /* CONFIG_MMU */
-
 #define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
 #define VMALLOC_END      (PAGE_OFFSET - 1)
 #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index 93b8956e25e46714a5bd789ed0ea15ebb2a687f2..2c5df945d43c9abfdfd197a61d8c92ce20e48133 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -66,7 +66,7 @@ struct pt_regs {
 #define REG_FMT "%08lx"
 #endif
 
-#define user_mode(regs) (((regs)->sstatus & SR_PS) == 0)
+#define user_mode(regs) (((regs)->sstatus & SR_SPP) == 0)
 
 
 /* Helpers for working with the instruction pointer */
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 715b0f10af580811dfca3ba067819e07fe1e7374..7b9c24ebdf5293ac73b8155b2c394fc9244a712d 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -15,8 +15,6 @@
 #ifndef _ASM_RISCV_TLBFLUSH_H
 #define _ASM_RISCV_TLBFLUSH_H
 
-#ifdef CONFIG_MMU
-
 #include <linux/mm_types.h>
 
 /*
@@ -64,6 +62,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
 	flush_tlb_all();
 }
 
-#endif /* CONFIG_MMU */
-
 #endif /* _ASM_RISCV_TLBFLUSH_H */
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index 27b90d64814b8d0422d0316f8e765b6e97081a47..14b0b22fb57875dfe920685265a79da317c517e0 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -127,7 +127,6 @@ extern int fixup_exception(struct pt_regs *state);
  * call.
  */
 
-#ifdef CONFIG_MMU
 #define __get_user_asm(insn, x, ptr, err)			\
 do {								\
 	uintptr_t __tmp;					\
@@ -153,13 +152,11 @@ do {								\
 	__disable_user_access();				\
 	(x) = __x;						\
 } while (0)
-#endif /* CONFIG_MMU */
 
 #ifdef CONFIG_64BIT
 #define __get_user_8(x, ptr, err) \
 	__get_user_asm("ld", x, ptr, err)
 #else /* !CONFIG_64BIT */
-#ifdef CONFIG_MMU
 #define __get_user_8(x, ptr, err)				\
 do {								\
 	u32 __user *__ptr = (u32 __user *)(ptr);		\
@@ -193,7 +190,6 @@ do {								\
 	(x) = (__typeof__(x))((__typeof__((x)-(x)))(		\
 		(((u64)__hi << 32) | __lo)));			\
 } while (0)
-#endif /* CONFIG_MMU */
 #endif /* CONFIG_64BIT */
 
 
@@ -267,8 +263,6 @@ do {								\
 		((x) = 0, -EFAULT);				\
 })
 
-
-#ifdef CONFIG_MMU
 #define __put_user_asm(insn, x, ptr, err)			\
 do {								\
 	uintptr_t __tmp;					\
@@ -292,14 +286,11 @@ do {								\
 		: "rJ" (__x), "i" (-EFAULT));			\
 	__disable_user_access();				\
 } while (0)
-#endif /* CONFIG_MMU */
-
 
 #ifdef CONFIG_64BIT
 #define __put_user_8(x, ptr, err) \
 	__put_user_asm("sd", x, ptr, err)
 #else /* !CONFIG_64BIT */
-#ifdef CONFIG_MMU
 #define __put_user_8(x, ptr, err)				\
 do {								\
 	u32 __user *__ptr = (u32 __user *)(ptr);		\
@@ -329,7 +320,6 @@ do {								\
 		: "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT));	\
 	__disable_user_access();				\
 } while (0)
-#endif /* CONFIG_MMU */
 #endif /* CONFIG_64BIT */
 
 
@@ -438,7 +428,6 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
  * will set "err" to -EFAULT, while successful accesses return the previous
  * value.
  */
-#ifdef CONFIG_MMU
 #define __cmpxchg_user(ptr, old, new, err, size, lrb, scb)	\
 ({								\
 	__typeof__(ptr) __ptr = (ptr);				\
@@ -508,6 +497,5 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
 	(err) = __err;						\
 	__ret;							\
 })
-#endif /* CONFIG_MMU */
 
 #endif /* _ASM_RISCV_UACCESS_H */
diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h
index 9f250ed007cd816e523898a10f32ec6814da6892..2f704a5c4196e30fd2a9a9c3607eeeed3d6f019f 100644
--- a/arch/riscv/include/asm/unistd.h
+++ b/arch/riscv/include/asm/unistd.h
@@ -14,3 +14,4 @@
 #define __ARCH_HAVE_MMU
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
+#include <uapi/asm/syscalls.h>
diff --git a/arch/riscv/include/asm/vdso-syscalls.h b/arch/riscv/include/asm/vdso-syscalls.h
deleted file mode 100644
index a2ccf189492958e475cc7cc6664d86559124f001..0000000000000000000000000000000000000000
--- a/arch/riscv/include/asm/vdso-syscalls.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2017 SiFive
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _ASM_RISCV_VDSO_SYSCALLS_H
-#define _ASM_RISCV_VDSO_SYSCALLS_H
-
-#ifdef CONFIG_SMP
-
-/* These syscalls are only used by the vDSO and are not in the uapi. */
-#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
-__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
-
-#endif
-
-#endif /* _ASM_RISCV_VDSO_H */
diff --git a/arch/riscv/include/uapi/asm/Kbuild b/arch/riscv/include/uapi/asm/Kbuild
index 5ded96b063526e0073e3d79a41ba8b62e21e040c..7e91f485047576b559b3a8549ec7b5fc80827ac3 100644
--- a/arch/riscv/include/uapi/asm/Kbuild
+++ b/arch/riscv/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += setup.h
 generic-y += unistd.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/riscv/include/uapi/asm/syscalls.h b/arch/riscv/include/uapi/asm/syscalls.h
new file mode 100644
index 0000000000000000000000000000000000000000..818655b0d5356a1f5c0768419c6bd98ca5a58ec6
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/syscalls.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2017 SiFive
+ */
+
+#ifndef _ASM__UAPI__SYSCALLS_H
+#define _ASM__UAPI__SYSCALLS_H
+
+/*
+ * Allows the instruction cache to be flushed from userspace.  Despite RISC-V
+ * having a direct 'fence.i' instruction available to userspace (which we
+ * can't trap!), that's not actually viable when running on Linux because the
+ * kernel might schedule a process on another hart.  There is no way for
+ * userspace to handle this without invoking the kernel (as it doesn't know the
+ * thread->hart mappings), so we've defined a RISC-V specific system call to
+ * flush the instruction cache.
+ *
+ * __NR_riscv_flush_icache is defined to flush the instruction cache over an
+ * address range, with the flush applying to either all threads or just the
+ * caller.  We don't currently do anything with the address range, that's just
+ * in there for forwards compatibility.
+ */
+#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
+__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
+
+#endif
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 20ee86f782a93b19779236f39a40daa3d9563ac5..7404ec222406290ed03c671a3b4463c61aa49c12 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -196,7 +196,7 @@ handle_syscall:
 	addi s2, s2, 0x4
 	REG_S s2, PT_SEPC(sp)
 	/* System calls run with interrupts enabled */
-	csrs sstatus, SR_IE
+	csrs sstatus, SR_SIE
 	/* Trace syscalls, but only if requested by the user. */
 	REG_L t0, TASK_TI_FLAGS(tp)
 	andi t0, t0, _TIF_SYSCALL_TRACE
@@ -224,8 +224,8 @@ ret_from_syscall:
 
 ret_from_exception:
 	REG_L s0, PT_SSTATUS(sp)
-	csrc sstatus, SR_IE
-	andi s0, s0, SR_PS
+	csrc sstatus, SR_SIE
+	andi s0, s0, SR_SPP
 	bnez s0, restore_all
 
 resume_userspace:
@@ -255,7 +255,7 @@ work_pending:
 	bnez s1, work_resched
 work_notifysig:
 	/* Handle pending signals and notify-resume requests */
-	csrs sstatus, SR_IE /* Enable interrupts for do_notify_resume() */
+	csrs sstatus, SR_SIE /* Enable interrupts for do_notify_resume() */
 	move a0, sp /* pt_regs */
 	move a1, s0 /* current_thread_info->flags */
 	tail do_notify_resume
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 0d90dcc1fbd36559823e48db2dc15ac4319b063d..d74d4adf2d54f94a4dff39c14d7953b8c0fdbab8 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -76,7 +76,7 @@ void show_regs(struct pt_regs *regs)
 void start_thread(struct pt_regs *regs, unsigned long pc,
 	unsigned long sp)
 {
-	regs->sstatus = SR_PIE /* User mode, irqs on */ | SR_FS_INITIAL;
+	regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL;
 	regs->sepc = pc;
 	regs->sp = sp;
 	set_fs(USER_DS);
@@ -110,7 +110,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		const register unsigned long gp __asm__ ("gp");
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->gp = gp;
-		childregs->sstatus = SR_PS | SR_PIE; /* Supervisor, irqs on */
+		childregs->sstatus = SR_SPP | SR_SPIE; /* Supervisor, irqs on */
 
 		p->thread.ra = (unsigned long)ret_from_kernel_thread;
 		p->thread.s[0] = usp; /* fn */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 8fbb6749910d42473d814b37eb255f812d4d206b..cb7b0c63014ecbc61c8d9a2b8263a65dd1775d11 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -38,10 +38,6 @@
 #include <asm/tlbflush.h>
 #include <asm/thread_info.h>
 
-#ifdef CONFIG_HVC_RISCV_SBI
-#include <asm/hvc_riscv_sbi.h>
-#endif
-
 #ifdef CONFIG_DUMMY_CONSOLE
 struct screen_info screen_info = {
 	.orig_video_lines	= 30,
@@ -212,13 +208,6 @@ static void __init setup_bootmem(void)
 
 void __init setup_arch(char **cmdline_p)
 {
-#if defined(CONFIG_HVC_RISCV_SBI)
-	if (likely(early_console == NULL)) {
-		early_console = &riscv_sbi_early_console_dev;
-		register_console(early_console);
-	}
-#endif
-
 #ifdef CONFIG_CMDLINE_BOOL
 #ifdef CONFIG_CMDLINE_OVERRIDE
 	strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index a2ae936a093e4f91d4def96fff0dd46e6e0226c0..79c78668258ede202086c072c63352ca14585903 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -70,7 +70,7 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
 	bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;
 
 	/* Check the reserved flags. */
-	if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL))
+	if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL))
 		return -EINVAL;
 
 	flush_icache_mm(mm, local);
diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c
index a5bd6401f95e6988a376406f02b7e39cb7c1352e..ade52b903a43f2b27546fdd216b87b7057bc95d4 100644
--- a/arch/riscv/kernel/syscall_table.c
+++ b/arch/riscv/kernel/syscall_table.c
@@ -23,5 +23,4 @@
 void *sys_call_table[__NR_syscalls] = {
 	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
 #include <asm/unistd.h>
-#include <asm/vdso-syscalls.h>
 };
diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S
index b0fbad74e873ea9fff553424b975502b589d7097..023e4d4aef588e139fafcbc9e3fb1ddef0482575 100644
--- a/arch/riscv/kernel/vdso/flush_icache.S
+++ b/arch/riscv/kernel/vdso/flush_icache.S
@@ -13,7 +13,6 @@
 
 #include <linux/linkage.h>
 #include <asm/unistd.h>
-#include <asm/vdso-syscalls.h>
 
 	.text
 /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index df2ca3c65048f9347781b05309c9552eaa7d0844..0713f3c67ab4242a2e54430331044724bf57f289 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -63,7 +63,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
 		goto vmalloc_fault;
 
 	/* Enable interrupts if they were enabled in the parent context. */
-	if (likely(regs->sstatus & SR_PIE))
+	if (likely(regs->sstatus & SR_SPIE))
 		local_irq_enable();
 
 	/*
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index eae2c64cf69d1cc8d6d4d920d8ae4ec18b520bb4..9fdff3fe1a42aac7414c06ccea3018399906fd42 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-y				+= kernel/
 obj-y				+= mm/
 obj-$(CONFIG_KVM)		+= kvm/
diff --git a/arch/s390/appldata/Makefile b/arch/s390/appldata/Makefile
index 99f1cf071304bd7686d1c26c5a7416207c802b13..b06def4a4f2f9955d9bb31c7950eeca0fce92af1 100644
--- a/arch/s390/appldata/Makefile
+++ b/arch/s390/appldata/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux - z/VM Monitor Stream.
 #
diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr
index f02382ae5c48b1cd319a4cb1e755fa060f03cfdc..42a242597f346345c50d9be5422e74db79ad8496 100644
--- a/arch/s390/boot/compressed/vmlinux.scr
+++ b/arch/s390/boot/compressed/vmlinux.scr
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 SECTIONS
 {
   .rodata.compressed : {
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index c7de53d8da7553d58c797d7d43de6b23101aadf6..a00c17f761c190269058d2052cc1cf40758bb0c8 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Cryptographic API.
  *
@@ -16,12 +17,6 @@
  *   Copyright (c) Alan Smithee.
  *   Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
  *   Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
  */
 #include <crypto/internal/hash.h>
 #include <linux/init.h>
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
index 2ee25ba252d68ab98efdcd7a525ae0b29778950d..06f601509ce983bf250ac58cf2e15c902f75b7b8 100644
--- a/arch/s390/hypfs/Makefile
+++ b/arch/s390/hypfs/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the linux hypfs filesystem routines.
 #
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 41c211a4d8b17023ff2fde819d8c103527c09878..0484508693287feebdaf10ffb289161e1c9f2f50 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 generic-y += asm-offsets.h
 generic-y += cacheflush.h
 generic-y += clkdev.h
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index a72002056b54848103fc626338fee956c145bfc9..c2cf7bcdef9b7491a6b0c3dbc0998f0686625a51 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_S390_ALTERNATIVE_H
 #define _ASM_S390_ALTERNATIVE_H
 
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index c02f4aba88a6220bfc1bc7714632d52676aa5bef..cfce6835b109fd88e74d24af3022afdc01ddc5d3 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Adjunct processor (AP) interfaces
  *
  * Copyright IBM Corp. 2017
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  * Author(s): Tony Krowiak <akrowia@linux.vnet.ibm.com>
  *	      Martin Schwidefsky <schwidefsky@de.ibm.com>
  *	      Harald Freudenberger <freude@de.ibm.com>
diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h
index 0f5bd894f4dcfcbac666e3c5521c52c960d60623..aa42a179be33a5f92592c4099f38dc951e7a7abf 100644
--- a/arch/s390/include/asm/bugs.h
+++ b/arch/s390/include/asm/bugs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *  S390 version
  *    Copyright IBM Corp. 1999
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index d6c9d1e0dc2d4bc0fe36a46109211f93682e5cfd..b9c0e361748bb46eb5dddb60f79e5de27eef51b1 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -40,6 +40,7 @@ struct pt_regs;
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs) perf_misc_flags(regs)
+#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
 
 /* Perf pt_regs extension for sample-data-entry indicators */
 struct perf_sf_sde_regs {
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 57d7bc92e0b8a766d24520ea5234fca56971b646..0a6b0286c32e9e0a7283d9cfb66dc357fe2e36fa 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1264,12 +1264,6 @@ static inline pud_t pud_mkwrite(pud_t pud)
 	return pud;
 }
 
-#define pud_write pud_write
-static inline int pud_write(pud_t pud)
-{
-	return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
-}
-
 static inline pud_t pud_mkclean(pud_t pud)
 {
 	if (pud_large(pud)) {
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index a3788dafc0e1f2272abd0ba9c455b775e81f627a..6f70d81c40f239fde907795707c2e8ebc2bb9d47 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -74,9 +74,14 @@ enum {
  */
 struct pt_regs 
 {
-	unsigned long args[1];
-	psw_t psw;
-	unsigned long gprs[NUM_GPRS];
+	union {
+		user_pt_regs user_regs;
+		struct {
+			unsigned long args[1];
+			psw_t psw;
+			unsigned long gprs[NUM_GPRS];
+		};
+	};
 	unsigned long orig_gpr2;
 	unsigned int int_code;
 	unsigned int int_parm;
diff --git a/arch/s390/include/asm/segment.h b/arch/s390/include/asm/segment.h
index 8bfce3475b1c46715d6b82f1d911e82054269b86..97a0582b8d0f175942034895789c8988f24934a5 100644
--- a/arch/s390/include/asm/segment.h
+++ b/arch/s390/include/asm/segment.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_SEGMENT_H
 #define _ASM_SEGMENT_H
 
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index ec7b476c1ac571b82e219a786ad2525a0ff9ceb2..c61b2cc1a8a86a9508fe093a607d106ff0118be3 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -30,21 +30,20 @@ static inline void restore_access_regs(unsigned int *acrs)
 	asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
 }
 
-#define switch_to(prev,next,last) do {					\
-	if (prev->mm) {							\
-		save_fpu_regs();					\
-		save_access_regs(&prev->thread.acrs[0]);		\
-		save_ri_cb(prev->thread.ri_cb);				\
-		save_gs_cb(prev->thread.gs_cb);				\
-	}								\
+#define switch_to(prev, next, last) do {				\
+	/* save_fpu_regs() sets the CIF_FPU flag, which enforces	\
+	 * a restore of the floating point / vector registers as	\
+	 * soon as the next task returns to user space			\
+	 */								\
+	save_fpu_regs();						\
+	save_access_regs(&prev->thread.acrs[0]);			\
+	save_ri_cb(prev->thread.ri_cb);					\
+	save_gs_cb(prev->thread.gs_cb);					\
 	update_cr_regs(next);						\
-	if (next->mm) {							\
-		set_cpu_flag(CIF_FPU);					\
-		restore_access_regs(&next->thread.acrs[0]);		\
-		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\
-		restore_gs_cb(next->thread.gs_cb);			\
-	}								\
-	prev = __switch_to(prev,next);					\
+	restore_access_regs(&next->thread.acrs[0]);			\
+	restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);		\
+	restore_gs_cb(next->thread.gs_cb);				\
+	prev = __switch_to(prev, next);					\
 } while (0)
 
 #endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/include/asm/vga.h b/arch/s390/include/asm/vga.h
index d375526c261f19a99053e7e3e06029ecde7c1983..605dc46bac5e0d182268e33246983e12b48bc41a 100644
--- a/arch/s390/include/asm/vga.h
+++ b/arch/s390/include/asm/vga.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_S390_VGA_H
 #define _ASM_S390_VGA_H
 
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 098f28778a13408e220e7d3b0a8a657e7e508c69..92b7c9b3e6417b50515f2cf4cdc3b4e3f3b7042e 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
diff --git a/arch/s390/include/uapi/asm/bpf_perf_event.h b/arch/s390/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..cefe7c7cd4f6f29fabd90e33495d7a6d8ac6dbdd
--- /dev/null
+++ b/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/s390/include/uapi/asm/perf_regs.h b/arch/s390/include/uapi/asm/perf_regs.h
index 7c8564f98205a4b65163865ee1743c0c09bc3a92..d17dd9e5d51638f08ee44ad3e94d840ea0fc568d 100644
--- a/arch/s390/include/uapi/asm/perf_regs.h
+++ b/arch/s390/include/uapi/asm/perf_regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _ASM_S390_PERF_REGS_H
 #define _ASM_S390_PERF_REGS_H
 
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
index 0d23c8ff290085b43745fdc23cd18a2911624aca..543dd70e12c81d59a9987c437f1895a216c2525b 100644
--- a/arch/s390/include/uapi/asm/ptrace.h
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -162,7 +162,7 @@
 #define GPR_SIZE	8
 #define CR_SIZE		8
 
-#define STACK_FRAME_OVERHEAD    160      /* size of minimum stack frame */
+#define STACK_FRAME_OVERHEAD	160	 /* size of minimum stack frame */
 
 #endif /* __s390x__ */
 
@@ -179,17 +179,16 @@
 #define ACR_SIZE	4
 
 
-#define PTRACE_OLDSETOPTIONS         21
+#define PTRACE_OLDSETOPTIONS	     21
 
 #ifndef __ASSEMBLY__
 #include <linux/stddef.h>
 #include <linux/types.h>
 
-typedef union
-{
-	float   f;
-	double  d;
-        __u64   ui;
+typedef union {
+	float	f;
+	double	d;
+	__u64	ui;
 	struct
 	{
 		__u32 hi;
@@ -197,23 +196,21 @@ typedef union
 	} fp;
 } freg_t;
 
-typedef struct
-{
-	__u32   fpc;
+typedef struct {
+	__u32	fpc;
 	__u32	pad;
-	freg_t  fprs[NUM_FPRS];              
+	freg_t	fprs[NUM_FPRS];
 } s390_fp_regs;
 
-#define FPC_EXCEPTION_MASK      0xF8000000
-#define FPC_FLAGS_MASK          0x00F80000
-#define FPC_DXC_MASK            0x0000FF00
-#define FPC_RM_MASK             0x00000003
+#define FPC_EXCEPTION_MASK	0xF8000000
+#define FPC_FLAGS_MASK		0x00F80000
+#define FPC_DXC_MASK		0x0000FF00
+#define FPC_RM_MASK		0x00000003
 
 /* this typedef defines how a Program Status Word looks like */
-typedef struct 
-{
-        unsigned long mask;
-        unsigned long addr;
+typedef struct {
+	unsigned long mask;
+	unsigned long addr;
 } __attribute__ ((aligned(8))) psw_t;
 
 #ifndef __s390x__
@@ -282,33 +279,40 @@ typedef struct
 /*
  * The s390_regs structure is used to define the elf_gregset_t.
  */
-typedef struct
-{
+typedef struct {
 	psw_t psw;
 	unsigned long gprs[NUM_GPRS];
 	unsigned int  acrs[NUM_ACRS];
 	unsigned long orig_gpr2;
 } s390_regs;
 
+/*
+ * The user_pt_regs structure exports the beginning of
+ * the in-kernel pt_regs structure to user space.
+ */
+typedef struct {
+	unsigned long args[1];
+	psw_t psw;
+	unsigned long gprs[NUM_GPRS];
+} user_pt_regs;
+
 /*
  * Now for the user space program event recording (trace) definitions.
  * The following structures are used only for the ptrace interface, don't
  * touch or even look at it if you don't want to modify the user-space
  * ptrace interface. In particular stay away from it for in-kernel PER.
  */
-typedef struct
-{
+typedef struct {
 	unsigned long cr[NUM_CR_WORDS];
 } per_cr_words;
 
 #define PER_EM_MASK 0xE8000000UL
 
-typedef	struct
-{
+typedef struct {
 #ifdef __s390x__
-	unsigned                       : 32;
+	unsigned		       : 32;
 #endif /* __s390x__ */
-	unsigned em_branching          : 1;
+	unsigned em_branching	       : 1;
 	unsigned em_instruction_fetch  : 1;
 	/*
 	 * Switching on storage alteration automatically fixes
@@ -317,44 +321,41 @@ typedef	struct
 	unsigned em_storage_alteration : 1;
 	unsigned em_gpr_alt_unused     : 1;
 	unsigned em_store_real_address : 1;
-	unsigned                       : 3;
+	unsigned		       : 3;
 	unsigned branch_addr_ctl       : 1;
-	unsigned                       : 1;
+	unsigned		       : 1;
 	unsigned storage_alt_space_ctl : 1;
-	unsigned                       : 21;
+	unsigned		       : 21;
 	unsigned long starting_addr;
 	unsigned long ending_addr;
 } per_cr_bits;
 
-typedef struct
-{
+typedef struct {
 	unsigned short perc_atmid;
 	unsigned long address;
 	unsigned char access_id;
 } per_lowcore_words;
 
-typedef struct
-{
-	unsigned perc_branching          : 1;
+typedef struct {
+	unsigned perc_branching		 : 1;
 	unsigned perc_instruction_fetch  : 1;
 	unsigned perc_storage_alteration : 1;
-	unsigned perc_gpr_alt_unused     : 1;
+	unsigned perc_gpr_alt_unused	 : 1;
 	unsigned perc_store_real_address : 1;
-	unsigned                         : 3;
-	unsigned atmid_psw_bit_31        : 1;
-	unsigned atmid_validity_bit      : 1;
-	unsigned atmid_psw_bit_32        : 1;
-	unsigned atmid_psw_bit_5         : 1;
-	unsigned atmid_psw_bit_16        : 1;
-	unsigned atmid_psw_bit_17        : 1;
-	unsigned si                      : 2;
+	unsigned			 : 3;
+	unsigned atmid_psw_bit_31	 : 1;
+	unsigned atmid_validity_bit	 : 1;
+	unsigned atmid_psw_bit_32	 : 1;
+	unsigned atmid_psw_bit_5	 : 1;
+	unsigned atmid_psw_bit_16	 : 1;
+	unsigned atmid_psw_bit_17	 : 1;
+	unsigned si			 : 2;
 	unsigned long address;
-	unsigned                         : 4;
-	unsigned access_id               : 4;
+	unsigned			 : 4;
+	unsigned access_id		 : 4;
 } per_lowcore_bits;
 
-typedef struct
-{
+typedef struct {
 	union {
 		per_cr_words   words;
 		per_cr_bits    bits;
@@ -364,9 +365,9 @@ typedef struct
 	 * the kernel always sets them to zero. To enable single
 	 * stepping use ptrace(PTRACE_SINGLESTEP) instead.
 	 */
-	unsigned  single_step       : 1;
+	unsigned  single_step	    : 1;
 	unsigned  instruction_fetch : 1;
-	unsigned                    : 30;
+	unsigned		    : 30;
 	/*
 	 * These addresses are copied into cr10 & cr11 if single
 	 * stepping is switched off
@@ -376,11 +377,10 @@ typedef struct
 	union {
 		per_lowcore_words words;
 		per_lowcore_bits  bits;
-	} lowcore; 
+	} lowcore;
 } per_struct;
 
-typedef struct
-{
+typedef struct {
 	unsigned int  len;
 	unsigned long kernel_addr;
 	unsigned long process_addr;
@@ -390,12 +390,12 @@ typedef struct
  * S/390 specific non posix ptrace requests. I chose unusual values so
  * they are unlikely to clash with future ptrace definitions.
  */
-#define PTRACE_PEEKUSR_AREA           0x5000
-#define PTRACE_POKEUSR_AREA           0x5001
+#define PTRACE_PEEKUSR_AREA	      0x5000
+#define PTRACE_POKEUSR_AREA	      0x5001
 #define PTRACE_PEEKTEXT_AREA	      0x5002
 #define PTRACE_PEEKDATA_AREA	      0x5003
 #define PTRACE_POKETEXT_AREA	      0x5004
-#define PTRACE_POKEDATA_AREA 	      0x5005
+#define PTRACE_POKEDATA_AREA	      0x5005
 #define PTRACE_GET_LAST_BREAK	      0x5006
 #define PTRACE_PEEK_SYSTEM_CALL       0x5007
 #define PTRACE_POKE_SYSTEM_CALL	      0x5008
@@ -413,21 +413,19 @@ typedef struct
  * PT_PROT definition is loosely based on hppa bsd definition in
  * gdb/hppab-nat.c
  */
-#define PTRACE_PROT                       21
+#define PTRACE_PROT			  21
 
-typedef enum
-{
+typedef enum {
 	ptprot_set_access_watchpoint,
 	ptprot_set_write_watchpoint,
 	ptprot_disable_watchpoint
 } ptprot_flags;
 
-typedef struct
-{
+typedef struct {
 	unsigned long lowaddr;
 	unsigned long hiaddr;
 	ptprot_flags prot;
-} ptprot_area;                     
+} ptprot_area;
 
 /* Sequence of bytes for breakpoint illegal instruction.  */
 #define S390_BREAKPOINT     {0x0,0x1}
@@ -439,8 +437,7 @@ typedef struct
  * The user_regs_struct defines the way the user registers are
  * store on the stack for signal handling.
  */
-struct user_regs_struct
-{
+struct user_regs_struct {
 	psw_t psw;
 	unsigned long gprs[NUM_GPRS];
 	unsigned int  acrs[NUM_ACRS];
diff --git a/arch/s390/include/uapi/asm/sthyi.h b/arch/s390/include/uapi/asm/sthyi.h
index ec113db4eb7e2658909f80f35bc5679cf53b3189..b1b0223169839da11aeb00884d7dc901e4ab5329 100644
--- a/arch/s390/include/uapi/asm/sthyi.h
+++ b/arch/s390/include/uapi/asm/sthyi.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI_ASM_STHYI_H
 #define _UAPI_ASM_STHYI_H
 
diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h
index 3a77833c74dc20e065e0bc3d77ce0bda9f955fa5..2b605f7e8483675cde7cdf2e553e068afde0f950 100644
--- a/arch/s390/include/uapi/asm/virtio-ccw.h
+++ b/arch/s390/include/uapi/asm/virtio-ccw.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
 /*
  * Definitions for virtio-ccw devices.
  *
diff --git a/arch/s390/include/uapi/asm/vmcp.h b/arch/s390/include/uapi/asm/vmcp.h
index 4caf71714a552332a169cb6799288442984ca088..aeaaa030030e6c9869e0d93b51232176b4845ef2 100644
--- a/arch/s390/include/uapi/asm/vmcp.h
+++ b/arch/s390/include/uapi/asm/vmcp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  * Copyright IBM Corp. 2004, 2005
  * Interface implementation for communication with the z/VM control program
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index 315986a06cf57f2c768b4c1a549a794af7952b46..574e77622c049c68e557d1a7413e6c161b457fb8 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/module.h>
 #include <asm/alternative.h>
 #include <asm/facility.h>
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index f04db3779b34507f9dd38791fc89131505d5f0c3..59eea9c65d3e9e8595d509001b1c794420060887 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
 		return retval;
 	}
 
+	groups_sort(group_info);
 	retval = set_current_groups(group_info);
 	put_group_info(group_info);
 
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index f8603ebed669b6501de788e5c2c6cbb902023f88..54e2d634b849e128c1ec1d1ccc15c600978ed50b 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/perf_event.h>
 #include <linux/perf_regs.h>
 #include <linux/kernel.h>
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 308a7b63348b3f1950c979dd947d22466f89059f..f7fc633855534cbc0ab53c0d00f26aed59c1a1ad 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -370,10 +370,10 @@ SYSCALL(sys_recvmmsg,compat_sys_recvmmsg)
 SYSCALL(sys_sendmmsg,compat_sys_sendmmsg)
 SYSCALL(sys_socket,sys_socket)
 SYSCALL(sys_socketpair,compat_sys_socketpair)		/* 360 */
-SYSCALL(sys_bind,sys_bind)
-SYSCALL(sys_connect,sys_connect)
+SYSCALL(sys_bind,compat_sys_bind)
+SYSCALL(sys_connect,compat_sys_connect)
 SYSCALL(sys_listen,sys_listen)
-SYSCALL(sys_accept4,sys_accept4)
+SYSCALL(sys_accept4,compat_sys_accept4)
 SYSCALL(sys_getsockopt,compat_sys_getsockopt)		/* 365 */
 SYSCALL(sys_setsockopt,compat_sys_setsockopt)
 SYSCALL(sys_getsockname,compat_sys_getsockname)
diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S
index 79a071e4357e4bc51f8813427d0f0ffa4c5ba56c..db19d0680a0afdf34b85eddf418a7e822dcad0a7 100644
--- a/arch/s390/kernel/vdso64/note.S
+++ b/arch/s390/kernel/vdso64/note.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
  * Here we can supply some information useful to userland.
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 6048b1c6e58062bac1258b003a4ebc3814896712..05ee90a5ea08bdb50eb3b9f2b77e70349f7e40bf 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -1,10 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
 # Makefile for kernel virtual machines on s390
 #
 # Copyright IBM Corp. 2008
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License (version 2 only)
-# as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
 common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index d93a2c0474bf67e45698882ecb5f36f0d8688a7d..89aa114a2cbada0989cec25757ec93daed36d064 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * handling diagnose instructions
  *
  * Copyright IBM Corp. 2008, 2011
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  */
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index bec42b852246f0c44f88acf20ae36c51a36c3d20..f4c51756c46239cb4246ae7bd2e0aeb166d46383 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * access guest memory
  *
  * Copyright IBM Corp. 2008, 2014
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  */
 
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index bcbd86621d018085d036f43d815ae1c97791951c..b5f3e82006d0b2e2d8806e8a7e5e58e559255e5b 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * kvm guest debug support
  *
  * Copyright IBM Corp. 2014
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
  */
 #include <linux/kvm_host.h>
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 8fe034beb623217f42bc990c58dea9984426ef30..9c7d707158622e7f0743570db60599fa95a9de3b 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * in-kernel handling for sie intercepts
  *
  * Copyright IBM Corp. 2008, 2014
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index fa557372d600a0283663635ff198895cfad91709..024ad8bcc51655e98ffed300817bc0e06e051cf7 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * handling kvm guest interrupts
  *
  * Copyright IBM Corp. 2008, 2015
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  */
 
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
index d98e4159643df457a4dfeb50e93ab77d53291698..484608c71dd01185b88f9db1a11e3f523bcefebf 100644
--- a/arch/s390/kvm/irq.h
+++ b/arch/s390/kvm/irq.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * s390 irqchip routines
  *
  * Copyright IBM Corp. 2014
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
  */
 #ifndef __KVM_IRQ_H
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 9614aea5839b6ecf1c36e2ccbbd64e2592621dd9..2c93cbbcd15e35a389078643874bf424160f25db 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * hosting zSeries kernel virtual machines
+ * hosting IBM Z kernel virtual machines (s390x)
  *
- * Copyright IBM Corp. 2008, 2009
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
+ * Copyright IBM Corp. 2008, 2017
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
@@ -795,11 +792,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
 
 	if (kvm->arch.use_cmma) {
 		/*
-		 * Get the last slot. They should be sorted by base_gfn, so the
-		 * last slot is also the one at the end of the address space.
-		 * We have verified above that at least one slot is present.
+		 * Get the first slot. They are reverse sorted by base_gfn, so
+		 * the first slot is also the one at the end of the address
+		 * space. We have verified above that at least one slot is
+		 * present.
 		 */
-		ms = slots->memslots + slots->used_slots - 1;
+		ms = slots->memslots;
 		/* round up so we only use full longs */
 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
 		/* allocate enough bytes to store all the bits */
@@ -3808,6 +3806,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			r = -EINVAL;
 			break;
 		}
+		/* do not use irq_state.flags, it will break old QEMUs */
 		r = kvm_s390_set_irq_state(vcpu,
 					   (void __user *) irq_state.buf,
 					   irq_state.len);
@@ -3823,6 +3822,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			r = -EINVAL;
 			break;
 		}
+		/* do not use irq_state.flags, it will break old QEMUs */
 		r = kvm_s390_get_irq_state(vcpu,
 					   (__u8 __user *)  irq_state.buf,
 					   irq_state.len);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 10d65dfbc306736bc31e1a7f363c2b48516c52e7..5e46ba429bcb4dfe4345f531339557b6af71ef40 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * definition for kvm on s390
  *
  * Copyright IBM Corp. 2008, 2009
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c954ac49eee47158ac27bd1d16ba9dbcab7e25a3..0714bfa56da0f54cae66b26bd5329b18beb77b0b 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * handling privileged instructions
  *
  * Copyright IBM Corp. 2008, 2013
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  */
@@ -235,8 +232,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
 		VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
 		return -EAGAIN;
 	}
-	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 	return 0;
 }
 
@@ -247,6 +242,9 @@ static int handle_iske(struct kvm_vcpu *vcpu)
 	int reg1, reg2;
 	int rc;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	rc = try_handle_skey(vcpu);
 	if (rc)
 		return rc != -EAGAIN ? rc : 0;
@@ -276,6 +274,9 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
 	int reg1, reg2;
 	int rc;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	rc = try_handle_skey(vcpu);
 	if (rc)
 		return rc != -EAGAIN ? rc : 0;
@@ -311,6 +312,9 @@ static int handle_sske(struct kvm_vcpu *vcpu)
 	int reg1, reg2;
 	int rc;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	rc = try_handle_skey(vcpu);
 	if (rc)
 		return rc != -EAGAIN ? rc : 0;
@@ -1002,7 +1006,7 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
 		cbrlo[entries] = gfn << PAGE_SHIFT;
 	}
 
-	if (orc) {
+	if (orc && gfn < ms->bitmap_size) {
 		/* increment only if we are really flipping the bit to 1 */
 		if (!test_and_set_bit(gfn, ms->pgste_bitmap))
 			atomic64_inc(&ms->dirty_pages);
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 9d592ef4104b0416029944fed5770a8ea74a47cb..c1f5cde2c878e63e32d44a47a10e3c77ccfc32ae 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * handling interprocessor communication
  *
  * Copyright IBM Corp. 2008, 2013
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a311938b63b3b4eea0dd57e80bd02c4708dfcd92..5d6ae0326d9e8fa2707e8d066488c8313dfef879 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * kvm nested virtualization support for s390x
  *
  * Copyright IBM Corp. 2016
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
  */
 #include <linux/vmalloc.h>
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index cae5a1e16cbd2d9ac5cc7b2fd1f67443919b8f80..c4f8039a35e8dda0bc20999b7db089e3ab09b613 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -89,11 +89,11 @@ EXPORT_SYMBOL(enable_sacf_uaccess);
 
 void disable_sacf_uaccess(mm_segment_t old_fs)
 {
+	current->thread.mm_segment = old_fs;
 	if (old_fs == USER_DS && test_facility(27)) {
 		__ctl_load(S390_lowcore.user_asce, 1, 1);
 		clear_cpu_flag(CIF_ASCE_PRIMARY);
 	}
-	current->thread.mm_segment = old_fs;
 }
 EXPORT_SYMBOL(disable_sacf_uaccess);
 
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 434a9564917beceadeffd0f34d041683b717af1c..cb364153c43ce204b8736fe1758e7bac8fda2a69 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -83,8 +83,6 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 
 	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
 	VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
-	if (end >= TASK_SIZE_MAX)
-		return -ENOMEM;
 	rc = 0;
 	notify = 0;
 	while (mm->context.asce_limit < end) {
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
index 90568c33ddb0ef72aaed02bcf4bcc15da9afef37..e0d5f245e42bc713443d5c6d09d9034850adbec6 100644
--- a/arch/s390/net/Makefile
+++ b/arch/s390/net/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Arch-specific network modules
 #
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e81c16838b90f1bc9a5418bc1b4e5365e9cb0aef..9557d8b516df5a689dda995cd7fd501ddd6cf54c 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -55,8 +55,7 @@ struct bpf_jit {
 #define SEEN_LITERAL	8	/* code uses literals */
 #define SEEN_FUNC	16	/* calls C functions */
 #define SEEN_TAIL_CALL	32	/* code uses tail calls */
-#define SEEN_SKB_CHANGE	64	/* code changes skb data */
-#define SEEN_REG_AX	128	/* code uses constant blinding */
+#define SEEN_REG_AX	64	/* code uses constant blinding */
 #define SEEN_STACK	(SEEN_FUNC | SEEN_MEM | SEEN_SKB)
 
 /*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
 			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
 				      REG_15, 152);
 	}
-	if (jit->seen & SEEN_SKB)
+	if (jit->seen & SEEN_SKB) {
 		emit_load_skb_data_hlen(jit);
-	if (jit->seen & SEEN_SKB_CHANGE)
 		/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
 		EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
 			      STK_OFF_SKBP);
+	}
 }
 
 /*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 		EMIT2(0x0d00, REG_14, REG_W1);
 		/* lgr %b0,%r2: load return value into %b0 */
 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
-		if (bpf_helper_changes_pkt_data((void *)func)) {
-			jit->seen |= SEEN_SKB_CHANGE;
+		if ((jit->seen & SEEN_SKB) &&
+		    bpf_helper_changes_pkt_data((void *)func)) {
 			/* lg %b1,ST_OFF_SKBP(%r15) */
 			EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
 				      REG_15, STK_OFF_SKBP);
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
index f94ecaffa71bb543f0d90c9340d7bee23739fe85..66c2dff74895f8ef5924e3a19606e18b7029a882 100644
--- a/arch/s390/numa/Makefile
+++ b/arch/s390/numa/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-y			+= numa.o
 obj-y			+= toptree.o
 obj-$(CONFIG_NUMA_EMU)	+= mode_emu.o
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 805d8b29193a5964b2d3d6edb617f94a9b37e905..22d0871291eef12438398207cb06e964b14ad73d 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the s390 PCI subsystem.
 #
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f7aa5a77827ec17d893d59834a0d33082bb8fd82..2d15d84c20ede64297e0c906a55e3de166f53e3d 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -181,6 +181,9 @@ static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
 static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
 			   size_t size, int flags)
 {
+	unsigned long irqflags;
+	int ret;
+
 	/*
 	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
 	 * translations when previously invalid translation-table entries are
@@ -196,8 +199,22 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
 			return 0;
 	}
 
-	return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
-				  PAGE_ALIGN(size));
+	ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
+				 PAGE_ALIGN(size));
+	if (ret == -ENOMEM && !s390_iommu_strict) {
+		/* enable the hypervisor to free some resources */
+		if (zpci_refresh_global(zdev))
+			goto out;
+
+		spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
+		bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
+			      zdev->lazy_bitmap, zdev->iommu_pages);
+		bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
+		spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
+		ret = 0;
+	}
+out:
+	return ret;
 }
 
 static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 19bcb3b45a70fc12fa426d636fd4482c570c6654..f069929e82114004adea2cc0bfc3abc15bdf23da 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -89,6 +89,9 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
 	if (cc)
 		zpci_err_insn(cc, status, addr, range);
 
+	if (cc == 1 && (status == 4 || status == 16))
+		return -ENOMEM;
+
 	return (cc) ? -EIO : 0;
 }
 
diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c
index 01d4c5a4bfe9781fdba3ccfd4e727e518892815f..357d42681cefae0e8c7e40d500bdb5b452d9173e 100644
--- a/arch/s390/tools/gen_opcode_table.c
+++ b/arch/s390/tools/gen_opcode_table.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Generate opcode table initializers for the in-kernel disassembler.
  *
diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild
index c94ee54210bc489efd469493800596cd2b7061a3..81271d3af47cb1000ebfab2539efa92080a1eccc 100644
--- a/arch/score/include/uapi/asm/Kbuild
+++ b/arch/score/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y	+= bpf_perf_event.h
 generic-y	+= siginfo.h
diff --git a/arch/sh/boards/mach-se/770x/setup.c b/arch/sh/boards/mach-se/770x/setup.c
index 77c35350ee774d4fa26ed5bc57b70d9d03c7bcab..412326d59e6fcebd51469163f6476b88753aaa4c 100644
--- a/arch/sh/boards/mach-se/770x/setup.c
+++ b/arch/sh/boards/mach-se/770x/setup.c
@@ -9,6 +9,7 @@
  */
 #include <linux/init.h>
 #include <linux/platform_device.h>
+#include <linux/sh_eth.h>
 #include <mach-se/mach/se.h>
 #include <mach-se/mach/mrshpc.h>
 #include <asm/machvec.h>
@@ -115,13 +116,23 @@ static struct platform_device heartbeat_device = {
 #if defined(CONFIG_CPU_SUBTYPE_SH7710) ||\
 	defined(CONFIG_CPU_SUBTYPE_SH7712)
 /* SH771X Ethernet driver */
+static struct sh_eth_plat_data sh_eth_plat = {
+	.phy = PHY_ID,
+	.phy_interface = PHY_INTERFACE_MODE_MII,
+};
+
 static struct resource sh_eth0_resources[] = {
 	[0] = {
 		.start = SH_ETH0_BASE,
-		.end = SH_ETH0_BASE + 0x1B8,
+		.end = SH_ETH0_BASE + 0x1B8 - 1,
 		.flags = IORESOURCE_MEM,
 	},
 	[1] = {
+		.start = SH_TSU_BASE,
+		.end = SH_TSU_BASE + 0x200 - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[2] = {
 		.start = SH_ETH0_IRQ,
 		.end = SH_ETH0_IRQ,
 		.flags = IORESOURCE_IRQ,
@@ -132,7 +143,7 @@ static struct platform_device sh_eth0_device = {
 	.name = "sh771x-ether",
 	.id = 0,
 	.dev = {
-		.platform_data = PHY_ID,
+		.platform_data = &sh_eth_plat,
 	},
 	.num_resources = ARRAY_SIZE(sh_eth0_resources),
 	.resource = sh_eth0_resources,
@@ -141,10 +152,15 @@ static struct platform_device sh_eth0_device = {
 static struct resource sh_eth1_resources[] = {
 	[0] = {
 		.start = SH_ETH1_BASE,
-		.end = SH_ETH1_BASE + 0x1B8,
+		.end = SH_ETH1_BASE + 0x1B8 - 1,
 		.flags = IORESOURCE_MEM,
 	},
 	[1] = {
+		.start = SH_TSU_BASE,
+		.end = SH_TSU_BASE + 0x200 - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[2] = {
 		.start = SH_ETH1_IRQ,
 		.end = SH_ETH1_IRQ,
 		.flags = IORESOURCE_IRQ,
@@ -155,7 +171,7 @@ static struct platform_device sh_eth1_device = {
 	.name = "sh771x-ether",
 	.id = 1,
 	.dev = {
-		.platform_data = PHY_ID,
+		.platform_data = &sh_eth_plat,
 	},
 	.num_resources = ARRAY_SIZE(sh_eth1_resources),
 	.resource = sh_eth1_resources,
diff --git a/arch/sh/include/mach-se/mach/se.h b/arch/sh/include/mach-se/mach/se.h
index 4246ef9b07a346ed590be0cd3651f5a370ee700a..aa83fe1ff0b124c002e375e2ce5a83c7c853e29c 100644
--- a/arch/sh/include/mach-se/mach/se.h
+++ b/arch/sh/include/mach-se/mach/se.h
@@ -100,6 +100,7 @@
 /* Base address */
 #define SH_ETH0_BASE 0xA7000000
 #define SH_ETH1_BASE 0xA7000400
+#define SH_TSU_BASE  0xA7000800
 /* PHY ID */
 #if defined(CONFIG_CPU_SUBTYPE_SH7710)
 # define PHY_ID 0x00
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index e28531333efa96d7195e1e9771d574c83caa040d..ba4d39cb321d0608d96f0b0c5ea45ddadea7cc45 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
index 2178c78c7c1a6336d4a11c9619de521519245c0e..4680ba246b554708aec94287f7974adcca4c8c97 100644
--- a/arch/sparc/include/uapi/asm/Kbuild
+++ b/arch/sparc/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += bpf_perf_event.h
 generic-y += types.h
diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S
index e5547b22cd1832c3aea507b3dfc694da90568222..0ddbbb03182232fe199f5222248617c72a2f23b7 100644
--- a/arch/sparc/lib/hweight.S
+++ b/arch/sparc/lib/hweight.S
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
 	.previous
 
 ENTRY(__arch_hweight64)
-	sethi	%hi(__sw_hweight16), %g1
-	jmpl	%g1 + %lo(__sw_hweight16), %g0
+	sethi	%hi(__sw_hweight64), %g1
+	jmpl	%g1 + %lo(__sw_hweight64), %g0
 	 nop
 ENDPROC(__arch_hweight64)
 EXPORT_SYMBOL(__arch_hweight64)
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index be3136f142a9993e0c6c8cfa1d651b1685654a73..a8103a84b4ac4a2ec84c44c302862b3aed8b7e7f 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
 	if (!printk_ratelimit())
 		return;
 
-	printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+	printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
 	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 	       tsk->comm, task_pid_nr(tsk), address,
 	       (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 815c03d7a765524424b92866b1567ea2a43695d4..41363f46797bf9f74dd922fadbd2a3f190e8c9bb 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
 	if (!printk_ratelimit())
 		return;
 
-	printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+	printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
 	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 	       tsk->comm, task_pid_nr(tsk), address,
 	       (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 33c0f8bb0f33de0c6beadd3dd8a9bef6253bcb10..5335ba3c850ed3acdc074ffe639d3ddac101f2ad 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 	if (!(pmd_val(pmd) & _PAGE_VALID))
 		return 0;
 
-	if (!pmd_access_permitted(pmd, write))
+	if (write && !pmd_write(pmd))
 		return 0;
 
 	refs = 0;
@@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
 	if (!(pud_val(pud) & _PAGE_VALID))
 		return 0;
 
-	if (!pud_access_permitted(pud, write))
+	if (write && !pud_write(pud))
 		return 0;
 
 	refs = 0;
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 5765e7e711f78248d2bff70f9c57ca48a4514355..ff5f9cb3039af1f91c8701915f08c051c21d0d81 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		u8 *func = ((u8 *)__bpf_call_base) + imm;
 
 		ctx->saw_call = true;
+		if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+			emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
 
 		emit_call((u32 *)func, ctx);
 		emit_nop(ctx);
 
 		emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
 
-		if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
-			load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
+		if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+			load_skb_regs(ctx, L7);
 		break;
 	}
 
diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild
index 5711de0a1b5efc92519e152462a0ef1516612add..cc439612bcd52fee78256802f5aac1ded0c37ec8 100644
--- a/arch/tile/include/uapi/asm/Kbuild
+++ b/arch/tile/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 50a32c33d729ba2a570eadaf77cff69925218c42..73c57f614c9e0600a5b4df7b28a7fa55f4abe471 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -1,4 +1,5 @@
 generic-y += barrier.h
+generic-y += bpf_perf_event.h
 generic-y += bug.h
 generic-y += clkdev.h
 generic-y += current.h
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index b668e351fd6c2e4f7a4b75c8a67eada77449abc9..fca34b2177e28a055663055d01c4fb7d78420285 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
 /*
  * Needed since we do not use the asm-generic/mm_hooks.h:
  */
-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	uml_setup_stubs(mm);
+	return 0;
 }
 extern void arch_exit_mmap(struct mm_struct *mm);
 static inline void arch_unmap(struct mm_struct *mm,
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 4e6fcb32620ffb2125f648622499e5bf7c950e72..428644175956231aad112a0ce221452913736635 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
 	if (!printk_ratelimit())
 		return;
 
-	printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
 		task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 		tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
 		(void *)UPT_IP(regs), (void *)UPT_SP(regs),
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index 59b06b48f27d7a4e0d8b82fc147d3fdad7f75295..5c205a9cb5a6a4bb2c865255bc946d7ca4882db1 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -81,9 +81,10 @@ do { \
 	} \
 } while (0)
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-				 struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+				struct mm_struct *mm)
 {
+	return 0;
 }
 
 static inline void arch_unmap(struct mm_struct *mm,
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 759a71411169f4df318e3eb3e97e0f7602a04bb0..8611ef980554c2ef81378a087d15d42f1e3f3acb 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c
index 5f25b39f04d4305dbec925a85dd43a4ed421386d..c4ac6043ebb0fc44e7016d6124ce7ce780adabd3 100644
--- a/arch/unicore32/kernel/traps.c
+++ b/arch/unicore32/kernel/traps.c
@@ -298,7 +298,6 @@ void abort(void)
 	/* if that doesn't kill us, halt */
 	panic("Oops failed to kill thread");
 }
-EXPORT_SYMBOL(abort);
 
 void __init trap_init(void)
 {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8eed3f94bfc774de5e3f344590f8889a999dea9c..20da391b5f329885e26b30e0351d73d571d28fc1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,7 +55,6 @@ config X86
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_PMEM_API		if X86_64
-	# Causing hangs/crashes, see the commit that added this change for details.
 	select ARCH_HAS_REFCOUNT
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
 	select ARCH_HAS_SET_MEMORY
@@ -89,6 +88,7 @@ config X86
 	select GENERIC_CLOCKEVENTS_MIN_ADJUST
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_CPU_AUTOPROBE
+	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_EARLY_IOREMAP
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_IOMAP
@@ -429,6 +429,19 @@ config GOLDFISH
        def_bool y
        depends on X86_GOLDFISH
 
+config RETPOLINE
+	bool "Avoid speculative indirect branches in kernel"
+	default y
+	help
+	  Compile kernel with the retpoline compiler options to guard against
+	  kernel-to-user data leaks by avoiding speculative indirect
+	  branches. Requires a compiler with -mindirect-branch=thunk-extern
+	  support for full protection. The kernel may run slower.
+
+	  Without compiler support, at least indirect branches in assembler
+	  code are eliminated. Since this includes the syscall entry path,
+	  it is not entirely pointless.
+
 config INTEL_RDT
 	bool "Intel Resource Director Technology support"
 	default n
@@ -926,7 +939,8 @@ config MAXSMP
 config NR_CPUS
 	int "Maximum number of CPUs" if SMP && !MAXSMP
 	range 2 8 if SMP && X86_32 && !X86_BIGSMP
-	range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
+	range 2 64 if SMP && X86_32 && X86_BIGSMP
+	range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
 	range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
 	default "1" if !SMP
 	default "8192" if MAXSMP
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 6293a8768a9123038eeced9e8dc2c4874feed275..672441c008c73ae3d949b974b128ef167d7d486a 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
 config UNWINDER_GUESS
 	bool "Guess unwinder"
 	depends on EXPERT
+	depends on !STACKDEPOT
 	---help---
 	  This option enables the "guess" unwinder for unwinding kernel stack
 	  traces.  It scans the stack and reports every kernel text address it
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 3e73bc255e4eb3edda965a6bc493e0e6ab8de354..fad55160dcb94a28e60d537d3d69d471a1e10e2e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -230,6 +230,14 @@ KBUILD_CFLAGS += -Wno-sign-compare
 #
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+    ifneq ($(RETPOLINE_CFLAGS),)
+        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+    endif
+endif
+
 archscripts: scripts_basic
 	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
 
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 1e9c322e973af0e1024dc3751b99d16eb76574f7..f25e1530e0644c83d8c69b1a90436c54ce823ae4 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
 ifdef CONFIG_X86_64
 	vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
 	vmlinux-objs-y += $(obj)/mem_encrypt.o
+	vmlinux-objs-y += $(obj)/pgtable_64.o
 endif
 
 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 20919b4f31330fbc36528e47cf7dd010e516c31c..fc313e29fe2c4be637ff7d41bfafdc7a29144ba4 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -305,10 +305,18 @@ ENTRY(startup_64)
 	leaq	boot_stack_end(%rbx), %rsp
 
 #ifdef CONFIG_X86_5LEVEL
-	/* Check if 5-level paging has already enabled */
-	movq	%cr4, %rax
-	testl	$X86_CR4_LA57, %eax
-	jnz	lvl5
+	/*
+	 * Check if we need to enable 5-level paging.
+	 * RSI holds real mode data and need to be preserved across
+	 * a function call.
+	 */
+	pushq	%rsi
+	call	l5_paging_required
+	popq	%rsi
+
+	/* If l5_paging_required() returned zero, we're done here. */
+	cmpq	$0, %rax
+	je	lvl5
 
 	/*
 	 * At this point we are in long mode with 4-level paging enabled,
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b50c42455e25257bff89dd2d9d5f23534340076e..98761a1576ceb5c21b2d8c7e98c1217fd48abb26 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
 	}
 }
 
+static bool l5_supported(void)
+{
+	/* Check if leaf 7 is supported. */
+	if (native_cpuid_eax(0) < 7)
+		return 0;
+
+	/* Check if la57 is supported. */
+	return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
+}
+
 #if CONFIG_X86_NEED_RELOCS
 static void handle_relocations(void *output, unsigned long output_len,
 			       unsigned long virt_addr)
@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	console_init();
 	debug_putstr("early console in extract_kernel\n");
 
+	if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
+		error("This linux kernel as configured requires 5-level paging\n"
+			"This CPU does not support the required 'cr4.la57' feature\n"
+			"Unable to boot - please use a kernel appropriate for your CPU\n");
+	}
+
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index d5364ca2e3f9290d0ba36606b7e25369f872c144..b5e5e02f8cde7fa9123dc56981c3a3a98f45843c 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -23,6 +23,9 @@
  */
 #undef CONFIG_AMD_MEM_ENCRYPT
 
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
 #include "misc.h"
 
 /* These actually do the work of building the kernel identity maps. */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
new file mode 100644
index 0000000000000000000000000000000000000000..b4469a37e9a16ff758c92e6159922c9dbb1b7228
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -0,0 +1,28 @@
+#include <asm/processor.h>
+
+/*
+ * __force_order is used by special_insns.h asm code to force instruction
+ * serialization.
+ *
+ * It is not referenced from the code, but GCC < 5 with -fPIE would fail
+ * due to an undefined symbol. Define it to make these ancient GCCs work.
+ */
+unsigned long __force_order;
+
+int l5_paging_required(void)
+{
+	/* Check if leaf 7 is supported. */
+
+	if (native_cpuid_eax(0) < 7)
+		return 0;
+
+	/* Check if la57 is supported. */
+	if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+		return 0;
+
+	/* Check if 5-level paging has already been enabled. */
+	if (native_read_cr4() & X86_CR4_LA57)
+		return 0;
+
+	return 1;
+}
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index 49f4970f693b3bddacad0b2965acad0ae112cb03..6a10d52a41452d941c22e197fa50b4ebd93ba0d5 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -44,9 +44,9 @@ FDINITRD=$6
 
 # Make sure the files actually exist
 verify "$FBZIMAGE"
-verify "$MTOOLSRC"
 
 genbzdisk() {
+	verify "$MTOOLSRC"
 	mformat a:
 	syslinux $FIMAGE
 	echo "$KCMDLINE" | mcopy - a:syslinux.cfg
@@ -57,6 +57,7 @@ genbzdisk() {
 }
 
 genfdimage144() {
+	verify "$MTOOLSRC"
 	dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
 	mformat v:
 	syslinux $FIMAGE
@@ -68,6 +69,7 @@ genfdimage144() {
 }
 
 genfdimage288() {
+	verify "$MTOOLSRC"
 	dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
 	mformat w:
 	syslinux $FIMAGE
@@ -78,39 +80,43 @@ genfdimage288() {
 	mcopy $FBZIMAGE w:linux
 }
 
-genisoimage() {
+geniso() {
 	tmp_dir=`dirname $FIMAGE`/isoimage
 	rm -rf $tmp_dir
 	mkdir $tmp_dir
-	for i in lib lib64 share end ; do
+	for i in lib lib64 share ; do
 		for j in syslinux ISOLINUX ; do
 			if [ -f /usr/$i/$j/isolinux.bin ] ; then
 				isolinux=/usr/$i/$j/isolinux.bin
-				cp $isolinux $tmp_dir
 			fi
 		done
 		for j in syslinux syslinux/modules/bios ; do
 			if [ -f /usr/$i/$j/ldlinux.c32 ]; then
 				ldlinux=/usr/$i/$j/ldlinux.c32
-				cp $ldlinux $tmp_dir
 			fi
 		done
 		if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
 			break
 		fi
-		if [ $i = end -a -z "$isolinux" ] ; then
-			echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
-			exit 1
-		fi
 	done
+	if [ -z "$isolinux" ] ; then
+		echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
+		exit 1
+	fi
+	if [ -z "$ldlinux" ] ; then
+		echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.'
+		exit 1
+	fi
+	cp $isolinux $tmp_dir
+	cp $ldlinux $tmp_dir
 	cp $FBZIMAGE $tmp_dir/linux
 	echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
 	if [ -f "$FDINITRD" ] ; then
 		cp "$FDINITRD" $tmp_dir/initrd.img
 	fi
-	mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \
-		-c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \
-		$tmp_dir
+	genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \
+		-b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \
+		-boot-info-table $tmp_dir
 	isohybrid $FIMAGE 2>/dev/null || true
 	rm -rf $tmp_dir
 }
@@ -119,6 +125,6 @@ case $1 in
 	bzdisk)     genbzdisk;;
 	fdimage144) genfdimage144;;
 	fdimage288) genfdimage288;;
-	isoimage)   genisoimage;;
+	isoimage)   geniso;;
 	*)          echo 'Unknown image format'; exit 1;
 esac
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 16627fec80b26b211ba3c4b50c3850c134e2bd62..3d09e3aca18dad33ba0c27c3673e49dcbfac0ce8 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,7 @@
 #include <linux/linkage.h>
 #include <asm/inst.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 /*
  * The following macros are used to move an (un)aligned 16 byte value to/from
@@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
 	pxor INC, STATE4
 	movdqu IV, 0x30(OUTP)
 
-	call *%r11
+	CALL_NOSPEC %r11
 
 	movdqu 0x00(OUTP), INC
 	pxor INC, STATE1
@@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
 	_aesni_gf128mul_x_ble()
 	movups IV, (IVP)
 
-	call *%r11
+	CALL_NOSPEC %r11
 
 	movdqu 0x40(OUTP), INC
 	pxor INC, STATE1
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index f7c495e2863cb0daecb7b023e19e393cfc32c587..a14af6eb09cb074a1dcf42d0adaaf1c388ceb30b 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -17,6 +17,7 @@
 
 #include <linux/linkage.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
 	vpxor 14 * 16(%rax), %xmm15, %xmm14;
 	vpxor 15 * 16(%rax), %xmm15, %xmm15;
 
-	call *%r9;
+	CALL_NOSPEC %r9;
 
 	addq $(16 * 16), %rsp;
 
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index eee5b3982cfd3c6838a9a34534409df8935eba17..b66bbfa62f50d7c05ddb29b57a322979f64a6999 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
 	vpxor 14 * 32(%rax), %ymm15, %ymm14;
 	vpxor 15 * 32(%rax), %ymm15, %ymm15;
 
-	call *%r9;
+	CALL_NOSPEC %r9;
 
 	addq $(16 * 32), %rsp;
 
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 7a7de27c6f415206f2c3b8e5a8a43d4468bc7aaa..d9b734d0c8cc78ecdc3293ca117546eb538a12fc 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -45,6 +45,7 @@
 
 #include <asm/inst.h>
 #include <linux/linkage.h>
+#include <asm/nospec-branch.h>
 
 ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
 
@@ -172,7 +173,7 @@ continue_block:
 	movzxw  (bufp, %rax, 2), len
 	lea	crc_array(%rip), bufp
 	lea     (bufp, len, 1), bufp
-	jmp     *bufp
+	JMP_NOSPEC bufp
 
 	################################################################
 	## 2a) PROCESS FULL BLOCKS:
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index 399a29d067d6367603714633fb8c4de6ab77275a..cb91a64a99e7cdbc0422227383611378fb6b076a 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc,
 
 	salsa20_ivsetup(ctx, walk.iv);
 
-	if (likely(walk.nbytes == nbytes))
-	{
-		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-				      walk.dst.virt.addr, nbytes);
-		return blkcipher_walk_done(desc, &walk, 0);
-	}
-
 	while (walk.nbytes >= 64) {
 		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
 				      walk.dst.virt.addr,
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 3fd8bc560faece4cb6253e87a4c092965a73e7af..3f48f695d5e6ac6546a009c734fcac517564b24d 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,6 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/jump_label.h>
 #include <asm/unwind_hints.h>
+#include <asm/cpufeatures.h>
+#include <asm/page_types.h>
+#include <asm/percpu.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
 
 /*
 
@@ -187,6 +192,148 @@ For 32-bit we have the following conventions - kernel is built with
 #endif
 .endm
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+/*
+ * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
+ * halves:
+ */
+#define PTI_USER_PGTABLE_BIT		PAGE_SHIFT
+#define PTI_USER_PGTABLE_MASK		(1 << PTI_USER_PGTABLE_BIT)
+#define PTI_USER_PCID_BIT		X86_CR3_PTI_PCID_USER_BIT
+#define PTI_USER_PCID_MASK		(1 << PTI_USER_PCID_BIT)
+#define PTI_USER_PGTABLE_AND_PCID_MASK  (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
+
+.macro SET_NOFLUSH_BIT	reg:req
+	bts	$X86_CR3_PCID_NOFLUSH_BIT, \reg
+.endm
+
+.macro ADJUST_KERNEL_CR3 reg:req
+	ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
+	/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
+	andq    $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
+.endm
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+	mov	%cr3, \scratch_reg
+	ADJUST_KERNEL_CR3 \scratch_reg
+	mov	\scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+#define THIS_CPU_user_pcid_flush_mask   \
+	PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+	mov	%cr3, \scratch_reg
+
+	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+	/*
+	 * Test if the ASID needs a flush.
+	 */
+	movq	\scratch_reg, \scratch_reg2
+	andq	$(0x7FF), \scratch_reg		/* mask ASID */
+	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jnc	.Lnoflush_\@
+
+	/* Flush needed, clear the bit */
+	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	movq	\scratch_reg2, \scratch_reg
+	jmp	.Lwrcr3_pcid_\@
+
+.Lnoflush_\@:
+	movq	\scratch_reg2, \scratch_reg
+	SET_NOFLUSH_BIT \scratch_reg
+
+.Lwrcr3_pcid_\@:
+	/* Flip the ASID to the user version */
+	orq	$(PTI_USER_PCID_MASK), \scratch_reg
+
+.Lwrcr3_\@:
+	/* Flip the PGD to the user version */
+	orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
+	mov	\scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+.macro SWITCH_TO_USER_CR3_STACK	scratch_reg:req
+	pushq	%rax
+	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+	popq	%rax
+.endm
+
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+	ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
+	movq	%cr3, \scratch_reg
+	movq	\scratch_reg, \save_reg
+	/*
+	 * Test the user pagetable bit. If set, then the user page tables
+	 * are active. If clear CR3 already has the kernel page table
+	 * active.
+	 */
+	bt	$PTI_USER_PGTABLE_BIT, \scratch_reg
+	jnc	.Ldone_\@
+
+	ADJUST_KERNEL_CR3 \scratch_reg
+	movq	\scratch_reg, %cr3
+
+.Ldone_\@:
+.endm
+
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+
+	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+	/*
+	 * KERNEL pages can always resume with NOFLUSH as we do
+	 * explicit flushes.
+	 */
+	bt	$PTI_USER_PGTABLE_BIT, \save_reg
+	jnc	.Lnoflush_\@
+
+	/*
+	 * Check if there's a pending flush for the user ASID we're
+	 * about to set.
+	 */
+	movq	\save_reg, \scratch_reg
+	andq	$(0x7FF), \scratch_reg
+	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jnc	.Lnoflush_\@
+
+	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jmp	.Lwrcr3_\@
+
+.Lnoflush_\@:
+	SET_NOFLUSH_BIT \save_reg
+
+.Lwrcr3_\@:
+	/*
+	 * The CR3 write could be avoided when not changing its value,
+	 * but would require a CR3 read *and* a scratch register.
+	 */
+	movq	\save_reg, %cr3
+.Lend_\@:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+.endm
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+.endm
+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+.endm
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+.endm
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+.endm
+
+#endif
+
 #endif /* CONFIG_X86_64 */
 
 /*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4838037f97f6edffda62b5b045c837fcc29402f0..a1f28a54f23a42e85436ed607293bea14486391c 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -44,6 +44,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 	.section .entry.text, "ax"
 
@@ -290,7 +291,7 @@ ENTRY(ret_from_fork)
 
 	/* kernel thread */
 1:	movl	%edi, %eax
-	call	*%ebx
+	CALL_NOSPEC %ebx
 	/*
 	 * A kernel thread is allowed to return here after successfully
 	 * calling do_execve().  Exit to userspace to complete the execve()
@@ -919,7 +920,7 @@ common_exception:
 	movl	%ecx, %es
 	TRACE_IRQS_OFF
 	movl	%esp, %eax			# pt_regs pointer
-	call	*%edi
+	CALL_NOSPEC %edi
 	jmp	ret_from_exception
 END(common_exception)
 
@@ -941,9 +942,10 @@ ENTRY(debug)
 	movl	%esp, %eax			# pt_regs pointer
 
 	/* Are we currently on the SYSENTER stack? */
-	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
-	cmpl	$SIZEOF_SYSENTER_stack, %ecx
+	movl	PER_CPU_VAR(cpu_entry_area), %ecx
+	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
+	cmpl	$SIZEOF_entry_stack, %ecx
 	jb	.Ldebug_from_sysenter_stack
 
 	TRACE_IRQS_OFF
@@ -984,9 +986,10 @@ ENTRY(nmi)
 	movl	%esp, %eax			# pt_regs pointer
 
 	/* Are we currently on the SYSENTER stack? */
-	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
-	cmpl	$SIZEOF_SYSENTER_stack, %ecx
+	movl	PER_CPU_VAR(cpu_entry_area), %ecx
+	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
+	cmpl	$SIZEOF_entry_stack, %ecx
 	jb	.Lnmi_from_sysenter_stack
 
 	/* Not on SYSENTER stack. */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f81d50d7ceacdefa06d61482687937096c68421c..4f8e1d35a97ca7071a11f33f07e2b87c2eac7acf 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -23,7 +23,6 @@
 #include <asm/segment.h>
 #include <asm/cache.h>
 #include <asm/errno.h>
-#include "calling.h"
 #include <asm/asm-offsets.h>
 #include <asm/msr.h>
 #include <asm/unistd.h>
@@ -38,8 +37,11 @@
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 #include <linux/err.h>
 
+#include "calling.h"
+
 .code64
 .section .entry.text, "ax"
 
@@ -140,6 +142,67 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
+	.pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address).  So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the remapped) entry
+ * trampoline.  We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+	_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH	CPU_ENTRY_AREA_entry_stack + \
+			SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+	UNWIND_HINT_EMPTY
+	swapgs
+
+	/* Stash the user RSP. */
+	movq	%rsp, RSP_SCRATCH
+
+	/* Note: using %rsp as a scratch reg. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+	/* Load the top of the task stack into RSP */
+	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+	/* Start building the simulated IRET frame. */
+	pushq	$__USER_DS			/* pt_regs->ss */
+	pushq	RSP_SCRATCH			/* pt_regs->sp */
+	pushq	%r11				/* pt_regs->flags */
+	pushq	$__USER_CS			/* pt_regs->cs */
+	pushq	%rcx				/* pt_regs->ip */
+
+	/*
+	 * x86 lacks a near absolute jump, and we can't jump to the real
+	 * entry text with a relative jump.  We could push the target
+	 * address and then use retq, but this destroys the pipeline on
+	 * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
+	 * spill RDI and restore it in a second-stage trampoline.
+	 */
+	pushq	%rdi
+	movq	$entry_SYSCALL_64_stage2, %rdi
+	JMP_NOSPEC %rdi
+END(entry_SYSCALL_64_trampoline)
+
+	.popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+	UNWIND_HINT_EMPTY
+	popq	%rdi
+	jmp	entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
 ENTRY(entry_SYSCALL_64)
 	UNWIND_HINT_EMPTY
 	/*
@@ -149,6 +212,10 @@ ENTRY(entry_SYSCALL_64)
 	 */
 
 	swapgs
+	/*
+	 * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+	 * is not required to switch CR3.
+	 */
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
@@ -204,7 +271,12 @@ entry_SYSCALL_64_fastpath:
 	 * It might end up jumping to the slow path.  If it jumps, RAX
 	 * and all argument registers are clobbered.
 	 */
+#ifdef CONFIG_RETPOLINE
+	movq	sys_call_table(, %rax, 8), %rax
+	call	__x86_indirect_thunk_rax
+#else
 	call	*sys_call_table(, %rax, 8)
+#endif
 .Lentry_SYSCALL_64_after_fastpath_call:
 
 	movq	%rax, RAX(%rsp)
@@ -330,8 +402,25 @@ syscall_return_via_sysret:
 	popq	%rsi	/* skip rcx */
 	popq	%rdx
 	popq	%rsi
+
+	/*
+	 * Now all regs are restored except RSP and RDI.
+	 * Save old stack pointer and switch to trampoline stack.
+	 */
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+	pushq	RSP-RDI(%rdi)	/* RSP */
+	pushq	(%rdi)		/* RDI */
+
+	/*
+	 * We are on the trampoline stack.  All regs except RDI are live.
+	 * We can do future final exit work right here.
+	 */
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
 	popq	%rdi
-	movq	RSP-ORIG_RAX(%rsp), %rsp
+	popq	%rsp
 	USERGS_SYSRET64
 END(entry_SYSCALL_64)
 
@@ -359,7 +448,7 @@ ENTRY(stub_ptregs_64)
 	jmp	entry_SYSCALL64_slow_path
 
 1:
-	jmp	*%rax				/* Called from C */
+	JMP_NOSPEC %rax				/* Called from C */
 END(stub_ptregs_64)
 
 .macro ptregs_stub func
@@ -438,7 +527,7 @@ ENTRY(ret_from_fork)
 1:
 	/* kernel thread */
 	movq	%r12, %rdi
-	call	*%rbx
+	CALL_NOSPEC %rbx
 	/*
 	 * A kernel thread is allowed to return here after successfully
 	 * calling do_execve().  Exit to userspace to complete the execve()
@@ -466,12 +555,13 @@ END(irq_entries_start)
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
-	pushfq
-	testl $X86_EFLAGS_IF, (%rsp)
+	pushq %rax
+	SAVE_FLAGS(CLBR_RAX)
+	testl $X86_EFLAGS_IF, %eax
 	jz .Lokay_\@
 	ud2
 .Lokay_\@:
-	addq $8, %rsp
+	popq %rax
 #endif
 .endm
 
@@ -563,6 +653,13 @@ END(irq_entries_start)
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
 	cld
+
+	testb	$3, CS-ORIG_RAX(%rsp)
+	jz	1f
+	SWAPGS
+	call	switch_to_thread_stack
+1:
+
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
@@ -572,12 +669,8 @@ END(irq_entries_start)
 	jz	1f
 
 	/*
-	 * IRQ from user mode.  Switch to kernel gsbase and inform context
-	 * tracking that we're in kernel mode.
-	 */
-	SWAPGS
-
-	/*
+	 * IRQ from user mode.
+	 *
 	 * We need to tell lockdep that IRQs are off.  We can't do this until
 	 * we fix gsbase, and we should do it before enter_from_user_mode
 	 * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +723,43 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 	ud2
 1:
 #endif
-	SWAPGS
 	POP_EXTRA_REGS
-	POP_C_REGS
-	addq	$8, %rsp	/* skip regs->orig_ax */
+	popq	%r11
+	popq	%r10
+	popq	%r9
+	popq	%r8
+	popq	%rax
+	popq	%rcx
+	popq	%rdx
+	popq	%rsi
+
+	/*
+	 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
+	 * Save old stack pointer and switch to trampoline stack.
+	 */
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+	/* Copy the IRET frame to the trampoline stack. */
+	pushq	6*8(%rdi)	/* SS */
+	pushq	5*8(%rdi)	/* RSP */
+	pushq	4*8(%rdi)	/* EFLAGS */
+	pushq	3*8(%rdi)	/* CS */
+	pushq	2*8(%rdi)	/* RIP */
+
+	/* Push user RDI on the trampoline stack. */
+	pushq	(%rdi)
+
+	/*
+	 * We are on the trampoline stack.  All regs except RDI are live.
+	 * We can do future final exit work right here.
+	 */
+
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
+	/* Restore RDI. */
+	popq	%rdi
+	SWAPGS
 	INTERRUPT_RETURN
 
 
@@ -713,7 +839,9 @@ native_irq_return_ldt:
 	 */
 
 	pushq	%rdi				/* Stash user RDI */
-	SWAPGS
+	SWAPGS					/* to kernel GS */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* to kernel CR3 */
+
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
 	movq	%rax, (0*8)(%rdi)		/* user RAX */
 	movq	(1*8)(%rsp), %rax		/* user RIP */
@@ -729,7 +857,6 @@ native_irq_return_ldt:
 	/* Now RAX == RSP. */
 
 	andl	$0xffff0000, %eax		/* RAX = (RSP & 0xffff0000) */
-	popq	%rdi				/* Restore user RDI */
 
 	/*
 	 * espfix_stack[31:16] == 0.  The page tables are set up such that
@@ -740,7 +867,11 @@ native_irq_return_ldt:
 	 * still points to an RO alias of the ESPFIX stack.
 	 */
 	orq	PER_CPU_VAR(espfix_stack), %rax
-	SWAPGS
+
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+	SWAPGS					/* to user GS */
+	popq	%rdi				/* Restore user RDI */
+
 	movq	%rax, %rsp
 	UNWIND_HINT_IRET_REGS offset=8
 
@@ -829,7 +960,35 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
 /*
  * Exception entry points.
  */
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+
+/*
+ * Switch to the thread stack.  This is called with the IRET frame and
+ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+	UNWIND_HINT_FUNC
+
+	pushq	%rdi
+	/* Need to switch before accessing the thread stack. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+	pushq	7*8(%rdi)		/* regs->ss */
+	pushq	6*8(%rdi)		/* regs->rsp */
+	pushq	5*8(%rdi)		/* regs->eflags */
+	pushq	4*8(%rdi)		/* regs->cs */
+	pushq	3*8(%rdi)		/* regs->ip */
+	pushq	2*8(%rdi)		/* regs->orig_ax */
+	pushq	8(%rdi)			/* return address */
+	UNWIND_HINT_FUNC
+
+	movq	(%rdi), %rdi
+	ret
+END(switch_to_thread_stack)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -848,11 +1007,12 @@ ENTRY(\sym)
 
 	ALLOC_PT_GPREGS_ON_STACK
 
-	.if \paranoid
-	.if \paranoid == 1
+	.if \paranoid < 2
 	testb	$3, CS(%rsp)			/* If coming from userspace, switch stacks */
-	jnz	1f
+	jnz	.Lfrom_usermode_switch_stack_\@
 	.endif
+
+	.if \paranoid
 	call	paranoid_entry
 	.else
 	call	error_entry
@@ -894,20 +1054,15 @@ ENTRY(\sym)
 	jmp	error_exit
 	.endif
 
-	.if \paranoid == 1
+	.if \paranoid < 2
 	/*
-	 * Paranoid entry from userspace.  Switch stacks and treat it
+	 * Entry from userspace.  Switch stacks and treat it
 	 * as a normal entry.  This means that paranoid handlers
 	 * run in real process context if user_mode(regs).
 	 */
-1:
+.Lfrom_usermode_switch_stack_\@:
 	call	error_entry
 
-
-	movq	%rsp, %rdi			/* pt_regs pointer */
-	call	sync_regs
-	movq	%rax, %rsp			/* switch stack */
-
 	movq	%rsp, %rdi			/* pt_regs pointer */
 
 	.if \has_error_code
@@ -1119,7 +1274,11 @@ ENTRY(paranoid_entry)
 	js	1f				/* negative -> in kernel */
 	SWAPGS
 	xorl	%ebx, %ebx
-1:	ret
+
+1:
+	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+
+	ret
 END(paranoid_entry)
 
 /*
@@ -1141,6 +1300,7 @@ ENTRY(paranoid_exit)
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
+	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 	SWAPGS_UNSAFE_STACK
 	jmp	.Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
@@ -1168,8 +1328,18 @@ ENTRY(error_entry)
 	 * from user mode due to an IRET fault.
 	 */
 	SWAPGS
+	/* We have user CR3.  Change to kernel CR3. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 .Lerror_entry_from_usermode_after_swapgs:
+	/* Put us onto the real thread stack. */
+	popq	%r12				/* save return addr in %12 */
+	movq	%rsp, %rdi			/* arg0 = pt_regs pointer */
+	call	sync_regs
+	movq	%rax, %rsp			/* switch stack */
+	ENCODE_FRAME_POINTER
+	pushq	%r12
+
 	/*
 	 * We need to tell lockdep that IRQs are off.  We can't do this until
 	 * we fix gsbase, and we should do it before enter_from_user_mode
@@ -1206,6 +1376,7 @@ ENTRY(error_entry)
 	 * .Lgs_change's error handler with kernel gsbase.
 	 */
 	SWAPGS
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 	jmp .Lerror_entry_done
 
 .Lbstep_iret:
@@ -1215,10 +1386,11 @@ ENTRY(error_entry)
 
 .Lerror_bad_iret:
 	/*
-	 * We came from an IRET to user mode, so we have user gsbase.
-	 * Switch to kernel gsbase:
+	 * We came from an IRET to user mode, so we have user
+	 * gsbase and CR3.  Switch to kernel gsbase and CR3:
 	 */
 	SWAPGS
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 	/*
 	 * Pretend that the exception came from user mode: set up pt_regs
@@ -1250,6 +1422,10 @@ END(error_exit)
 /*
  * Runs on exception stack.  Xen PV does not go through this path at all,
  * so we can use real assembly here.
+ *
+ * Registers:
+ *	%r14: Used to save/restore the CR3 of the interrupted context
+ *	      when PAGE_TABLE_ISOLATION is in use.  Do not clobber.
  */
 ENTRY(nmi)
 	UNWIND_HINT_IRET_REGS
@@ -1313,6 +1489,7 @@ ENTRY(nmi)
 
 	swapgs
 	cld
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 	UNWIND_HINT_IRET_REGS base=%rdx offset=8
@@ -1565,6 +1742,8 @@ end_repeat_nmi:
 	movq	$-1, %rsi
 	call	do_nmi
 
+	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	nmi_restore
 nmi_swapgs:
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 568e130d932cd2a7d44393e5fc52408cffe64f34..98d5358e4041a7e144ec566f7db19ff054cedbcc 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -48,7 +48,11 @@
  */
 ENTRY(entry_SYSENTER_compat)
 	/* Interrupts are off on entry. */
-	SWAPGS_UNSAFE_STACK
+	SWAPGS
+
+	/* We are about to clobber %rsp anyway, clobbering here is OK */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	/*
@@ -186,8 +190,13 @@ ENTRY(entry_SYSCALL_compat)
 	/* Interrupts are off on entry. */
 	swapgs
 
-	/* Stash user ESP and switch to the kernel stack. */
+	/* Stash user ESP */
 	movl	%esp, %r8d
+
+	/* Use %rsp as scratch reg. User ESP is stashed in r8 */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+	/* Switch to the kernel stack */
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	/* Construct struct pt_regs on stack */
@@ -256,10 +265,22 @@ sysret32_from_system_call:
 	 * when the system call started, which is already known to user
 	 * code.  We zero R8-R10 to avoid info leaks.
          */
+	movq	RSP-ORIG_RAX(%rsp), %rsp
+
+	/*
+	 * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
+	 * on the process stack which is not mapped to userspace and
+	 * not readable after we SWITCH_TO_USER_CR3.  Delay the CR3
+	 * switch until after after the last reference to the process
+	 * stack.
+	 *
+	 * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
+	 */
+	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
+
 	xorq	%r8, %r8
 	xorq	%r9, %r9
 	xorq	%r10, %r10
-	movq	RSP-ORIG_RAX(%rsp), %rsp
 	swapgs
 	sysretl
 END(entry_SYSCALL_compat)
@@ -306,8 +327,11 @@ ENTRY(entry_INT80_compat)
 	 */
 	movl	%eax, %eax
 
-	/* Construct struct pt_regs on stack (iret frame is already on stack) */
 	pushq	%rax			/* pt_regs->orig_ax */
+
+	/* switch to thread stack expects orig_ax to be pushed */
+	call	switch_to_thread_stack
+
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 11b13c4b43d55f8d6c8b239f478ecb302d4cfd07..f19856d95c60919c92d1679e0037d9339c4c2a65 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -324,5 +324,5 @@ notrace time_t __vdso_time(time_t *t)
 		*t = result;
 	return result;
 }
-int time(time_t *t)
+time_t time(time_t *t)
 	__attribute__((weak, alias("__vdso_time")));
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index f279ba2643dc8933b9659242082e7ef2ea2d9dd6..577fa8adb785baf5ea1c993a2bbc88adf43fbbcc 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -37,6 +37,7 @@
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
 #include <asm/traps.h>
+#include <asm/paravirt.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
 	WARN_ON_ONCE(address != regs->ip);
 
+	/* This should be unreachable in NATIVE mode. */
+	if (WARN_ON(vsyscall_mode == NATIVE))
+		return false;
+
 	if (vsyscall_mode == NONE) {
 		warn_bad_vsyscall(KERN_INFO, regs,
 				  "vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
 	return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present.  If so, we skip calling
+ * this.
+ */
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
+	set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+	p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+	p4d->p4d |= _PAGE_USER;
+#endif
+	pud = pud_offset(p4d, VSYSCALL_ADDR);
+	set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
+	pmd = pmd_offset(pud, VSYSCALL_ADDR);
+	set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
+}
+
 void __init map_vsyscall(void)
 {
 	extern char __vsyscall_page;
 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-	if (vsyscall_mode != NONE)
+	if (vsyscall_mode != NONE) {
 		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
 			     vsyscall_mode == NATIVE
 			     ? PAGE_KERNEL_VSYSCALL
 			     : PAGE_KERNEL_VVAR);
+		set_vsyscall_pgtable_user_bits(swapper_pg_dir);
+	}
 
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
 		     (unsigned long)VSYSCALL_ADDR);
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 141e07b0621689e745582599c009f4af1d053c6c..24ffa1e88cf948ecbe9839f6d956c69a12ecd4f0 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -582,6 +582,24 @@ static __init int bts_init(void)
 	if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
 		return -ENODEV;
 
+	if (boot_cpu_has(X86_FEATURE_PTI)) {
+		/*
+		 * BTS hardware writes through a virtual memory map we must
+		 * either use the kernel physical map, or the user mapping of
+		 * the AUX buffer.
+		 *
+		 * However, since this driver supports per-CPU and per-task inherit
+		 * we cannot use the user mapping since it will not be availble
+		 * if we're not running the owning process.
+		 *
+		 * With PTI we can't use the kernal map either, because its not
+		 * there when we run userspace.
+		 *
+		 * For now, disable this driver when using PTI.
+		 */
+		return -ENODEV;
+	}
+
 	bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
 				  PERF_PMU_CAP_EXCLUSIVE;
 	bts_pmu.task_ctx_nr	= perf_sw_context;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 09c26a4f139c125e000675689ebc983acd8ab91a..731153a4681e73f761dea8c0c15ce6757b89860e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = {
 
 __init int intel_pmu_init(void)
 {
+	struct attribute **extra_attr = NULL;
+	struct attribute **to_free = NULL;
 	union cpuid10_edx edx;
 	union cpuid10_eax eax;
 	union cpuid10_ebx ebx;
@@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void)
 	unsigned int unused;
 	struct extra_reg *er;
 	int version, i;
-	struct attribute **extra_attr = NULL;
 	char *name;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void)
 		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
 			hsw_format_attr : nhm_format_attr;
 		extra_attr = merge_attr(extra_attr, skl_format_attr);
+		to_free = extra_attr;
 		x86_pmu.cpu_events = get_hsw_events_attrs();
 		intel_pmu_pebs_data_source_skl(
 			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
@@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void)
 		pr_cont("full-width counters, ");
 	}
 
+	kfree(to_free);
 	return 0;
 }
 
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 3674a4b6f8bd0c5f12223b8f5c16067a933450df..8156e47da7ba4c5a31e9f9436b27a96cfb7da4c3 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -3,16 +3,19 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/perf_event.h>
+#include <asm/tlbflush.h>
 #include <asm/insn.h>
 
 #include "../perf_event.h"
 
+/* Waste a full page so it can be mapped into the cpu_entry_area */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
 
-#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
 #define PEBS_FIXUP_SIZE		PAGE_SIZE
 
 /*
@@ -279,17 +282,67 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
-static int alloc_pebs_buffer(int cpu)
+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	unsigned long start = (unsigned long)cea;
+	phys_addr_t pa;
+	size_t msz = 0;
+
+	pa = virt_to_phys(addr);
+
+	preempt_disable();
+	for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, pa, prot);
+
+	/*
+	 * This is a cross-CPU update of the cpu_entry_area, we must shoot down
+	 * all TLB entries for it.
+	 */
+	flush_tlb_kernel_range(start, start + size);
+	preempt_enable();
+}
+
+static void ds_clear_cea(void *cea, size_t size)
+{
+	unsigned long start = (unsigned long)cea;
+	size_t msz = 0;
+
+	preempt_disable();
+	for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, 0, PAGE_NONE);
+
+	flush_tlb_kernel_range(start, start + size);
+	preempt_enable();
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+	unsigned int order = get_order(size);
 	int node = cpu_to_node(cpu);
-	int max;
-	void *buffer, *ibuffer;
+	struct page *page;
+
+	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+	return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+	if (buffer)
+		free_pages((unsigned long)buffer, get_order(size));
+}
+
+static int alloc_pebs_buffer(int cpu)
+{
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	size_t bsiz = x86_pmu.pebs_buffer_size;
+	int max, node = cpu_to_node(cpu);
+	void *buffer, *ibuffer, *cea;
 
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -300,25 +353,27 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree_pages(buffer, bsiz);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
 	}
-
-	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
-	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_pebs_vaddr = buffer;
+	/* Update the cpu entry area mapping */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds->pebs_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
 	ds->pebs_index = ds->pebs_buffer_base;
-	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-		max * x86_pmu.pebs_record_size;
-
+	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
 	return 0;
 }
 
 static void release_pebs_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.pebs)
 		return;
@@ -326,73 +381,70 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
 	ds->pebs_buffer_base = 0;
+	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+	hwev->ds_pebs_vaddr = NULL;
 }
 
 static int alloc_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	int node = cpu_to_node(cpu);
-	int max, thresh;
-	void *buffer;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *buffer, *cea;
+	int max;
 
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
 	}
-
-	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-	thresh = max / 16;
-
-	ds->bts_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_bts_vaddr = buffer;
+	/* Update the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds->bts_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
 	ds->bts_index = ds->bts_buffer_base;
-	ds->bts_absolute_maximum = ds->bts_buffer_base +
-		max * BTS_RECORD_SIZE;
-	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-		thresh * BTS_RECORD_SIZE;
-
+	max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+	ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
 	return 0;
 }
 
 static void release_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.bts)
 		return;
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds_clear_cea(cea, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
+	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+	hwev->ds_bts_vaddr = NULL;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
-
 	return 0;
 }
 
 static void release_ds_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 
 void release_ds_buffers(void)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index f7aaadf9331fb75587e74a42b041d78b2a014fc0..8e4ea143ed96403d275bf6727801961db9a053d7 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -14,6 +14,8 @@
 
 #include <linux/perf_event.h>
 
+#include <asm/intel_ds.h>
+
 /* To enable MSR tracing please use the generic trace points. */
 
 /*
@@ -77,8 +79,6 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		8
 #define PEBS_COUNTER_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)
 
 /*
@@ -95,23 +95,6 @@ struct amd_nb {
 	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
 	PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
 
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
 #define PEBS_REGS \
 	(PERF_REG_X86_AX | \
 	 PERF_REG_X86_BX | \
@@ -216,6 +199,8 @@ struct cpu_hw_events {
 	 * Intel DebugStore bits
 	 */
 	struct debug_store	*ds;
+	void			*ds_pebs_vaddr;
+	void			*ds_bts_vaddr;
 	u64			pebs_enabled;
 	int			n_pebs;
 	int			n_large_pebs;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index dbfd0854651fe4fa13154e3ce04e9baa1f643637..cf5961ca867746ae2eadb4a2b6f2c9068cec48c4 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -140,7 +140,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	".popsection\n"							\
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
-	".popsection"
+	".popsection\n"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
 	OLDINSTR_2(oldinstr, 1, 2)					\
@@ -151,7 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
 	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
-	".popsection"
+	".popsection\n"
 
 /*
  * Alternative instructions for different CPU types or capabilities.
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index ff700d81e91efcf89a2f2ac725b3e80d868c8621..0927cdc4f9460165a9159b1c4f7bea0e759bdbb9 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -11,7 +11,32 @@
 #include <asm/pgtable.h>
 #include <asm/special_insns.h>
 #include <asm/preempt.h>
+#include <asm/asm.h>
 
 #ifndef CONFIG_X86_CMPXCHG64
 extern void cmpxchg8b_emu(void);
 #endif
+
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_32
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
+#else
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
+INDIRECT_THUNK(8)
+INDIRECT_THUNK(9)
+INDIRECT_THUNK(10)
+INDIRECT_THUNK(11)
+INDIRECT_THUNK(12)
+INDIRECT_THUNK(13)
+INDIRECT_THUNK(14)
+INDIRECT_THUNK(15)
+#endif
+INDIRECT_THUNK(ax)
+INDIRECT_THUNK(bx)
+INDIRECT_THUNK(cx)
+INDIRECT_THUNK(dx)
+INDIRECT_THUNK(si)
+INDIRECT_THUNK(di)
+INDIRECT_THUNK(bp)
+INDIRECT_THUNK(sp)
+#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 219faaec51dfa192f69d8893c8844219c0c89029..386a6900e206f6578e3b38ee7f085d36ac50a928 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -136,6 +136,7 @@
 #endif
 
 #ifndef __ASSEMBLY__
+#ifndef __BPF__
 /*
  * This output constraint should be used for any inline asm which has a "call"
  * instruction.  Otherwise the asm may be inserted before the frame pointer
@@ -145,5 +146,6 @@
 register unsigned long current_stack_pointer asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif
+#endif
 
 #endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644
index 0000000000000000000000000000000000000000..4a7884b8dca55bc58f077a90ad93d6398bddd053
--- /dev/null
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
+#define _ASM_X86_CPU_ENTRY_AREA_H
+
+#include <linux/percpu-defs.h>
+#include <asm/processor.h>
+#include <asm/intel_ds.h>
+
+/*
+ * cpu_entry_area is a percpu region that contains things needed by the CPU
+ * and early entry/exit code.  Real types aren't used for all fields here
+ * to avoid circular header dependencies.
+ *
+ * Every field is a virtual alias of some other allocated backing store.
+ * There is no direct allocation of a struct cpu_entry_area.
+ */
+struct cpu_entry_area {
+	char gdt[PAGE_SIZE];
+
+	/*
+	 * The GDT is just below entry_stack and thus serves (on x86_64) as
+	 * a a read-only guard page.
+	 */
+	struct entry_stack_page entry_stack_page;
+
+	/*
+	 * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
+	 * we need task switches to work, and task switches write to the TSS.
+	 */
+	struct tss_struct tss;
+
+	char entry_trampoline[PAGE_SIZE];
+
+#ifdef CONFIG_X86_64
+	/*
+	 * Exception stacks used for IST entries.
+	 *
+	 * In the future, this should have a separate slot for each stack
+	 * with guard pages between them.
+	 */
+	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+	/*
+	 * Per CPU debug store for Intel performance monitoring. Wastes a
+	 * full page at the moment.
+	 */
+	struct debug_store cpu_debug_store;
+	/*
+	 * The actual PEBS/BTS buffers must be mapped to user space
+	 * Reserve enough fixmap PTEs.
+	 */
+	struct debug_store_buffers cpu_debug_buffers;
+#endif
+};
+
+#define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_TOT_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+extern void setup_cpu_entry_areas(void);
+extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
+
+#define	CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE			\
+	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
+
+extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
+
+static inline struct entry_stack *cpu_entry_stack(int cpu)
+{
+	return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+}
+
+#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bf6a76202a779ee131b4df8c89449ab52abd0a79..ea9a7dde62e5c4d551ba89e429f911fb5c6603fd 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
 	set_bit(bit, (unsigned long *)cpu_caps_set);	\
 } while (0)
 
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c0b0e9e8aa66eb645eba71784e80aa93b0f0df79..f275447862f4583909d14b6e2469c16e4a81b951 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -197,11 +197,14 @@
 #define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
-
+#define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
+#define X86_FEATURE_RETPOLINE		( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -266,6 +269,7 @@
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
@@ -339,5 +343,8 @@
 #define X86_BUG_SWAPGS_FENCE		X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN		X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1		X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2		X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4011cb03ef08e52db15f52779ce366c26359a34b..13c5ee878a477902b8494532b24853b6c156a170 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -7,6 +7,7 @@
 #include <asm/mmu.h>
 #include <asm/fixmap.h>
 #include <asm/irq_vectors.h>
+#include <asm/cpu_entry_area.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -20,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
 	desc->type		= (info->read_exec_only ^ 1) << 1;
 	desc->type	       |= info->contents << 2;
+	/* Set the ACCESS bit so it can be mapped RO */
+	desc->type	       |= 1;
 
 	desc->s			= 1;
 	desc->dpl		= 0x3;
@@ -60,17 +63,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
 	return this_cpu_ptr(&gdt_page)->gdt;
 }
 
-/* Get the fixmap index for a specific processor */
-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
-{
-	return FIX_GDT_REMAP_BEGIN + cpu;
-}
-
 /* Provide the fixmap address of the remapped GDT */
 static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
 {
-	unsigned int idx = get_cpu_gdt_ro_index(cpu);
-	return (struct desc_struct *)__fix_to_virt(idx);
+	return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
 }
 
 /* Provide the current read-only GDT */
@@ -185,7 +181,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
 #endif
 }
 
-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
 {
 	struct desc_struct *d = get_cpu_gdt_rw(cpu);
 	tss_desc tss;
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 14d6d50073142b0f49b06850ccd0d394546479ee..b027633e73003e121d7c043438ac7dbd10fc07a4 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -50,6 +50,12 @@
 # define DISABLE_LA57	(1<<(X86_FEATURE_LA57 & 31))
 #endif
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define DISABLE_PTI		0
+#else
+# define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -60,7 +66,7 @@
 #define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK5	0
 #define DISABLED_MASK6	0
-#define DISABLED_MASK7	0
+#define DISABLED_MASK7	(DISABLE_PTI)
 #define DISABLED_MASK8	0
 #define DISABLED_MASK9	(DISABLE_MPX)
 #define DISABLED_MASK10	0
diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 0211029076ea8b9ed6648b9bf298c99c8b2124ad..6777480d8a427eaaa07559f77985c125aa66bb6c 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_ESPFIX_H
 #define _ASM_X86_ESPFIX_H
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ESPFIX64
 
 #include <asm/percpu.h>
 
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
 extern void init_espfix_bsp(void);
 extern void init_espfix_ap(int cpu);
-
-#endif /* CONFIG_X86_64 */
+#else
+static inline void init_espfix_ap(int cpu) { }
+#endif
 
 #endif /* _ASM_X86_ESPFIX_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b0c505fe9a958c701fef6d96f281bb8ab1a773de..64c4a30e0d39621ff8587fc8da538cd3d1d9f144 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
 			 PAGE_SIZE)
 #endif
 
-
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
 	FIX_IO_APIC_BASE_0,
 	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
 #endif
-	FIX_RO_IDT,	/* Virtual mapping for read-only IDT */
 #ifdef CONFIG_X86_32
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
 #ifdef	CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
-	/* Fixmap entries to remap the GDTs, one per processor. */
-	FIX_GDT_REMAP_BEGIN,
-	FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
 
 #ifdef CONFIG_ACPI_APEI_GHES
 	/* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
 extern void reserve_top_address(unsigned long reserve);
 
 #define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
 
 extern int fixmaps_set;
 
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 1b0a5abcd8aeb6e700013c5434aaeb0bba7a152f..96aa6b9884dc5b3bc8d54c9ef1c6258eea13a0d0 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,16 +20,7 @@
 #ifndef _ASM_X86_HYPERVISOR_H
 #define _ASM_X86_HYPERVISOR_H
 
-#ifdef CONFIG_HYPERVISOR_GUEST
-
-#include <asm/kvm_para.h>
-#include <asm/x86_init.h>
-#include <asm/xen/hypervisor.h>
-
-/*
- * x86 hypervisor information
- */
-
+/* x86 hypervisor types  */
 enum x86_hypervisor_type {
 	X86_HYPER_NATIVE = 0,
 	X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
 	X86_HYPER_KVM,
 };
 
+#ifdef CONFIG_HYPERVISOR_GUEST
+
+#include <asm/kvm_para.h>
+#include <asm/x86_init.h>
+#include <asm/xen/hypervisor.h>
+
 struct hypervisor_x86 {
 	/* Hypervisor name */
 	const char	*name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
 
 extern enum x86_hypervisor_type x86_hyper_type;
 extern void init_hypervisor_platform(void);
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+	return x86_hyper_type == type;
+}
 #else
 static inline void init_hypervisor_platform(void) { }
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+	return type == X86_HYPER_NATIVE;
+}
 #endif /* CONFIG_HYPERVISOR_GUEST */
 #endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
new file mode 100644
index 0000000000000000000000000000000000000000..62a9f4966b4298ec2f3aa6c32d0881bd4296956c
--- /dev/null
+++ b/arch/x86/include/asm/intel_ds.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_INTEL_DS_H
+#define _ASM_INTEL_DS_H
+
+#include <linux/percpu-defs.h>
+
+#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS		8
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+	u64	bts_buffer_base;
+	u64	bts_index;
+	u64	bts_absolute_maximum;
+	u64	bts_interrupt_threshold;
+	u64	pebs_buffer_base;
+	u64	pebs_index;
+	u64	pebs_absolute_maximum;
+	u64	pebs_interrupt_threshold;
+	u64	pebs_event_reset[MAX_PEBS_EVENTS];
+} __aligned(PAGE_SIZE);
+
+DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
+struct debug_store_buffers {
+	char	bts_buffer[BTS_BUFFER_SIZE];
+	char	pebs_buffer[PEBS_BUFFER_SIZE];
+};
+
+#endif
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
new file mode 100644
index 0000000000000000000000000000000000000000..989cfa86de85184359e0205de9237f9e048e5e20
--- /dev/null
+++ b/arch/x86/include/asm/invpcid.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVPCID
+#define _ASM_X86_INVPCID
+
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+			     unsigned long type)
+{
+	struct { u64 d[2]; } desc = { { pcid, addr } };
+
+	/*
+	 * The memory clobber is because the whole point is to invalidate
+	 * stale TLB entries and, especially if we're flushing global
+	 * mappings, we don't want the compiler to reorder any subsequent
+	 * memory accesses before the TLB flush.
+	 *
+	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+	 * invpcid (%rcx), %rax in long mode.
+	 */
+	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR		0
+#define INVPCID_TYPE_SINGLE_CTXT	1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL	2
+#define INVPCID_TYPE_ALL_NON_GLOBAL	3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+				     unsigned long addr)
+{
+	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
+#endif /* _ASM_X86_INVPCID */
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index 139feef467f7e298c6f9db57c43facc64f5468b6..c066ffae222b769996b78c13ee5d063dc7c5c544 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
 extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
 			      unsigned int nr_irqs);
 extern int mp_irqdomain_activate(struct irq_domain *domain,
-				 struct irq_data *irq_data, bool early);
+				 struct irq_data *irq_data, bool reserve);
 extern void mp_irqdomain_deactivate(struct irq_domain *domain,
 				    struct irq_data *irq_data);
 extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c8ef23f2c28f17c59308b9c41179c47f85e075ad..89f08955fff733c688a5ce4f4a0b8d74050ee617 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
 	swapgs;					\
 	sysretl
 
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(x)		pushfq; popq %rax
+#endif
 #else
 #define INTERRUPT_RETURN		iret
 #define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index f86a8caa561e8873c3f34e6e8b8cd509ebadd819..395c9631e000a3a17aa574c1b25fcc2cafd5b5fb 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void __show_regs(struct pt_regs *regs, int all);
+extern void show_iret_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h
deleted file mode 100644
index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000
--- a/arch/x86/include/asm/kmemcheck.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 034caa1a084e360ff74c77e84116bb0de6e28dcd..b24b1c8b397989304ec88d9979c66b1b5415f78e 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -214,8 +214,6 @@ struct x86_emulate_ops {
 	void (*halt)(struct x86_emulate_ctxt *ctxt);
 	void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
 	int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
-	void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
-	void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
 	int (*intercept)(struct x86_emulate_ctxt *ctxt,
 			 struct x86_instruction_info *info,
 			 enum x86_intercept_stage stage);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 977de5fb968be412862de87aeb95136bae69e620..51679843132829e38ed204eda60d50379a4fabb5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -536,7 +536,20 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_page_cache;
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
 
+	/*
+	 * QEMU userspace and the guest each have their own FPU state.
+	 * In vcpu_run, we switch between the user and guest FPU contexts.
+	 * While running a VCPU, the VCPU thread will have the guest FPU
+	 * context.
+	 *
+	 * Note that while the PKRU state lives inside the fpu registers,
+	 * it is switched out separately at VMENTER and VMEXIT time. The
+	 * "guest_fpu" state here contains the guest FPU context, with the
+	 * host PRKU bits.
+	 */
+	struct fpu user_fpu;
 	struct fpu guest_fpu;
+
 	u64 xcr0;
 	u64 guest_supported_xcr0;
 	u32 guest_xstate_size;
@@ -1435,4 +1448,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 #define put_smstate(type, buf, offset, val)                      \
 	*(type *)((buf) + (offset) - 0x7e00) = val
 
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 9ea26f16749706fddd5b15e8bf557a9e6156e165..5ff3e8af2c2056b7fe19560ee2ba1ad7146aaf2a 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,6 +3,7 @@
 #define _ASM_X86_MMU_H
 
 #include <linux/spinlock.h>
+#include <linux/rwsem.h>
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 
@@ -27,7 +28,8 @@ typedef struct {
 	atomic64_t tlb_gen;
 
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
-	struct ldt_struct *ldt;
+	struct rw_semaphore	ldt_usr_sem;
+	struct ldt_struct	*ldt;
 #endif
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6d16d15d09a0daed96a1e3d670b6203d1779b98e..c931b88982a0ff59e3b67947cc606e452f327dc0 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -50,22 +50,53 @@ struct ldt_struct {
 	 * call gates.  On native, we could merge the ldt_struct and LDT
 	 * allocations, but it's not worth trying to optimize.
 	 */
-	struct desc_struct *entries;
-	unsigned int nr_entries;
+	struct desc_struct	*entries;
+	unsigned int		nr_entries;
+
+	/*
+	 * If PTI is in use, then the entries array is not mapped while we're
+	 * in user mode.  The whole array will be aliased at the addressed
+	 * given by ldt_slot_va(slot).  We use two slots so that we can allocate
+	 * and map, and enable a new LDT without invalidating the mapping
+	 * of an older, still-in-use LDT.
+	 *
+	 * slot will be -1 if this LDT doesn't have an alias mapping.
+	 */
+	int			slot;
 };
 
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+#ifdef CONFIG_X86_64
+	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+#else
+	BUG();
+#endif
+}
+
 /*
  * Used for LDT copy/destruction.
  */
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
+static inline void init_new_context_ldt(struct mm_struct *mm)
+{
+	mm->context.ldt = NULL;
+	init_rwsem(&mm->context.ldt_usr_sem);
+}
+int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
 void destroy_context_ldt(struct mm_struct *mm);
+void ldt_arch_exit_mmap(struct mm_struct *mm);
 #else	/* CONFIG_MODIFY_LDT_SYSCALL */
-static inline int init_new_context_ldt(struct task_struct *tsk,
-				       struct mm_struct *mm)
+static inline void init_new_context_ldt(struct mm_struct *mm) { }
+static inline int ldt_dup_context(struct mm_struct *oldmm,
+				  struct mm_struct *mm)
 {
 	return 0;
 }
-static inline void destroy_context_ldt(struct mm_struct *mm) {}
+static inline void destroy_context_ldt(struct mm_struct *mm) { }
+static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
 #endif
 
 static inline void load_mm_ldt(struct mm_struct *mm)
@@ -90,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
 	 * that we can see.
 	 */
 
-	if (unlikely(ldt))
-		set_ldt(ldt->entries, ldt->nr_entries);
-	else
+	if (unlikely(ldt)) {
+		if (static_cpu_has(X86_FEATURE_PTI)) {
+			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+				/*
+				 * Whoops -- either the new LDT isn't mapped
+				 * (if slot == -1) or is mapped into a bogus
+				 * slot (if slot > 1).
+				 */
+				clear_LDT();
+				return;
+			}
+
+			/*
+			 * If page table isolation is enabled, ldt->entries
+			 * will not be mapped in the userspace pagetables.
+			 * Tell the CPU to access the LDT through the alias
+			 * at ldt_slot_va(ldt->slot).
+			 */
+			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+		} else {
+			set_ldt(ldt->entries, ldt->nr_entries);
+		}
+	} else {
 		clear_LDT();
+	}
 #else
 	clear_LDT();
 #endif
@@ -132,18 +184,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
+	mutex_init(&mm->context.lock);
+
 	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
 	atomic64_set(&mm->context.tlb_gen, 0);
 
-	#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
 		/* pkey 0 is the default and always allocated */
 		mm->context.pkey_allocation_map = 0x1;
 		/* -1 means unallocated or invalid */
 		mm->context.execute_only_pkey = -1;
 	}
-	#endif
-	return init_new_context_ldt(tsk, mm);
+#endif
+	init_new_context_ldt(mm);
+	return 0;
 }
 static inline void destroy_context(struct mm_struct *mm)
 {
@@ -176,15 +231,16 @@ do {						\
 } while (0)
 #endif
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-				 struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	paravirt_arch_dup_mmap(oldmm, mm);
+	return ldt_dup_context(oldmm, mm);
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
 {
 	paravirt_arch_exit_mmap(mm);
+	ldt_arch_exit_mmap(mm);
 }
 
 #ifdef CONFIG_X86_64
@@ -281,33 +337,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
-/*
- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
- * bits.  This serves two purposes.  It prevents a nasty situation in
- * which PCID-unaware code saves CR3, loads some other value (with PCID
- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
- * the saved ASID was nonzero.  It also means that any bugs involving
- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
- * deterministically.
- */
-
-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
-{
-	if (static_cpu_has(X86_FEATURE_PCID)) {
-		VM_WARN_ON_ONCE(asid > 4094);
-		return __sme_pa(mm->pgd) | (asid + 1);
-	} else {
-		VM_WARN_ON_ONCE(asid != 0);
-		return __sme_pa(mm->pgd);
-	}
-}
-
-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
-{
-	VM_WARN_ON_ONCE(asid > 4094);
-	return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
-}
-
 /*
  * This can be used from process context to figure out what the value of
  * CR3 is without needing to do a (slow) __read_cr3().
@@ -317,7 +346,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
  */
 static inline unsigned long __get_current_cr3_fast(void)
 {
-	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
 		this_cpu_read(cpu_tlbstate.loaded_mm_asid));
 
 	/* For now, be very restrictive about when this can be called. */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 5400add2885b9646fa7d8d53c5b35e16c2794c7d..8bf450b13d9f53883db9ee89ac6d39913963907e 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -7,6 +7,7 @@
 #include <linux/nmi.h>
 #include <asm/io.h>
 #include <asm/hyperv.h>
+#include <asm/nospec-branch.h>
 
 /*
  * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
@@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 		return U64_MAX;
 
 	__asm__ __volatile__("mov %4, %%r8\n"
-			     "call *%5"
+			     CALL_NOSPEC
 			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 			       "+c" (control), "+d" (input_address)
-			     :  "r" (output_address), "m" (hv_hypercall_pg)
+			     :  "r" (output_address),
+				THUNK_TARGET(hv_hypercall_pg)
 			     : "cc", "memory", "r8", "r9", "r10", "r11");
 #else
 	u32 input_address_hi = upper_32_bits(input_address);
@@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 	if (!hv_hypercall_pg)
 		return U64_MAX;
 
-	__asm__ __volatile__("call *%7"
+	__asm__ __volatile__(CALL_NOSPEC
 			     : "=A" (hv_status),
 			       "+c" (input_address_lo), ASM_CALL_CONSTRAINT
 			     : "A" (control),
 			       "b" (input_address_hi),
 			       "D"(output_address_hi), "S"(output_address_lo),
-			       "m" (hv_hypercall_pg)
+			       THUNK_TARGET(hv_hypercall_pg)
 			     : "cc", "memory");
 #endif /* !x86_64 */
 	return hv_status;
@@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 
 #ifdef CONFIG_X86_64
 	{
-		__asm__ __volatile__("call *%4"
+		__asm__ __volatile__(CALL_NOSPEC
 				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 				       "+c" (control), "+d" (input1)
-				     : "m" (hv_hypercall_pg)
+				     : THUNK_TARGET(hv_hypercall_pg)
 				     : "cc", "r8", "r9", "r10", "r11");
 	}
 #else
@@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 		u32 input1_hi = upper_32_bits(input1);
 		u32 input1_lo = lower_32_bits(input1);
 
-		__asm__ __volatile__ ("call *%5"
+		__asm__ __volatile__ (CALL_NOSPEC
 				      : "=A"(hv_status),
 					"+c"(input1_lo),
 					ASM_CALL_CONSTRAINT
 				      :	"A" (control),
 					"b" (input1_hi),
-					"m" (hv_hypercall_pg)
+					THUNK_TARGET(hv_hypercall_pg)
 				      : "cc", "edi", "esi");
 	}
 #endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 34c4922bbc3fb5ed95cb0819d7ac6b9cd37a7f41..e7b983a355060a6c5b5db76f1fc18475eb647ae5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -355,6 +355,9 @@
 #define FAM10H_MMIO_CONF_BASE_MASK	0xfffffffULL
 #define FAM10H_MMIO_CONF_BASE_SHIFT	20
 #define MSR_FAM10H_NODE_ID		0xc001100c
+#define MSR_F10H_DECFG			0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT	1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE		BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
 
 /* K8 MSRs */
 #define MSR_K8_TOP_MEM1			0xc001001a
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644
index 0000000000000000000000000000000000000000..402a11c803c38b0bf8de9d5664e5113249427d47
--- /dev/null
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NOSPEC_BRANCH_H__
+#define __NOSPEC_BRANCH_H__
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeatures.h>
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS		16	/* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp)	\
+	mov	$(nr/2), reg;			\
+771:						\
+	call	772f;				\
+773:	/* speculation trap */			\
+	pause;					\
+	jmp	773b;				\
+772:						\
+	call	774f;				\
+775:	/* speculation trap */			\
+	pause;					\
+	jmp	775b;				\
+774:						\
+	dec	reg;				\
+	jnz	771b;				\
+	add	$(BITS_PER_LONG/8) * nr, sp;
+
+#ifdef __ASSEMBLY__
+
+/*
+ * This should be used immediately before a retpoline alternative.  It tells
+ * objtool where the retpolines are so that it can make sense of the control
+ * flow by just reading the original instruction(s) and ignoring the
+ * alternatives.
+ */
+.macro ANNOTATE_NOSPEC_ALTERNATIVE
+	.Lannotate_\@:
+	.pushsection .discard.nospec
+	.long .Lannotate_\@ - .
+	.popsection
+.endm
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+	call	.Ldo_rop_\@
+.Lspec_trap_\@:
+	pause
+	jmp	.Lspec_trap_\@
+.Ldo_rop_\@:
+	mov	\reg, (%_ASM_SP)
+	ret
+.endm
+
+/*
+ * This is a wrapper around RETPOLINE_JMP so the called function in reg
+ * returns to the instruction after the macro.
+ */
+.macro RETPOLINE_CALL reg:req
+	jmp	.Ldo_call_\@
+.Ldo_retpoline_jmp_\@:
+	RETPOLINE_JMP \reg
+.Ldo_call_\@:
+	call	.Ldo_retpoline_jmp_\@
+.endm
+
+/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro JMP_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+	ANNOTATE_NOSPEC_ALTERNATIVE
+	ALTERNATIVE_2 __stringify(jmp *\reg),				\
+		__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE,	\
+		__stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+	jmp	*\reg
+#endif
+.endm
+
+.macro CALL_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+	ANNOTATE_NOSPEC_ALTERNATIVE
+	ALTERNATIVE_2 __stringify(call *\reg),				\
+		__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+		__stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+	call	*\reg
+#endif
+.endm
+
+ /*
+  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+  * monstrosity above, manually.
+  */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+#ifdef CONFIG_RETPOLINE
+	ANNOTATE_NOSPEC_ALTERNATIVE
+	ALTERNATIVE "jmp .Lskip_rsb_\@",				\
+		__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))	\
+		\ftr
+.Lskip_rsb_\@:
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#define ANNOTATE_NOSPEC_ALTERNATIVE				\
+	"999:\n\t"						\
+	".pushsection .discard.nospec\n\t"			\
+	".long 999b - .\n\t"					\
+	".popsection\n\t"
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+
+/*
+ * Since the inline asm uses the %V modifier which is only in newer GCC,
+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ */
+# define CALL_NOSPEC						\
+	ANNOTATE_NOSPEC_ALTERNATIVE				\
+	ALTERNATIVE(						\
+	"call *%[thunk_target]\n",				\
+	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
+	X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n",	\
+	"       jmp    904f;\n"					\
+	"       .align 16\n"					\
+	"901:	call   903f;\n"					\
+	"902:	pause;\n"					\
+	"       jmp    902b;\n"					\
+	"       .align 16\n"					\
+	"903:	addl   $4, %%esp;\n"				\
+	"       pushl  %[thunk_target];\n"			\
+	"       ret;\n"						\
+	"       .align 16\n"					\
+	"904:	call   901b;\n",				\
+	X86_FEATURE_RETPOLINE)
+
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline for C / inline asm */
+# define CALL_NOSPEC "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+/* The Spectre V2 mitigation variants */
+enum spectre_v2_mitigation {
+	SPECTRE_V2_NONE,
+	SPECTRE_V2_RETPOLINE_MINIMAL,
+	SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
+	SPECTRE_V2_RETPOLINE_GENERIC,
+	SPECTRE_V2_RETPOLINE_AMD,
+	SPECTRE_V2_IBRS,
+};
+
+/*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+ * CPUs with IBRS_ATT *might* it be avoided.
+ */
+static inline void vmexit_fill_RSB(void)
+{
+#ifdef CONFIG_RETPOLINE
+	unsigned long loops = RSB_CLEAR_LOOPS / 2;
+
+	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+		      ALTERNATIVE("jmp 910f",
+				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+				  X86_FEATURE_RETPOLINE)
+		      "910:"
+		      : "=&r" (loops), ASM_CALL_CONSTRAINT
+		      : "r" (loops) : "memory" );
+#endif
+}
+#endif /* __ASSEMBLY__ */
+#endif /* __NOSPEC_BRANCH_H__ */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 283efcaac8aff86f2c004bc23e4b8642cbf3d527..892df375b6155a51f584760efb9f9e77c3f732e8 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -927,6 +927,15 @@ extern void default_banner(void);
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 		  CLBR_NONE,						\
 		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(clobbers)                                        \
+	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
+		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
+		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
+		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+#endif
+
 #endif	/* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 7a5d6695abd37e737bfb0e532ec4d86c53256433..eb66fa9cd0fc6187d55dc0c1a8fb30e5e32687a8 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -38,6 +38,7 @@ do {						\
 #define PCI_NOASSIGN_ROMS	0x80000
 #define PCI_ROOT_NO_CRS		0x100000
 #define PCI_NOASSIGN_BARS	0x200000
+#define PCI_BIG_ROOT_WINDOW	0x400000
 
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 4b5e1eafada731cdedd48d771d79d55e766a84eb..aff42e1da6ee1591bbec15ce6f0543953789d19c 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
  */
 extern gfp_t __userpte_alloc_gfp;
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Instead of one PGD, we acquire two PGDs.  Being order-1, it is
+ * both 8k in size and 8k-aligned.  That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+#define PGD_ALLOCATION_ORDER 1
+#else
+#define PGD_ALLOCATION_ORDER 0
+#endif
+
 /*
  * Allocate and free page tables.
  */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 95e2dfd755218ccfaf6417b44c822b545a35568e..e42b8943cb1a311a00ddceb36129ede3012489ef 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
 int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
 
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
 void ptdump_walk_pgd_level_checkwx(void);
 
 #ifdef CONFIG_DEBUG_WX
@@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 
 static inline int p4d_bad(p4d_t p4d)
 {
-	return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+	unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
+
+	if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+		ignore_flags |= _PAGE_NX;
+
+	return (p4d_flags(p4d) & ~ignore_flags) != 0;
 }
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
@@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 
 static inline int pgd_bad(pgd_t pgd)
 {
-	return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+	unsigned long ignore_flags = _PAGE_USER;
+
+	if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+		ignore_flags |= _PAGE_NX;
+
+	return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
 }
 
 static inline int pgd_none(pgd_t pgd)
@@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd)
  * pgd_offset() returns a (pgd_t *)
  * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
  */
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
+/*
+ * a shortcut to get a pgd_t in a given mm
+ */
+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
 /*
  * a shortcut which implies the use of the kernel's pgd, instead
  * of a process's
@@ -1106,7 +1121,14 @@ static inline int pud_write(pud_t pud)
  */
 static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
 {
-       memcpy(dst, src, count * sizeof(pgd_t));
+	memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+	/* Clone the user space pgd as well */
+	memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
+	       count * sizeof(pgd_t));
+#endif
 }
 
 #define PTE_SHIFT ilog2(PTRS_PER_PTE)
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index f2ca9b28fd68303f4494775564aa9da77ddcd53a..ce245b0cdfcaa42bd932a387bbb189ee7349bfef 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 #define LAST_PKMAP 1024
 #endif
 
-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))	\
-		    & PMD_MASK)
+/*
+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
+ * to avoid include recursion hell
+ */
+#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 40)
+
+#define CPU_ENTRY_AREA_BASE				\
+	((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
+
+#define PKMAP_BASE		\
+	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
 
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
 #else
-# define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
+# define VMALLOC_END	(CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
 #endif
 
 #define MODULES_VADDR	VMALLOC_START
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index e9f05331e732a057b341e78977d666b8cfe35289..81462e9a34f6af49645a08f55c7d67e0144dbb77 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
 #endif
 }
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * (8k-aligned and 8k in size).  The kernel one is at the beginning 4k and
+ * the user one is in the last 4k.  To switch between them, you
+ * just need to flip the 12th bit in their addresses.
+ */
+#define PTI_PGTABLE_SWITCH_BIT	PAGE_SHIFT
+
+/*
+ * This generates better code than the inline assembly in
+ * __set_bit().
+ */
+static inline void *ptr_set_bit(void *ptr, int bit)
+{
+	unsigned long __ptr = (unsigned long)ptr;
+
+	__ptr |= BIT(bit);
+	return (void *)__ptr;
+}
+static inline void *ptr_clear_bit(void *ptr, int bit)
+{
+	unsigned long __ptr = (unsigned long)ptr;
+
+	__ptr &= ~BIT(bit);
+	return (void *)__ptr;
+}
+
+static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
+{
+	return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
+{
+	return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
+{
+	return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
+{
+	return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Page table pages are page-aligned.  The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ *
+ * Returns true for parts of the PGD that map userspace and
+ * false for the parts that map the kernel.
+ */
+static inline bool pgdp_maps_userspace(void *__ptr)
+{
+	unsigned long ptr = (unsigned long)__ptr;
+
+	return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
+}
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
+
+/*
+ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
+ * Populates the user and returns the resulting PGD that must be set in
+ * the kernel copy of the page tables.
+ */
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return pgd;
+	return __pti_set_user_pgd(pgdp, pgd);
+}
+#else
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	return pgd;
+}
+#endif
+
 static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
+	p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
+#else
 	*p4dp = p4d;
+#endif
 }
 
 static inline void native_p4d_clear(p4d_t *p4d)
@@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
 
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	*pgdp = pti_set_user_pgd(pgdp, pgd);
+#else
 	*pgdp = pgd;
+#endif
 }
 
 static inline void native_pgd_clear(pgd_t *pgd)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6d5f45dcd4a13caafbf184f323d0725c2d5f53e4..6b8f73dcbc2c2b6a835e17ac0828d8b6484f4dce 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -75,33 +75,52 @@ typedef struct { pteval_t pte; } pte_t;
 #define PGDIR_SIZE	(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
 
-/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-#define MAXMEM		_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+/*
+ * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
+ *
+ * Be very careful vs. KASLR when changing anything here. The KASLR address
+ * range must not overlap with anything except the KASAN shadow area, which
+ * is correct as KASAN disables KASLR.
+ */
+#define MAXMEM			_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+
 #ifdef CONFIG_X86_5LEVEL
-#define VMALLOC_SIZE_TB _AC(16384, UL)
-#define __VMALLOC_BASE	_AC(0xff92000000000000, UL)
-#define __VMEMMAP_BASE	_AC(0xffd4000000000000, UL)
+# define VMALLOC_SIZE_TB	_AC(12800, UL)
+# define __VMALLOC_BASE		_AC(0xffa0000000000000, UL)
+# define __VMEMMAP_BASE		_AC(0xffd4000000000000, UL)
+# define LDT_PGD_ENTRY		_AC(-112, UL)
+# define LDT_BASE_ADDR		(LDT_PGD_ENTRY << PGDIR_SHIFT)
 #else
-#define VMALLOC_SIZE_TB	_AC(32, UL)
-#define __VMALLOC_BASE	_AC(0xffffc90000000000, UL)
-#define __VMEMMAP_BASE	_AC(0xffffea0000000000, UL)
+# define VMALLOC_SIZE_TB	_AC(32, UL)
+# define __VMALLOC_BASE		_AC(0xffffc90000000000, UL)
+# define __VMEMMAP_BASE		_AC(0xffffea0000000000, UL)
+# define LDT_PGD_ENTRY		_AC(-3, UL)
+# define LDT_BASE_ADDR		(LDT_PGD_ENTRY << PGDIR_SHIFT)
 #endif
+
 #ifdef CONFIG_RANDOMIZE_MEMORY
-#define VMALLOC_START	vmalloc_base
-#define VMEMMAP_START	vmemmap_base
+# define VMALLOC_START		vmalloc_base
+# define VMEMMAP_START		vmemmap_base
 #else
-#define VMALLOC_START	__VMALLOC_BASE
-#define VMEMMAP_START	__VMEMMAP_BASE
+# define VMALLOC_START		__VMALLOC_BASE
+# define VMEMMAP_START		__VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
-#define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+
+#define VMALLOC_END		(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+
+#define MODULES_VADDR		(__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 /* The module sections ends with the start of the fixmap */
-#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
-#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
-#define ESPFIX_PGD_ENTRY _AC(-2, UL)
-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
-#define EFI_VA_START	 ( -4 * (_AC(1, UL) << 30))
-#define EFI_VA_END	 (-68 * (_AC(1, UL) << 30))
+#define MODULES_END		_AC(0xffffffffff000000, UL)
+#define MODULES_LEN		(MODULES_END - MODULES_VADDR)
+
+#define ESPFIX_PGD_ENTRY	_AC(-2, UL)
+#define ESPFIX_BASE_ADDR	(ESPFIX_PGD_ENTRY << P4D_SHIFT)
+
+#define CPU_ENTRY_AREA_PGD	_AC(-4, UL)
+#define CPU_ENTRY_AREA_BASE	(CPU_ENTRY_AREA_PGD << P4D_SHIFT)
+
+#define EFI_VA_START		( -4 * (_AC(1, UL) << 30))
+#define EFI_VA_END		(-68 * (_AC(1, UL) << 30))
 
 #define EARLY_DYNAMIC_PAGE_TABLES	64
 
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 43212a43ee69feea1de27275d9075c566cdfcd2c..625a52a5594f53b3ddc889843b92d228cc39a8e8 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -38,6 +38,11 @@
 #define CR3_ADDR_MASK	__sme_clr(0x7FFFFFFFFFFFF000ull)
 #define CR3_PCID_MASK	0xFFFull
 #define CR3_NOFLUSH	BIT_ULL(63)
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define X86_CR3_PTI_PCID_USER_BIT	11
+#endif
+
 #else
 /*
  * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cc16fa882e3e760a40351cf3e7476ac9f25ffe00..d3a67fba200ae2a5c03f52a7815dca00b43c63ad 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
 extern struct cpuinfo_x86	boot_cpu_data;
 extern struct cpuinfo_x86	new_cpu_data;
 
-extern struct tss_struct	doublefault_tss;
-extern __u32			cpu_caps_cleared[NCAPINTS];
-extern __u32			cpu_caps_set[NCAPINTS];
+extern struct x86_hw_tss	doublefault_tss;
+extern __u32			cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32			cpu_caps_set[NCAPINTS + NBUGINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
 	write_cr3(__sme_pa(pgdir));
 }
 
+/*
+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
+ * unrelated to the task-switch mechanism:
+ */
 #ifdef CONFIG_X86_32
 /* This is the TSS defined by the hardware. */
 struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
 struct x86_hw_tss {
 	u32			reserved1;
 	u64			sp0;
+
+	/*
+	 * We store cpu_current_top_of_stack in sp1 so it's always accessible.
+	 * Linux does not use ring 1, so sp1 is not otherwise needed.
+	 */
 	u64			sp1;
+
 	u64			sp2;
 	u64			reserved2;
 	u64			ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
 #define IO_BITMAP_BITS			65536
 #define IO_BITMAP_BYTES			(IO_BITMAP_BITS/8)
 #define IO_BITMAP_LONGS			(IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET		offsetof(struct tss_struct, io_bitmap)
+#define IO_BITMAP_OFFSET		(offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
 #define INVALID_IO_BITMAP_OFFSET	0x8000
 
+struct entry_stack {
+	unsigned long		words[64];
+};
+
+struct entry_stack_page {
+	struct entry_stack stack;
+} __aligned(PAGE_SIZE);
+
 struct tss_struct {
 	/*
-	 * The hardware state:
+	 * The fixed hardware portion.  This must not cross a page boundary
+	 * at risk of violating the SDM's advice and potentially triggering
+	 * errata.
 	 */
 	struct x86_hw_tss	x86_tss;
 
@@ -339,18 +360,9 @@ struct tss_struct {
 	 * be within the limit.
 	 */
 	unsigned long		io_bitmap[IO_BITMAP_LONGS + 1];
+} __aligned(PAGE_SIZE);
 
-#ifdef CONFIG_X86_32
-	/*
-	 * Space for the temporary SYSENTER stack.
-	 */
-	unsigned long		SYSENTER_stack_canary;
-	unsigned long		SYSENTER_stack[64];
-#endif
-
-} ____cacheline_aligned;
-
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 
 /*
  * sizeof(unsigned long) coming from an extra "long" at the end
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
 
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#else
+/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
+#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
 #endif
 
 /*
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
 static inline void
 native_load_sp0(unsigned long sp0)
 {
-	this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
 
 static inline unsigned long current_top_of_stack(void)
 {
-#ifdef CONFIG_X86_64
-	return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
-#else
-	/* sp0 on x86_32 is special in and around vm86 mode. */
+	/*
+	 *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
+	 *  and around vm86 mode and sp0 on x86_64 is special because of the
+	 *  entry trampoline.
+	 */
 	return this_cpu_read_stable(cpu_current_top_of_stack);
-#endif
 }
 
 static inline bool on_thread_stack(void)
@@ -837,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x)
 
 #else
 /*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
+ * User space process size.  This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything executable
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen.  This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
  */
 #define TASK_SIZE_MAX	((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
 
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
new file mode 100644
index 0000000000000000000000000000000000000000..0b5ef05b2d2d9f03a967afe10256a266ef242c98
--- /dev/null
+++ b/arch/x86/include/asm/pti.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _ASM_X86_PTI_H
+#define _ASM_X86_PTI_H
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern void pti_init(void);
+extern void pti_check_boottime_disable(void);
+#else
+static inline void pti_check_boottime_disable(void) { }
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_PTI_H */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index b20f9d623f9c639fd212c5e313bd5e450747b1eb..8f09012b92e779d7aabf4ad663b8eb10b2379c37 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -236,11 +236,23 @@
  */
 #define EARLY_IDT_HANDLER_SIZE 9
 
+/*
+ * xen_early_idt_handler_array is for Xen pv guests: for each entry in
+ * early_idt_handler_array it contains a prequel in the form of
+ * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to
+ * max 8 bytes.
+ */
+#define XEN_EARLY_IDT_HANDLER_SIZE 8
+
 #ifndef __ASSEMBLY__
 
 extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
 extern void early_ignore_irq(void);
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
+#endif
+
 /*
  * Load a segment. Fall back on loading the zero segment if something goes
  * wrong.  This variant assumes that loading zero fully clears the segment.
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 8da111b3c342bbb61a9e630e101c8a83422a15ea..f737068787729f045a578776845231b0a0ee3e0d 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -16,6 +16,7 @@ enum stack_type {
 	STACK_TYPE_TASK,
 	STACK_TYPE_IRQ,
 	STACK_TYPE_SOFTIRQ,
+	STACK_TYPE_ENTRY,
 	STACK_TYPE_EXCEPTION,
 	STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
 };
@@ -28,6 +29,8 @@ struct stack_info {
 bool in_task_stack(unsigned long *stack, struct task_struct *task,
 		   struct stack_info *info);
 
+bool in_entry_stack(unsigned long *stack, struct stack_info *info);
+
 int get_stack_info(unsigned long *stack, struct task_struct *task,
 		   struct stack_info *info, unsigned long *visit_mask);
 
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 982c325dad3377a4f0d80a5808f4d731a87926a5..8be6afb584715dc8d5a50d1bbd989bf053366294 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -12,7 +12,13 @@
 
 /* image of the saved processor state */
 struct saved_context {
-	u16 es, fs, gs, ss;
+	/*
+	 * On x86_32, all segment registers, with the possible exception of
+	 * gs, are saved at kernel entry in pt_regs.
+	 */
+#ifdef CONFIG_X86_32_LAZY_GS
+	u16 gs;
+#endif
 	unsigned long cr0, cr2, cr3, cr4;
 	u64 misc_enable;
 	bool misc_enable_saved;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 7306e911faee20694908bdf482166546776a7e92..a7af9f53c0cb773d05fd84ebe94525a1971e3ebb 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -20,8 +20,20 @@
  */
 struct saved_context {
 	struct pt_regs regs;
-	u16 ds, es, fs, gs, ss;
-	unsigned long gs_base, gs_kernel_base, fs_base;
+
+	/*
+	 * User CS and SS are saved in current_pt_regs().  The rest of the
+	 * segment selectors need to be saved and restored here.
+	 */
+	u16 ds, es, fs, gs;
+
+	/*
+	 * Usermode FSBASE and GSBASE may not match the fs and gs selectors,
+	 * so we save them separately.  We save the kernelmode GSBASE to
+	 * restore percpu access after resume.
+	 */
+	unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
+
 	unsigned long cr0, cr2, cr3, cr4, cr8;
 	u64 misc_enable;
 	bool misc_enable_saved;
@@ -30,8 +42,7 @@ struct saved_context {
 	u16 gdt_pad; /* Unused */
 	struct desc_ptr gdt_desc;
 	u16 idt_pad;
-	u16 idt_limit;
-	unsigned long idt_base;
+	struct desc_ptr idt;
 	u16 ldt;
 	u16 tss;
 	unsigned long tr;
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8c6bd6863db9d6b737cd0649324c154f9b9798a3..eb5f7999a8934c7af8b6c5b170b05e5772264783 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -16,8 +16,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		      struct tss_struct *tss);
 
 /* This runs runs on the previous thread's stack. */
-static inline void prepare_switch_to(struct task_struct *prev,
-				     struct task_struct *next)
+static inline void prepare_switch_to(struct task_struct *next)
 {
 #ifdef CONFIG_VMAP_STACK
 	/*
@@ -70,7 +69,7 @@ struct fork_frame {
 
 #define switch_to(prev, next, last)					\
 do {									\
-	prepare_switch_to(prev, next);					\
+	prepare_switch_to(next);					\
 									\
 	((last) = __switch_to_asm((prev), (next)));			\
 } while (0)
@@ -79,10 +78,10 @@ do {									\
 static inline void refresh_sysenter_cs(struct thread_struct *thread)
 {
 	/* Only happens when SEP is enabled, no need to test "SEP"arately: */
-	if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+	if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
 		return;
 
-	this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+	this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
 	wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
 }
 #endif
@@ -90,10 +89,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
 /* This is used when switching tasks or entering/exiting vm86 mode. */
 static inline void update_sp0(struct task_struct *task)
 {
+	/* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
 #ifdef CONFIG_X86_32
 	load_sp0(task->thread.sp0);
 #else
-	load_sp0(task_top_of_stack(task));
+	if (static_cpu_has(X86_FEATURE_XENPV))
+		load_sp0(task_top_of_stack(task));
 #endif
 }
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 70f425947dc50f3e99ca639c0ead0d7e1cce636d..00223333821a96616647a9cbb6fe729c4a18b7b6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
 #else /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
 #endif
 
 #endif
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 509046cfa5ce893357366348468a5c5ff8e86a09..d33e4a26dc7ed2db51687ddb4ca7ee3a35ddf835 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -9,70 +9,130 @@
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 #include <asm/smp.h>
+#include <asm/invpcid.h>
+#include <asm/pti.h>
+#include <asm/processor-flags.h>
 
-static inline void __invpcid(unsigned long pcid, unsigned long addr,
-			     unsigned long type)
-{
-	struct { u64 d[2]; } desc = { { pcid, addr } };
+/*
+ * The x86 feature is called PCID (Process Context IDentifier). It is similar
+ * to what is traditionally called ASID on the RISC processors.
+ *
+ * We don't use the traditional ASID implementation, where each process/mm gets
+ * its own ASID and flush/restart when we run out of ASID space.
+ *
+ * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
+ * that came by on this CPU, allowing cheaper switch_mm between processes on
+ * this CPU.
+ *
+ * We end up with different spaces for different things. To avoid confusion we
+ * use different names for each of them:
+ *
+ * ASID  - [0, TLB_NR_DYN_ASIDS-1]
+ *         the canonical identifier for an mm
+ *
+ * kPCID - [1, TLB_NR_DYN_ASIDS]
+ *         the value we write into the PCID part of CR3; corresponds to the
+ *         ASID+1, because PCID 0 is special.
+ *
+ * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ *         for KPTI each mm has two address spaces and thus needs two
+ *         PCID values, but we can still do with a single ASID denomination
+ *         for each mm. Corresponds to kPCID + 2048.
+ *
+ */
 
-	/*
-	 * The memory clobber is because the whole point is to invalidate
-	 * stale TLB entries and, especially if we're flushing global
-	 * mappings, we don't want the compiler to reorder any subsequent
-	 * memory accesses before the TLB flush.
-	 *
-	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
-	 * invpcid (%rcx), %rax in long mode.
-	 */
-	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
-		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
-}
+/* There are 12 bits of space for ASIDS in CR3 */
+#define CR3_HW_ASID_BITS		12
 
-#define INVPCID_TYPE_INDIV_ADDR		0
-#define INVPCID_TYPE_SINGLE_CTXT	1
-#define INVPCID_TYPE_ALL_INCL_GLOBAL	2
-#define INVPCID_TYPE_ALL_NON_GLOBAL	3
+/*
+ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
+ * user/kernel switches
+ */
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define PTI_CONSUMED_PCID_BITS	1
+#else
+# define PTI_CONSUMED_PCID_BITS	0
+#endif
 
-/* Flush all mappings for a given pcid and addr, not including globals. */
-static inline void invpcid_flush_one(unsigned long pcid,
-				     unsigned long addr)
-{
-	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
-}
+#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
+
+/*
+ * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid.  -1 below to account
+ * for them being zero-based.  Another -1 is because PCID 0 is reserved for
+ * use by non-PCID-aware users.
+ */
+#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
 
-/* Flush all mappings for a given PCID, not including globals. */
-static inline void invpcid_flush_single_context(unsigned long pcid)
+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in two cache
+ * lines.
+ */
+#define TLB_NR_DYN_ASIDS	6
+
+/*
+ * Given @asid, compute kPCID
+ */
+static inline u16 kern_pcid(u16 asid)
 {
-	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/*
+	 * Make sure that the dynamic ASID space does not confict with the
+	 * bit we are using to switch between user and kernel ASIDs.
+	 */
+	BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
+
+	/*
+	 * The ASID being passed in here should have respected the
+	 * MAX_ASID_AVAILABLE and thus never have the switch bit set.
+	 */
+	VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
+#endif
+	/*
+	 * The dynamically-assigned ASIDs that get passed in are small
+	 * (<TLB_NR_DYN_ASIDS).  They never have the high switch bit set,
+	 * so do not bother to clear it.
+	 *
+	 * If PCID is on, ASID-aware code paths put the ASID+1 into the
+	 * PCID bits.  This serves two purposes.  It prevents a nasty
+	 * situation in which PCID-unaware code saves CR3, loads some other
+	 * value (with PCID == 0), and then restores CR3, thus corrupting
+	 * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
+	 * that any bugs involving loading a PCID-enabled CR3 with
+	 * CR4.PCIDE off will trigger deterministically.
+	 */
+	return asid + 1;
 }
 
-/* Flush all mappings, including globals, for all PCIDs. */
-static inline void invpcid_flush_all(void)
+/*
+ * Given @asid, compute uPCID
+ */
+static inline u16 user_pcid(u16 asid)
 {
-	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+	u16 ret = kern_pcid(asid);
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
+#endif
+	return ret;
 }
 
-/* Flush all mappings for all PCIDs except globals. */
-static inline void invpcid_flush_all_nonglobals(void)
+struct pgd_t;
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
 {
-	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+	if (static_cpu_has(X86_FEATURE_PCID)) {
+		return __sme_pa(pgd) | kern_pcid(asid);
+	} else {
+		VM_WARN_ON_ONCE(asid != 0);
+		return __sme_pa(pgd);
+	}
 }
 
-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 {
-	u64 new_tlb_gen;
-
-	/*
-	 * Bump the generation count.  This also serves as a full barrier
-	 * that synchronizes with switch_mm(): callers are required to order
-	 * their read of mm_cpumask after their writes to the paging
-	 * structures.
-	 */
-	smp_mb__before_atomic();
-	new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
-	smp_mb__after_atomic();
-
-	return new_tlb_gen;
+	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+	VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
+	return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
 }
 
 #ifdef CONFIG_PARAVIRT
@@ -99,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void)
 	return !static_cpu_has(X86_FEATURE_PCID);
 }
 
-/*
- * 6 because 6 should be plenty and struct tlb_state will fit in
- * two cache lines.
- */
-#define TLB_NR_DYN_ASIDS 6
-
 struct tlb_context {
 	u64 ctx_id;
 	u64 tlb_gen;
@@ -138,6 +192,24 @@ struct tlb_state {
 	 */
 	bool is_lazy;
 
+	/*
+	 * If set we changed the page tables in such a way that we
+	 * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
+	 * This tells us to go invalidate all the non-loaded ctxs[]
+	 * on the next context switch.
+	 *
+	 * The current ctx was kept up-to-date as it ran and does not
+	 * need to be invalidated.
+	 */
+	bool invalidate_other;
+
+	/*
+	 * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
+	 * the corresponding user PCID needs a flush next time we
+	 * switch to it; see SWITCH_TO_USER_CR3.
+	 */
+	unsigned short user_pcid_flush_mask;
+
 	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by
 	 * disabling interrupts when modifying either one.
@@ -173,40 +245,43 @@ static inline void cr4_init_shadow(void)
 	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
 }
 
+static inline void __cr4_set(unsigned long cr4)
+{
+	lockdep_assert_irqs_disabled();
+	this_cpu_write(cpu_tlbstate.cr4, cr4);
+	__write_cr4(cr4);
+}
+
 /* Set in this cpu's CR4. */
 static inline void cr4_set_bits(unsigned long mask)
 {
-	unsigned long cr4;
+	unsigned long cr4, flags;
 
+	local_irq_save(flags);
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	if ((cr4 | mask) != cr4) {
-		cr4 |= mask;
-		this_cpu_write(cpu_tlbstate.cr4, cr4);
-		__write_cr4(cr4);
-	}
+	if ((cr4 | mask) != cr4)
+		__cr4_set(cr4 | mask);
+	local_irq_restore(flags);
 }
 
 /* Clear in this cpu's CR4. */
 static inline void cr4_clear_bits(unsigned long mask)
 {
-	unsigned long cr4;
+	unsigned long cr4, flags;
 
+	local_irq_save(flags);
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	if ((cr4 & ~mask) != cr4) {
-		cr4 &= ~mask;
-		this_cpu_write(cpu_tlbstate.cr4, cr4);
-		__write_cr4(cr4);
-	}
+	if ((cr4 & ~mask) != cr4)
+		__cr4_set(cr4 & ~mask);
+	local_irq_restore(flags);
 }
 
-static inline void cr4_toggle_bits(unsigned long mask)
+static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
 {
 	unsigned long cr4;
 
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	cr4 ^= mask;
-	this_cpu_write(cpu_tlbstate.cr4, cr4);
-	__write_cr4(cr4);
+	__cr4_set(cr4 ^ mask);
 }
 
 /* Read the CR4 shadow. */
@@ -215,6 +290,14 @@ static inline unsigned long cr4_read_shadow(void)
 	return this_cpu_read(cpu_tlbstate.cr4);
 }
 
+/*
+ * Mark all other ASIDs as invalid, preserves the current.
+ */
+static inline void invalidate_other_asid(void)
+{
+	this_cpu_write(cpu_tlbstate.invalidate_other, true);
+}
+
 /*
  * Save some of cr4 feature set we're using (e.g.  Pentium 4MB
  * enable and PPro Global page enable), so that any CPU's that boot
@@ -234,37 +317,63 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 
 extern void initialize_tlbstate_and_flush(void);
 
-static inline void __native_flush_tlb(void)
+/*
+ * Given an ASID, flush the corresponding user ASID.  We can delay this
+ * until the next time we switch to it.
+ *
+ * See SWITCH_TO_USER_CR3.
+ */
+static inline void invalidate_user_asid(u16 asid)
 {
+	/* There is no user ASID if address space separation is off */
+	if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+		return;
+
 	/*
-	 * If current->mm == NULL then we borrow a mm which may change during a
-	 * task switch and therefore we must not be preempted while we write CR3
-	 * back:
+	 * We only have a single ASID if PCID is off and the CR3
+	 * write will have flushed it.
 	 */
-	preempt_disable();
-	native_write_cr3(__native_read_cr3());
-	preempt_enable();
+	if (!cpu_feature_enabled(X86_FEATURE_PCID))
+		return;
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	__set_bit(kern_pcid(asid),
+		  (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
 }
 
-static inline void __native_flush_tlb_global_irq_disabled(void)
+/*
+ * flush the entire current user mapping
+ */
+static inline void __native_flush_tlb(void)
 {
-	unsigned long cr4;
+	/*
+	 * Preemption or interrupts must be disabled to protect the access
+	 * to the per CPU variable and to prevent being preempted between
+	 * read_cr3() and write_cr3().
+	 */
+	WARN_ON_ONCE(preemptible());
 
-	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	/* clear PGE */
-	native_write_cr4(cr4 & ~X86_CR4_PGE);
-	/* write old PGE again and flush TLBs */
-	native_write_cr4(cr4);
+	invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+
+	/* If current->mm == NULL then the read_cr3() "borrows" an mm */
+	native_write_cr3(__native_read_cr3());
 }
 
+/*
+ * flush everything
+ */
 static inline void __native_flush_tlb_global(void)
 {
-	unsigned long flags;
+	unsigned long cr4, flags;
 
 	if (static_cpu_has(X86_FEATURE_INVPCID)) {
 		/*
 		 * Using INVPCID is considerably faster than a pair of writes
 		 * to CR4 sandwiched inside an IRQ flag save/restore.
+		 *
+		 * Note, this works with CR4.PCIDE=0 or 1.
 		 */
 		invpcid_flush_all();
 		return;
@@ -277,36 +386,69 @@ static inline void __native_flush_tlb_global(void)
 	 */
 	raw_local_irq_save(flags);
 
-	__native_flush_tlb_global_irq_disabled();
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	/* toggle PGE */
+	native_write_cr4(cr4 ^ X86_CR4_PGE);
+	/* write old PGE again and flush TLBs */
+	native_write_cr4(cr4);
 
 	raw_local_irq_restore(flags);
 }
 
+/*
+ * flush one page in the user mapping
+ */
 static inline void __native_flush_tlb_single(unsigned long addr)
 {
+	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
 	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	/*
+	 * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
+	 * Just use invalidate_user_asid() in case we are called early.
+	 */
+	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
+		invalidate_user_asid(loaded_mm_asid);
+	else
+		invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
 }
 
+/*
+ * flush everything
+ */
 static inline void __flush_tlb_all(void)
 {
-	if (boot_cpu_has(X86_FEATURE_PGE))
+	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		__flush_tlb_global();
-	else
+	} else {
+		/*
+		 * !PGE -> !PCID (setup_pcid()), thus every flush is total.
+		 */
 		__flush_tlb();
-
-	/*
-	 * Note: if we somehow had PCID but not PGE, then this wouldn't work --
-	 * we'd end up flushing kernel translations for the current ASID but
-	 * we might fail to flush kernel translations for other cached ASIDs.
-	 *
-	 * To avoid this issue, we force PCID off if PGE is off.
-	 */
+	}
 }
 
+/*
+ * flush one page in the kernel mapping
+ */
 static inline void __flush_tlb_one(unsigned long addr)
 {
 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
 	__flush_tlb_single(addr);
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	/*
+	 * __flush_tlb_single() will have cleared the TLB entry for this ASID,
+	 * but since kernel space is replicated across all, we must also
+	 * invalidate all others.
+	 */
+	invalidate_other_asid();
 }
 
 #define TLB_FLUSH_ALL	-1UL
@@ -367,6 +509,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 void native_flush_tlb_others(const struct cpumask *cpumask,
 			     const struct flush_tlb_info *info);
 
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+	/*
+	 * Bump the generation count.  This also serves as a full barrier
+	 * that synchronizes with switch_mm(): callers are required to order
+	 * their read of mm_cpumask after their writes to the paging
+	 * structures.
+	 */
+	return atomic64_inc_return(&mm->context.tlb_gen);
+}
+
 static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
 					struct mm_struct *mm)
 {
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 84b9ec0c1bc0867795c3a2d327f3b8831bf4f8ba..22647a642e98c41508dab9976a1ff4665b97a4d2 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed,
 DECLARE_EVENT_CLASS(vector_activate,
 
 	TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve,
-		 bool early),
+		 bool reserve),
 
-	TP_ARGS(irq, is_managed, can_reserve, early),
+	TP_ARGS(irq, is_managed, can_reserve, reserve),
 
 	TP_STRUCT__entry(
 		__field(	unsigned int,	irq		)
 		__field(	bool,		is_managed	)
 		__field(	bool,		can_reserve	)
-		__field(	bool,		early		)
+		__field(	bool,		reserve		)
 	),
 
 	TP_fast_assign(
 		__entry->irq		= irq;
 		__entry->is_managed	= is_managed;
 		__entry->can_reserve	= can_reserve;
-		__entry->early		= early;
+		__entry->reserve	= reserve;
 	),
 
-	TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d",
+	TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d",
 		  __entry->irq, __entry->is_managed, __entry->can_reserve,
-		  __entry->early)
+		  __entry->reserve)
 );
 
 #define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name)				\
 DEFINE_EVENT_FN(vector_activate, name,					\
 	TP_PROTO(unsigned int irq, bool is_managed,			\
-		 bool can_reserve, bool early),				\
-	TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL);	\
+		 bool can_reserve, bool reserve),			\
+	TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL);	\
 
 DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate);
 DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 1fadd310ff680ece697fa65a8db410c380a8547e..31051f35cbb768e452c4f76a60c5415a45f572e7 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
 dotraplinkage void do_stack_segment(struct pt_regs *, long);
 #ifdef CONFIG_X86_64
 dotraplinkage void do_double_fault(struct pt_regs *, long);
-asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *, long);
 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e9cc6fe1fc6f953c38ddcc61fcf06fd90d72ab04..1f86e1b0a5cdc1afeb4667e8c770ec81456e9fb4 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -7,6 +7,9 @@
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
+#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
+#define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
+
 struct unwind_state {
 	struct stack_info stack_info;
 	unsigned long stack_mask;
@@ -52,15 +55,28 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
 }
 
 #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+/*
+ * If 'partial' returns true, only the iret frame registers are valid.
+ */
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+						    bool *partial)
 {
 	if (unwind_done(state))
 		return NULL;
 
+	if (partial) {
+#ifdef CONFIG_UNWINDER_ORC
+		*partial = !state->full_regs;
+#else
+		*partial = false;
+#endif
+	}
+
 	return state->regs;
 }
 #else
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+						    bool *partial)
 {
 	return NULL;
 }
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d9a7c659009c94b2ee04b4ddcc94e5874cc403b0..b986b2ca688a0e4fa24ace613c7e465e909723fe 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -7,6 +7,7 @@
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
 extern void map_vsyscall(void);
+extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
 
 /*
  * Called on instruction fetch fault in vsyscall page.
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 7cb282e9e58777aeb2ffd86b344d39a5189c5f84..bfd882617613923f0db79c4aae82690bda7a8d4f 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -44,6 +44,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/smap.h>
+#include <asm/nospec-branch.h>
 
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
@@ -217,9 +218,9 @@ privcmd_call(unsigned call,
 	__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
 
 	stac();
-	asm volatile("call *%[call]"
+	asm volatile(CALL_NOSPEC
 		     : __HYPERCALL_5PARAM
-		     : [call] "a" (&hypercall_page[call])
+		     : [thunk_target] "a" (&hypercall_page[call])
 		     : __HYPERCALL_CLOBBER5);
 	clac();
 
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index da1489cb64dce5fcec622f2869cd26d2b58d00ac..1e901e421f2db09d4a0c2c543f47b0e5bbc8be62 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += bpf_perf_event.h
 generated-y += unistd_32.h
 generated-y += unistd_64.h
 generated-y += unistd_x32.h
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 7e1e730396ae08f5a267adaccf0c3ba46448f780..bcba3c643e63dced1c873ee5e1cdbfdd5d307928 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -78,7 +78,12 @@
 #define X86_CR3_PWT		_BITUL(X86_CR3_PWT_BIT)
 #define X86_CR3_PCD_BIT		4 /* Page Cache Disable */
 #define X86_CR3_PCD		_BITUL(X86_CR3_PCD_BIT)
-#define X86_CR3_PCID_MASK	_AC(0x00000fff,UL) /* PCID Mask */
+
+#define X86_CR3_PCID_BITS	12
+#define X86_CR3_PCID_MASK	(_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
+
+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
+#define X86_CR3_PCID_NOFLUSH    _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
 
 /*
  * Intel CPU features in CR4
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index dbaf14d69ebd510b86a4a0e9e83bbd1862d99d14..4817d743c26359c697559053a23518a5e25b241b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -344,9 +344,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
 	unsigned long flags;
+	int i;
 
-	if (instr[0] != 0x90)
-		return;
+	for (i = 0; i < a->padlen; i++) {
+		if (instr[i] != 0x90)
+			return;
+	}
 
 	local_irq_save(flags);
 	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6e272f3ea984a220975d1b7c144867840fb867a9..880441f2414610298002c34652b96e571772e08f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2626,11 +2626,13 @@ static int __init apic_set_verbosity(char *arg)
 		apic_verbosity = APIC_DEBUG;
 	else if (strcmp("verbose", arg) == 0)
 		apic_verbosity = APIC_VERBOSE;
+#ifdef CONFIG_X86_64
 	else {
 		pr_warning("APIC Verbosity level %s not recognised"
 			" use apic=verbose or apic=debug\n", arg);
 		return -EINVAL;
 	}
+#endif
 
 	return 0;
 }
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index aa85690e9b6416b797db8129c8f1cbd7606bf687..25a87028cb3fe9bb4a6e9eba757d50cd49f15620 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -151,7 +151,7 @@ static struct apic apic_flat __ro_after_init = {
 	.apic_id_valid			= default_apic_id_valid,
 	.apic_id_registered		= flat_apic_id_registered,
 
-	.irq_delivery_mode		= dest_LowestPrio,
+	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 1, /* logical */
 
 	.disable_esr			= 0,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 7b659c4480c91a680466cc961cee68754c633ada..5078b5ce63a7aaf4569368e6447c9a4423271109 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -110,7 +110,7 @@ struct apic apic_noop __ro_after_init = {
 	.apic_id_valid			= default_apic_id_valid,
 	.apic_id_registered		= noop_apic_id_registered,
 
-	.irq_delivery_mode		= dest_LowestPrio,
+	.irq_delivery_mode		= dest_Fixed,
 	/* logical delivery broadcast to all CPUs: */
 	.irq_dest_mode			= 1,
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 201579dc52428edb5c3989102a432c14799d1e1f..8a79634214600ab02076cfb601dac780996b6f3f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
 }
 
 int mp_irqdomain_activate(struct irq_domain *domain,
-			  struct irq_data *irq_data, bool early)
+			  struct irq_data *irq_data, bool reserve)
 {
 	unsigned long flags;
 
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 9b18be76442236eab971cab86cbf366f9e1e134f..ce503c99f5c4cd67fddb5fafdda7e3e64b8e7690 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -39,17 +39,13 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
 		((apic->irq_dest_mode == 0) ?
 			MSI_ADDR_DEST_MODE_PHYSICAL :
 			MSI_ADDR_DEST_MODE_LOGICAL) |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			MSI_ADDR_REDIRECTION_CPU :
-			MSI_ADDR_REDIRECTION_LOWPRI) |
+		MSI_ADDR_REDIRECTION_CPU |
 		MSI_ADDR_DEST_ID(cfg->dest_apicid);
 
 	msg->data =
 		MSI_DATA_TRIGGER_EDGE |
 		MSI_DATA_LEVEL_ASSERT |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			MSI_DATA_DELIVERY_FIXED :
-			MSI_DATA_DELIVERY_LOWPRI) |
+		MSI_DATA_DELIVERY_FIXED |
 		MSI_DATA_VECTOR(cfg->vector);
 }
 
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index fa22017de806504b79593ead951bffe4e5d5421b..02e8acb134f856faa0030bf1c007dc3ac1bd9ca3 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -105,7 +105,7 @@ static struct apic apic_default __ro_after_init = {
 	.apic_id_valid			= default_apic_id_valid,
 	.apic_id_registered		= default_apic_id_registered,
 
-	.irq_delivery_mode		= dest_LowestPrio,
+	.irq_delivery_mode		= dest_Fixed,
 	/* logical delivery broadcast to all CPUs: */
 	.irq_dest_mode			= 1,
 
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6a823a25eaff03787660bd1f92e587362259e54d..f8b03bb8e72560a436dbad677ddeed30f19531cf 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd)
 	irq_matrix_reserve(vector_matrix);
 	apicd->can_reserve = true;
 	apicd->has_reserved = true;
+	irqd_set_can_reserve(irqd);
 	trace_vector_reserve(irqd->irq, 0);
 	vector_assign_managed_shutdown(irqd);
 }
@@ -368,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd)
 	int ret;
 
 	ret = assign_irq_vector_any_locked(irqd);
-	if (!ret)
+	if (!ret) {
 		apicd->has_reserved = false;
+		/*
+		 * Core might have disabled reservation mode after
+		 * allocating the irq descriptor. Ideally this should
+		 * happen before allocation time, but that would require
+		 * completely convoluted ways of transporting that
+		 * information.
+		 */
+		if (!irqd_can_reserve(irqd))
+			apicd->can_reserve = false;
+	}
 	return ret;
 }
 
@@ -398,21 +409,21 @@ static int activate_managed(struct irq_data *irqd)
 }
 
 static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
-			       bool early)
+			       bool reserve)
 {
 	struct apic_chip_data *apicd = apic_chip_data(irqd);
 	unsigned long flags;
 	int ret = 0;
 
 	trace_vector_activate(irqd->irq, apicd->is_managed,
-			      apicd->can_reserve, early);
+			      apicd->can_reserve, reserve);
 
 	/* Nothing to do for fixed assigned vectors */
 	if (!apicd->can_reserve && !apicd->is_managed)
 		return 0;
 
 	raw_spin_lock_irqsave(&vector_lock, flags);
-	if (early || irqd_is_managed_and_shutdown(irqd))
+	if (reserve || irqd_is_managed_and_shutdown(irqd))
 		vector_assign_managed_shutdown(irqd);
 	else if (apicd->is_managed)
 		ret = activate_managed(irqd);
@@ -478,6 +489,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
 	} else {
 		/* Release the vector */
 		apicd->can_reserve = true;
+		irqd_set_can_reserve(irqd);
 		clear_irq_vector(irqd);
 		realloc = true;
 	}
@@ -542,8 +554,8 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 }
 
 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
-			   struct irq_data *irqd, int ind)
+static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
+				  struct irq_data *irqd, int ind)
 {
 	unsigned int cpu, vector, prev_cpu, prev_vector;
 	struct apic_chip_data *apicd;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 622f13ca8a943c270e70df6100561ce4c56d7db2..8b04234e010b2616e42bff6cbeabdc3c2087c13c 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -184,7 +184,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
 	.apic_id_valid			= x2apic_apic_id_valid,
 	.apic_id_registered		= x2apic_apic_id_registered,
 
-	.irq_delivery_mode		= dest_LowestPrio,
+	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 1, /* logical */
 
 	.disable_esr			= 0,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 8ea78275480dafeb702e11ba73364cd9e7c52f21..76417a9aab73c3f7e3376261eb9ec592b16adf3b 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -17,6 +17,7 @@
 #include <asm/sigframe.h>
 #include <asm/bootparam.h>
 #include <asm/suspend.h>
+#include <asm/tlbflush.h>
 
 #ifdef CONFIG_XEN
 #include <xen/interface/xen.h>
@@ -93,4 +94,13 @@ void common(void) {
 
 	BLANK();
 	DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+
+	/* TLB state for the entry code */
+	OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
+
+	/* Layout info for cpu_entry_area */
+	OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+	OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+	OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
+	DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
 }
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index dedf428b20b68b0a4748fc1ac3032193c9121362..fa1261eefa16e73cedf27aadb878753be693f919 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -47,13 +47,8 @@ void foo(void)
 	BLANK();
 
 	/* Offset from the sysenter stack to tss.sp0 */
-	DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
-	       offsetofend(struct tss_struct, SYSENTER_stack));
-
-	/* Offset from cpu_tss to SYSENTER_stack */
-	OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
-	/* Size of SYSENTER_stack */
-	DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+	DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
+	       offsetofend(struct cpu_entry_area, entry_stack_page.stack));
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 	BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 630212fa9b9da3f0498fc30d4c193c5926c43abb..bf51e51d808dd8914abd3b4bca69b37ce3ec023b 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -23,6 +23,9 @@ int main(void)
 #ifdef CONFIG_PARAVIRT
 	OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
+#ifdef CONFIG_DEBUG_ENTRY
+	OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
+#endif
 	BLANK();
 #endif
 
@@ -63,6 +66,7 @@ int main(void)
 
 	OFFSET(TSS_ist, tss_struct, x86_tss.ist);
 	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
+	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
 	BLANK();
 
 #ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d58184b7cd4438144e2d0ac3f4744d19ff4ffb31..ea831c85819583c3dd0b0a48c2a31518eaa67b44 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
 	case 0x17: init_amd_zn(c); break;
 	}
 
-	/* Enable workaround for FXSAVE leak */
-	if (c->x86 >= 6)
+	/*
+	 * Enable workaround for FXSAVE leak on CPUs
+	 * without a XSaveErPtr feature
+	 */
+	if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
 		set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
 
 	cpu_detect_cache_sizes(c);
@@ -826,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_K8);
 
 	if (cpu_has(c, X86_FEATURE_XMM2)) {
-		/* MFENCE stops RDTSC speculation */
-		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+		unsigned long long val;
+		int ret;
+
+		/*
+		 * A serializing LFENCE has less overhead than MFENCE, so
+		 * use it for execution serialization.  On families which
+		 * don't have that MSR, LFENCE is already serializing.
+		 * msr_set_bit() uses the safe accessors, too, even if the MSR
+		 * is not present.
+		 */
+		msr_set_bit(MSR_F10H_DECFG,
+			    MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+		/*
+		 * Verify that the MSR write was successful (could be running
+		 * under a hypervisor) and only then assume that LFENCE is
+		 * serializing.
+		 */
+		ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+		if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+			/* A serializing LFENCE stops RDTSC speculation */
+			set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+		} else {
+			/* MFENCE stops RDTSC speculation */
+			set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+		}
 	}
 
 	/*
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ba0b2424c9b050100ac6eeef97c4aef45181d455..e4dc26185aa70bcdecf8ca932b6c720121819558 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -10,6 +10,10 @@
  */
 #include <linux/init.h>
 #include <linux/utsname.h>
+#include <linux/cpu.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
 #include <asm/bugs.h>
 #include <asm/processor.h>
 #include <asm/processor-flags.h>
@@ -20,6 +24,8 @@
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
 
+static void __init spectre_v2_select_mitigation(void);
+
 void __init check_bugs(void)
 {
 	identify_boot_cpu();
@@ -29,6 +35,9 @@ void __init check_bugs(void)
 		print_cpu_info(&boot_cpu_data);
 	}
 
+	/* Select the proper spectre mitigation before patching alternatives */
+	spectre_v2_select_mitigation();
+
 #ifdef CONFIG_X86_32
 	/*
 	 * Check whether we are able to run this kernel safely on SMP.
@@ -60,3 +69,179 @@ void __init check_bugs(void)
 		set_memory_4k((unsigned long)__va(0), 1);
 #endif
 }
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+	SPECTRE_V2_CMD_NONE,
+	SPECTRE_V2_CMD_AUTO,
+	SPECTRE_V2_CMD_FORCE,
+	SPECTRE_V2_CMD_RETPOLINE,
+	SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+	SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+	[SPECTRE_V2_NONE]			= "Vulnerable",
+	[SPECTRE_V2_RETPOLINE_MINIMAL]		= "Vulnerable: Minimal generic ASM retpoline",
+	[SPECTRE_V2_RETPOLINE_MINIMAL_AMD]	= "Vulnerable: Minimal AMD ASM retpoline",
+	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
+	[SPECTRE_V2_RETPOLINE_AMD]		= "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "Spectre V2 mitigation: " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		pr_info("%s\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		pr_info("%s\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+	return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+	int len = strlen(opt);
+
+	return len == arglen && !strncmp(arg, opt, len);
+}
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+	char arg[20];
+	int ret;
+
+	ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+				  sizeof(arg));
+	if (ret > 0)  {
+		if (match_option(arg, ret, "off")) {
+			goto disable;
+		} else if (match_option(arg, ret, "on")) {
+			spec2_print_if_secure("force enabled on command line.");
+			return SPECTRE_V2_CMD_FORCE;
+		} else if (match_option(arg, ret, "retpoline")) {
+			spec2_print_if_insecure("retpoline selected on command line.");
+			return SPECTRE_V2_CMD_RETPOLINE;
+		} else if (match_option(arg, ret, "retpoline,amd")) {
+			if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+				pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+				return SPECTRE_V2_CMD_AUTO;
+			}
+			spec2_print_if_insecure("AMD retpoline selected on command line.");
+			return SPECTRE_V2_CMD_RETPOLINE_AMD;
+		} else if (match_option(arg, ret, "retpoline,generic")) {
+			spec2_print_if_insecure("generic retpoline selected on command line.");
+			return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+		} else if (match_option(arg, ret, "auto")) {
+			return SPECTRE_V2_CMD_AUTO;
+		}
+	}
+
+	if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+		return SPECTRE_V2_CMD_AUTO;
+disable:
+	spec2_print_if_insecure("disabled on command line.");
+	return SPECTRE_V2_CMD_NONE;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+	enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+	/*
+	 * If the CPU is not affected and the command line mode is NONE or AUTO
+	 * then nothing to do.
+	 */
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+	    (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+		return;
+
+	switch (cmd) {
+	case SPECTRE_V2_CMD_NONE:
+		return;
+
+	case SPECTRE_V2_CMD_FORCE:
+		/* FALLTRHU */
+	case SPECTRE_V2_CMD_AUTO:
+		goto retpoline_auto;
+
+	case SPECTRE_V2_CMD_RETPOLINE_AMD:
+		if (IS_ENABLED(CONFIG_RETPOLINE))
+			goto retpoline_amd;
+		break;
+	case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+		if (IS_ENABLED(CONFIG_RETPOLINE))
+			goto retpoline_generic;
+		break;
+	case SPECTRE_V2_CMD_RETPOLINE:
+		if (IS_ENABLED(CONFIG_RETPOLINE))
+			goto retpoline_auto;
+		break;
+	}
+	pr_err("kernel not compiled with retpoline; no mitigation available!");
+	return;
+
+retpoline_auto:
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+	retpoline_amd:
+		if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+			pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+			goto retpoline_generic;
+		}
+		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+					 SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+	} else {
+	retpoline_generic:
+		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+					 SPECTRE_V2_RETPOLINE_MINIMAL;
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+	}
+
+	spectre_v2_enabled = mode;
+	pr_info("%s\n", spectre_v2_strings[mode]);
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+		return sprintf(buf, "Not affected\n");
+	if (boot_cpu_has(X86_FEATURE_PTI))
+		return sprintf(buf, "Mitigation: PTI\n");
+	return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+		return sprintf(buf, "Not affected\n");
+	return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		return sprintf(buf, "Not affected\n");
+
+	return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+}
+#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fa998ca8aa5aa5b4899dbe8a57c5b543f927009e..ef29ad001991d6acdd5b59cd707d85f9ff99f9ea 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
 	return NULL;		/* Not found */
 }
 
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
 
 void load_percpu_segment(int cpu)
 {
@@ -490,28 +490,23 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
-/* Setup the fixmap mapping only once per-processor */
-static inline void setup_fixmap_gdt(int cpu)
-{
-#ifdef CONFIG_X86_64
-	/* On 64-bit systems, we use a read-only fixmap GDT. */
-	pgprot_t prot = PAGE_KERNEL_RO;
-#else
-	/*
-	 * On native 32-bit systems, the GDT cannot be read-only because
-	 * our double fault handler uses a task gate, and entering through
-	 * a task gate needs to change an available TSS to busy.  If the GDT
-	 * is read-only, that will triple fault.
-	 *
-	 * On Xen PV, the GDT must be read-only because the hypervisor requires
-	 * it.
-	 */
-	pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-		PAGE_KERNEL_RO : PAGE_KERNEL;
+#ifdef CONFIG_X86_32
+/* The 32-bit entry code needs to find cpu_entry_area. */
+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 #endif
 
-	__set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
-}
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
+	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
+};
+#endif
 
 /* Load the original GDT from the per-cpu structure */
 void load_direct_gdt(int cpu)
@@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
 {
 	int i;
 
-	for (i = 0; i < NCAPINTS; i++) {
+	for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
 		c->x86_capability[i] &= ~cpu_caps_cleared[i];
 		c->x86_capability[i] |= cpu_caps_set[i];
 	}
@@ -927,6 +922,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	}
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+	if (c->x86_vendor != X86_VENDOR_AMD)
+		setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
 	fpu__init_system(c);
 
 #ifdef CONFIG_X86_32
@@ -1250,7 +1252,7 @@ void enable_sep_cpu(void)
 		return;
 
 	cpu = get_cpu();
-	tss = &per_cpu(cpu_tss, cpu);
+	tss = &per_cpu(cpu_tss_rw, cpu);
 
 	/*
 	 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1261,7 @@ void enable_sep_cpu(void)
 
 	tss->x86_tss.ss1 = __KERNEL_CS;
 	wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
-	wrmsr(MSR_IA32_SYSENTER_ESP,
-	      (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
-	      0);
-
+	wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
 	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
 	put_cpu();
@@ -1357,25 +1355,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
-	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
+	extern char _entry_trampoline[];
+	extern char entry_SYSCALL_64_trampoline[];
+
+	int cpu = smp_processor_id();
+	unsigned long SYSCALL64_entry_trampoline =
+		(unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+		(entry_SYSCALL_64_trampoline - _entry_trampoline);
+
 	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+	if (static_cpu_has(X86_FEATURE_PTI))
+		wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+	else
+		wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
 #ifdef CONFIG_IA32_EMULATION
 	wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1381,7 @@ void syscall_init(void)
 	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
 	 */
 	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
 	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
 	wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1525,7 @@ void cpu_init(void)
 	if (cpu)
 		load_ucode_ap();
 
-	t = &per_cpu(cpu_tss, cpu);
+	t = &per_cpu(cpu_tss_rw, cpu);
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1569,7 +1564,7 @@ void cpu_init(void)
 	 * set up and load the per-CPU TSS
 	 */
 	if (!oist->ist[0]) {
-		char *estacks = per_cpu(exception_stacks, cpu);
+		char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
 
 		for (v = 0; v < N_EXCEPTION_STACKS; v++) {
 			estacks += exception_stack_sizes[v];
@@ -1580,7 +1575,7 @@ void cpu_init(void)
 		}
 	}
 
-	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
 	/*
 	 * <= is required because the CPU will access up to
@@ -1596,11 +1591,12 @@ void cpu_init(void)
 	enter_lazy_tlb(&init_mm, me);
 
 	/*
-	 * Initialize the TSS.  Don't bother initializing sp0, as the initial
-	 * task never enters user mode.
+	 * Initialize the TSS.  sp0 points to the entry trampoline stack
+	 * regardless of what task is running.
 	 */
-	set_tss_desc(cpu, t);
+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 	load_TR_desc();
+	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
 
 	load_mm_ldt(&init_mm);
 
@@ -1612,7 +1608,6 @@ void cpu_init(void)
 	if (is_uv_system())
 		uv_cpu_init();
 
-	setup_fixmap_gdt(cpu);
 	load_fixmap_gdt(cpu);
 }
 
@@ -1622,7 +1617,7 @@ void cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	struct task_struct *curr = current;
-	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+	struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
 
 	wait_for_master_cpu(cpu);
 
@@ -1657,12 +1652,12 @@ void cpu_init(void)
 	 * Initialize the TSS.  Don't bother initializing sp0, as the initial
 	 * task never enters user mode.
 	 */
-	set_tss_desc(cpu, t);
+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 	load_TR_desc();
 
 	load_mm_ldt(&init_mm);
 
-	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
 #ifdef CONFIG_DOUBLEFAULT
 	/* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1669,6 @@ void cpu_init(void)
 
 	fpu__init_cpu();
 
-	setup_fixmap_gdt(cpu);
 	load_fixmap_gdt(cpu);
 }
 #endif
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c6daec4bdba5b180e45c5f78019fcba7b2880428..330b8462d426faad0dccdc480f34eec34cd8b92f 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 #define F14H_MPB_MAX_SIZE 1824
 #define F15H_MPB_MAX_SIZE 4096
 #define F16H_MPB_MAX_SIZE 3458
+#define F17H_MPB_MAX_SIZE 3200
 
 	switch (family) {
 	case 0x14:
@@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 	case 0x16:
 		max_size = F16H_MPB_MAX_SIZE;
 		break;
+	case 0x17:
+		max_size = F17H_MPB_MAX_SIZE;
+		break;
 	default:
 		max_size = F1XH_MPB_MAX_SIZE;
 		break;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 7dbcb7adf7975f7f29c38651c23c478ad315a34c..d9e460fc7a3b309327c8f2899cc54a7be2b9dc6b 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
 }
 #else
 
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
-	__native_flush_tlb_global_irq_disabled();
-}
-
 static inline void print_ucode(struct ucode_cpu_info *uci)
 {
 	struct microcode_intel *mc;
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 	if (rev != mc->hdr.rev)
 		return -1;
 
-#ifdef CONFIG_X86_64
-	/* Flush global tlb. This is precaution. */
-	flush_tlb_early();
-#endif
 	uci->cpu_sig.rev = rev;
 
 	if (early)
@@ -923,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
-		pr_err_once("late loading on model 79 is disabled.\n");
+	/*
+	 * Late loading on model 79 with microcode revision less than 0x0b000021
+	 * may result in a system hang. This behavior is documented in item
+	 * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+	 */
+	if (c->x86 == 6 &&
+	    c->x86_model == INTEL_FAM6_BROADWELL_X &&
+	    c->x86_mask == 0x01 &&
+	    c->microcode < 0x0b000021) {
+		pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+		pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
 		return true;
 	}
 
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index 0e662c55ae902fedd5c78c1ed87a972b35a79856..0b8cedb20d6d92f2875a49292680c8cfecd5b044 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
 		cpu_relax();
 }
 
-struct tss_struct doublefault_tss __cacheline_aligned = {
-	.x86_tss = {
-		.sp0		= STACK_START,
-		.ss0		= __KERNEL_DS,
-		.ldt		= 0,
-		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
-
-		.ip		= (unsigned long) doublefault_fn,
-		/* 0x2 bit is always set */
-		.flags		= X86_EFLAGS_SF | 0x2,
-		.sp		= STACK_START,
-		.es		= __USER_DS,
-		.cs		= __KERNEL_CS,
-		.ss		= __KERNEL_DS,
-		.ds		= __USER_DS,
-		.fs		= __KERNEL_PERCPU,
-
-		.__cr3		= __pa_nodebug(swapper_pg_dir),
-	}
+struct x86_hw_tss doublefault_tss __cacheline_aligned = {
+	.sp0		= STACK_START,
+	.ss0		= __KERNEL_DS,
+	.ldt		= 0,
+	.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
+
+	.ip		= (unsigned long) doublefault_fn,
+	/* 0x2 bit is always set */
+	.flags		= X86_EFLAGS_SF | 0x2,
+	.sp		= STACK_START,
+	.es		= __USER_DS,
+	.cs		= __KERNEL_CS,
+	.ss		= __KERNEL_DS,
+	.ds		= __USER_DS,
+	.fs		= __KERNEL_PERCPU,
+
+	.__cr3		= __pa_nodebug(swapper_pg_dir),
 };
 
 /* dummy for do_double_fault() call */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index f13b4c00a5de4b7a7b36c40d27311672bcc9d05c..afbecff161d162c11a82d0610ae406b910c3f791 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,6 +18,7 @@
 #include <linux/nmi.h>
 #include <linux/sysfs.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
 
@@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
 	return true;
 }
 
+bool in_entry_stack(unsigned long *stack, struct stack_info *info)
+{
+	struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
+
+	void *begin = ss;
+	void *end = ss + 1;
+
+	if ((void *)stack < begin || (void *)stack >= end)
+		return false;
+
+	info->type	= STACK_TYPE_ENTRY;
+	info->begin	= begin;
+	info->end	= end;
+	info->next_sp	= NULL;
+
+	return true;
+}
+
 static void printk_stack_address(unsigned long address, int reliable,
 				 char *log_lvl)
 {
@@ -50,6 +69,39 @@ static void printk_stack_address(unsigned long address, int reliable,
 	printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
 }
 
+void show_iret_regs(struct pt_regs *regs)
+{
+	printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
+	printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
+		regs->sp, regs->flags);
+}
+
+static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
+				  bool partial)
+{
+	/*
+	 * These on_stack() checks aren't strictly necessary: the unwind code
+	 * has already validated the 'regs' pointer.  The checks are done for
+	 * ordering reasons: if the registers are on the next stack, we don't
+	 * want to print them out yet.  Otherwise they'll be shown as part of
+	 * the wrong stack.  Later, when show_trace_log_lvl() switches to the
+	 * next stack, this function will be called again with the same regs so
+	 * they can be printed in the right context.
+	 */
+	if (!partial && on_stack(info, regs, sizeof(*regs))) {
+		__show_regs(regs, 0);
+
+	} else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+				       IRET_FRAME_SIZE)) {
+		/*
+		 * When an interrupt or exception occurs in entry code, the
+		 * full pt_regs might not have been saved yet.  In that case
+		 * just print the iret frame.
+		 */
+		show_iret_regs(regs);
+	}
+}
+
 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			unsigned long *stack, char *log_lvl)
 {
@@ -57,11 +109,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	struct stack_info stack_info = {0};
 	unsigned long visit_mask = 0;
 	int graph_idx = 0;
+	bool partial;
 
 	printk("%sCall Trace:\n", log_lvl);
 
 	unwind_start(&state, task, regs, stack);
 	stack = stack ? : get_stack_pointer(task, regs);
+	regs = unwind_get_entry_regs(&state, &partial);
 
 	/*
 	 * Iterate through the stacks, starting with the current stack pointer.
@@ -71,31 +125,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	 * - task stack
 	 * - interrupt stack
 	 * - HW exception stacks (double fault, nmi, debug, mce)
+	 * - entry stack
 	 *
-	 * x86-32 can have up to three stacks:
+	 * x86-32 can have up to four stacks:
 	 * - task stack
 	 * - softirq stack
 	 * - hardirq stack
+	 * - entry stack
 	 */
-	for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+	for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
 		const char *stack_name;
 
-		/*
-		 * If we overflowed the task stack into a guard page, jump back
-		 * to the bottom of the usable stack.
-		 */
-		if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
-			stack = task_stack_page(task);
-
-		if (get_stack_info(stack, task, &stack_info, &visit_mask))
-			break;
+		if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
+			/*
+			 * We weren't on a valid stack.  It's possible that
+			 * we overflowed a valid stack into a guard page.
+			 * See if the next page up is valid so that we can
+			 * generate some kind of backtrace if this happens.
+			 */
+			stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
+			if (get_stack_info(stack, task, &stack_info, &visit_mask))
+				break;
+		}
 
 		stack_name = stack_type_name(stack_info.type);
 		if (stack_name)
 			printk("%s <%s>\n", log_lvl, stack_name);
 
-		if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
-			__show_regs(regs, 0);
+		if (regs)
+			show_regs_if_on_stack(&stack_info, regs, partial);
 
 		/*
 		 * Scan the stack, printing any text addresses we find.  At the
@@ -119,7 +177,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
 			/*
 			 * Don't print regs->ip again if it was already printed
-			 * by __show_regs() below.
+			 * by show_regs_if_on_stack().
 			 */
 			if (regs && stack == &regs->ip)
 				goto next;
@@ -154,9 +212,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			unwind_next_frame(&state);
 
 			/* if the frame has entry regs, print them */
-			regs = unwind_get_entry_regs(&state);
-			if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
-				__show_regs(regs, 0);
+			regs = unwind_get_entry_regs(&state, &partial);
+			if (regs)
+				show_regs_if_on_stack(&stack_info, regs, partial);
 		}
 
 		if (stack_name)
@@ -252,11 +310,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
 	unsigned long sp;
 #endif
 	printk(KERN_DEFAULT
-	       "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+	       "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
 	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT"         : "",
 	       IS_ENABLED(CONFIG_SMP)     ? " SMP"             : "",
 	       debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
-	       IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "");
+	       IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "",
+	       IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
+	       (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
 
 	if (notify_die(DIE_OOPS, str, regs, err,
 			current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index daefae83a3aa86c59602b75bd3e6734c6e3b1030..04170f63e3a1d567caac3deea641e014b7e10823 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
 	if (type == STACK_TYPE_SOFTIRQ)
 		return "SOFTIRQ";
 
+	if (type == STACK_TYPE_ENTRY)
+		return "ENTRY_TRAMPOLINE";
+
 	return NULL;
 }
 
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
 	if (task != current)
 		goto unknown;
 
+	if (in_entry_stack(stack, info))
+		goto recursion_check;
+
 	if (in_hardirq_stack(stack, info))
 		goto recursion_check;
 
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 88ce2ffdb110303502ad33e64d357d8af5afd8c6..563e28d14f2ca157178d9de3a139d8370aaf89fe 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type)
 	if (type == STACK_TYPE_IRQ)
 		return "IRQ";
 
+	if (type == STACK_TYPE_ENTRY) {
+		/*
+		 * On 64-bit, we have a generic entry stack that we
+		 * use for all the kernel entry points, including
+		 * SYSENTER.
+		 */
+		return "ENTRY_TRAMPOLINE";
+	}
+
 	if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
 		return exception_stack_names[type - STACK_TYPE_EXCEPTION];
 
@@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
 	if (in_irq_stack(stack, info))
 		goto recursion_check;
 
+	if (in_entry_stack(stack, info))
+		goto recursion_check;
+
 	goto unknown;
 
 recursion_check:
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index b6c6468e10bc96625c25c4ae8d19b734af81366b..4c8440de33559942a98245cf123a5ed10a30e406 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -8,6 +8,7 @@
 #include <asm/segment.h>
 #include <asm/export.h>
 #include <asm/ftrace.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CC_USING_FENTRY
 # define function_hook	__fentry__
@@ -197,7 +198,8 @@ ftrace_stub:
 	movl	0x4(%ebp), %edx
 	subl	$MCOUNT_INSN_SIZE, %eax
 
-	call	*ftrace_trace_function
+	movl	ftrace_trace_function, %ecx
+	CALL_NOSPEC %ecx
 
 	popl	%edx
 	popl	%ecx
@@ -241,5 +243,5 @@ return_to_handler:
 	movl	%eax, %ecx
 	popl	%edx
 	popl	%eax
-	jmp	*%ecx
+	JMP_NOSPEC %ecx
 #endif
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index c832291d948a6b4fd487a3e42fab8b8f7b9dd244..7cb8ba08beb997ef66724e2db5e66021c5ce32ee 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -7,7 +7,7 @@
 #include <asm/ptrace.h>
 #include <asm/ftrace.h>
 #include <asm/export.h>
-
+#include <asm/nospec-branch.h>
 
 	.code64
 	.section .entry.text, "ax"
@@ -286,8 +286,8 @@ trace:
 	 * ip and parent ip are used and the list function is called when
 	 * function tracing is enabled.
 	 */
-	call   *ftrace_trace_function
-
+	movq ftrace_trace_function, %r8
+	CALL_NOSPEC %r8
 	restore_mcount_regs
 
 	jmp fgraph_trace
@@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
 	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
 	addq $24, %rsp
-	jmp *%rdi
+	JMP_NOSPEC %rdi
 #endif
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 7dca675fe78db60c5d79bc450bbd14bfee35cfc2..04a625f0fcda322dab7c9d8459a19ee56b71c936 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag)
 	.balign	PAGE_SIZE; \
 GLOBAL(name)
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Each PGD needs to be 8k long and 8k aligned.  We do not
+ * ever go out to userspace with these, so we do not
+ * strictly *need* the second page, but this allows us to
+ * have a single set_pgd() implementation that does not
+ * need to worry about whether it has 4k or 8k to work
+ * with.
+ *
+ * This ensures PGDs are 8k long:
+ */
+#define PTI_USER_PGD_FILL	512
+/* This ensures they are 8k-aligned: */
+#define NEXT_PGD_PAGE(name) \
+	.balign 2 * PAGE_SIZE; \
+GLOBAL(name)
+#else
+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#define PTI_USER_PGD_FILL	0
+#endif
+
 /* Automate the creation of 1 to 1 mapping pmd entries */
 #define PMDS(START, PERM, COUNT)			\
 	i = 0 ;						\
@@ -350,13 +371,14 @@ GLOBAL(name)
 	.endr
 
 	__INITDATA
-NEXT_PAGE(early_top_pgt)
+NEXT_PGD_PAGE(early_top_pgt)
 	.fill	511,8,0
 #ifdef CONFIG_X86_5LEVEL
 	.quad	level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 #else
 	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 #endif
+	.fill	PTI_USER_PGD_FILL,8,0
 
 NEXT_PAGE(early_dynamic_pgts)
 	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts)
 	.data
 
 #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
-NEXT_PAGE(init_top_pgt)
+NEXT_PGD_PAGE(init_top_pgt)
 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.org    init_top_pgt + PGD_START_KERNEL*8, 0
 	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
 	.quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+	.fill	PTI_USER_PGD_FILL,8,0
 
 NEXT_PAGE(level3_ident_pgt)
 	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
@@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt)
 	 */
 	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
 #else
-NEXT_PAGE(init_top_pgt)
+NEXT_PGD_PAGE(init_top_pgt)
 	.fill	512,8,0
+	.fill	PTI_USER_PGD_FILL,8,0
 #endif
 
 #ifdef CONFIG_X86_5LEVEL
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 3feb648781c470a7a49ee26749712ba7da891fe9..2f723301eb58fc5ad0d6796b342446ae2ee0c9e6 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 	 * because the ->io_bitmap_max value must match the bitmap
 	 * contents:
 	 */
-	tss = &per_cpu(cpu_tss, get_cpu());
+	tss = &per_cpu(cpu_tss_rw, get_cpu());
 
 	if (turn_on)
 		bitmap_clear(t->io_bitmap_ptr, from, num);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 49cfd9fe7589fa5ef2bef5d4a5d6431b7007836f..68e1867cca8045d0ed728ffc6b75a866c25484ed 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 	/* high bit used in ret_from_ code  */
 	unsigned vector = ~regs->orig_ax;
 
-	/*
-	 * NB: Unlike exception entries, IRQ entries do not reliably
-	 * handle context tracking in the low-level entry code.  This is
-	 * because syscall entries execute briefly with IRQs on before
-	 * updating context tracking state, so we can take an IRQ from
-	 * kernel mode with CONTEXT_USER.  The low-level entry code only
-	 * updates the context if we came from user mode, so we won't
-	 * switch to CONTEXT_KERNEL.  We'll fix that once the syscall
-	 * code is cleaned up enough that we can cleanly defer enabling
-	 * IRQs.
-	 */
-
 	entering_irq();
 
 	/* entering_irq() tells RCU that we're not quiescent.  Check it. */
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a83b3346a0e104833793687aea68f7f216cf0374..c1bdbd3d3232cb83d07bfa4ce604ae0b620c796a 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 
 #include <asm/apic.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 
@@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
 static void call_on_stack(void *func, void *stack)
 {
 	asm volatile("xchgl	%%ebx,%%esp	\n"
-		     "call	*%%edi		\n"
+		     CALL_NOSPEC
 		     "movl	%%ebx,%%esp	\n"
 		     : "=b" (stack)
 		     : "0" (stack),
-		       "D"(func)
+		       [thunk_target] "D"(func)
 		     : "memory", "cc", "edx", "ecx", "eax");
 }
 
@@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
 		call_on_stack(print_stack_overflow, isp);
 
 	asm volatile("xchgl	%%ebx,%%esp	\n"
-		     "call	*%%edi		\n"
+		     CALL_NOSPEC
 		     "movl	%%ebx,%%esp	\n"
 		     : "=a" (arg1), "=b" (isp)
 		     :  "0" (desc),   "1" (isp),
-			"D" (desc->handle_irq)
+			[thunk_target] "D" (desc->handle_irq)
 		     : "memory", "cc", "ecx");
 	return 1;
 }
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 020efbf5786b35d343a8632cd14ac4f800465d9b..d86e344f5b3debfed504b72a7c0f83f36fe16387 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 	if (regs->sp >= estack_top && regs->sp <= estack_bottom)
 		return;
 
-	WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
+	WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
 		current->comm, curbase, regs->sp,
 		irq_stack_top, irq_stack_bottom,
-		estack_top, estack_bottom);
+		estack_top, estack_bottom, (void *)regs->ip);
 
 	if (sysctl_panic_on_stackoverflow)
 		panic("low stack detected by irq handler - check messages\n");
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 1c1eae9613406b14c3154065e1fd036f985a384c..26d713ecad34a847e650638d02442eda04379f5e 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -5,6 +5,11 @@
  * Copyright (C) 2002 Andi Kleen
  *
  * This handles calls from both 32bit and 64bit mode.
+ *
+ * Lock order:
+ *	contex.ldt_usr_sem
+ *	  mmap_sem
+ *	    context.lock
  */
 
 #include <linux/errno.h>
@@ -19,6 +24,7 @@
 #include <linux/uaccess.h>
 
 #include <asm/ldt.h>
+#include <asm/tlb.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
@@ -42,17 +48,15 @@ static void refresh_ldt_segments(void)
 #endif
 }
 
-/* context.lock is held for us, so we don't need any locking. */
+/* context.lock is held by the task which issued the smp function call */
 static void flush_ldt(void *__mm)
 {
 	struct mm_struct *mm = __mm;
-	mm_context_t *pc;
 
 	if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
 		return;
 
-	pc = &mm->context;
-	set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
+	load_mm_ldt(mm);
 
 	refresh_ldt_segments();
 }
@@ -89,25 +93,143 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
 		return NULL;
 	}
 
+	/* The new LDT isn't aliased for PTI yet. */
+	new_ldt->slot = -1;
+
 	new_ldt->nr_entries = num_entries;
 	return new_ldt;
 }
 
+/*
+ * If PTI is enabled, this maps the LDT into the kernelmode and
+ * usermode tables for the given mm.
+ *
+ * There is no corresponding unmap function.  Even if the LDT is freed, we
+ * leave the PTEs around until the slot is reused or the mm is destroyed.
+ * This is harmless: the LDT is always in ordinary memory, and no one will
+ * access the freed slot.
+ *
+ * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
+ * it useful, and the flush would slow down modify_ldt().
+ */
+static int
+map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	bool is_vmalloc, had_top_level_entry;
+	unsigned long va;
+	spinlock_t *ptl;
+	pgd_t *pgd;
+	int i;
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return 0;
+
+	/*
+	 * Any given ldt_struct should have map_ldt_struct() called at most
+	 * once.
+	 */
+	WARN_ON(ldt->slot != -1);
+
+	/*
+	 * Did we already have the top level entry allocated?  We can't
+	 * use pgd_none() for this because it doens't do anything on
+	 * 4-level page table kernels.
+	 */
+	pgd = pgd_offset(mm, LDT_BASE_ADDR);
+	had_top_level_entry = (pgd->pgd != 0);
+
+	is_vmalloc = is_vmalloc_addr(ldt->entries);
+
+	for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
+		unsigned long offset = i << PAGE_SHIFT;
+		const void *src = (char *)ldt->entries + offset;
+		unsigned long pfn;
+		pte_t pte, *ptep;
+
+		va = (unsigned long)ldt_slot_va(slot) + offset;
+		pfn = is_vmalloc ? vmalloc_to_pfn(src) :
+			page_to_pfn(virt_to_page(src));
+		/*
+		 * Treat the PTI LDT range as a *userspace* range.
+		 * get_locked_pte() will allocate all needed pagetables
+		 * and account for them in this mm.
+		 */
+		ptep = get_locked_pte(mm, va, &ptl);
+		if (!ptep)
+			return -ENOMEM;
+		/*
+		 * Map it RO so the easy to find address is not a primary
+		 * target via some kernel interface which misses a
+		 * permission check.
+		 */
+		pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
+		set_pte_at(mm, va, ptep, pte);
+		pte_unmap_unlock(ptep, ptl);
+	}
+
+	if (mm->context.ldt) {
+		/*
+		 * We already had an LDT.  The top-level entry should already
+		 * have been allocated and synchronized with the usermode
+		 * tables.
+		 */
+		WARN_ON(!had_top_level_entry);
+		if (static_cpu_has(X86_FEATURE_PTI))
+			WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
+	} else {
+		/*
+		 * This is the first time we're mapping an LDT for this process.
+		 * Sync the pgd to the usermode tables.
+		 */
+		WARN_ON(had_top_level_entry);
+		if (static_cpu_has(X86_FEATURE_PTI)) {
+			WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
+			set_pgd(kernel_to_user_pgdp(pgd), *pgd);
+		}
+	}
+
+	va = (unsigned long)ldt_slot_va(slot);
+	flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
+
+	ldt->slot = slot;
+#endif
+	return 0;
+}
+
+static void free_ldt_pgtables(struct mm_struct *mm)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	struct mmu_gather tlb;
+	unsigned long start = LDT_BASE_ADDR;
+	unsigned long end = start + (1UL << PGDIR_SHIFT);
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	tlb_gather_mmu(&tlb, mm, start, end);
+	free_pgd_range(&tlb, start, end, start, end);
+	tlb_finish_mmu(&tlb, start, end);
+#endif
+}
+
 /* After calling this, the LDT is immutable. */
 static void finalize_ldt_struct(struct ldt_struct *ldt)
 {
 	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
 }
 
-/* context.lock is held */
-static void install_ldt(struct mm_struct *current_mm,
-			struct ldt_struct *ldt)
+static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
 {
+	mutex_lock(&mm->context.lock);
+
 	/* Synchronizes with READ_ONCE in load_mm_ldt. */
-	smp_store_release(&current_mm->context.ldt, ldt);
+	smp_store_release(&mm->context.ldt, ldt);
 
-	/* Activate the LDT for all CPUs using current_mm. */
-	on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
+	/* Activate the LDT for all CPUs using currents mm. */
+	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
+
+	mutex_unlock(&mm->context.lock);
 }
 
 static void free_ldt_struct(struct ldt_struct *ldt)
@@ -124,27 +246,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
 }
 
 /*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
+ * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
+ * the new task is not running, so nothing can be installed.
  */
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
+int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
 {
 	struct ldt_struct *new_ldt;
-	struct mm_struct *old_mm;
 	int retval = 0;
 
-	mutex_init(&mm->context.lock);
-	old_mm = current->mm;
-	if (!old_mm) {
-		mm->context.ldt = NULL;
+	if (!old_mm)
 		return 0;
-	}
 
 	mutex_lock(&old_mm->context.lock);
-	if (!old_mm->context.ldt) {
-		mm->context.ldt = NULL;
+	if (!old_mm->context.ldt)
 		goto out_unlock;
-	}
 
 	new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
 	if (!new_ldt) {
@@ -156,6 +271,12 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
 	       new_ldt->nr_entries * LDT_ENTRY_SIZE);
 	finalize_ldt_struct(new_ldt);
 
+	retval = map_ldt_struct(mm, new_ldt, 0);
+	if (retval) {
+		free_ldt_pgtables(mm);
+		free_ldt_struct(new_ldt);
+		goto out_unlock;
+	}
 	mm->context.ldt = new_ldt;
 
 out_unlock:
@@ -174,13 +295,18 @@ void destroy_context_ldt(struct mm_struct *mm)
 	mm->context.ldt = NULL;
 }
 
+void ldt_arch_exit_mmap(struct mm_struct *mm)
+{
+	free_ldt_pgtables(mm);
+}
+
 static int read_ldt(void __user *ptr, unsigned long bytecount)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long entries_size;
 	int retval;
 
-	mutex_lock(&mm->context.lock);
+	down_read(&mm->context.ldt_usr_sem);
 
 	if (!mm->context.ldt) {
 		retval = 0;
@@ -209,7 +335,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
 	retval = bytecount;
 
 out_unlock:
-	mutex_unlock(&mm->context.lock);
+	up_read(&mm->context.ldt_usr_sem);
 	return retval;
 }
 
@@ -269,7 +395,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 			ldt.avl = 0;
 	}
 
-	mutex_lock(&mm->context.lock);
+	if (down_write_killable(&mm->context.ldt_usr_sem))
+		return -EINTR;
 
 	old_ldt       = mm->context.ldt;
 	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
@@ -286,12 +413,31 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 	new_ldt->entries[ldt_info.entry_number] = ldt;
 	finalize_ldt_struct(new_ldt);
 
+	/*
+	 * If we are using PTI, map the new LDT into the userspace pagetables.
+	 * If there is already an LDT, use the other slot so that other CPUs
+	 * will continue to use the old LDT until install_ldt() switches
+	 * them over to the new LDT.
+	 */
+	error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
+	if (error) {
+		/*
+		 * This only can fail for the first LDT setup. If an LDT is
+		 * already installed then the PTE page is already
+		 * populated. Mop up a half populated page table.
+		 */
+		if (!WARN_ON_ONCE(old_ldt))
+			free_ldt_pgtables(mm);
+		free_ldt_struct(new_ldt);
+		goto out_unlock;
+	}
+
 	install_ldt(mm, new_ldt);
 	free_ldt_struct(old_ldt);
 	error = 0;
 
 out_unlock:
-	mutex_unlock(&mm->context.lock);
+	up_write(&mm->context.ldt_usr_sem);
 out:
 	return error;
 }
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 00bc751c861ce8fa42c1b93d39d029694f4e328d..edfede76868870e4cf86fed553377fd6bd0cac06 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -48,8 +48,6 @@ static void load_segments(void)
 		"\tmovl $"STR(__KERNEL_DS)",%%eax\n"
 		"\tmovl %%eax,%%ds\n"
 		"\tmovl %%eax,%%es\n"
-		"\tmovl %%eax,%%fs\n"
-		"\tmovl %%eax,%%gs\n"
 		"\tmovl %%eax,%%ss\n"
 		: : : "eax", "memory");
 #undef STR
@@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image)
 	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	set_gdt(phys_to_virt(0), 0);
 	idt_invalidate(phys_to_virt(0));
+	set_gdt(phys_to_virt(0), 0);
 
 	/* now call it */
 	image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index ac0be8283325edfdc2752f862b4c0cef208a931c..9edadabf04f66c657f8a29bb56fe994b2559d5cf 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
 		PATCH_SITE(pv_mmu_ops, write_cr3);
-		PATCH_SITE(pv_mmu_ops, flush_tlb_single);
 		PATCH_SITE(pv_cpu_ops, wbinvd);
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
 		case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 97fb3e5737f5d0b5d50f8d9232726923c2692e65..832a6acd730fad70e1426052d502f6438b30e38e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -47,7 +47,7 @@
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
 	.x86_tss = {
 		/*
 		 * .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
 		 * Poison it.
 		 */
 		.sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
+
+#ifdef CONFIG_X86_64
+		/*
+		 * .sp1 is cpu_current_top_of_stack.  The init task never
+		 * runs user code, but cpu_current_top_of_stack should still
+		 * be well defined before the first context switch.
+		 */
+		.sp1 = TOP_OF_INIT_STACK,
+#endif
+
 #ifdef CONFIG_X86_32
 		.ss0 = __KERNEL_DS,
 		.ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
 	  */
 	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },
 #endif
-#ifdef CONFIG_X86_32
-	.SYSENTER_stack_canary	= STACK_END_MAGIC,
-#endif
 };
-EXPORT_PER_CPU_SYMBOL(cpu_tss);
+EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
 
 DEFINE_PER_CPU(bool, __tss_limit_invalid);
 EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
 	struct fpu *fpu = &t->fpu;
 
 	if (bp) {
-		struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
+		struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
 
 		t->io_bitmap_ptr = NULL;
 		clear_thread_flag(TIF_IO_BITMAP);
@@ -299,7 +306,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	}
 
 	if ((tifp ^ tifn) & _TIF_NOTSC)
-		cr4_toggle_bits(X86_CR4_TSD);
+		cr4_toggle_bits_irqsoff(X86_CR4_TSD);
 
 	if ((tifp ^ tifn) & _TIF_NOCPUID)
 		set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 45bf0c5f93e15103060d67d5245756ab72ce8fe5..5224c609918416337b97440eb2d515d8052463ae 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct fpu *prev_fpu = &prev->fpu;
 	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+	struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eeeb34f85c250e8c01188b6d32cf5a62bd1af8a0..c754662320163107ca3a254362ce0e404a8d3c11 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
 	unsigned int fsindex, gsindex;
 	unsigned int ds, cs, es;
 
-	printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
-	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
-		regs->sp, regs->flags);
+	show_iret_regs(regs);
+
 	if (regs->orig_ax != -1)
 		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
 	else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
 	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
 	       regs->r13, regs->r14, regs->r15);
 
+	if (!all)
+		return;
+
 	asm("movl %%ds,%0" : "=r" (ds));
 	asm("movl %%cs,%0" : "=r" (cs));
 	asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
 	rdmsrl(MSR_GS_BASE, gs);
 	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
 
-	if (!all)
-		return;
-
 	cr0 = read_cr0();
 	cr2 = read_cr2();
 	cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct fpu *prev_fpu = &prev->fpu;
 	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+	struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
 
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
 		     this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * Switch the PDA and FPU contexts.
 	 */
 	this_cpu_write(current_task, next_p);
+	this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
 
 	/* Reload sp0. */
 	update_sp0(next_p);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 8af2e8d0c0a1d2d0290ff2026afeab056cdc59b6..145810b0edf6738b57a0e6560c53392ae54a82e7 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -906,9 +906,6 @@ void __init setup_arch(char **cmdline_p)
 		set_bit(EFI_BOOT, &efi.flags);
 		set_bit(EFI_64BIT, &efi.flags);
 	}
-
-	if (efi_enabled(EFI_BOOT))
-		efi_memblock_x86_reserve_range();
 #endif
 
 	x86_init.oem.arch_setup();
@@ -962,6 +959,8 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_early_param();
 
+	if (efi_enabled(EFI_BOOT))
+		efi_memblock_x86_reserve_range();
 #ifdef CONFIG_MEMORY_HOTPLUG
 	/*
 	 * Memory used by the kernel cannot be hot-removed because Linux
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3d01df7d7cf60cdbe1342fe84006405712394663..ed556d50d7ed6d71f03d202ef84a6b6b54e95a02 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -106,7 +106,7 @@ EXPORT_SYMBOL(__max_logical_packages);
 static unsigned int logical_packages __read_mostly;
 
 /* Maximum number of SMT threads on any online core */
-int __max_smt_threads __read_mostly;
+int __read_mostly __max_smt_threads = 1;
 
 /* Flag to indicate if a complete sched domain rebuild is required */
 bool x86_topology_update;
@@ -126,25 +126,16 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 	spin_lock_irqsave(&rtc_lock, flags);
 	CMOS_WRITE(0xa, 0xf);
 	spin_unlock_irqrestore(&rtc_lock, flags);
-	local_flush_tlb();
-	pr_debug("1.\n");
 	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
 							start_eip >> 4;
-	pr_debug("2.\n");
 	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
 							start_eip & 0xf;
-	pr_debug("3.\n");
 }
 
 static inline void smpboot_restore_warm_reset_vector(void)
 {
 	unsigned long flags;
 
-	/*
-	 * Install writable page 0 entry to set BIOS data area.
-	 */
-	local_flush_tlb();
-
 	/*
 	 * Paranoid:  Set warm reset code and vector here back
 	 * to default values.
@@ -237,7 +228,7 @@ static void notrace start_secondary(void *unused)
 	load_cr3(swapper_pg_dir);
 	__flush_tlb_all();
 #endif
-
+	load_current_idt();
 	cpu_init();
 	x86_cpuinit.early_percpu_clock_init();
 	preempt_disable();
@@ -932,12 +923,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
 	initial_code = (unsigned long)start_secondary;
 	initial_stack  = idle->thread.sp;
 
-	/*
-	 * Enable the espfix hack for this CPU
-	*/
-#ifdef CONFIG_X86_ESPFIX64
+	/* Enable the espfix hack for this CPU */
 	init_espfix_ap(cpu);
-#endif
 
 	/* So we see what's up */
 	announce_cpu(cpu, apicid);
@@ -1304,7 +1291,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 	 * Today neither Intel nor AMD support heterogenous systems so
 	 * extrapolate the boot cpu's data to all packages.
 	 */
-	ncpus = cpu_data(0).booted_cores * smp_num_siblings;
+	ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
 	__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
 	pr_info("Max logical packages: %u\n", __max_logical_packages);
 
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 77835bc021c766744dd1976b4429141e3fb645fa..093f2ea5dd56b613d03ccaeeb87fd50900f64ba1 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -102,7 +102,7 @@ __save_stack_trace_reliable(struct stack_trace *trace,
 	for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
 	     unwind_next_frame(&state)) {
 
-		regs = unwind_get_entry_regs(&state);
+		regs = unwind_get_entry_regs(&state, NULL);
 		if (regs) {
 			/*
 			 * Kernel mode registers on the stack indicate an
@@ -164,8 +164,12 @@ int save_stack_trace_tsk_reliable(struct task_struct *tsk,
 {
 	int ret;
 
+	/*
+	 * If the task doesn't have a stack (e.g., a zombie), the stack is
+	 * "reliably" empty.
+	 */
 	if (!try_get_task_stack(tsk))
-		return -EINVAL;
+		return 0;
 
 	ret = __save_stack_trace_reliable(trace, tsk);
 
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index a4eb27918cebf99dc2415f7eec49c4838d6f9f41..a2486f4440734756d49a96313c0e2f9a174a87db 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -138,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
 		return -1;
 	set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
 	pte_unmap(pte);
+
+	/*
+	 * PTI poisons low addresses in the kernel page tables in the
+	 * name of making them unusable for userspace.  To execute
+	 * code at such a low address, the poison must be cleared.
+	 *
+	 * Note: 'pgd' actually gets set in p4d_alloc() _or_
+	 * pud_alloc() depending on 4/5-level paging.
+	 */
+	pgd->pgd &= ~_PAGE_NX;
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 9a9c9b076955dd493c7de80b8d814a27dd0fceb7..a5b802a1221272402b344d75ccf94bcff4b69b38 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx,
 	cpu = get_cpu();
 
 	while (n-- > 0) {
-		if (LDT_empty(info) || LDT_zero(info)) {
+		if (LDT_empty(info) || LDT_zero(info))
 			memset(desc, 0, sizeof(*desc));
-		} else {
+		else
 			fill_ldt(desc, info);
-
-			/*
-			 * Always set the accessed bit so that the CPU
-			 * doesn't try to write to the (read-only) GDT.
-			 */
-			desc->type |= 1;
-		}
 		++info;
 		++desc;
 	}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 989514c94a55d8fa93a07192edd199be1a607bf8..446c9ef8cfc32b68d77d4429128b3c794b3fc069 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/fpu/internal.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/mce.h>
 #include <asm/fixmap.h>
 #include <asm/mach_traps.h>
@@ -348,23 +349,42 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 
 	/*
 	 * If IRET takes a non-IST fault on the espfix64 stack, then we
-	 * end up promoting it to a doublefault.  In that case, modify
-	 * the stack to make it look like we just entered the #GP
-	 * handler from user space, similar to bad_iret.
+	 * end up promoting it to a doublefault.  In that case, take
+	 * advantage of the fact that we're not using the normal (TSS.sp0)
+	 * stack right now.  We can write a fake #GP(0) frame at TSS.sp0
+	 * and then modify our own IRET frame so that, when we return,
+	 * we land directly at the #GP(0) vector with the stack already
+	 * set up according to its expectations.
+	 *
+	 * The net result is that our #GP handler will think that we
+	 * entered from usermode with the bad user context.
 	 *
 	 * No need for ist_enter here because we don't use RCU.
 	 */
-	if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
+	if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
 		regs->cs == __KERNEL_CS &&
 		regs->ip == (unsigned long)native_irq_return_iret)
 	{
-		struct pt_regs *normal_regs = task_pt_regs(current);
+		struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
-		/* Fake a #GP(0) from userspace. */
-		memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
-		normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
+		/*
+		 * regs->sp points to the failing IRET frame on the
+		 * ESPFIX64 stack.  Copy it to the entry stack.  This fills
+		 * in gpregs->ss through gpregs->ip.
+		 *
+		 */
+		memmove(&gpregs->ip, (void *)regs->sp, 5*8);
+		gpregs->orig_ax = 0;  /* Missing (lost) #GP error code */
+
+		/*
+		 * Adjust our frame so that we return straight to the #GP
+		 * vector with the expected RSP value.  This is safe because
+		 * we won't enable interupts or schedule before we invoke
+		 * general_protection, so nothing will clobber the stack
+		 * frame we just set up.
+		 */
 		regs->ip = (unsigned long)general_protection;
-		regs->sp = (unsigned long)&normal_regs->orig_ax;
+		regs->sp = (unsigned long)&gpregs->orig_ax;
 
 		return;
 	}
@@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	 *
 	 *   Processors update CR2 whenever a page fault is detected. If a
 	 *   second page fault occurs while an earlier page fault is being
-	 *   deliv- ered, the faulting linear address of the second fault will
+	 *   delivered, the faulting linear address of the second fault will
 	 *   overwrite the contents of CR2 (replacing the previous
 	 *   address). These updates to CR2 occur even if the page fault
 	 *   results in a double fault or occurs during the delivery of a
@@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3);
 
 #ifdef CONFIG_X86_64
 /*
- * Help handler running on IST stack to switch off the IST stack if the
- * interrupted code was in user mode. The actual stack switch is done in
- * entry_64.S
+ * Help handler running on a per-cpu (IST or entry trampoline) stack
+ * to switch to the normal thread stack if the interrupted code was in
+ * user mode. The actual stack switch is done in entry_64.S
  */
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
-	struct pt_regs *regs = task_pt_regs(current);
-	*regs = *eregs;
+	struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
+	if (regs != eregs)
+		*regs = *eregs;
 	return regs;
 }
 NOKPROBE_SYMBOL(sync_regs);
@@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
 	/*
 	 * This is called from entry_64.S early in handling a fault
 	 * caused by a bad iret to user mode.  To handle the fault
-	 * correctly, we want move our stack frame to task_pt_regs
-	 * and we want to pretend that the exception came from the
-	 * iret target.
+	 * correctly, we want to move our stack frame to where it would
+	 * be had we entered directly on the entry stack (rather than
+	 * just below the IRET frame) and we want to pretend that the
+	 * exception came from the IRET target.
 	 */
 	struct bad_iret_stack *new_stack =
-		container_of(task_pt_regs(current),
-			     struct bad_iret_stack, regs);
+		(struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
 	/* Copy the IRET target to the new stack. */
 	memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
@@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
 	debug_stack_usage_dec();
 
 exit:
-#if defined(CONFIG_X86_32)
-	/*
-	 * This is the most likely code path that involves non-trivial use
-	 * of the SYSENTER stack.  Check that we haven't overrun it.
-	 */
-	WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
-	     "Overran or corrupted SYSENTER stack\n");
-#endif
 	ist_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 
 void __init trap_init(void)
 {
+	/* Init cpu_entry_area before IST entries are set up */
+	setup_cpu_entry_areas();
+
 	idt_setup_traps();
 
 	/*
@@ -936,8 +952,9 @@ void __init trap_init(void)
 	 * "sidt" instruction will not leak the location of the kernel, and
 	 * to defend the IDT against arbitrary memory write vulnerabilities.
 	 * It will be reloaded in cpu_init() */
-	__set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
-	idt_descr.address = fix_to_virt(FIX_RO_IDT);
+	cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
+		    PAGE_KERNEL_RO);
+	idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
 
 	/*
 	 * Should be a barrier for any external CPU state:
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index a3f973b2c97a03b121fe0173dbdc9298216721e6..be86a865087a6b9dc8e04031dbf2e2fbeeda1ed5 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
 	return NULL;
 }
 
-static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
+static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
 			    size_t len)
 {
 	struct stack_info *info = &state->stack_info;
+	void *addr = (void *)_addr;
 
-	/*
-	 * If the address isn't on the current stack, switch to the next one.
-	 *
-	 * We may have to traverse multiple stacks to deal with the possibility
-	 * that info->next_sp could point to an empty stack and the address
-	 * could be on a subsequent stack.
-	 */
-	while (!on_stack(info, (void *)addr, len))
-		if (get_stack_info(info->next_sp, state->task, info,
-				   &state->stack_mask))
-			return false;
+	if (!on_stack(info, addr, len) &&
+	    (get_stack_info(addr, state->task, info, &state->stack_mask)))
+		return false;
 
 	return true;
 }
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
 	return true;
 }
 
-#define REGS_SIZE (sizeof(struct pt_regs))
-#define SP_OFFSET (offsetof(struct pt_regs, sp))
-#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
-#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
-
 static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
-			     unsigned long *ip, unsigned long *sp, bool full)
+			     unsigned long *ip, unsigned long *sp)
 {
-	size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
-	size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
-	struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
-
-	if (IS_ENABLED(CONFIG_X86_64)) {
-		if (!stack_access_ok(state, addr, regs_size))
-			return false;
+	struct pt_regs *regs = (struct pt_regs *)addr;
 
-		*ip = regs->ip;
-		*sp = regs->sp;
+	/* x86-32 support will be more complicated due to the &regs->sp hack */
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
 
-		return true;
-	}
-
-	if (!stack_access_ok(state, addr, sp_offset))
+	if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
 		return false;
 
 	*ip = regs->ip;
+	*sp = regs->sp;
+	return true;
+}
 
-	if (user_mode(regs)) {
-		if (!stack_access_ok(state, addr + sp_offset,
-				     REGS_SIZE - SP_OFFSET))
-			return false;
+static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
+				  unsigned long *ip, unsigned long *sp)
+{
+	struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
 
-		*sp = regs->sp;
-	} else
-		*sp = (unsigned long)&regs->sp;
+	if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
+		return false;
 
+	*ip = regs->ip;
+	*sp = regs->sp;
 	return true;
 }
 
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
 	unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
 	enum stack_type prev_type = state->stack_info.type;
 	struct orc_entry *orc;
-	struct pt_regs *ptregs;
 	bool indirect = false;
 
 	if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	case ORC_TYPE_REGS:
-		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
+		if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
 			orc_warn("can't dereference registers at %p for ip %pB\n",
 				 (void *)sp, (void *)orig_ip);
 			goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	case ORC_TYPE_REGS_IRET:
-		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
+		if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
 			orc_warn("can't dereference iret registers at %p for ip %pB\n",
 				 (void *)sp, (void *)orig_ip);
 			goto done;
 		}
 
-		ptregs = container_of((void *)sp, struct pt_regs, ip);
-		if ((unsigned long)ptregs >= prev_sp &&
-		    on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
-			state->regs = ptregs;
-			state->full_regs = false;
-		} else
-			state->regs = NULL;
-
+		state->regs = (void *)sp - IRET_FRAME_OFFSET;
+		state->full_regs = false;
 		state->signal = true;
 		break;
 
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	}
 
 	if (get_stack_info((unsigned long *)state->sp, state->task,
-			   &state->stack_info, &state->stack_mask))
-		return;
+			   &state->stack_info, &state->stack_mask)) {
+		/*
+		 * We weren't on a valid stack.  It's possible that
+		 * we overflowed a valid stack into a guard page.
+		 * See if the next page up is valid so that we can
+		 * generate some kind of backtrace if this happens.
+		 */
+		void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
+		if (get_stack_info(next_page, state->task, &state->stack_info,
+				   &state->stack_mask))
+			return;
+	}
 
 	/*
 	 * The caller can provide the address of the first frame directly
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a4009fb9be8725ce7bda96cd5e8160e524903266..1e413a9326aaa152a47d51fa4c1d1ea03cbb4d94 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -61,11 +61,17 @@ jiffies_64 = jiffies;
 		. = ALIGN(HPAGE_SIZE);				\
 		__end_rodata_hpage_align = .;
 
+#define ALIGN_ENTRY_TEXT_BEGIN	. = ALIGN(PMD_SIZE);
+#define ALIGN_ENTRY_TEXT_END	. = ALIGN(PMD_SIZE);
+
 #else
 
 #define X64_ALIGN_RODATA_BEGIN
 #define X64_ALIGN_RODATA_END
 
+#define ALIGN_ENTRY_TEXT_BEGIN
+#define ALIGN_ENTRY_TEXT_END
+
 #endif
 
 PHDRS {
@@ -102,11 +108,22 @@ SECTIONS
 		CPUIDLE_TEXT
 		LOCK_TEXT
 		KPROBES_TEXT
+		ALIGN_ENTRY_TEXT_BEGIN
 		ENTRY_TEXT
 		IRQENTRY_TEXT
+		ALIGN_ENTRY_TEXT_END
 		SOFTIRQENTRY_TEXT
 		*(.fixup)
 		*(.gnu.warning)
+
+#ifdef CONFIG_X86_64
+		. = ALIGN(PAGE_SIZE);
+		_entry_trampoline = .;
+		*(.entry_trampoline)
+		. = ALIGN(PAGE_SIZE);
+		ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
+#endif
+
 		/* End of text section */
 		_etext = .;
 	} :text = 0x9090
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e7d04d0c8008d1a1d69966105ad855351a1f474f..b514b2b2845a334d4b53f28ed0b73c96f12d0e6a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1046,7 +1046,6 @@ static void fetch_register_operand(struct operand *op)
 
 static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
 {
-	ctxt->ops->get_fpu(ctxt);
 	switch (reg) {
 	case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
 	case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
@@ -1068,13 +1067,11 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
 #endif
 	default: BUG();
 	}
-	ctxt->ops->put_fpu(ctxt);
 }
 
 static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
 			  int reg)
 {
-	ctxt->ops->get_fpu(ctxt);
 	switch (reg) {
 	case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
 	case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
@@ -1096,12 +1093,10 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
 #endif
 	default: BUG();
 	}
-	ctxt->ops->put_fpu(ctxt);
 }
 
 static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
 {
-	ctxt->ops->get_fpu(ctxt);
 	switch (reg) {
 	case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
 	case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
@@ -1113,12 +1108,10 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
 	case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
 	default: BUG();
 	}
-	ctxt->ops->put_fpu(ctxt);
 }
 
 static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
 {
-	ctxt->ops->get_fpu(ctxt);
 	switch (reg) {
 	case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
 	case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
@@ -1130,7 +1123,6 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
 	case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
 	default: BUG();
 	}
-	ctxt->ops->put_fpu(ctxt);
 }
 
 static int em_fninit(struct x86_emulate_ctxt *ctxt)
@@ -1138,9 +1130,7 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt)
 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
 		return emulate_nm(ctxt);
 
-	ctxt->ops->get_fpu(ctxt);
 	asm volatile("fninit");
-	ctxt->ops->put_fpu(ctxt);
 	return X86EMUL_CONTINUE;
 }
 
@@ -1151,9 +1141,7 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
 		return emulate_nm(ctxt);
 
-	ctxt->ops->get_fpu(ctxt);
 	asm volatile("fnstcw %0": "+m"(fcw));
-	ctxt->ops->put_fpu(ctxt);
 
 	ctxt->dst.val = fcw;
 
@@ -1167,9 +1155,7 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
 		return emulate_nm(ctxt);
 
-	ctxt->ops->get_fpu(ctxt);
 	asm volatile("fnstsw %0": "+m"(fsw));
-	ctxt->ops->put_fpu(ctxt);
 
 	ctxt->dst.val = fsw;
 
@@ -2404,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
 }
 
 static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
-				     u64 cr0, u64 cr4)
+				    u64 cr0, u64 cr3, u64 cr4)
 {
 	int bad;
+	u64 pcid;
+
+	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
+	pcid = 0;
+	if (cr4 & X86_CR4_PCIDE) {
+		pcid = cr3 & 0xfff;
+		cr3 &= ~0xfff;
+	}
+
+	bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+	if (bad)
+		return X86EMUL_UNHANDLEABLE;
 
 	/*
 	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2425,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
 		bad = ctxt->ops->set_cr(ctxt, 4, cr4);
 		if (bad)
 			return X86EMUL_UNHANDLEABLE;
+		if (pcid) {
+			bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+			if (bad)
+				return X86EMUL_UNHANDLEABLE;
+		}
+
 	}
 
 	return X86EMUL_CONTINUE;
@@ -2435,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
 	struct desc_struct desc;
 	struct desc_ptr dt;
 	u16 selector;
-	u32 val, cr0, cr4;
+	u32 val, cr0, cr3, cr4;
 	int i;
 
 	cr0 =                      GET_SMSTATE(u32, smbase, 0x7ffc);
-	ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+	cr3 =                      GET_SMSTATE(u32, smbase, 0x7ff8);
 	ctxt->eflags =             GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
 	ctxt->_eip =               GET_SMSTATE(u32, smbase, 0x7ff0);
 
@@ -2481,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
 
 	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
 
-	return rsm_enter_protected_mode(ctxt, cr0, cr4);
+	return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
 }
 
 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 {
 	struct desc_struct desc;
 	struct desc_ptr dt;
-	u64 val, cr0, cr4;
+	u64 val, cr0, cr3, cr4;
 	u32 base3;
 	u16 selector;
 	int i, r;
@@ -2505,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 	ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
 
 	cr0 =                       GET_SMSTATE(u64, smbase, 0x7f58);
-	ctxt->ops->set_cr(ctxt, 3,  GET_SMSTATE(u64, smbase, 0x7f50));
+	cr3 =                       GET_SMSTATE(u64, smbase, 0x7f50);
 	cr4 =                       GET_SMSTATE(u64, smbase, 0x7f48);
 	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
 	val =                       GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2533,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 	dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
 	ctxt->ops->set_gdt(ctxt, &dt);
 
-	r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+	r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
 	if (r != X86EMUL_CONTINUE)
 		return r;
 
@@ -4001,12 +4005,8 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->ops->get_fpu(ctxt);
-
 	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
 
-	ctxt->ops->put_fpu(ctxt);
-
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
@@ -4049,8 +4049,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->ops->get_fpu(ctxt);
-
 	if (size < __fxstate_size(16)) {
 		rc = fxregs_fixup(&fx_state, size);
 		if (rc != X86EMUL_CONTINUE)
@@ -4066,8 +4064,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 		rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
 
 out:
-	ctxt->ops->put_fpu(ctxt);
-
 	return rc;
 }
 
@@ -5317,9 +5313,7 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
 
-	ctxt->ops->get_fpu(ctxt);
 	rc = asm_safe("fwait");
-	ctxt->ops->put_fpu(ctxt);
 
 	if (unlikely(rc != X86EMUL_CONTINUE))
 		return emulate_exception(ctxt, MF_VECTOR, 0, false);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e5e66e5c664057bb5cc5ad2660008ccbf19b69e5..2b8eb4da4d0823bdcdf7bde1feb44132d0113cde 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 		spin_lock(&vcpu->kvm->mmu_lock);
 		if(make_mmu_pages_available(vcpu) < 0) {
 			spin_unlock(&vcpu->kvm->mmu_lock);
-			return 1;
+			return -ENOSPC;
 		}
 		sp = kvm_mmu_get_page(vcpu, 0, 0,
 				vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 			spin_lock(&vcpu->kvm->mmu_lock);
 			if (make_mmu_pages_available(vcpu) < 0) {
 				spin_unlock(&vcpu->kvm->mmu_lock);
-				return 1;
+				return -ENOSPC;
 			}
 			sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
 					i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		spin_lock(&vcpu->kvm->mmu_lock);
 		if (make_mmu_pages_available(vcpu) < 0) {
 			spin_unlock(&vcpu->kvm->mmu_lock);
-			return 1;
+			return -ENOSPC;
 		}
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
 				vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		spin_lock(&vcpu->kvm->mmu_lock);
 		if (make_mmu_pages_available(vcpu) < 0) {
 			spin_unlock(&vcpu->kvm->mmu_lock);
-			return 1;
+			return -ENOSPC;
 		}
 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
 				      0, ACC_ALL);
@@ -3781,7 +3781,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
 {
 	if (unlikely(!lapic_in_kernel(vcpu) ||
-		     kvm_event_needs_reinjection(vcpu)))
+		     kvm_event_needs_reinjection(vcpu) ||
+		     vcpu->arch.exception.pending))
 		return false;
 
 	if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
@@ -5465,30 +5466,34 @@ static void mmu_destroy_caches(void)
 
 int kvm_mmu_module_init(void)
 {
+	int ret = -ENOMEM;
+
 	kvm_mmu_clear_all_pte_masks();
 
 	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
 					    sizeof(struct pte_list_desc),
 					    0, SLAB_ACCOUNT, NULL);
 	if (!pte_list_desc_cache)
-		goto nomem;
+		goto out;
 
 	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
 						  sizeof(struct kvm_mmu_page),
 						  0, SLAB_ACCOUNT, NULL);
 	if (!mmu_page_header_cache)
-		goto nomem;
+		goto out;
 
 	if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
-		goto nomem;
+		goto out;
 
-	register_shrinker(&mmu_shrinker);
+	ret = register_shrinker(&mmu_shrinker);
+	if (ret)
+		goto out;
 
 	return 0;
 
-nomem:
+out:
 	mmu_destroy_caches();
-	return -ENOMEM;
+	return ret;
 }
 
 /*
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index eb714f1cdf7eee4ca9036005c3ab72ef9228ae9b..f40d0da1f1d321e4705f24ee85b80ad15233e438 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -45,6 +45,7 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -361,7 +362,6 @@ static void recalc_intercepts(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *c, *h;
 	struct nested_state *g;
-	u32 h_intercept_exceptions;
 
 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 
@@ -372,14 +372,9 @@ static void recalc_intercepts(struct vcpu_svm *svm)
 	h = &svm->nested.hsave->control;
 	g = &svm->nested;
 
-	/* No need to intercept #UD if L1 doesn't intercept it */
-	h_intercept_exceptions =
-		h->intercept_exceptions & ~(1U << UD_VECTOR);
-
 	c->intercept_cr = h->intercept_cr | g->intercept_cr;
 	c->intercept_dr = h->intercept_dr | g->intercept_dr;
-	c->intercept_exceptions =
-		h_intercept_exceptions | g->intercept_exceptions;
+	c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
 	c->intercept = h->intercept | g->intercept;
 }
 
@@ -2202,7 +2197,6 @@ static int ud_interception(struct vcpu_svm *svm)
 {
 	int er;
 
-	WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
 	er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
 	if (er == EMULATE_USER_EXIT)
 		return 0;
@@ -4985,6 +4979,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r13, %c[r13](%[svm]) \n\t"
 		"mov %%r14, %c[r14](%[svm]) \n\t"
 		"mov %%r15, %c[r15](%[svm]) \n\t"
+#endif
+		/*
+		* Clear host registers marked as clobbered to prevent
+		* speculative use.
+		*/
+		"xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
+		"xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
+		"xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
+		"xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
+		"xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
+#ifdef CONFIG_X86_64
+		"xor %%r8, %%r8 \n\t"
+		"xor %%r9, %%r9 \n\t"
+		"xor %%r10, %%r10 \n\t"
+		"xor %%r11, %%r11 \n\t"
+		"xor %%r12, %%r12 \n\t"
+		"xor %%r13, %%r13 \n\t"
+		"xor %%r14, %%r14 \n\t"
+		"xor %%r15, %%r15 \n\t"
 #endif
 		"pop %%" _ASM_BP
 		:
@@ -5015,6 +5028,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 		);
 
+	/* Eliminate branch target predictions from guest mode */
+	vmexit_fill_RSB();
+
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 #else
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4704aaf6d19e2ea36e3d12ddc0c345bc8ed3632f..c829d89e2e63f85bc36c6821b0ca1d7712297043 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -50,6 +50,7 @@
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
 #include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
 
 #include "trace.h"
 #include "pmu.h"
@@ -899,8 +900,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
 {
 	BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
 
-	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
-	    vmcs_field_to_offset_table[field] == 0)
+	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
+		return -ENOENT;
+
+	/*
+	 * FIXME: Mitigation for CVE-2017-5753.  To be replaced with a
+	 * generic mechanism.
+	 */
+	asm("lfence");
+
+	if (vmcs_field_to_offset_table[field] == 0)
 		return -ENOENT;
 
 	return vmcs_field_to_offset_table[field];
@@ -1887,7 +1896,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
 	u32 eb;
 
-	eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
+	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
 	     (1u << DB_VECTOR) | (1u << AC_VECTOR);
 	if ((vcpu->guest_debug &
 	     (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
@@ -1905,8 +1914,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	 */
 	if (is_guest_mode(vcpu))
 		eb |= get_vmcs12(vcpu)->exception_bitmap;
-	else
-		eb |= 1u << UD_VECTOR;
 
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -2302,7 +2309,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * processors.  See 22.2.4.
 		 */
 		vmcs_writel(HOST_TR_BASE,
-			    (unsigned long)this_cpu_ptr(&cpu_tss));
+			    (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
 		vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
 
 		/*
@@ -5917,7 +5924,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		return 1;  /* already handled by vmx_vcpu_run() */
 
 	if (is_invalid_opcode(intr_info)) {
-		WARN_ON_ONCE(is_guest_mode(vcpu));
 		er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
 		if (er == EMULATE_USER_EXIT)
 			return 0;
@@ -6751,16 +6757,10 @@ static __init int hardware_setup(void)
 			goto out;
 	}
 
-	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
 	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
 	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
 
-	/*
-	 * Allow direct access to the PC debug port (it is often used for I/O
-	 * delays, but the vmexits simply slow things down).
-	 */
 	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-	clear_bit(0x80, vmx_io_bitmap_a);
 
 	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
 
@@ -9421,6 +9421,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		/* Save guest registers, load host registers, keep flags */
 		"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
 		"pop %0 \n\t"
+		"setbe %c[fail](%0)\n\t"
 		"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
 		"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
 		__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
@@ -9437,12 +9438,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r13, %c[r13](%0) \n\t"
 		"mov %%r14, %c[r14](%0) \n\t"
 		"mov %%r15, %c[r15](%0) \n\t"
+		"xor %%r8d,  %%r8d \n\t"
+		"xor %%r9d,  %%r9d \n\t"
+		"xor %%r10d, %%r10d \n\t"
+		"xor %%r11d, %%r11d \n\t"
+		"xor %%r12d, %%r12d \n\t"
+		"xor %%r13d, %%r13d \n\t"
+		"xor %%r14d, %%r14d \n\t"
+		"xor %%r15d, %%r15d \n\t"
 #endif
 		"mov %%cr2, %%" _ASM_AX "   \n\t"
 		"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
 
+		"xor %%eax, %%eax \n\t"
+		"xor %%ebx, %%ebx \n\t"
+		"xor %%esi, %%esi \n\t"
+		"xor %%edi, %%edi \n\t"
 		"pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
-		"setbe %c[fail](%0) \n\t"
 		".pushsection .rodata \n\t"
 		".global vmx_return \n\t"
 		"vmx_return: " _ASM_PTR " 2b \n\t"
@@ -9479,6 +9491,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 	      );
 
+	/* Eliminate branch target predictions from guest mode */
+	vmexit_fill_RSB();
+
 	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
 	if (debugctlmsr)
 		update_debugctlmsr(debugctlmsr);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eee8e7faf1af5778763b8181df472651d3573149..1cec2c62a0b08405d2bd7c8908d6b7f33de3b63c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2937,7 +2937,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	pagefault_enable();
 	kvm_x86_ops->vcpu_put(vcpu);
-	kvm_put_guest_fpu(vcpu);
 	vcpu->arch.last_host_tsc = rdtsc();
 }
 
@@ -4385,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
 					 addr, n, v))
 		    && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
 			break;
-		trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
 		handled += n;
 		addr += n;
 		len -= n;
@@ -4644,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
 {
 	if (vcpu->mmio_read_completed) {
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
-			       vcpu->mmio_fragments[0].gpa, *(u64 *)val);
+			       vcpu->mmio_fragments[0].gpa, val);
 		vcpu->mmio_read_completed = 0;
 		return 1;
 	}
@@ -4666,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
 {
-	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
 	return vcpu_mmio_write(vcpu, gpa, bytes, val);
 }
 
 static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
 			  void *val, int bytes)
 {
-	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
+	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
 	return X86EMUL_IO_NEEDED;
 }
 
@@ -5252,17 +5251,6 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
 	emul_to_vcpu(ctxt)->arch.halt_request = 1;
 }
 
-static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
-{
-	preempt_disable();
-	kvm_load_guest_fpu(emul_to_vcpu(ctxt));
-}
-
-static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
-{
-	preempt_enable();
-}
-
 static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
 			      struct x86_instruction_info *info,
 			      enum x86_intercept_stage stage)
@@ -5340,8 +5328,6 @@ static const struct x86_emulate_ops emulate_ops = {
 	.halt                = emulator_halt,
 	.wbinvd              = emulator_wbinvd,
 	.fix_hypercall       = emulator_fix_hypercall,
-	.get_fpu             = emulator_get_fpu,
-	.put_fpu             = emulator_put_fpu,
 	.intercept           = emulator_intercept,
 	.get_cpuid           = emulator_get_cpuid,
 	.set_nmi_mask        = emulator_set_nmi_mask,
@@ -6778,6 +6764,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->tlb_flush(vcpu);
 }
 
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end)
+{
+	unsigned long apic_address;
+
+	/*
+	 * The physical address of apic access page is stored in the VMCS.
+	 * Update it when it becomes invalid.
+	 */
+	apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+	if (start <= apic_address && apic_address < end)
+		kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 {
 	struct page *page = NULL;
@@ -6952,7 +6952,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	preempt_disable();
 
 	kvm_x86_ops->prepare_guest_switch(vcpu);
-	kvm_load_guest_fpu(vcpu);
 
 	/*
 	 * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
@@ -7265,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	struct fpu *fpu = &current->thread.fpu;
 	int r;
 
-	fpu__initialize(fpu);
-
 	kvm_sigset_activate(vcpu);
 
+	kvm_load_guest_fpu(vcpu);
+
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
 		if (kvm_run->immediate_exit) {
 			r = -EINTR;
@@ -7312,6 +7310,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		r = vcpu_run(vcpu);
 
 out:
+	kvm_put_guest_fpu(vcpu);
 	post_kvm_run_save(vcpu);
 	kvm_sigset_deactivate(vcpu);
 
@@ -7381,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 #endif
 
 	kvm_rip_write(vcpu, regs->rip);
-	kvm_set_rflags(vcpu, regs->rflags);
+	kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
 
 	vcpu->arch.exception.pending = false;
 
@@ -7495,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
+int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) {
+		/*
+		 * When EFER.LME and CR0.PG are set, the processor is in
+		 * 64-bit mode (though maybe in a 32-bit code segment).
+		 * CR4.PAE and EFER.LMA must be set.
+		 */
+		if (!(sregs->cr4 & X86_CR4_PAE_BIT)
+		    || !(sregs->efer & EFER_LMA))
+			return -EINVAL;
+	} else {
+		/*
+		 * Not in 64-bit mode: EFER.LMA is clear and the code
+		 * segment cannot be 64-bit.
+		 */
+		if (sregs->efer & EFER_LMA || sregs->cs.l)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs)
 {
@@ -7507,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 			(sregs->cr4 & X86_CR4_OSXSAVE))
 		return -EINVAL;
 
+	if (kvm_valid_sregs(vcpu, sregs))
+		return -EINVAL;
+
 	apic_base_msr.data = sregs->apic_base;
 	apic_base_msr.host_initiated = true;
 	if (kvm_set_apic_base(vcpu, &apic_base_msr))
@@ -7704,32 +7729,25 @@ static void fx_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.cr0 |= X86_CR0_ET;
 }
 
+/* Swap (qemu) user FPU context for the guest FPU context. */
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->guest_fpu_loaded)
-		return;
-
-	/*
-	 * Restore all possible states in the guest,
-	 * and assume host would use all available bits.
-	 * Guest xcr0 would be loaded later.
-	 */
-	vcpu->guest_fpu_loaded = 1;
-	__kernel_fpu_begin();
+	preempt_disable();
+	copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
 	/* PKRU is separately restored in kvm_x86_ops->run.  */
 	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
 				~XFEATURE_MASK_PKRU);
+	preempt_enable();
 	trace_kvm_fpu(1);
 }
 
+/* When vcpu_run ends, restore user space FPU context. */
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->guest_fpu_loaded)
-		return;
-
-	vcpu->guest_fpu_loaded = 0;
+	preempt_disable();
 	copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
-	__kernel_fpu_end();
+	copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+	preempt_enable();
 	++vcpu->stat.fpu_reload;
 	trace_kvm_fpu(0);
 }
@@ -7846,7 +7864,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		 * To avoid have the INIT path from kvm_apic_has_events() that be
 		 * called with loaded FPU and does not let userspace fix the state.
 		 */
-		kvm_put_guest_fpu(vcpu);
+		if (init_event)
+			kvm_put_guest_fpu(vcpu);
 		mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
 					XFEATURE_MASK_BNDREGS);
 		if (mpx_state_buffer)
@@ -7855,6 +7874,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 					XFEATURE_MASK_BNDCSR);
 		if (mpx_state_buffer)
 			memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
+		if (init_event)
+			kvm_load_guest_fpu(vcpu);
 	}
 
 	if (!init_event) {
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 7b181b61170e769fc41a062a3eddbb62c4e53793..f23934bbaf4ebe6a55331669160d6b0aa72ba5d4 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4d34bb548b41ebdddc20fcf464aad4cb44c6a763..46e71a74e6129b861c233ccf86f452b485e62d59 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -29,7 +29,8 @@
 #include <asm/errno.h>
 #include <asm/asm.h>
 #include <asm/export.h>
-				
+#include <asm/nospec-branch.h>
+
 /*
  * computes a partial checksum, e.g. for TCP/UDP fragments
  */
@@ -156,7 +157,7 @@ ENTRY(csum_partial)
 	negl %ebx
 	lea 45f(%ebx,%ebx,2), %ebx
 	testl %esi, %esi
-	jmp *%ebx
+	JMP_NOSPEC %ebx
 
 	# Handle 2-byte-aligned regions
 20:	addw (%esi), %ax
@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
 	andl $-32,%edx
 	lea 3f(%ebx,%ebx), %ebx
 	testl %esi, %esi 
-	jmp *%ebx
+	JMP_NOSPEC %ebx
 1:	addl $64,%esi
 	addl $64,%edi 
 	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 553f8fd23cc4733d0edafa862b95446f7a04bab1..4846eff7e4c8b1505501d7f1dcb64127d0a4c67c 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
 		delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
 
 		/*
-		 * Use cpu_tss as a cacheline-aligned, seldomly
+		 * Use cpu_tss_rw as a cacheline-aligned, seldomly
 		 * accessed per-cpu variable as the monitor target.
 		 */
-		__monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
+		__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
 
 		/*
 		 * AMD, like Intel, supports the EAX hint and EAX=0xf
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
new file mode 100644
index 0000000000000000000000000000000000000000..cb45c6cb465f4b1ddea6b1a36a26f8ed3abd79db
--- /dev/null
+++ b/arch/x86/lib/retpoline.S
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+#include <asm/export.h>
+#include <asm/nospec-branch.h>
+
+.macro THUNK reg
+	.section .text.__x86.indirect_thunk.\reg
+
+ENTRY(__x86_indirect_thunk_\reg)
+	CFI_STARTPROC
+	JMP_NOSPEC %\reg
+	CFI_ENDPROC
+ENDPROC(__x86_indirect_thunk_\reg)
+.endm
+
+/*
+ * Despite being an assembler file we can't just use .irp here
+ * because __KSYM_DEPS__ only uses the C preprocessor and would
+ * only see one instance of "__x86_indirect_thunk_\reg" rather
+ * than one per register with the correct names. So we do it
+ * the simple and nasty way...
+ */
+#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
+#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
+
+GENERATE_THUNK(_ASM_AX)
+GENERATE_THUNK(_ASM_BX)
+GENERATE_THUNK(_ASM_CX)
+GENERATE_THUNK(_ASM_DX)
+GENERATE_THUNK(_ASM_SI)
+GENERATE_THUNK(_ASM_DI)
+GENERATE_THUNK(_ASM_BP)
+GENERATE_THUNK(_ASM_SP)
+#ifdef CONFIG_64BIT
+GENERATE_THUNK(r8)
+GENERATE_THUNK(r9)
+GENERATE_THUNK(r10)
+GENERATE_THUNK(r11)
+GENERATE_THUNK(r12)
+GENERATE_THUNK(r13)
+GENERATE_THUNK(r14)
+GENERATE_THUNK(r15)
+#endif
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index c4d55919fac19e06afbb00a4124fbf1b334b4d46..e0b85930dd773e87417e2b4957b8af61221b04c0 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 8e13b8cc6bedb0dc84eea64cd80ca6ae39037eaa..27e9e90a8d3572b900ccaacae1623de803072b17 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o	= -pg
 endif
 
 obj-y	:=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o
+	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
@@ -41,9 +41,10 @@ obj-$(CONFIG_AMD_NUMA)		+= amdtopology.o
 obj-$(CONFIG_ACPI_NUMA)		+= srat.o
 obj-$(CONFIG_NUMA_EMU)		+= numa_emulation.o
 
-obj-$(CONFIG_X86_INTEL_MPX)	+= mpx.o
-obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
-obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+obj-$(CONFIG_X86_INTEL_MPX)			+= mpx.o
+obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS)	+= pkeys.o
+obj-$(CONFIG_RANDOMIZE_MEMORY)			+= kaslr.o
+obj-$(CONFIG_PAGE_TABLE_ISOLATION)		+= pti.o
 
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_boot.o
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644
index 0000000000000000000000000000000000000000..b9283cc276220db667ab091a3358eb5741813f7f
--- /dev/null
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <asm/desc.h>
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
+
+#ifdef CONFIG_X86_64
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+#endif
+
+struct cpu_entry_area *get_cpu_entry_area(int cpu)
+{
+	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
+
+	return (struct cpu_entry_area *) va;
+}
+EXPORT_SYMBOL(get_cpu_entry_area);
+
+void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
+{
+	unsigned long va = (unsigned long) cea_vaddr;
+
+	set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
+}
+
+static void __init
+cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
+{
+	for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
+		cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+}
+
+static void percpu_setup_debug_store(int cpu)
+{
+#ifdef CONFIG_CPU_SUP_INTEL
+	int npages;
+	void *cea;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return;
+
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
+	npages = sizeof(struct debug_store) / PAGE_SIZE;
+	BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
+	cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
+			     PAGE_KERNEL);
+
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
+	/*
+	 * Force the population of PMDs for not yet allocated per cpu
+	 * memory like debug store buffers.
+	 */
+	npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
+	for (; npages; npages--, cea += PAGE_SIZE)
+		cea_set_pte(cea, 0, PAGE_NONE);
+#endif
+}
+
+/* Setup the fixmap mappings only once per-processor */
+static void __init setup_cpu_entry_area(int cpu)
+{
+#ifdef CONFIG_X86_64
+	extern char _entry_trampoline[];
+
+	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+	pgprot_t gdt_prot = PAGE_KERNEL_RO;
+	pgprot_t tss_prot = PAGE_KERNEL_RO;
+#else
+	/*
+	 * On native 32-bit systems, the GDT cannot be read-only because
+	 * our double fault handler uses a task gate, and entering through
+	 * a task gate needs to change an available TSS to busy.  If the
+	 * GDT is read-only, that will triple fault.  The TSS cannot be
+	 * read-only because the CPU writes to it on task switches.
+	 *
+	 * On Xen PV, the GDT must be read-only because the hypervisor
+	 * requires it.
+	 */
+	pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+		PAGE_KERNEL_RO : PAGE_KERNEL;
+	pgprot_t tss_prot = PAGE_KERNEL;
+#endif
+
+	cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
+		    gdt_prot);
+
+	cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
+			     per_cpu_ptr(&entry_stack_storage, cpu), 1,
+			     PAGE_KERNEL);
+
+	/*
+	 * The Intel SDM says (Volume 3, 7.2.1):
+	 *
+	 *  Avoid placing a page boundary in the part of the TSS that the
+	 *  processor reads during a task switch (the first 104 bytes). The
+	 *  processor may not correctly perform address translations if a
+	 *  boundary occurs in this area. During a task switch, the processor
+	 *  reads and writes into the first 104 bytes of each TSS (using
+	 *  contiguous physical addresses beginning with the physical address
+	 *  of the first byte of the TSS). So, after TSS access begins, if
+	 *  part of the 104 bytes is not physically contiguous, the processor
+	 *  will access incorrect information without generating a page-fault
+	 *  exception.
+	 *
+	 * There are also a lot of errata involving the TSS spanning a page
+	 * boundary.  Assert that we're not doing that.
+	 */
+	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+	cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
+			     &per_cpu(cpu_tss_rw, cpu),
+			     sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
+
+#ifdef CONFIG_X86_32
+	per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+#endif
+
+#ifdef CONFIG_X86_64
+	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+	BUILD_BUG_ON(sizeof(exception_stacks) !=
+		     sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+	cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
+			     &per_cpu(exception_stacks, cpu),
+			     sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
+
+	cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
+		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+#endif
+	percpu_setup_debug_store(cpu);
+}
+
+static __init void setup_cpu_entry_area_ptes(void)
+{
+#ifdef CONFIG_X86_32
+	unsigned long start, end;
+
+	BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
+	BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
+
+	start = CPU_ENTRY_AREA_BASE;
+	end = start + CPU_ENTRY_AREA_MAP_SIZE;
+
+	/* Careful here: start + PMD_SIZE might wrap around */
+	for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
+		populate_extra_pte(start);
+#endif
+}
+
+void __init setup_cpu_entry_areas(void)
+{
+	unsigned int cpu;
+
+	setup_cpu_entry_area_ptes();
+
+	for_each_possible_cpu(cpu)
+		setup_cpu_entry_area(cpu);
+}
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index bfcffdf6c5775f7ac5bd4c9f768573d7ae7bba55..421f2664ffa06e6cd4b31e5d6521648add3e2c4e 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -5,7 +5,7 @@
 
 static int ptdump_show(struct seq_file *m, void *v)
 {
-	ptdump_walk_pgd_level(m, NULL);
+	ptdump_walk_pgd_level_debugfs(m, NULL, false);
 	return 0;
 }
 
@@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = {
 	.release	= single_release,
 };
 
-static struct dentry *pe;
+static int ptdump_show_curknl(struct seq_file *m, void *v)
+{
+	if (current->mm->pgd) {
+		down_read(&current->mm->mmap_sem);
+		ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
+		up_read(&current->mm->mmap_sem);
+	}
+	return 0;
+}
+
+static int ptdump_open_curknl(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, ptdump_show_curknl, NULL);
+}
+
+static const struct file_operations ptdump_curknl_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ptdump_open_curknl,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+static struct dentry *pe_curusr;
+
+static int ptdump_show_curusr(struct seq_file *m, void *v)
+{
+	if (current->mm->pgd) {
+		down_read(&current->mm->mmap_sem);
+		ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
+		up_read(&current->mm->mmap_sem);
+	}
+	return 0;
+}
+
+static int ptdump_open_curusr(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, ptdump_show_curusr, NULL);
+}
+
+static const struct file_operations ptdump_curusr_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ptdump_open_curusr,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
+static struct dentry *dir, *pe_knl, *pe_curknl;
 
 static int __init pt_dump_debug_init(void)
 {
-	pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
-				 &ptdump_fops);
-	if (!pe)
+	dir = debugfs_create_dir("page_tables", NULL);
+	if (!dir)
 		return -ENOMEM;
 
+	pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
+				     &ptdump_fops);
+	if (!pe_knl)
+		goto err;
+
+	pe_curknl = debugfs_create_file("current_kernel", 0400,
+					dir, NULL, &ptdump_curknl_fops);
+	if (!pe_curknl)
+		goto err;
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	pe_curusr = debugfs_create_file("current_user", 0400,
+					dir, NULL, &ptdump_curusr_fops);
+	if (!pe_curusr)
+		goto err;
+#endif
 	return 0;
+err:
+	debugfs_remove_recursive(dir);
+	return -ENOMEM;
 }
 
 static void __exit pt_dump_debug_exit(void)
 {
-	debugfs_remove_recursive(pe);
+	debugfs_remove_recursive(dir);
 }
 
 module_init(pt_dump_debug_init);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 5e3ac6fe6c9e32ed1906f4f9bf736310a7193c7d..2a4849e92831b0f9771a2742b01c5a6c9c806a42 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -44,68 +44,97 @@ struct addr_marker {
 	unsigned long max_lines;
 };
 
-/* indices for address_markers; keep sync'd w/ address_markers below */
+/* Address space markers hints */
+
+#ifdef CONFIG_X86_64
+
 enum address_markers_idx {
 	USER_SPACE_NR = 0,
-#ifdef CONFIG_X86_64
 	KERNEL_SPACE_NR,
 	LOW_KERNEL_NR,
+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
+	LDT_NR,
+#endif
 	VMALLOC_START_NR,
 	VMEMMAP_START_NR,
 #ifdef CONFIG_KASAN
 	KASAN_SHADOW_START_NR,
 	KASAN_SHADOW_END_NR,
 #endif
-# ifdef CONFIG_X86_ESPFIX64
+	CPU_ENTRY_AREA_NR,
+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
+	LDT_NR,
+#endif
+#ifdef CONFIG_X86_ESPFIX64
 	ESPFIX_START_NR,
-# endif
+#endif
+#ifdef CONFIG_EFI
+	EFI_END_NR,
+#endif
 	HIGH_KERNEL_NR,
 	MODULES_VADDR_NR,
 	MODULES_END_NR,
-#else
+	FIXADDR_START_NR,
+	END_OF_SPACE_NR,
+};
+
+static struct addr_marker address_markers[] = {
+	[USER_SPACE_NR]		= { 0,			"User Space" },
+	[KERNEL_SPACE_NR]	= { (1UL << 63),	"Kernel Space" },
+	[LOW_KERNEL_NR]		= { 0UL,		"Low Kernel Mapping" },
+	[VMALLOC_START_NR]	= { 0UL,		"vmalloc() Area" },
+	[VMEMMAP_START_NR]	= { 0UL,		"Vmemmap" },
+#ifdef CONFIG_KASAN
+	[KASAN_SHADOW_START_NR]	= { KASAN_SHADOW_START,	"KASAN shadow" },
+	[KASAN_SHADOW_END_NR]	= { KASAN_SHADOW_END,	"KASAN shadow end" },
+#endif
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+	[LDT_NR]		= { LDT_BASE_ADDR,	"LDT remap" },
+#endif
+	[CPU_ENTRY_AREA_NR]	= { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
+#ifdef CONFIG_X86_ESPFIX64
+	[ESPFIX_START_NR]	= { ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
+#endif
+#ifdef CONFIG_EFI
+	[EFI_END_NR]		= { EFI_VA_END,		"EFI Runtime Services" },
+#endif
+	[HIGH_KERNEL_NR]	= { __START_KERNEL_map,	"High Kernel Mapping" },
+	[MODULES_VADDR_NR]	= { MODULES_VADDR,	"Modules" },
+	[MODULES_END_NR]	= { MODULES_END,	"End Modules" },
+	[FIXADDR_START_NR]	= { FIXADDR_START,	"Fixmap Area" },
+	[END_OF_SPACE_NR]	= { -1,			NULL }
+};
+
+#else /* CONFIG_X86_64 */
+
+enum address_markers_idx {
+	USER_SPACE_NR = 0,
 	KERNEL_SPACE_NR,
 	VMALLOC_START_NR,
 	VMALLOC_END_NR,
-# ifdef CONFIG_HIGHMEM
+#ifdef CONFIG_HIGHMEM
 	PKMAP_BASE_NR,
-# endif
-	FIXADDR_START_NR,
 #endif
+	CPU_ENTRY_AREA_NR,
+	FIXADDR_START_NR,
+	END_OF_SPACE_NR,
 };
 
-/* Address space markers hints */
 static struct addr_marker address_markers[] = {
-	{ 0, "User Space" },
-#ifdef CONFIG_X86_64
-	{ 0x8000000000000000UL, "Kernel Space" },
-	{ 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
-	{ 0/* VMALLOC_START */, "vmalloc() Area" },
-	{ 0/* VMEMMAP_START */, "Vmemmap" },
-#ifdef CONFIG_KASAN
-	{ KASAN_SHADOW_START,	"KASAN shadow" },
-	{ KASAN_SHADOW_END,	"KASAN shadow end" },
+	[USER_SPACE_NR]		= { 0,			"User Space" },
+	[KERNEL_SPACE_NR]	= { PAGE_OFFSET,	"Kernel Mapping" },
+	[VMALLOC_START_NR]	= { 0UL,		"vmalloc() Area" },
+	[VMALLOC_END_NR]	= { 0UL,		"vmalloc() End" },
+#ifdef CONFIG_HIGHMEM
+	[PKMAP_BASE_NR]		= { 0UL,		"Persistent kmap() Area" },
 #endif
-# ifdef CONFIG_X86_ESPFIX64
-	{ ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
-# endif
-# ifdef CONFIG_EFI
-	{ EFI_VA_END,		"EFI Runtime Services" },
-# endif
-	{ __START_KERNEL_map,   "High Kernel Mapping" },
-	{ MODULES_VADDR,        "Modules" },
-	{ MODULES_END,          "End Modules" },
-#else
-	{ PAGE_OFFSET,          "Kernel Mapping" },
-	{ 0/* VMALLOC_START */, "vmalloc() Area" },
-	{ 0/*VMALLOC_END*/,     "vmalloc() End" },
-# ifdef CONFIG_HIGHMEM
-	{ 0/*PKMAP_BASE*/,      "Persistent kmap() Area" },
-# endif
-	{ 0/*FIXADDR_START*/,   "Fixmap Area" },
-#endif
-	{ -1, NULL }		/* End of list */
+	[CPU_ENTRY_AREA_NR]	= { 0UL,		"CPU entry area" },
+	[FIXADDR_START_NR]	= { 0UL,		"Fixmap area" },
+	[END_OF_SPACE_NR]	= { -1,			NULL }
 };
 
+#endif /* !CONFIG_X86_64 */
+
 /* Multipliers for offsets within the PTEs */
 #define PTE_LEVEL_MULT (PAGE_SIZE)
 #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
@@ -140,7 +169,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
 	static const char * const level_name[] =
 		{ "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
 
-	if (!pgprot_val(prot)) {
+	if (!(pr & _PAGE_PRESENT)) {
 		/* Not present */
 		pt_dump_cont_printf(m, dmsg, "                              ");
 	} else {
@@ -447,7 +476,7 @@ static inline bool is_hypervisor_range(int idx)
 }
 
 static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
-				       bool checkwx)
+				       bool checkwx, bool dmesg)
 {
 #ifdef CONFIG_X86_64
 	pgd_t *start = (pgd_t *) &init_top_pgt;
@@ -460,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 
 	if (pgd) {
 		start = pgd;
-		st.to_dmesg = true;
+		st.to_dmesg = dmesg;
 	}
 
 	st.check_wx = checkwx;
@@ -498,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
 {
-	ptdump_walk_pgd_level_core(m, pgd, false);
+	ptdump_walk_pgd_level_core(m, pgd, false, true);
+}
+
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	if (user && static_cpu_has(X86_FEATURE_PTI))
+		pgd = kernel_to_user_pgdp(pgd);
+#endif
+	ptdump_walk_pgd_level_core(m, pgd, false, false);
+}
+EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
+
+static void ptdump_walk_user_pgd_level_checkwx(void)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	pgd_t *pgd = (pgd_t *) &init_top_pgt;
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	pr_info("x86/mm: Checking user space page tables\n");
+	pgd = kernel_to_user_pgdp(pgd);
+	ptdump_walk_pgd_level_core(NULL, pgd, true, false);
+#endif
 }
-EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
 
 void ptdump_walk_pgd_level_checkwx(void)
 {
-	ptdump_walk_pgd_level_core(NULL, NULL, true);
+	ptdump_walk_pgd_level_core(NULL, NULL, true, false);
+	ptdump_walk_user_pgd_level_checkwx();
 }
 
 static int __init pt_dump_init(void)
@@ -525,8 +578,8 @@ static int __init pt_dump_init(void)
 	address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
 # endif
 	address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
+	address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
 #endif
-
 	return 0;
 }
 __initcall(pt_dump_init);
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 3321b446b66cdb99f16fe145bf9bad50f9d1fd01..9fe656c42aa5b16560e139cebba247ca52756c80 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,6 +1,7 @@
 #include <linux/extable.h>
 #include <linux/uaccess.h>
 #include <linux/sched/debug.h>
+#include <xen/xen.h>
 
 #include <asm/fpu/internal.h>
 #include <asm/traps.h>
@@ -82,7 +83,7 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
 
 	return true;
 }
-EXPORT_SYMBOL_GPL(ex_handler_refcount);
+EXPORT_SYMBOL(ex_handler_refcount);
 
 /*
  * Handler for when we fail to restore a task's FPU state.  We should never get
@@ -212,8 +213,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 	 * Old CPUs leave the high bits of CS on the stack
 	 * undefined.  I'm not sure which CPUs do this, but at least
 	 * the 486 DX works this way.
+	 * Xen pv domains are not using the default __KERNEL_CS.
 	 */
-	if (regs->cs != __KERNEL_CS)
+	if (!xen_pv_domain() && regs->cs != __KERNEL_CS)
 		goto fail;
 
 	/*
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 78ca9a8ee4548a270045e81841ef6380ed6d260a..06fe3d51d385b88111961c0b5addc673fcd597a2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -701,7 +701,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 	else
 		printk(KERN_CONT "paging request");
 
-	printk(KERN_CONT " at %p\n", (void *) address);
+	printk(KERN_CONT " at %px\n", (void *) address);
 	printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
 
 	dump_pagetable(address);
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 	if (!printk_ratelimit())
 		return;
 
-	printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
 		task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 		tsk->comm, task_pid_nr(tsk), address,
 		(void *)regs->ip, (void *)regs->sp, error_code);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6fdf91ef130a4737ab434ce98f77b2583fe1840d..82f5252c723a4a544593067981d90191c7b22c1d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -20,6 +20,7 @@
 #include <asm/kaslr.h>
 #include <asm/hypervisor.h>
 #include <asm/cpufeature.h>
+#include <asm/pti.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -160,6 +161,12 @@ struct map_range {
 
 static int page_size_mask;
 
+static void enable_global_pages(void)
+{
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		__supported_pte_mask |= _PAGE_GLOBAL;
+}
+
 static void __init probe_page_size_mask(void)
 {
 	/*
@@ -177,11 +184,11 @@ static void __init probe_page_size_mask(void)
 		cr4_set_bits_and_update_boot(X86_CR4_PSE);
 
 	/* Enable PGE if available */
+	__supported_pte_mask &= ~_PAGE_GLOBAL;
 	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		cr4_set_bits_and_update_boot(X86_CR4_PGE);
-		__supported_pte_mask |= _PAGE_GLOBAL;
-	} else
-		__supported_pte_mask &= ~_PAGE_GLOBAL;
+		enable_global_pages();
+	}
 
 	/* Enable 1 GB linear kernel mappings if available: */
 	if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
@@ -194,34 +201,44 @@ static void __init probe_page_size_mask(void)
 
 static void setup_pcid(void)
 {
-#ifdef CONFIG_X86_64
-	if (boot_cpu_has(X86_FEATURE_PCID)) {
-		if (boot_cpu_has(X86_FEATURE_PGE)) {
-			/*
-			 * This can't be cr4_set_bits_and_update_boot() --
-			 * the trampoline code can't handle CR4.PCIDE and
-			 * it wouldn't do any good anyway.  Despite the name,
-			 * cr4_set_bits_and_update_boot() doesn't actually
-			 * cause the bits in question to remain set all the
-			 * way through the secondary boot asm.
-			 *
-			 * Instead, we brute-force it and set CR4.PCIDE
-			 * manually in start_secondary().
-			 */
-			cr4_set_bits(X86_CR4_PCIDE);
-		} else {
-			/*
-			 * flush_tlb_all(), as currently implemented, won't
-			 * work if PCID is on but PGE is not.  Since that
-			 * combination doesn't exist on real hardware, there's
-			 * no reason to try to fully support it, but it's
-			 * polite to avoid corrupting data if we're on
-			 * an improperly configured VM.
-			 */
-			setup_clear_cpu_cap(X86_FEATURE_PCID);
-		}
+	if (!IS_ENABLED(CONFIG_X86_64))
+		return;
+
+	if (!boot_cpu_has(X86_FEATURE_PCID))
+		return;
+
+	if (boot_cpu_has(X86_FEATURE_PGE)) {
+		/*
+		 * This can't be cr4_set_bits_and_update_boot() -- the
+		 * trampoline code can't handle CR4.PCIDE and it wouldn't
+		 * do any good anyway.  Despite the name,
+		 * cr4_set_bits_and_update_boot() doesn't actually cause
+		 * the bits in question to remain set all the way through
+		 * the secondary boot asm.
+		 *
+		 * Instead, we brute-force it and set CR4.PCIDE manually in
+		 * start_secondary().
+		 */
+		cr4_set_bits(X86_CR4_PCIDE);
+
+		/*
+		 * INVPCID's single-context modes (2/3) only work if we set
+		 * X86_CR4_PCIDE, *and* we INVPCID support.  It's unusable
+		 * on systems that have X86_CR4_PCIDE clear, or that have
+		 * no INVPCID support at all.
+		 */
+		if (boot_cpu_has(X86_FEATURE_INVPCID))
+			setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
+	} else {
+		/*
+		 * flush_tlb_all(), as currently implemented, won't work if
+		 * PCID is on but PGE is not.  Since that combination
+		 * doesn't exist on real hardware, there's no reason to try
+		 * to fully support it, but it's polite to avoid corrupting
+		 * data if we're on an improperly configured VM.
+		 */
+		setup_clear_cpu_cap(X86_FEATURE_PCID);
 	}
-#endif
 }
 
 #ifdef CONFIG_X86_32
@@ -622,6 +639,7 @@ void __init init_mem_mapping(void)
 {
 	unsigned long end;
 
+	pti_check_boottime_disable();
 	probe_page_size_mask();
 	setup_pcid();
 
@@ -845,12 +863,12 @@ void __init zone_sizes_init(void)
 	free_area_init_nodes(max_zone_pfns);
 }
 
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
 	.loaded_mm = &init_mm,
 	.next_asid = 1,
 	.cr4 = ~0UL,	/* fail hard if we screw up cr4 shadow initialization */
 };
-EXPORT_SYMBOL_GPL(cpu_tlbstate);
+EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
 
 void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 {
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8a64a6f2848d9be2e73a341f4d87ab2dc35de09f..135c9a7898c7da908f1340f9750774b4327e63b3 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
 #include <asm/setup.h>
 #include <asm/set_memory.h>
 #include <asm/page_types.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/init.h>
 
 #include "mm_internal.h"
@@ -766,6 +767,7 @@ void __init mem_init(void)
 	mem_init_print_info(NULL);
 	printk(KERN_INFO "virtual kernel memory layout:\n"
 		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+		"  cpu_entry : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HIGHMEM
 		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #endif
@@ -777,6 +779,10 @@ void __init mem_init(void)
 		FIXADDR_START, FIXADDR_TOP,
 		(FIXADDR_TOP - FIXADDR_START) >> 10,
 
+		CPU_ENTRY_AREA_BASE,
+		CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
+		CPU_ENTRY_AREA_MAP_SIZE >> 10,
+
 #ifdef CONFIG_HIGHMEM
 		PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
 		(LAST_PKMAP*PAGE_SIZE) >> 10,
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 6e4573b1da341bd41095b11a692afcba51e4b850..c45b6ec5357bcd2e9f6626bd738c700cccd0a173 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -404,11 +404,11 @@ void iounmap(volatile void __iomem *addr)
 		return;
 	}
 
+	mmiotrace_iounmap(addr);
+
 	addr = (volatile void __iomem *)
 		(PAGE_MASK & (unsigned long __force)addr);
 
-	mmiotrace_iounmap(addr);
-
 	/* Use the vm area unlocked, assuming the caller
 	   ensures there isn't another iounmap for the same address
 	   in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 99dfed6dfef8b2f9028f82b89ab8dc2bde8173c4..47388f0c0e59649ca3574d4e7c31b356dad7d247 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -15,6 +15,7 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
+#include <asm/cpu_entry_area.h>
 
 extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
@@ -277,6 +278,7 @@ void __init kasan_early_init(void)
 void __init kasan_init(void)
 {
 	int i;
+	void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
 
 #ifdef CONFIG_KASAN_INLINE
 	register_die_notifier(&kasan_die_notifier);
@@ -321,16 +323,33 @@ void __init kasan_init(void)
 		map_range(&pfn_mapped[i]);
 	}
 
+	shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
+	shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
+	shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
+						PAGE_SIZE);
+
+	shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
+					CPU_ENTRY_AREA_MAP_SIZE);
+	shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
+	shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
+					PAGE_SIZE);
+
 	kasan_populate_zero_shadow(
 		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
-		kasan_mem_to_shadow((void *)__START_KERNEL_map));
+		shadow_cpu_entry_begin);
+
+	kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
+			      (unsigned long)shadow_cpu_entry_end, 0);
+
+	kasan_populate_zero_shadow(shadow_cpu_entry_end,
+				kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
 	kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
 			      (unsigned long)kasan_mem_to_shadow(_end),
 			      early_pfn_to_nid(__pa(_stext)));
 
 	kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
-			(void *)KASAN_SHADOW_END);
+				(void *)KASAN_SHADOW_END);
 
 	load_cr3(init_top_pgt);
 	__flush_tlb_all();
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 879ef930e2c2b04d60c85efec9f6e0fa5555fa2a..aedebd2ebf1ead0ff2b1b17816ff80d9fce88b8f 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -34,25 +34,14 @@
 #define TB_SHIFT 40
 
 /*
- * Virtual address start and end range for randomization. The end changes base
- * on configuration to have the highest amount of space for randomization.
- * It increases the possible random position for each randomized region.
+ * Virtual address start and end range for randomization.
  *
- * You need to add an if/def entry if you introduce a new memory region
- * compatible with KASLR. Your entry must be in logical order with memory
- * layout. For example, ESPFIX is before EFI because its virtual address is
- * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
- * ensure that this order is correct and won't be changed.
+ * The end address could depend on more configuration options to make the
+ * highest amount of space for randomization available, but that's too hard
+ * to keep straight and caused issues already.
  */
 static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
-
-#if defined(CONFIG_X86_ESPFIX64)
-static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
-#elif defined(CONFIG_EFI)
-static const unsigned long vaddr_end = EFI_VA_END;
-#else
-static const unsigned long vaddr_end = __START_KERNEL_map;
-#endif
+static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
 
 /* Default values */
 unsigned long page_offset_base = __PAGE_OFFSET_BASE;
@@ -101,15 +90,12 @@ void __init kernel_randomize_memory(void)
 	unsigned long remain_entropy;
 
 	/*
-	 * All these BUILD_BUG_ON checks ensures the memory layout is
-	 * consistent with the vaddr_start/vaddr_end variables.
+	 * These BUILD_BUG_ON checks ensure the memory layout is consistent
+	 * with the vaddr_start/vaddr_end variables. These checks are very
+	 * limited....
 	 */
 	BUILD_BUG_ON(vaddr_start >= vaddr_end);
-	BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
-		     vaddr_end >= EFI_VA_END);
-	BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
-		      IS_ENABLED(CONFIG_EFI)) &&
-		     vaddr_end >= __START_KERNEL_map);
+	BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
 	BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
 
 	if (!kaslr_memory_enabled())
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
deleted file mode 100644
index cec5940325151e407aa90128a35cb683afd436d7..0000000000000000000000000000000000000000
--- a/arch/x86/mm/kmemcheck/error.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h
deleted file mode 100644
index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000
--- a/arch/x86/mm/kmemcheck/error.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
deleted file mode 100644
index cec5940325151e407aa90128a35cb683afd436d7..0000000000000000000000000000000000000000
--- a/arch/x86/mm/kmemcheck/opcode.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h
deleted file mode 100644
index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000
--- a/arch/x86/mm/kmemcheck/opcode.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c
deleted file mode 100644
index cec5940325151e407aa90128a35cb683afd436d7..0000000000000000000000000000000000000000
--- a/arch/x86/mm/kmemcheck/pte.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h
deleted file mode 100644
index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000
--- a/arch/x86/mm/kmemcheck/pte.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c
deleted file mode 100644
index cec5940325151e407aa90128a35cb683afd436d7..0000000000000000000000000000000000000000
--- a/arch/x86/mm/kmemcheck/selftest.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h
deleted file mode 100644
index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000
--- a/arch/x86/mm/kmemcheck/selftest.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
deleted file mode 100644
index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000
--- a/arch/x86/mm/kmemcheck/shadow.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index c21c2ed046120c8e12d439e71848200f11c41bd5..58477ec3d66d08acf07c1bc21bb9a55a78fcaa28 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -435,17 +435,18 @@ int register_kmmio_probe(struct kmmio_probe *p)
 	unsigned long flags;
 	int ret = 0;
 	unsigned long size = 0;
+	unsigned long addr = p->addr & PAGE_MASK;
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
 	unsigned int l;
 	pte_t *pte;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
-	if (get_kmmio_probe(p->addr)) {
+	if (get_kmmio_probe(addr)) {
 		ret = -EEXIST;
 		goto out;
 	}
 
-	pte = lookup_address(p->addr, &l);
+	pte = lookup_address(addr, &l);
 	if (!pte) {
 		ret = -EINVAL;
 		goto out;
@@ -454,7 +455,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
 	kmmio_count++;
 	list_add_rcu(&p->list, &kmmio_probes);
 	while (size < size_lim) {
-		if (add_kmmio_fault_page(p->addr + size))
+		if (add_kmmio_fault_page(addr + size))
 			pr_err("Unable to set page fault.\n");
 		size += page_level_size(l);
 	}
@@ -528,19 +529,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 {
 	unsigned long flags;
 	unsigned long size = 0;
+	unsigned long addr = p->addr & PAGE_MASK;
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
 	struct kmmio_fault_page *release_list = NULL;
 	struct kmmio_delayed_release *drelease;
 	unsigned int l;
 	pte_t *pte;
 
-	pte = lookup_address(p->addr, &l);
+	pte = lookup_address(addr, &l);
 	if (!pte)
 		return;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	while (size < size_lim) {
-		release_kmmio_fault_page(p->addr + size, &release_list);
+		release_kmmio_fault_page(addr + size, &release_list);
 		size += page_level_size(l);
 	}
 	list_del_rcu(&p->list);
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index d9a9e9fc75dd7b511050adc6bd4c914b0c4fcdaa..391b13402e403041222ef8e6bd22f60edaeb9446 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -405,13 +405,13 @@ bool sme_active(void)
 {
 	return sme_me_mask && !sev_enabled;
 }
-EXPORT_SYMBOL_GPL(sme_active);
+EXPORT_SYMBOL(sme_active);
 
 bool sev_active(void)
 {
 	return sme_me_mask && sev_enabled;
 }
-EXPORT_SYMBOL_GPL(sev_active);
+EXPORT_SYMBOL(sev_active);
 
 static const struct dma_map_ops sev_dma_ops = {
 	.alloc                  = sev_alloc,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 96d456a94b0342eb967f918e4233498ac24e4349..004abf9ebf1222c169448090f7f1c570635bce41 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd)
 		kmem_cache_free(pgd_cache, pgd);
 }
 #else
+
 static inline pgd_t *_pgd_alloc(void)
 {
-	return (pgd_t *)__get_free_page(PGALLOC_GFP);
+	return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
 }
 
 static inline void _pgd_free(pgd_t *pgd)
 {
-	free_page((unsigned long)pgd);
+	free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
 }
 #endif /* CONFIG_X86_PAE */
 
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6b9bf023a700559b87ae7ac89570d9bbd26d1f05..c3c5274410a908e762aed936406006d63c3116ac 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -10,6 +10,7 @@
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/fixmap.h>
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
new file mode 100644
index 0000000000000000000000000000000000000000..ce38f165489b5a13d92091c8671879f30ce44e20
--- /dev/null
+++ b/arch/x86/mm/pti.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * This code is based in part on work published here:
+ *
+ *	https://github.com/IAIK/KAISER
+ *
+ * The original work was written by and and signed off by for the Linux
+ * kernel by:
+ *
+ *   Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
+ *   Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
+ *   Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+ *   Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+ *
+ * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
+ * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
+ *		       Andy Lutomirsky <luto@amacapital.net>
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+
+#include <asm/cpufeature.h>
+#include <asm/hypervisor.h>
+#include <asm/vsyscall.h>
+#include <asm/cmdline.h>
+#include <asm/pti.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "Kernel/User page tables isolation: " fmt
+
+/* Backporting helper */
+#ifndef __GFP_NOTRACK
+#define __GFP_NOTRACK	0
+#endif
+
+static void __init pti_print_if_insecure(const char *reason)
+{
+	if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+		pr_info("%s\n", reason);
+}
+
+static void __init pti_print_if_secure(const char *reason)
+{
+	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+		pr_info("%s\n", reason);
+}
+
+void __init pti_check_boottime_disable(void)
+{
+	char arg[5];
+	int ret;
+
+	if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
+		pti_print_if_insecure("disabled on XEN PV.");
+		return;
+	}
+
+	ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
+	if (ret > 0)  {
+		if (ret == 3 && !strncmp(arg, "off", 3)) {
+			pti_print_if_insecure("disabled on command line.");
+			return;
+		}
+		if (ret == 2 && !strncmp(arg, "on", 2)) {
+			pti_print_if_secure("force enabled on command line.");
+			goto enable;
+		}
+		if (ret == 4 && !strncmp(arg, "auto", 4))
+			goto autosel;
+	}
+
+	if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+		pti_print_if_insecure("disabled on command line.");
+		return;
+	}
+
+autosel:
+	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+		return;
+enable:
+	setup_force_cpu_cap(X86_FEATURE_PTI);
+}
+
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	/*
+	 * Changes to the high (kernel) portion of the kernelmode page
+	 * tables are not automatically propagated to the usermode tables.
+	 *
+	 * Users should keep in mind that, unlike the kernelmode tables,
+	 * there is no vmalloc_fault equivalent for the usermode tables.
+	 * Top-level entries added to init_mm's usermode pgd after boot
+	 * will not be automatically propagated to other mms.
+	 */
+	if (!pgdp_maps_userspace(pgdp))
+		return pgd;
+
+	/*
+	 * The user page tables get the full PGD, accessible from
+	 * userspace:
+	 */
+	kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
+
+	/*
+	 * If this is normal user memory, make it NX in the kernel
+	 * pagetables so that, if we somehow screw up and return to
+	 * usermode with the kernel CR3 loaded, we'll get a page fault
+	 * instead of allowing user code to execute with the wrong CR3.
+	 *
+	 * As exceptions, we don't set NX if:
+	 *  - _PAGE_USER is not set.  This could be an executable
+	 *     EFI runtime mapping or something similar, and the kernel
+	 *     may execute from it
+	 *  - we don't have NX support
+	 *  - we're clearing the PGD (i.e. the new pgd is not present).
+	 */
+	if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
+	    (__supported_pte_mask & _PAGE_NX))
+		pgd.pgd |= _PAGE_NX;
+
+	/* return the copy of the PGD we want the kernel to use: */
+	return pgd;
+}
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a P4D on success, or NULL on failure.
+ */
+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+{
+	pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
+	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+	if (address < PAGE_OFFSET) {
+		WARN_ONCE(1, "attempt to walk user address\n");
+		return NULL;
+	}
+
+	if (pgd_none(*pgd)) {
+		unsigned long new_p4d_page = __get_free_page(gfp);
+		if (!new_p4d_page)
+			return NULL;
+
+		set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
+	}
+	BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
+	return p4d_offset(pgd, address);
+}
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a PMD on success, or NULL on failure.
+ */
+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+{
+	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+	p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
+	pud_t *pud;
+
+	BUILD_BUG_ON(p4d_large(*p4d) != 0);
+	if (p4d_none(*p4d)) {
+		unsigned long new_pud_page = __get_free_page(gfp);
+		if (!new_pud_page)
+			return NULL;
+
+		set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
+	}
+
+	pud = pud_offset(p4d, address);
+	/* The user page tables do not use large mappings: */
+	if (pud_large(*pud)) {
+		WARN_ON(1);
+		return NULL;
+	}
+	if (pud_none(*pud)) {
+		unsigned long new_pmd_page = __get_free_page(gfp);
+		if (!new_pmd_page)
+			return NULL;
+
+		set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+	}
+
+	return pmd_offset(pud, address);
+}
+
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+/*
+ * Walk the shadow copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.  Does not support large pages.
+ *
+ * Note: this is only used when mapping *new* kernel data into the
+ * user/shadow page tables.  It is never used for userspace data.
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+{
+	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+	pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
+	pte_t *pte;
+
+	/* We can't do anything sensible if we hit a large mapping. */
+	if (pmd_large(*pmd)) {
+		WARN_ON(1);
+		return NULL;
+	}
+
+	if (pmd_none(*pmd)) {
+		unsigned long new_pte_page = __get_free_page(gfp);
+		if (!new_pte_page)
+			return NULL;
+
+		set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+	}
+
+	pte = pte_offset_kernel(pmd, address);
+	if (pte_flags(*pte) & _PAGE_USER) {
+		WARN_ONCE(1, "attempt to walk to user pte\n");
+		return NULL;
+	}
+	return pte;
+}
+
+static void __init pti_setup_vsyscall(void)
+{
+	pte_t *pte, *target_pte;
+	unsigned int level;
+
+	pte = lookup_address(VSYSCALL_ADDR, &level);
+	if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
+		return;
+
+	target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
+	if (WARN_ON(!target_pte))
+		return;
+
+	*target_pte = *pte;
+	set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
+}
+#else
+static void __init pti_setup_vsyscall(void) { }
+#endif
+
+static void __init
+pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
+{
+	unsigned long addr;
+
+	/*
+	 * Clone the populated PMDs which cover start to end. These PMD areas
+	 * can have holes.
+	 */
+	for (addr = start; addr < end; addr += PMD_SIZE) {
+		pmd_t *pmd, *target_pmd;
+		pgd_t *pgd;
+		p4d_t *p4d;
+		pud_t *pud;
+
+		pgd = pgd_offset_k(addr);
+		if (WARN_ON(pgd_none(*pgd)))
+			return;
+		p4d = p4d_offset(pgd, addr);
+		if (WARN_ON(p4d_none(*p4d)))
+			return;
+		pud = pud_offset(p4d, addr);
+		if (pud_none(*pud))
+			continue;
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd))
+			continue;
+
+		target_pmd = pti_user_pagetable_walk_pmd(addr);
+		if (WARN_ON(!target_pmd))
+			return;
+
+		/*
+		 * Copy the PMD.  That is, the kernelmode and usermode
+		 * tables will share the last-level page tables of this
+		 * address range
+		 */
+		*target_pmd = pmd_clear_flags(*pmd, clear);
+	}
+}
+
+/*
+ * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
+ * next-level entry on 5-level systems.
+ */
+static void __init pti_clone_p4d(unsigned long addr)
+{
+	p4d_t *kernel_p4d, *user_p4d;
+	pgd_t *kernel_pgd;
+
+	user_p4d = pti_user_pagetable_walk_p4d(addr);
+	kernel_pgd = pgd_offset_k(addr);
+	kernel_p4d = p4d_offset(kernel_pgd, addr);
+	*user_p4d = *kernel_p4d;
+}
+
+/*
+ * Clone the CPU_ENTRY_AREA into the user space visible page table.
+ */
+static void __init pti_clone_user_shared(void)
+{
+	pti_clone_p4d(CPU_ENTRY_AREA_BASE);
+}
+
+/*
+ * Clone the ESPFIX P4D into the user space visinble page table
+ */
+static void __init pti_setup_espfix64(void)
+{
+#ifdef CONFIG_X86_ESPFIX64
+	pti_clone_p4d(ESPFIX_BASE_ADDR);
+#endif
+}
+
+/*
+ * Clone the populated PMDs of the entry and irqentry text and force it RO.
+ */
+static void __init pti_clone_entry_text(void)
+{
+	pti_clone_pmds((unsigned long) __entry_text_start,
+			(unsigned long) __irqentry_text_end,
+		       _PAGE_RW | _PAGE_GLOBAL);
+}
+
+/*
+ * Initialize kernel page table isolation
+ */
+void __init pti_init(void)
+{
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	pr_info("enabled\n");
+
+	pti_clone_user_shared();
+	pti_clone_entry_text();
+	pti_setup_espfix64();
+	pti_setup_vsyscall();
+}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 3118392cdf756bfc913d7a4137d5f7e0d46b046d..a1561957dccbb82d188d8209d76f7eddb780519a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,38 @@
  *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */
 
+/*
+ * We get here when we do something requiring a TLB invalidation
+ * but could not go invalidate all of the contexts.  We do the
+ * necessary invalidation by clearing out the 'ctx_id' which
+ * forces a TLB flush when the context is loaded.
+ */
+void clear_asid_other(void)
+{
+	u16 asid;
+
+	/*
+	 * This is only expected to be set if we have disabled
+	 * kernel _PAGE_GLOBAL pages.
+	 */
+	if (!static_cpu_has(X86_FEATURE_PTI)) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
+		/* Do not need to flush the current asid */
+		if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
+			continue;
+		/*
+		 * Make sure the next time we go to switch to
+		 * this asid, we do a flush:
+		 */
+		this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
+	}
+	this_cpu_write(cpu_tlbstate.invalidate_other, false);
+}
+
 atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
 
 
@@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
 		return;
 	}
 
+	if (this_cpu_read(cpu_tlbstate.invalidate_other))
+		clear_asid_other();
+
 	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
 		if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
 		    next->context.ctx_id)
@@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
 	*need_flush = true;
 }
 
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+{
+	unsigned long new_mm_cr3;
+
+	if (need_flush) {
+		invalidate_user_asid(new_asid);
+		new_mm_cr3 = build_cr3(pgdir, new_asid);
+	} else {
+		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+	}
+
+	/*
+	 * Caution: many callers of this function expect
+	 * that load_cr3() is serializing and orders TLB
+	 * fills with respect to the mm_cpumask writes.
+	 */
+	write_cr3(new_mm_cr3);
+}
+
 void leave_mm(int cpu)
 {
 	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -128,7 +182,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * isn't free.
 	 */
 #ifdef CONFIG_DEBUG_VM
-	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
+	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
 		/*
 		 * If we were to BUG here, we'd be very likely to kill
 		 * the system so hard that we don't see the call trace.
@@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		if (need_flush) {
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-			write_cr3(build_cr3(next, new_asid));
+			load_new_mm_cr3(next->pgd, new_asid, true);
 
 			/*
 			 * NB: This gets called via leave_mm() in the idle path
@@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 		} else {
 			/* The new ASID is already up to date. */
-			write_cr3(build_cr3_noflush(next, new_asid));
+			load_new_mm_cr3(next->pgd, new_asid, false);
 
 			/* See above wrt _rcuidle. */
 			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
@@ -288,7 +342,7 @@ void initialize_tlbstate_and_flush(void)
 		!(cr4_read_shadow() & X86_CR4_PCIDE));
 
 	/* Force ASID 0 and force a TLB flush. */
-	write_cr3(build_cr3(mm, 0));
+	write_cr3(build_cr3(mm->pgd, 0));
 
 	/* Reinitialize tlbstate. */
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
@@ -551,7 +605,7 @@ static void do_kernel_range_flush(void *info)
 
 	/* flush range by one by one 'invlpg' */
 	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
-		__flush_tlb_single(addr);
+		__flush_tlb_one(addr);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index bb461cfd01abc78cdc45c6e69f013128e04ccdb4..526536c81ddc41d395fd971d909a3b687e46d989 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void)
 	 * We should get host bridge information from ACPI unless the BIOS
 	 * doesn't support it.
 	 */
-	if (acpi_os_get_root_pointer())
+	if (!acpi_disabled && acpi_os_get_root_pointer())
 		return 0;
 #endif
 
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 7a5350d08cef711a14c29cf1f8fcedb70ecc7465..563049c483a12c5fe587e463fb96bff4dde22c5a 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -594,6 +594,11 @@ char *__init pcibios_setup(char *str)
 	} else if (!strcmp(str, "nocrs")) {
 		pci_probe |= PCI_ROOT_NO_CRS;
 		return NULL;
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+	} else if (!strcmp(str, "big_root_window")) {
+		pci_probe |= PCI_BIG_ROOT_WINDOW;
+		return NULL;
+#endif
 	} else if (!strcmp(str, "earlydump")) {
 		pci_early_dump_regs = 1;
 		return NULL;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 1e996df687a3bc47fb796c4fe8025bcb148a6890..f6a26e3cb4763325465df6ec19efb282e26e41fd 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -662,9 +662,23 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
  */
 static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
 {
-	unsigned i;
 	u32 base, limit, high;
-	struct resource *res, *conflict;
+	struct pci_dev *other;
+	struct resource *res;
+	unsigned i;
+	int r;
+
+	if (!(pci_probe & PCI_BIG_ROOT_WINDOW))
+		return;
+
+	/* Check that we are the only device of that type */
+	other = pci_get_device(dev->vendor, dev->device, NULL);
+	if (other != dev ||
+	    (other = pci_get_device(dev->vendor, dev->device, other))) {
+		/* This is a multi-socket system, don't touch it for now */
+		pci_dev_put(other);
+		return;
+	}
 
 	for (i = 0; i < 8; i++) {
 		pci_read_config_dword(dev, AMD_141b_MMIO_BASE(i), &base);
@@ -689,17 +703,25 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
 	if (!res)
 		return;
 
+	/*
+	 * Allocate a 256GB window directly below the 0xfd00000000 hardware
+	 * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6).
+	 */
 	res->name = "PCI Bus 0000:00";
 	res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM |
 		IORESOURCE_MEM_64 | IORESOURCE_WINDOW;
-	res->start = 0x100000000ull;
+	res->start = 0xbd00000000ull;
 	res->end = 0xfd00000000ull - 1;
 
-	/* Just grab the free area behind system memory for this */
-	while ((conflict = request_resource_conflict(&iomem_resource, res)))
-		res->start = conflict->end + 1;
+	r = request_resource(&iomem_resource, res);
+	if (r) {
+		kfree(res);
+		return;
+	}
 
-	dev_info(&dev->dev, "adding root bus resource %pR\n", res);
+	dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
+		 res);
+	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 
 	base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) |
 		AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK;
@@ -714,10 +736,10 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
 
 	pci_bus_add_resource(dev->bus, res, 0);
 }
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
 
 #endif
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 6a151ce70e865caadde95c859855c4b63283ad4b..2dd15e967c3f5257c530d53c7d4f51f2e3165190 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -135,7 +135,9 @@ pgd_t * __init efi_call_phys_prolog(void)
 				pud[j] = *pud_offset(p4d_k, vaddr);
 			}
 		}
+		pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
 	}
+
 out:
 	__flush_tlb_all();
 
@@ -196,6 +198,9 @@ static pgd_t *efi_pgd;
  * because we want to avoid inserting EFI region mappings (EFI_VA_END
  * to EFI_VA_START) into the standard kernel page tables. Everything
  * else can be shared, see efi_sync_low_kernel_mappings().
+ *
+ * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
+ * allocation.
  */
 int __init efi_alloc_page_tables(void)
 {
@@ -208,7 +213,7 @@ int __init efi_alloc_page_tables(void)
 		return 0;
 
 	gfp_mask = GFP_KERNEL | __GFP_ZERO;
-	efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
+	efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
 	if (!efi_pgd)
 		return -ENOMEM;
 
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 8a99a2e96537a91db6574955aafeae1a5103ce02..5b513ccffde404adbd1ea2929226669f2a282825 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -592,7 +592,18 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff,
 	/*
 	 * Update the first page pointer to skip over the CSH header.
 	 */
-	cap_info->pages[0] += csh->headersize;
+	cap_info->phys[0] += csh->headersize;
+
+	/*
+	 * cap_info->capsule should point at a virtual mapping of the entire
+	 * capsule, starting at the capsule header. Our image has the Quark
+	 * security header prepended, so we cannot rely on the default vmap()
+	 * mapping created by the generic capsule code.
+	 * Given that the Quark firmware does not appear to care about the
+	 * virtual mapping, let's just point cap_info->capsule at our copy
+	 * of the capsule header.
+	 */
+	cap_info->capsule = &cap_info->header;
 
 	return 1;
 }
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index dc036e511f48d3adc3261ca4d4214c1c6244dbe6..5a0483e7bf662cb27044b7684567b644dfe49b81 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
 	return 0;
 }
 
-static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
+static struct bt_sfi_data tng_bt_sfi_data __initdata = {
 	.setup	= tng_bt_sfi_setup,
 };
 
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index f44c0bc95aa2f45ad42462a5f23f4db4672d1257..8538a6723171a5606058a8823ed1cbb2d343fdb6 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
 		local_flush_tlb();
 		stat->d_alltlb++;
 	} else {
-		__flush_tlb_one(msg->address);
+		__flush_tlb_single(msg->address);
 		stat->d_onetlb++;
 	}
 	stat->d_requestee++;
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 5f6fd860820a3c5175609a5a3468262aac937604..e4cb9f4cde8ae224afe51ced1970805df293c956 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
  * on the specified blade to allow the sending of MSIs to the specified CPU.
  */
 static int uv_domain_activate(struct irq_domain *domain,
-			      struct irq_data *irq_data, bool early)
+			      struct irq_data *irq_data, bool reserve)
 {
 	uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
 	return 0;
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index c34bd8233f7c81ddff649e970f79802b9ae7ef27..5f64f30873e257757091b88f8e263711d8db548f 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -905,7 +905,7 @@ static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
 /*
  * UV NMI handler
  */
-int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
+static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
 {
 	struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
 	int cpu = smp_processor_id();
@@ -1013,7 +1013,7 @@ void uv_nmi_init(void)
 }
 
 /* Setup HUB NMI info */
-void __init uv_nmi_setup_common(bool hubbed)
+static void __init uv_nmi_setup_common(bool hubbed)
 {
 	int size = sizeof(void *) * (1 << NODES_SHIFT);
 	int cpu;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 84fcfde53f8f3f5bb4b85efc20ab106c419dcc11..a7d966964c6f20577c927cf5e618bc86b3331977 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt)
 	/*
 	 * descriptor tables
 	 */
-#ifdef CONFIG_X86_32
 	store_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
-	store_idt((struct desc_ptr *)&ctxt->idt_limit);
-#endif
+
 	/*
 	 * We save it here, but restore it only in the hibernate case.
 	 * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
@@ -103,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt)
 	/*
 	 * segment registers
 	 */
-#ifdef CONFIG_X86_32
-	savesegment(es, ctxt->es);
-	savesegment(fs, ctxt->fs);
+#ifdef CONFIG_X86_32_LAZY_GS
 	savesegment(gs, ctxt->gs);
-	savesegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
-	asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
-	asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
-	asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
-	asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
-	asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+#endif
+#ifdef CONFIG_X86_64
+	savesegment(gs, ctxt->gs);
+	savesegment(fs, ctxt->fs);
+	savesegment(ds, ctxt->ds);
+	savesegment(es, ctxt->es);
 
 	rdmsrl(MSR_FS_BASE, ctxt->fs_base);
-	rdmsrl(MSR_GS_BASE, ctxt->gs_base);
-	rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+	rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+	rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
 	mtrr_save_fixed_ranges(NULL);
 
 	rdmsrl(MSR_EFER, ctxt->efer);
@@ -160,17 +152,19 @@ static void do_fpu_end(void)
 static void fix_processor_context(void)
 {
 	int cpu = smp_processor_id();
-	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
 	struct desc_struct *desc = get_cpu_gdt_rw(cpu);
 	tss_desc tss;
 #endif
-	set_tss_desc(cpu, t);	/*
-				 * This just modifies memory; should not be
-				 * necessary. But... This is necessary, because
-				 * 386 hardware has concept of busy TSS or some
-				 * similar stupidity.
-				 */
+
+	/*
+	 * We need to reload TR, which requires that we change the
+	 * GDT entry to indicate "available" first.
+	 *
+	 * XXX: This could probably all be replaced by a call to
+	 * force_reload_TR().
+	 */
+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 
 #ifdef CONFIG_X86_64
 	memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
@@ -178,6 +172,9 @@ static void fix_processor_context(void)
 	write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
 
 	syscall_init();				/* This sets MSR_*STAR and related */
+#else
+	if (boot_cpu_has(X86_FEATURE_SEP))
+		enable_sep_cpu();
 #endif
 	load_TR_desc();				/* This does ltr */
 	load_mm_ldt(current->active_mm);	/* This does lldt */
@@ -190,9 +187,12 @@ static void fix_processor_context(void)
 }
 
 /**
- *	__restore_processor_state - restore the contents of CPU registers saved
- *		by __save_processor_state()
- *	@ctxt - structure to load the registers contents from
+ * __restore_processor_state - restore the contents of CPU registers saved
+ *                             by __save_processor_state()
+ * @ctxt - structure to load the registers contents from
+ *
+ * The asm code that gets us here will have restored a usable GDT, although
+ * it will be pointing to the wrong alias.
  */
 static void notrace __restore_processor_state(struct saved_context *ctxt)
 {
@@ -215,46 +215,52 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	write_cr2(ctxt->cr2);
 	write_cr0(ctxt->cr0);
 
+	/* Restore the IDT. */
+	load_idt(&ctxt->idt);
+
 	/*
-	 * now restore the descriptor tables to their proper values
-	 * ltr is done i fix_processor_context().
+	 * Just in case the asm code got us here with the SS, DS, or ES
+	 * out of sync with the GDT, update them.
 	 */
-#ifdef CONFIG_X86_32
-	load_idt(&ctxt->idt);
+	loadsegment(ss, __KERNEL_DS);
+	loadsegment(ds, __USER_DS);
+	loadsegment(es, __USER_DS);
+
+	/*
+	 * Restore percpu access.  Percpu access can happen in exception
+	 * handlers or in complicated helpers like load_gs_index().
+	 */
+#ifdef CONFIG_X86_64
+	wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
 #else
-/* CONFIG_X86_64 */
-	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
+	loadsegment(fs, __KERNEL_PERCPU);
+	loadsegment(gs, __KERNEL_STACK_CANARY);
 #endif
 
+	/* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
+	fix_processor_context();
+
 	/*
-	 * segment registers
+	 * Now that we have descriptor tables fully restored and working
+	 * exception handling, restore the usermode segments.
 	 */
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_64
+	loadsegment(ds, ctxt->es);
 	loadsegment(es, ctxt->es);
 	loadsegment(fs, ctxt->fs);
-	loadsegment(gs, ctxt->gs);
-	loadsegment(ss, ctxt->ss);
+	load_gs_index(ctxt->gs);
 
 	/*
-	 * sysenter MSRs
+	 * Restore FSBASE and GSBASE after restoring the selectors, since
+	 * restoring the selectors clobbers the bases.  Keep in mind
+	 * that MSR_KERNEL_GS_BASE is horribly misnamed.
 	 */
-	if (boot_cpu_has(X86_FEATURE_SEP))
-		enable_sep_cpu();
-#else
-/* CONFIG_X86_64 */
-	asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
-	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
-	asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
-	load_gs_index(ctxt->gs);
-	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
-
 	wrmsrl(MSR_FS_BASE, ctxt->fs_base);
-	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
-	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
+#elif defined(CONFIG_X86_32_LAZY_GS)
+	loadsegment(gs, ctxt->gs);
 #endif
 
-	fix_processor_context();
-
 	do_fpu_end();
 	tsc_verify_tsc_adjust(true);
 	x86_platform.restore_sched_clock_state();
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 6b830d4cb4c8e8e78c44dd87a12f642234533b4e..de58533d3664cdf067aedecfc81af2729c3b539b 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -57,7 +57,7 @@ static u32 xen_apic_read(u32 reg)
 		return 0;
 
 	if (reg == APIC_LVR)
-		return 0x10;
+		return 0x14;
 #ifdef CONFIG_X86_32
 	if (reg == APIC_LDR)
 		return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d669e9d890017770456abe458f1161eb2509c09e..c9081c6671f0b7a05ecfaaf206e7e1ed2b1f456a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1,8 +1,12 @@
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+#include <linux/bootmem.h>
+#endif
 #include <linux/cpu.h>
 #include <linux/kexec.h>
 
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/interface/memory.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num)
 }
 EXPORT_SYMBOL(xen_arch_unregister_cpu);
 #endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+void __init arch_xen_balloon_init(struct resource *hostmem_resource)
+{
+	struct xen_memory_map memmap;
+	int rc;
+	unsigned int i, last_guest_ram;
+	phys_addr_t max_addr = PFN_PHYS(max_pfn);
+	struct e820_table *xen_e820_table;
+	const struct e820_entry *entry;
+	struct resource *res;
+
+	if (!xen_initial_domain())
+		return;
+
+	xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
+	if (!xen_e820_table)
+		return;
+
+	memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
+	set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
+	rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
+	if (rc) {
+		pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
+		goto out;
+	}
+
+	last_guest_ram = 0;
+	for (i = 0; i < memmap.nr_entries; i++) {
+		if (xen_e820_table->entries[i].addr >= max_addr)
+			break;
+		if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
+			last_guest_ram = i;
+	}
+
+	entry = &xen_e820_table->entries[last_guest_ram];
+	if (max_addr >= entry->addr + entry->size)
+		goto out; /* No unallocated host RAM. */
+
+	hostmem_resource->start = max_addr;
+	hostmem_resource->end = entry->addr + entry->size;
+
+	/*
+	 * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
+	 * as unavailable. The rest of that region can be used for hotplug-based
+	 * ballooning.
+	 */
+	for (; i < memmap.nr_entries; i++) {
+		entry = &xen_e820_table->entries[i];
+
+		if (entry->type == E820_TYPE_RAM)
+			continue;
+
+		if (entry->addr >= hostmem_resource->end)
+			break;
+
+		res = kzalloc(sizeof(*res), GFP_KERNEL);
+		if (!res)
+			goto out;
+
+		res->name = "Unavailable host RAM";
+		res->start = entry->addr;
+		res->end = (entry->addr + entry->size < hostmem_resource->end) ?
+			    entry->addr + entry->size : hostmem_resource->end;
+		rc = insert_resource(hostmem_resource, res);
+		if (rc) {
+			pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
+				__func__, res->start, res->end, rc);
+			kfree(res);
+			goto  out;
+		}
+	}
+
+ out:
+	kfree(xen_e820_table);
+}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5b2b3f3f653112fbe00484f5dcae0de02543df3c..c047f42552e1a61ed0a5787d904681974cc05af1 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -88,6 +88,8 @@
 #include "multicalls.h"
 #include "pmu.h"
 
+#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
+
 void *xen_initial_gdt;
 
 static int xen_cpu_up_prepare_pv(unsigned int cpu);
@@ -622,7 +624,7 @@ static struct trap_array_entry trap_array[] = {
 	{ simd_coprocessor_error,      xen_simd_coprocessor_error,      false },
 };
 
-static bool get_trap_addr(void **addr, unsigned int ist)
+static bool __ref get_trap_addr(void **addr, unsigned int ist)
 {
 	unsigned int nr;
 	bool ist_okay = false;
@@ -644,6 +646,14 @@ static bool get_trap_addr(void **addr, unsigned int ist)
 		}
 	}
 
+	if (nr == ARRAY_SIZE(trap_array) &&
+	    *addr >= (void *)early_idt_handler_array[0] &&
+	    *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) {
+		nr = (*addr - (void *)early_idt_handler_array[0]) /
+		     EARLY_IDT_HANDLER_SIZE;
+		*addr = (void *)xen_early_idt_handler_array[nr];
+	}
+
 	if (WARN_ON(ist != 0 && !ist_okay))
 		return false;
 
@@ -818,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0)
 	mcs = xen_mc_entry(0);
 	MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
-	this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 void xen_set_iopl_mask(unsigned mask)
@@ -1250,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
 
 	/* Work out if we support NX */
+	get_cpu_cap(&boot_cpu_data);
 	x86_configure_nx();
 
 	/* Get mfn list */
@@ -1262,6 +1273,21 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	xen_setup_gdt(0);
 
 	xen_init_irq_ops();
+
+	/* Let's presume PV guests always boot on vCPU with id 0. */
+	per_cpu(xen_vcpu_id, 0) = 0;
+
+	/*
+	 * Setup xen_vcpu early because idt_setup_early_handler needs it for
+	 * local_irq_disable(), irqs_disabled().
+	 *
+	 * Don't do the full vcpu_info placement stuff until we have
+	 * the cpu_possible_mask and a non-dummy shared_info.
+	 */
+	xen_vcpu_info_reset(0);
+
+	idt_setup_early_handler();
+
 	xen_init_capabilities();
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -1295,18 +1321,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	 */
 	acpi_numa = -1;
 #endif
-	/* Let's presume PV guests always boot on vCPU with id 0. */
-	per_cpu(xen_vcpu_id, 0) = 0;
-
-	/*
-	 * Setup xen_vcpu early because start_kernel needs it for
-	 * local_irq_disable(), irqs_disabled().
-	 *
-	 * Don't do the full vcpu_info placement stuff until we have
-	 * the cpu_possible_mask and a non-dummy shared_info.
-	 */
-	xen_vcpu_info_reset(0);
-
 	WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
 
 	local_irq_disable();
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index fc048ec686e7699b263254c79b482ccf935c21ef..d85076223a696d0b00bee7c22a9e28b1e66dc975 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1325,20 +1325,18 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 {
 	struct {
 		struct mmuext_op op;
-#ifdef CONFIG_SMP
-		DECLARE_BITMAP(mask, num_processors);
-#else
 		DECLARE_BITMAP(mask, NR_CPUS);
-#endif
 	} *args;
 	struct multicall_space mcs;
+	const size_t mc_entry_size = sizeof(args->op) +
+		sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
 
 	trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
 
 	if (cpumask_empty(cpus))
 		return;		/* nothing to do */
 
-	mcs = xen_mc_entry(sizeof(*args));
+	mcs = xen_mc_entry(mc_entry_size);
 	args = mcs.args;
 	args->op.arg2.vcpumask = to_cpumask(args->mask);
 
@@ -1902,6 +1900,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	/* Graft it onto L4[511][510] */
 	copy_page(level2_kernel_pgt, l2);
 
+	/*
+	 * Zap execute permission from the ident map. Due to the sharing of
+	 * L1 entries we need to do this in the L2.
+	 */
+	if (__supported_pte_mask & _PAGE_NX) {
+		for (i = 0; i < PTRS_PER_PMD; ++i) {
+			if (pmd_none(level2_ident_pgt[i]))
+				continue;
+			level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
+		}
+	}
+
 	/* Copy the initial P->M table mappings if necessary. */
 	i = pgd_index(xen_start_info->mfn_list);
 	if (i && i < pgd_index(__START_KERNEL_map))
@@ -2261,7 +2271,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
 	switch (idx) {
 	case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-	case FIX_RO_IDT:
 #ifdef CONFIG_X86_32
 	case FIX_WP_TEST:
 # ifdef CONFIG_HIGHMEM
@@ -2272,7 +2281,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #endif
 	case FIX_TEXT_POKE0:
 	case FIX_TEXT_POKE1:
-	case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
 		/* All local page mappings */
 		pte = pfn_pte(phys, prot);
 		break;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c114ca767b3b8a382918e2b0160983fa257318db..6e0d2086eacbf37326467b5142e59750151a5328 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -808,7 +808,6 @@ char * __init xen_memory_setup(void)
 	addr = xen_e820_table.entries[0].addr;
 	size = xen_e820_table.entries[0].size;
 	while (i < xen_e820_table.nr_entries) {
-		bool discard = false;
 
 		chunk_size = size;
 		type = xen_e820_table.entries[i].type;
@@ -824,11 +823,10 @@ char * __init xen_memory_setup(void)
 				xen_add_extra_mem(pfn_s, n_pfns);
 				xen_max_p2m_pfn = pfn_s + n_pfns;
 			} else
-				discard = true;
+				type = E820_TYPE_UNUSABLE;
 		}
 
-		if (!discard)
-			xen_align_and_add_e820_region(addr, chunk_size, type);
+		xen_align_and_add_e820_region(addr, chunk_size, type);
 
 		addr += chunk_size;
 		size -= chunk_size;
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 8a10c9a9e2b50651b2c8dd322956298402e79e7d..417b339e5c8e1aadedd20231c9be82ac93dbe728 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -15,6 +15,7 @@
 
 #include <xen/interface/xen.h>
 
+#include <linux/init.h>
 #include <linux/linkage.h>
 
 .macro xen_pv_trap name
@@ -54,6 +55,19 @@ xen_pv_trap entry_INT80_compat
 #endif
 xen_pv_trap hypervisor_callback
 
+	__INIT
+ENTRY(xen_early_idt_handler_array)
+	i = 0
+	.rept NUM_EXCEPTION_VECTORS
+	pop %rcx
+	pop %r11
+	jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
+	i = i + 1
+	.fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
+	.endr
+END(xen_early_idt_handler_array)
+	__FINIT
+
 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
 /*
  * Xen64 iret frame:
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 75011b80660f6262206b6759e7b63a06a1d809ea..3b34745d0a52dd097d9c2d8955bd4afce1d24ee1 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -72,7 +72,7 @@ u64 xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
 void xen_save_time_memory_area(void);
 void xen_restore_time_memory_area(void);
-void __init xen_init_time_ops(void);
+void __ref xen_init_time_ops(void);
 void __init xen_hvm_init_time_ops(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
index a5bcdfb890f1b77fa8f5f2b349a05d7f759fb661..837d4dd7678545dec75f53c21bcf5e2dea500202 100644
--- a/arch/xtensa/include/uapi/asm/Kbuild
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/block/bio.c b/block/bio.c
index 8bfdea58159ba9ffd972dd95717e0eee99101e0a..9ef6cf3addb38cae822d0e5c5ef18ba9e98cd2d7 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	bio->bi_disk = bio_src->bi_disk;
 	bio->bi_partno = bio_src->bi_partno;
 	bio_set_flag(bio, BIO_CLONED);
+	if (bio_flagged(bio_src, BIO_THROTTLED))
+		bio_set_flag(bio, BIO_THROTTLED);
 	bio->bi_opf = bio_src->bi_opf;
 	bio->bi_write_hint = bio_src->bi_write_hint;
 	bio->bi_iter = bio_src->bi_iter;
diff --git a/block/blk-core.c b/block/blk-core.c
index b8881750a3acd705789050c845c942673da999a8..3ba4326a63b59632fad81e686bf8229eee07320b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -562,6 +562,13 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
 	}
 }
 
+void blk_drain_queue(struct request_queue *q)
+{
+	spin_lock_irq(q->queue_lock);
+	__blk_drain_queue(q, true);
+	spin_unlock_irq(q->queue_lock);
+}
+
 /**
  * blk_queue_bypass_start - enter queue bypass mode
  * @q: queue of interest
@@ -689,8 +696,6 @@ void blk_cleanup_queue(struct request_queue *q)
 	 */
 	blk_freeze_queue(q);
 	spin_lock_irq(lock);
-	if (!q->mq_ops)
-		__blk_drain_queue(q, true);
 	queue_flag_set(QUEUE_FLAG_DEAD, q);
 	spin_unlock_irq(lock);
 
diff --git a/block/blk-map.c b/block/blk-map.c
index b21f8e86f1207f9b76bf3e2083fcf72b5062f0b7..d3a94719f03fb2af81d6270d6fc9ed58f0dde373 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -12,22 +12,29 @@
 #include "blk.h"
 
 /*
- * Append a bio to a passthrough request.  Only works can be merged into
- * the request based on the driver constraints.
+ * Append a bio to a passthrough request.  Only works if the bio can be merged
+ * into the request based on the driver constraints.
  */
-int blk_rq_append_bio(struct request *rq, struct bio *bio)
+int blk_rq_append_bio(struct request *rq, struct bio **bio)
 {
-	blk_queue_bounce(rq->q, &bio);
+	struct bio *orig_bio = *bio;
+
+	blk_queue_bounce(rq->q, bio);
 
 	if (!rq->bio) {
-		blk_rq_bio_prep(rq->q, rq, bio);
+		blk_rq_bio_prep(rq->q, rq, *bio);
 	} else {
-		if (!ll_back_merge_fn(rq->q, rq, bio))
+		if (!ll_back_merge_fn(rq->q, rq, *bio)) {
+			if (orig_bio != *bio) {
+				bio_put(*bio);
+				*bio = orig_bio;
+			}
 			return -EINVAL;
+		}
 
-		rq->biotail->bi_next = bio;
-		rq->biotail = bio;
-		rq->__data_len += bio->bi_iter.bi_size;
+		rq->biotail->bi_next = *bio;
+		rq->biotail = *bio;
+		rq->__data_len += (*bio)->bi_iter.bi_size;
 	}
 
 	return 0;
@@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
 	 * We link the bounce buffer in and could have to traverse it
 	 * later so we have to get a ref to prevent it from being freed
 	 */
-	ret = blk_rq_append_bio(rq, bio);
-	bio_get(bio);
+	ret = blk_rq_append_bio(rq, &bio);
 	if (ret) {
-		bio_endio(bio);
 		__blk_rq_unmap_user(orig_bio);
-		bio_put(bio);
 		return ret;
 	}
+	bio_get(bio);
 
 	return 0;
 }
@@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	int reading = rq_data_dir(rq) == READ;
 	unsigned long addr = (unsigned long) kbuf;
 	int do_copy = 0;
-	struct bio *bio;
+	struct bio *bio, *orig_bio;
 	int ret;
 
 	if (len > (queue_max_hw_sectors(q) << 9))
@@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	if (do_copy)
 		rq->rq_flags |= RQF_COPY_USER;
 
-	ret = blk_rq_append_bio(rq, bio);
+	orig_bio = bio;
+	ret = blk_rq_append_bio(rq, &bio);
 	if (unlikely(ret)) {
 		/* request is too big */
-		bio_put(bio);
+		bio_put(orig_bio);
 		return ret;
 	}
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 11097477eeab6591088ca817d4690535e114e699..3d379732749175ece7ae39e427e115f51621c521 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -161,6 +161,8 @@ void blk_freeze_queue(struct request_queue *q)
 	 * exported to drivers as the only user for unfreeze is blk_mq.
 	 */
 	blk_freeze_queue_start(q);
+	if (!q->mq_ops)
+		blk_drain_queue(q);
 	blk_mq_freeze_queue_wait(q);
 }
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 825bc29767e6699ac85675d319a9866b70cc9b84..d19f416d61012ac032c49608f0afe463c948e8bc 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2226,13 +2226,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 out_unlock:
 	spin_unlock_irq(q->queue_lock);
 out:
-	/*
-	 * As multiple blk-throtls may stack in the same issue path, we
-	 * don't want bios to leave with the flag set.  Clear the flag if
-	 * being issued.
-	 */
-	if (!throttled)
-		bio_clear_flag(bio, BIO_THROTTLED);
+	bio_set_flag(bio, BIO_THROTTLED);
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	if (throttled || !td->track_bio_latency)
diff --git a/block/blk.h b/block/blk.h
index 3f1446937aece26f38ceb66cf4e3d159a23df871..442098aa9463a37dad0dfccb1718eea65be6cdb3 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -330,4 +330,6 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
 }
 #endif /* CONFIG_BOUNCE */
 
+extern void blk_drain_queue(struct request_queue *q);
+
 #endif /* BLK_INTERNAL_H */
diff --git a/block/bounce.c b/block/bounce.c
index fceb1a96480bfb9600e4664fa2b4992c8bb64210..1d05c422c932ad56d705f94deed6cce0891ff9d3 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	unsigned i = 0;
 	bool bounce = false;
 	int sectors = 0;
+	bool passthrough = bio_is_passthrough(*bio_orig);
 
 	bio_for_each_segment(from, *bio_orig, iter) {
 		if (i++ < BIO_MAX_PAGES)
@@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	if (!bounce)
 		return;
 
-	if (sectors < bio_sectors(*bio_orig)) {
+	if (!passthrough && sectors < bio_sectors(*bio_orig)) {
 		bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
 		bio_chain(bio, *bio_orig);
 		generic_make_request(*bio_orig);
 		*bio_orig = bio;
 	}
-	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
+	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
+			bounce_bio_set);
 
 	bio_for_each_segment_all(to, bio, i) {
 		struct page *page = to->bv_page;
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b4df317c291692f01138b91608dc6c80f71bb9aa..f95c60774ce8ca613417d3ccf54bee52010752ee 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -100,9 +100,13 @@ struct kyber_hctx_data {
 	unsigned int cur_domain;
 	unsigned int batching;
 	wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
+	struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
 	atomic_t wait_index[KYBER_NUM_DOMAINS];
 };
 
+static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
+			     void *key);
+
 static int rq_sched_domain(const struct request *rq)
 {
 	unsigned int op = rq->cmd_flags;
@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 
 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
 		INIT_LIST_HEAD(&khd->rqs[i]);
+		init_waitqueue_func_entry(&khd->domain_wait[i],
+					  kyber_domain_wake);
+		khd->domain_wait[i].private = hctx;
 		INIT_LIST_HEAD(&khd->domain_wait[i].entry);
 		atomic_set(&khd->wait_index[i], 0);
 	}
@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
 	int nr;
 
 	nr = __sbitmap_queue_get(domain_tokens);
-	if (nr >= 0)
-		return nr;
 
 	/*
 	 * If we failed to get a domain token, make sure the hardware queue is
 	 * run when one becomes available. Note that this is serialized on
 	 * khd->lock, but we still need to be careful about the waker.
 	 */
-	if (list_empty_careful(&wait->entry)) {
-		init_waitqueue_func_entry(wait, kyber_domain_wake);
-		wait->private = hctx;
+	if (nr < 0 && list_empty_careful(&wait->entry)) {
 		ws = sbq_wait_ptr(domain_tokens,
 				  &khd->wait_index[sched_domain]);
+		khd->domain_ws[sched_domain] = ws;
 		add_wait_queue(&ws->wait, wait);
 
 		/*
 		 * Try again in case a token was freed before we got on the wait
-		 * queue. The waker may have already removed the entry from the
-		 * wait queue, but list_del_init() is okay with that.
+		 * queue.
 		 */
 		nr = __sbitmap_queue_get(domain_tokens);
-		if (nr >= 0) {
-			unsigned long flags;
+	}
 
-			spin_lock_irqsave(&ws->wait.lock, flags);
-			list_del_init(&wait->entry);
-			spin_unlock_irqrestore(&ws->wait.lock, flags);
-		}
+	/*
+	 * If we got a token while we were on the wait queue, remove ourselves
+	 * from the wait queue to ensure that all wake ups make forward
+	 * progress. It's possible that the waker already deleted the entry
+	 * between the !list_empty_careful() check and us grabbing the lock, but
+	 * list_del_init() is okay with that.
+	 */
+	if (nr >= 0 && !list_empty_careful(&wait->entry)) {
+		ws = khd->domain_ws[sched_domain];
+		spin_lock_irq(&ws->wait.lock);
+		list_del_init(&wait->entry);
+		spin_unlock_irq(&ws->wait.lock);
 	}
+
 	return nr;
 }
 
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 358749c38894e31481fb0d903c0d1f7504311aa8..35d4dcea381fbee7aa312667aa235456bb9d7918 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -664,7 +664,7 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
 	unsigned int i;
 
 	list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
-		ctx->rcvused -= rsgl->sg_num_bytes;
+		atomic_sub(rsgl->sg_num_bytes, &ctx->rcvused);
 		af_alg_free_sg(&rsgl->sgl);
 		list_del(&rsgl->list);
 		if (rsgl != &areq->first_rsgl)
@@ -672,14 +672,15 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
 	}
 
 	tsgl = areq->tsgl;
-	for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
-		if (!sg_page(sg))
-			continue;
-		put_page(sg_page(sg));
-	}
+	if (tsgl) {
+		for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
+			if (!sg_page(sg))
+				continue;
+			put_page(sg_page(sg));
+		}
 
-	if (areq->tsgl && areq->tsgl_entries)
 		sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
+	}
 }
 EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls);
 
@@ -1137,12 +1138,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
 		if (!af_alg_readable(sk))
 			break;
 
-		if (!ctx->used) {
-			err = af_alg_wait_for_data(sk, flags);
-			if (err)
-				return err;
-		}
-
 		seglen = min_t(size_t, (maxsize - len),
 			       msg_data_left(msg));
 
@@ -1168,7 +1163,7 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
 
 		areq->last_rsgl = rsgl;
 		len += err;
-		ctx->rcvused += err;
+		atomic_add(err, &ctx->rcvused);
 		rsgl->sg_num_bytes = err;
 		iov_iter_advance(&msg->msg_iter, err);
 	}
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 60d7366ed343e9ad6a76b9dd36be22fbd7543c89..9a636f961572b99af844b1d8a28bebfd9463c9e2 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
 
 			spawn->alg = NULL;
 			spawns = &inst->alg.cra_users;
+
+			/*
+			 * We may encounter an unregistered instance here, since
+			 * an instance's spawns are set up prior to the instance
+			 * being registered.  An unregistered instance will have
+			 * NULL ->cra_users.next, since ->cra_users isn't
+			 * properly initialized until registration.  But an
+			 * unregistered instance cannot have any users, so treat
+			 * it the same as ->cra_users being empty.
+			 */
+			if (spawns->next == NULL)
+				break;
 		}
 	} while ((spawns = crypto_more_spawns(alg, &stack, &top,
 					      &secondary_spawns)));
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 805f485ddf1be4711a9d2ec47998964543f1d217..e9885a35ef6e2372a393cf3f0445924f9f044788 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	size_t usedpages = 0;		/* [in]  RX bufs to be used from user */
 	size_t processed = 0;		/* [in]  TX bufs to be consumed */
 
+	if (!ctx->used) {
+		err = af_alg_wait_for_data(sk, flags);
+		if (err)
+			return err;
+	}
+
 	/*
 	 * Data length provided by caller via sendmsg/sendpage that has not
 	 * yet been processed.
@@ -285,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 		/* AIO operation */
 		sock_hold(sk);
 		areq->iocb = msg->msg_iocb;
+
+		/* Remember output size that will be generated. */
+		areq->outlen = outlen;
+
 		aead_request_set_callback(&areq->cra_u.aead_req,
 					  CRYPTO_TFM_REQ_MAY_BACKLOG,
 					  af_alg_async_cb, areq);
@@ -292,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 				 crypto_aead_decrypt(&areq->cra_u.aead_req);
 
 		/* AIO operation in progress */
-		if (err == -EINPROGRESS || err == -EBUSY) {
-			/* Remember output size that will be generated. */
-			areq->outlen = outlen;
-
+		if (err == -EINPROGRESS || err == -EBUSY)
 			return -EIOCBQUEUED;
-		}
 
 		sock_put(sk);
 	} else {
@@ -503,6 +509,7 @@ static void aead_release(void *private)
 	struct aead_tfm *tfm = private;
 
 	crypto_free_aead(tfm->aead);
+	crypto_put_default_null_skcipher2();
 	kfree(tfm);
 }
 
@@ -535,7 +542,6 @@ static void aead_sock_destruct(struct sock *sk)
 	unsigned int ivlen = crypto_aead_ivsize(tfm);
 
 	af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
-	crypto_put_default_null_skcipher2();
 	sock_kzfree_s(sk, ctx->iv, ivlen);
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);
@@ -565,7 +571,7 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk)
 	INIT_LIST_HEAD(&ctx->tsgl_list);
 	ctx->len = len;
 	ctx->used = 0;
-	ctx->rcvused = 0;
+	atomic_set(&ctx->rcvused, 0);
 	ctx->more = 0;
 	ctx->merge = 0;
 	ctx->enc = 0;
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 30cff827dd8fff048fa3e2ca7de770ab73022749..c5c47b680152034581957dc13ac1884f0c69f546 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 	int err = 0;
 	size_t len = 0;
 
+	if (!ctx->used) {
+		err = af_alg_wait_for_data(sk, flags);
+		if (err)
+			return err;
+	}
+
 	/* Allocate cipher request for current operation. */
 	areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
 				     crypto_skcipher_reqsize(tfm));
@@ -119,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 		/* AIO operation */
 		sock_hold(sk);
 		areq->iocb = msg->msg_iocb;
+
+		/* Remember output size that will be generated. */
+		areq->outlen = len;
+
 		skcipher_request_set_callback(&areq->cra_u.skcipher_req,
 					      CRYPTO_TFM_REQ_MAY_SLEEP,
 					      af_alg_async_cb, areq);
@@ -127,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 			crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
 
 		/* AIO operation in progress */
-		if (err == -EINPROGRESS || err == -EBUSY) {
-			/* Remember output size that will be generated. */
-			areq->outlen = len;
-
+		if (err == -EINPROGRESS || err == -EBUSY)
 			return -EIOCBQUEUED;
-		}
 
 		sock_put(sk);
 	} else {
@@ -384,7 +390,7 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 	INIT_LIST_HEAD(&ctx->tsgl_list);
 	ctx->len = len;
 	ctx->used = 0;
-	ctx->rcvused = 0;
+	atomic_set(&ctx->rcvused, 0);
 	ctx->more = 0;
 	ctx->merge = 0;
 	ctx->enc = 0;
diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c
index c1ca1e86f5c4f86d1110343aa5194e2c3147f4d5..a6dcaa659aa8c162e2593df43c52259a0f2900c4 100644
--- a/crypto/asymmetric_keys/pkcs7_parser.c
+++ b/crypto/asymmetric_keys/pkcs7_parser.c
@@ -148,8 +148,10 @@ struct pkcs7_message *pkcs7_parse_message(const void *data, size_t datalen)
 	}
 
 	ret = pkcs7_check_authattrs(ctx->msg);
-	if (ret < 0)
+	if (ret < 0) {
+		msg = ERR_PTR(ret);
 		goto out;
+	}
 
 	msg = ctx->msg;
 	ctx->msg = NULL;
diff --git a/crypto/asymmetric_keys/pkcs7_trust.c b/crypto/asymmetric_keys/pkcs7_trust.c
index f6a009d88a33fb550654b11d2cfc4460c733a049..1f4e25f10049c2645c2a421b07bb21fdaf1c0857 100644
--- a/crypto/asymmetric_keys/pkcs7_trust.c
+++ b/crypto/asymmetric_keys/pkcs7_trust.c
@@ -69,7 +69,7 @@ static int pkcs7_validate_trust_one(struct pkcs7_message *pkcs7,
 		 /* Self-signed certificates form roots of their own, and if we
 		  * don't know them, then we can't accept them.
 		  */
-		if (x509->next == x509) {
+		if (x509->signer == x509) {
 			kleave(" = -ENOKEY [unknown self-signed]");
 			return -ENOKEY;
 		}
diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index 2d93d9eccb4d0c46d7b93306f50f590709d1b21f..39e6de0c2761fb03efab8f144fa1d1693e7cb035 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -59,11 +59,8 @@ static int pkcs7_digest(struct pkcs7_message *pkcs7,
 	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	/* Digest the message [RFC2315 9.3] */
-	ret = crypto_shash_init(desc);
-	if (ret < 0)
-		goto error;
-	ret = crypto_shash_finup(desc, pkcs7->data, pkcs7->data_len,
-				 sig->digest);
+	ret = crypto_shash_digest(desc, pkcs7->data, pkcs7->data_len,
+				  sig->digest);
 	if (ret < 0)
 		goto error;
 	pr_devel("MsgDigest = [%*ph]\n", 8, sig->digest);
@@ -150,7 +147,7 @@ static int pkcs7_find_key(struct pkcs7_message *pkcs7,
 		pr_devel("Sig %u: Found cert serial match X.509[%u]\n",
 			 sinfo->index, certix);
 
-		if (x509->pub->pkey_algo != sinfo->sig->pkey_algo) {
+		if (strcmp(x509->pub->pkey_algo, sinfo->sig->pkey_algo) != 0) {
 			pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n",
 				sinfo->index);
 			continue;
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index bc3035ef27a22b3ca593532dedc8020bb2773a2a..de996586762a83c0b3214aaf5fa561bde92f65c0 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -73,7 +73,7 @@ int public_key_verify_signature(const struct public_key *pkey,
 	char alg_name_buf[CRYPTO_MAX_ALG_NAME];
 	void *output;
 	unsigned int outlen;
-	int ret = -ENOMEM;
+	int ret;
 
 	pr_devel("==>%s()\n", __func__);
 
@@ -99,6 +99,7 @@ int public_key_verify_signature(const struct public_key *pkey,
 	if (IS_ERR(tfm))
 		return PTR_ERR(tfm);
 
+	ret = -ENOMEM;
 	req = akcipher_request_alloc(tfm, GFP_KERNEL);
 	if (!req)
 		goto error_free_tfm;
@@ -127,7 +128,7 @@ int public_key_verify_signature(const struct public_key *pkey,
 	 * signature and returns that to us.
 	 */
 	ret = crypto_wait_req(crypto_akcipher_verify(req), &cwait);
-	if (ret < 0)
+	if (ret)
 		goto out_free_output;
 
 	/* Do the actual verification step. */
@@ -142,6 +143,8 @@ int public_key_verify_signature(const struct public_key *pkey,
 error_free_tfm:
 	crypto_free_akcipher(tfm);
 	pr_devel("<==%s() = %d\n", __func__, ret);
+	if (WARN_ON_ONCE(ret > 0))
+		ret = -EINVAL;
 	return ret;
 }
 EXPORT_SYMBOL_GPL(public_key_verify_signature);
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index dd03fead1ca358fc4f21cdb0c5c65b9896a39b38..ce2df8c9c583970be0177d2a50de85316fb1c2b2 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -409,6 +409,8 @@ int x509_extract_key_data(void *context, size_t hdrlen,
 	ctx->cert->pub->pkey_algo = "rsa";
 
 	/* Discard the BIT STRING metadata */
+	if (vlen < 1 || *(const u8 *)value != 0)
+		return -EBADMSG;
 	ctx->key = value + 1;
 	ctx->key_size = vlen - 1;
 	return 0;
diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
index c9013582c026748a10b09eeb09d5c71f0571ea7a..9338b4558cdc52b70a86c820ef2d39186bf1e3c3 100644
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -79,11 +79,7 @@ int x509_get_sig_params(struct x509_certificate *cert)
 	desc->tfm = tfm;
 	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
-	ret = crypto_shash_init(desc);
-	if (ret < 0)
-		goto error_2;
-	might_sleep();
-	ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, sig->digest);
+	ret = crypto_shash_digest(desc, cert->tbs, cert->tbs_size, sig->digest);
 	if (ret < 0)
 		goto error_2;
 
@@ -135,7 +131,7 @@ int x509_check_for_self_signed(struct x509_certificate *cert)
 	}
 
 	ret = -EKEYREJECTED;
-	if (cert->pub->pkey_algo != cert->sig->pkey_algo)
+	if (strcmp(cert->pub->pkey_algo, cert->sig->pkey_algo) != 0)
 		goto out;
 
 	ret = public_key_verify_signature(cert->pub, cert->sig);
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index db1bc3147bc4708b5a4e16c36844e5452101bf64..600afa99941fe07470b74f3c142b59716656df92 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -610,6 +610,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
 						    algt->mask));
 	if (IS_ERR(poly))
 		return PTR_ERR(poly);
+	poly_hash = __crypto_hash_alg_common(poly);
+
+	err = -EINVAL;
+	if (poly_hash->digestsize != POLY1305_DIGEST_SIZE)
+		goto out_put_poly;
 
 	err = -ENOMEM;
 	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
@@ -618,7 +623,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
 
 	ctx = aead_instance_ctx(inst);
 	ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize;
-	poly_hash = __crypto_hash_alg_common(poly);
 	err = crypto_init_ahash_spawn(&ctx->poly, poly_hash,
 				      aead_crypto_instance(inst));
 	if (err)
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 92871dc2a63ec66ca628df3a44b025b7ef6f247e..e74730224f0a5f6346bb8ae7b80f3ed5e6cb6281 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -195,11 +195,15 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	salg = shash_attr_alg(tb[1], 0, 0);
 	if (IS_ERR(salg))
 		return PTR_ERR(salg);
+	alg = &salg->base;
 
+	/* The underlying hash algorithm must be unkeyed */
 	err = -EINVAL;
+	if (crypto_shash_alg_has_setkey(salg))
+		goto out_put_alg;
+
 	ds = salg->digestsize;
 	ss = salg->statesize;
-	alg = &salg->base;
 	if (ds > alg->cra_blocksize ||
 	    ss < alg->cra_blocksize)
 		goto out_put_alg;
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
index 4e64726588524f137acd590809bef11673695ed2..eca04d3729b37c696c2dac4b0ac472422f30615d 100644
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue,
 		pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
 		crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
 		INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
+		spin_lock_init(&cpu_queue->q_lock);
 	}
 	return 0;
 }
@@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
 	int cpu, err;
 	struct mcryptd_cpu_queue *cpu_queue;
 
-	cpu = get_cpu();
-	cpu_queue = this_cpu_ptr(queue->cpu_queue);
-	rctx->tag.cpu = cpu;
+	cpu_queue = raw_cpu_ptr(queue->cpu_queue);
+	spin_lock(&cpu_queue->q_lock);
+	cpu = smp_processor_id();
+	rctx->tag.cpu = smp_processor_id();
 
 	err = crypto_enqueue_request(&cpu_queue->queue, request);
 	pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
 		 cpu, cpu_queue, request);
+	spin_unlock(&cpu_queue->q_lock);
 	queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
-	put_cpu();
 
 	return err;
 }
@@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work)
 	cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
 	i = 0;
 	while (i < MCRYPTD_BATCH || single_task_running()) {
-		/*
-		 * preempt_disable/enable is used to prevent
-		 * being preempted by mcryptd_enqueue_request()
-		 */
-		local_bh_disable();
-		preempt_disable();
+
+		spin_lock_bh(&cpu_queue->q_lock);
 		backlog = crypto_get_backlog(&cpu_queue->queue);
 		req = crypto_dequeue_request(&cpu_queue->queue);
-		preempt_enable();
-		local_bh_enable();
+		spin_unlock_bh(&cpu_queue->q_lock);
 
 		if (!req) {
 			mcryptd_opportunistic_flush();
@@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work)
 		++i;
 	}
 	if (cpu_queue->queue.qlen)
-		queue_work(kcrypto_wq, &cpu_queue->work);
+		queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
 }
 
 void mcryptd_flusher(struct work_struct *__work)
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index ee9cfb99fe256af06ae7ad5d946c3b76d90de1e9..f8ec3d4ba4a80f8eefed739d9e8a852865a7ac02 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -254,6 +254,14 @@ static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm)
 	crypto_free_aead(ctx->child);
 }
 
+static void pcrypt_free(struct aead_instance *inst)
+{
+	struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst);
+
+	crypto_drop_aead(&ctx->spawn);
+	kfree(inst);
+}
+
 static int pcrypt_init_instance(struct crypto_instance *inst,
 				struct crypto_alg *alg)
 {
@@ -319,6 +327,8 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
 	inst->alg.encrypt = pcrypt_aead_encrypt;
 	inst->alg.decrypt = pcrypt_aead_decrypt;
 
+	inst->free = pcrypt_free;
+
 	err = aead_register_instance(tmpl, inst);
 	if (err)
 		goto out_drop_aead;
@@ -349,14 +359,6 @@ static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb)
 	return -EINVAL;
 }
 
-static void pcrypt_free(struct crypto_instance *inst)
-{
-	struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst);
-
-	crypto_drop_aead(&ctx->spawn);
-	kfree(inst);
-}
-
 static int pcrypt_cpumask_change_notify(struct notifier_block *self,
 					unsigned long val, void *data)
 {
@@ -469,7 +471,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt)
 static struct crypto_template pcrypt_tmpl = {
 	.name = "pcrypt",
 	.create = pcrypt_create,
-	.free = pcrypt_free,
 	.module = THIS_MODULE,
 };
 
diff --git a/crypto/rsa_helper.c b/crypto/rsa_helper.c
index 0b66dc8246068aa084dd0b44210b04dee5f2bccb..cad395d70d78e18527866bf1a3f6452c338e4c7c 100644
--- a/crypto/rsa_helper.c
+++ b/crypto/rsa_helper.c
@@ -30,7 +30,7 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag,
 		return -EINVAL;
 
 	if (fips_enabled) {
-		while (!*ptr && n_sz) {
+		while (n_sz && !*ptr) {
 			ptr++;
 			n_sz--;
 		}
diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c
index f550b5d9463074b16670129341de59e069f8509c..d7da0eea5622af96f63300ad45c35450951551e1 100644
--- a/crypto/salsa20_generic.c
+++ b/crypto/salsa20_generic.c
@@ -188,13 +188,6 @@ static int encrypt(struct blkcipher_desc *desc,
 
 	salsa20_ivsetup(ctx, walk.iv);
 
-	if (likely(walk.nbytes == nbytes))
-	{
-		salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
-				      walk.src.virt.addr, nbytes);
-		return blkcipher_walk_done(desc, &walk, 0);
-	}
-
 	while (walk.nbytes >= 64) {
 		salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
 				      walk.src.virt.addr,
diff --git a/crypto/shash.c b/crypto/shash.c
index 325a14da58278f01b8c1ffd92bdd8990db2860c4..e849d3ee2e2728d346df1f21f6a8d4db57fc42c5 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -25,11 +25,12 @@
 
 static const struct crypto_type crypto_shash_type;
 
-static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
-			   unsigned int keylen)
+int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+		    unsigned int keylen)
 {
 	return -ENOSYS;
 }
+EXPORT_SYMBOL_GPL(shash_no_setkey);
 
 static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
 				  unsigned int keylen)
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 778e0ff42bfa801eda5be848da9e6747ebbc2626..11af5fd6a443570550e1dac5b0a429b2cae801b1 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
 
 	walk->total = req->cryptlen;
 	walk->nbytes = 0;
+	walk->iv = req->iv;
+	walk->oiv = req->iv;
 
 	if (unlikely(!walk->total))
 		return 0;
@@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
 	scatterwalk_start(&walk->in, req->src);
 	scatterwalk_start(&walk->out, req->dst);
 
-	walk->iv = req->iv;
-	walk->oiv = req->iv;
-
 	walk->flags &= ~SKCIPHER_WALK_SLEEP;
 	walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
 		       SKCIPHER_WALK_SLEEP : 0;
@@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
 	int err;
 
 	walk->nbytes = 0;
+	walk->iv = req->iv;
+	walk->oiv = req->iv;
 
 	if (unlikely(!walk->total))
 		return 0;
@@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
 	scatterwalk_done(&walk->in, 0, walk->total);
 	scatterwalk_done(&walk->out, 0, walk->total);
 
-	walk->iv = req->iv;
-	walk->oiv = req->iv;
-
 	if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
 		walk->flags |= SKCIPHER_WALK_SLEEP;
 	else
diff --git a/drivers/Makefile b/drivers/Makefile
index 1d034b6804310a1d740b4e8aa40c649922748d77..e06f7f633f73ff285c64e19b89041d17e2be23ce 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -105,6 +105,7 @@ obj-$(CONFIG_TC)		+= tc/
 obj-$(CONFIG_UWB)		+= uwb/
 obj-$(CONFIG_USB_PHY)		+= usb/
 obj-$(CONFIG_USB)		+= usb/
+obj-$(CONFIG_USB_SUPPORT)	+= usb/
 obj-$(CONFIG_PCI)		+= usb/
 obj-$(CONFIG_USB_GADGET)	+= usb/
 obj-$(CONFIG_OF)		+= usb/
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6742f6c68034c5e833505d294902dd97c274c1b0..9bff853e85f37831d8d053a2aa363f139537c9b5 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -1007,7 +1007,7 @@ static ssize_t erst_reader(struct pstore_record *record)
 	/* The record may be cleared by others, try read next record */
 	if (len == -ENOENT)
 		goto skip;
-	else if (len < sizeof(*rcd)) {
+	else if (len < 0 || len < sizeof(*rcd)) {
 		rc = -EIO;
 		goto out;
 	}
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 21c28433c590a4aec323bdcea19a0e1426c751e4..06ea4749ebd9826a3d7b8b0a9798a1cc797f4d61 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -949,7 +949,7 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val)
 	}
 
 	*val = 0;
-	if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM)
+	if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0)
 		vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id);
 	else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
 		vaddr = reg_res->sys_mem_vaddr;
@@ -988,7 +988,7 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
 	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
 	struct cpc_reg *reg = &reg_res->cpc_entry.reg;
 
-	if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM)
+	if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0)
 		vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id);
 	else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
 		vaddr = reg_res->sys_mem_vaddr;
@@ -1035,14 +1035,15 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps)
 		*lowest_non_linear_reg, *nominal_reg;
 	u64 high, low, nom, min_nonlinear;
 	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
-	struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
+	struct cppc_pcc_data *pcc_ss_data;
 	int ret = 0, regs_in_pcc = 0;
 
-	if (!cpc_desc) {
+	if (!cpc_desc || pcc_ss_id < 0) {
 		pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
 		return -ENODEV;
 	}
 
+	pcc_ss_data = pcc_data[pcc_ss_id];
 	highest_reg = &cpc_desc->cpc_regs[HIGHEST_PERF];
 	lowest_reg = &cpc_desc->cpc_regs[LOWEST_PERF];
 	lowest_non_linear_reg = &cpc_desc->cpc_regs[LOW_NON_LINEAR_PERF];
@@ -1095,15 +1096,16 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
 	struct cpc_register_resource *delivered_reg, *reference_reg,
 		*ref_perf_reg, *ctr_wrap_reg;
 	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
-	struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
+	struct cppc_pcc_data *pcc_ss_data;
 	u64 delivered, reference, ref_perf, ctr_wrap_time;
 	int ret = 0, regs_in_pcc = 0;
 
-	if (!cpc_desc) {
+	if (!cpc_desc || pcc_ss_id < 0) {
 		pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
 		return -ENODEV;
 	}
 
+	pcc_ss_data = pcc_data[pcc_ss_id];
 	delivered_reg = &cpc_desc->cpc_regs[DELIVERED_CTR];
 	reference_reg = &cpc_desc->cpc_regs[REFERENCE_CTR];
 	ref_perf_reg = &cpc_desc->cpc_regs[REFERENCE_PERF];
@@ -1169,14 +1171,15 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
 	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
 	struct cpc_register_resource *desired_reg;
 	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
-	struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
+	struct cppc_pcc_data *pcc_ss_data;
 	int ret = 0;
 
-	if (!cpc_desc) {
+	if (!cpc_desc || pcc_ss_id < 0) {
 		pr_debug("No CPC descriptor for CPU:%d\n", cpu);
 		return -ENODEV;
 	}
 
+	pcc_ss_data = pcc_data[pcc_ss_id];
 	desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
 
 	/*
@@ -1301,7 +1304,7 @@ unsigned int cppc_get_transition_latency(int cpu_num)
 	struct cpc_desc *cpc_desc;
 	struct cpc_register_resource *desired_reg;
 	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu_num);
-	struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
+	struct cppc_pcc_data *pcc_ss_data;
 
 	cpc_desc = per_cpu(cpc_desc_ptr, cpu_num);
 	if (!cpc_desc)
@@ -1311,6 +1314,10 @@ unsigned int cppc_get_transition_latency(int cpu_num)
 	if (!CPC_IN_PCC(desired_reg))
 		return CPUFREQ_ETERNAL;
 
+	if (pcc_ss_id < 0)
+		return CPUFREQ_ETERNAL;
+
+	pcc_ss_data = pcc_data[pcc_ss_id];
 	if (pcc_ss_data->pcc_mpar)
 		latency_ns = 60 * (1000 * 1000 * 1000 / pcc_ss_data->pcc_mpar);
 
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index e4ffaeec9ec204110f7e50c5cff49172a486f081..a4c8ad98560dc4a3a4e21bd1bbb825bd73d237b7 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1138,7 +1138,7 @@ int acpi_subsys_thaw_noirq(struct device *dev)
 	 * skip all of the subsequent "thaw" callbacks for the device.
 	 */
 	if (dev_pm_smart_suspend_and_suspended(dev)) {
-		dev->power.direct_complete = true;
+		dev_pm_skip_next_resume_phases(dev);
 		return 0;
 	}
 
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index ff2580e7611d18c6d56c58d50c2cbc3a2d54aa36..abeb4df4f22e43d7f0d1398af9962135a37af4b6 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 				dev_name(&adev_dimm->dev));
 		return -ENXIO;
 	}
+	/*
+	 * Record nfit_mem for the notification path to track back to
+	 * the nfit sysfs attributes for this dimm device object.
+	 */
+	dev_set_drvdata(&adev_dimm->dev, nfit_mem);
 
 	/*
 	 * Until standardization materializes we need to consider 4
@@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data)
 			sysfs_put(nfit_mem->flags_attr);
 			nfit_mem->flags_attr = NULL;
 		}
-		if (adev_dimm)
+		if (adev_dimm) {
 			acpi_remove_notify_handler(adev_dimm->handle,
 					ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
+			dev_set_drvdata(&adev_dimm->dev, NULL);
+		}
 	}
 	mutex_unlock(&acpi_desc->init_mutex);
 }
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index a73596a4f804c817bfd277d3ff51eb42ca969bae..a7ecfde66b7b34f44cc9cffeac0be1b703e29aff 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -482,7 +482,8 @@ enum binder_deferred_state {
  * @tsk                   task_struct for group_leader of process
  *                        (invariant after initialized)
  * @files                 files_struct for process
- *                        (invariant after initialized)
+ *                        (protected by @files_lock)
+ * @files_lock            mutex to protect @files
  * @deferred_work_node:   element for binder_deferred_list
  *                        (protected by binder_deferred_lock)
  * @deferred_work:        bitmap of deferred work to perform
@@ -530,6 +531,7 @@ struct binder_proc {
 	int pid;
 	struct task_struct *tsk;
 	struct files_struct *files;
+	struct mutex files_lock;
 	struct hlist_node deferred_work_node;
 	int deferred_work;
 	bool is_dead;
@@ -877,20 +879,26 @@ static void binder_inc_node_tmpref_ilocked(struct binder_node *node);
 
 static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
 {
-	struct files_struct *files = proc->files;
 	unsigned long rlim_cur;
 	unsigned long irqs;
+	int ret;
 
-	if (files == NULL)
-		return -ESRCH;
-
-	if (!lock_task_sighand(proc->tsk, &irqs))
-		return -EMFILE;
-
+	mutex_lock(&proc->files_lock);
+	if (proc->files == NULL) {
+		ret = -ESRCH;
+		goto err;
+	}
+	if (!lock_task_sighand(proc->tsk, &irqs)) {
+		ret = -EMFILE;
+		goto err;
+	}
 	rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
 	unlock_task_sighand(proc->tsk, &irqs);
 
-	return __alloc_fd(files, 0, rlim_cur, flags);
+	ret = __alloc_fd(proc->files, 0, rlim_cur, flags);
+err:
+	mutex_unlock(&proc->files_lock);
+	return ret;
 }
 
 /*
@@ -899,8 +907,10 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
 static void task_fd_install(
 	struct binder_proc *proc, unsigned int fd, struct file *file)
 {
+	mutex_lock(&proc->files_lock);
 	if (proc->files)
 		__fd_install(proc->files, fd, file);
+	mutex_unlock(&proc->files_lock);
 }
 
 /*
@@ -910,9 +920,11 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
 {
 	int retval;
 
-	if (proc->files == NULL)
-		return -ESRCH;
-
+	mutex_lock(&proc->files_lock);
+	if (proc->files == NULL) {
+		retval = -ESRCH;
+		goto err;
+	}
 	retval = __close_fd(proc->files, fd);
 	/* can't restart close syscall because file table entry was cleared */
 	if (unlikely(retval == -ERESTARTSYS ||
@@ -920,7 +932,8 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
 		     retval == -ERESTARTNOHAND ||
 		     retval == -ERESTART_RESTARTBLOCK))
 		retval = -EINTR;
-
+err:
+	mutex_unlock(&proc->files_lock);
 	return retval;
 }
 
@@ -1947,6 +1960,26 @@ static void binder_send_failed_reply(struct binder_transaction *t,
 	}
 }
 
+/**
+ * binder_cleanup_transaction() - cleans up undelivered transaction
+ * @t:		transaction that needs to be cleaned up
+ * @reason:	reason the transaction wasn't delivered
+ * @error_code:	error to return to caller (if synchronous call)
+ */
+static void binder_cleanup_transaction(struct binder_transaction *t,
+				       const char *reason,
+				       uint32_t error_code)
+{
+	if (t->buffer->target_node && !(t->flags & TF_ONE_WAY)) {
+		binder_send_failed_reply(t, error_code);
+	} else {
+		binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
+			"undelivered transaction %d, %s\n",
+			t->debug_id, reason);
+		binder_free_transaction(t);
+	}
+}
+
 /**
  * binder_validate_object() - checks for a valid metadata object in a buffer.
  * @buffer:	binder_buffer that we're parsing.
@@ -4015,12 +4048,20 @@ static int binder_thread_read(struct binder_proc *proc,
 		if (put_user(cmd, (uint32_t __user *)ptr)) {
 			if (t_from)
 				binder_thread_dec_tmpref(t_from);
+
+			binder_cleanup_transaction(t, "put_user failed",
+						   BR_FAILED_REPLY);
+
 			return -EFAULT;
 		}
 		ptr += sizeof(uint32_t);
 		if (copy_to_user(ptr, &tr, sizeof(tr))) {
 			if (t_from)
 				binder_thread_dec_tmpref(t_from);
+
+			binder_cleanup_transaction(t, "copy_to_user failed",
+						   BR_FAILED_REPLY);
+
 			return -EFAULT;
 		}
 		ptr += sizeof(tr);
@@ -4090,15 +4131,9 @@ static void binder_release_work(struct binder_proc *proc,
 			struct binder_transaction *t;
 
 			t = container_of(w, struct binder_transaction, work);
-			if (t->buffer->target_node &&
-			    !(t->flags & TF_ONE_WAY)) {
-				binder_send_failed_reply(t, BR_DEAD_REPLY);
-			} else {
-				binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
-					"undelivered transaction %d\n",
-					t->debug_id);
-				binder_free_transaction(t);
-			}
+
+			binder_cleanup_transaction(t, "process died.",
+						   BR_DEAD_REPLY);
 		} break;
 		case BINDER_WORK_RETURN_ERROR: {
 			struct binder_error *e = container_of(
@@ -4605,7 +4640,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 	ret = binder_alloc_mmap_handler(&proc->alloc, vma);
 	if (ret)
 		return ret;
+	mutex_lock(&proc->files_lock);
 	proc->files = get_files_struct(current);
+	mutex_unlock(&proc->files_lock);
 	return 0;
 
 err_bad_arg:
@@ -4629,6 +4666,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
 	spin_lock_init(&proc->outer_lock);
 	get_task_struct(current->group_leader);
 	proc->tsk = current->group_leader;
+	mutex_init(&proc->files_lock);
 	INIT_LIST_HEAD(&proc->todo);
 	proc->default_priority = task_nice(current);
 	binder_dev = container_of(filp->private_data, struct binder_device,
@@ -4881,9 +4919,11 @@ static void binder_deferred_func(struct work_struct *work)
 
 		files = NULL;
 		if (defer & BINDER_DEFERRED_PUT_FILES) {
+			mutex_lock(&proc->files_lock);
 			files = proc->files;
 			if (files)
 				proc->files = NULL;
+			mutex_unlock(&proc->files_lock);
 		}
 
 		if (defer & BINDER_DEFERRED_FLUSH)
diff --git a/drivers/ata/ahci_mtk.c b/drivers/ata/ahci_mtk.c
index 80854f71559a319fc8fb0ca4b0dd15eb7aec6460..0ae6971c2a4cfcd3bba93856335f4a031137385c 100644
--- a/drivers/ata/ahci_mtk.c
+++ b/drivers/ata/ahci_mtk.c
@@ -1,5 +1,5 @@
 /*
- * MeidaTek AHCI SATA driver
+ * MediaTek AHCI SATA driver
  *
  * Copyright (c) 2017 MediaTek Inc.
  * Author: Ryder Lee <ryder.lee@mediatek.com>
@@ -25,7 +25,7 @@
 #include <linux/reset.h>
 #include "ahci.h"
 
-#define DRV_NAME		"ahci"
+#define DRV_NAME		"ahci-mtk"
 
 #define SYS_CFG			0x14
 #define SYS_CFG_SATA_MSK	GENMASK(31, 30)
@@ -192,5 +192,5 @@ static struct platform_driver mtk_ahci_driver = {
 };
 module_platform_driver(mtk_ahci_driver);
 
-MODULE_DESCRIPTION("MeidaTek SATA AHCI Driver");
+MODULE_DESCRIPTION("MediaTek SATA AHCI Driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c
index b6b0bf76dfc7bb7fe90f45418aab974bf73b6f87..2685f28160f70764ee4013930239566031c9b058 100644
--- a/drivers/ata/ahci_qoriq.c
+++ b/drivers/ata/ahci_qoriq.c
@@ -35,6 +35,8 @@
 
 /* port register default value */
 #define AHCI_PORT_PHY_1_CFG	0xa003fffe
+#define AHCI_PORT_PHY2_CFG	0x28184d1f
+#define AHCI_PORT_PHY3_CFG	0x0e081509
 #define AHCI_PORT_TRANS_CFG	0x08000029
 #define AHCI_PORT_AXICC_CFG	0x3fffffff
 
@@ -183,6 +185,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
 		writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
 				qpriv->ecc_addr);
 		writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
+		writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
+		writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
 		writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
 		if (qpriv->is_dmacoherent)
 			writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
@@ -190,6 +194,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
 
 	case AHCI_LS2080A:
 		writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
+		writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
+		writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
 		writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
 		if (qpriv->is_dmacoherent)
 			writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
@@ -201,6 +207,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
 		writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
 				qpriv->ecc_addr);
 		writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
+		writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
+		writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
 		writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
 		if (qpriv->is_dmacoherent)
 			writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
@@ -212,6 +220,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
 		writel(readl(qpriv->ecc_addr) | ECC_DIS_LS1088A,
 		       qpriv->ecc_addr);
 		writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
+		writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
+		writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
 		writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
 		if (qpriv->is_dmacoherent)
 			writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
@@ -219,6 +229,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
 
 	case AHCI_LS2088A:
 		writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
+		writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
+		writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
 		writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
 		if (qpriv->is_dmacoherent)
 			writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 2a882929de4aa3cfcce6b46de459ee7f7360a075..8193b38a1cae7a8d738fb4c29828654dd02f0572 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3082,13 +3082,19 @@ int sata_down_spd_limit(struct ata_link *link, u32 spd_limit)
 	bit = fls(mask) - 1;
 	mask &= ~(1 << bit);
 
-	/* Mask off all speeds higher than or equal to the current
-	 * one.  Force 1.5Gbps if current SPD is not available.
+	/*
+	 * Mask off all speeds higher than or equal to the current one.  At
+	 * this point, if current SPD is not available and we previously
+	 * recorded the link speed from SStatus, the driver has already
+	 * masked off the highest bit so mask should already be 1 or 0.
+	 * Otherwise, we should not force 1.5Gbps on a link where we have
+	 * not previously recorded speed from SStatus.  Just return in this
+	 * case.
 	 */
 	if (spd > 1)
 		mask &= (1 << (spd - 1)) - 1;
 	else
-		mask &= 1;
+		return -EINVAL;
 
 	/* were we already at the bottom? */
 	if (!mask)
diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c
index ffd8d33c6e0f044dece4e4bcc4560aebd68290ac..6db2e34bd52f2e5521e72ac2b17e1ee1fe43cc44 100644
--- a/drivers/ata/pata_pdc2027x.c
+++ b/drivers/ata/pata_pdc2027x.c
@@ -82,7 +82,7 @@ static int pdc2027x_set_mode(struct ata_link *link, struct ata_device **r_failed
  * is issued to the device. However, if the controller clock is 133MHz,
  * the following tables must be used.
  */
-static struct pdc2027x_pio_timing {
+static const struct pdc2027x_pio_timing {
 	u8 value0, value1, value2;
 } pdc2027x_pio_timing_tbl[] = {
 	{ 0xfb, 0x2b, 0xac }, /* PIO mode 0 */
@@ -92,7 +92,7 @@ static struct pdc2027x_pio_timing {
 	{ 0x23, 0x09, 0x25 }, /* PIO mode 4, IORDY on, Prefetch off */
 };
 
-static struct pdc2027x_mdma_timing {
+static const struct pdc2027x_mdma_timing {
 	u8 value0, value1;
 } pdc2027x_mdma_timing_tbl[] = {
 	{ 0xdf, 0x5f }, /* MDMA mode 0 */
@@ -100,7 +100,7 @@ static struct pdc2027x_mdma_timing {
 	{ 0x69, 0x25 }, /* MDMA mode 2 */
 };
 
-static struct pdc2027x_udma_timing {
+static const struct pdc2027x_udma_timing {
 	u8 value0, value1, value2;
 } pdc2027x_udma_timing_tbl[] = {
 	{ 0x4a, 0x0f, 0xd5 }, /* UDMA mode 0 */
@@ -649,7 +649,7 @@ static long pdc_detect_pll_input_clock(struct ata_host *host)
  * @host: target ATA host
  * @board_idx: board identifier
  */
-static int pdc_hardware_init(struct ata_host *host, unsigned int board_idx)
+static void pdc_hardware_init(struct ata_host *host, unsigned int board_idx)
 {
 	long pll_clock;
 
@@ -665,8 +665,6 @@ static int pdc_hardware_init(struct ata_host *host, unsigned int board_idx)
 
 	/* Adjust PLL control register */
 	pdc_adjust_pll(host, pll_clock, board_idx);
-
-	return 0;
 }
 
 /**
@@ -753,8 +751,7 @@ static int pdc2027x_init_one(struct pci_dev *pdev,
 	//pci_enable_intx(pdev);
 
 	/* initialize adapter */
-	if (pdc_hardware_init(host, board_idx) != 0)
-		return -EIO;
+	pdc_hardware_init(host, board_idx);
 
 	pci_set_master(pdev);
 	return ata_host_activate(host, pdev->irq, ata_bmdma_interrupt,
@@ -778,8 +775,7 @@ static int pdc2027x_reinit_one(struct pci_dev *pdev)
 	else
 		board_idx = PDC_UDMA_133;
 
-	if (pdc_hardware_init(host, board_idx))
-		return -EIO;
+	pdc_hardware_init(host, board_idx);
 
 	ata_host_resume(host);
 	return 0;
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 2c3cab066871680b9a6a5e778e8deb20b57f99b9..49fd50fccd48bef76feb90bce09f6f78fe3e0e5f 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -91,22 +91,23 @@ config FIRMWARE_IN_KERNEL
 	depends on FW_LOADER
 	default y
 	help
-	  The kernel source tree includes a number of firmware 'blobs'
-	  that are used by various drivers. The recommended way to
-	  use these is to run "make firmware_install", which, after
-	  converting ihex files to binary, copies all of the needed
-	  binary files in firmware/ to /lib/firmware/ on your system so
-	  that they can be loaded by userspace helpers on request.
+	  Various drivers in the kernel source tree may require firmware,
+	  which is generally available in your distribution's linux-firmware
+	  package.
+
+	  The linux-firmware package should install firmware into
+	  /lib/firmware/ on your system, so they can be loaded by userspace
+	  helpers on request.
 
 	  Enabling this option will build each required firmware blob
-	  into the kernel directly, where request_firmware() will find
-	  them without having to call out to userspace. This may be
-	  useful if your root file system requires a device that uses
-	  such firmware and do not wish to use an initrd.
+	  specified by EXTRA_FIRMWARE into the kernel directly, where
+	  request_firmware() will find them without having to call out to
+	  userspace. This may be useful if your root file system requires a
+	  device that uses such firmware and you do not wish to use an
+	  initrd.
 
 	  This single option controls the inclusion of firmware for
-	  every driver that uses request_firmware() and ships its
-	  firmware in the kernel source tree, which avoids a
+	  every driver that uses request_firmware(), which avoids a
 	  proliferation of 'Include firmware for xxx device' options.
 
 	  Say 'N' and let firmware be loaded from userspace.
@@ -235,6 +236,9 @@ config GENERIC_CPU_DEVICES
 config GENERIC_CPU_AUTOPROBE
 	bool
 
+config GENERIC_CPU_VULNERABILITIES
+	bool
+
 config SOC_BUS
 	bool
 	select GLOB
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index eb3af2739537a8def39259206e2345a2adc72c71..07532d83be0bca7d06aa5e99670ac920435e0ed9 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -186,6 +186,11 @@ static void cache_associativity(struct cacheinfo *this_leaf)
 		this_leaf->ways_of_associativity = (size / nr_sets) / line_size;
 }
 
+static bool cache_node_is_unified(struct cacheinfo *this_leaf)
+{
+	return of_property_read_bool(this_leaf->of_node, "cache-unified");
+}
+
 static void cache_of_override_properties(unsigned int cpu)
 {
 	int index;
@@ -194,6 +199,14 @@ static void cache_of_override_properties(unsigned int cpu)
 
 	for (index = 0; index < cache_leaves(cpu); index++) {
 		this_leaf = this_cpu_ci->info_list + index;
+		/*
+		 * init_cache_level must setup the cache level correctly
+		 * overriding the architecturally specified levels, so
+		 * if type is NONE at this stage, it should be unified
+		 */
+		if (this_leaf->type == CACHE_TYPE_NOCACHE &&
+		    cache_node_is_unified(this_leaf))
+			this_leaf->type = CACHE_TYPE_UNIFIED;
 		cache_size(this_leaf);
 		cache_get_line_size(this_leaf);
 		cache_nr_sets(this_leaf);
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 58a9b608d8216a67f13d6c514d92290dc0a27de3..d99038487a0d2fab5164f8df3f1e0f7e58ea8ea3 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -511,10 +511,58 @@ static void __init cpu_dev_register_generic(void)
 #endif
 }
 
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+
+ssize_t __weak cpu_show_meltdown(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v1(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v2(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
+static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+
+static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+	&dev_attr_meltdown.attr,
+	&dev_attr_spectre_v1.attr,
+	&dev_attr_spectre_v2.attr,
+	NULL
+};
+
+static const struct attribute_group cpu_root_vulnerabilities_group = {
+	.name  = "vulnerabilities",
+	.attrs = cpu_root_vulnerabilities_attrs,
+};
+
+static void __init cpu_register_vulnerabilities(void)
+{
+	if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
+			       &cpu_root_vulnerabilities_group))
+		pr_err("Unable to register CPU vulnerabilities\n");
+}
+
+#else
+static inline void cpu_register_vulnerabilities(void) { }
+#endif
+
 void __init cpu_dev_init(void)
 {
 	if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
 		panic("Failed to register CPU subsystem");
 
 	cpu_dev_register_generic();
+	cpu_register_vulnerabilities();
 }
diff --git a/drivers/base/isa.c b/drivers/base/isa.c
index cd6ccdcf9df0c5ef2a37fabb2b4b7bbfa3c88755..372d10af26009dfae317affd625c756a2700fef1 100644
--- a/drivers/base/isa.c
+++ b/drivers/base/isa.c
@@ -39,7 +39,7 @@ static int isa_bus_probe(struct device *dev)
 {
 	struct isa_driver *isa_driver = dev->platform_data;
 
-	if (isa_driver->probe)
+	if (isa_driver && isa_driver->probe)
 		return isa_driver->probe(dev, to_isa_dev(dev)->id);
 
 	return 0;
@@ -49,7 +49,7 @@ static int isa_bus_remove(struct device *dev)
 {
 	struct isa_driver *isa_driver = dev->platform_data;
 
-	if (isa_driver->remove)
+	if (isa_driver && isa_driver->remove)
 		return isa_driver->remove(dev, to_isa_dev(dev)->id);
 
 	return 0;
@@ -59,7 +59,7 @@ static void isa_bus_shutdown(struct device *dev)
 {
 	struct isa_driver *isa_driver = dev->platform_data;
 
-	if (isa_driver->shutdown)
+	if (isa_driver && isa_driver->shutdown)
 		isa_driver->shutdown(dev, to_isa_dev(dev)->id);
 }
 
@@ -67,7 +67,7 @@ static int isa_bus_suspend(struct device *dev, pm_message_t state)
 {
 	struct isa_driver *isa_driver = dev->platform_data;
 
-	if (isa_driver->suspend)
+	if (isa_driver && isa_driver->suspend)
 		return isa_driver->suspend(dev, to_isa_dev(dev)->id, state);
 
 	return 0;
@@ -77,7 +77,7 @@ static int isa_bus_resume(struct device *dev)
 {
 	struct isa_driver *isa_driver = dev->platform_data;
 
-	if (isa_driver->resume)
+	if (isa_driver && isa_driver->resume)
 		return isa_driver->resume(dev, to_isa_dev(dev)->id);
 
 	return 0;
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index db2f044159274a35457f4f1e5b3a55d226f08cd2..08744b572af6a25184d274ba91304e19ec2be732 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -525,6 +525,21 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd)
 
 /*------------------------- Resume routines -------------------------*/
 
+/**
+ * dev_pm_skip_next_resume_phases - Skip next system resume phases for device.
+ * @dev: Target device.
+ *
+ * Make the core skip the "early resume" and "resume" phases for @dev.
+ *
+ * This function can be called by middle-layer code during the "noirq" phase of
+ * system resume if necessary, but not by device drivers.
+ */
+void dev_pm_skip_next_resume_phases(struct device *dev)
+{
+	dev->power.is_late_suspended = false;
+	dev->power.is_suspended = false;
+}
+
 /**
  * device_resume_noirq - Execute a "noirq resume" callback for given device.
  * @dev: Device to handle.
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 027d159ac3810e9b5b190345c1bee60dbbc13790..6e89b51ea3d92b40eba2059bfdb47703916454d9 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -276,7 +276,8 @@ static int rpm_get_suppliers(struct device *dev)
 			continue;
 
 		retval = pm_runtime_get_sync(link->supplier);
-		if (retval < 0) {
+		/* Ignore suppliers with disabled runtime PM. */
+		if (retval < 0 && retval != -EACCES) {
 			pm_runtime_put_noidle(link->supplier);
 			return retval;
 		}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index bc8e61506968a0c3da43f2e4396f0cc9550b0d51..d5fe720cf14940b668f8764de2bad6cf95549528 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1581,9 +1581,8 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
 	return err;
 }
 
-static void lo_release(struct gendisk *disk, fmode_t mode)
+static void __lo_release(struct loop_device *lo)
 {
-	struct loop_device *lo = disk->private_data;
 	int err;
 
 	if (atomic_dec_return(&lo->lo_refcnt))
@@ -1610,6 +1609,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 	mutex_unlock(&lo->lo_ctl_mutex);
 }
 
+static void lo_release(struct gendisk *disk, fmode_t mode)
+{
+	mutex_lock(&loop_index_mutex);
+	__lo_release(disk->private_data);
+	mutex_unlock(&loop_index_mutex);
+}
+
 static const struct block_device_operations lo_fops = {
 	.owner =	THIS_MODULE,
 	.open =		lo_open,
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index ccb9975a97fa3f214d658776450ab618bae26643..ad0477ae820f040affe54f4368d3a02d9da63350 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps)
 struct nullb_cmd {
 	struct list_head list;
 	struct llist_node ll_list;
-	call_single_data_t csd;
+	struct __call_single_data csd;
 	struct request *rq;
 	struct bio *bio;
 	unsigned int tag;
+	blk_status_t error;
 	struct nullb_queue *nq;
 	struct hrtimer timer;
-	blk_status_t error;
 };
 
 struct nullb_queue {
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 38fc5f397fdede04ebaf4f3f9c2e89118ee6b235..cc93522a6d419dc77902bab085306c2b316f6c16 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3047,13 +3047,21 @@ static void format_lock_cookie(struct rbd_device *rbd_dev, char *buf)
 	mutex_unlock(&rbd_dev->watch_mutex);
 }
 
+static void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie)
+{
+	struct rbd_client_id cid = rbd_get_cid(rbd_dev);
+
+	strcpy(rbd_dev->lock_cookie, cookie);
+	rbd_set_owner_cid(rbd_dev, &cid);
+	queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
+}
+
 /*
  * lock_rwsem must be held for write
  */
 static int rbd_lock(struct rbd_device *rbd_dev)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
-	struct rbd_client_id cid = rbd_get_cid(rbd_dev);
 	char cookie[32];
 	int ret;
 
@@ -3068,9 +3076,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
 		return ret;
 
 	rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
-	strcpy(rbd_dev->lock_cookie, cookie);
-	rbd_set_owner_cid(rbd_dev, &cid);
-	queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
+	__rbd_lock(rbd_dev, cookie);
 	return 0;
 }
 
@@ -3856,7 +3862,7 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
 			queue_delayed_work(rbd_dev->task_wq,
 					   &rbd_dev->lock_dwork, 0);
 	} else {
-		strcpy(rbd_dev->lock_cookie, cookie);
+		__rbd_lock(rbd_dev, cookie);
 	}
 }
 
@@ -4381,7 +4387,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	segment_size = rbd_obj_bytes(&rbd_dev->header);
 	blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
 	q->limits.max_sectors = queue_max_hw_sectors(q);
-	blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
+	blk_queue_max_segments(q, USHRT_MAX);
 	blk_queue_max_segment_size(q, segment_size);
 	blk_queue_io_min(q, segment_size);
 	blk_queue_io_opt(q, segment_size);
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 3c29d36702a8eceb4eabd5ed36cf3188adeb1694..5426c04fe24bc88faa88a2cff0624ccbce277ebe 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -1755,14 +1755,17 @@ static int cci_pmu_probe(struct platform_device *pdev)
 	raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock);
 	mutex_init(&cci_pmu->reserve_mutex);
 	atomic_set(&cci_pmu->active_events, 0);
-	cpumask_set_cpu(smp_processor_id(), &cci_pmu->cpus);
+	cpumask_set_cpu(get_cpu(), &cci_pmu->cpus);
 
 	ret = cci_pmu_init(cci_pmu, pdev);
-	if (ret)
+	if (ret) {
+		put_cpu();
 		return ret;
+	}
 
 	cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE,
 					 &cci_pmu->node);
+	put_cpu();
 	pr_info("ARM %s PMU driver probed", cci_pmu->model->name);
 	return 0;
 }
diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
index 3063f53123979d740c1f9c97099bcdb86103d43d..b52332e52ca5e9c9a9d4b4ede3667c3ec8066474 100644
--- a/drivers/bus/arm-ccn.c
+++ b/drivers/bus/arm-ccn.c
@@ -262,7 +262,7 @@ static struct attribute *arm_ccn_pmu_format_attrs[] = {
 	NULL
 };
 
-static struct attribute_group arm_ccn_pmu_format_attr_group = {
+static const struct attribute_group arm_ccn_pmu_format_attr_group = {
 	.name = "format",
 	.attrs = arm_ccn_pmu_format_attrs,
 };
@@ -451,7 +451,7 @@ static struct arm_ccn_pmu_event arm_ccn_pmu_events[] = {
 static struct attribute
 		*arm_ccn_pmu_events_attrs[ARRAY_SIZE(arm_ccn_pmu_events) + 1];
 
-static struct attribute_group arm_ccn_pmu_events_attr_group = {
+static const struct attribute_group arm_ccn_pmu_events_attr_group = {
 	.name = "events",
 	.is_visible = arm_ccn_pmu_events_is_visible,
 	.attrs = arm_ccn_pmu_events_attrs,
@@ -548,7 +548,7 @@ static struct attribute *arm_ccn_pmu_cmp_mask_attrs[] = {
 	NULL
 };
 
-static struct attribute_group arm_ccn_pmu_cmp_mask_attr_group = {
+static const struct attribute_group arm_ccn_pmu_cmp_mask_attr_group = {
 	.name = "cmp_mask",
 	.attrs = arm_ccn_pmu_cmp_mask_attrs,
 };
@@ -569,7 +569,7 @@ static struct attribute *arm_ccn_pmu_cpumask_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group arm_ccn_pmu_cpumask_attr_group = {
+static const struct attribute_group arm_ccn_pmu_cpumask_attr_group = {
 	.attrs = arm_ccn_pmu_cpumask_attrs,
 };
 
@@ -1268,10 +1268,12 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
 	if (ccn->dt.id == 0) {
 		name = "ccn";
 	} else {
-		int len = snprintf(NULL, 0, "ccn_%d", ccn->dt.id);
-
-		name = devm_kzalloc(ccn->dev, len + 1, GFP_KERNEL);
-		snprintf(name, len + 1, "ccn_%d", ccn->dt.id);
+		name = devm_kasprintf(ccn->dev, GFP_KERNEL, "ccn_%d",
+				      ccn->dt.id);
+		if (!name) {
+			err = -ENOMEM;
+			goto error_choose_name;
+		}
 	}
 
 	/* Perf driver registration */
@@ -1298,7 +1300,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
 	}
 
 	/* Pick one CPU which we will use to collect data from CCN... */
-	cpumask_set_cpu(smp_processor_id(), &ccn->dt.cpu);
+	cpumask_set_cpu(get_cpu(), &ccn->dt.cpu);
 
 	/* Also make sure that the overflow interrupt is handled by this CPU */
 	if (ccn->irq) {
@@ -1315,10 +1317,13 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
 
 	cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
 					 &ccn->dt.node);
+	put_cpu();
 	return 0;
 
 error_pmu_register:
 error_set_affinity:
+	put_cpu();
+error_choose_name:
 	ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id);
 	for (i = 0; i < ccn->num_xps; i++)
 		writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL);
@@ -1581,8 +1586,8 @@ static int __init arm_ccn_init(void)
 
 static void __exit arm_ccn_exit(void)
 {
-	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE);
 	platform_driver_unregister(&arm_ccn_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE);
 }
 
 module_init(arm_ccn_init);
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
index 328ca93781cf2691fb692b64e86ec4c203ca5b47..1b76d958590275daa6b9c2f8c69ecd2d51655da7 100644
--- a/drivers/bus/sunxi-rsb.c
+++ b/drivers/bus/sunxi-rsb.c
@@ -178,6 +178,7 @@ static struct bus_type sunxi_rsb_bus = {
 	.match		= sunxi_rsb_device_match,
 	.probe		= sunxi_rsb_device_probe,
 	.remove		= sunxi_rsb_device_remove,
+	.uevent		= of_device_uevent_modalias,
 };
 
 static void sunxi_rsb_dev_release(struct device *dev)
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 779869ed32b1516261e80fffd440b3ca1e1132ea..71fad747c0c7c1052cc19ee3bad0568b4a80c55b 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -199,6 +199,9 @@ struct smi_info {
 	/* The timer for this si. */
 	struct timer_list   si_timer;
 
+	/* This flag is set, if the timer can be set */
+	bool		    timer_can_start;
+
 	/* This flag is set, if the timer is running (timer_pending() isn't enough) */
 	bool		    timer_running;
 
@@ -355,6 +358,8 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
 
 static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
 {
+	if (!smi_info->timer_can_start)
+		return;
 	smi_info->last_timeout_jiffies = jiffies;
 	mod_timer(&smi_info->si_timer, new_val);
 	smi_info->timer_running = true;
@@ -374,21 +379,18 @@ static void start_new_msg(struct smi_info *smi_info, unsigned char *msg,
 	smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
 }
 
-static void start_check_enables(struct smi_info *smi_info, bool start_timer)
+static void start_check_enables(struct smi_info *smi_info)
 {
 	unsigned char msg[2];
 
 	msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
 	msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
 
-	if (start_timer)
-		start_new_msg(smi_info, msg, 2);
-	else
-		smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+	start_new_msg(smi_info, msg, 2);
 	smi_info->si_state = SI_CHECKING_ENABLES;
 }
 
-static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
+static void start_clear_flags(struct smi_info *smi_info)
 {
 	unsigned char msg[3];
 
@@ -397,10 +399,7 @@ static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
 	msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
 	msg[2] = WDT_PRE_TIMEOUT_INT;
 
-	if (start_timer)
-		start_new_msg(smi_info, msg, 3);
-	else
-		smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+	start_new_msg(smi_info, msg, 3);
 	smi_info->si_state = SI_CLEARING_FLAGS;
 }
 
@@ -435,11 +434,11 @@ static void start_getting_events(struct smi_info *smi_info)
  * Note that we cannot just use disable_irq(), since the interrupt may
  * be shared.
  */
-static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer)
+static inline bool disable_si_irq(struct smi_info *smi_info)
 {
 	if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) {
 		smi_info->interrupt_disabled = true;
-		start_check_enables(smi_info, start_timer);
+		start_check_enables(smi_info);
 		return true;
 	}
 	return false;
@@ -449,7 +448,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info)
 {
 	if ((smi_info->io.irq) && (smi_info->interrupt_disabled)) {
 		smi_info->interrupt_disabled = false;
-		start_check_enables(smi_info, true);
+		start_check_enables(smi_info);
 		return true;
 	}
 	return false;
@@ -467,7 +466,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info)
 
 	msg = ipmi_alloc_smi_msg();
 	if (!msg) {
-		if (!disable_si_irq(smi_info, true))
+		if (!disable_si_irq(smi_info))
 			smi_info->si_state = SI_NORMAL;
 	} else if (enable_si_irq(smi_info)) {
 		ipmi_free_smi_msg(msg);
@@ -483,7 +482,7 @@ static void handle_flags(struct smi_info *smi_info)
 		/* Watchdog pre-timeout */
 		smi_inc_stat(smi_info, watchdog_pretimeouts);
 
-		start_clear_flags(smi_info, true);
+		start_clear_flags(smi_info);
 		smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
 		if (smi_info->intf)
 			ipmi_smi_watchdog_pretimeout(smi_info->intf);
@@ -866,7 +865,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info,
 		 * disable and messages disabled.
 		 */
 		if (smi_info->supports_event_msg_buff || smi_info->io.irq) {
-			start_check_enables(smi_info, true);
+			start_check_enables(smi_info);
 		} else {
 			smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
 			if (!smi_info->curr_msg)
@@ -1167,6 +1166,7 @@ static int smi_start_processing(void       *send_info,
 
 	/* Set up the timer that drives the interface. */
 	timer_setup(&new_smi->si_timer, smi_timeout, 0);
+	new_smi->timer_can_start = true;
 	smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
 
 	/* Try to claim any interrupts. */
@@ -1936,10 +1936,12 @@ static void check_for_broken_irqs(struct smi_info *smi_info)
 	check_set_rcv_irq(smi_info);
 }
 
-static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
+static inline void stop_timer_and_thread(struct smi_info *smi_info)
 {
 	if (smi_info->thread != NULL)
 		kthread_stop(smi_info->thread);
+
+	smi_info->timer_can_start = false;
 	if (smi_info->timer_running)
 		del_timer_sync(&smi_info->si_timer);
 }
@@ -2152,7 +2154,7 @@ static int try_smi_init(struct smi_info *new_smi)
 	 * Start clearing the flags before we enable interrupts or the
 	 * timer to avoid racing with the timer.
 	 */
-	start_clear_flags(new_smi, false);
+	start_clear_flags(new_smi);
 
 	/*
 	 * IRQ is defined to be set when non-zero.  req_events will
@@ -2238,7 +2240,7 @@ static int try_smi_init(struct smi_info *new_smi)
 	dev_set_drvdata(new_smi->io.dev, NULL);
 
 out_err_stop_timer:
-	wait_for_timer_and_thread(new_smi);
+	stop_timer_and_thread(new_smi);
 
 out_err:
 	new_smi->interrupt_disabled = true;
@@ -2388,7 +2390,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
 	 */
 	if (to_clean->io.irq_cleanup)
 		to_clean->io.irq_cleanup(&to_clean->io);
-	wait_for_timer_and_thread(to_clean);
+	stop_timer_and_thread(to_clean);
 
 	/*
 	 * Timeouts are stopped, now make sure the interrupts are off
@@ -2400,7 +2402,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
 		schedule_timeout_uninterruptible(1);
 	}
 	if (to_clean->handlers)
-		disable_si_irq(to_clean, false);
+		disable_si_irq(to_clean);
 	while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
 		poll(to_clean);
 		schedule_timeout_uninterruptible(1);
diff --git a/drivers/char/ipmi/ipmi_si_parisc.c b/drivers/char/ipmi/ipmi_si_parisc.c
index 090b073ab441961f5c530dd9f453f1f1290e5a08..6b10f0e18a95d7f846edd78d22e392ec0e10cc69 100644
--- a/drivers/char/ipmi/ipmi_si_parisc.c
+++ b/drivers/char/ipmi/ipmi_si_parisc.c
@@ -10,6 +10,8 @@ static int __init ipmi_parisc_probe(struct parisc_device *dev)
 {
 	struct si_sm_io io;
 
+	memset(&io, 0, sizeof(io));
+
 	io.si_type	= SI_KCS;
 	io.addr_source	= SI_DEVICETREE;
 	io.addr_type	= IPMI_MEM_ADDR_SPACE;
diff --git a/drivers/char/ipmi/ipmi_si_pci.c b/drivers/char/ipmi/ipmi_si_pci.c
index 99771f5cad07a7b25285f33cf0073739ca27e4ae..27dd11c49d2197aa098426ca2992565f75e45a32 100644
--- a/drivers/char/ipmi/ipmi_si_pci.c
+++ b/drivers/char/ipmi/ipmi_si_pci.c
@@ -103,10 +103,13 @@ static int ipmi_pci_probe(struct pci_dev *pdev,
 	io.addr_source_cleanup = ipmi_pci_cleanup;
 	io.addr_source_data = pdev;
 
-	if (pci_resource_flags(pdev, 0) & IORESOURCE_IO)
+	if (pci_resource_flags(pdev, 0) & IORESOURCE_IO) {
 		io.addr_type = IPMI_IO_ADDR_SPACE;
-	else
+		io.io_setup = ipmi_si_port_setup;
+	} else {
 		io.addr_type = IPMI_MEM_ADDR_SPACE;
+		io.io_setup = ipmi_si_mem_setup;
+	}
 	io.addr_data = pci_resource_start(pdev, 0);
 
 	io.regspacing = ipmi_pci_probe_regspacing(&io);
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 647d056df88c8dd2a7d8288e35fa2eeba9b7705b..b56c11f51bafad32bafc7b5b9c7467e7cd16632b 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -220,7 +220,8 @@ static bool clk_core_is_enabled(struct clk_core *core)
 
 	ret = core->ops->is_enabled(core->hw);
 done:
-	clk_pm_runtime_put(core);
+	if (core->dev)
+		pm_runtime_put(core->dev);
 
 	return ret;
 }
@@ -1564,6 +1565,9 @@ static void clk_change_rate(struct clk_core *core)
 		best_parent_rate = core->parent->rate;
 	}
 
+	if (clk_pm_runtime_get(core))
+		return;
+
 	if (core->flags & CLK_SET_RATE_UNGATE) {
 		unsigned long flags;
 
@@ -1634,6 +1638,8 @@ static void clk_change_rate(struct clk_core *core)
 	/* handle the new child who might not be in core->children yet */
 	if (core->new_child)
 		clk_change_rate(core->new_child);
+
+	clk_pm_runtime_put(core);
 }
 
 static int clk_core_set_rate_nolock(struct clk_core *core,
diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c
index a1a634253d6f2299bfad888b2fa193c98b4ac019..f00d8758ba24f6e5ed537a88be76ff85e5a7c5e4 100644
--- a/drivers/clk/sunxi/clk-sun9i-mmc.c
+++ b/drivers/clk/sunxi/clk-sun9i-mmc.c
@@ -16,6 +16,7 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev,
 	return 0;
 }
 
+static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev,
+				 unsigned long id)
+{
+	sun9i_mmc_reset_assert(rcdev, id);
+	udelay(10);
+	sun9i_mmc_reset_deassert(rcdev, id);
+
+	return 0;
+}
+
 static const struct reset_control_ops sun9i_mmc_reset_ops = {
 	.assert		= sun9i_mmc_reset_assert,
 	.deassert	= sun9i_mmc_reset_deassert,
+	.reset		= sun9i_mmc_reset_reset,
 };
 
 static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 58d4f4e1ad6a907991873a03027e6c7aa2f31fc4..ca38229b045ab288a2f250dddaf1b174e8c0572f 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -22,6 +22,8 @@
 
 #include "cpufreq_governor.h"
 
+#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL	(2 * TICK_NSEC / NSEC_PER_USEC)
+
 static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
 
 static DEFINE_MUTEX(gov_dbs_data_mutex);
@@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct policy_dbs_info *policy_dbs;
+	unsigned int sampling_interval;
 	int ret;
-	ret = sscanf(buf, "%u", &dbs_data->sampling_rate);
-	if (ret != 1)
+
+	ret = sscanf(buf, "%u", &sampling_interval);
+	if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL)
 		return -EINVAL;
 
+	dbs_data->sampling_rate = sampling_interval;
+
 	/*
 	 * We are operating under dbs_data->mutex and so the list and its
 	 * entries can't be freed concurrently.
@@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
 	if (ret)
 		goto free_policy_dbs_info;
 
-	dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy);
+	/*
+	 * The sampling interval should not be less than the transition latency
+	 * of the CPU and it also cannot be too small for dbs_update() to work
+	 * correctly.
+	 */
+	dbs_data->sampling_rate = max_t(unsigned int,
+					CPUFREQ_DBS_MIN_SAMPLING_INTERVAL,
+					cpufreq_policy_transition_delay_us(policy));
 
 	if (!have_governor_per_policy())
 		gov->gdbs_data = dbs_data;
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 628fe899cb483da9dbf0f7661b537734bc82f784..d9b2c2de49c43f125c91b382f818ff81d0ffc6ac 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
 	val >>= OCOTP_CFG3_SPEED_SHIFT;
 	val &= 0x3;
 
-	if ((val != OCOTP_CFG3_SPEED_1P2GHZ) &&
-	     of_machine_is_compatible("fsl,imx6q"))
-		if (dev_pm_opp_disable(dev, 1200000000))
-			dev_warn(dev, "failed to disable 1.2GHz OPP\n");
 	if (val < OCOTP_CFG3_SPEED_996MHZ)
 		if (dev_pm_opp_disable(dev, 996000000))
 			dev_warn(dev, "failed to disable 996MHz OPP\n");
-	if (of_machine_is_compatible("fsl,imx6q")) {
+
+	if (of_machine_is_compatible("fsl,imx6q") ||
+	    of_machine_is_compatible("fsl,imx6qp")) {
 		if (val != OCOTP_CFG3_SPEED_852MHZ)
 			if (dev_pm_opp_disable(dev, 852000000))
 				dev_warn(dev, "failed to disable 852MHz OPP\n");
+		if (val != OCOTP_CFG3_SPEED_1P2GHZ)
+			if (dev_pm_opp_disable(dev, 1200000000))
+				dev_warn(dev, "failed to disable 1.2GHz OPP\n");
 	}
 	iounmap(base);
 put_node:
diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
index 3e104f5aa0c2f9e05a3d7abd22dd6045ed420e9c..b56b3f711d9410a9ebddaa821428fd646ad02417 100644
--- a/drivers/crypto/chelsio/Kconfig
+++ b/drivers/crypto/chelsio/Kconfig
@@ -5,6 +5,7 @@ config CRYPTO_DEV_CHELSIO
 	select CRYPTO_SHA256
 	select CRYPTO_SHA512
 	select CRYPTO_AUTHENC
+	select CRYPTO_GF128MUL
 	---help---
 	  The Chelsio Crypto Co-processor driver for T6 adapters.
 
diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index 89ba9e85c0f377577c3e119bac5e7157372faed6..4bcef78a08aad241675e50d959aff2a6c58e126b 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -607,6 +607,7 @@ static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv
 		ndesc = ctx->handle_result(priv, ring, sreq->req,
 					   &should_complete, &ret);
 		if (ndesc < 0) {
+			kfree(sreq);
 			dev_err(priv->dev, "failed to handle result (%d)", ndesc);
 			return;
 		}
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index 5438552bc6d783b57a23763e64c59afb90c340ae..fcc0a606d74839bb46f2af6d0ac66bdb12b3d511 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -14,6 +14,7 @@
 
 #include <crypto/aes.h>
 #include <crypto/skcipher.h>
+#include <crypto/internal/skcipher.h>
 
 #include "safexcel.h"
 
@@ -33,6 +34,10 @@ struct safexcel_cipher_ctx {
 	unsigned int key_len;
 };
 
+struct safexcel_cipher_req {
+	bool needs_inv;
+};
+
 static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx,
 				  struct crypto_async_request *async,
 				  struct safexcel_command_desc *cdesc,
@@ -126,9 +131,9 @@ static int safexcel_context_control(struct safexcel_cipher_ctx *ctx,
 	return 0;
 }
 
-static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
-				  struct crypto_async_request *async,
-				  bool *should_complete, int *ret)
+static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring,
+				      struct crypto_async_request *async,
+				      bool *should_complete, int *ret)
 {
 	struct skcipher_request *req = skcipher_request_cast(async);
 	struct safexcel_result_desc *rdesc;
@@ -265,7 +270,6 @@ static int safexcel_aes_send(struct crypto_async_request *async,
 	spin_unlock_bh(&priv->ring[ring].egress_lock);
 
 	request->req = &req->base;
-	ctx->base.handle_result = safexcel_handle_result;
 
 	*commands = n_cdesc;
 	*results = n_rdesc;
@@ -341,8 +345,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
 
 	ring = safexcel_select_ring(priv);
 	ctx->base.ring = ring;
-	ctx->base.needs_inv = false;
-	ctx->base.send = safexcel_aes_send;
 
 	spin_lock_bh(&priv->ring[ring].queue_lock);
 	enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
@@ -359,6 +361,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
 	return ndesc;
 }
 
+static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
+				  struct crypto_async_request *async,
+				  bool *should_complete, int *ret)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
+	int err;
+
+	if (sreq->needs_inv) {
+		sreq->needs_inv = false;
+		err = safexcel_handle_inv_result(priv, ring, async,
+						 should_complete, ret);
+	} else {
+		err = safexcel_handle_req_result(priv, ring, async,
+						 should_complete, ret);
+	}
+
+	return err;
+}
+
 static int safexcel_cipher_send_inv(struct crypto_async_request *async,
 				    int ring, struct safexcel_request *request,
 				    int *commands, int *results)
@@ -368,8 +390,6 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async,
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	int ret;
 
-	ctx->base.handle_result = safexcel_handle_inv_result;
-
 	ret = safexcel_invalidate_cache(async, &ctx->base, priv,
 					ctx->base.ctxr_dma, ring, request);
 	if (unlikely(ret))
@@ -381,28 +401,46 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async,
 	return 0;
 }
 
+static int safexcel_send(struct crypto_async_request *async,
+			 int ring, struct safexcel_request *request,
+			 int *commands, int *results)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
+	int ret;
+
+	if (sreq->needs_inv)
+		ret = safexcel_cipher_send_inv(async, ring, request,
+					       commands, results);
+	else
+		ret = safexcel_aes_send(async, ring, request,
+					commands, results);
+	return ret;
+}
+
 static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
 {
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
-	struct skcipher_request req;
+	SKCIPHER_REQUEST_ON_STACK(req, __crypto_skcipher_cast(tfm));
+	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
 	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
 
-	memset(&req, 0, sizeof(struct skcipher_request));
+	memset(req, 0, sizeof(struct skcipher_request));
 
 	/* create invalidation request */
 	init_completion(&result.completion);
-	skcipher_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-					safexcel_inv_complete, &result);
+	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      safexcel_inv_complete, &result);
 
-	skcipher_request_set_tfm(&req, __crypto_skcipher_cast(tfm));
-	ctx = crypto_tfm_ctx(req.base.tfm);
+	skcipher_request_set_tfm(req, __crypto_skcipher_cast(tfm));
+	ctx = crypto_tfm_ctx(req->base.tfm);
 	ctx->base.exit_inv = true;
-	ctx->base.send = safexcel_cipher_send_inv;
+	sreq->needs_inv = true;
 
 	spin_lock_bh(&priv->ring[ring].queue_lock);
-	crypto_enqueue_request(&priv->ring[ring].queue, &req.base);
+	crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
 	spin_unlock_bh(&priv->ring[ring].queue_lock);
 
 	if (!priv->ring[ring].need_dequeue)
@@ -424,19 +462,21 @@ static int safexcel_aes(struct skcipher_request *req,
 			enum safexcel_cipher_direction dir, u32 mode)
 {
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	int ret, ring;
 
+	sreq->needs_inv = false;
 	ctx->direction = dir;
 	ctx->mode = mode;
 
 	if (ctx->base.ctxr) {
-		if (ctx->base.needs_inv)
-			ctx->base.send = safexcel_cipher_send_inv;
+		if (ctx->base.needs_inv) {
+			sreq->needs_inv = true;
+			ctx->base.needs_inv = false;
+		}
 	} else {
 		ctx->base.ring = safexcel_select_ring(priv);
-		ctx->base.send = safexcel_aes_send;
-
 		ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
 						 EIP197_GFP_FLAGS(req->base),
 						 &ctx->base.ctxr_dma);
@@ -476,6 +516,11 @@ static int safexcel_skcipher_cra_init(struct crypto_tfm *tfm)
 			     alg.skcipher.base);
 
 	ctx->priv = tmpl->priv;
+	ctx->base.send = safexcel_send;
+	ctx->base.handle_result = safexcel_handle_result;
+
+	crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm),
+				    sizeof(struct safexcel_cipher_req));
 
 	return 0;
 }
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 74feb622710147baf79d49c25a119be7e640ac7a..0c5a5820b06e53cc9efa776c37822730af1421e4 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -32,9 +32,10 @@ struct safexcel_ahash_req {
 	bool last_req;
 	bool finish;
 	bool hmac;
+	bool needs_inv;
 
 	u8 state_sz;    /* expected sate size, only set once */
-	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)];
+	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)] __aligned(sizeof(u32));
 
 	u64 len;
 	u64 processed;
@@ -119,15 +120,15 @@ static void safexcel_context_control(struct safexcel_ahash_ctx *ctx,
 	}
 }
 
-static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
-				  struct crypto_async_request *async,
-				  bool *should_complete, int *ret)
+static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring,
+				      struct crypto_async_request *async,
+				      bool *should_complete, int *ret)
 {
 	struct safexcel_result_desc *rdesc;
 	struct ahash_request *areq = ahash_request_cast(async);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_req *sreq = ahash_request_ctx(areq);
-	int cache_len, result_sz = sreq->state_sz;
+	int cache_len;
 
 	*ret = 0;
 
@@ -148,8 +149,8 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
 	spin_unlock_bh(&priv->ring[ring].egress_lock);
 
 	if (sreq->finish)
-		result_sz = crypto_ahash_digestsize(ahash);
-	memcpy(sreq->state, areq->result, result_sz);
+		memcpy(areq->result, sreq->state,
+		       crypto_ahash_digestsize(ahash));
 
 	dma_unmap_sg(priv->dev, areq->src,
 		     sg_nents_for_len(areq->src, areq->nbytes), DMA_TO_DEVICE);
@@ -165,9 +166,9 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
 	return 1;
 }
 
-static int safexcel_ahash_send(struct crypto_async_request *async, int ring,
-			       struct safexcel_request *request, int *commands,
-			       int *results)
+static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
+				   struct safexcel_request *request,
+				   int *commands, int *results)
 {
 	struct ahash_request *areq = ahash_request_cast(async);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
@@ -273,7 +274,7 @@ static int safexcel_ahash_send(struct crypto_async_request *async, int ring,
 	/* Add the token */
 	safexcel_hash_token(first_cdesc, len, req->state_sz);
 
-	ctx->base.result_dma = dma_map_single(priv->dev, areq->result,
+	ctx->base.result_dma = dma_map_single(priv->dev, req->state,
 					      req->state_sz, DMA_FROM_DEVICE);
 	if (dma_mapping_error(priv->dev, ctx->base.result_dma)) {
 		ret = -EINVAL;
@@ -292,7 +293,6 @@ static int safexcel_ahash_send(struct crypto_async_request *async, int ring,
 
 	req->processed += len;
 	request->req = &areq->base;
-	ctx->base.handle_result = safexcel_handle_result;
 
 	*commands = n_cdesc;
 	*results = 1;
@@ -374,8 +374,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
 
 	ring = safexcel_select_ring(priv);
 	ctx->base.ring = ring;
-	ctx->base.needs_inv = false;
-	ctx->base.send = safexcel_ahash_send;
 
 	spin_lock_bh(&priv->ring[ring].queue_lock);
 	enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
@@ -392,6 +390,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
 	return 1;
 }
 
+static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
+				  struct crypto_async_request *async,
+				  bool *should_complete, int *ret)
+{
+	struct ahash_request *areq = ahash_request_cast(async);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	int err;
+
+	if (req->needs_inv) {
+		req->needs_inv = false;
+		err = safexcel_handle_inv_result(priv, ring, async,
+						 should_complete, ret);
+	} else {
+		err = safexcel_handle_req_result(priv, ring, async,
+						 should_complete, ret);
+	}
+
+	return err;
+}
+
 static int safexcel_ahash_send_inv(struct crypto_async_request *async,
 				   int ring, struct safexcel_request *request,
 				   int *commands, int *results)
@@ -400,7 +418,6 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async,
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 	int ret;
 
-	ctx->base.handle_result = safexcel_handle_inv_result;
 	ret = safexcel_invalidate_cache(async, &ctx->base, ctx->priv,
 					ctx->base.ctxr_dma, ring, request);
 	if (unlikely(ret))
@@ -412,28 +429,46 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async,
 	return 0;
 }
 
+static int safexcel_ahash_send(struct crypto_async_request *async,
+			       int ring, struct safexcel_request *request,
+			       int *commands, int *results)
+{
+	struct ahash_request *areq = ahash_request_cast(async);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	int ret;
+
+	if (req->needs_inv)
+		ret = safexcel_ahash_send_inv(async, ring, request,
+					      commands, results);
+	else
+		ret = safexcel_ahash_send_req(async, ring, request,
+					      commands, results);
+	return ret;
+}
+
 static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
-	struct ahash_request req;
+	AHASH_REQUEST_ON_STACK(req, __crypto_ahash_cast(tfm));
+	struct safexcel_ahash_req *rctx = ahash_request_ctx(req);
 	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
 
-	memset(&req, 0, sizeof(struct ahash_request));
+	memset(req, 0, sizeof(struct ahash_request));
 
 	/* create invalidation request */
 	init_completion(&result.completion);
-	ahash_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				   safexcel_inv_complete, &result);
 
-	ahash_request_set_tfm(&req, __crypto_ahash_cast(tfm));
-	ctx = crypto_tfm_ctx(req.base.tfm);
+	ahash_request_set_tfm(req, __crypto_ahash_cast(tfm));
+	ctx = crypto_tfm_ctx(req->base.tfm);
 	ctx->base.exit_inv = true;
-	ctx->base.send = safexcel_ahash_send_inv;
+	rctx->needs_inv = true;
 
 	spin_lock_bh(&priv->ring[ring].queue_lock);
-	crypto_enqueue_request(&priv->ring[ring].queue, &req.base);
+	crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
 	spin_unlock_bh(&priv->ring[ring].queue_lock);
 
 	if (!priv->ring[ring].need_dequeue)
@@ -481,14 +516,16 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq)
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	int ret, ring;
 
-	ctx->base.send = safexcel_ahash_send;
+	req->needs_inv = false;
 
 	if (req->processed && ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)
 		ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq);
 
 	if (ctx->base.ctxr) {
-		if (ctx->base.needs_inv)
-			ctx->base.send = safexcel_ahash_send_inv;
+		if (ctx->base.needs_inv) {
+			ctx->base.needs_inv = false;
+			req->needs_inv = true;
+		}
 	} else {
 		ctx->base.ring = safexcel_select_ring(priv);
 		ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
@@ -622,6 +659,8 @@ static int safexcel_ahash_cra_init(struct crypto_tfm *tfm)
 			     struct safexcel_alg_template, alg.ahash);
 
 	ctx->priv = tmpl->priv;
+	ctx->base.send = safexcel_ahash_send;
+	ctx->base.handle_result = safexcel_handle_result;
 
 	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
 				 sizeof(struct safexcel_ahash_req));
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 48de52cf2ecc10f7a1a33b1fa0025e94ec706a19..662e709812cc6f79e973fa10f71ad8f6d1991193 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1625,6 +1625,7 @@ static int queue_cache_init(void)
 					  CWQ_ENTRY_SIZE, 0, NULL);
 	if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) {
 		kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
+		queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
 		return -ENOMEM;
 	}
 	return 0;
@@ -1634,6 +1635,8 @@ static void queue_cache_destroy(void)
 {
 	kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
 	kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]);
+	queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
+	queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL;
 }
 
 static long spu_queue_register_workfn(void *arg)
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index fbab271b3bf9f9506c86579c75ebe32fc3235228..a861b5b4d4437d6b3be7dcf5e9d0b3475205455d 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -708,7 +708,7 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
 			 unsigned long flags)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
-	struct data_chunk	*first = xt->sgl;
+	struct data_chunk	*first;
 	struct at_desc		*desc = NULL;
 	size_t			xfer_count;
 	unsigned int		dwidth;
@@ -720,6 +720,8 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
 	if (unlikely(!xt || xt->numf != 1 || !xt->frame_size))
 		return NULL;
 
+	first = xt->sgl;
+
 	dev_info(chan2dev(chan),
 		 "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n",
 		__func__, &xt->src_start, &xt->dst_start, xt->numf,
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c
index d50273fed715096ac625382f6c511f537da57bf4..afd5e10f8927cb0c5573bb946a48755aad58b0aa 100644
--- a/drivers/dma/dma-jz4740.c
+++ b/drivers/dma/dma-jz4740.c
@@ -555,7 +555,7 @@ static int jz4740_dma_probe(struct platform_device *pdev)
 
 	ret = dma_async_device_register(dd);
 	if (ret)
-		return ret;
+		goto err_clk;
 
 	irq = platform_get_irq(pdev, 0);
 	ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev);
@@ -568,6 +568,8 @@ static int jz4740_dma_probe(struct platform_device *pdev)
 
 err_unregister:
 	dma_async_device_unregister(dd);
+err_clk:
+	clk_disable_unprepare(dmadev->clk);
 	return ret;
 }
 
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 47edc7fbf91f52e5259060824c38eaab69ebdb56..ec5f9d2bc8202f340c615cbe43731016d316d547 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -155,6 +155,12 @@ MODULE_PARM_DESC(run, "Run the test (default: false)");
 #define PATTERN_COUNT_MASK	0x1f
 #define PATTERN_MEMSET_IDX	0x01
 
+/* poor man's completion - we want to use wait_event_freezable() on it */
+struct dmatest_done {
+	bool			done;
+	wait_queue_head_t	*wait;
+};
+
 struct dmatest_thread {
 	struct list_head	node;
 	struct dmatest_info	*info;
@@ -165,6 +171,8 @@ struct dmatest_thread {
 	u8			**dsts;
 	u8			**udsts;
 	enum dma_transaction_type type;
+	wait_queue_head_t done_wait;
+	struct dmatest_done test_done;
 	bool			done;
 };
 
@@ -342,18 +350,25 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
 	return error_count;
 }
 
-/* poor man's completion - we want to use wait_event_freezable() on it */
-struct dmatest_done {
-	bool			done;
-	wait_queue_head_t	*wait;
-};
 
 static void dmatest_callback(void *arg)
 {
 	struct dmatest_done *done = arg;
-
-	done->done = true;
-	wake_up_all(done->wait);
+	struct dmatest_thread *thread =
+		container_of(arg, struct dmatest_thread, done_wait);
+	if (!thread->done) {
+		done->done = true;
+		wake_up_all(done->wait);
+	} else {
+		/*
+		 * If thread->done, it means that this callback occurred
+		 * after the parent thread has cleaned up. This can
+		 * happen in the case that driver doesn't implement
+		 * the terminate_all() functionality and a dma operation
+		 * did not occur within the timeout period
+		 */
+		WARN(1, "dmatest: Kernel memory may be corrupted!!\n");
+	}
 }
 
 static unsigned int min_odd(unsigned int x, unsigned int y)
@@ -424,9 +439,8 @@ static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
  */
 static int dmatest_func(void *data)
 {
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait);
 	struct dmatest_thread	*thread = data;
-	struct dmatest_done	done = { .wait = &done_wait };
+	struct dmatest_done	*done = &thread->test_done;
 	struct dmatest_info	*info;
 	struct dmatest_params	*params;
 	struct dma_chan		*chan;
@@ -673,9 +687,9 @@ static int dmatest_func(void *data)
 			continue;
 		}
 
-		done.done = false;
+		done->done = false;
 		tx->callback = dmatest_callback;
-		tx->callback_param = &done;
+		tx->callback_param = done;
 		cookie = tx->tx_submit(tx);
 
 		if (dma_submit_error(cookie)) {
@@ -688,21 +702,12 @@ static int dmatest_func(void *data)
 		}
 		dma_async_issue_pending(chan);
 
-		wait_event_freezable_timeout(done_wait, done.done,
+		wait_event_freezable_timeout(thread->done_wait, done->done,
 					     msecs_to_jiffies(params->timeout));
 
 		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
 
-		if (!done.done) {
-			/*
-			 * We're leaving the timed out dma operation with
-			 * dangling pointer to done_wait.  To make this
-			 * correct, we'll need to allocate wait_done for
-			 * each test iteration and perform "who's gonna
-			 * free it this time?" dancing.  For now, just
-			 * leave it dangling.
-			 */
-			WARN(1, "dmatest: Kernel stack may be corrupted!!\n");
+		if (!done->done) {
 			dmaengine_unmap_put(um);
 			result("test timed out", total_tests, src_off, dst_off,
 			       len, 0);
@@ -789,7 +794,7 @@ static int dmatest_func(void *data)
 		dmatest_KBs(runtime, total_len), ret);
 
 	/* terminate all transfers on specified channels */
-	if (ret)
+	if (ret || failed_tests)
 		dmaengine_terminate_all(chan);
 
 	thread->done = true;
@@ -849,6 +854,8 @@ static int dmatest_add_threads(struct dmatest_info *info,
 		thread->info = info;
 		thread->chan = dtc->chan;
 		thread->type = type;
+		thread->test_done.wait = &thread->done_wait;
+		init_waitqueue_head(&thread->done_wait);
 		smp_wmb();
 		thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
 				dma_chan_name(chan), op, i);
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
index 6775f2c74e25b7269417bbe001adfb03698dea97..c7568869284e17d4b63379b236a0f30391640820 100644
--- a/drivers/dma/fsl-edma.c
+++ b/drivers/dma/fsl-edma.c
@@ -863,11 +863,11 @@ static void fsl_edma_irq_exit(
 	}
 }
 
-static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma)
+static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma, int nr_clocks)
 {
 	int i;
 
-	for (i = 0; i < DMAMUX_NR; i++)
+	for (i = 0; i < nr_clocks; i++)
 		clk_disable_unprepare(fsl_edma->muxclk[i]);
 }
 
@@ -904,25 +904,25 @@ static int fsl_edma_probe(struct platform_device *pdev)
 
 		res = platform_get_resource(pdev, IORESOURCE_MEM, 1 + i);
 		fsl_edma->muxbase[i] = devm_ioremap_resource(&pdev->dev, res);
-		if (IS_ERR(fsl_edma->muxbase[i]))
+		if (IS_ERR(fsl_edma->muxbase[i])) {
+			/* on error: disable all previously enabled clks */
+			fsl_disable_clocks(fsl_edma, i);
 			return PTR_ERR(fsl_edma->muxbase[i]);
+		}
 
 		sprintf(clkname, "dmamux%d", i);
 		fsl_edma->muxclk[i] = devm_clk_get(&pdev->dev, clkname);
 		if (IS_ERR(fsl_edma->muxclk[i])) {
 			dev_err(&pdev->dev, "Missing DMAMUX block clock.\n");
+			/* on error: disable all previously enabled clks */
+			fsl_disable_clocks(fsl_edma, i);
 			return PTR_ERR(fsl_edma->muxclk[i]);
 		}
 
 		ret = clk_prepare_enable(fsl_edma->muxclk[i]);
-		if (ret) {
-			/* disable only clks which were enabled on error */
-			for (; i >= 0; i--)
-				clk_disable_unprepare(fsl_edma->muxclk[i]);
-
-			dev_err(&pdev->dev, "DMAMUX clk block failed.\n");
-			return ret;
-		}
+		if (ret)
+			/* on error: disable all previously enabled clks */
+			fsl_disable_clocks(fsl_edma, i);
 
 	}
 
@@ -976,7 +976,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
 	if (ret) {
 		dev_err(&pdev->dev,
 			"Can't register Freescale eDMA engine. (%d)\n", ret);
-		fsl_disable_clocks(fsl_edma);
+		fsl_disable_clocks(fsl_edma, DMAMUX_NR);
 		return ret;
 	}
 
@@ -985,7 +985,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev,
 			"Can't register Freescale eDMA of_dma. (%d)\n", ret);
 		dma_async_device_unregister(&fsl_edma->dma_dev);
-		fsl_disable_clocks(fsl_edma);
+		fsl_disable_clocks(fsl_edma, DMAMUX_NR);
 		return ret;
 	}
 
@@ -1015,7 +1015,7 @@ static int fsl_edma_remove(struct platform_device *pdev)
 	fsl_edma_cleanup_vchan(&fsl_edma->dma_dev);
 	of_dma_controller_free(np);
 	dma_async_device_unregister(&fsl_edma->dma_dev);
-	fsl_disable_clocks(fsl_edma);
+	fsl_disable_clocks(fsl_edma, DMAMUX_NR);
 
 	return 0;
 }
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 2f31d3d0caa61821aa08aea360e06709bdb25d48..7792a9186f9cf35bae71792e5e0783cf53364b05 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -390,7 +390,7 @@ static int ioat_dma_self_test(struct ioatdma_device *ioat_dma)
 	if (memcmp(src, dest, IOAT_TEST_SIZE)) {
 		dev_err(dev, "Self-test copy failed compare, disabling\n");
 		err = -ENODEV;
-		goto free_resources;
+		goto unmap_dma;
 	}
 
 unmap_dma:
diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c
index dfb373c8ba2a49629628dd33f1a36fd41a270b7b..7da9f1b83ebecf3641da1e87f35e5e9ec6621df8 100644
--- a/drivers/firmware/arm_scpi.c
+++ b/drivers/firmware/arm_scpi.c
@@ -28,7 +28,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/bitmap.h>
-#include <linux/bitfield.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/export.h>
@@ -73,13 +72,21 @@
 
 #define MAX_DVFS_DOMAINS	8
 #define MAX_DVFS_OPPS		16
-
-#define PROTO_REV_MAJOR_MASK	GENMASK(31, 16)
-#define PROTO_REV_MINOR_MASK	GENMASK(15, 0)
-
-#define FW_REV_MAJOR_MASK	GENMASK(31, 24)
-#define FW_REV_MINOR_MASK	GENMASK(23, 16)
-#define FW_REV_PATCH_MASK	GENMASK(15, 0)
+#define DVFS_LATENCY(hdr)	(le32_to_cpu(hdr) >> 16)
+#define DVFS_OPP_COUNT(hdr)	((le32_to_cpu(hdr) >> 8) & 0xff)
+
+#define PROTOCOL_REV_MINOR_BITS	16
+#define PROTOCOL_REV_MINOR_MASK	((1U << PROTOCOL_REV_MINOR_BITS) - 1)
+#define PROTOCOL_REV_MAJOR(x)	((x) >> PROTOCOL_REV_MINOR_BITS)
+#define PROTOCOL_REV_MINOR(x)	((x) & PROTOCOL_REV_MINOR_MASK)
+
+#define FW_REV_MAJOR_BITS	24
+#define FW_REV_MINOR_BITS	16
+#define FW_REV_PATCH_MASK	((1U << FW_REV_MINOR_BITS) - 1)
+#define FW_REV_MINOR_MASK	((1U << FW_REV_MAJOR_BITS) - 1)
+#define FW_REV_MAJOR(x)		((x) >> FW_REV_MAJOR_BITS)
+#define FW_REV_MINOR(x)		(((x) & FW_REV_MINOR_MASK) >> FW_REV_MINOR_BITS)
+#define FW_REV_PATCH(x)		((x) & FW_REV_PATCH_MASK)
 
 #define MAX_RX_TIMEOUT		(msecs_to_jiffies(30))
 
@@ -304,6 +311,10 @@ struct clk_get_info {
 	u8 name[20];
 } __packed;
 
+struct clk_get_value {
+	__le32 rate;
+} __packed;
+
 struct clk_set_value {
 	__le16 id;
 	__le16 reserved;
@@ -317,9 +328,7 @@ struct legacy_clk_set_value {
 } __packed;
 
 struct dvfs_info {
-	u8 domain;
-	u8 opp_count;
-	__le16 latency;
+	__le32 header;
 	struct {
 		__le32 freq;
 		__le32 m_volt;
@@ -342,6 +351,11 @@ struct _scpi_sensor_info {
 	char name[20];
 };
 
+struct sensor_value {
+	__le32 lo_val;
+	__le32 hi_val;
+} __packed;
+
 struct dev_pstate_set {
 	__le16 dev_id;
 	u8 pstate;
@@ -405,20 +419,19 @@ static void scpi_process_cmd(struct scpi_chan *ch, u32 cmd)
 		unsigned int len;
 
 		if (scpi_info->is_legacy) {
-			struct legacy_scpi_shared_mem __iomem *mem =
-							ch->rx_payload;
+			struct legacy_scpi_shared_mem *mem = ch->rx_payload;
 
 			/* RX Length is not replied by the legacy Firmware */
 			len = match->rx_len;
 
-			match->status = ioread32(&mem->status);
+			match->status = le32_to_cpu(mem->status);
 			memcpy_fromio(match->rx_buf, mem->payload, len);
 		} else {
-			struct scpi_shared_mem __iomem *mem = ch->rx_payload;
+			struct scpi_shared_mem *mem = ch->rx_payload;
 
 			len = min(match->rx_len, CMD_SIZE(cmd));
 
-			match->status = ioread32(&mem->status);
+			match->status = le32_to_cpu(mem->status);
 			memcpy_fromio(match->rx_buf, mem->payload, len);
 		}
 
@@ -432,11 +445,11 @@ static void scpi_process_cmd(struct scpi_chan *ch, u32 cmd)
 static void scpi_handle_remote_msg(struct mbox_client *c, void *msg)
 {
 	struct scpi_chan *ch = container_of(c, struct scpi_chan, cl);
-	struct scpi_shared_mem __iomem *mem = ch->rx_payload;
+	struct scpi_shared_mem *mem = ch->rx_payload;
 	u32 cmd = 0;
 
 	if (!scpi_info->is_legacy)
-		cmd = ioread32(&mem->command);
+		cmd = le32_to_cpu(mem->command);
 
 	scpi_process_cmd(ch, cmd);
 }
@@ -446,7 +459,7 @@ static void scpi_tx_prepare(struct mbox_client *c, void *msg)
 	unsigned long flags;
 	struct scpi_xfer *t = msg;
 	struct scpi_chan *ch = container_of(c, struct scpi_chan, cl);
-	struct scpi_shared_mem __iomem *mem = ch->tx_payload;
+	struct scpi_shared_mem *mem = (struct scpi_shared_mem *)ch->tx_payload;
 
 	if (t->tx_buf) {
 		if (scpi_info->is_legacy)
@@ -465,7 +478,7 @@ static void scpi_tx_prepare(struct mbox_client *c, void *msg)
 	}
 
 	if (!scpi_info->is_legacy)
-		iowrite32(t->cmd, &mem->command);
+		mem->command = cpu_to_le32(t->cmd);
 }
 
 static struct scpi_xfer *get_scpi_xfer(struct scpi_chan *ch)
@@ -570,13 +583,13 @@ scpi_clk_get_range(u16 clk_id, unsigned long *min, unsigned long *max)
 static unsigned long scpi_clk_get_val(u16 clk_id)
 {
 	int ret;
-	__le32 rate;
+	struct clk_get_value clk;
 	__le16 le_clk_id = cpu_to_le16(clk_id);
 
 	ret = scpi_send_message(CMD_GET_CLOCK_VALUE, &le_clk_id,
-				sizeof(le_clk_id), &rate, sizeof(rate));
+				sizeof(le_clk_id), &clk, sizeof(clk));
 
-	return ret ? ret : le32_to_cpu(rate);
+	return ret ? ret : le32_to_cpu(clk.rate);
 }
 
 static int scpi_clk_set_val(u16 clk_id, unsigned long rate)
@@ -631,35 +644,35 @@ static int opp_cmp_func(const void *opp1, const void *opp2)
 }
 
 static struct scpi_dvfs_info *scpi_dvfs_get_info(u8 domain)
-{
-	if (domain >= MAX_DVFS_DOMAINS)
-		return ERR_PTR(-EINVAL);
-
-	return scpi_info->dvfs[domain] ?: ERR_PTR(-EINVAL);
-}
-
-static int scpi_dvfs_populate_info(struct device *dev, u8 domain)
 {
 	struct scpi_dvfs_info *info;
 	struct scpi_opp *opp;
 	struct dvfs_info buf;
 	int ret, i;
 
+	if (domain >= MAX_DVFS_DOMAINS)
+		return ERR_PTR(-EINVAL);
+
+	if (scpi_info->dvfs[domain])	/* data already populated */
+		return scpi_info->dvfs[domain];
+
 	ret = scpi_send_message(CMD_GET_DVFS_INFO, &domain, sizeof(domain),
 				&buf, sizeof(buf));
 	if (ret)
-		return ret;
+		return ERR_PTR(ret);
 
-	info = devm_kmalloc(dev, sizeof(*info), GFP_KERNEL);
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	info->count = buf.opp_count;
-	info->latency = le16_to_cpu(buf.latency) * 1000; /* uS to nS */
+	info->count = DVFS_OPP_COUNT(buf.header);
+	info->latency = DVFS_LATENCY(buf.header) * 1000; /* uS to nS */
 
-	info->opps = devm_kcalloc(dev, info->count, sizeof(*opp), GFP_KERNEL);
-	if (!info->opps)
-		return -ENOMEM;
+	info->opps = kcalloc(info->count, sizeof(*opp), GFP_KERNEL);
+	if (!info->opps) {
+		kfree(info);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	for (i = 0, opp = info->opps; i < info->count; i++, opp++) {
 		opp->freq = le32_to_cpu(buf.opps[i].freq);
@@ -669,15 +682,7 @@ static int scpi_dvfs_populate_info(struct device *dev, u8 domain)
 	sort(info->opps, info->count, sizeof(*opp), opp_cmp_func, NULL);
 
 	scpi_info->dvfs[domain] = info;
-	return 0;
-}
-
-static void scpi_dvfs_populate(struct device *dev)
-{
-	int domain;
-
-	for (domain = 0; domain < MAX_DVFS_DOMAINS; domain++)
-		scpi_dvfs_populate_info(dev, domain);
+	return info;
 }
 
 static int scpi_dev_domain_id(struct device *dev)
@@ -708,6 +713,9 @@ static int scpi_dvfs_get_transition_latency(struct device *dev)
 	if (IS_ERR(info))
 		return PTR_ERR(info);
 
+	if (!info->latency)
+		return 0;
+
 	return info->latency;
 }
 
@@ -768,19 +776,20 @@ static int scpi_sensor_get_info(u16 sensor_id, struct scpi_sensor_info *info)
 static int scpi_sensor_get_value(u16 sensor, u64 *val)
 {
 	__le16 id = cpu_to_le16(sensor);
-	__le64 value;
+	struct sensor_value buf;
 	int ret;
 
 	ret = scpi_send_message(CMD_SENSOR_VALUE, &id, sizeof(id),
-				&value, sizeof(value));
+				&buf, sizeof(buf));
 	if (ret)
 		return ret;
 
 	if (scpi_info->is_legacy)
-		/* only 32-bits supported, upper 32 bits can be junk */
-		*val = le32_to_cpup((__le32 *)&value);
+		/* only 32-bits supported, hi_val can be junk */
+		*val = le32_to_cpu(buf.lo_val);
 	else
-		*val = le64_to_cpu(value);
+		*val = (u64)le32_to_cpu(buf.hi_val) << 32 |
+			le32_to_cpu(buf.lo_val);
 
 	return 0;
 }
@@ -853,19 +862,23 @@ static int scpi_init_versions(struct scpi_drvinfo *info)
 static ssize_t protocol_version_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%lu.%lu\n",
-		FIELD_GET(PROTO_REV_MAJOR_MASK, scpi_info->protocol_version),
-		FIELD_GET(PROTO_REV_MINOR_MASK, scpi_info->protocol_version));
+	struct scpi_drvinfo *scpi_info = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d.%d\n",
+		       PROTOCOL_REV_MAJOR(scpi_info->protocol_version),
+		       PROTOCOL_REV_MINOR(scpi_info->protocol_version));
 }
 static DEVICE_ATTR_RO(protocol_version);
 
 static ssize_t firmware_version_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%lu.%lu.%lu\n",
-		     FIELD_GET(FW_REV_MAJOR_MASK, scpi_info->firmware_version),
-		     FIELD_GET(FW_REV_MINOR_MASK, scpi_info->firmware_version),
-		     FIELD_GET(FW_REV_PATCH_MASK, scpi_info->firmware_version));
+	struct scpi_drvinfo *scpi_info = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d.%d.%d\n",
+		       FW_REV_MAJOR(scpi_info->firmware_version),
+		       FW_REV_MINOR(scpi_info->firmware_version),
+		       FW_REV_PATCH(scpi_info->firmware_version));
 }
 static DEVICE_ATTR_RO(firmware_version);
 
@@ -876,13 +889,39 @@ static struct attribute *versions_attrs[] = {
 };
 ATTRIBUTE_GROUPS(versions);
 
-static void scpi_free_channels(void *data)
+static void
+scpi_free_channels(struct device *dev, struct scpi_chan *pchan, int count)
 {
-	struct scpi_drvinfo *info = data;
 	int i;
 
-	for (i = 0; i < info->num_chans; i++)
-		mbox_free_channel(info->channels[i].chan);
+	for (i = 0; i < count && pchan->chan; i++, pchan++) {
+		mbox_free_channel(pchan->chan);
+		devm_kfree(dev, pchan->xfers);
+		devm_iounmap(dev, pchan->rx_payload);
+	}
+}
+
+static int scpi_remove(struct platform_device *pdev)
+{
+	int i;
+	struct device *dev = &pdev->dev;
+	struct scpi_drvinfo *info = platform_get_drvdata(pdev);
+
+	scpi_info = NULL; /* stop exporting SCPI ops through get_scpi_ops */
+
+	of_platform_depopulate(dev);
+	sysfs_remove_groups(&dev->kobj, versions_groups);
+	scpi_free_channels(dev, info->channels, info->num_chans);
+	platform_set_drvdata(pdev, NULL);
+
+	for (i = 0; i < MAX_DVFS_DOMAINS && info->dvfs[i]; i++) {
+		kfree(info->dvfs[i]->opps);
+		kfree(info->dvfs[i]);
+	}
+	devm_kfree(dev, info->channels);
+	devm_kfree(dev, info);
+
+	return 0;
 }
 
 #define MAX_SCPI_XFERS		10
@@ -913,6 +952,7 @@ static int scpi_probe(struct platform_device *pdev)
 {
 	int count, idx, ret;
 	struct resource res;
+	struct scpi_chan *scpi_chan;
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
 
@@ -929,19 +969,13 @@ static int scpi_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	scpi_info->channels = devm_kcalloc(dev, count, sizeof(struct scpi_chan),
-					   GFP_KERNEL);
-	if (!scpi_info->channels)
+	scpi_chan = devm_kcalloc(dev, count, sizeof(*scpi_chan), GFP_KERNEL);
+	if (!scpi_chan)
 		return -ENOMEM;
 
-	ret = devm_add_action(dev, scpi_free_channels, scpi_info);
-	if (ret)
-		return ret;
-
-	for (; scpi_info->num_chans < count; scpi_info->num_chans++) {
+	for (idx = 0; idx < count; idx++) {
 		resource_size_t size;
-		int idx = scpi_info->num_chans;
-		struct scpi_chan *pchan = scpi_info->channels + idx;
+		struct scpi_chan *pchan = scpi_chan + idx;
 		struct mbox_client *cl = &pchan->cl;
 		struct device_node *shmem = of_parse_phandle(np, "shmem", idx);
 
@@ -949,14 +983,15 @@ static int scpi_probe(struct platform_device *pdev)
 		of_node_put(shmem);
 		if (ret) {
 			dev_err(dev, "failed to get SCPI payload mem resource\n");
-			return ret;
+			goto err;
 		}
 
 		size = resource_size(&res);
 		pchan->rx_payload = devm_ioremap(dev, res.start, size);
 		if (!pchan->rx_payload) {
 			dev_err(dev, "failed to ioremap SCPI payload\n");
-			return -EADDRNOTAVAIL;
+			ret = -EADDRNOTAVAIL;
+			goto err;
 		}
 		pchan->tx_payload = pchan->rx_payload + (size >> 1);
 
@@ -982,11 +1017,17 @@ static int scpi_probe(struct platform_device *pdev)
 				dev_err(dev, "failed to get channel%d err %d\n",
 					idx, ret);
 		}
+err:
+		scpi_free_channels(dev, scpi_chan, idx);
+		scpi_info = NULL;
 		return ret;
 	}
 
+	scpi_info->channels = scpi_chan;
+	scpi_info->num_chans = count;
 	scpi_info->commands = scpi_std_commands;
-	scpi_info->scpi_ops = &scpi_ops;
+
+	platform_set_drvdata(pdev, scpi_info);
 
 	if (scpi_info->is_legacy) {
 		/* Replace with legacy variants */
@@ -1002,23 +1043,23 @@ static int scpi_probe(struct platform_device *pdev)
 	ret = scpi_init_versions(scpi_info);
 	if (ret) {
 		dev_err(dev, "incorrect or no SCP firmware found\n");
+		scpi_remove(pdev);
 		return ret;
 	}
 
-	scpi_dvfs_populate(dev);
-
-	_dev_info(dev, "SCP Protocol %lu.%lu Firmware %lu.%lu.%lu version\n",
-		  FIELD_GET(PROTO_REV_MAJOR_MASK, scpi_info->protocol_version),
-		  FIELD_GET(PROTO_REV_MINOR_MASK, scpi_info->protocol_version),
-		  FIELD_GET(FW_REV_MAJOR_MASK, scpi_info->firmware_version),
-		  FIELD_GET(FW_REV_MINOR_MASK, scpi_info->firmware_version),
-		  FIELD_GET(FW_REV_PATCH_MASK, scpi_info->firmware_version));
+	_dev_info(dev, "SCP Protocol %d.%d Firmware %d.%d.%d version\n",
+		  PROTOCOL_REV_MAJOR(scpi_info->protocol_version),
+		  PROTOCOL_REV_MINOR(scpi_info->protocol_version),
+		  FW_REV_MAJOR(scpi_info->firmware_version),
+		  FW_REV_MINOR(scpi_info->firmware_version),
+		  FW_REV_PATCH(scpi_info->firmware_version));
+	scpi_info->scpi_ops = &scpi_ops;
 
-	ret = devm_device_add_groups(dev, versions_groups);
+	ret = sysfs_create_groups(&dev->kobj, versions_groups);
 	if (ret)
 		dev_err(dev, "unable to create sysfs version group\n");
 
-	return devm_of_platform_populate(dev);
+	return of_platform_populate(dev->of_node, NULL, NULL, dev);
 }
 
 static const struct of_device_id scpi_of_match[] = {
@@ -1035,6 +1076,7 @@ static struct platform_driver scpi_driver = {
 		.of_match_table = scpi_of_match,
 	},
 	.probe = scpi_probe,
+	.remove = scpi_remove,
 };
 module_platform_driver(scpi_driver);
 
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index ec8ac5c4dd84f93e386db9871946c81ea1dbc8d5..055e2e8f985a3fbc5b334f8c0d414c8b6c51e3e3 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -20,10 +20,6 @@
 
 #define NO_FURTHER_WRITE_ACTION -1
 
-#ifndef phys_to_page
-#define phys_to_page(x)		pfn_to_page((x) >> PAGE_SHIFT)
-#endif
-
 /**
  * efi_free_all_buff_pages - free all previous allocated buffer pages
  * @cap_info: pointer to current instance of capsule_info structure
@@ -35,7 +31,7 @@
 static void efi_free_all_buff_pages(struct capsule_info *cap_info)
 {
 	while (cap_info->index > 0)
-		__free_page(phys_to_page(cap_info->pages[--cap_info->index]));
+		__free_page(cap_info->pages[--cap_info->index]);
 
 	cap_info->index = NO_FURTHER_WRITE_ACTION;
 }
@@ -71,6 +67,14 @@ int __efi_capsule_setup_info(struct capsule_info *cap_info)
 
 	cap_info->pages = temp_page;
 
+	temp_page = krealloc(cap_info->phys,
+			     pages_needed * sizeof(phys_addr_t *),
+			     GFP_KERNEL | __GFP_ZERO);
+	if (!temp_page)
+		return -ENOMEM;
+
+	cap_info->phys = temp_page;
+
 	return 0;
 }
 
@@ -105,9 +109,24 @@ int __weak efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
  **/
 static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
 {
+	bool do_vunmap = false;
 	int ret;
 
-	ret = efi_capsule_update(&cap_info->header, cap_info->pages);
+	/*
+	 * cap_info->capsule may have been assigned already by a quirk
+	 * handler, so only overwrite it if it is NULL
+	 */
+	if (!cap_info->capsule) {
+		cap_info->capsule = vmap(cap_info->pages, cap_info->index,
+					 VM_MAP, PAGE_KERNEL);
+		if (!cap_info->capsule)
+			return -ENOMEM;
+		do_vunmap = true;
+	}
+
+	ret = efi_capsule_update(cap_info->capsule, cap_info->phys);
+	if (do_vunmap)
+		vunmap(cap_info->capsule);
 	if (ret) {
 		pr_err("capsule update failed\n");
 		return ret;
@@ -165,10 +184,12 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
 			goto failed;
 		}
 
-		cap_info->pages[cap_info->index++] = page_to_phys(page);
+		cap_info->pages[cap_info->index] = page;
+		cap_info->phys[cap_info->index] = page_to_phys(page);
 		cap_info->page_bytes_remain = PAGE_SIZE;
+		cap_info->index++;
 	} else {
-		page = phys_to_page(cap_info->pages[cap_info->index - 1]);
+		page = cap_info->pages[cap_info->index - 1];
 	}
 
 	kbuff = kmap(page);
@@ -252,6 +273,7 @@ static int efi_capsule_release(struct inode *inode, struct file *file)
 	struct capsule_info *cap_info = file->private_data;
 
 	kfree(cap_info->pages);
+	kfree(cap_info->phys);
 	kfree(file->private_data);
 	file->private_data = NULL;
 	return 0;
@@ -281,6 +303,13 @@ static int efi_capsule_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 	}
 
+	cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL);
+	if (!cap_info->phys) {
+		kfree(cap_info->pages);
+		kfree(cap_info);
+		return -ENOMEM;
+	}
+
 	file->private_data = cap_info;
 
 	return 0;
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index f70febf680c392b37217ce5e6f8c8c4301234869..557a47829d03f2b14b0c3b664e1044e7e2cb86bc 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -109,6 +109,8 @@ struct kobject *efi_kobj;
 /*
  * Let's not leave out systab information that snuck into
  * the efivars driver
+ * Note, do not add more fields in systab sysfs file as it breaks sysfs
+ * one value per file rule!
  */
 static ssize_t systab_show(struct kobject *kobj,
 			   struct kobj_attribute *attr, char *buf)
@@ -143,8 +145,7 @@ static ssize_t systab_show(struct kobject *kobj,
 	return str - buf;
 }
 
-static struct kobj_attribute efi_attr_systab =
-			__ATTR(systab, 0400, systab_show, NULL);
+static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400);
 
 #define EFI_FIELD(var) efi.var
 
diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c
index bd7ed3c1148a7ccd5032e367e3af0ba66af18d20..c47e0c6ec00f858c0b9960f605974b4f2e4b1294 100644
--- a/drivers/firmware/efi/esrt.c
+++ b/drivers/firmware/efi/esrt.c
@@ -106,7 +106,7 @@ static const struct sysfs_ops esre_attr_ops = {
 };
 
 /* Generic ESRT Entry ("ESRE") support. */
-static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf)
+static ssize_t fw_class_show(struct esre_entry *entry, char *buf)
 {
 	char *str = buf;
 
@@ -117,18 +117,16 @@ static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf)
 	return str - buf;
 }
 
-static struct esre_attribute esre_fw_class = __ATTR(fw_class, 0400,
-	esre_fw_class_show, NULL);
+static struct esre_attribute esre_fw_class = __ATTR_RO_MODE(fw_class, 0400);
 
 #define esre_attr_decl(name, size, fmt) \
-static ssize_t esre_##name##_show(struct esre_entry *entry, char *buf) \
+static ssize_t name##_show(struct esre_entry *entry, char *buf) \
 { \
 	return sprintf(buf, fmt "\n", \
 		       le##size##_to_cpu(entry->esre.esre1->name)); \
 } \
 \
-static struct esre_attribute esre_##name = __ATTR(name, 0400, \
-	esre_##name##_show, NULL)
+static struct esre_attribute esre_##name = __ATTR_RO_MODE(name, 0400)
 
 esre_attr_decl(fw_type, 32, "%u");
 esre_attr_decl(fw_version, 32, "%u");
@@ -193,14 +191,13 @@ static int esre_create_sysfs_entry(void *esre, int entry_num)
 
 /* support for displaying ESRT fields at the top level */
 #define esrt_attr_decl(name, size, fmt) \
-static ssize_t esrt_##name##_show(struct kobject *kobj, \
+static ssize_t name##_show(struct kobject *kobj, \
 				  struct kobj_attribute *attr, char *buf)\
 { \
 	return sprintf(buf, fmt "\n", le##size##_to_cpu(esrt->name)); \
 } \
 \
-static struct kobj_attribute esrt_##name = __ATTR(name, 0400, \
-	esrt_##name##_show, NULL)
+static struct kobj_attribute esrt_##name = __ATTR_RO_MODE(name, 0400)
 
 esrt_attr_decl(fw_resource_count, 32, "%u");
 esrt_attr_decl(fw_resource_count_max, 32, "%u");
@@ -431,7 +428,7 @@ static int __init esrt_sysfs_init(void)
 err_remove_esrt:
 	kobject_put(esrt_kobj);
 err:
-	kfree(esrt);
+	memunmap(esrt);
 	esrt = NULL;
 	return error;
 }
diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c
index 8e64b77aeac95e43c0e0571694f42bbe6c8ba73f..f377609ff141bca733bf498babc25f9d215aefad 100644
--- a/drivers/firmware/efi/runtime-map.c
+++ b/drivers/firmware/efi/runtime-map.c
@@ -63,11 +63,11 @@ static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr,
 	return map_attr->show(entry, buf);
 }
 
-static struct map_attribute map_type_attr = __ATTR_RO(type);
-static struct map_attribute map_phys_addr_attr   = __ATTR_RO(phys_addr);
-static struct map_attribute map_virt_addr_attr  = __ATTR_RO(virt_addr);
-static struct map_attribute map_num_pages_attr  = __ATTR_RO(num_pages);
-static struct map_attribute map_attribute_attr  = __ATTR_RO(attribute);
+static struct map_attribute map_type_attr = __ATTR_RO_MODE(type, 0400);
+static struct map_attribute map_phys_addr_attr = __ATTR_RO_MODE(phys_addr, 0400);
+static struct map_attribute map_virt_addr_attr = __ATTR_RO_MODE(virt_addr, 0400);
+static struct map_attribute map_num_pages_attr = __ATTR_RO_MODE(num_pages, 0400);
+static struct map_attribute map_attribute_attr = __ATTR_RO_MODE(attribute, 0400);
 
 /*
  * These are default attributes that are added for every memmap entry.
diff --git a/drivers/firmware/google/vpd.c b/drivers/firmware/google/vpd.c
index 35e553b3b19051b45985991b9b66dc19366fc41e..e4b40f2b46274a0871d1cb881732e8358a324112 100644
--- a/drivers/firmware/google/vpd.c
+++ b/drivers/firmware/google/vpd.c
@@ -295,38 +295,60 @@ static int vpd_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	return vpd_sections_init(entry.cbmem_addr);
+	vpd_kobj = kobject_create_and_add("vpd", firmware_kobj);
+	if (!vpd_kobj)
+		return -ENOMEM;
+
+	ret = vpd_sections_init(entry.cbmem_addr);
+	if (ret) {
+		kobject_put(vpd_kobj);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int vpd_remove(struct platform_device *pdev)
+{
+	vpd_section_destroy(&ro_vpd);
+	vpd_section_destroy(&rw_vpd);
+
+	kobject_put(vpd_kobj);
+
+	return 0;
 }
 
 static struct platform_driver vpd_driver = {
 	.probe = vpd_probe,
+	.remove = vpd_remove,
 	.driver = {
 		.name = "vpd",
 	},
 };
 
+static struct platform_device *vpd_pdev;
+
 static int __init vpd_platform_init(void)
 {
-	struct platform_device *pdev;
-
-	pdev = platform_device_register_simple("vpd", -1, NULL, 0);
-	if (IS_ERR(pdev))
-		return PTR_ERR(pdev);
+	int ret;
 
-	vpd_kobj = kobject_create_and_add("vpd", firmware_kobj);
-	if (!vpd_kobj)
-		return -ENOMEM;
+	ret = platform_driver_register(&vpd_driver);
+	if (ret)
+		return ret;
 
-	platform_driver_register(&vpd_driver);
+	vpd_pdev = platform_device_register_simple("vpd", -1, NULL, 0);
+	if (IS_ERR(vpd_pdev)) {
+		platform_driver_unregister(&vpd_driver);
+		return PTR_ERR(vpd_pdev);
+	}
 
 	return 0;
 }
 
 static void __exit vpd_platform_exit(void)
 {
-	vpd_section_destroy(&ro_vpd);
-	vpd_section_destroy(&rw_vpd);
-	kobject_put(vpd_kobj);
+	platform_device_unregister(vpd_pdev);
+	platform_driver_unregister(&vpd_driver);
 }
 
 module_init(vpd_platform_init);
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 5cfe39f7a45f080f56f36eea6259ec4c1b1df8b6..deb483064f53c3e680d34b655360faac04853c3f 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -582,9 +582,10 @@ static int fw_cfg_sysfs_remove(struct platform_device *pdev)
 {
 	pr_debug("fw_cfg: unloading.\n");
 	fw_cfg_sysfs_cache_cleanup();
+	sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
+	fw_cfg_io_cleanup();
 	fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
 	fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
-	fw_cfg_io_cleanup();
 	return 0;
 }
 
diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c
index 6b535ec858cc35330baa773782dd51bcccd4da24..15a1f4b348c41b2915755dba173d71704a862768 100644
--- a/drivers/gpio/gpio-74x164.c
+++ b/drivers/gpio/gpio-74x164.c
@@ -23,6 +23,7 @@
 struct gen_74x164_chip {
 	struct gpio_chip	gpio_chip;
 	struct mutex		lock;
+	struct gpio_desc	*gpiod_oe;
 	u32			registers;
 	/*
 	 * Since the registers are chained, every byte sent will make
@@ -31,8 +32,7 @@ struct gen_74x164_chip {
 	 * register at the end of the transfer. So, to have a logical
 	 * numbering, store the bytes in reverse order.
 	 */
-	u8			buffer[0];
-	struct gpio_desc	*gpiod_oe;
+	u8			buffer[];
 };
 
 static int __gen_74x164_write_config(struct gen_74x164_chip *chip)
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index dfcf56ee3c6181fc64d0a074e2f5659fec35a6d8..76861a00bb92c4b449f346433f282da8e6cb82c9 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -522,6 +522,7 @@ static struct of_device_id const bcm_kona_gpio_of_match[] = {
  * category than their parents, so it won't report false recursion.
  */
 static struct lock_class_key gpio_lock_class;
+static struct lock_class_key gpio_request_class;
 
 static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq,
 				 irq_hw_number_t hwirq)
@@ -531,7 +532,7 @@ static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq,
 	ret = irq_set_chip_data(irq, d->host_data);
 	if (ret < 0)
 		return ret;
-	irq_set_lockdep_class(irq, &gpio_lock_class);
+	irq_set_lockdep_class(irq, &gpio_lock_class, &gpio_request_class);
 	irq_set_chip_and_handler(irq, &bcm_gpio_irq_chip, handle_simple_irq);
 	irq_set_noprobe(irq);
 
diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c
index 545d43a587b7ef1308dc827ffbea2c2dd733b478..bb4f8cf18bd9f6c7e47328aaf2ec1cdd3dfc3d6b 100644
--- a/drivers/gpio/gpio-brcmstb.c
+++ b/drivers/gpio/gpio-brcmstb.c
@@ -327,6 +327,7 @@ static struct brcmstb_gpio_bank *brcmstb_gpio_hwirq_to_bank(
  * category than their parents, so it won't report false recursion.
  */
 static struct lock_class_key brcmstb_gpio_irq_lock_class;
+static struct lock_class_key brcmstb_gpio_irq_request_class;
 
 
 static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq,
@@ -346,7 +347,8 @@ static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq,
 	ret = irq_set_chip_data(irq, &bank->gc);
 	if (ret < 0)
 		return ret;
-	irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class);
+	irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class,
+			      &brcmstb_gpio_irq_request_class);
 	irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq);
 	irq_set_noprobe(irq);
 	return 0;
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index f75d8443ecaff631d07e8b474e2bdf769357adb0..e4b3d7db68c95a2d87b9766e54f688fe2dd13f36 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -383,7 +383,7 @@ static int gpio_irq_type_unbanked(struct irq_data *data, unsigned trigger)
 	u32 mask;
 
 	d = (struct davinci_gpio_controller *)irq_data_get_irq_handler_data(data);
-	g = (struct davinci_gpio_regs __iomem *)d->regs;
+	g = (struct davinci_gpio_regs __iomem *)d->regs[0];
 	mask = __gpio_mask(data->irq - d->base_irq);
 
 	if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index babb7bd2ba59b60aa723fa10606c186862a12af3..a0a5f9730aa77b92ea5bc520c22f64b386edb583 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -947,7 +947,7 @@ static const struct of_device_id pca953x_dt_ids[] = {
 	{ .compatible = "ti,tca6416", .data = OF_953X(16, PCA_INT), },
 	{ .compatible = "ti,tca6424", .data = OF_953X(24, PCA_INT), },
 
-	{ .compatible = "onsemi,pca9654", .data = OF_953X( 8, PCA_INT), },
+	{ .compatible = "onnn,pca9654", .data = OF_953X( 8, PCA_INT), },
 
 	{ .compatible = "exar,xra1202", .data = OF_953X( 8, 0), },
 	{ }
diff --git a/drivers/gpio/gpio-reg.c b/drivers/gpio/gpio-reg.c
index 23e771dba4c17ab7a497feb37c108f15fe117c79..e85903eddc68ecd5930df5ded45efbc398aa57ca 100644
--- a/drivers/gpio/gpio-reg.c
+++ b/drivers/gpio/gpio-reg.c
@@ -103,8 +103,8 @@ static int gpio_reg_to_irq(struct gpio_chip *gc, unsigned offset)
 	struct gpio_reg *r = to_gpio_reg(gc);
 	int irq = r->irqs[offset];
 
-	if (irq >= 0 && r->irq.domain)
-		irq = irq_find_mapping(r->irq.domain, irq);
+	if (irq >= 0 && r->irqdomain)
+		irq = irq_find_mapping(r->irqdomain, irq);
 
 	return irq;
 }
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index 8db47f671708752dbaae420478935f1613847dd3..02fa8fe2292a13608e332d6247e9c0b55870ae98 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -565,6 +565,7 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = {
  * than their parents, so it won't report false recursion.
  */
 static struct lock_class_key gpio_lock_class;
+static struct lock_class_key gpio_request_class;
 
 static int tegra_gpio_probe(struct platform_device *pdev)
 {
@@ -670,7 +671,8 @@ static int tegra_gpio_probe(struct platform_device *pdev)
 
 		bank = &tgi->bank_info[GPIO_BANK(gpio)];
 
-		irq_set_lockdep_class(irq, &gpio_lock_class);
+		irq_set_lockdep_class(irq, &gpio_lock_class,
+				      &gpio_request_class);
 		irq_set_chip_data(irq, bank);
 		irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq);
 	}
diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c
index 2313af82fad3d4bb3cf7027bec9f25d9e6b5887c..acd59113e08b9cda0fce6930981520a481ec7203 100644
--- a/drivers/gpio/gpio-xgene-sb.c
+++ b/drivers/gpio/gpio-xgene-sb.c
@@ -139,7 +139,7 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio)
 
 static int xgene_gpio_sb_domain_activate(struct irq_domain *d,
 					 struct irq_data *irq_data,
-					 bool early)
+					 bool reserve)
 {
 	struct xgene_gpio_sb *priv = d->host_data;
 	u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq);
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index eb4528c87c0b3977420a2108c7feaaf9b2a95869..d6f3d9ee1350e422e3f373427d55d6ce8b6be565 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip)
 	}
 
 	if (!chip->names)
-		devprop_gpiochip_set_names(chip);
+		devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent));
 
 	acpi_gpiochip_request_regions(acpi_gpio);
 	acpi_gpiochip_scan_gpios(acpi_gpio);
diff --git a/drivers/gpio/gpiolib-devprop.c b/drivers/gpio/gpiolib-devprop.c
index 27f383bda7d9621322a3340d9def92354d21d7a3..f748aa3e77f72000b357718d5c955abba38c5c07 100644
--- a/drivers/gpio/gpiolib-devprop.c
+++ b/drivers/gpio/gpiolib-devprop.c
@@ -19,30 +19,27 @@
 /**
  * devprop_gpiochip_set_names - Set GPIO line names using device properties
  * @chip: GPIO chip whose lines should be named, if possible
+ * @fwnode: Property Node containing the gpio-line-names property
  *
  * Looks for device property "gpio-line-names" and if it exists assigns
  * GPIO line names for the chip. The memory allocated for the assigned
  * names belong to the underlying firmware node and should not be released
  * by the caller.
  */
-void devprop_gpiochip_set_names(struct gpio_chip *chip)
+void devprop_gpiochip_set_names(struct gpio_chip *chip,
+				const struct fwnode_handle *fwnode)
 {
 	struct gpio_device *gdev = chip->gpiodev;
 	const char **names;
 	int ret, i;
 
-	if (!chip->parent) {
-		dev_warn(&gdev->dev, "GPIO chip parent is NULL\n");
-		return;
-	}
-
-	ret = device_property_read_string_array(chip->parent, "gpio-line-names",
+	ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
 						NULL, 0);
 	if (ret < 0)
 		return;
 
 	if (ret != gdev->ngpio) {
-		dev_warn(chip->parent,
+		dev_warn(&gdev->dev,
 			 "names %d do not match number of GPIOs %d\n", ret,
 			 gdev->ngpio);
 		return;
@@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip)
 	if (!names)
 		return;
 
-	ret = device_property_read_string_array(chip->parent, "gpio-line-names",
+	ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
 						names, gdev->ngpio);
 	if (ret < 0) {
-		dev_warn(chip->parent, "failed to read GPIO line names\n");
+		dev_warn(&gdev->dev, "failed to read GPIO line names\n");
 		kfree(names);
 		return;
 	}
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index e0d59e61b52fa6aa53e7830ae1ab1c59ceec2453..72a0695d2ac3a3d3217462ff4ea85147921f5dcd 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip)
 
 	/* If the chip defines names itself, these take precedence */
 	if (!chip->names)
-		devprop_gpiochip_set_names(chip);
+		devprop_gpiochip_set_names(chip,
+					   of_fwnode_handle(chip->of_node));
 
 	of_node_get(chip->of_node);
 
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index aad84a6306c4e5ddbc3364d58add85fa1b1e2583..14532d9576e4239ff60a193bec13a1a819c338b8 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -73,7 +73,8 @@ LIST_HEAD(gpio_devices);
 
 static void gpiochip_free_hogs(struct gpio_chip *chip);
 static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
-				struct lock_class_key *key);
+				struct lock_class_key *lock_key,
+				struct lock_class_key *request_key);
 static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
 static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip);
 static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip);
@@ -1100,7 +1101,8 @@ static void gpiochip_setup_devs(void)
 }
 
 int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
-			       struct lock_class_key *key)
+			       struct lock_class_key *lock_key,
+			       struct lock_class_key *request_key)
 {
 	unsigned long	flags;
 	int		status = 0;
@@ -1246,7 +1248,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
 	if (status)
 		goto err_remove_from_list;
 
-	status = gpiochip_add_irqchip(chip, key);
+	status = gpiochip_add_irqchip(chip, lock_key, request_key);
 	if (status)
 		goto err_remove_chip;
 
@@ -1632,7 +1634,7 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq,
 	 * This lock class tells lockdep that GPIO irqs are in a different
 	 * category than their parents, so it won't report false recursion.
 	 */
-	irq_set_lockdep_class(irq, chip->irq.lock_key);
+	irq_set_lockdep_class(irq, chip->irq.lock_key, chip->irq.request_key);
 	irq_set_chip_and_handler(irq, chip->irq.chip, chip->irq.handler);
 	/* Chips that use nested thread handlers have them marked */
 	if (chip->irq.threaded)
@@ -1712,10 +1714,12 @@ static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset)
 /**
  * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip
  * @gpiochip: the GPIO chip to add the IRQ chip to
- * @lock_key: lockdep class
+ * @lock_key: lockdep class for IRQ lock
+ * @request_key: lockdep class for IRQ request
  */
 static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
-				struct lock_class_key *lock_key)
+				struct lock_class_key *lock_key,
+				struct lock_class_key *request_key)
 {
 	struct irq_chip *irqchip = gpiochip->irq.chip;
 	const struct irq_domain_ops *ops;
@@ -1753,6 +1757,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
 	gpiochip->to_irq = gpiochip_to_irq;
 	gpiochip->irq.default_type = type;
 	gpiochip->irq.lock_key = lock_key;
+	gpiochip->irq.request_key = request_key;
 
 	if (gpiochip->irq.domain_ops)
 		ops = gpiochip->irq.domain_ops;
@@ -1850,7 +1855,8 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip)
  * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE
  * to have the core avoid setting up any default type in the hardware.
  * @threaded: whether this irqchip uses a nested thread handler
- * @lock_key: lockdep class
+ * @lock_key: lockdep class for IRQ lock
+ * @request_key: lockdep class for IRQ request
  *
  * This function closely associates a certain irqchip with a certain
  * gpiochip, providing an irq domain to translate the local IRQs to
@@ -1872,7 +1878,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
 			     irq_flow_handler_t handler,
 			     unsigned int type,
 			     bool threaded,
-			     struct lock_class_key *lock_key)
+			     struct lock_class_key *lock_key,
+			     struct lock_class_key *request_key)
 {
 	struct device_node *of_node;
 
@@ -1913,6 +1920,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
 	gpiochip->irq.default_type = type;
 	gpiochip->to_irq = gpiochip_to_irq;
 	gpiochip->irq.lock_key = lock_key;
+	gpiochip->irq.request_key = request_key;
 	gpiochip->irq.domain = irq_domain_add_simple(of_node,
 					gpiochip->ngpio, first_irq,
 					&gpiochip_domain_ops, gpiochip);
@@ -1940,7 +1948,8 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key);
 #else /* CONFIG_GPIOLIB_IRQCHIP */
 
 static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
-				       struct lock_class_key *key)
+				       struct lock_class_key *lock_key,
+				       struct lock_class_key *request_key)
 {
 	return 0;
 }
@@ -2883,6 +2892,27 @@ void gpiod_set_raw_value(struct gpio_desc *desc, int value)
 }
 EXPORT_SYMBOL_GPL(gpiod_set_raw_value);
 
+/**
+ * gpiod_set_value_nocheck() - set a GPIO line value without checking
+ * @desc: the descriptor to set the value on
+ * @value: value to set
+ *
+ * This sets the value of a GPIO line backing a descriptor, applying
+ * different semantic quirks like active low and open drain/source
+ * handling.
+ */
+static void gpiod_set_value_nocheck(struct gpio_desc *desc, int value)
+{
+	if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
+		value = !value;
+	if (test_bit(FLAG_OPEN_DRAIN, &desc->flags))
+		gpio_set_open_drain_value_commit(desc, value);
+	else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags))
+		gpio_set_open_source_value_commit(desc, value);
+	else
+		gpiod_set_raw_value_commit(desc, value);
+}
+
 /**
  * gpiod_set_value() - assign a gpio's value
  * @desc: gpio whose value will be assigned
@@ -2897,16 +2927,8 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_value);
 void gpiod_set_value(struct gpio_desc *desc, int value)
 {
 	VALIDATE_DESC_VOID(desc);
-	/* Should be using gpiod_set_value_cansleep() */
 	WARN_ON(desc->gdev->chip->can_sleep);
-	if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
-		value = !value;
-	if (test_bit(FLAG_OPEN_DRAIN, &desc->flags))
-		gpio_set_open_drain_value_commit(desc, value);
-	else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags))
-		gpio_set_open_source_value_commit(desc, value);
-	else
-		gpiod_set_raw_value_commit(desc, value);
+	gpiod_set_value_nocheck(desc, value);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_value);
 
@@ -3234,9 +3256,7 @@ void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
 {
 	might_sleep_if(extra_checks);
 	VALIDATE_DESC_VOID(desc);
-	if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
-		value = !value;
-	gpiod_set_raw_value_commit(desc, value);
+	gpiod_set_value_nocheck(desc, value);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep);
 
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index af48322839c3d6004ee16a32591f1f434d364fc8..6c44d165213910a259ee94c85731ab4d1ca68431 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -228,7 +228,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc)
 	return desc - &desc->gdev->descs[0];
 }
 
-void devprop_gpiochip_set_names(struct gpio_chip *chip);
+void devprop_gpiochip_set_names(struct gpio_chip *chip,
+				const struct fwnode_handle *fwnode);
 
 /* With descriptor prefix */
 
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index d853989848d68a459548e7e36fc5a92973035e8f..deeefa7a1773b7b159e531f9eb6d365ebe77bbb8 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -27,10 +27,6 @@ config DRM_MIPI_DSI
 	bool
 	depends on DRM
 
-# Separate option because drm_panel_orientation_quirks.c is shared with fbdev
-config DRM_PANEL_ORIENTATION_QUIRKS
-	tristate
-
 config DRM_DP_AUX_CHARDEV
 	bool "DRM DP AUX Interface"
 	depends on DRM
@@ -154,6 +150,10 @@ config DRM_VM
 	bool
 	depends on DRM && MMU
 
+config DRM_SCHED
+	tristate
+	depends on DRM
+
 source "drivers/gpu/drm/i2c/Kconfig"
 
 source "drivers/gpu/drm/arm/Kconfig"
@@ -183,6 +183,7 @@ config DRM_AMDGPU
 	depends on DRM && PCI && MMU
 	select FW_LOADER
         select DRM_KMS_HELPER
+	select DRM_SCHED
         select DRM_TTM
 	select POWER_SUPPLY
 	select HWMON
@@ -367,6 +368,10 @@ config DRM_SAVAGE
 
 endif # DRM_LEGACY
 
+# Separate option because drm_panel_orientation_quirks.c is shared with fbdev
+config DRM_PANEL_ORIENTATION_QUIRKS
+	tristate
+
 config DRM_LIB_RANDOM
 	bool
 	default n
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index e5bf68b9c1714583f27606b12d3a1c5fa51edb81..50093ff4479b4c41f8ca2d63d83b9423bc535ada 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
 obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
 obj-$(CONFIG_DRM_ARM)	+= arm/
 obj-$(CONFIG_DRM_TTM)	+= ttm/
+obj-$(CONFIG_DRM_SCHED)	+= scheduler/
 obj-$(CONFIG_DRM_TDFX)	+= tdfx/
 obj-$(CONFIG_DRM_R128)	+= r128/
 obj-y			+= amd/lib/
diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile
index 8a08e81ee90d579774ca96bc70853093ba623f09..d4176a3fb7062537e81010fbb5ea42be15b55a29 100644
--- a/drivers/gpu/drm/amd/acp/Makefile
+++ b/drivers/gpu/drm/amd/acp/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the ACP, which is a sub-component
 # of AMDSOC/AMDGPU drm driver.
 # It provides the HW control for ACP related functionalities.
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 78d609123420455a1d8811eaeb6e91ebaa26e626..d6e5b727385304e47d0cb3e6e418dcbce8f26a89 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -1,4 +1,24 @@
-# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
 #
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
@@ -32,7 +52,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-	amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o
+	amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o \
+	amdgpu_ids.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@@ -42,7 +63,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
 amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
 
 amdgpu-y += \
-	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o
+	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
 
 # add GMC block
 amdgpu-y += \
@@ -115,10 +136,7 @@ amdgpu-y += \
 amdgpu-y += amdgpu_cgs.o
 
 # GPU scheduler
-amdgpu-y += \
-	../scheduler/gpu_scheduler.o \
-	../scheduler/sched_fence.o \
-	amdgpu_job.o
+amdgpu-y += amdgpu_job.o
 
 # ACP componet
 ifneq ($(CONFIG_DRM_AMD_ACP),)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5e2958a79928d0b4b4a0571cb50bca734f48cb28..d5a2eefd6c3e9c634597dba673a6ee25a60c830d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -45,6 +45,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_gem.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/gpu_scheduler.h>
 
 #include <kgd_kfd_interface.h>
 #include "dm_pp_interface.h"
@@ -68,10 +69,9 @@
 #include "amdgpu_vcn.h"
 #include "amdgpu_mn.h"
 #include "amdgpu_dm.h"
-#include "gpu_scheduler.h"
 #include "amdgpu_virt.h"
 #include "amdgpu_gart.h"
-
+#include "amdgpu_debugfs.h"
 
 /*
  * Modules parameters.
@@ -126,6 +126,7 @@ extern int amdgpu_param_buf_per_se;
 extern int amdgpu_job_hang_limit;
 extern int amdgpu_lbpw;
 extern int amdgpu_compute_multipipe;
+extern int amdgpu_gpu_recovery;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
@@ -223,17 +224,18 @@ enum amdgpu_kiq_irq {
 	AMDGPU_CP_KIQ_IRQ_LAST
 };
 
-int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
-				  enum amd_ip_block_type block_type,
-				  enum amd_clockgating_state state);
-int amdgpu_set_powergating_state(struct amdgpu_device *adev,
-				  enum amd_ip_block_type block_type,
-				  enum amd_powergating_state state);
-void amdgpu_get_clockgating_state(struct amdgpu_device *adev, u32 *flags);
-int amdgpu_wait_for_idle(struct amdgpu_device *adev,
-			 enum amd_ip_block_type block_type);
-bool amdgpu_is_idle(struct amdgpu_device *adev,
-		    enum amd_ip_block_type block_type);
+int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+					   enum amd_ip_block_type block_type,
+					   enum amd_clockgating_state state);
+int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+					   enum amd_ip_block_type block_type,
+					   enum amd_powergating_state state);
+void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
+					    u32 *flags);
+int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
+				   enum amd_ip_block_type block_type);
+bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
+			      enum amd_ip_block_type block_type);
 
 #define AMDGPU_MAX_IP_NUM 16
 
@@ -258,15 +260,16 @@ struct amdgpu_ip_block {
 	const struct amdgpu_ip_block_version *version;
 };
 
-int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev,
-				enum amd_ip_block_type type,
-				u32 major, u32 minor);
+int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
+				       enum amd_ip_block_type type,
+				       u32 major, u32 minor);
 
-struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev,
-					     enum amd_ip_block_type type);
+struct amdgpu_ip_block *
+amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
+			      enum amd_ip_block_type type);
 
-int amdgpu_ip_block_add(struct amdgpu_device *adev,
-			const struct amdgpu_ip_block_version *ip_block_version);
+int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
+			       const struct amdgpu_ip_block_version *ip_block_version);
 
 /* provided by hw blocks that can move/clear data.  e.g., gfx or sdma */
 struct amdgpu_buffer_funcs {
@@ -346,8 +349,9 @@ struct amdgpu_gart_funcs {
 	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
 				     uint32_t flags);
 	/* get the pde for a given mc addr */
-	u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr);
-	uint32_t (*get_invalidate_req)(unsigned int vm_id);
+	void (*get_vm_pde)(struct amdgpu_device *adev, int level,
+			   u64 *dst, u64 *flags);
+	uint32_t (*get_invalidate_req)(unsigned int vmid);
 };
 
 /* provided by the ih block */
@@ -373,9 +377,6 @@ struct amdgpu_dummy_page {
 	struct page	*page;
 	dma_addr_t	addr;
 };
-int amdgpu_dummy_page_init(struct amdgpu_device *adev);
-void amdgpu_dummy_page_fini(struct amdgpu_device *adev);
-
 
 /*
  * Clocks
@@ -423,7 +424,6 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
 void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
 void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
 int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
-int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
 
 /* sub-allocation manager, it has to be protected by another lock.
  * By conception this is an helper for other part of the driver
@@ -540,6 +540,7 @@ struct amdgpu_mc {
 	u64					private_aperture_end;
 	/* protects concurrent invalidation */
 	spinlock_t		invalidate_lock;
+	bool			translate_further;
 };
 
 /*
@@ -650,12 +651,6 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
 	AMDGPU_DOORBELL64_INVALID                 = 0xFFFF
 } AMDGPU_DOORBELL64_ASSIGNMENT;
 
-
-void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
-				phys_addr_t *aperture_base,
-				size_t *aperture_size,
-				size_t *start_offset);
-
 /*
  * IRQS.
  */
@@ -689,7 +684,7 @@ struct amdgpu_ib {
 	uint32_t			flags;
 };
 
-extern const struct amd_sched_backend_ops amdgpu_sched_ops;
+extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 		     struct amdgpu_job **job, struct amdgpu_vm *vm);
@@ -699,7 +694,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
-		      struct amd_sched_entity *entity, void *owner,
+		      struct drm_sched_entity *entity, void *owner,
 		      struct dma_fence **f);
 
 /*
@@ -732,7 +727,7 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
 struct amdgpu_ctx_ring {
 	uint64_t		sequence;
 	struct dma_fence	**fences;
-	struct amd_sched_entity	entity;
+	struct drm_sched_entity	entity;
 };
 
 struct amdgpu_ctx {
@@ -746,8 +741,8 @@ struct amdgpu_ctx {
 	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
 	bool			preamble_presented;
-	enum amd_sched_priority init_priority;
-	enum amd_sched_priority override_priority;
+	enum drm_sched_priority init_priority;
+	enum drm_sched_priority override_priority;
 	struct mutex            lock;
 	atomic_t	guilty;
 };
@@ -767,7 +762,7 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 				   struct amdgpu_ring *ring, uint64_t seq);
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
-				  enum amd_sched_priority priority);
+				  enum drm_sched_priority priority);
 
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp);
@@ -964,6 +959,7 @@ struct amdgpu_gfx_config {
 };
 
 struct amdgpu_cu_info {
+	uint32_t simd_per_cu;
 	uint32_t max_waves_per_simd;
 	uint32_t wave_front_size;
 	uint32_t max_scratch_slots_per_cu;
@@ -1116,7 +1112,7 @@ struct amdgpu_cs_parser {
 #define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occured */
 
 struct amdgpu_job {
-	struct amd_sched_job    base;
+	struct drm_sched_job    base;
 	struct amdgpu_device	*adev;
 	struct amdgpu_vm	*vm;
 	struct amdgpu_ring	*ring;
@@ -1129,7 +1125,7 @@ struct amdgpu_job {
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
 	bool                    vm_needs_flush;
-	unsigned		vm_id;
+	unsigned		vmid;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;
 	uint32_t		gws_base, gws_size;
@@ -1170,10 +1166,10 @@ struct amdgpu_wb {
 	unsigned long		used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
 };
 
-int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb);
-void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb);
+int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
+void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
 
-void amdgpu_get_pcie_info(struct amdgpu_device *adev);
+void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
 
 /*
  * SDMA
@@ -1238,24 +1234,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
  */
 void amdgpu_test_moves(struct amdgpu_device *adev);
 
-/*
- * Debugfs
- */
-struct amdgpu_debugfs {
-	const struct drm_info_list	*files;
-	unsigned		num_files;
-};
-
-int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
-			     const struct drm_info_list *files,
-			     unsigned nfiles);
-int amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
-
-#if defined(CONFIG_DEBUG_FS)
-int amdgpu_debugfs_init(struct drm_minor *minor);
-#endif
-
-int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
 
 /*
  * amdgpu smumgr functions
@@ -1410,9 +1388,6 @@ struct amdgpu_fw_vram_usage {
 	void *va;
 };
 
-int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev);
-void amdgpu_fw_reserve_vram_fini(struct amdgpu_device *adev);
-
 /*
  * CGS
  */
@@ -1428,6 +1403,80 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
 
+
+/*
+ * amdgpu nbio functions
+ *
+ */
+struct nbio_hdp_flush_reg {
+	u32 ref_and_mask_cp0;
+	u32 ref_and_mask_cp1;
+	u32 ref_and_mask_cp2;
+	u32 ref_and_mask_cp3;
+	u32 ref_and_mask_cp4;
+	u32 ref_and_mask_cp5;
+	u32 ref_and_mask_cp6;
+	u32 ref_and_mask_cp7;
+	u32 ref_and_mask_cp8;
+	u32 ref_and_mask_cp9;
+	u32 ref_and_mask_sdma0;
+	u32 ref_and_mask_sdma1;
+};
+
+struct amdgpu_nbio_funcs {
+	const struct nbio_hdp_flush_reg *hdp_flush_reg;
+	u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
+	u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
+	u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
+	u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
+	u32 (*get_rev_id)(struct amdgpu_device *adev);
+	void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
+	void (*hdp_flush)(struct amdgpu_device *adev);
+	u32 (*get_memsize)(struct amdgpu_device *adev);
+	void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
+				    bool use_doorbell, int doorbell_index);
+	void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
+					 bool enable);
+	void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
+						  bool enable);
+	void (*ih_doorbell_range)(struct amdgpu_device *adev,
+				  bool use_doorbell, int doorbell_index);
+	void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+						 bool enable);
+	void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
+						bool enable);
+	void (*get_clockgating_state)(struct amdgpu_device *adev,
+				      u32 *flags);
+	void (*ih_control)(struct amdgpu_device *adev);
+	void (*init_registers)(struct amdgpu_device *adev);
+	void (*detect_hw_virt)(struct amdgpu_device *adev);
+};
+
+
+/* Define the HW IP blocks will be used in driver , add more if necessary */
+enum amd_hw_ip_block_type {
+	GC_HWIP = 1,
+	HDP_HWIP,
+	SDMA0_HWIP,
+	SDMA1_HWIP,
+	MMHUB_HWIP,
+	ATHUB_HWIP,
+	NBIO_HWIP,
+	MP0_HWIP,
+	UVD_HWIP,
+	VCN_HWIP = UVD_HWIP,
+	VCE_HWIP,
+	DF_HWIP,
+	DCE_HWIP,
+	OSSSYS_HWIP,
+	SMUIO_HWIP,
+	PWR_HWIP,
+	NBIF_HWIP,
+	MAX_HWIP
+};
+
+#define HWIP_MAX_INSTANCE	6
+
 struct amd_powerplay {
 	struct cgs_device *cgs_device;
 	void *pp_handle;
@@ -1620,6 +1669,11 @@ struct amdgpu_device {
 	/* amdkfd interface */
 	struct kfd_dev          *kfd;
 
+	/* soc15 register offset based on ip, instance and  segment */
+	uint32_t 		*reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
+
+	const struct amdgpu_nbio_funcs	*nbio_funcs;
+
 	/* delayed work_func for deferring clockgating during resume */
 	struct delayed_work     late_init_work;
 
@@ -1785,7 +1839,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
-#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr))
+#define amdgpu_gart_get_vm_pde(adev, level, dst, flags) (adev)->gart.gart_funcs->get_vm_pde((adev), (level), (dst), (flags))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
@@ -1796,7 +1850,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
 #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
 #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
-#define amdgpu_ring_emit_ib(r, ib, vm_id, c) (r)->funcs->emit_ib((r), (ib), (vm_id), (c))
+#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
 #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
 #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
 #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
@@ -1835,23 +1889,25 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
 
 /* Common functions */
-int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job);
-bool amdgpu_need_backup(struct amdgpu_device *adev);
-void amdgpu_pci_config_reset(struct amdgpu_device *adev);
-bool amdgpu_need_post(struct amdgpu_device *adev);
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+			      struct amdgpu_job* job, bool force);
+void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
+bool amdgpu_device_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 				  u64 num_vis_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
-void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
-void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
+void amdgpu_device_vram_location(struct amdgpu_device *adev,
+				 struct amdgpu_mc *mc, u64 base);
+void amdgpu_device_gart_location(struct amdgpu_device *adev,
+				 struct amdgpu_mc *mc);
 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
 void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
 int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
-void amdgpu_program_register_sequence(struct amdgpu_device *adev,
+void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 					     const u32 *registers,
 					     const u32 array_size);
 
@@ -1885,7 +1941,7 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
 int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
 void amdgpu_driver_postclose_kms(struct drm_device *dev,
 				 struct drm_file *file_priv);
-int amdgpu_suspend(struct amdgpu_device *adev);
+int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
 int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon);
 int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
 u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index c04f44a90392639e7ec3ae51da93a694ead70fee..a29362f9ef415d48a6a00ea94d0a6a8fa2b1d7b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -277,7 +277,7 @@ static int acp_hw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	const struct amdgpu_ip_block *ip_block =
-		amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);
+		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);
 
 	if (!ip_block)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c70cda04dbfb921a026313924b9f98c4e83e6904..1d605e1c1d66eb926e3ff102248cd79ccad3838c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -93,6 +93,39 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 				   adev->pdev, kfd2kgd);
 }
 
+/**
+ * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
+ *                                setup amdkfd
+ *
+ * @adev: amdgpu_device pointer
+ * @aperture_base: output returning doorbell aperture base physical address
+ * @aperture_size: output returning doorbell aperture size in bytes
+ * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
+ *
+ * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
+ * takes doorbells required for its own rings and reports the setup to amdkfd.
+ * amdgpu reserved doorbells are at the start of the doorbell aperture.
+ */
+static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
+					 phys_addr_t *aperture_base,
+					 size_t *aperture_size,
+					 size_t *start_offset)
+{
+	/*
+	 * The first num_doorbells are used by amdgpu.
+	 * amdkfd takes whatever's left in the aperture.
+	 */
+	if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
+		*aperture_base = adev->doorbell.base;
+		*aperture_size = adev->doorbell.size;
+		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
+	} else {
+		*aperture_base = 0;
+		*aperture_size = 0;
+		*start_offset = 0;
+	}
+}
+
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 	int i;
@@ -242,14 +275,34 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 	kfree(mem);
 }
 
-uint64_t get_vmem_size(struct kgd_dev *kgd)
+void get_local_mem_info(struct kgd_dev *kgd,
+			struct kfd_local_mem_info *mem_info)
 {
-	struct amdgpu_device *adev =
-		(struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
+					     ~((1ULL << 32) - 1);
+	resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size;
+
+	memset(mem_info, 0, sizeof(*mem_info));
+	if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) {
+		mem_info->local_mem_size_public = adev->mc.visible_vram_size;
+		mem_info->local_mem_size_private = adev->mc.real_vram_size -
+				adev->mc.visible_vram_size;
+	} else {
+		mem_info->local_mem_size_public = 0;
+		mem_info->local_mem_size_private = adev->mc.real_vram_size;
+	}
+	mem_info->vram_width = adev->mc.vram_width;
 
-	BUG_ON(kgd == NULL);
+	pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n",
+			&adev->mc.aper_base, &aper_limit,
+			mem_info->local_mem_size_public,
+			mem_info->local_mem_size_private);
 
-	return adev->mc.real_vram_size;
+	if (amdgpu_sriov_vf(adev))
+		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
+	else
+		mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
 }
 
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
@@ -265,6 +318,39 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	/* The sclk is in quantas of 10kHz */
-	return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+	/* the sclk is in quantas of 10kHz */
+	if (amdgpu_sriov_vf(adev))
+		return adev->clock.default_sclk / 100;
+
+	return amdgpu_dpm_get_sclk(adev, false) / 100;
+}
+
+void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
+
+	memset(cu_info, 0, sizeof(*cu_info));
+	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
+		return;
+
+	cu_info->cu_active_number = acu_info.number;
+	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
+	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
+	       sizeof(acu_info.bitmap));
+	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
+	cu_info->simd_per_cu = acu_info.simd_per_cu;
+	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
+	cu_info->wave_front_size = acu_info.wave_front_size;
+	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
+	cu_info->lds_size = acu_info.lds_size;
+}
+
+uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 8d689ab7e4291fad585448c6e679aa32556facff..2a519f9062eeaf3357c378f90b4c0ef8c0971fd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -56,10 +56,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **mem_obj, uint64_t *gpu_addr,
 			void **cpu_ptr);
 void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
-uint64_t get_vmem_size(struct kgd_dev *kgd);
+void get_local_mem_info(struct kgd_dev *kgd,
+			struct kfd_local_mem_info *mem_info);
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
 
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
+void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
+uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 
 #define read_user_wptr(mmptr, wptr, dst)				\
 	({								\
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 1e3e9be7d77ecf29883cf0ec5d5874f0cb67bd64..a9e6aea0e5f8fd1b68599786827b130ad8befa00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -105,7 +105,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr,
 			uint32_t wptr_shift, uint32_t wptr_mask,
 			struct mm_struct *mm);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id);
 
@@ -166,17 +173,19 @@ static int get_tile_config(struct kgd_dev *kgd,
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
 	.free_gtt_mem = free_gtt_mem,
-	.get_vmem_size = get_vmem_size,
+	.get_local_mem_info = get_local_mem_info,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
-	.alloc_pasid = amdgpu_vm_alloc_pasid,
-	.free_pasid = amdgpu_vm_free_pasid,
+	.alloc_pasid = amdgpu_pasid_alloc,
+	.free_pasid = amdgpu_pasid_free,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_pipeline = kgd_init_pipeline,
 	.init_interrupts = kgd_init_interrupts,
 	.hqd_load = kgd_hqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_dump = kgd_hqd_dump,
+	.hqd_sdma_dump = kgd_hqd_sdma_dump,
 	.hqd_is_occupied = kgd_hqd_is_occupied,
 	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
 	.hqd_destroy = kgd_hqd_destroy,
@@ -191,6 +200,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
+	.get_cu_info = get_cu_info,
+	.get_vram_usage = amdgpu_amdkfd_get_vram_usage
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -375,7 +386,44 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	return 0;
 }
 
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t i = 0, reg;
+#define HQD_N_REGS (35+4)
+#define DUMP_REG(addr) do {				\
+		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
+			break;				\
+		(*dump)[i][0] = (addr) << 2;		\
+		(*dump)[i++][1] = RREG32(addr);		\
+	} while (0)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+		DUMP_REG(reg);
+
+	release_queue(kgd);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct cik_sdma_rlc_registers *m;
@@ -410,10 +458,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
 		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
 	}
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
-				m->sdma_rlc_doorbell);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
+	data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
+			     ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);
+
+	if (read_user_wptr(mm, wptr, data))
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+	else
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		       m->sdma_rlc_rb_rptr);
+
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
 				m->sdma_rlc_virtual_addr);
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
@@ -423,8 +478,37 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
 			m->sdma_rlc_rb_rptr_addr_lo);
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
 			m->sdma_rlc_rb_rptr_addr_hi);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-			m->sdma_rlc_rb_cntl);
+
+	data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
+			     RB_ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+		queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
+	uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
 
 	return 0;
 }
@@ -575,7 +659,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 	struct cik_sdma_rlc_registers *m;
 	uint32_t sdma_base_addr;
 	uint32_t temp;
-	int timeout = utimeout;
+	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
 	m = get_sdma_mqd(mqd);
 	sdma_base_addr = get_sdma_base_addr(m);
@@ -588,10 +672,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
 			break;
-		if (timeout <= 0)
+		if (time_after(jiffies, end_jiffies))
 			return -ETIME;
-		msleep(20);
-		timeout -= 20;
+		usleep_range(500, 1000);
 	}
 
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
@@ -599,6 +682,8 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
 		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
 
+	m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 056929b8ccd04e29403ecb401b335aa1ea8d754a..b127259d7d8548c0d46b64c4c61ef11c51bb734e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -45,7 +45,7 @@ enum hqd_dequeue_request_type {
 	RESET_WAVES
 };
 
-struct cik_sdma_rlc_registers;
+struct vi_sdma_mqd;
 
 /*
  * Register access functions
@@ -64,7 +64,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr,
 			uint32_t wptr_shift, uint32_t wptr_mask,
 			struct mm_struct *mm);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 		uint32_t pipe_id, uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -125,17 +132,19 @@ static int get_tile_config(struct kgd_dev *kgd,
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
 	.free_gtt_mem = free_gtt_mem,
-	.get_vmem_size = get_vmem_size,
+	.get_local_mem_info = get_local_mem_info,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
-	.alloc_pasid = amdgpu_vm_alloc_pasid,
-	.free_pasid = amdgpu_vm_free_pasid,
+	.alloc_pasid = amdgpu_pasid_alloc,
+	.free_pasid = amdgpu_pasid_free,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_pipeline = kgd_init_pipeline,
 	.init_interrupts = kgd_init_interrupts,
 	.hqd_load = kgd_hqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_dump = kgd_hqd_dump,
+	.hqd_sdma_dump = kgd_hqd_sdma_dump,
 	.hqd_is_occupied = kgd_hqd_is_occupied,
 	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
 	.hqd_destroy = kgd_hqd_destroy,
@@ -152,6 +161,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
+	.get_cu_info = get_cu_info,
+	.get_vram_usage = amdgpu_amdkfd_get_vram_usage
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -268,9 +279,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	return 0;
 }
 
-static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
 {
-	return 0;
+	uint32_t retval;
+
+	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
+		m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+	pr_debug("kfd: sdma base address: 0x%x\n", retval);
+
+	return retval;
 }
 
 static inline struct vi_mqd *get_mqd(void *mqd)
@@ -278,9 +295,9 @@ static inline struct vi_mqd *get_mqd(void *mqd)
 	return (struct vi_mqd *)mqd;
 }
 
-static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
 {
-	return (struct cik_sdma_rlc_registers *)mqd;
+	return (struct vi_sdma_mqd *)mqd;
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@@ -358,8 +375,138 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	return 0;
 }
 
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs)
 {
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t i = 0, reg;
+#define HQD_N_REGS (54+4)
+#define DUMP_REG(addr) do {				\
+		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
+			break;				\
+		(*dump)[i][0] = (addr) << 2;		\
+		(*dump)[i++][1] = RREG32(addr);		\
+	} while (0)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
+		DUMP_REG(reg);
+
+	release_queue(kgd);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct vi_sdma_mqd *m;
+	unsigned long end_jiffies;
+	uint32_t sdma_base_addr;
+	uint32_t data;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(m);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+	end_jiffies = msecs_to_jiffies(2000) + jiffies;
+	while (true) {
+		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+			break;
+		if (time_after(jiffies, end_jiffies))
+			return -ETIME;
+		usleep_range(500, 1000);
+	}
+	if (m->sdma_engine_id) {
+		data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
+		data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
+				RESUME_CTX, 0);
+		WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
+	} else {
+		data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
+		data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+				RESUME_CTX, 0);
+		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
+	}
+
+	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+			     ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+
+	if (read_user_wptr(mm, wptr, data))
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+	else
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		       m->sdmax_rlcx_rb_rptr);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+				m->sdmax_rlcx_virtual_addr);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+			m->sdmax_rlcx_rb_base_hi);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+			m->sdmax_rlcx_rb_rptr_addr_lo);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+			m->sdmax_rlcx_rb_rptr_addr_hi);
+
+	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+			     RB_ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+		queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+	uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4+2+3+7)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
 	return 0;
 }
 
@@ -388,7 +535,7 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct cik_sdma_rlc_registers *m;
+	struct vi_sdma_mqd *m;
 	uint32_t sdma_base_addr;
 	uint32_t sdma_rlc_rb_cntl;
 
@@ -509,10 +656,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 				unsigned int utimeout)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct cik_sdma_rlc_registers *m;
+	struct vi_sdma_mqd *m;
 	uint32_t sdma_base_addr;
 	uint32_t temp;
-	int timeout = utimeout;
+	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
 	m = get_sdma_mqd(mqd);
 	sdma_base_addr = get_sdma_base_addr(m);
@@ -523,18 +670,19 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 
 	while (true) {
 		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-		if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
+		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (timeout <= 0)
+		if (time_after(jiffies, end_jiffies))
 			return -ETIME;
-		msleep(20);
-		timeout -= 20;
+		usleep_range(500, 1000);
 	}
 
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 39f4d0df1adaf2352f39ae9232a1108d3bb1d950..bf872f694f5090da9ebc1fc5b227bb45364ef7d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -27,6 +27,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
+#include "amdgpu_atomfirmware.h"
 #include "amdgpu_i2c.h"
 
 #include "atom.h"
@@ -1699,7 +1700,7 @@ void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock)
 	WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch);
 }
 
-void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev)
+static void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev)
 {
 	uint32_t bios_2_scratch, bios_6_scratch;
 
@@ -1721,28 +1722,6 @@ void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev)
 	WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch);
 }
 
-void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev)
-{
-	int i;
-
-	for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
-		adev->bios_scratch[i] = RREG32(adev->bios_scratch_reg_offset + i);
-}
-
-void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev)
-{
-	int i;
-
-	/*
-	 * VBIOS will check ASIC_INIT_COMPLETE bit to decide if
-	 * execute ASIC_Init posting via driver
-	 */
-	adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK;
-
-	for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
-		WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]);
-}
-
 void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
 					      bool hung)
 {
@@ -1798,7 +1777,7 @@ void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
 #endif
 }
 
-int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
+static int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
 {
 	struct atom_context *ctx = adev->mode_info.atom_context;
 	int index = GetIndexIntoMasterTable(DATA, VRAM_UsageByFirmware);
@@ -1841,3 +1820,234 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
 	ctx->scratch_size_bytes = usage_bytes;
 	return 0;
 }
+
+/* ATOM accessor methods */
+/*
+ * ATOM is an interpreted byte code stored in tables in the vbios.  The
+ * driver registers callbacks to access registers and the interpreter
+ * in the driver parses the tables and executes then to program specific
+ * actions (set display modes, asic init, etc.).  See amdgpu_atombios.c,
+ * atombios.h, and atom.c
+ */
+
+/**
+ * cail_pll_read - read PLL register
+ *
+ * @info: atom card_info pointer
+ * @reg: PLL register offset
+ *
+ * Provides a PLL register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the PLL register.
+ */
+static uint32_t cail_pll_read(struct card_info *info, uint32_t reg)
+{
+	return 0;
+}
+
+/**
+ * cail_pll_write - write PLL register
+ *
+ * @info: atom card_info pointer
+ * @reg: PLL register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a PLL register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+
+}
+
+/**
+ * cail_mc_read - read MC (Memory Controller) register
+ *
+ * @info: atom card_info pointer
+ * @reg: MC register offset
+ *
+ * Provides an MC register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the MC register.
+ */
+static uint32_t cail_mc_read(struct card_info *info, uint32_t reg)
+{
+	return 0;
+}
+
+/**
+ * cail_mc_write - write MC (Memory Controller) register
+ *
+ * @info: atom card_info pointer
+ * @reg: MC register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a MC register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+
+}
+
+/**
+ * cail_reg_write - write MMIO register
+ *
+ * @info: atom card_info pointer
+ * @reg: MMIO register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a MMIO register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+	struct amdgpu_device *adev = info->dev->dev_private;
+
+	WREG32(reg, val);
+}
+
+/**
+ * cail_reg_read - read MMIO register
+ *
+ * @info: atom card_info pointer
+ * @reg: MMIO register offset
+ *
+ * Provides an MMIO register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the MMIO register.
+ */
+static uint32_t cail_reg_read(struct card_info *info, uint32_t reg)
+{
+	struct amdgpu_device *adev = info->dev->dev_private;
+	uint32_t r;
+
+	r = RREG32(reg);
+	return r;
+}
+
+/**
+ * cail_ioreg_write - write IO register
+ *
+ * @info: atom card_info pointer
+ * @reg: IO register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a IO register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+	struct amdgpu_device *adev = info->dev->dev_private;
+
+	WREG32_IO(reg, val);
+}
+
+/**
+ * cail_ioreg_read - read IO register
+ *
+ * @info: atom card_info pointer
+ * @reg: IO register offset
+ *
+ * Provides an IO register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the IO register.
+ */
+static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg)
+{
+	struct amdgpu_device *adev = info->dev->dev_private;
+	uint32_t r;
+
+	r = RREG32_IO(reg);
+	return r;
+}
+
+static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
+						 struct device_attribute *attr,
+						 char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	struct atom_context *ctx = adev->mode_info.atom_context;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", ctx->vbios_version);
+}
+
+static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
+		   NULL);
+
+/**
+ * amdgpu_atombios_fini - free the driver info and callbacks for atombios
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Frees the driver info and register access callbacks for the ATOM
+ * interpreter (r4xx+).
+ * Called at driver shutdown.
+ */
+void amdgpu_atombios_fini(struct amdgpu_device *adev)
+{
+	if (adev->mode_info.atom_context) {
+		kfree(adev->mode_info.atom_context->scratch);
+		kfree(adev->mode_info.atom_context->iio);
+	}
+	kfree(adev->mode_info.atom_context);
+	adev->mode_info.atom_context = NULL;
+	kfree(adev->mode_info.atom_card_info);
+	adev->mode_info.atom_card_info = NULL;
+	device_remove_file(adev->dev, &dev_attr_vbios_version);
+}
+
+/**
+ * amdgpu_atombios_init - init the driver info and callbacks for atombios
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initializes the driver info and register access callbacks for the
+ * ATOM interpreter (r4xx+).
+ * Returns 0 on sucess, -ENOMEM on failure.
+ * Called at driver startup.
+ */
+int amdgpu_atombios_init(struct amdgpu_device *adev)
+{
+	struct card_info *atom_card_info =
+	    kzalloc(sizeof(struct card_info), GFP_KERNEL);
+	int ret;
+
+	if (!atom_card_info)
+		return -ENOMEM;
+
+	adev->mode_info.atom_card_info = atom_card_info;
+	atom_card_info->dev = adev->ddev;
+	atom_card_info->reg_read = cail_reg_read;
+	atom_card_info->reg_write = cail_reg_write;
+	/* needed for iio ops */
+	if (adev->rio_mem) {
+		atom_card_info->ioreg_read = cail_ioreg_read;
+		atom_card_info->ioreg_write = cail_ioreg_write;
+	} else {
+		DRM_DEBUG("PCI I/O BAR is not found. Using MMIO to access ATOM BIOS\n");
+		atom_card_info->ioreg_read = cail_reg_read;
+		atom_card_info->ioreg_write = cail_reg_write;
+	}
+	atom_card_info->mc_read = cail_mc_read;
+	atom_card_info->mc_write = cail_mc_write;
+	atom_card_info->pll_read = cail_pll_read;
+	atom_card_info->pll_write = cail_pll_write;
+
+	adev->mode_info.atom_context = amdgpu_atom_parse(atom_card_info, adev->bios);
+	if (!adev->mode_info.atom_context) {
+		amdgpu_atombios_fini(adev);
+		return -ENOMEM;
+	}
+
+	mutex_init(&adev->mode_info.atom_context->mutex);
+	if (adev->is_atom_fw) {
+		amdgpu_atomfirmware_scratch_regs_init(adev);
+		amdgpu_atomfirmware_allocate_fb_scratch(adev);
+	} else {
+		amdgpu_atombios_scratch_regs_init(adev);
+		amdgpu_atombios_allocate_fb_scratch(adev);
+	}
+
+	ret = device_create_file(adev->dev, &dev_attr_vbios_version);
+	if (ret) {
+		DRM_ERROR("Failed to create device file for VBIOS version\n");
+		return ret;
+	}
+
+	return 0;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index b0d5d1d7fdba15d6674fdfbc744c90592cee8c8c..fd8f18074f7ad4b138f2b506979da7de472f8b9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -195,9 +195,6 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
 bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
 
 void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
-void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev);
-void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev);
-void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev);
 void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
 					      bool hung);
 bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev);
@@ -219,6 +216,7 @@ int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
 			      u8 voltage_type,
 			      u8 *svd_gpio_id, u8 *svc_gpio_id);
 
-int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev);
+void amdgpu_atombios_fini(struct amdgpu_device *adev);
+int amdgpu_atombios_init(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index c13c51af0b681dbb1ee605b978a319ffaa4c7794..e2c3c5ec42d1557d58474441782bcfd788972a80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -14,6 +14,16 @@
 
 #include "amd_acpi.h"
 
+#define AMDGPU_PX_QUIRK_FORCE_ATPX  (1 << 0)
+
+struct amdgpu_px_quirk {
+	u32 chip_vendor;
+	u32 chip_device;
+	u32 subsys_vendor;
+	u32 subsys_device;
+	u32 px_quirk_flags;
+};
+
 struct amdgpu_atpx_functions {
 	bool px_params;
 	bool power_cntl;
@@ -35,6 +45,7 @@ struct amdgpu_atpx {
 static struct amdgpu_atpx_priv {
 	bool atpx_detected;
 	bool bridge_pm_usable;
+	unsigned int quirks;
 	/* handle for device - and atpx */
 	acpi_handle dhandle;
 	acpi_handle other_handle;
@@ -205,13 +216,19 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 
 	atpx->is_hybrid = false;
 	if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
-		printk("ATPX Hybrid Graphics\n");
-		/*
-		 * Disable legacy PM methods only when pcie port PM is usable,
-		 * otherwise the device might fail to power off or power on.
-		 */
-		atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable;
-		atpx->is_hybrid = true;
+		if (amdgpu_atpx_priv.quirks & AMDGPU_PX_QUIRK_FORCE_ATPX) {
+			printk("ATPX Hybrid Graphics, forcing to ATPX\n");
+			atpx->functions.power_cntl = true;
+			atpx->is_hybrid = false;
+		} else {
+			printk("ATPX Hybrid Graphics\n");
+			/*
+			 * Disable legacy PM methods only when pcie port PM is usable,
+			 * otherwise the device might fail to power off or power on.
+			 */
+			atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable;
+			atpx->is_hybrid = true;
+		}
 	}
 
 	atpx->dgpu_req_power_for_displays = false;
@@ -547,6 +564,30 @@ static const struct vga_switcheroo_handler amdgpu_atpx_handler = {
 	.get_client_id = amdgpu_atpx_get_client_id,
 };
 
+static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
+	/* HG _PR3 doesn't seem to work on this A+A weston board */
+	{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
+	{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
+	{ 0, 0, 0, 0, 0 },
+};
+
+static void amdgpu_atpx_get_quirks(struct pci_dev *pdev)
+{
+	const struct amdgpu_px_quirk *p = amdgpu_px_quirk_list;
+
+	/* Apply PX quirks */
+	while (p && p->chip_device != 0) {
+		if (pdev->vendor == p->chip_vendor &&
+		    pdev->device == p->chip_device &&
+		    pdev->subsystem_vendor == p->subsys_vendor &&
+		    pdev->subsystem_device == p->subsys_device) {
+			amdgpu_atpx_priv.quirks |= p->px_quirk_flags;
+			break;
+		}
+		++p;
+	}
+}
+
 /**
  * amdgpu_atpx_detect - detect whether we have PX
  *
@@ -570,6 +611,7 @@ static bool amdgpu_atpx_detect(void)
 
 		parent_pdev = pci_upstream_bridge(pdev);
 		d3_supported |= parent_pdev && parent_pdev->bridge_d3;
+		amdgpu_atpx_get_quirks(pdev);
 	}
 
 	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
@@ -579,6 +621,7 @@ static bool amdgpu_atpx_detect(void)
 
 		parent_pdev = pci_upstream_bridge(pdev);
 		d3_supported |= parent_pdev && parent_pdev->bridge_d3;
+		amdgpu_atpx_get_quirks(pdev);
 	}
 
 	if (has_atpx && vga_count == 2) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 057e1ecd83cec5746319adb88602eb95111104c6..a5df80d50d447a5dc0dd3a9a29c8ad21677e485c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -93,7 +93,7 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
 	resource_size_t size = 256 * 1024; /* ??? */
 
 	if (!(adev->flags & AMD_IS_APU))
-		if (amdgpu_need_post(adev))
+		if (amdgpu_device_need_post(adev))
 			return false;
 
 	adev->bios = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 85d2149b9dbeef43bb6e9ae3e54ae0c3240d74ec..4466f3535e2d95ffe8e4b2241741f7d795768437 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -801,6 +801,11 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 				else
 					strcpy(fw_name, "amdgpu/vega10_smc.bin");
 				break;
+			case CHIP_CARRIZO:
+			case CHIP_STONEY:
+			case CHIP_RAVEN:
+				adev->pm.fw_version = info->version;
+				return 0;
 			default:
 				DRM_ERROR("SMC firmware not supported\n");
 				return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 4cea9ab237ac3cd36813a610b5623746932a1a24..e80fc38141b57c5d00b0221fc2cd7c66dfe275dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -343,7 +343,12 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 				 struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	struct ttm_operation_ctx ctx = { true, false };
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+		.allow_reserved_eviction = false,
+		.resv = bo->tbo.resv
+	};
 	uint32_t domain;
 	int r;
 
@@ -773,10 +778,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	struct amdgpu_bo *bo;
 	int i, r;
 
-	r = amdgpu_vm_update_directories(adev, vm);
-	if (r)
-		return r;
-
 	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 	if (r)
 		return r;
@@ -834,6 +835,10 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
+	r = amdgpu_vm_update_directories(adev, vm);
+	if (r)
+		return r;
+
 	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
 	if (r)
 		return r;
@@ -1150,7 +1155,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_ring *ring = p->job->ring;
-	struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
 	struct amdgpu_job *job;
 	unsigned i;
 	uint64_t seq;
@@ -1173,7 +1178,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job = p->job;
 	p->job = NULL;
 
-	r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp);
+	r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp);
 	if (r) {
 		amdgpu_job_free(job);
 		amdgpu_mn_unlock(p->mn);
@@ -1202,7 +1207,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	amdgpu_ring_priority_get(job->ring, job->base.s_priority);
 
 	trace_amdgpu_cs_ioctl(job);
-	amd_sched_entity_push_job(&job->base, entity);
+	drm_sched_entity_push_job(&job->base, entity);
 
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	amdgpu_mn_unlock(p->mn);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d71dc164b4693b5b0e67405eaee5990a0e4af5e1..09d35051fdd68689ac00d69504765166e738076f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -28,10 +28,10 @@
 #include "amdgpu_sched.h"
 
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
-				      enum amd_sched_priority priority)
+				      enum drm_sched_priority priority)
 {
 	/* NORMAL and below are accessible by everyone */
-	if (priority <= AMD_SCHED_PRIORITY_NORMAL)
+	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
 		return 0;
 
 	if (capable(CAP_SYS_NICE))
@@ -44,14 +44,14 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 }
 
 static int amdgpu_ctx_init(struct amdgpu_device *adev,
-			   enum amd_sched_priority priority,
+			   enum drm_sched_priority priority,
 			   struct drm_file *filp,
 			   struct amdgpu_ctx *ctx)
 {
 	unsigned i, j;
 	int r;
 
-	if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX)
+	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
 		return -EINVAL;
 
 	r = amdgpu_ctx_priority_permit(filp, priority);
@@ -78,19 +78,19 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	ctx->reset_counter_query = ctx->reset_counter;
 	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 	ctx->init_priority = priority;
-	ctx->override_priority = AMD_SCHED_PRIORITY_UNSET;
+	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 
 	/* create context entity for each ring */
 	for (i = 0; i < adev->num_rings; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
-		struct amd_sched_rq *rq;
+		struct drm_sched_rq *rq;
 
 		rq = &ring->sched.sched_rq[priority];
 
 		if (ring == &adev->gfx.kiq.ring)
 			continue;
 
-		r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
+		r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
 					  rq, amdgpu_sched_jobs, &ctx->guilty);
 		if (r)
 			goto failed;
@@ -104,7 +104,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
 failed:
 	for (j = 0; j < i; j++)
-		amd_sched_entity_fini(&adev->rings[j]->sched,
+		drm_sched_entity_fini(&adev->rings[j]->sched,
 				      &ctx->rings[j].entity);
 	kfree(ctx->fences);
 	ctx->fences = NULL;
@@ -126,7 +126,7 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 	ctx->fences = NULL;
 
 	for (i = 0; i < adev->num_rings; i++)
-		amd_sched_entity_fini(&adev->rings[i]->sched,
+		drm_sched_entity_fini(&adev->rings[i]->sched,
 				      &ctx->rings[i].entity);
 
 	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
@@ -137,7 +137,7 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 			    struct amdgpu_fpriv *fpriv,
 			    struct drm_file *filp,
-			    enum amd_sched_priority priority,
+			    enum drm_sched_priority priority,
 			    uint32_t *id)
 {
 	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
@@ -266,7 +266,7 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 {
 	int r;
 	uint32_t id;
-	enum amd_sched_priority priority;
+	enum drm_sched_priority priority;
 
 	union drm_amdgpu_ctx *args = data;
 	struct amdgpu_device *adev = dev->dev_private;
@@ -278,8 +278,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 
 	/* For backwards compatibility reasons, we need to accept
 	 * ioctls with garbage in the priority field */
-	if (priority == AMD_SCHED_PRIORITY_INVALID)
-		priority = AMD_SCHED_PRIORITY_NORMAL;
+	if (priority == DRM_SCHED_PRIORITY_INVALID)
+		priority = DRM_SCHED_PRIORITY_NORMAL;
 
 	switch (args->in.op) {
 	case AMDGPU_CTX_OP_ALLOC_CTX:
@@ -385,18 +385,18 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 }
 
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
-				  enum amd_sched_priority priority)
+				  enum drm_sched_priority priority)
 {
 	int i;
 	struct amdgpu_device *adev = ctx->adev;
-	struct amd_sched_rq *rq;
-	struct amd_sched_entity *entity;
+	struct drm_sched_rq *rq;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ring *ring;
-	enum amd_sched_priority ctx_prio;
+	enum drm_sched_priority ctx_prio;
 
 	ctx->override_priority = priority;
 
-	ctx_prio = (ctx->override_priority == AMD_SCHED_PRIORITY_UNSET) ?
+	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
 
 	for (i = 0; i < adev->num_rings; i++) {
@@ -407,7 +407,7 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 		if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
 			continue;
 
-		amd_sched_entity_set_rq(entity, rq);
+		drm_sched_entity_set_rq(entity, rq);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
new file mode 100644
index 0000000000000000000000000000000000000000..ee76b468774a313739b202273959ce6161fa34ab
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -0,0 +1,792 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kthread.h>
+#include <drm/drmP.h>
+#include <linux/debugfs.h>
+#include "amdgpu.h"
+
+/*
+ * Debugfs
+ */
+int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
+			     const struct drm_info_list *files,
+			     unsigned nfiles)
+{
+	unsigned i;
+
+	for (i = 0; i < adev->debugfs_count; i++) {
+		if (adev->debugfs[i].files == files) {
+			/* Already registered */
+			return 0;
+		}
+	}
+
+	i = adev->debugfs_count + 1;
+	if (i > AMDGPU_DEBUGFS_MAX_COMPONENTS) {
+		DRM_ERROR("Reached maximum number of debugfs components.\n");
+		DRM_ERROR("Report so we increase "
+			  "AMDGPU_DEBUGFS_MAX_COMPONENTS.\n");
+		return -EINVAL;
+	}
+	adev->debugfs[adev->debugfs_count].files = files;
+	adev->debugfs[adev->debugfs_count].num_files = nfiles;
+	adev->debugfs_count = i;
+#if defined(CONFIG_DEBUG_FS)
+	drm_debugfs_create_files(files, nfiles,
+				 adev->ddev->primary->debugfs_root,
+				 adev->ddev->primary);
+#endif
+	return 0;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+	bool pm_pg_lock, use_bank;
+	unsigned instance_bank, sh_bank, se_bank;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	/* are we reading registers for which a PG lock is necessary? */
+	pm_pg_lock = (*pos >> 23) & 1;
+
+	if (*pos & (1ULL << 62)) {
+		se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24;
+		sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34;
+		instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44;
+
+		if (se_bank == 0x3FF)
+			se_bank = 0xFFFFFFFF;
+		if (sh_bank == 0x3FF)
+			sh_bank = 0xFFFFFFFF;
+		if (instance_bank == 0x3FF)
+			instance_bank = 0xFFFFFFFF;
+		use_bank = 1;
+	} else {
+		use_bank = 0;
+	}
+
+	*pos &= (1UL << 22) - 1;
+
+	if (use_bank) {
+		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
+		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
+			return -EINVAL;
+		mutex_lock(&adev->grbm_idx_mutex);
+		amdgpu_gfx_select_se_sh(adev, se_bank,
+					sh_bank, instance_bank);
+	}
+
+	if (pm_pg_lock)
+		mutex_lock(&adev->pm.mutex);
+
+	while (size) {
+		uint32_t value;
+
+		if (*pos > adev->rmmio_size)
+			goto end;
+
+		value = RREG32(*pos >> 2);
+		r = put_user(value, (uint32_t *)buf);
+		if (r) {
+			result = r;
+			goto end;
+		}
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+end:
+	if (use_bank) {
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+		mutex_unlock(&adev->grbm_idx_mutex);
+	}
+
+	if (pm_pg_lock)
+		mutex_unlock(&adev->pm.mutex);
+
+	return result;
+}
+
+static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
+					 size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+	bool pm_pg_lock, use_bank;
+	unsigned instance_bank, sh_bank, se_bank;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	/* are we reading registers for which a PG lock is necessary? */
+	pm_pg_lock = (*pos >> 23) & 1;
+
+	if (*pos & (1ULL << 62)) {
+		se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24;
+		sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34;
+		instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44;
+
+		if (se_bank == 0x3FF)
+			se_bank = 0xFFFFFFFF;
+		if (sh_bank == 0x3FF)
+			sh_bank = 0xFFFFFFFF;
+		if (instance_bank == 0x3FF)
+			instance_bank = 0xFFFFFFFF;
+		use_bank = 1;
+	} else {
+		use_bank = 0;
+	}
+
+	*pos &= (1UL << 22) - 1;
+
+	if (use_bank) {
+		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
+		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
+			return -EINVAL;
+		mutex_lock(&adev->grbm_idx_mutex);
+		amdgpu_gfx_select_se_sh(adev, se_bank,
+					sh_bank, instance_bank);
+	}
+
+	if (pm_pg_lock)
+		mutex_lock(&adev->pm.mutex);
+
+	while (size) {
+		uint32_t value;
+
+		if (*pos > adev->rmmio_size)
+			return result;
+
+		r = get_user(value, (uint32_t *)buf);
+		if (r)
+			return r;
+
+		WREG32(*pos >> 2, value);
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	if (use_bank) {
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+		mutex_unlock(&adev->grbm_idx_mutex);
+	}
+
+	if (pm_pg_lock)
+		mutex_unlock(&adev->pm.mutex);
+
+	return result;
+}
+
+static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	while (size) {
+		uint32_t value;
+
+		value = RREG32_PCIE(*pos >> 2);
+		r = put_user(value, (uint32_t *)buf);
+		if (r)
+			return r;
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	return result;
+}
+
+static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
+					 size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	while (size) {
+		uint32_t value;
+
+		r = get_user(value, (uint32_t *)buf);
+		if (r)
+			return r;
+
+		WREG32_PCIE(*pos >> 2, value);
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	return result;
+}
+
+static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	while (size) {
+		uint32_t value;
+
+		value = RREG32_DIDT(*pos >> 2);
+		r = put_user(value, (uint32_t *)buf);
+		if (r)
+			return r;
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	return result;
+}
+
+static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
+					 size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	while (size) {
+		uint32_t value;
+
+		r = get_user(value, (uint32_t *)buf);
+		if (r)
+			return r;
+
+		WREG32_DIDT(*pos >> 2, value);
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	return result;
+}
+
+static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	while (size) {
+		uint32_t value;
+
+		value = RREG32_SMC(*pos);
+		r = put_user(value, (uint32_t *)buf);
+		if (r)
+			return r;
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	return result;
+}
+
+static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
+					 size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	while (size) {
+		uint32_t value;
+
+		r = get_user(value, (uint32_t *)buf);
+		if (r)
+			return r;
+
+		WREG32_SMC(*pos, value);
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	return result;
+}
+
+static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+	uint32_t *config, no_regs = 0;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	config = kmalloc_array(256, sizeof(*config), GFP_KERNEL);
+	if (!config)
+		return -ENOMEM;
+
+	/* version, increment each time something is added */
+	config[no_regs++] = 3;
+	config[no_regs++] = adev->gfx.config.max_shader_engines;
+	config[no_regs++] = adev->gfx.config.max_tile_pipes;
+	config[no_regs++] = adev->gfx.config.max_cu_per_sh;
+	config[no_regs++] = adev->gfx.config.max_sh_per_se;
+	config[no_regs++] = adev->gfx.config.max_backends_per_se;
+	config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
+	config[no_regs++] = adev->gfx.config.max_gprs;
+	config[no_regs++] = adev->gfx.config.max_gs_threads;
+	config[no_regs++] = adev->gfx.config.max_hw_contexts;
+	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
+	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
+	config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
+	config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
+	config[no_regs++] = adev->gfx.config.num_tile_pipes;
+	config[no_regs++] = adev->gfx.config.backend_enable_mask;
+	config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
+	config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
+	config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
+	config[no_regs++] = adev->gfx.config.num_gpus;
+	config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
+	config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
+	config[no_regs++] = adev->gfx.config.gb_addr_config;
+	config[no_regs++] = adev->gfx.config.num_rbs;
+
+	/* rev==1 */
+	config[no_regs++] = adev->rev_id;
+	config[no_regs++] = adev->pg_flags;
+	config[no_regs++] = adev->cg_flags;
+
+	/* rev==2 */
+	config[no_regs++] = adev->family;
+	config[no_regs++] = adev->external_rev_id;
+
+	/* rev==3 */
+	config[no_regs++] = adev->pdev->device;
+	config[no_regs++] = adev->pdev->revision;
+	config[no_regs++] = adev->pdev->subsystem_device;
+	config[no_regs++] = adev->pdev->subsystem_vendor;
+
+	while (size && (*pos < no_regs * 4)) {
+		uint32_t value;
+
+		value = config[*pos >> 2];
+		r = put_user(value, (uint32_t *)buf);
+		if (r) {
+			kfree(config);
+			return r;
+		}
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	kfree(config);
+	return result;
+}
+
+static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	int idx, x, outsize, r, valuesize;
+	uint32_t values[16];
+
+	if (size & 3 || *pos & 0x3)
+		return -EINVAL;
+
+	if (amdgpu_dpm == 0)
+		return -EINVAL;
+
+	/* convert offset to sensor number */
+	idx = *pos >> 2;
+
+	valuesize = sizeof(values);
+	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor)
+		r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
+	else
+		return -EINVAL;
+
+	if (size > valuesize)
+		return -EINVAL;
+
+	outsize = 0;
+	x = 0;
+	if (!r) {
+		while (size) {
+			r = put_user(values[x++], (int32_t *)buf);
+			buf += 4;
+			size -= 4;
+			outsize += 4;
+		}
+	}
+
+	return !r ? outsize : r;
+}
+
+static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = f->f_inode->i_private;
+	int r, x;
+	ssize_t result=0;
+	uint32_t offset, se, sh, cu, wave, simd, data[32];
+
+	if (size & 3 || *pos & 3)
+		return -EINVAL;
+
+	/* decode offset */
+	offset = (*pos & GENMASK_ULL(6, 0));
+	se = (*pos & GENMASK_ULL(14, 7)) >> 7;
+	sh = (*pos & GENMASK_ULL(22, 15)) >> 15;
+	cu = (*pos & GENMASK_ULL(30, 23)) >> 23;
+	wave = (*pos & GENMASK_ULL(36, 31)) >> 31;
+	simd = (*pos & GENMASK_ULL(44, 37)) >> 37;
+
+	/* switch to the specific se/sh/cu */
+	mutex_lock(&adev->grbm_idx_mutex);
+	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
+
+	x = 0;
+	if (adev->gfx.funcs->read_wave_data)
+		adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x);
+
+	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	if (!x)
+		return -EINVAL;
+
+	while (size && (offset < x * 4)) {
+		uint32_t value;
+
+		value = data[offset >> 2];
+		r = put_user(value, (uint32_t *)buf);
+		if (r)
+			return r;
+
+		result += 4;
+		buf += 4;
+		offset += 4;
+		size -= 4;
+	}
+
+	return result;
+}
+
+static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = f->f_inode->i_private;
+	int r;
+	ssize_t result = 0;
+	uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
+
+	if (size & 3 || *pos & 3)
+		return -EINVAL;
+
+	/* decode offset */
+	offset = *pos & GENMASK_ULL(11, 0);
+	se = (*pos & GENMASK_ULL(19, 12)) >> 12;
+	sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
+	cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
+	wave = (*pos & GENMASK_ULL(43, 36)) >> 36;
+	simd = (*pos & GENMASK_ULL(51, 44)) >> 44;
+	thread = (*pos & GENMASK_ULL(59, 52)) >> 52;
+	bank = (*pos & GENMASK_ULL(61, 60)) >> 60;
+
+	data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* switch to the specific se/sh/cu */
+	mutex_lock(&adev->grbm_idx_mutex);
+	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
+
+	if (bank == 0) {
+		if (adev->gfx.funcs->read_wave_vgprs)
+			adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data);
+	} else {
+		if (adev->gfx.funcs->read_wave_sgprs)
+			adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data);
+	}
+
+	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	while (size) {
+		uint32_t value;
+
+		value = data[offset++];
+		r = put_user(value, (uint32_t *)buf);
+		if (r) {
+			result = r;
+			goto err;
+		}
+
+		result += 4;
+		buf += 4;
+		size -= 4;
+	}
+
+err:
+	kfree(data);
+	return result;
+}
+
+static const struct file_operations amdgpu_debugfs_regs_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_regs_read,
+	.write = amdgpu_debugfs_regs_write,
+	.llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_regs_didt_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_regs_didt_read,
+	.write = amdgpu_debugfs_regs_didt_write,
+	.llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_regs_pcie_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_regs_pcie_read,
+	.write = amdgpu_debugfs_regs_pcie_write,
+	.llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_regs_smc_read,
+	.write = amdgpu_debugfs_regs_smc_write,
+	.llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gca_config_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_gca_config_read,
+	.llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_sensors_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_sensor_read,
+	.llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_wave_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_wave_read,
+	.llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_gpr_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_gpr_read,
+	.llseek = default_llseek
+};
+
+static const struct file_operations *debugfs_regs[] = {
+	&amdgpu_debugfs_regs_fops,
+	&amdgpu_debugfs_regs_didt_fops,
+	&amdgpu_debugfs_regs_pcie_fops,
+	&amdgpu_debugfs_regs_smc_fops,
+	&amdgpu_debugfs_gca_config_fops,
+	&amdgpu_debugfs_sensors_fops,
+	&amdgpu_debugfs_wave_fops,
+	&amdgpu_debugfs_gpr_fops,
+};
+
+static const char *debugfs_regs_names[] = {
+	"amdgpu_regs",
+	"amdgpu_regs_didt",
+	"amdgpu_regs_pcie",
+	"amdgpu_regs_smc",
+	"amdgpu_gca_config",
+	"amdgpu_sensors",
+	"amdgpu_wave",
+	"amdgpu_gpr",
+};
+
+int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
+{
+	struct drm_minor *minor = adev->ddev->primary;
+	struct dentry *ent, *root = minor->debugfs_root;
+	unsigned i, j;
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
+		ent = debugfs_create_file(debugfs_regs_names[i],
+					  S_IFREG | S_IRUGO, root,
+					  adev, debugfs_regs[i]);
+		if (IS_ERR(ent)) {
+			for (j = 0; j < i; j++) {
+				debugfs_remove(adev->debugfs_regs[i]);
+				adev->debugfs_regs[i] = NULL;
+			}
+			return PTR_ERR(ent);
+		}
+
+		if (!i)
+			i_size_write(ent->d_inode, adev->rmmio_size);
+		adev->debugfs_regs[i] = ent;
+	}
+
+	return 0;
+}
+
+void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev)
+{
+	unsigned i;
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
+		if (adev->debugfs_regs[i]) {
+			debugfs_remove(adev->debugfs_regs[i]);
+			adev->debugfs_regs[i] = NULL;
+		}
+	}
+}
+
+static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	int r = 0, i;
+
+	/* hold on the scheduler */
+	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+
+		if (!ring || !ring->sched.thread)
+			continue;
+		kthread_park(ring->sched.thread);
+	}
+
+	seq_printf(m, "run ib test:\n");
+	r = amdgpu_ib_ring_tests(adev);
+	if (r)
+		seq_printf(m, "ib ring tests failed (%d).\n", r);
+	else
+		seq_printf(m, "ib ring tests passed.\n");
+
+	/* go on the scheduler */
+	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+
+		if (!ring || !ring->sched.thread)
+			continue;
+		kthread_unpark(ring->sched.thread);
+	}
+
+	return 0;
+}
+
+static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+
+	seq_write(m, adev->bios, adev->bios_size);
+	return 0;
+}
+
+static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+
+	seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));
+	return 0;
+}
+
+static const struct drm_info_list amdgpu_debugfs_list[] = {
+	{"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump},
+	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
+	{"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram}
+};
+
+int amdgpu_debugfs_init(struct amdgpu_device *adev)
+{
+	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
+					ARRAY_SIZE(amdgpu_debugfs_list));
+}
+
+#else
+int amdgpu_debugfs_init(struct amdgpu_device *adev)
+{
+	return 0;
+}
+int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
+{
+	return 0;
+}
+void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { }
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/basics/grph_object_id.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
similarity index 50%
rename from drivers/gpu/drm/amd/display/dc/basics/grph_object_id.c
rename to drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 147822545252ed129c694aa5e7bf870aab4955f2..8260d8073c2688c67bbe2018807bc9957a3f6749 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/grph_object_id.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -1,5 +1,7 @@
 /*
- * Copyright 2012-15 Advanced Micro Devices, Inc.
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -19,57 +21,22 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
- * Authors: AMD
- *
  */
 
-#include "dm_services.h"
-#include "include/grph_object_id.h"
-
-static bool dal_graphics_object_id_is_valid(struct graphics_object_id id)
-{
-	bool rc = true;
-
-	switch (id.type) {
-	case OBJECT_TYPE_UNKNOWN:
-		rc = false;
-		break;
-	case OBJECT_TYPE_GPU:
-	case OBJECT_TYPE_ENGINE:
-		/* do NOT check for id.id == 0 */
-		if (id.enum_id == ENUM_ID_UNKNOWN)
-			rc = false;
-		break;
-	default:
-		if (id.id == 0 || id.enum_id == ENUM_ID_UNKNOWN)
-			rc = false;
-		break;
-	}
-
-	return rc;
-}
-
-bool dal_graphics_object_id_is_equal(
-	struct graphics_object_id id1,
-	struct graphics_object_id id2)
-{
-	if (false == dal_graphics_object_id_is_valid(id1)) {
-		dm_output_to_console(
-		"%s: Warning: comparing invalid object 'id1'!\n", __func__);
-		return false;
-	}
-
-	if (false == dal_graphics_object_id_is_valid(id2)) {
-		dm_output_to_console(
-		"%s: Warning: comparing invalid object 'id2'!\n", __func__);
-		return false;
-	}
-
-	if (id1.id == id2.id && id1.enum_id == id2.enum_id
-		&& id1.type == id2.type)
-		return true;
-
-	return false;
-}
-
-
+/*
+ * Debugfs
+ */
+struct amdgpu_debugfs {
+	const struct drm_info_list	*files;
+	unsigned		num_files;
+};
+
+int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
+int amdgpu_debugfs_init(struct amdgpu_device *adev);
+int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
+			     const struct drm_info_list *files,
+			     unsigned nfiles);
+int amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
+int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
+int amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 70c9e5756b02a66fdda95f5340ad860337ec1328..00a50cc5ec9a31a77b7476020e5f47089c83c5de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -28,7 +28,6 @@
 #include <linux/kthread.h>
 #include <linux/console.h>
 #include <linux/slab.h>
-#include <linux/debugfs.h>
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_atomic_helper.h>
@@ -64,11 +63,6 @@ MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
 
 #define AMDGPU_RESUME_MS		2000
 
-static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
-static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
-static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev);
-static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev);
-
 static const char *amdgpu_asic_name[] = {
 	"TAHITI",
 	"PITCAIRN",
@@ -333,7 +327,7 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
 	BUG();
 }
 
-static int amdgpu_vram_scratch_init(struct amdgpu_device *adev)
+static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
 {
 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
 				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
@@ -342,13 +336,13 @@ static int amdgpu_vram_scratch_init(struct amdgpu_device *adev)
 				       (void **)&adev->vram_scratch.ptr);
 }
 
-static void amdgpu_vram_scratch_fini(struct amdgpu_device *adev)
+static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
 {
 	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
 }
 
 /**
- * amdgpu_program_register_sequence - program an array of registers.
+ * amdgpu_device_program_register_sequence - program an array of registers.
  *
  * @adev: amdgpu_device pointer
  * @registers: pointer to the register array
@@ -357,9 +351,9 @@ static void amdgpu_vram_scratch_fini(struct amdgpu_device *adev)
  * Programs an array or registers with and and or masks.
  * This is a helper for setting golden registers.
  */
-void amdgpu_program_register_sequence(struct amdgpu_device *adev,
-				      const u32 *registers,
-				      const u32 array_size)
+void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
+					     const u32 *registers,
+					     const u32 array_size)
 {
 	u32 tmp, reg, and_mask, or_mask;
 	int i;
@@ -383,7 +377,7 @@ void amdgpu_program_register_sequence(struct amdgpu_device *adev,
 	}
 }
 
-void amdgpu_pci_config_reset(struct amdgpu_device *adev)
+void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
 {
 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
 }
@@ -392,14 +386,14 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev)
  * GPU doorbell aperture helpers function.
  */
 /**
- * amdgpu_doorbell_init - Init doorbell driver information.
+ * amdgpu_device_doorbell_init - Init doorbell driver information.
  *
  * @adev: amdgpu_device pointer
  *
  * Init doorbell driver information (CIK)
  * Returns 0 on success, error on failure.
  */
-static int amdgpu_doorbell_init(struct amdgpu_device *adev)
+static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
 {
 	/* No doorbell on SI hardware generation */
 	if (adev->asic_type < CHIP_BONAIRE) {
@@ -432,66 +426,35 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_doorbell_fini - Tear down doorbell driver information.
+ * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
  *
  * @adev: amdgpu_device pointer
  *
  * Tear down doorbell driver information (CIK)
  */
-static void amdgpu_doorbell_fini(struct amdgpu_device *adev)
+static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
 {
 	iounmap(adev->doorbell.ptr);
 	adev->doorbell.ptr = NULL;
 }
 
-/**
- * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
- *                                setup amdkfd
- *
- * @adev: amdgpu_device pointer
- * @aperture_base: output returning doorbell aperture base physical address
- * @aperture_size: output returning doorbell aperture size in bytes
- * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
- *
- * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
- * takes doorbells required for its own rings and reports the setup to amdkfd.
- * amdgpu reserved doorbells are at the start of the doorbell aperture.
- */
-void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
-				phys_addr_t *aperture_base,
-				size_t *aperture_size,
-				size_t *start_offset)
-{
-	/*
-	 * The first num_doorbells are used by amdgpu.
-	 * amdkfd takes whatever's left in the aperture.
-	 */
-	if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
-		*aperture_base = adev->doorbell.base;
-		*aperture_size = adev->doorbell.size;
-		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
-	} else {
-		*aperture_base = 0;
-		*aperture_size = 0;
-		*start_offset = 0;
-	}
-}
+
 
 /*
- * amdgpu_wb_*()
+ * amdgpu_device_wb_*()
  * Writeback is the method by which the GPU updates special pages in memory
  * with the status of certain GPU events (fences, ring pointers,etc.).
  */
 
 /**
- * amdgpu_wb_fini - Disable Writeback and free memory
+ * amdgpu_device_wb_fini - Disable Writeback and free memory
  *
  * @adev: amdgpu_device pointer
  *
  * Disables Writeback and frees the Writeback memory (all asics).
  * Used at driver shutdown.
  */
-static void amdgpu_wb_fini(struct amdgpu_device *adev)
+static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
 {
 	if (adev->wb.wb_obj) {
 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
@@ -502,7 +465,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_wb_init- Init Writeback driver info and allocate memory
+ * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
  *
  * @adev: amdgpu_device pointer
  *
@@ -510,7 +473,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev)
  * Used at driver startup.
  * Returns 0 on success or an -error on failure.
  */
-static int amdgpu_wb_init(struct amdgpu_device *adev)
+static int amdgpu_device_wb_init(struct amdgpu_device *adev)
 {
 	int r;
 
@@ -536,7 +499,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_wb_get - Allocate a wb entry
+ * amdgpu_device_wb_get - Allocate a wb entry
  *
  * @adev: amdgpu_device pointer
  * @wb: wb index
@@ -544,7 +507,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev)
  * Allocate a wb slot for use by the driver (all asics).
  * Returns 0 on success or -EINVAL on failure.
  */
-int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb)
+int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
 {
 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
 
@@ -558,21 +521,21 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb)
 }
 
 /**
- * amdgpu_wb_free - Free a wb entry
+ * amdgpu_device_wb_free - Free a wb entry
  *
  * @adev: amdgpu_device pointer
  * @wb: wb index
  *
  * Free a wb slot allocated for use by the driver (all asics)
  */
-void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb)
+void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
 {
 	if (wb < adev->wb.num_wb)
 		__clear_bit(wb >> 3, adev->wb.used);
 }
 
 /**
- * amdgpu_vram_location - try to find VRAM location
+ * amdgpu_device_vram_location - try to find VRAM location
  * @adev: amdgpu device structure holding all necessary informations
  * @mc: memory controller structure holding memory informations
  * @base: base address at which to put VRAM
@@ -580,7 +543,8 @@ void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb)
  * Function will try to place VRAM at base address provided
  * as parameter.
  */
-void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base)
+void amdgpu_device_vram_location(struct amdgpu_device *adev,
+				 struct amdgpu_mc *mc, u64 base)
 {
 	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
 
@@ -594,7 +558,7 @@ void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64
 }
 
 /**
- * amdgpu_gart_location - try to find GTT location
+ * amdgpu_device_gart_location - try to find GTT location
  * @adev: amdgpu device structure holding all necessary informations
  * @mc: memory controller structure holding memory informations
  *
@@ -605,7 +569,8 @@ void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64
  *
  * FIXME: when reducing GTT size align new size on power of 2.
  */
-void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc)
+void amdgpu_device_gart_location(struct amdgpu_device *adev,
+				 struct amdgpu_mc *mc)
 {
 	u64 size_af, size_bf;
 
@@ -632,101 +597,6 @@ void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc)
 			mc->gart_size >> 20, mc->gart_start, mc->gart_end);
 }
 
-/*
- * Firmware Reservation functions
- */
-/**
- * amdgpu_fw_reserve_vram_fini - free fw reserved vram
- *
- * @adev: amdgpu_device pointer
- *
- * free fw reserved vram if it has been reserved.
- */
-void amdgpu_fw_reserve_vram_fini(struct amdgpu_device *adev)
-{
-	amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo,
-		NULL, &adev->fw_vram_usage.va);
-}
-
-/**
- * amdgpu_fw_reserve_vram_init - create bo vram reservation from fw
- *
- * @adev: amdgpu_device pointer
- *
- * create bo vram reservation from fw.
- */
-int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int r = 0;
-	int i;
-	u64 vram_size = adev->mc.visible_vram_size;
-	u64 offset = adev->fw_vram_usage.start_offset;
-	u64 size = adev->fw_vram_usage.size;
-	struct amdgpu_bo *bo;
-
-	adev->fw_vram_usage.va = NULL;
-	adev->fw_vram_usage.reserved_bo = NULL;
-
-	if (adev->fw_vram_usage.size > 0 &&
-		adev->fw_vram_usage.size <= vram_size) {
-
-		r = amdgpu_bo_create(adev, adev->fw_vram_usage.size,
-			PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
-			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0,
-			&adev->fw_vram_usage.reserved_bo);
-		if (r)
-			goto error_create;
-
-		r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
-		if (r)
-			goto error_reserve;
-
-		/* remove the original mem node and create a new one at the
-		 * request position
-		 */
-		bo = adev->fw_vram_usage.reserved_bo;
-		offset = ALIGN(offset, PAGE_SIZE);
-		for (i = 0; i < bo->placement.num_placement; ++i) {
-			bo->placements[i].fpfn = offset >> PAGE_SHIFT;
-			bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
-		}
-
-		ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem);
-		r = ttm_bo_mem_space(&bo->tbo, &bo->placement,
-				     &bo->tbo.mem, &ctx);
-		if (r)
-			goto error_pin;
-
-		r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
-			AMDGPU_GEM_DOMAIN_VRAM,
-			adev->fw_vram_usage.start_offset,
-			(adev->fw_vram_usage.start_offset +
-			adev->fw_vram_usage.size), NULL);
-		if (r)
-			goto error_pin;
-		r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
-			&adev->fw_vram_usage.va);
-		if (r)
-			goto error_kmap;
-
-		amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
-	}
-	return r;
-
-error_kmap:
-	amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo);
-error_pin:
-	amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
-error_reserve:
-	amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo);
-error_create:
-	adev->fw_vram_usage.va = NULL;
-	adev->fw_vram_usage.reserved_bo = NULL;
-	return r;
-}
-
 /**
  * amdgpu_device_resize_fb_bar - try to resize FB BAR
  *
@@ -756,7 +626,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
 		root = root->parent;
 
 	pci_bus_for_each_resource(root, res, i) {
-		if (res && res->flags & IORESOURCE_MEM_64 &&
+		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
 		    res->start > 0x100000000ull)
 			break;
 	}
@@ -771,7 +641,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
 			      cmd & ~PCI_COMMAND_MEMORY);
 
 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
-	amdgpu_doorbell_fini(adev);
+	amdgpu_device_doorbell_fini(adev);
 	if (adev->asic_type >= CHIP_BONAIRE)
 		pci_release_resource(adev->pdev, 2);
 
@@ -788,7 +658,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
 	/* When the doorbell or fb BAR isn't available we have no chance of
 	 * using the device.
 	 */
-	r = amdgpu_doorbell_init(adev);
+	r = amdgpu_device_doorbell_init(adev);
 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
 		return -ENODEV;
 
@@ -801,7 +671,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
  * GPU helpers function.
  */
 /**
- * amdgpu_need_post - check if the hw need post or not
+ * amdgpu_device_need_post - check if the hw need post or not
  *
  * @adev: amdgpu_device pointer
  *
@@ -809,7 +679,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
  * or post is needed if  hw reset is performed.
  * Returns true if need or false if not.
  */
-bool amdgpu_need_post(struct amdgpu_device *adev)
+bool amdgpu_device_need_post(struct amdgpu_device *adev)
 {
 	uint32_t reg;
 
@@ -854,285 +724,9 @@ bool amdgpu_need_post(struct amdgpu_device *adev)
 	return true;
 }
 
-/**
- * amdgpu_dummy_page_init - init dummy page used by the driver
- *
- * @adev: amdgpu_device pointer
- *
- * Allocate the dummy page used by the driver (all asics).
- * This dummy page is used by the driver as a filler for gart entries
- * when pages are taken out of the GART
- * Returns 0 on sucess, -ENOMEM on failure.
- */
-int amdgpu_dummy_page_init(struct amdgpu_device *adev)
-{
-	if (adev->dummy_page.page)
-		return 0;
-	adev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
-	if (adev->dummy_page.page == NULL)
-		return -ENOMEM;
-	adev->dummy_page.addr = pci_map_page(adev->pdev, adev->dummy_page.page,
-					0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	if (pci_dma_mapping_error(adev->pdev, adev->dummy_page.addr)) {
-		dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
-		__free_page(adev->dummy_page.page);
-		adev->dummy_page.page = NULL;
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-/**
- * amdgpu_dummy_page_fini - free dummy page used by the driver
- *
- * @adev: amdgpu_device pointer
- *
- * Frees the dummy page used by the driver (all asics).
- */
-void amdgpu_dummy_page_fini(struct amdgpu_device *adev)
-{
-	if (adev->dummy_page.page == NULL)
-		return;
-	pci_unmap_page(adev->pdev, adev->dummy_page.addr,
-			PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	__free_page(adev->dummy_page.page);
-	adev->dummy_page.page = NULL;
-}
-
-
-/* ATOM accessor methods */
-/*
- * ATOM is an interpreted byte code stored in tables in the vbios.  The
- * driver registers callbacks to access registers and the interpreter
- * in the driver parses the tables and executes then to program specific
- * actions (set display modes, asic init, etc.).  See amdgpu_atombios.c,
- * atombios.h, and atom.c
- */
-
-/**
- * cail_pll_read - read PLL register
- *
- * @info: atom card_info pointer
- * @reg: PLL register offset
- *
- * Provides a PLL register accessor for the atom interpreter (r4xx+).
- * Returns the value of the PLL register.
- */
-static uint32_t cail_pll_read(struct card_info *info, uint32_t reg)
-{
-	return 0;
-}
-
-/**
- * cail_pll_write - write PLL register
- *
- * @info: atom card_info pointer
- * @reg: PLL register offset
- * @val: value to write to the pll register
- *
- * Provides a PLL register accessor for the atom interpreter (r4xx+).
- */
-static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val)
-{
-
-}
-
-/**
- * cail_mc_read - read MC (Memory Controller) register
- *
- * @info: atom card_info pointer
- * @reg: MC register offset
- *
- * Provides an MC register accessor for the atom interpreter (r4xx+).
- * Returns the value of the MC register.
- */
-static uint32_t cail_mc_read(struct card_info *info, uint32_t reg)
-{
-	return 0;
-}
-
-/**
- * cail_mc_write - write MC (Memory Controller) register
- *
- * @info: atom card_info pointer
- * @reg: MC register offset
- * @val: value to write to the pll register
- *
- * Provides a MC register accessor for the atom interpreter (r4xx+).
- */
-static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val)
-{
-
-}
-
-/**
- * cail_reg_write - write MMIO register
- *
- * @info: atom card_info pointer
- * @reg: MMIO register offset
- * @val: value to write to the pll register
- *
- * Provides a MMIO register accessor for the atom interpreter (r4xx+).
- */
-static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val)
-{
-	struct amdgpu_device *adev = info->dev->dev_private;
-
-	WREG32(reg, val);
-}
-
-/**
- * cail_reg_read - read MMIO register
- *
- * @info: atom card_info pointer
- * @reg: MMIO register offset
- *
- * Provides an MMIO register accessor for the atom interpreter (r4xx+).
- * Returns the value of the MMIO register.
- */
-static uint32_t cail_reg_read(struct card_info *info, uint32_t reg)
-{
-	struct amdgpu_device *adev = info->dev->dev_private;
-	uint32_t r;
-
-	r = RREG32(reg);
-	return r;
-}
-
-/**
- * cail_ioreg_write - write IO register
- *
- * @info: atom card_info pointer
- * @reg: IO register offset
- * @val: value to write to the pll register
- *
- * Provides a IO register accessor for the atom interpreter (r4xx+).
- */
-static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val)
-{
-	struct amdgpu_device *adev = info->dev->dev_private;
-
-	WREG32_IO(reg, val);
-}
-
-/**
- * cail_ioreg_read - read IO register
- *
- * @info: atom card_info pointer
- * @reg: IO register offset
- *
- * Provides an IO register accessor for the atom interpreter (r4xx+).
- * Returns the value of the IO register.
- */
-static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg)
-{
-	struct amdgpu_device *adev = info->dev->dev_private;
-	uint32_t r;
-
-	r = RREG32_IO(reg);
-	return r;
-}
-
-static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
-						 struct device_attribute *attr,
-						 char *buf)
-{
-	struct drm_device *ddev = dev_get_drvdata(dev);
-	struct amdgpu_device *adev = ddev->dev_private;
-	struct atom_context *ctx = adev->mode_info.atom_context;
-
-	return snprintf(buf, PAGE_SIZE, "%s\n", ctx->vbios_version);
-}
-
-static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
-		   NULL);
-
-/**
- * amdgpu_atombios_fini - free the driver info and callbacks for atombios
- *
- * @adev: amdgpu_device pointer
- *
- * Frees the driver info and register access callbacks for the ATOM
- * interpreter (r4xx+).
- * Called at driver shutdown.
- */
-static void amdgpu_atombios_fini(struct amdgpu_device *adev)
-{
-	if (adev->mode_info.atom_context) {
-		kfree(adev->mode_info.atom_context->scratch);
-		kfree(adev->mode_info.atom_context->iio);
-	}
-	kfree(adev->mode_info.atom_context);
-	adev->mode_info.atom_context = NULL;
-	kfree(adev->mode_info.atom_card_info);
-	adev->mode_info.atom_card_info = NULL;
-	device_remove_file(adev->dev, &dev_attr_vbios_version);
-}
-
-/**
- * amdgpu_atombios_init - init the driver info and callbacks for atombios
- *
- * @adev: amdgpu_device pointer
- *
- * Initializes the driver info and register access callbacks for the
- * ATOM interpreter (r4xx+).
- * Returns 0 on sucess, -ENOMEM on failure.
- * Called at driver startup.
- */
-static int amdgpu_atombios_init(struct amdgpu_device *adev)
-{
-	struct card_info *atom_card_info =
-	    kzalloc(sizeof(struct card_info), GFP_KERNEL);
-	int ret;
-
-	if (!atom_card_info)
-		return -ENOMEM;
-
-	adev->mode_info.atom_card_info = atom_card_info;
-	atom_card_info->dev = adev->ddev;
-	atom_card_info->reg_read = cail_reg_read;
-	atom_card_info->reg_write = cail_reg_write;
-	/* needed for iio ops */
-	if (adev->rio_mem) {
-		atom_card_info->ioreg_read = cail_ioreg_read;
-		atom_card_info->ioreg_write = cail_ioreg_write;
-	} else {
-		DRM_DEBUG("PCI I/O BAR is not found. Using MMIO to access ATOM BIOS\n");
-		atom_card_info->ioreg_read = cail_reg_read;
-		atom_card_info->ioreg_write = cail_reg_write;
-	}
-	atom_card_info->mc_read = cail_mc_read;
-	atom_card_info->mc_write = cail_mc_write;
-	atom_card_info->pll_read = cail_pll_read;
-	atom_card_info->pll_write = cail_pll_write;
-
-	adev->mode_info.atom_context = amdgpu_atom_parse(atom_card_info, adev->bios);
-	if (!adev->mode_info.atom_context) {
-		amdgpu_atombios_fini(adev);
-		return -ENOMEM;
-	}
-
-	mutex_init(&adev->mode_info.atom_context->mutex);
-	if (adev->is_atom_fw) {
-		amdgpu_atomfirmware_scratch_regs_init(adev);
-		amdgpu_atomfirmware_allocate_fb_scratch(adev);
-	} else {
-		amdgpu_atombios_scratch_regs_init(adev);
-		amdgpu_atombios_allocate_fb_scratch(adev);
-	}
-
-	ret = device_create_file(adev->dev, &dev_attr_vbios_version);
-	if (ret) {
-		DRM_ERROR("Failed to create device file for VBIOS version\n");
-		return ret;
-	}
-
-	return 0;
-}
-
 /* if we get transitioned to only one device, take VGA back */
 /**
- * amdgpu_vga_set_decode - enable/disable vga decode
+ * amdgpu_device_vga_set_decode - enable/disable vga decode
  *
  * @cookie: amdgpu_device pointer
  * @state: enable/disable vga decode
@@ -1140,7 +734,7 @@ static int amdgpu_atombios_init(struct amdgpu_device *adev)
  * Enable/disable vga decode (all asics).
  * Returns VGA resource flags.
  */
-static unsigned int amdgpu_vga_set_decode(void *cookie, bool state)
+static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
 {
 	struct amdgpu_device *adev = cookie;
 	amdgpu_asic_set_vga_state(adev, state);
@@ -1151,7 +745,7 @@ static unsigned int amdgpu_vga_set_decode(void *cookie, bool state)
 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 }
 
-static void amdgpu_check_block_size(struct amdgpu_device *adev)
+static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
 {
 	/* defines number of bits in page table versus page directory,
 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
@@ -1166,7 +760,7 @@ static void amdgpu_check_block_size(struct amdgpu_device *adev)
 	}
 }
 
-static void amdgpu_check_vm_size(struct amdgpu_device *adev)
+static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
 {
 	/* no need to check the default value */
 	if (amdgpu_vm_size == -1)
@@ -1180,14 +774,14 @@ static void amdgpu_check_vm_size(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_check_arguments - validate module params
+ * amdgpu_device_check_arguments - validate module params
  *
  * @adev: amdgpu_device pointer
  *
  * Validates certain module parameters and updates
  * the associated values used by the driver (all asics).
  */
-static void amdgpu_check_arguments(struct amdgpu_device *adev)
+static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
 {
 	if (amdgpu_sched_jobs < 4) {
 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
@@ -1220,9 +814,9 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
 		amdgpu_vm_fragment_size = -1;
 	}
 
-	amdgpu_check_vm_size(adev);
+	amdgpu_device_check_vm_size(adev);
 
-	amdgpu_check_block_size(adev);
+	amdgpu_device_check_block_size(adev);
 
 	if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
 	    !is_power_of_2(amdgpu_vram_page_split))) {
@@ -1230,6 +824,11 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
 			 amdgpu_vram_page_split);
 		amdgpu_vram_page_split = 1024;
 	}
+
+	if (amdgpu_lockup_timeout == 0) {
+		dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
+		amdgpu_lockup_timeout = 10000;
+	}
 }
 
 /**
@@ -1293,9 +892,9 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
 	.can_switch = amdgpu_switcheroo_can_switch,
 };
 
-int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
-				  enum amd_ip_block_type block_type,
-				  enum amd_clockgating_state state)
+int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+					   enum amd_ip_block_type block_type,
+					   enum amd_clockgating_state state)
 {
 	int i, r = 0;
 
@@ -1315,9 +914,9 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
 	return r;
 }
 
-int amdgpu_set_powergating_state(struct amdgpu_device *adev,
-				  enum amd_ip_block_type block_type,
-				  enum amd_powergating_state state)
+int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+					   enum amd_ip_block_type block_type,
+					   enum amd_powergating_state state)
 {
 	int i, r = 0;
 
@@ -1337,7 +936,8 @@ int amdgpu_set_powergating_state(struct amdgpu_device *adev,
 	return r;
 }
 
-void amdgpu_get_clockgating_state(struct amdgpu_device *adev, u32 *flags)
+void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
+					    u32 *flags)
 {
 	int i;
 
@@ -1349,8 +949,8 @@ void amdgpu_get_clockgating_state(struct amdgpu_device *adev, u32 *flags)
 	}
 }
 
-int amdgpu_wait_for_idle(struct amdgpu_device *adev,
-			 enum amd_ip_block_type block_type)
+int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
+				   enum amd_ip_block_type block_type)
 {
 	int i, r;
 
@@ -1368,8 +968,8 @@ int amdgpu_wait_for_idle(struct amdgpu_device *adev,
 
 }
 
-bool amdgpu_is_idle(struct amdgpu_device *adev,
-		    enum amd_ip_block_type block_type)
+bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
+			      enum amd_ip_block_type block_type)
 {
 	int i;
 
@@ -1383,8 +983,9 @@ bool amdgpu_is_idle(struct amdgpu_device *adev,
 
 }
 
-struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev,
-					     enum amd_ip_block_type type)
+struct amdgpu_ip_block *
+amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
+			      enum amd_ip_block_type type)
 {
 	int i;
 
@@ -1396,7 +997,7 @@ struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_ip_block_version_cmp
+ * amdgpu_device_ip_block_version_cmp
  *
  * @adev: amdgpu_device pointer
  * @type: enum amd_ip_block_type
@@ -1406,11 +1007,11 @@ struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev,
  * return 0 if equal or greater
  * return 1 if smaller or the ip_block doesn't exist
  */
-int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev,
-				enum amd_ip_block_type type,
-				u32 major, u32 minor)
+int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
+				       enum amd_ip_block_type type,
+				       u32 major, u32 minor)
 {
-	struct amdgpu_ip_block *ip_block = amdgpu_get_ip_block(adev, type);
+	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
 
 	if (ip_block && ((ip_block->version->major > major) ||
 			((ip_block->version->major == major) &&
@@ -1421,7 +1022,7 @@ int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_ip_block_add
+ * amdgpu_device_ip_block_add
  *
  * @adev: amdgpu_device pointer
  * @ip_block_version: pointer to the IP to add
@@ -1429,8 +1030,8 @@ int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev,
  * Adds the IP block driver information to the collection of IPs
  * on the asic.
  */
-int amdgpu_ip_block_add(struct amdgpu_device *adev,
-			const struct amdgpu_ip_block_version *ip_block_version)
+int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
+			       const struct amdgpu_ip_block_version *ip_block_version)
 {
 	if (!ip_block_version)
 		return -EINVAL;
@@ -1586,7 +1187,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 	return err;
 }
 
-static int amdgpu_early_init(struct amdgpu_device *adev)
+static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 {
 	int i, r;
 
@@ -1695,7 +1296,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int amdgpu_init(struct amdgpu_device *adev)
+static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 {
 	int i, r;
 
@@ -1711,7 +1312,7 @@ static int amdgpu_init(struct amdgpu_device *adev)
 		adev->ip_blocks[i].status.sw = true;
 		/* need to do gmc hw init early so we can allocate gpu mem */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
-			r = amdgpu_vram_scratch_init(adev);
+			r = amdgpu_device_vram_scratch_init(adev);
 			if (r) {
 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
 				return r;
@@ -1721,9 +1322,9 @@ static int amdgpu_init(struct amdgpu_device *adev)
 				DRM_ERROR("hw_init %d failed %d\n", i, r);
 				return r;
 			}
-			r = amdgpu_wb_init(adev);
+			r = amdgpu_device_wb_init(adev);
 			if (r) {
-				DRM_ERROR("amdgpu_wb_init failed %d\n", r);
+				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
 				return r;
 			}
 			adev->ip_blocks[i].status.hw = true;
@@ -1762,18 +1363,18 @@ static int amdgpu_init(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void amdgpu_fill_reset_magic(struct amdgpu_device *adev)
+static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
 {
 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
 }
 
-static bool amdgpu_check_vram_lost(struct amdgpu_device *adev)
+static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
 {
 	return !!memcmp(adev->gart.ptr, adev->reset_magic,
 			AMDGPU_RESET_MAGIC_NUM);
 }
 
-static int amdgpu_late_set_cg_state(struct amdgpu_device *adev)
+static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
 {
 	int i = 0, r;
 
@@ -1796,7 +1397,7 @@ static int amdgpu_late_set_cg_state(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int amdgpu_late_init(struct amdgpu_device *adev)
+static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
 {
 	int i = 0, r;
 
@@ -1817,12 +1418,12 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
 	mod_delayed_work(system_wq, &adev->late_init_work,
 			msecs_to_jiffies(AMDGPU_RESUME_MS));
 
-	amdgpu_fill_reset_magic(adev);
+	amdgpu_device_fill_reset_magic(adev);
 
 	return 0;
 }
 
-static int amdgpu_fini(struct amdgpu_device *adev)
+static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 {
 	int i, r;
 
@@ -1856,8 +1457,8 @@ static int amdgpu_fini(struct amdgpu_device *adev)
 			continue;
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
 			amdgpu_free_static_csa(adev);
-			amdgpu_wb_fini(adev);
-			amdgpu_vram_scratch_fini(adev);
+			amdgpu_device_wb_fini(adev);
+			amdgpu_device_vram_scratch_fini(adev);
 		}
 
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@@ -1910,14 +1511,14 @@ static int amdgpu_fini(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void amdgpu_late_init_func_handler(struct work_struct *work)
+static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
 {
 	struct amdgpu_device *adev =
 		container_of(work, struct amdgpu_device, late_init_work.work);
-	amdgpu_late_set_cg_state(adev);
+	amdgpu_device_ip_late_set_cg_state(adev);
 }
 
-int amdgpu_suspend(struct amdgpu_device *adev)
+int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
 {
 	int i, r;
 
@@ -1925,10 +1526,10 @@ int amdgpu_suspend(struct amdgpu_device *adev)
 		amdgpu_virt_request_full_gpu(adev, false);
 
 	/* ungate SMC block first */
-	r = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
-					 AMD_CG_STATE_UNGATE);
+	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
+						   AMD_CG_STATE_UNGATE);
 	if (r) {
-		DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n",r);
+		DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r);
 	}
 
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
@@ -1958,7 +1559,7 @@ int amdgpu_suspend(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev)
+static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
 {
 	int i, r;
 
@@ -1987,7 +1588,7 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev)
+static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
 {
 	int i, r;
 
@@ -2020,7 +1621,7 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int amdgpu_resume_phase1(struct amdgpu_device *adev)
+static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
 {
 	int i, r;
 
@@ -2043,7 +1644,7 @@ static int amdgpu_resume_phase1(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int amdgpu_resume_phase2(struct amdgpu_device *adev)
+static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
 {
 	int i, r;
 
@@ -2065,14 +1666,14 @@ static int amdgpu_resume_phase2(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int amdgpu_resume(struct amdgpu_device *adev)
+static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
 {
 	int r;
 
-	r = amdgpu_resume_phase1(adev);
+	r = amdgpu_device_ip_resume_phase1(adev);
 	if (r)
 		return r;
-	r = amdgpu_resume_phase2(adev);
+	r = amdgpu_device_ip_resume_phase2(adev);
 
 	return r;
 }
@@ -2211,7 +1812,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	hash_init(adev->mn_hash);
 	mutex_init(&adev->lock_reset);
 
-	amdgpu_check_arguments(adev);
+	amdgpu_device_check_arguments(adev);
 
 	spin_lock_init(&adev->mmio_idx_lock);
 	spin_lock_init(&adev->smc_idx_lock);
@@ -2229,7 +1830,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	INIT_LIST_HEAD(&adev->ring_lru_list);
 	spin_lock_init(&adev->ring_lru_list_lock);
 
-	INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler);
+	INIT_DELAYED_WORK(&adev->late_init_work,
+			  amdgpu_device_ip_late_init_func_handler);
 
 	/* Registers mapping */
 	/* TODO: block userspace mapping of io register */
@@ -2249,7 +1851,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
 
 	/* doorbell bar mapping */
-	amdgpu_doorbell_init(adev);
+	amdgpu_device_doorbell_init(adev);
 
 	/* io port mapping */
 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
@@ -2263,17 +1865,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		DRM_INFO("PCI I/O BAR is not found.\n");
 
 	/* early init functions */
-	r = amdgpu_early_init(adev);
+	r = amdgpu_device_ip_early_init(adev);
 	if (r)
 		return r;
 
 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
 	/* this will fail for cards that aren't VGA class devices, just
 	 * ignore it */
-	vga_client_register(adev->pdev, adev, NULL, amdgpu_vga_set_decode);
+	vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
 
-	if (amdgpu_runtime_pm == 1)
-		runtime = true;
 	if (amdgpu_device_is_px(ddev))
 		runtime = true;
 	if (!pci_is_thunderbolt_attached(adev->pdev))
@@ -2299,7 +1899,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	amdgpu_device_detect_sriov_bios(adev);
 
 	/* Post card if necessary */
-	if (amdgpu_need_post(adev)) {
+	if (amdgpu_device_need_post(adev)) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "no vBIOS found\n");
 			r = -EINVAL;
@@ -2345,7 +1945,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	/* init the mode config */
 	drm_mode_config_init(adev->ddev);
 
-	r = amdgpu_init(adev);
+	r = amdgpu_device_ip_init(adev);
 	if (r) {
 		/* failed in exclusive mode due to timeout */
 		if (amdgpu_sriov_vf(adev) &&
@@ -2359,9 +1959,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 			r = -EAGAIN;
 			goto failed;
 		}
-		dev_err(adev->dev, "amdgpu_init failed\n");
+		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
-		amdgpu_fini(adev);
+		amdgpu_device_ip_fini(adev);
 		goto failed;
 	}
 
@@ -2397,7 +1997,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r)
 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
 
-	r = amdgpu_gem_debugfs_init(adev);
+	r = amdgpu_debugfs_gem_init(adev);
 	if (r)
 		DRM_ERROR("registering gem debugfs failed (%d).\n", r);
 
@@ -2405,17 +2005,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r)
 		DRM_ERROR("registering register debugfs failed (%d).\n", r);
 
-	r = amdgpu_debugfs_test_ib_ring_init(adev);
-	if (r)
-		DRM_ERROR("registering register test ib ring debugfs failed (%d).\n", r);
-
 	r = amdgpu_debugfs_firmware_init(adev);
 	if (r)
 		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
 
-	r = amdgpu_debugfs_vbios_dump_init(adev);
+	r = amdgpu_debugfs_init(adev);
 	if (r)
-		DRM_ERROR("Creating vbios dump debugfs failed (%d).\n", r);
+		DRM_ERROR("Creating debugfs files failed (%d).\n", r);
 
 	if ((amdgpu_testing & 1)) {
 		if (adev->accel_working)
@@ -2433,9 +2029,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
 	 * explicit gating rather than handling it automatically.
 	 */
-	r = amdgpu_late_init(adev);
+	r = amdgpu_device_ip_late_init(adev);
 	if (r) {
-		dev_err(adev->dev, "amdgpu_late_init failed\n");
+		dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
 		goto failed;
 	}
@@ -2466,12 +2062,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	adev->shutdown = true;
 	if (adev->mode_info.mode_config_initialized)
 		drm_crtc_force_disable_all(adev->ddev);
-	/* evict vram memory */
-	amdgpu_bo_evict_vram(adev);
+
 	amdgpu_ib_pool_fini(adev);
 	amdgpu_fence_driver_fini(adev);
 	amdgpu_fbdev_fini(adev);
-	r = amdgpu_fini(adev);
+	r = amdgpu_device_ip_fini(adev);
 	if (adev->firmware.gpu_info_fw) {
 		release_firmware(adev->firmware.gpu_info_fw);
 		adev->firmware.gpu_info_fw = NULL;
@@ -2494,7 +2089,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	adev->rio_mem = NULL;
 	iounmap(adev->rmmio);
 	adev->rmmio = NULL;
-	amdgpu_doorbell_fini(adev);
+	amdgpu_device_doorbell_fini(adev);
 	amdgpu_pm_sysfs_fini(adev);
 	amdgpu_debugfs_regs_cleanup(adev);
 }
@@ -2575,7 +2170,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 
 	amdgpu_fence_driver_suspend(adev);
 
-	r = amdgpu_suspend(adev);
+	r = amdgpu_device_ip_suspend(adev);
 
 	/* evict remaining vram memory
 	 * This second call to evict vram is to evict the gart page table
@@ -2583,7 +2178,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 	 */
 	amdgpu_bo_evict_vram(adev);
 
-	amdgpu_atombios_scratch_regs_save(adev);
 	pci_save_state(dev->pdev);
 	if (suspend) {
 		/* Shut down the device */
@@ -2632,18 +2226,17 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 		if (r)
 			goto unlock;
 	}
-	amdgpu_atombios_scratch_regs_restore(adev);
 
 	/* post card */
-	if (amdgpu_need_post(adev)) {
+	if (amdgpu_device_need_post(adev)) {
 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
 		if (r)
 			DRM_ERROR("amdgpu asic init failed\n");
 	}
 
-	r = amdgpu_resume(adev);
+	r = amdgpu_device_ip_resume(adev);
 	if (r) {
-		DRM_ERROR("amdgpu_resume failed (%d).\n", r);
+		DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
 		goto unlock;
 	}
 	amdgpu_fence_driver_resume(adev);
@@ -2654,7 +2247,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 			DRM_ERROR("ib ring test failed (%d).\n", r);
 	}
 
-	r = amdgpu_late_init(adev);
+	r = amdgpu_device_ip_late_init(adev);
 	if (r)
 		goto unlock;
 
@@ -2734,7 +2327,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	return r;
 }
 
-static bool amdgpu_check_soft_reset(struct amdgpu_device *adev)
+static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
 {
 	int i;
 	bool asic_hang = false;
@@ -2756,7 +2349,7 @@ static bool amdgpu_check_soft_reset(struct amdgpu_device *adev)
 	return asic_hang;
 }
 
-static int amdgpu_pre_soft_reset(struct amdgpu_device *adev)
+static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
 {
 	int i, r = 0;
 
@@ -2774,7 +2367,7 @@ static int amdgpu_pre_soft_reset(struct amdgpu_device *adev)
 	return 0;
 }
 
-static bool amdgpu_need_full_reset(struct amdgpu_device *adev)
+static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
 {
 	int i;
 
@@ -2795,7 +2388,7 @@ static bool amdgpu_need_full_reset(struct amdgpu_device *adev)
 	return false;
 }
 
-static int amdgpu_soft_reset(struct amdgpu_device *adev)
+static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
 {
 	int i, r = 0;
 
@@ -2813,7 +2406,7 @@ static int amdgpu_soft_reset(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int amdgpu_post_soft_reset(struct amdgpu_device *adev)
+static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
 {
 	int i, r = 0;
 
@@ -2830,18 +2423,10 @@ static int amdgpu_post_soft_reset(struct amdgpu_device *adev)
 	return 0;
 }
 
-bool amdgpu_need_backup(struct amdgpu_device *adev)
-{
-	if (adev->flags & AMD_IS_APU)
-		return false;
-
-	return amdgpu_lockup_timeout > 0 ? true : false;
-}
-
-static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
-					   struct amdgpu_ring *ring,
-					   struct amdgpu_bo *bo,
-					   struct dma_fence **fence)
+static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
+						  struct amdgpu_ring *ring,
+						  struct amdgpu_bo *bo,
+						  struct dma_fence **fence)
 {
 	uint32_t domain;
 	int r;
@@ -2874,7 +2459,7 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
 }
 
 /*
- * amdgpu_reset - reset ASIC/GPU for bare-metal or passthrough
+ * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
  *
  * @adev: amdgpu device pointer
  * @reset_flags: output param tells caller the reset result
@@ -2882,18 +2467,19 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
  * attempt to do soft-reset or full-reset and reinitialize Asic
  * return 0 means successed otherwise failed
 */
-static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags)
+static int amdgpu_device_reset(struct amdgpu_device *adev,
+			       uint64_t* reset_flags)
 {
 	bool need_full_reset, vram_lost = 0;
 	int r;
 
-	need_full_reset = amdgpu_need_full_reset(adev);
+	need_full_reset = amdgpu_device_ip_need_full_reset(adev);
 
 	if (!need_full_reset) {
-		amdgpu_pre_soft_reset(adev);
-		r = amdgpu_soft_reset(adev);
-		amdgpu_post_soft_reset(adev);
-		if (r || amdgpu_check_soft_reset(adev)) {
+		amdgpu_device_ip_pre_soft_reset(adev);
+		r = amdgpu_device_ip_soft_reset(adev);
+		amdgpu_device_ip_post_soft_reset(adev);
+		if (r || amdgpu_device_ip_check_soft_reset(adev)) {
 			DRM_INFO("soft reset failed, will fallback to full reset!\n");
 			need_full_reset = true;
 		}
@@ -2901,22 +2487,20 @@ static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags)
 	}
 
 	if (need_full_reset) {
-		r = amdgpu_suspend(adev);
+		r = amdgpu_device_ip_suspend(adev);
 
 retry:
-		amdgpu_atombios_scratch_regs_save(adev);
 		r = amdgpu_asic_reset(adev);
-		amdgpu_atombios_scratch_regs_restore(adev);
 		/* post card */
 		amdgpu_atom_asic_init(adev->mode_info.atom_context);
 
 		if (!r) {
 			dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
-			r = amdgpu_resume_phase1(adev);
+			r = amdgpu_device_ip_resume_phase1(adev);
 			if (r)
 				goto out;
 
-			vram_lost = amdgpu_check_vram_lost(adev);
+			vram_lost = amdgpu_device_check_vram_lost(adev);
 			if (vram_lost) {
 				DRM_ERROR("VRAM is lost!\n");
 				atomic_inc(&adev->vram_lost_counter);
@@ -2927,12 +2511,12 @@ static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags)
 			if (r)
 				goto out;
 
-			r = amdgpu_resume_phase2(adev);
+			r = amdgpu_device_ip_resume_phase2(adev);
 			if (r)
 				goto out;
 
 			if (vram_lost)
-				amdgpu_fill_reset_magic(adev);
+				amdgpu_device_fill_reset_magic(adev);
 		}
 	}
 
@@ -2942,7 +2526,7 @@ static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags)
 		r = amdgpu_ib_ring_tests(adev);
 		if (r) {
 			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
-			r = amdgpu_suspend(adev);
+			r = amdgpu_device_ip_suspend(adev);
 			need_full_reset = true;
 			goto retry;
 		}
@@ -2960,7 +2544,7 @@ static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags)
 }
 
 /*
- * amdgpu_reset_sriov - reset ASIC for SR-IOV vf
+ * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
  *
  * @adev: amdgpu device pointer
  * @reset_flags: output param tells caller the reset result
@@ -2968,7 +2552,9 @@ static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags)
  * do VF FLR and reinitialize Asic
  * return 0 means successed otherwise failed
 */
-static int amdgpu_reset_sriov(struct amdgpu_device *adev, uint64_t *reset_flags, bool from_hypervisor)
+static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
+				     uint64_t *reset_flags,
+				     bool from_hypervisor)
 {
 	int r;
 
@@ -2980,7 +2566,7 @@ static int amdgpu_reset_sriov(struct amdgpu_device *adev, uint64_t *reset_flags,
 		return r;
 
 	/* Resume IP prior to SMC */
-	r = amdgpu_sriov_reinit_early(adev);
+	r = amdgpu_device_ip_reinit_early_sriov(adev);
 	if (r)
 		goto error;
 
@@ -2988,7 +2574,7 @@ static int amdgpu_reset_sriov(struct amdgpu_device *adev, uint64_t *reset_flags,
 	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
 
 	/* now we are okay to resume SMC/CP/SDMA */
-	r = amdgpu_sriov_reinit_late(adev);
+	r = amdgpu_device_ip_reinit_late_sriov(adev);
 	if (r)
 		goto error;
 
@@ -3015,25 +2601,33 @@ static int amdgpu_reset_sriov(struct amdgpu_device *adev, uint64_t *reset_flags,
 }
 
 /**
- * amdgpu_gpu_recover - reset the asic and recover scheduler
+ * amdgpu_device_gpu_recover - reset the asic and recover scheduler
  *
  * @adev: amdgpu device pointer
  * @job: which job trigger hang
+ * @force forces reset regardless of amdgpu_gpu_recovery
  *
  * Attempt to reset the GPU if it has hung (all asics).
  * Returns 0 for success or an error on failure.
  */
-int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job)
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+			      struct amdgpu_job *job, bool force)
 {
 	struct drm_atomic_state *state = NULL;
 	uint64_t reset_flags = 0;
 	int i, r, resched;
 
-	if (!amdgpu_check_soft_reset(adev)) {
+	if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
 		DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
 		return 0;
 	}
 
+	if (!force && (amdgpu_gpu_recovery == 0 ||
+			(amdgpu_gpu_recovery == -1  && !amdgpu_sriov_vf(adev)))) {
+		DRM_INFO("GPU recovery disabled.\n");
+		return 0;
+	}
+
 	dev_info(adev->dev, "GPU reset begin!\n");
 
 	mutex_lock(&adev->lock_reset);
@@ -3058,16 +2652,16 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job)
 			continue;
 
 		kthread_park(ring->sched.thread);
-		amd_sched_hw_job_reset(&ring->sched, &job->base);
+		drm_sched_hw_job_reset(&ring->sched, &job->base);
 
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
 
 	if (amdgpu_sriov_vf(adev))
-		r = amdgpu_reset_sriov(adev, &reset_flags, job ? false : true);
+		r = amdgpu_device_reset_sriov(adev, &reset_flags, job ? false : true);
 	else
-		r = amdgpu_reset(adev, &reset_flags);
+		r = amdgpu_device_reset(adev, &reset_flags);
 
 	if (!r) {
 		if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) ||
@@ -3080,7 +2674,7 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job)
 			mutex_lock(&adev->shadow_list_lock);
 			list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
 				next = NULL;
-				amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
+				amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
 				if (fence) {
 					r = dma_fence_wait(fence, false);
 					if (r) {
@@ -3111,7 +2705,7 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job)
 			if (job && job->ring->idx != i)
 				continue;
 
-			amd_sched_job_recovery(&ring->sched);
+			drm_sched_job_recovery(&ring->sched);
 			kthread_unpark(ring->sched.thread);
 		}
 	} else {
@@ -3153,7 +2747,7 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job)
 	return r;
 }
 
-void amdgpu_get_pcie_info(struct amdgpu_device *adev)
+void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 {
 	u32 mask;
 	int ret;
@@ -3245,773 +2839,3 @@ void amdgpu_get_pcie_info(struct amdgpu_device *adev)
 	}
 }
 
-/*
- * Debugfs
- */
-int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
-			     const struct drm_info_list *files,
-			     unsigned nfiles)
-{
-	unsigned i;
-
-	for (i = 0; i < adev->debugfs_count; i++) {
-		if (adev->debugfs[i].files == files) {
-			/* Already registered */
-			return 0;
-		}
-	}
-
-	i = adev->debugfs_count + 1;
-	if (i > AMDGPU_DEBUGFS_MAX_COMPONENTS) {
-		DRM_ERROR("Reached maximum number of debugfs components.\n");
-		DRM_ERROR("Report so we increase "
-			  "AMDGPU_DEBUGFS_MAX_COMPONENTS.\n");
-		return -EINVAL;
-	}
-	adev->debugfs[adev->debugfs_count].files = files;
-	adev->debugfs[adev->debugfs_count].num_files = nfiles;
-	adev->debugfs_count = i;
-#if defined(CONFIG_DEBUG_FS)
-	drm_debugfs_create_files(files, nfiles,
-				 adev->ddev->primary->debugfs_root,
-				 adev->ddev->primary);
-#endif
-	return 0;
-}
-
-#if defined(CONFIG_DEBUG_FS)
-
-static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
-					size_t size, loff_t *pos)
-{
-	struct amdgpu_device *adev = file_inode(f)->i_private;
-	ssize_t result = 0;
-	int r;
-	bool pm_pg_lock, use_bank;
-	unsigned instance_bank, sh_bank, se_bank;
-
-	if (size & 0x3 || *pos & 0x3)
-		return -EINVAL;
-
-	/* are we reading registers for which a PG lock is necessary? */
-	pm_pg_lock = (*pos >> 23) & 1;
-
-	if (*pos & (1ULL << 62)) {
-		se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24;
-		sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34;
-		instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44;
-
-		if (se_bank == 0x3FF)
-			se_bank = 0xFFFFFFFF;
-		if (sh_bank == 0x3FF)
-			sh_bank = 0xFFFFFFFF;
-		if (instance_bank == 0x3FF)
-			instance_bank = 0xFFFFFFFF;
-		use_bank = 1;
-	} else {
-		use_bank = 0;
-	}
-
-	*pos &= (1UL << 22) - 1;
-
-	if (use_bank) {
-		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
-		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
-			return -EINVAL;
-		mutex_lock(&adev->grbm_idx_mutex);
-		amdgpu_gfx_select_se_sh(adev, se_bank,
-					sh_bank, instance_bank);
-	}
-
-	if (pm_pg_lock)
-		mutex_lock(&adev->pm.mutex);
-
-	while (size) {
-		uint32_t value;
-
-		if (*pos > adev->rmmio_size)
-			goto end;
-
-		value = RREG32(*pos >> 2);
-		r = put_user(value, (uint32_t *)buf);
-		if (r) {
-			result = r;
-			goto end;
-		}
-
-		result += 4;
-		buf += 4;
-		*pos += 4;
-		size -= 4;
-	}
-
-end:
-	if (use_bank) {
-		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-		mutex_unlock(&adev->grbm_idx_mutex);
-	}
-
-	if (pm_pg_lock)
-		mutex_unlock(&adev->pm.mutex);
-
-	return result;
-}
-
-static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
-					 size_t size, loff_t *pos)
-{
-	struct amdgpu_device *adev = file_inode(f)->i_private;
-	ssize_t result = 0;
-	int r;
-	bool pm_pg_lock, use_bank;
-	unsigned instance_bank, sh_bank, se_bank;
-
-	if (size & 0x3 || *pos & 0x3)
-		return -EINVAL;
-
-	/* are we reading registers for which a PG lock is necessary? */
-	pm_pg_lock = (*pos >> 23) & 1;
-
-	if (*pos & (1ULL << 62)) {
-		se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24;
-		sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34;
-		instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44;
-
-		if (se_bank == 0x3FF)
-			se_bank = 0xFFFFFFFF;
-		if (sh_bank == 0x3FF)
-			sh_bank = 0xFFFFFFFF;
-		if (instance_bank == 0x3FF)
-			instance_bank = 0xFFFFFFFF;
-		use_bank = 1;
-	} else {
-		use_bank = 0;
-	}
-
-	*pos &= (1UL << 22) - 1;
-
-	if (use_bank) {
-		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
-		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
-			return -EINVAL;
-		mutex_lock(&adev->grbm_idx_mutex);
-		amdgpu_gfx_select_se_sh(adev, se_bank,
-					sh_bank, instance_bank);
-	}
-
-	if (pm_pg_lock)
-		mutex_lock(&adev->pm.mutex);
-
-	while (size) {
-		uint32_t value;
-
-		if (*pos > adev->rmmio_size)
-			return result;
-
-		r = get_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
-
-		WREG32(*pos >> 2, value);
-
-		result += 4;
-		buf += 4;
-		*pos += 4;
-		size -= 4;
-	}
-
-	if (use_bank) {
-		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-		mutex_unlock(&adev->grbm_idx_mutex);
-	}
-
-	if (pm_pg_lock)
-		mutex_unlock(&adev->pm.mutex);
-
-	return result;
-}
-
-static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
-					size_t size, loff_t *pos)
-{
-	struct amdgpu_device *adev = file_inode(f)->i_private;
-	ssize_t result = 0;
-	int r;
-
-	if (size & 0x3 || *pos & 0x3)
-		return -EINVAL;
-
-	while (size) {
-		uint32_t value;
-
-		value = RREG32_PCIE(*pos >> 2);
-		r = put_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
-
-		result += 4;
-		buf += 4;
-		*pos += 4;
-		size -= 4;
-	}
-
-	return result;
-}
-
-static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
-					 size_t size, loff_t *pos)
-{
-	struct amdgpu_device *adev = file_inode(f)->i_private;
-	ssize_t result = 0;
-	int r;
-
-	if (size & 0x3 || *pos & 0x3)
-		return -EINVAL;
-
-	while (size) {
-		uint32_t value;
-
-		r = get_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
-
-		WREG32_PCIE(*pos >> 2, value);
-
-		result += 4;
-		buf += 4;
-		*pos += 4;
-		size -= 4;
-	}
-
-	return result;
-}
-
-static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
-					size_t size, loff_t *pos)
-{
-	struct amdgpu_device *adev = file_inode(f)->i_private;
-	ssize_t result = 0;
-	int r;
-
-	if (size & 0x3 || *pos & 0x3)
-		return -EINVAL;
-
-	while (size) {
-		uint32_t value;
-
-		value = RREG32_DIDT(*pos >> 2);
-		r = put_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
-
-		result += 4;
-		buf += 4;
-		*pos += 4;
-		size -= 4;
-	}
-
-	return result;
-}
-
-static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
-					 size_t size, loff_t *pos)
-{
-	struct amdgpu_device *adev = file_inode(f)->i_private;
-	ssize_t result = 0;
-	int r;
-
-	if (size & 0x3 || *pos & 0x3)
-		return -EINVAL;
-
-	while (size) {
-		uint32_t value;
-
-		r = get_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
-
-		WREG32_DIDT(*pos >> 2, value);
-
-		result += 4;
-		buf += 4;
-		*pos += 4;
-		size -= 4;
-	}
-
-	return result;
-}
-
-static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
-					size_t size, loff_t *pos)
-{
-	struct amdgpu_device *adev = file_inode(f)->i_private;
-	ssize_t result = 0;
-	int r;
-
-	if (size & 0x3 || *pos & 0x3)
-		return -EINVAL;
-
-	while (size) {
-		uint32_t value;
-
-		value = RREG32_SMC(*pos);
-		r = put_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
-
-		result += 4;
-		buf += 4;
-		*pos += 4;
-		size -= 4;
-	}
-
-	return result;
-}
-
-static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
-					 size_t size, loff_t *pos)
-{
-	struct amdgpu_device *adev = file_inode(f)->i_private;
-	ssize_t result = 0;
-	int r;
-
-	if (size & 0x3 || *pos & 0x3)
-		return -EINVAL;
-
-	while (size) {
-		uint32_t value;
-
-		r = get_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
-
-		WREG32_SMC(*pos, value);
-
-		result += 4;
-		buf += 4;
-		*pos += 4;
-		size -= 4;
-	}
-
-	return result;
-}
-
-static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
-					size_t size, loff_t *pos)
-{
-	struct amdgpu_device *adev = file_inode(f)->i_private;
-	ssize_t result = 0;
-	int r;
-	uint32_t *config, no_regs = 0;
-
-	if (size & 0x3 || *pos & 0x3)
-		return -EINVAL;
-
-	config = kmalloc_array(256, sizeof(*config), GFP_KERNEL);
-	if (!config)
-		return -ENOMEM;
-
-	/* version, increment each time something is added */
-	config[no_regs++] = 3;
-	config[no_regs++] = adev->gfx.config.max_shader_engines;
-	config[no_regs++] = adev->gfx.config.max_tile_pipes;
-	config[no_regs++] = adev->gfx.config.max_cu_per_sh;
-	config[no_regs++] = adev->gfx.config.max_sh_per_se;
-	config[no_regs++] = adev->gfx.config.max_backends_per_se;
-	config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
-	config[no_regs++] = adev->gfx.config.max_gprs;
-	config[no_regs++] = adev->gfx.config.max_gs_threads;
-	config[no_regs++] = adev->gfx.config.max_hw_contexts;
-	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
-	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
-	config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
-	config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
-	config[no_regs++] = adev->gfx.config.num_tile_pipes;
-	config[no_regs++] = adev->gfx.config.backend_enable_mask;
-	config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
-	config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
-	config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
-	config[no_regs++] = adev->gfx.config.num_gpus;
-	config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
-	config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
-	config[no_regs++] = adev->gfx.config.gb_addr_config;
-	config[no_regs++] = adev->gfx.config.num_rbs;
-
-	/* rev==1 */
-	config[no_regs++] = adev->rev_id;
-	config[no_regs++] = adev->pg_flags;
-	config[no_regs++] = adev->cg_flags;
-
-	/* rev==2 */
-	config[no_regs++] = adev->family;
-	config[no_regs++] = adev->external_rev_id;
-
-	/* rev==3 */
-	config[no_regs++] = adev->pdev->device;
-	config[no_regs++] = adev->pdev->revision;
-	config[no_regs++] = adev->pdev->subsystem_device;
-	config[no_regs++] = adev->pdev->subsystem_vendor;
-
-	while (size && (*pos < no_regs * 4)) {
-		uint32_t value;
-
-		value = config[*pos >> 2];
-		r = put_user(value, (uint32_t *)buf);
-		if (r) {
-			kfree(config);
-			return r;
-		}
-
-		result += 4;
-		buf += 4;
-		*pos += 4;
-		size -= 4;
-	}
-
-	kfree(config);
-	return result;
-}
-
-static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
-					size_t size, loff_t *pos)
-{
-	struct amdgpu_device *adev = file_inode(f)->i_private;
-	int idx, x, outsize, r, valuesize;
-	uint32_t values[16];
-
-	if (size & 3 || *pos & 0x3)
-		return -EINVAL;
-
-	if (amdgpu_dpm == 0)
-		return -EINVAL;
-
-	/* convert offset to sensor number */
-	idx = *pos >> 2;
-
-	valuesize = sizeof(values);
-	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor)
-		r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
-	else
-		return -EINVAL;
-
-	if (size > valuesize)
-		return -EINVAL;
-
-	outsize = 0;
-	x = 0;
-	if (!r) {
-		while (size) {
-			r = put_user(values[x++], (int32_t *)buf);
-			buf += 4;
-			size -= 4;
-			outsize += 4;
-		}
-	}
-
-	return !r ? outsize : r;
-}
-
-static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
-					size_t size, loff_t *pos)
-{
-	struct amdgpu_device *adev = f->f_inode->i_private;
-	int r, x;
-	ssize_t result=0;
-	uint32_t offset, se, sh, cu, wave, simd, data[32];
-
-	if (size & 3 || *pos & 3)
-		return -EINVAL;
-
-	/* decode offset */
-	offset = (*pos & GENMASK_ULL(6, 0));
-	se = (*pos & GENMASK_ULL(14, 7)) >> 7;
-	sh = (*pos & GENMASK_ULL(22, 15)) >> 15;
-	cu = (*pos & GENMASK_ULL(30, 23)) >> 23;
-	wave = (*pos & GENMASK_ULL(36, 31)) >> 31;
-	simd = (*pos & GENMASK_ULL(44, 37)) >> 37;
-
-	/* switch to the specific se/sh/cu */
-	mutex_lock(&adev->grbm_idx_mutex);
-	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
-
-	x = 0;
-	if (adev->gfx.funcs->read_wave_data)
-		adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x);
-
-	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-	mutex_unlock(&adev->grbm_idx_mutex);
-
-	if (!x)
-		return -EINVAL;
-
-	while (size && (offset < x * 4)) {
-		uint32_t value;
-
-		value = data[offset >> 2];
-		r = put_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
-
-		result += 4;
-		buf += 4;
-		offset += 4;
-		size -= 4;
-	}
-
-	return result;
-}
-
-static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
-					size_t size, loff_t *pos)
-{
-	struct amdgpu_device *adev = f->f_inode->i_private;
-	int r;
-	ssize_t result = 0;
-	uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
-
-	if (size & 3 || *pos & 3)
-		return -EINVAL;
-
-	/* decode offset */
-	offset = *pos & GENMASK_ULL(11, 0);
-	se = (*pos & GENMASK_ULL(19, 12)) >> 12;
-	sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
-	cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
-	wave = (*pos & GENMASK_ULL(43, 36)) >> 36;
-	simd = (*pos & GENMASK_ULL(51, 44)) >> 44;
-	thread = (*pos & GENMASK_ULL(59, 52)) >> 52;
-	bank = (*pos & GENMASK_ULL(61, 60)) >> 60;
-
-	data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	/* switch to the specific se/sh/cu */
-	mutex_lock(&adev->grbm_idx_mutex);
-	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
-
-	if (bank == 0) {
-		if (adev->gfx.funcs->read_wave_vgprs)
-			adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data);
-	} else {
-		if (adev->gfx.funcs->read_wave_sgprs)
-			adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data);
-	}
-
-	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-	mutex_unlock(&adev->grbm_idx_mutex);
-
-	while (size) {
-		uint32_t value;
-
-		value = data[offset++];
-		r = put_user(value, (uint32_t *)buf);
-		if (r) {
-			result = r;
-			goto err;
-		}
-
-		result += 4;
-		buf += 4;
-		size -= 4;
-	}
-
-err:
-	kfree(data);
-	return result;
-}
-
-static const struct file_operations amdgpu_debugfs_regs_fops = {
-	.owner = THIS_MODULE,
-	.read = amdgpu_debugfs_regs_read,
-	.write = amdgpu_debugfs_regs_write,
-	.llseek = default_llseek
-};
-static const struct file_operations amdgpu_debugfs_regs_didt_fops = {
-	.owner = THIS_MODULE,
-	.read = amdgpu_debugfs_regs_didt_read,
-	.write = amdgpu_debugfs_regs_didt_write,
-	.llseek = default_llseek
-};
-static const struct file_operations amdgpu_debugfs_regs_pcie_fops = {
-	.owner = THIS_MODULE,
-	.read = amdgpu_debugfs_regs_pcie_read,
-	.write = amdgpu_debugfs_regs_pcie_write,
-	.llseek = default_llseek
-};
-static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
-	.owner = THIS_MODULE,
-	.read = amdgpu_debugfs_regs_smc_read,
-	.write = amdgpu_debugfs_regs_smc_write,
-	.llseek = default_llseek
-};
-
-static const struct file_operations amdgpu_debugfs_gca_config_fops = {
-	.owner = THIS_MODULE,
-	.read = amdgpu_debugfs_gca_config_read,
-	.llseek = default_llseek
-};
-
-static const struct file_operations amdgpu_debugfs_sensors_fops = {
-	.owner = THIS_MODULE,
-	.read = amdgpu_debugfs_sensor_read,
-	.llseek = default_llseek
-};
-
-static const struct file_operations amdgpu_debugfs_wave_fops = {
-	.owner = THIS_MODULE,
-	.read = amdgpu_debugfs_wave_read,
-	.llseek = default_llseek
-};
-static const struct file_operations amdgpu_debugfs_gpr_fops = {
-	.owner = THIS_MODULE,
-	.read = amdgpu_debugfs_gpr_read,
-	.llseek = default_llseek
-};
-
-static const struct file_operations *debugfs_regs[] = {
-	&amdgpu_debugfs_regs_fops,
-	&amdgpu_debugfs_regs_didt_fops,
-	&amdgpu_debugfs_regs_pcie_fops,
-	&amdgpu_debugfs_regs_smc_fops,
-	&amdgpu_debugfs_gca_config_fops,
-	&amdgpu_debugfs_sensors_fops,
-	&amdgpu_debugfs_wave_fops,
-	&amdgpu_debugfs_gpr_fops,
-};
-
-static const char *debugfs_regs_names[] = {
-	"amdgpu_regs",
-	"amdgpu_regs_didt",
-	"amdgpu_regs_pcie",
-	"amdgpu_regs_smc",
-	"amdgpu_gca_config",
-	"amdgpu_sensors",
-	"amdgpu_wave",
-	"amdgpu_gpr",
-};
-
-static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
-{
-	struct drm_minor *minor = adev->ddev->primary;
-	struct dentry *ent, *root = minor->debugfs_root;
-	unsigned i, j;
-
-	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
-		ent = debugfs_create_file(debugfs_regs_names[i],
-					  S_IFREG | S_IRUGO, root,
-					  adev, debugfs_regs[i]);
-		if (IS_ERR(ent)) {
-			for (j = 0; j < i; j++) {
-				debugfs_remove(adev->debugfs_regs[i]);
-				adev->debugfs_regs[i] = NULL;
-			}
-			return PTR_ERR(ent);
-		}
-
-		if (!i)
-			i_size_write(ent->d_inode, adev->rmmio_size);
-		adev->debugfs_regs[i] = ent;
-	}
-
-	return 0;
-}
-
-static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev)
-{
-	unsigned i;
-
-	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
-		if (adev->debugfs_regs[i]) {
-			debugfs_remove(adev->debugfs_regs[i]);
-			adev->debugfs_regs[i] = NULL;
-		}
-	}
-}
-
-static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
-{
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct amdgpu_device *adev = dev->dev_private;
-	int r = 0, i;
-
-	/* hold on the scheduler */
-	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-
-		if (!ring || !ring->sched.thread)
-			continue;
-		kthread_park(ring->sched.thread);
-	}
-
-	seq_printf(m, "run ib test:\n");
-	r = amdgpu_ib_ring_tests(adev);
-	if (r)
-		seq_printf(m, "ib ring tests failed (%d).\n", r);
-	else
-		seq_printf(m, "ib ring tests passed.\n");
-
-	/* go on the scheduler */
-	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-
-		if (!ring || !ring->sched.thread)
-			continue;
-		kthread_unpark(ring->sched.thread);
-	}
-
-	return 0;
-}
-
-static const struct drm_info_list amdgpu_debugfs_test_ib_ring_list[] = {
-	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib}
-};
-
-static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev)
-{
-	return amdgpu_debugfs_add_files(adev,
-					amdgpu_debugfs_test_ib_ring_list, 1);
-}
-
-int amdgpu_debugfs_init(struct drm_minor *minor)
-{
-	return 0;
-}
-
-static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data)
-{
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct amdgpu_device *adev = dev->dev_private;
-
-	seq_write(m, adev->bios, adev->bios_size);
-	return 0;
-}
-
-static const struct drm_info_list amdgpu_vbios_dump_list[] = {
-		{"amdgpu_vbios",
-		 amdgpu_debugfs_get_vbios_dump,
-		 0, NULL},
-};
-
-static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev)
-{
-	return amdgpu_debugfs_add_files(adev,
-					amdgpu_vbios_dump_list, 1);
-}
-#else
-static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev)
-{
-	return 0;
-}
-static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
-{
-	return 0;
-}
-static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev)
-{
-	return 0;
-}
-static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { }
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 31383e0049476153cd87d2dcfe2cf90805941751..50afcf65181a4e7f4ac5e5dc3c804a1af06864e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -90,7 +90,7 @@ int amdgpu_disp_priority = 0;
 int amdgpu_hw_i2c = 0;
 int amdgpu_pcie_gen2 = -1;
 int amdgpu_msi = -1;
-int amdgpu_lockup_timeout = 0;
+int amdgpu_lockup_timeout = 10000;
 int amdgpu_dpm = -1;
 int amdgpu_fw_load_type = -1;
 int amdgpu_aspm = -1;
@@ -128,6 +128,7 @@ int amdgpu_param_buf_per_se = 0;
 int amdgpu_job_hang_limit = 0;
 int amdgpu_lbpw = -1;
 int amdgpu_compute_multipipe = -1;
+int amdgpu_gpu_recovery = -1; /* auto */
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -165,7 +166,7 @@ module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444);
 MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(msi, amdgpu_msi, int, 0444);
 
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default 0 = disable)");
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");
 module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
 
 MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");
@@ -280,6 +281,9 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444);
 MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
 
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto");
+module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
+
 #ifdef CONFIG_DRM_AMDGPU_SI
 
 #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
@@ -645,7 +649,7 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
 	 * unfortunately we can't detect certain
 	 * hypervisors so just do this all the time.
 	 */
-	amdgpu_suspend(adev);
+	amdgpu_device_ip_suspend(adev);
 }
 
 static int amdgpu_pmops_suspend(struct device *dev)
@@ -850,9 +854,6 @@ static struct drm_driver kms_driver = {
 	.disable_vblank = amdgpu_disable_vblank_kms,
 	.get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos,
 	.get_scanout_position = amdgpu_get_crtc_scanout_position,
-#if defined(CONFIG_DEBUG_FS)
-	.debugfs_init = amdgpu_debugfs_init,
-#endif
 	.irq_preinstall = amdgpu_irq_preinstall,
 	.irq_postinstall = amdgpu_irq_postinstall,
 	.irq_uninstall = amdgpu_irq_uninstall,
@@ -912,10 +913,6 @@ static int __init amdgpu_init(void)
 	if (r)
 		goto error_fence;
 
-	r = amd_sched_fence_slab_init();
-	if (r)
-		goto error_sched;
-
 	if (vgacon_text_force()) {
 		DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
 		return -EINVAL;
@@ -928,9 +925,6 @@ static int __init amdgpu_init(void)
 	/* let modprobe override vga console setting */
 	return pci_register_driver(pdriver);
 
-error_sched:
-	amdgpu_fence_slab_fini();
-
 error_fence:
 	amdgpu_sync_fini();
 
@@ -944,7 +938,6 @@ static void __exit amdgpu_exit(void)
 	pci_unregister_driver(pdriver);
 	amdgpu_unregister_atpx_handler();
 	amdgpu_sync_fini();
-	amd_sched_fence_slab_fini();
 	amdgpu_fence_slab_fini();
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 604ac03a42e4f3858d87ab2d53ca563bcb4e7b93..008e1984b7e39b9d51a9021c0df5762d889d6506 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -187,7 +187,7 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
 
 	seq = ++ring->fence_drv.sync_seq;
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
-			       seq, AMDGPU_FENCE_FLAG_INT);
+			       seq, 0);
 
 	*s = seq;
 
@@ -410,7 +410,6 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 				  unsigned num_hw_submission)
 {
-	long timeout;
 	int r;
 
 	/* Check that num_hw_submission is a power of two */
@@ -434,20 +433,9 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 
 	/* No need to setup the GPU scheduler for KIQ ring */
 	if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
-		timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
-		if (timeout == 0) {
-			/*
-			 * FIXME:
-			 * Delayed workqueue cannot use it directly,
-			 * so the scheduler will not use delayed workqueue if
-			 * MAX_SCHEDULE_TIMEOUT is set.
-			 * Currently keep it simple and silly.
-			 */
-			timeout = MAX_SCHEDULE_TIMEOUT;
-		}
-		r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
+		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
 				   num_hw_submission, amdgpu_job_hang_limit,
-				   timeout, ring->name);
+				   msecs_to_jiffies(amdgpu_lockup_timeout), ring->name);
 		if (r) {
 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
 				  ring->name);
@@ -503,7 +491,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 		}
 		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 			       ring->fence_drv.irq_type);
-		amd_sched_fini(&ring->sched);
+		drm_sched_fini(&ring->sched);
 		del_timer_sync(&ring->fence_drv.fallback_timer);
 		for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
 			dma_fence_put(ring->fence_drv.fences[j]);
@@ -705,7 +693,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
 	struct amdgpu_device *adev = dev->dev_private;
 
 	seq_printf(m, "gpu recover\n");
-	amdgpu_gpu_recover(adev, NULL);
+	amdgpu_device_gpu_recover(adev, NULL, true);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 1f51897acc5b4d7bcf0b86fa7e2c53accff60d49..0a4f34afaaaa321dc2a0680a227660d8ecefd0d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -56,6 +56,51 @@
  * Common GART table functions.
  */
 
+/**
+ * amdgpu_dummy_page_init - init dummy page used by the driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate the dummy page used by the driver (all asics).
+ * This dummy page is used by the driver as a filler for gart entries
+ * when pages are taken out of the GART
+ * Returns 0 on sucess, -ENOMEM on failure.
+ */
+static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
+{
+	if (adev->dummy_page.page)
+		return 0;
+	adev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
+	if (adev->dummy_page.page == NULL)
+		return -ENOMEM;
+	adev->dummy_page.addr = pci_map_page(adev->pdev, adev->dummy_page.page,
+					0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	if (pci_dma_mapping_error(adev->pdev, adev->dummy_page.addr)) {
+		dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
+		__free_page(adev->dummy_page.page);
+		adev->dummy_page.page = NULL;
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ * amdgpu_dummy_page_fini - free dummy page used by the driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Frees the dummy page used by the driver (all asics).
+ */
+static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+{
+	if (adev->dummy_page.page == NULL)
+		return;
+	pci_unmap_page(adev->pdev, adev->dummy_page.addr,
+			PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	__free_page(adev->dummy_page.page);
+	adev->dummy_page.page = NULL;
+}
+
 /**
  * amdgpu_gart_table_vram_alloc - allocate vram for gart page table
  *
@@ -308,7 +353,7 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
 		DRM_ERROR("Page size is smaller than GPU page size!\n");
 		return -EINVAL;
 	}
-	r = amdgpu_dummy_page_init(adev);
+	r = amdgpu_gart_dummy_page_init(adev);
 	if (r)
 		return r;
 	/* Compute table size */
@@ -340,5 +385,5 @@ void amdgpu_gart_fini(struct amdgpu_device *adev)
 	vfree(adev->gart.pages);
 	adev->gart.pages = NULL;
 #endif
-	amdgpu_dummy_page_fini(adev);
+	amdgpu_gart_dummy_page_fini(adev);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index eb75eb44efc681d8dc10789db64de687e963546a..e48b4ec88c8c72b84599d8f85b38836c92c26523 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -518,10 +518,6 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 	if (!amdgpu_vm_ready(vm))
 		return;
 
-	r = amdgpu_vm_update_directories(adev, vm);
-	if (r)
-		goto error;
-
 	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 	if (r)
 		goto error;
@@ -530,6 +526,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 	    operation == AMDGPU_VA_OP_REPLACE)
 		r = amdgpu_vm_bo_update(adev, bo_va, false);
 
+	r = amdgpu_vm_update_directories(adev, vm);
+	if (r)
+		goto error;
+
 error:
 	if (r && r != -ERESTARTSYS)
 		DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
@@ -851,7 +851,7 @@ static const struct drm_info_list amdgpu_debugfs_gem_list[] = {
 };
 #endif
 
-int amdgpu_gem_debugfs_init(struct amdgpu_device *adev)
+int amdgpu_debugfs_gem_init(struct amdgpu_device *adev)
 {
 #if defined(CONFIG_DEBUG_FS)
 	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_gem_list, 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ef043361009f4c2a8de0788beeed86aef288cd7a..bb40d2529a307eb9f71973e1d0628e3766da984c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -203,7 +203,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 
 	spin_lock_init(&kiq->ring_lock);
 
-	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
+	r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs);
 	if (r)
 		return r;
 
@@ -229,7 +229,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
 			      struct amdgpu_irq_src *irq)
 {
-	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
+	amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
 	amdgpu_ring_fini(ring);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 0cf86eb357d61aa73f008d07076f0fec926c6883..a162d87ca0c8e7bedf2d936f4cd63abf98fe534e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -149,7 +149,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		return -EINVAL;
 	}
 
-	if (vm && !job->vm_id) {
+	if (vm && !job->vmid) {
 		dev_err(adev->dev, "VM IB without ID\n");
 		return -EINVAL;
 	}
@@ -211,7 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 			!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
 			continue;
 
-		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
+		amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
 				    need_ctx_switch);
 		need_ctx_switch = false;
 	}
@@ -229,9 +229,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	r = amdgpu_fence_emit(ring, f);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
-		if (job && job->vm_id)
-			amdgpu_vm_reset_id(adev, ring->funcs->vmhub,
-					   job->vm_id);
+		if (job && job->vmid)
+			amdgpu_vmid_reset(adev, ring->funcs->vmhub, job->vmid);
 		amdgpu_ring_undo(ring);
 		return r;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
new file mode 100644
index 0000000000000000000000000000000000000000..16884a0b677b93892ff35672364836617fa12985
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -0,0 +1,459 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu_ids.h"
+
+#include <linux/idr.h>
+#include <linux/dma-fence-array.h>
+#include <drm/drmP.h>
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+
+/*
+ * PASID manager
+ *
+ * PASIDs are global address space identifiers that can be shared
+ * between the GPU, an IOMMU and the driver. VMs on different devices
+ * may use the same PASID if they share the same address
+ * space. Therefore PASIDs are allocated using a global IDA. VMs are
+ * looked up from the PASID per amdgpu_device.
+ */
+static DEFINE_IDA(amdgpu_pasid_ida);
+
+/**
+ * amdgpu_pasid_alloc - Allocate a PASID
+ * @bits: Maximum width of the PASID in bits, must be at least 1
+ *
+ * Allocates a PASID of the given width while keeping smaller PASIDs
+ * available if possible.
+ *
+ * Returns a positive integer on success. Returns %-EINVAL if bits==0.
+ * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
+ * memory allocation failure.
+ */
+int amdgpu_pasid_alloc(unsigned int bits)
+{
+	int pasid = -EINVAL;
+
+	for (bits = min(bits, 31U); bits > 0; bits--) {
+		pasid = ida_simple_get(&amdgpu_pasid_ida,
+				       1U << (bits - 1), 1U << bits,
+				       GFP_KERNEL);
+		if (pasid != -ENOSPC)
+			break;
+	}
+
+	return pasid;
+}
+
+/**
+ * amdgpu_pasid_free - Free a PASID
+ * @pasid: PASID to free
+ */
+void amdgpu_pasid_free(unsigned int pasid)
+{
+	ida_simple_remove(&amdgpu_pasid_ida, pasid);
+}
+
+/*
+ * VMID manager
+ *
+ * VMIDs are a per VMHUB identifier for page tables handling.
+ */
+
+/**
+ * amdgpu_vmid_had_gpu_reset - check if reset occured since last use
+ *
+ * @adev: amdgpu_device pointer
+ * @id: VMID structure
+ *
+ * Check if GPU reset occured since last use of the VMID.
+ */
+bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
+			       struct amdgpu_vmid *id)
+{
+	return id->current_gpu_reset_count !=
+		atomic_read(&adev->gpu_reset_counter);
+}
+
+/* idr_mgr->lock must be held */
+static int amdgpu_vmid_grab_reserved_locked(struct amdgpu_vm *vm,
+					    struct amdgpu_ring *ring,
+					    struct amdgpu_sync *sync,
+					    struct dma_fence *fence,
+					    struct amdgpu_job *job)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned vmhub = ring->funcs->vmhub;
+	uint64_t fence_context = adev->fence_context + ring->idx;
+	struct amdgpu_vmid *id = vm->reserved_vmid[vmhub];
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct dma_fence *updates = sync->last_vm_update;
+	int r = 0;
+	struct dma_fence *flushed, *tmp;
+	bool needs_flush = vm->use_cpu_for_update;
+
+	flushed  = id->flushed_updates;
+	if ((amdgpu_vmid_had_gpu_reset(adev, id)) ||
+	    (atomic64_read(&id->owner) != vm->entity.fence_context) ||
+	    (job->vm_pd_addr != id->pd_gpu_addr) ||
+	    (updates && (!flushed || updates->context != flushed->context ||
+			dma_fence_is_later(updates, flushed))) ||
+	    (!id->last_flush || (id->last_flush->context != fence_context &&
+				 !dma_fence_is_signaled(id->last_flush)))) {
+		needs_flush = true;
+		/* to prevent one context starved by another context */
+		id->pd_gpu_addr = 0;
+		tmp = amdgpu_sync_peek_fence(&id->active, ring);
+		if (tmp) {
+			r = amdgpu_sync_fence(adev, sync, tmp, false);
+			return r;
+		}
+	}
+
+	/* Good we can use this VMID. Remember this submission as
+	* user of the VMID.
+	*/
+	r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
+	if (r)
+		goto out;
+
+	if (updates && (!flushed || updates->context != flushed->context ||
+			dma_fence_is_later(updates, flushed))) {
+		dma_fence_put(id->flushed_updates);
+		id->flushed_updates = dma_fence_get(updates);
+	}
+	id->pd_gpu_addr = job->vm_pd_addr;
+	atomic64_set(&id->owner, vm->entity.fence_context);
+	job->vm_needs_flush = needs_flush;
+	if (needs_flush) {
+		dma_fence_put(id->last_flush);
+		id->last_flush = NULL;
+	}
+	job->vmid = id - id_mgr->ids;
+	trace_amdgpu_vm_grab_id(vm, ring, job);
+out:
+	return r;
+}
+
+/**
+ * amdgpu_vm_grab_id - allocate the next free VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
+ * @fence: fence protecting ID from reuse
+ *
+ * Allocate an id for the vm, adding fences to the sync obj as necessary.
+ */
+int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+		     struct amdgpu_sync *sync, struct dma_fence *fence,
+		     struct amdgpu_job *job)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned vmhub = ring->funcs->vmhub;
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	uint64_t fence_context = adev->fence_context + ring->idx;
+	struct dma_fence *updates = sync->last_vm_update;
+	struct amdgpu_vmid *id, *idle;
+	struct dma_fence **fences;
+	unsigned i;
+	int r = 0;
+
+	mutex_lock(&id_mgr->lock);
+	if (vm->reserved_vmid[vmhub]) {
+		r = amdgpu_vmid_grab_reserved_locked(vm, ring, sync, fence, job);
+		mutex_unlock(&id_mgr->lock);
+		return r;
+	}
+	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
+	if (!fences) {
+		mutex_unlock(&id_mgr->lock);
+		return -ENOMEM;
+	}
+	/* Check if we have an idle VMID */
+	i = 0;
+	list_for_each_entry(idle, &id_mgr->ids_lru, list) {
+		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
+		if (!fences[i])
+			break;
+		++i;
+	}
+
+	/* If we can't find a idle VMID to use, wait till one becomes available */
+	if (&idle->list == &id_mgr->ids_lru) {
+		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
+		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
+		struct dma_fence_array *array;
+		unsigned j;
+
+		for (j = 0; j < i; ++j)
+			dma_fence_get(fences[j]);
+
+		array = dma_fence_array_create(i, fences, fence_context,
+					   seqno, true);
+		if (!array) {
+			for (j = 0; j < i; ++j)
+				dma_fence_put(fences[j]);
+			kfree(fences);
+			r = -ENOMEM;
+			goto error;
+		}
+
+
+		r = amdgpu_sync_fence(ring->adev, sync, &array->base, false);
+		dma_fence_put(&array->base);
+		if (r)
+			goto error;
+
+		mutex_unlock(&id_mgr->lock);
+		return 0;
+
+	}
+	kfree(fences);
+
+	job->vm_needs_flush = vm->use_cpu_for_update;
+	/* Check if we can use a VMID already assigned to this VM */
+	list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
+		struct dma_fence *flushed;
+		bool needs_flush = vm->use_cpu_for_update;
+
+		/* Check all the prerequisites to using this VMID */
+		if (amdgpu_vmid_had_gpu_reset(adev, id))
+			continue;
+
+		if (atomic64_read(&id->owner) != vm->entity.fence_context)
+			continue;
+
+		if (job->vm_pd_addr != id->pd_gpu_addr)
+			continue;
+
+		if (!id->last_flush ||
+		    (id->last_flush->context != fence_context &&
+		     !dma_fence_is_signaled(id->last_flush)))
+			needs_flush = true;
+
+		flushed  = id->flushed_updates;
+		if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
+			needs_flush = true;
+
+		/* Concurrent flushes are only possible starting with Vega10 */
+		if (adev->asic_type < CHIP_VEGA10 && needs_flush)
+			continue;
+
+		/* Good we can use this VMID. Remember this submission as
+		 * user of the VMID.
+		 */
+		r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
+		if (r)
+			goto error;
+
+		if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
+			dma_fence_put(id->flushed_updates);
+			id->flushed_updates = dma_fence_get(updates);
+		}
+
+		if (needs_flush)
+			goto needs_flush;
+		else
+			goto no_flush_needed;
+
+	};
+
+	/* Still no ID to use? Then use the idle one found earlier */
+	id = idle;
+
+	/* Remember this submission as user of the VMID */
+	r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
+	if (r)
+		goto error;
+
+	id->pd_gpu_addr = job->vm_pd_addr;
+	dma_fence_put(id->flushed_updates);
+	id->flushed_updates = dma_fence_get(updates);
+	atomic64_set(&id->owner, vm->entity.fence_context);
+
+needs_flush:
+	job->vm_needs_flush = true;
+	dma_fence_put(id->last_flush);
+	id->last_flush = NULL;
+
+no_flush_needed:
+	list_move_tail(&id->list, &id_mgr->ids_lru);
+
+	job->vmid = id - id_mgr->ids;
+	trace_amdgpu_vm_grab_id(vm, ring, job);
+
+error:
+	mutex_unlock(&id_mgr->lock);
+	return r;
+}
+
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
+			       struct amdgpu_vm *vm,
+			       unsigned vmhub)
+{
+	struct amdgpu_vmid_mgr *id_mgr;
+	struct amdgpu_vmid *idle;
+	int r = 0;
+
+	id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	mutex_lock(&id_mgr->lock);
+	if (vm->reserved_vmid[vmhub])
+		goto unlock;
+	if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
+	    AMDGPU_VM_MAX_RESERVED_VMID) {
+		DRM_ERROR("Over limitation of reserved vmid\n");
+		atomic_dec(&id_mgr->reserved_vmid_num);
+		r = -EINVAL;
+		goto unlock;
+	}
+	/* Select the first entry VMID */
+	idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
+	list_del_init(&idle->list);
+	vm->reserved_vmid[vmhub] = idle;
+	mutex_unlock(&id_mgr->lock);
+
+	return 0;
+unlock:
+	mutex_unlock(&id_mgr->lock);
+	return r;
+}
+
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
+			       struct amdgpu_vm *vm,
+			       unsigned vmhub)
+{
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+
+	mutex_lock(&id_mgr->lock);
+	if (vm->reserved_vmid[vmhub]) {
+		list_add(&vm->reserved_vmid[vmhub]->list,
+			&id_mgr->ids_lru);
+		vm->reserved_vmid[vmhub] = NULL;
+		atomic_dec(&id_mgr->reserved_vmid_num);
+	}
+	mutex_unlock(&id_mgr->lock);
+}
+
+/**
+ * amdgpu_vmid_reset - reset VMID to zero
+ *
+ * @adev: amdgpu device structure
+ * @vmid: vmid number to use
+ *
+ * Reset saved GDW, GWS and OA to force switch on next flush.
+ */
+void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
+		       unsigned vmid)
+{
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct amdgpu_vmid *id = &id_mgr->ids[vmid];
+
+	atomic64_set(&id->owner, 0);
+	id->gds_base = 0;
+	id->gds_size = 0;
+	id->gws_base = 0;
+	id->gws_size = 0;
+	id->oa_base = 0;
+	id->oa_size = 0;
+}
+
+/**
+ * amdgpu_vmid_reset_all - reset VMID to zero
+ *
+ * @adev: amdgpu device structure
+ *
+ * Reset VMID to force flush on next use
+ */
+void amdgpu_vmid_reset_all(struct amdgpu_device *adev)
+{
+	unsigned i, j;
+
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+		struct amdgpu_vmid_mgr *id_mgr =
+			&adev->vm_manager.id_mgr[i];
+
+		for (j = 1; j < id_mgr->num_ids; ++j)
+			amdgpu_vmid_reset(adev, i, j);
+	}
+}
+
+/**
+ * amdgpu_vmid_mgr_init - init the VMID manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize the VM manager structures
+ */
+void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
+{
+	unsigned i, j;
+
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+		struct amdgpu_vmid_mgr *id_mgr =
+			&adev->vm_manager.id_mgr[i];
+
+		mutex_init(&id_mgr->lock);
+		INIT_LIST_HEAD(&id_mgr->ids_lru);
+		atomic_set(&id_mgr->reserved_vmid_num, 0);
+
+		/* skip over VMID 0, since it is the system VM */
+		for (j = 1; j < id_mgr->num_ids; ++j) {
+			amdgpu_vmid_reset(adev, i, j);
+			amdgpu_sync_create(&id_mgr->ids[i].active);
+			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
+		}
+	}
+
+	adev->vm_manager.fence_context =
+		dma_fence_context_alloc(AMDGPU_MAX_RINGS);
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+		adev->vm_manager.seqno[i] = 0;
+}
+
+/**
+ * amdgpu_vmid_mgr_fini - cleanup VM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Cleanup the VM manager and free resources.
+ */
+void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
+{
+	unsigned i, j;
+
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+		struct amdgpu_vmid_mgr *id_mgr =
+			&adev->vm_manager.id_mgr[i];
+
+		mutex_destroy(&id_mgr->lock);
+		for (j = 0; j < AMDGPU_NUM_VMID; ++j) {
+			struct amdgpu_vmid *id = &id_mgr->ids[j];
+
+			amdgpu_sync_free(&id->active);
+			dma_fence_put(id->flushed_updates);
+			dma_fence_put(id->last_flush);
+		}
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
new file mode 100644
index 0000000000000000000000000000000000000000..ad931fa570b37d5030bfdf3db83fe27f4735601a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_IDS_H__
+#define __AMDGPU_IDS_H__
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/dma-fence.h>
+
+#include "amdgpu_sync.h"
+
+/* maximum number of VMIDs */
+#define AMDGPU_NUM_VMID	16
+
+struct amdgpu_device;
+struct amdgpu_vm;
+struct amdgpu_ring;
+struct amdgpu_sync;
+struct amdgpu_job;
+
+struct amdgpu_vmid {
+	struct list_head	list;
+	struct amdgpu_sync	active;
+	struct dma_fence	*last_flush;
+	atomic64_t		owner;
+
+	uint64_t		pd_gpu_addr;
+	/* last flushed PD/PT update */
+	struct dma_fence	*flushed_updates;
+
+	uint32_t                current_gpu_reset_count;
+
+	uint32_t		gds_base;
+	uint32_t		gds_size;
+	uint32_t		gws_base;
+	uint32_t		gws_size;
+	uint32_t		oa_base;
+	uint32_t		oa_size;
+};
+
+struct amdgpu_vmid_mgr {
+	struct mutex		lock;
+	unsigned		num_ids;
+	struct list_head	ids_lru;
+	struct amdgpu_vmid	ids[AMDGPU_NUM_VMID];
+	atomic_t		reserved_vmid_num;
+};
+
+int amdgpu_pasid_alloc(unsigned int bits);
+void amdgpu_pasid_free(unsigned int pasid);
+
+bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
+			       struct amdgpu_vmid *id);
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
+			       struct amdgpu_vm *vm,
+			       unsigned vmhub);
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
+			       struct amdgpu_vm *vm,
+			       unsigned vmhub);
+int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+		     struct amdgpu_sync *sync, struct dma_fence *fence,
+		     struct amdgpu_job *job);
+void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
+		       unsigned vmid);
+void amdgpu_vmid_reset_all(struct amdgpu_device *adev);
+
+void amdgpu_vmid_mgr_init(struct amdgpu_device *adev);
+void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index f5f27e4f0f7ff1ae788ebb9fa77ed3d3b11ee7ac..06373d44b3da3062d16c3f3a11fc4a7a39508774 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -92,15 +92,15 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
 		}
 		return 0;
 	} else {
-		r = amdgpu_wb_get(adev, &adev->irq.ih.wptr_offs);
+		r = amdgpu_device_wb_get(adev, &adev->irq.ih.wptr_offs);
 		if (r) {
 			dev_err(adev->dev, "(%d) ih wptr_offs wb alloc failed\n", r);
 			return r;
 		}
 
-		r = amdgpu_wb_get(adev, &adev->irq.ih.rptr_offs);
+		r = amdgpu_device_wb_get(adev, &adev->irq.ih.rptr_offs);
 		if (r) {
-			amdgpu_wb_free(adev, adev->irq.ih.wptr_offs);
+			amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs);
 			dev_err(adev->dev, "(%d) ih rptr_offs wb alloc failed\n", r);
 			return r;
 		}
@@ -133,8 +133,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
 		amdgpu_bo_free_kernel(&adev->irq.ih.ring_obj,
 				      &adev->irq.ih.gpu_addr,
 				      (void **)&adev->irq.ih.ring);
-		amdgpu_wb_free(adev, adev->irq.ih.wptr_offs);
-		amdgpu_wb_free(adev, adev->irq.ih.rptr_offs);
+		amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs);
+		amdgpu_device_wb_free(adev, adev->irq.ih.rptr_offs);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index ada89358e2207b566004495053b00e3ba68530b8..29cf10927a92ce6655064c9b028644473e69182b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -105,8 +105,8 @@ struct amdgpu_iv_entry {
 	unsigned client_id;
 	unsigned src_id;
 	unsigned ring_id;
-	unsigned vm_id;
-	unsigned vm_id_src;
+	unsigned vmid;
+	unsigned vmid_src;
 	uint64_t timestamp;
 	unsigned timestamp_src;
 	unsigned pas_id;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index c340774082ea225e7dc2106397c1630d1aeb180c..56bcd59c3399a0a912ea1fa8f869c105b8827327 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -88,7 +88,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
 						  reset_work);
 
 	if (!amdgpu_sriov_vf(adev))
-		amdgpu_gpu_recover(adev, NULL);
+		amdgpu_device_gpu_recover(adev, NULL, false);
 }
 
 /* Disable *all* interrupts */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index bdc210ac74f800edb99629c872c9fdc1af658b14..2bd56760c7441fdc720d3a469fef424ae39741ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,7 +28,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-static void amdgpu_job_timedout(struct amd_sched_job *s_job)
+static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 {
 	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
@@ -37,7 +37,7 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
 		  atomic_read(&job->ring->fence_drv.last_seq),
 		  job->ring->fence_drv.sync_seq);
 
-	amdgpu_gpu_recover(job->adev, job);
+	amdgpu_device_gpu_recover(job->adev, job, false);
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@@ -96,7 +96,7 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
 		amdgpu_ib_free(job->adev, &job->ibs[i], f);
 }
 
-static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
+static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 {
 	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
@@ -118,7 +118,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
 }
 
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
-		      struct amd_sched_entity *entity, void *owner,
+		      struct drm_sched_entity *entity, void *owner,
 		      struct dma_fence **f)
 {
 	int r;
@@ -127,7 +127,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 	if (!f)
 		return -EINVAL;
 
-	r = amd_sched_job_init(&job->base, &ring->sched, entity, owner);
+	r = drm_sched_job_init(&job->base, &ring->sched, entity, owner);
 	if (r)
 		return r;
 
@@ -136,13 +136,13 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 	*f = dma_fence_get(&job->base.s_fence->finished);
 	amdgpu_job_free_resources(job);
 	amdgpu_ring_priority_get(job->ring, job->base.s_priority);
-	amd_sched_entity_push_job(&job->base, entity);
+	drm_sched_entity_push_job(&job->base, entity);
 
 	return 0;
 }
 
-static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job,
-					       struct amd_sched_entity *s_entity)
+static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
+					       struct drm_sched_entity *s_entity)
 {
 	struct amdgpu_job *job = to_amdgpu_job(sched_job);
 	struct amdgpu_vm *vm = job->vm;
@@ -151,19 +151,19 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job,
 	struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync, &explicit);
 
 	if (fence && explicit) {
-		if (amd_sched_dependency_optimized(fence, s_entity)) {
+		if (drm_sched_dependency_optimized(fence, s_entity)) {
 			r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence, false);
 			if (r)
 				DRM_ERROR("Error adding fence to sync (%d)\n", r);
 		}
 	}
 
-	while (fence == NULL && vm && !job->vm_id) {
+	while (fence == NULL && vm && !job->vmid) {
 		struct amdgpu_ring *ring = job->ring;
 
-		r = amdgpu_vm_grab_id(vm, ring, &job->sync,
-				      &job->base.s_fence->finished,
-				      job);
+		r = amdgpu_vmid_grab(vm, ring, &job->sync,
+				     &job->base.s_fence->finished,
+				     job);
 		if (r)
 			DRM_ERROR("Error getting VM ID (%d)\n", r);
 
@@ -173,7 +173,7 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job,
 	return fence;
 }
 
-static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
+static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
 {
 	struct dma_fence *fence = NULL, *finished;
 	struct amdgpu_device *adev;
@@ -211,7 +211,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	return fence;
 }
 
-const struct amd_sched_backend_ops amdgpu_sched_ops = {
+const struct drm_sched_backend_ops amdgpu_sched_ops = {
 	.dependency = amdgpu_job_dependency,
 	.run_job = amdgpu_job_run,
 	.timedout_job = amdgpu_job_timedout,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index dc0a8be98043ee3476d774b8d043cc83688c1d10..5c4c3e0d527be64386dcab0951727642f64d73db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -37,6 +37,18 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
+static bool amdgpu_need_backup(struct amdgpu_device *adev)
+{
+	if (adev->flags & AMD_IS_APU)
+		return false;
+
+	if (amdgpu_gpu_recovery == 0 ||
+	    (amdgpu_gpu_recovery == -1  && !amdgpu_sriov_vf(adev)))
+		return false;
+
+	return true;
+}
+
 static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
@@ -327,7 +339,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 			       uint64_t init_value,
 			       struct amdgpu_bo **bo_ptr)
 {
-	struct ttm_operation_ctx ctx = { !kernel, false };
+	struct ttm_operation_ctx ctx = {
+		.interruptible = !kernel,
+		.no_wait_gpu = false,
+		.allow_reserved_eviction = true,
+		.resv = resv
+	};
 	struct amdgpu_bo *bo;
 	enum ttm_bo_type type;
 	unsigned long page_align;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 6f56ff606e43fcb58d90f0e7f984a85eb8471f77..01a996c6b802971af57df0bbd75ff007d8904f25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
@@ -1276,16 +1278,16 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
 			/* XXX select vce level based on ring/task */
 			adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
 			mutex_unlock(&adev->pm.mutex);
-			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							AMD_CG_STATE_UNGATE);
-			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							AMD_PG_STATE_UNGATE);
+			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							       AMD_CG_STATE_UNGATE);
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							       AMD_PG_STATE_UNGATE);
 			amdgpu_pm_compute_clocks(adev);
 		} else {
-			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							AMD_PG_STATE_GATE);
-			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							AMD_CG_STATE_GATE);
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							       AMD_PG_STATE_GATE);
+			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							       AMD_CG_STATE_GATE);
 			mutex_lock(&adev->pm.mutex);
 			adev->pm.dpm.vce_active = false;
 			mutex_unlock(&adev->pm.mutex);
@@ -1582,7 +1584,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
 	struct drm_device *ddev = adev->ddev;
 	u32 flags = 0;
 
-	amdgpu_get_clockgating_state(adev, &flags);
+	amdgpu_device_ip_get_clockgating_state(adev, &flags);
 	seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags);
 	amdgpu_parse_cg_state(m, flags);
 	seq_printf(m, "\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index a98fbbb4739f65dad15dc717c1e66722770b5420..13044e66dcaf4e6ab0b79fab23aef8db987170db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -164,7 +164,7 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
  * Release a request for executing at @priority
  */
 void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
-			      enum amd_sched_priority priority)
+			      enum drm_sched_priority priority)
 {
 	int i;
 
@@ -175,7 +175,7 @@ void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
 		return;
 
 	/* no need to restore if the job is already at the lowest priority */
-	if (priority == AMD_SCHED_PRIORITY_NORMAL)
+	if (priority == DRM_SCHED_PRIORITY_NORMAL)
 		return;
 
 	mutex_lock(&ring->priority_mutex);
@@ -184,8 +184,8 @@ void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
 		goto out_unlock;
 
 	/* decay priority to the next level with a job available */
-	for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) {
-		if (i == AMD_SCHED_PRIORITY_NORMAL
+	for (i = priority; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+		if (i == DRM_SCHED_PRIORITY_NORMAL
 				|| atomic_read(&ring->num_jobs[i])) {
 			ring->priority = i;
 			ring->funcs->set_priority(ring, i);
@@ -206,7 +206,7 @@ void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
  * Request a ring's priority to be raised to @priority (refcounted).
  */
 void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
-			      enum amd_sched_priority priority)
+			      enum drm_sched_priority priority)
 {
 	if (!ring->funcs->set_priority)
 		return;
@@ -263,25 +263,25 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 			return r;
 	}
 
-	r = amdgpu_wb_get(adev, &ring->rptr_offs);
+	r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
 	if (r) {
 		dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
 		return r;
 	}
 
-	r = amdgpu_wb_get(adev, &ring->wptr_offs);
+	r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
 	if (r) {
 		dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
 		return r;
 	}
 
-	r = amdgpu_wb_get(adev, &ring->fence_offs);
+	r = amdgpu_device_wb_get(adev, &ring->fence_offs);
 	if (r) {
 		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
 		return r;
 	}
 
-	r = amdgpu_wb_get(adev, &ring->cond_exe_offs);
+	r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
 	if (r) {
 		dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
 		return r;
@@ -317,12 +317,12 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	}
 
 	ring->max_dw = max_dw;
-	ring->priority = AMD_SCHED_PRIORITY_NORMAL;
+	ring->priority = DRM_SCHED_PRIORITY_NORMAL;
 	mutex_init(&ring->priority_mutex);
 	INIT_LIST_HEAD(&ring->lru_list);
 	amdgpu_ring_lru_touch(adev, ring);
 
-	for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i)
+	for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i)
 		atomic_set(&ring->num_jobs[i], 0);
 
 	if (amdgpu_debugfs_ring_init(adev, ring)) {
@@ -348,11 +348,11 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
 		return;
 
-	amdgpu_wb_free(ring->adev, ring->rptr_offs);
-	amdgpu_wb_free(ring->adev, ring->wptr_offs);
+	amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
+	amdgpu_device_wb_free(ring->adev, ring->wptr_offs);
 
-	amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
-	amdgpu_wb_free(ring->adev, ring->fence_offs);
+	amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
+	amdgpu_device_wb_free(ring->adev, ring->fence_offs);
 
 	amdgpu_bo_free_kernel(&ring->ring_obj,
 			      &ring->gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index a6b89e3932a5fcb7ebe1f0b7a2030940f57722cd..102dad3edf6a44be8462ea1d9303ad0be2fdd68b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -25,7 +25,7 @@
 #define __AMDGPU_RING_H__
 
 #include <drm/amdgpu_drm.h>
-#include "gpu_scheduler.h"
+#include <drm/gpu_scheduler.h>
 
 /* max number of rings */
 #define AMDGPU_MAX_RINGS		18
@@ -121,11 +121,11 @@ struct amdgpu_ring_funcs {
 	/* command emit functions */
 	void (*emit_ib)(struct amdgpu_ring *ring,
 			struct amdgpu_ib *ib,
-			unsigned vm_id, bool ctx_switch);
+			unsigned vmid, bool ctx_switch);
 	void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
 			   uint64_t seq, unsigned flags);
 	void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
-	void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id,
+	void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
 			      uint64_t pd_addr);
 	void (*emit_hdp_flush)(struct amdgpu_ring *ring);
 	void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
@@ -154,14 +154,14 @@ struct amdgpu_ring_funcs {
 	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
 	/* priority functions */
 	void (*set_priority) (struct amdgpu_ring *ring,
-			      enum amd_sched_priority priority);
+			      enum drm_sched_priority priority);
 };
 
 struct amdgpu_ring {
 	struct amdgpu_device		*adev;
 	const struct amdgpu_ring_funcs	*funcs;
 	struct amdgpu_fence_driver	fence_drv;
-	struct amd_gpu_scheduler	sched;
+	struct drm_gpu_scheduler	sched;
 	struct list_head		lru_list;
 
 	struct amdgpu_bo	*ring_obj;
@@ -186,6 +186,7 @@ struct amdgpu_ring {
 	uint64_t                eop_gpu_addr;
 	u32			doorbell_index;
 	bool			use_doorbell;
+	bool			use_pollmem;
 	unsigned		wptr_offs;
 	unsigned		fence_offs;
 	uint64_t		current_ctx;
@@ -196,7 +197,7 @@ struct amdgpu_ring {
 	unsigned		vm_inv_eng;
 	bool			has_compute_vm_bug;
 
-	atomic_t		num_jobs[AMD_SCHED_PRIORITY_MAX];
+	atomic_t		num_jobs[DRM_SCHED_PRIORITY_MAX];
 	struct mutex		priority_mutex;
 	/* protected by priority_mutex */
 	int			priority;
@@ -212,9 +213,9 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
 void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
-			      enum amd_sched_priority priority);
+			      enum drm_sched_priority priority);
 void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
-			      enum amd_sched_priority priority);
+			      enum drm_sched_priority priority);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 290cc3f9c433a3df7de41f1c1db228db30377ee8..86a0715d9431ded69bac338a38143d82789ad949 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -29,29 +29,29 @@
 
 #include "amdgpu_vm.h"
 
-enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
+enum drm_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
 {
 	switch (amdgpu_priority) {
 	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
-		return AMD_SCHED_PRIORITY_HIGH_HW;
+		return DRM_SCHED_PRIORITY_HIGH_HW;
 	case AMDGPU_CTX_PRIORITY_HIGH:
-		return AMD_SCHED_PRIORITY_HIGH_SW;
+		return DRM_SCHED_PRIORITY_HIGH_SW;
 	case AMDGPU_CTX_PRIORITY_NORMAL:
-		return AMD_SCHED_PRIORITY_NORMAL;
+		return DRM_SCHED_PRIORITY_NORMAL;
 	case AMDGPU_CTX_PRIORITY_LOW:
 	case AMDGPU_CTX_PRIORITY_VERY_LOW:
-		return AMD_SCHED_PRIORITY_LOW;
+		return DRM_SCHED_PRIORITY_LOW;
 	case AMDGPU_CTX_PRIORITY_UNSET:
-		return AMD_SCHED_PRIORITY_UNSET;
+		return DRM_SCHED_PRIORITY_UNSET;
 	default:
 		WARN(1, "Invalid context priority %d\n", amdgpu_priority);
-		return AMD_SCHED_PRIORITY_INVALID;
+		return DRM_SCHED_PRIORITY_INVALID;
 	}
 }
 
 static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
 						  int fd,
-						  enum amd_sched_priority priority)
+						  enum drm_sched_priority priority)
 {
 	struct file *filp = fcheck(fd);
 	struct drm_file *file;
@@ -86,11 +86,11 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
 {
 	union drm_amdgpu_sched *args = data;
 	struct amdgpu_device *adev = dev->dev_private;
-	enum amd_sched_priority priority;
+	enum drm_sched_priority priority;
 	int r;
 
 	priority = amdgpu_to_sched_priority(args->in.priority);
-	if (args->in.flags || priority == AMD_SCHED_PRIORITY_INVALID)
+	if (args->in.flags || priority == DRM_SCHED_PRIORITY_INVALID)
 		return -EINVAL;
 
 	switch (args->in.op) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
index b28c067d38223f2c78a3b523d628a234745f0c72..2a1a0c734bddb76e77a142f854e1e049973dde6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
@@ -27,7 +27,7 @@
 
 #include <drm/drmP.h>
 
-enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority);
+enum drm_sched_priority amdgpu_to_sched_priority(int amdgpu_priority);
 int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *filp);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index ebe1ffbab0c1ddd91f70be72c37d24ef789f2919..df65c66dc956f7500810b5e3e42080650a484193 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -64,7 +64,7 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
 static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
 				 struct dma_fence *f)
 {
-	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+	struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
 
 	if (s_fence) {
 		struct amdgpu_ring *ring;
@@ -85,7 +85,7 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
  */
 static void *amdgpu_sync_get_owner(struct dma_fence *f)
 {
-	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+	struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
 
 	if (s_fence)
 		return s_fence->owner;
@@ -120,7 +120,7 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep,
  * Tries to add the fence to an existing hash entry. Returns true when an entry
  * was found, false otherwise.
  */
-static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, bool explicit)
 {
 	struct amdgpu_sync_entry *e;
 
@@ -129,6 +129,10 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
 			continue;
 
 		amdgpu_sync_keep_later(&e->fence, f);
+
+		/* Preserve eplicit flag to not loose pipe line sync */
+		e->explicit |= explicit;
+
 		return true;
 	}
 	return false;
@@ -148,12 +152,11 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 
 	if (!f)
 		return 0;
-
 	if (amdgpu_sync_same_dev(adev, f) &&
 	    amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
 		amdgpu_sync_keep_later(&sync->last_vm_update, f);
 
-	if (amdgpu_sync_add_later(sync, f))
+	if (amdgpu_sync_add_later(sync, f, explicit))
 		return 0;
 
 	e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
@@ -245,7 +248,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 
 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
 		struct dma_fence *f = e->fence;
-		struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+		struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
 
 		if (dma_fence_is_signaled(f)) {
 			hash_del(&e->node);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index f337c316ec2c656a823230b355250929715db327..cace7a93fc943beab80622d98a2ba5a01ec71884 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -1,4 +1,26 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
 #if !defined(_AMDGPU_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _AMDGPU_TRACE_H_
 
@@ -60,8 +82,8 @@ TRACE_EVENT(amdgpu_iv,
 			     __field(unsigned, client_id)
 			     __field(unsigned, src_id)
 			     __field(unsigned, ring_id)
-			     __field(unsigned, vm_id)
-			     __field(unsigned, vm_id_src)
+			     __field(unsigned, vmid)
+			     __field(unsigned, vmid_src)
 			     __field(uint64_t, timestamp)
 			     __field(unsigned, timestamp_src)
 			     __field(unsigned, pas_id)
@@ -71,8 +93,8 @@ TRACE_EVENT(amdgpu_iv,
 			   __entry->client_id = iv->client_id;
 			   __entry->src_id = iv->src_id;
 			   __entry->ring_id = iv->ring_id;
-			   __entry->vm_id = iv->vm_id;
-			   __entry->vm_id_src = iv->vm_id_src;
+			   __entry->vmid = iv->vmid;
+			   __entry->vmid_src = iv->vmid_src;
 			   __entry->timestamp = iv->timestamp;
 			   __entry->timestamp_src = iv->timestamp_src;
 			   __entry->pas_id = iv->pas_id;
@@ -81,9 +103,9 @@ TRACE_EVENT(amdgpu_iv,
 			   __entry->src_data[2] = iv->src_data[2];
 			   __entry->src_data[3] = iv->src_data[3];
 			   ),
-	    TP_printk("client_id:%u src_id:%u ring:%u vm_id:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n",
+	    TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n",
 		      __entry->client_id, __entry->src_id,
-		      __entry->ring_id, __entry->vm_id,
+		      __entry->ring_id, __entry->vmid,
 		      __entry->timestamp, __entry->pas_id,
 		      __entry->src_data[0], __entry->src_data[1],
 		      __entry->src_data[2], __entry->src_data[3])
@@ -197,7 +219,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_vm *, vm)
 			     __field(u32, ring)
-			     __field(u32, vm_id)
+			     __field(u32, vmid)
 			     __field(u32, vm_hub)
 			     __field(u64, pd_addr)
 			     __field(u32, needs_flush)
@@ -206,13 +228,13 @@ TRACE_EVENT(amdgpu_vm_grab_id,
 	    TP_fast_assign(
 			   __entry->vm = vm;
 			   __entry->ring = ring->idx;
-			   __entry->vm_id = job->vm_id;
+			   __entry->vmid = job->vmid;
 			   __entry->vm_hub = ring->funcs->vmhub,
 			   __entry->pd_addr = job->vm_pd_addr;
 			   __entry->needs_flush = job->vm_needs_flush;
 			   ),
 	    TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
-		      __entry->vm, __entry->ring, __entry->vm_id,
+		      __entry->vm, __entry->ring, __entry->vmid,
 		      __entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
 );
 
@@ -335,24 +357,24 @@ TRACE_EVENT(amdgpu_vm_copy_ptes,
 );
 
 TRACE_EVENT(amdgpu_vm_flush,
-	    TP_PROTO(struct amdgpu_ring *ring, unsigned vm_id,
+	    TP_PROTO(struct amdgpu_ring *ring, unsigned vmid,
 		     uint64_t pd_addr),
-	    TP_ARGS(ring, vm_id, pd_addr),
+	    TP_ARGS(ring, vmid, pd_addr),
 	    TP_STRUCT__entry(
 			     __field(u32, ring)
-			     __field(u32, vm_id)
+			     __field(u32, vmid)
 			     __field(u32, vm_hub)
 			     __field(u64, pd_addr)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->ring = ring->idx;
-			   __entry->vm_id = vm_id;
+			   __entry->vmid = vmid;
 			   __entry->vm_hub = ring->funcs->vmhub;
 			   __entry->pd_addr = pd_addr;
 			   ),
 	    TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx",
-		      __entry->ring, __entry->vm_id,
+		      __entry->ring, __entry->vmid,
 		      __entry->vm_hub,__entry->pd_addr)
 );
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 952e0bf3bc8426c54fa531c121709ce60b9eea17..e4bb435e614b86cacd4f264526423d73e3d32ace 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -76,7 +76,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 {
 	struct drm_global_reference *global_ref;
 	struct amdgpu_ring *ring;
-	struct amd_sched_rq *rq;
+	struct drm_sched_rq *rq;
 	int r;
 
 	adev->mman.mem_global_referenced = false;
@@ -108,8 +108,8 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 	mutex_init(&adev->mman.gtt_window_lock);
 
 	ring = adev->mman.buffer_funcs_ring;
-	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
-	r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
+	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+	r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
 				  rq, amdgpu_sched_jobs, NULL);
 	if (r) {
 		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
@@ -131,7 +131,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
 {
 	if (adev->mman.mem_global_referenced) {
-		amd_sched_entity_fini(adev->mman.entity.sched,
+		drm_sched_entity_fini(adev->mman.entity.sched,
 				      &adev->mman.entity);
 		mutex_destroy(&adev->mman.gtt_window_lock);
 		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
@@ -497,7 +497,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
 		goto out_cleanup;
 	}
 
-	r = ttm_tt_bind(bo->ttm, &tmp_mem);
+	r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
@@ -505,7 +505,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
-	r = ttm_bo_move_ttm(bo, ctx->interruptible, ctx->no_wait_gpu, new_mem);
+	r = ttm_bo_move_ttm(bo, ctx, new_mem);
 out_cleanup:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return r;
@@ -536,7 +536,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
 	if (unlikely(r)) {
 		return r;
 	}
-	r = ttm_bo_move_ttm(bo, ctx->interruptible, ctx->no_wait_gpu, &tmp_mem);
+	r = ttm_bo_move_ttm(bo, ctx, &tmp_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
@@ -597,8 +597,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, ctx->interruptible,
-				       ctx->no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
 		if (r) {
 			return r;
 		}
@@ -991,7 +990,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
 	return &gtt->ttm.ttm;
 }
 
-static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
+static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
+			struct ttm_operation_ctx *ctx)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1019,11 +1019,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
 
 #ifdef CONFIG_SWIOTLB
 	if (swiotlb_nr_tbl()) {
-		return ttm_dma_populate(&gtt->ttm, adev->dev);
+		return ttm_dma_populate(&gtt->ttm, adev->dev, ctx);
 	}
 #endif
 
-	return ttm_populate_and_map_pages(adev->dev, &gtt->ttm);
+	return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
 }
 
 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
@@ -1270,6 +1270,101 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
 	.access_memory = &amdgpu_ttm_access_memory
 };
 
+/*
+ * Firmware Reservation functions
+ */
+/**
+ * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free fw reserved vram if it has been reserved.
+ */
+static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
+{
+	amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo,
+		NULL, &adev->fw_vram_usage.va);
+}
+
+/**
+ * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from fw.
+ */
+static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+	int r = 0;
+	int i;
+	u64 vram_size = adev->mc.visible_vram_size;
+	u64 offset = adev->fw_vram_usage.start_offset;
+	u64 size = adev->fw_vram_usage.size;
+	struct amdgpu_bo *bo;
+
+	adev->fw_vram_usage.va = NULL;
+	adev->fw_vram_usage.reserved_bo = NULL;
+
+	if (adev->fw_vram_usage.size > 0 &&
+		adev->fw_vram_usage.size <= vram_size) {
+
+		r = amdgpu_bo_create(adev, adev->fw_vram_usage.size,
+			PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
+			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0,
+			&adev->fw_vram_usage.reserved_bo);
+		if (r)
+			goto error_create;
+
+		r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
+		if (r)
+			goto error_reserve;
+
+		/* remove the original mem node and create a new one at the
+		 * request position
+		 */
+		bo = adev->fw_vram_usage.reserved_bo;
+		offset = ALIGN(offset, PAGE_SIZE);
+		for (i = 0; i < bo->placement.num_placement; ++i) {
+			bo->placements[i].fpfn = offset >> PAGE_SHIFT;
+			bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
+		}
+
+		ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem);
+		r = ttm_bo_mem_space(&bo->tbo, &bo->placement,
+				     &bo->tbo.mem, &ctx);
+		if (r)
+			goto error_pin;
+
+		r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
+			AMDGPU_GEM_DOMAIN_VRAM,
+			adev->fw_vram_usage.start_offset,
+			(adev->fw_vram_usage.start_offset +
+			adev->fw_vram_usage.size), NULL);
+		if (r)
+			goto error_pin;
+		r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
+			&adev->fw_vram_usage.va);
+		if (r)
+			goto error_kmap;
+
+		amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
+	}
+	return r;
+
+error_kmap:
+	amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo);
+error_pin:
+	amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
+error_reserve:
+	amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo);
+error_create:
+	adev->fw_vram_usage.va = NULL;
+	adev->fw_vram_usage.reserved_bo = NULL;
+	return r;
+}
+
 int amdgpu_ttm_init(struct amdgpu_device *adev)
 {
 	uint64_t gtt_size;
@@ -1312,7 +1407,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	 *The reserved vram for firmware must be pinned to the specified
 	 *place on the VRAM, so reserve it early.
 	 */
-	r = amdgpu_fw_reserve_vram_init(adev);
+	r = amdgpu_ttm_fw_reserve_vram_init(adev);
 	if (r) {
 		return r;
 	}
@@ -1330,9 +1425,11 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		struct sysinfo si;
 
 		si_meminfo(&si);
-		gtt_size = max(AMDGPU_DEFAULT_GTT_SIZE_MB << 20,
-			(uint64_t)si.totalram * si.mem_unit * 3/4);
-	} else
+		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+			       adev->mc.mc_vram_size),
+			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
+	}
+	else
 		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
 	if (r) {
@@ -1396,7 +1493,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 
 	amdgpu_ttm_debugfs_fini(adev);
 	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
-	amdgpu_fw_reserve_vram_fini(adev);
+	amdgpu_ttm_fw_reserve_vram_fini(adev);
 
 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 4f9433e6140622b5aff6569c8bf8c4941cbd375a..167856f6080fa432f23417f6e6117593d129ce59 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -25,7 +25,7 @@
 #define __AMDGPU_TTM_H__
 
 #include "amdgpu.h"
-#include "gpu_scheduler.h"
+#include <drm/gpu_scheduler.h>
 
 #define AMDGPU_PL_GDS		(TTM_PL_PRIV + 0)
 #define AMDGPU_PL_GWS		(TTM_PL_PRIV + 1)
@@ -55,7 +55,7 @@ struct amdgpu_mman {
 
 	struct mutex				gtt_window_lock;
 	/* Scheduler entity for buffer moves */
-	struct amd_sched_entity			entity;
+	struct drm_sched_entity			entity;
 };
 
 struct amdgpu_copy_mem {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 2f2a9e17fdb4df1ad5456c66aa1060f679c9246f..b2eae86bf906abe6ed0bd7e89f67f2d9acbfa809 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -116,7 +116,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring;
-	struct amd_sched_rq *rq;
+	struct drm_sched_rq *rq;
 	unsigned long bo_size;
 	const char *fw_name;
 	const struct common_firmware_header *hdr;
@@ -230,8 +230,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	}
 
 	ring = &adev->uvd.ring;
-	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
-	r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity,
+	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+	r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity,
 				  rq, amdgpu_sched_jobs, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up UVD run queue.\n");
@@ -244,7 +244,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	}
 
 	/* from uvd v5.0 HW addressing capacity increased to 64 bits */
-	if (!amdgpu_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
+	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
 		adev->uvd.address_64_bit = true;
 
 	switch (adev->asic_type) {
@@ -272,7 +272,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 	int i;
 	kfree(adev->uvd.saved_bo);
 
-	amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
+	drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
 
 	amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo,
 			      &adev->uvd.gpu_addr,
@@ -297,6 +297,8 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 	if (adev->uvd.vcpu_bo == NULL)
 		return 0;
 
+	cancel_delayed_work_sync(&adev->uvd.idle_work);
+
 	for (i = 0; i < adev->uvd.max_handles; ++i)
 		if (atomic_read(&adev->uvd.handles[i]))
 			break;
@@ -304,8 +306,6 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 	if (i == AMDGPU_MAX_UVD_HANDLES)
 		return 0;
 
-	cancel_delayed_work_sync(&adev->uvd.idle_work);
-
 	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
 	ptr = adev->uvd.cpu_addr;
 
@@ -346,6 +346,8 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
 			ptr += le32_to_cpu(hdr->ucode_size_bytes);
 		}
 		memset_io(ptr, 0, size);
+		/* to restore uvd fence seq */
+		amdgpu_fence_driver_force_completion(&adev->uvd.ring);
 	}
 
 	return 0;
@@ -1153,10 +1155,10 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 		} else {
 			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
 			/* shutdown the UVD block */
-			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							    AMD_PG_STATE_GATE);
-			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							    AMD_CG_STATE_GATE);
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							       AMD_PG_STATE_GATE);
+			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							       AMD_CG_STATE_GATE);
 		}
 	} else {
 		schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
@@ -1176,10 +1178,10 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 			amdgpu_dpm_enable_uvd(adev, true);
 		} else {
 			amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
-			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							    AMD_CG_STATE_UNGATE);
-			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							    AMD_PG_STATE_UNGATE);
+			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							       AMD_CG_STATE_UNGATE);
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							       AMD_PG_STATE_UNGATE);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 845eea993f75ca5b567f2ac514b24e95e37b9a64..32ea20b99e53a3818417945877f8e1df56ef14fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -51,8 +51,8 @@ struct amdgpu_uvd {
 	struct amdgpu_irq_src	irq;
 	bool			address_64_bit;
 	bool			use_ctx_buf;
-	struct amd_sched_entity entity;
-	struct amd_sched_entity entity_enc;
+	struct drm_sched_entity entity;
+	struct drm_sched_entity entity_enc;
 	uint32_t                srbm_soft_reset;
 	unsigned		num_enc_rings;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index ba6d846b08ff62e33a9f5b8ad0235d26e6c05a47..d274ae53553059a5970ffd28d5e5e3fcc7398143 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -85,7 +85,7 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work);
 int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 {
 	struct amdgpu_ring *ring;
-	struct amd_sched_rq *rq;
+	struct drm_sched_rq *rq;
 	const char *fw_name;
 	const struct common_firmware_header *hdr;
 	unsigned ucode_version, version_major, version_minor, binary_id;
@@ -174,8 +174,8 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 	}
 
 	ring = &adev->vce.ring[0];
-	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
-	r = amd_sched_entity_init(&ring->sched, &adev->vce.entity,
+	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+	r = drm_sched_entity_init(&ring->sched, &adev->vce.entity,
 				  rq, amdgpu_sched_jobs, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCE run queue.\n");
@@ -207,7 +207,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
 	if (adev->vce.vcpu_bo == NULL)
 		return 0;
 
-	amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
+	drm_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
 
 	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
 		(void **)&adev->vce.cpu_addr);
@@ -311,10 +311,10 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
 			amdgpu_dpm_enable_vce(adev, false);
 		} else {
 			amdgpu_asic_set_vce_clocks(adev, 0, 0);
-			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							    AMD_PG_STATE_GATE);
-			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							    AMD_CG_STATE_GATE);
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							       AMD_PG_STATE_GATE);
+			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							       AMD_CG_STATE_GATE);
 		}
 	} else {
 		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
@@ -343,10 +343,10 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
 			amdgpu_dpm_enable_vce(adev, true);
 		} else {
 			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
-			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							    AMD_CG_STATE_UNGATE);
-			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							    AMD_PG_STATE_UNGATE);
+			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							       AMD_CG_STATE_UNGATE);
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							       AMD_PG_STATE_UNGATE);
 
 		}
 	}
@@ -585,8 +585,8 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
 
 	for (i = 0; i < bo->placement.num_placement; ++i) {
 		bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
-		bo->placements[i].lpfn = bo->placements[i].fpfn ?
-			min(bo->placements[i].fpfn, lpfn) : lpfn;
+		bo->placements[i].lpfn = bo->placements[i].lpfn ?
+			min(bo->placements[i].lpfn, lpfn) : lpfn;
 	}
 	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 }
@@ -991,7 +991,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
  *
  */
 void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
-			     unsigned vm_id, bool ctx_switch)
+			     unsigned vmid, bool ctx_switch)
 {
 	amdgpu_ring_write(ring, VCE_CMD_IB);
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index 5ce54cde472d8dbfb7c9b9be556359ffe04527a2..0fd378ae92c3faee3efd8d2f45a7561afecb8274 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -46,7 +46,7 @@ struct amdgpu_vce {
 	struct amdgpu_ring	ring[AMDGPU_MAX_VCE_RINGS];
 	struct amdgpu_irq_src	irq;
 	unsigned		harvest_config;
-	struct amd_sched_entity	entity;
+	struct drm_sched_entity	entity;
 	uint32_t                srbm_soft_reset;
 	unsigned		num_rings;
 };
@@ -63,7 +63,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
 int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
 void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
-			     unsigned vm_id, bool ctx_switch);
+			     unsigned vmid, bool ctx_switch);
 void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
 				unsigned flags);
 int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index d7ba048c2f80c136e4a2f998985966e78e872abd..837962118dbc5d56de555791d99b8d6215520331 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -35,7 +35,6 @@
 #include "soc15d.h"
 #include "soc15_common.h"
 
-#include "soc15ip.h"
 #include "vcn/vcn_1_0_offset.h"
 
 /* 1 second timeout */
@@ -51,7 +50,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
 int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring;
-	struct amd_sched_rq *rq;
+	struct drm_sched_rq *rq;
 	unsigned long bo_size;
 	const char *fw_name;
 	const struct common_firmware_header *hdr;
@@ -104,8 +103,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 	}
 
 	ring = &adev->vcn.ring_dec;
-	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
-	r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
+	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+	r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
 				  rq, amdgpu_sched_jobs, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCN dec run queue.\n");
@@ -113,8 +112,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 	}
 
 	ring = &adev->vcn.ring_enc[0];
-	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
-	r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
+	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+	r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
 				  rq, amdgpu_sched_jobs, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCN enc run queue.\n");
@@ -130,9 +129,9 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
 
 	kfree(adev->vcn.saved_bo);
 
-	amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);
+	drm_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);
 
-	amd_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc);
+	drm_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc);
 
 	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
 			      &adev->vcn.gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index d50ba06578542e5a7df60dbc963a75edd1d1b672..2fd7db891689f514c0b98f72eb2d905311b31ffc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -56,8 +56,8 @@ struct amdgpu_vcn {
 	struct amdgpu_ring	ring_dec;
 	struct amdgpu_ring	ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
 	struct amdgpu_irq_src	irq;
-	struct amd_sched_entity entity_dec;
-	struct amd_sched_entity entity_enc;
+	struct drm_sched_entity entity_dec;
+	struct drm_sched_entity entity_enc;
 	unsigned		num_enc_rings;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3ecdbdfb04dd72899c64f9f22c8b9f2dfb893b74..6fc16eecf2dce5fc448afaa1681fbb086259ea35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -33,52 +33,6 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-/*
- * PASID manager
- *
- * PASIDs are global address space identifiers that can be shared
- * between the GPU, an IOMMU and the driver. VMs on different devices
- * may use the same PASID if they share the same address
- * space. Therefore PASIDs are allocated using a global IDA. VMs are
- * looked up from the PASID per amdgpu_device.
- */
-static DEFINE_IDA(amdgpu_vm_pasid_ida);
-
-/**
- * amdgpu_vm_alloc_pasid - Allocate a PASID
- * @bits: Maximum width of the PASID in bits, must be at least 1
- *
- * Allocates a PASID of the given width while keeping smaller PASIDs
- * available if possible.
- *
- * Returns a positive integer on success. Returns %-EINVAL if bits==0.
- * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
- * memory allocation failure.
- */
-int amdgpu_vm_alloc_pasid(unsigned int bits)
-{
-	int pasid = -EINVAL;
-
-	for (bits = min(bits, 31U); bits > 0; bits--) {
-		pasid = ida_simple_get(&amdgpu_vm_pasid_ida,
-				       1U << (bits - 1), 1U << bits,
-				       GFP_KERNEL);
-		if (pasid != -ENOSPC)
-			break;
-	}
-
-	return pasid;
-}
-
-/**
- * amdgpu_vm_free_pasid - Free a PASID
- * @pasid: PASID to free
- */
-void amdgpu_vm_free_pasid(unsigned int pasid)
-{
-	ida_simple_remove(&amdgpu_vm_pasid_ida, pasid);
-}
-
 /*
  * GPUVM
  * GPUVM is similar to the legacy gart on older asics, however
@@ -148,12 +102,23 @@ struct amdgpu_prt_cb {
 static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
 				      unsigned level)
 {
-	if (level != adev->vm_manager.num_level)
-		return 9 * (adev->vm_manager.num_level - level - 1) +
+	unsigned shift = 0xff;
+
+	switch (level) {
+	case AMDGPU_VM_PDB2:
+	case AMDGPU_VM_PDB1:
+	case AMDGPU_VM_PDB0:
+		shift = 9 * (AMDGPU_VM_PDB0 - level) +
 			adev->vm_manager.block_size;
-	else
-		/* For the page tables on the leaves */
-		return 0;
+		break;
+	case AMDGPU_VM_PTB:
+		shift = 0;
+		break;
+	default:
+		dev_err(adev->dev, "the level%d isn't supported.\n", level);
+	}
+
+	return shift;
 }
 
 /**
@@ -166,12 +131,13 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
 				      unsigned level)
 {
-	unsigned shift = amdgpu_vm_level_shift(adev, 0);
+	unsigned shift = amdgpu_vm_level_shift(adev,
+					       adev->vm_manager.root_level);
 
-	if (level == 0)
+	if (level == adev->vm_manager.root_level)
 		/* For the root directory */
 		return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
-	else if (level != adev->vm_manager.num_level)
+	else if (level != AMDGPU_VM_PTB)
 		/* Everything in between */
 		return 512;
 	else
@@ -329,9 +295,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	    to >= amdgpu_vm_num_entries(adev, level))
 		return -EINVAL;
 
-	if (to > parent->last_entry_used)
-		parent->last_entry_used = to;
-
 	++level;
 	saddr = saddr & ((1 << shift) - 1);
 	eaddr = eaddr & ((1 << shift) - 1);
@@ -346,7 +309,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 	if (vm->pte_support_ats) {
 		init_value = AMDGPU_PTE_DEFAULT_ATC;
-		if (level != adev->vm_manager.num_level - 1)
+		if (level != AMDGPU_VM_PTB)
 			init_value |= AMDGPU_PDE_PTE;
 
 	}
@@ -386,10 +349,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			spin_lock(&vm->status_lock);
 			list_add(&entry->base.vm_status, &vm->relocated);
 			spin_unlock(&vm->status_lock);
-			entry->addr = 0;
 		}
 
-		if (level < adev->vm_manager.num_level) {
+		if (level < AMDGPU_VM_PTB) {
 			uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
 			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
 				((1 << shift) - 1);
@@ -435,287 +397,8 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
-	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0);
-}
-
-/**
- * amdgpu_vm_had_gpu_reset - check if reset occured since last use
- *
- * @adev: amdgpu_device pointer
- * @id: VMID structure
- *
- * Check if GPU reset occured since last use of the VMID.
- */
-static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev,
-				    struct amdgpu_vm_id *id)
-{
-	return id->current_gpu_reset_count !=
-		atomic_read(&adev->gpu_reset_counter);
-}
-
-static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub)
-{
-	return !!vm->reserved_vmid[vmhub];
-}
-
-/* idr_mgr->lock must be held */
-static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
-					       struct amdgpu_ring *ring,
-					       struct amdgpu_sync *sync,
-					       struct dma_fence *fence,
-					       struct amdgpu_job *job)
-{
-	struct amdgpu_device *adev = ring->adev;
-	unsigned vmhub = ring->funcs->vmhub;
-	uint64_t fence_context = adev->fence_context + ring->idx;
-	struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub];
-	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct dma_fence *updates = sync->last_vm_update;
-	int r = 0;
-	struct dma_fence *flushed, *tmp;
-	bool needs_flush = vm->use_cpu_for_update;
-
-	flushed  = id->flushed_updates;
-	if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
-	    (atomic64_read(&id->owner) != vm->client_id) ||
-	    (job->vm_pd_addr != id->pd_gpu_addr) ||
-	    (updates && (!flushed || updates->context != flushed->context ||
-			dma_fence_is_later(updates, flushed))) ||
-	    (!id->last_flush || (id->last_flush->context != fence_context &&
-				 !dma_fence_is_signaled(id->last_flush)))) {
-		needs_flush = true;
-		/* to prevent one context starved by another context */
-		id->pd_gpu_addr = 0;
-		tmp = amdgpu_sync_peek_fence(&id->active, ring);
-		if (tmp) {
-			r = amdgpu_sync_fence(adev, sync, tmp, false);
-			return r;
-		}
-	}
-
-	/* Good we can use this VMID. Remember this submission as
-	* user of the VMID.
-	*/
-	r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
-	if (r)
-		goto out;
-
-	if (updates && (!flushed || updates->context != flushed->context ||
-			dma_fence_is_later(updates, flushed))) {
-		dma_fence_put(id->flushed_updates);
-		id->flushed_updates = dma_fence_get(updates);
-	}
-	id->pd_gpu_addr = job->vm_pd_addr;
-	atomic64_set(&id->owner, vm->client_id);
-	job->vm_needs_flush = needs_flush;
-	if (needs_flush) {
-		dma_fence_put(id->last_flush);
-		id->last_flush = NULL;
-	}
-	job->vm_id = id - id_mgr->ids;
-	trace_amdgpu_vm_grab_id(vm, ring, job);
-out:
-	return r;
-}
-
-/**
- * amdgpu_vm_grab_id - allocate the next free VMID
- *
- * @vm: vm to allocate id for
- * @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
- *
- * Allocate an id for the vm, adding fences to the sync obj as necessary.
- */
-int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		      struct amdgpu_sync *sync, struct dma_fence *fence,
-		      struct amdgpu_job *job)
-{
-	struct amdgpu_device *adev = ring->adev;
-	unsigned vmhub = ring->funcs->vmhub;
-	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	uint64_t fence_context = adev->fence_context + ring->idx;
-	struct dma_fence *updates = sync->last_vm_update;
-	struct amdgpu_vm_id *id, *idle;
-	struct dma_fence **fences;
-	unsigned i;
-	int r = 0;
-
-	mutex_lock(&id_mgr->lock);
-	if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) {
-		r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job);
-		mutex_unlock(&id_mgr->lock);
-		return r;
-	}
-	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
-	if (!fences) {
-		mutex_unlock(&id_mgr->lock);
-		return -ENOMEM;
-	}
-	/* Check if we have an idle VMID */
-	i = 0;
-	list_for_each_entry(idle, &id_mgr->ids_lru, list) {
-		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
-		if (!fences[i])
-			break;
-		++i;
-	}
-
-	/* If we can't find a idle VMID to use, wait till one becomes available */
-	if (&idle->list == &id_mgr->ids_lru) {
-		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
-		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
-		struct dma_fence_array *array;
-		unsigned j;
-
-		for (j = 0; j < i; ++j)
-			dma_fence_get(fences[j]);
-
-		array = dma_fence_array_create(i, fences, fence_context,
-					   seqno, true);
-		if (!array) {
-			for (j = 0; j < i; ++j)
-				dma_fence_put(fences[j]);
-			kfree(fences);
-			r = -ENOMEM;
-			goto error;
-		}
-
-
-		r = amdgpu_sync_fence(ring->adev, sync, &array->base, false);
-		dma_fence_put(&array->base);
-		if (r)
-			goto error;
-
-		mutex_unlock(&id_mgr->lock);
-		return 0;
-
-	}
-	kfree(fences);
-
-	job->vm_needs_flush = vm->use_cpu_for_update;
-	/* Check if we can use a VMID already assigned to this VM */
-	list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
-		struct dma_fence *flushed;
-		bool needs_flush = vm->use_cpu_for_update;
-
-		/* Check all the prerequisites to using this VMID */
-		if (amdgpu_vm_had_gpu_reset(adev, id))
-			continue;
-
-		if (atomic64_read(&id->owner) != vm->client_id)
-			continue;
-
-		if (job->vm_pd_addr != id->pd_gpu_addr)
-			continue;
-
-		if (!id->last_flush ||
-		    (id->last_flush->context != fence_context &&
-		     !dma_fence_is_signaled(id->last_flush)))
-			needs_flush = true;
-
-		flushed  = id->flushed_updates;
-		if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
-			needs_flush = true;
-
-		/* Concurrent flushes are only possible starting with Vega10 */
-		if (adev->asic_type < CHIP_VEGA10 && needs_flush)
-			continue;
-
-		/* Good we can use this VMID. Remember this submission as
-		 * user of the VMID.
-		 */
-		r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
-		if (r)
-			goto error;
-
-		if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
-			dma_fence_put(id->flushed_updates);
-			id->flushed_updates = dma_fence_get(updates);
-		}
-
-		if (needs_flush)
-			goto needs_flush;
-		else
-			goto no_flush_needed;
-
-	};
-
-	/* Still no ID to use? Then use the idle one found earlier */
-	id = idle;
-
-	/* Remember this submission as user of the VMID */
-	r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
-	if (r)
-		goto error;
-
-	id->pd_gpu_addr = job->vm_pd_addr;
-	dma_fence_put(id->flushed_updates);
-	id->flushed_updates = dma_fence_get(updates);
-	atomic64_set(&id->owner, vm->client_id);
-
-needs_flush:
-	job->vm_needs_flush = true;
-	dma_fence_put(id->last_flush);
-	id->last_flush = NULL;
-
-no_flush_needed:
-	list_move_tail(&id->list, &id_mgr->ids_lru);
-
-	job->vm_id = id - id_mgr->ids;
-	trace_amdgpu_vm_grab_id(vm, ring, job);
-
-error:
-	mutex_unlock(&id_mgr->lock);
-	return r;
-}
-
-static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev,
-					  struct amdgpu_vm *vm,
-					  unsigned vmhub)
-{
-	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-
-	mutex_lock(&id_mgr->lock);
-	if (vm->reserved_vmid[vmhub]) {
-		list_add(&vm->reserved_vmid[vmhub]->list,
-			&id_mgr->ids_lru);
-		vm->reserved_vmid[vmhub] = NULL;
-		atomic_dec(&id_mgr->reserved_vmid_num);
-	}
-	mutex_unlock(&id_mgr->lock);
-}
-
-static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev,
-					 struct amdgpu_vm *vm,
-					 unsigned vmhub)
-{
-	struct amdgpu_vm_id_manager *id_mgr;
-	struct amdgpu_vm_id *idle;
-	int r = 0;
-
-	id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	mutex_lock(&id_mgr->lock);
-	if (vm->reserved_vmid[vmhub])
-		goto unlock;
-	if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
-	    AMDGPU_VM_MAX_RESERVED_VMID) {
-		DRM_ERROR("Over limitation of reserved vmid\n");
-		atomic_dec(&id_mgr->reserved_vmid_num);
-		r = -EINVAL;
-		goto unlock;
-	}
-	/* Select the first entry VMID */
-	idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list);
-	list_del_init(&idle->list);
-	vm->reserved_vmid[vmhub] = idle;
-	mutex_unlock(&id_mgr->lock);
-
-	return 0;
-unlock:
-	mutex_unlock(&id_mgr->lock);
-	return r;
+	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
+				      adev->vm_manager.root_level);
 }
 
 /**
@@ -732,7 +415,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
 
 	has_compute_vm_bug = false;
 
-	ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 	if (ip_block) {
 		/* Compute has a VM bug for GFX version < 7.
 		   Compute has a VM bug for GFX 8 MEC firmware version < 673.*/
@@ -758,14 +441,14 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 {
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
-	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct amdgpu_vm_id *id;
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct amdgpu_vmid *id;
 	bool gds_switch_needed;
 	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
 
-	if (job->vm_id == 0)
+	if (job->vmid == 0)
 		return false;
-	id = &id_mgr->ids[job->vm_id];
+	id = &id_mgr->ids[job->vmid];
 	gds_switch_needed = ring->funcs->emit_gds_switch && (
 		id->gds_base != job->gds_base ||
 		id->gds_size != job->gds_size ||
@@ -774,7 +457,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 		id->oa_base != job->oa_base ||
 		id->oa_size != job->oa_size);
 
-	if (amdgpu_vm_had_gpu_reset(adev, id))
+	if (amdgpu_vmid_had_gpu_reset(adev, id))
 		return true;
 
 	return vm_flush_needed || gds_switch_needed;
@@ -789,7 +472,7 @@ static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
  * amdgpu_vm_flush - hardware flush the vm
  *
  * @ring: ring to use for flush
- * @vm_id: vmid number to use
+ * @vmid: vmid number to use
  * @pd_addr: address of the page directory
  *
  * Emit a VM flush when it is necessary.
@@ -798,8 +481,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 {
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
-	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id];
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
 	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
 		id->gds_base != job->gds_base ||
 		id->gds_size != job->gds_size ||
@@ -811,7 +494,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 	unsigned patch_offset = 0;
 	int r;
 
-	if (amdgpu_vm_had_gpu_reset(adev, id)) {
+	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
 		gds_switch_needed = true;
 		vm_flush_needed = true;
 	}
@@ -828,8 +511,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
 		struct dma_fence *fence;
 
-		trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr);
-		amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
+		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
+		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
 
 		r = amdgpu_fence_emit(ring, &fence);
 		if (r)
@@ -849,7 +532,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 		id->gws_size = job->gws_size;
 		id->oa_base = job->oa_base;
 		id->oa_size = job->oa_size;
-		amdgpu_ring_emit_gds_switch(ring, job->vm_id, job->gds_base,
+		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
 					    job->gds_size, job->gws_base,
 					    job->gws_size, job->oa_base,
 					    job->oa_size);
@@ -866,49 +549,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 	return 0;
 }
 
-/**
- * amdgpu_vm_reset_id - reset VMID to zero
- *
- * @adev: amdgpu device structure
- * @vm_id: vmid number to use
- *
- * Reset saved GDW, GWS and OA to force switch on next flush.
- */
-void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
-			unsigned vmid)
-{
-	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct amdgpu_vm_id *id = &id_mgr->ids[vmid];
-
-	atomic64_set(&id->owner, 0);
-	id->gds_base = 0;
-	id->gds_size = 0;
-	id->gws_base = 0;
-	id->gws_size = 0;
-	id->oa_base = 0;
-	id->oa_size = 0;
-}
-
-/**
- * amdgpu_vm_reset_all_id - reset VMID to zero
- *
- * @adev: amdgpu device structure
- *
- * Reset VMID to force flush on next use
- */
-void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
-{
-	unsigned i, j;
-
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vm_id_manager *id_mgr =
-			&adev->vm_manager.id_mgr[i];
-
-		for (j = 1; j < id_mgr->num_ids; ++j)
-			amdgpu_vm_reset_id(adev, i, j);
-	}
-}
-
 /**
  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
  *
@@ -1060,162 +700,52 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 }
 
 /*
- * amdgpu_vm_update_level - update a single level in the hierarchy
+ * amdgpu_vm_update_pde - update a single level in the hierarchy
  *
- * @adev: amdgpu_device pointer
+ * @param: parameters for the update
  * @vm: requested vm
  * @parent: parent directory
+ * @entry: entry to update
  *
- * Makes sure all entries in @parent are up to date.
- * Returns 0 for success, error for failure.
+ * Makes sure the requested entry in parent is up to date.
  */
-static int amdgpu_vm_update_level(struct amdgpu_device *adev,
-				  struct amdgpu_vm *vm,
-				  struct amdgpu_vm_pt *parent)
+static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
+				 struct amdgpu_vm *vm,
+				 struct amdgpu_vm_pt *parent,
+				 struct amdgpu_vm_pt *entry)
 {
-	struct amdgpu_bo *shadow;
-	struct amdgpu_ring *ring = NULL;
+	struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo;
 	uint64_t pd_addr, shadow_addr = 0;
-	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
-	unsigned count = 0, pt_idx, ndw = 0;
-	struct amdgpu_job *job;
-	struct amdgpu_pte_update_params params;
-	struct dma_fence *fence = NULL;
-	uint32_t incr;
-
-	int r;
-
-	if (!parent->entries)
-		return 0;
+	uint64_t pde, pt, flags;
+	unsigned level;
 
-	memset(&params, 0, sizeof(params));
-	params.adev = adev;
-	shadow = parent->base.bo->shadow;
+	/* Don't update huge pages here */
+	if (entry->huge)
+		return;
 
 	if (vm->use_cpu_for_update) {
 		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
-		if (unlikely(r))
-			return r;
-
-		params.func = amdgpu_vm_cpu_set_ptes;
 	} else {
-		ring = container_of(vm->entity.sched, struct amdgpu_ring,
-				    sched);
-
-		/* padding, etc. */
-		ndw = 64;
-
-		/* assume the worst case */
-		ndw += parent->last_entry_used * 6;
-
 		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-
-		if (shadow) {
+		shadow = parent->base.bo->shadow;
+		if (shadow)
 			shadow_addr = amdgpu_bo_gpu_offset(shadow);
-			ndw *= 2;
-		} else {
-			shadow_addr = 0;
-		}
-
-		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
-		if (r)
-			return r;
-
-		params.ib = &job->ibs[0];
-		params.func = amdgpu_vm_do_set_ptes;
-	}
-
-
-	/* walk over the address space and update the directory */
-	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-		struct amdgpu_bo *bo = entry->base.bo;
-		uint64_t pde, pt;
-
-		if (bo == NULL)
-			continue;
-
-		spin_lock(&vm->status_lock);
-		list_del_init(&entry->base.vm_status);
-		spin_unlock(&vm->status_lock);
-
-		pt = amdgpu_bo_gpu_offset(bo);
-		pt = amdgpu_gart_get_vm_pde(adev, pt);
-		/* Don't update huge pages here */
-		if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) ||
-		    parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID))
-			continue;
-
-		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
-
-		pde = pd_addr + pt_idx * 8;
-		incr = amdgpu_bo_size(bo);
-		if (((last_pde + 8 * count) != pde) ||
-		    ((last_pt + incr * count) != pt) ||
-		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
-
-			if (count) {
-				if (shadow)
-					params.func(&params,
-						    last_shadow,
-						    last_pt, count,
-						    incr,
-						    AMDGPU_PTE_VALID);
-
-				params.func(&params, last_pde,
-					    last_pt, count, incr,
-					    AMDGPU_PTE_VALID);
-			}
-
-			count = 1;
-			last_pde = pde;
-			last_shadow = shadow_addr + pt_idx * 8;
-			last_pt = pt;
-		} else {
-			++count;
-		}
 	}
 
-	if (count) {
-		if (vm->root.base.bo->shadow)
-			params.func(&params, last_shadow, last_pt,
-				    count, incr, AMDGPU_PTE_VALID);
+	for (level = 0, pbo = parent->base.bo->parent; pbo; ++level)
+		pbo = pbo->parent;
 
-		params.func(&params, last_pde, last_pt,
-			    count, incr, AMDGPU_PTE_VALID);
+	level += params->adev->vm_manager.root_level;
+	pt = amdgpu_bo_gpu_offset(bo);
+	flags = AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags);
+	if (shadow) {
+		pde = shadow_addr + (entry - parent->entries) * 8;
+		params->func(params, pde, pt, 1, 0, flags);
 	}
 
-	if (!vm->use_cpu_for_update) {
-		if (params.ib->length_dw == 0) {
-			amdgpu_job_free(job);
-		} else {
-			amdgpu_ring_pad_ib(ring, params.ib);
-			amdgpu_sync_resv(adev, &job->sync,
-					 parent->base.bo->tbo.resv,
-					 AMDGPU_FENCE_OWNER_VM, false);
-			if (shadow)
-				amdgpu_sync_resv(adev, &job->sync,
-						 shadow->tbo.resv,
-						 AMDGPU_FENCE_OWNER_VM, false);
-
-			WARN_ON(params.ib->length_dw > ndw);
-			r = amdgpu_job_submit(job, ring, &vm->entity,
-					AMDGPU_FENCE_OWNER_VM, &fence);
-			if (r)
-				goto error_free;
-
-			amdgpu_bo_fence(parent->base.bo, fence, true);
-			dma_fence_put(vm->last_update);
-			vm->last_update = fence;
-		}
-	}
-
-	return 0;
-
-error_free:
-	amdgpu_job_free(job);
-	return r;
+	pde = pd_addr + (entry - parent->entries) * 8;
+	params->func(params, pde, pt, 1, 0, flags);
 }
 
 /*
@@ -1225,27 +755,29 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
  *
  * Mark all PD level as invalid after an error.
  */
-static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
-				       struct amdgpu_vm_pt *parent)
+static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
+				       struct amdgpu_vm *vm,
+				       struct amdgpu_vm_pt *parent,
+				       unsigned level)
 {
-	unsigned pt_idx;
+	unsigned pt_idx, num_entries;
 
 	/*
 	 * Recurse into the subdirectories. This recursion is harmless because
 	 * we only have a maximum of 5 layers.
 	 */
-	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
+	num_entries = amdgpu_vm_num_entries(adev, level);
+	for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 
 		if (!entry->base.bo)
 			continue;
 
-		entry->addr = ~0ULL;
 		spin_lock(&vm->status_lock);
 		if (list_empty(&entry->base.vm_status))
 			list_add(&entry->base.vm_status, &vm->relocated);
 		spin_unlock(&vm->status_lock);
-		amdgpu_vm_invalidate_level(vm, entry);
+		amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
 	}
 }
 
@@ -1261,38 +793,63 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm)
 {
+	struct amdgpu_pte_update_params params;
+	struct amdgpu_job *job;
+	unsigned ndw = 0;
 	int r = 0;
 
+	if (list_empty(&vm->relocated))
+		return 0;
+
+restart:
+	memset(&params, 0, sizeof(params));
+	params.adev = adev;
+
+	if (vm->use_cpu_for_update) {
+		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
+		if (unlikely(r))
+			return r;
+
+		params.func = amdgpu_vm_cpu_set_ptes;
+	} else {
+		ndw = 512 * 8;
+		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
+		if (r)
+			return r;
+
+		params.ib = &job->ibs[0];
+		params.func = amdgpu_vm_do_set_ptes;
+	}
+
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->relocated)) {
-		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_vm_bo_base *bo_base, *parent;
+		struct amdgpu_vm_pt *pt, *entry;
 		struct amdgpu_bo *bo;
 
 		bo_base = list_first_entry(&vm->relocated,
 					   struct amdgpu_vm_bo_base,
 					   vm_status);
+		list_del_init(&bo_base->vm_status);
 		spin_unlock(&vm->status_lock);
 
 		bo = bo_base->bo->parent;
-		if (bo) {
-			struct amdgpu_vm_bo_base *parent;
-			struct amdgpu_vm_pt *pt;
-
-			parent = list_first_entry(&bo->va,
-						  struct amdgpu_vm_bo_base,
-						  bo_list);
-			pt = container_of(parent, struct amdgpu_vm_pt, base);
-
-			r = amdgpu_vm_update_level(adev, vm, pt);
-			if (r) {
-				amdgpu_vm_invalidate_level(vm, &vm->root);
-				return r;
-			}
+		if (!bo) {
 			spin_lock(&vm->status_lock);
-		} else {
-			spin_lock(&vm->status_lock);
-			list_del_init(&bo_base->vm_status);
+			continue;
 		}
+
+		parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
+					  bo_list);
+		pt = container_of(parent, struct amdgpu_vm_pt, base);
+		entry = container_of(bo_base, struct amdgpu_vm_pt, base);
+
+		amdgpu_vm_update_pde(&params, vm, pt, entry);
+
+		spin_lock(&vm->status_lock);
+		if (!vm->use_cpu_for_update &&
+		    (ndw - params.ib->length_dw) < 32)
+			break;
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -1300,8 +857,44 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 		/* Flush HDP */
 		mb();
 		amdgpu_gart_flush_gpu_tlb(adev, 0);
+	} else if (params.ib->length_dw == 0) {
+		amdgpu_job_free(job);
+	} else {
+		struct amdgpu_bo *root = vm->root.base.bo;
+		struct amdgpu_ring *ring;
+		struct dma_fence *fence;
+
+		ring = container_of(vm->entity.sched, struct amdgpu_ring,
+				    sched);
+
+		amdgpu_ring_pad_ib(ring, params.ib);
+		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
+				 AMDGPU_FENCE_OWNER_VM, false);
+		if (root->shadow)
+			amdgpu_sync_resv(adev, &job->sync,
+					 root->shadow->tbo.resv,
+					 AMDGPU_FENCE_OWNER_VM, false);
+
+		WARN_ON(params.ib->length_dw > ndw);
+		r = amdgpu_job_submit(job, ring, &vm->entity,
+				      AMDGPU_FENCE_OWNER_VM, &fence);
+		if (r)
+			goto error;
+
+		amdgpu_bo_fence(root, fence, true);
+		dma_fence_put(vm->last_update);
+		vm->last_update = fence;
 	}
 
+	if (!list_empty(&vm->relocated))
+		goto restart;
+
+	return 0;
+
+error:
+	amdgpu_vm_invalidate_level(adev, vm, &vm->root,
+				   adev->vm_manager.root_level);
+	amdgpu_job_free(job);
 	return r;
 }
 
@@ -1319,19 +912,19 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
 			 struct amdgpu_vm_pt **entry,
 			 struct amdgpu_vm_pt **parent)
 {
-	unsigned level = 0;
+	unsigned level = p->adev->vm_manager.root_level;
 
 	*parent = NULL;
 	*entry = &p->vm->root;
 	while ((*entry)->entries) {
-		unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level++);
+		unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
 
-		idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
 		*parent = *entry;
-		*entry = &(*entry)->entries[idx];
+		*entry = &(*entry)->entries[addr >> shift];
+		addr &= (1ULL << shift) - 1;
 	}
 
-	if (level != p->adev->vm_manager.num_level)
+	if (level != AMDGPU_VM_PTB)
 		*entry = NULL;
 }
 
@@ -1353,56 +946,42 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 					unsigned nptes, uint64_t dst,
 					uint64_t flags)
 {
-	bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
 	uint64_t pd_addr, pde;
 
 	/* In the case of a mixed PT the PDE must point to it*/
-	if (p->adev->asic_type < CHIP_VEGA10 ||
-	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
-	    p->src ||
-	    !(flags & AMDGPU_PTE_VALID)) {
-
-		dst = amdgpu_bo_gpu_offset(entry->base.bo);
-		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
-		flags = AMDGPU_PTE_VALID;
-	} else {
+	if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
+	    nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
 		/* Set the huge page flag to stop scanning at this PDE */
 		flags |= AMDGPU_PDE_PTE;
 	}
 
-	if (entry->addr == (dst | flags))
+	if (!(flags & AMDGPU_PDE_PTE)) {
+		if (entry->huge) {
+			/* Add the entry to the relocated list to update it. */
+			entry->huge = false;
+			spin_lock(&p->vm->status_lock);
+			list_move(&entry->base.vm_status, &p->vm->relocated);
+			spin_unlock(&p->vm->status_lock);
+		}
 		return;
+	}
 
-	entry->addr = (dst | flags);
-
-	if (use_cpu_update) {
-		/* In case a huge page is replaced with a system
-		 * memory mapping, p->pages_addr != NULL and
-		 * amdgpu_vm_cpu_set_ptes would try to translate dst
-		 * through amdgpu_vm_map_gart. But dst is already a
-		 * GPU address (of the page table). Disable
-		 * amdgpu_vm_map_gart temporarily.
-		 */
-		dma_addr_t *tmp;
-
-		tmp = p->pages_addr;
-		p->pages_addr = NULL;
+	entry->huge = true;
+	amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0,
+			       &dst, &flags);
 
+	if (p->func == amdgpu_vm_cpu_set_ptes) {
 		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-		pde = pd_addr + (entry - parent->entries) * 8;
-		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
-
-		p->pages_addr = tmp;
 	} else {
 		if (parent->base.bo->shadow) {
 			pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
 			pde = pd_addr + (entry - parent->entries) * 8;
-			amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+			p->func(p, pde, dst, 1, 0, flags);
 		}
 		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-		pde = pd_addr + (entry - parent->entries) * 8;
-		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
 	}
+	pde = pd_addr + (entry - parent->entries) * 8;
+	p->func(p, pde, dst, 1, 0, flags);
 }
 
 /**
@@ -1447,7 +1026,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		amdgpu_vm_handle_huge_pages(params, entry, parent,
 					    nptes, dst, flags);
 		/* We don't need to update PTEs for huge pages */
-		if (entry->addr & AMDGPU_PDE_PTE)
+		if (entry->huge)
 			continue;
 
 		pt = entry->base.bo;
@@ -1606,14 +1185,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
          *
          * The second command is for the shadow pagetables.
 	 */
-	ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
+	if (vm->root.base.bo->shadow)
+		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
+	else
+		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
 
 	/* padding, etc. */
 	ndw = 64;
 
-	/* one PDE write for each huge page */
-	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
-
 	if (pages_addr) {
 		/* copy commands needed */
 		ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
@@ -1688,7 +1267,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
-	amdgpu_vm_invalidate_level(vm, &vm->root);
 	return r;
 }
 
@@ -2099,18 +1677,31 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->moved)) {
 		struct amdgpu_bo_va *bo_va;
+		struct reservation_object *resv;
 
 		bo_va = list_first_entry(&vm->moved,
 			struct amdgpu_bo_va, base.vm_status);
 		spin_unlock(&vm->status_lock);
 
+		resv = bo_va->base.bo->tbo.resv;
+
 		/* Per VM BOs never need to bo cleared in the page tables */
-		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
+		if (resv == vm->root.base.bo->tbo.resv)
+			clear = false;
+		/* Try to reserve the BO to avoid clearing its ptes */
+		else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
+			clear = false;
+		/* Somebody else is using the BO right now */
+		else
+			clear = true;
 
 		r = amdgpu_vm_bo_update(adev, bo_va, clear);
 		if (r)
 			return r;
 
+		if (!clear && resv != vm->root.base.bo->tbo.resv)
+			reservation_object_unlock(resv);
+
 		spin_lock(&vm->status_lock);
 	}
 	spin_unlock(&vm->status_lock);
@@ -2150,8 +1741,26 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	INIT_LIST_HEAD(&bo_va->valids);
 	INIT_LIST_HEAD(&bo_va->invalids);
 
-	if (bo)
-		list_add_tail(&bo_va->base.bo_list, &bo->va);
+	if (!bo)
+		return bo_va;
+
+	list_add_tail(&bo_va->base.bo_list, &bo->va);
+
+	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
+		return bo_va;
+
+	if (bo->preferred_domains &
+	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
+		return bo_va;
+
+	/*
+	 * We checked all the prerequisites, but it looks like this per VM BO
+	 * is currently evicted. add the BO to the evicted list to make sure it
+	 * is validated on next VM use to avoid fault.
+	 * */
+	spin_lock(&vm->status_lock);
+	list_move_tail(&bo_va->base.vm_status, &vm->evicted);
+	spin_unlock(&vm->status_lock);
 
 	return bo_va;
 }
@@ -2604,7 +2213,19 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
 		tmp >>= amdgpu_vm_block_size - 9;
 	tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
 	adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
-
+	switch (adev->vm_manager.num_level) {
+	case 3:
+		adev->vm_manager.root_level = AMDGPU_VM_PDB2;
+		break;
+	case 2:
+		adev->vm_manager.root_level = AMDGPU_VM_PDB1;
+		break;
+	case 1:
+		adev->vm_manager.root_level = AMDGPU_VM_PDB0;
+		break;
+	default:
+		dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
+	}
 	/* block size depends on vm size and hw setup*/
 	if (amdgpu_vm_block_size != -1)
 		adev->vm_manager.block_size =
@@ -2643,13 +2264,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		AMDGPU_VM_PTE_COUNT(adev) * 8);
 	unsigned ring_instance;
 	struct amdgpu_ring *ring;
-	struct amd_sched_rq *rq;
+	struct drm_sched_rq *rq;
 	int r, i;
 	u64 flags;
 	uint64_t init_pde_value = 0;
 
 	vm->va = RB_ROOT_CACHED;
-	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
@@ -2663,8 +2283,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
 	ring_instance %= adev->vm_manager.vm_pte_num_rings;
 	ring = adev->vm_manager.vm_pte_rings[ring_instance];
-	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
-	r = amd_sched_entity_init(&ring->sched, &vm->entity,
+	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+	r = drm_sched_entity_init(&ring->sched, &vm->entity,
 				  rq, amdgpu_sched_jobs, NULL);
 	if (r)
 		return r;
@@ -2698,7 +2318,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 				AMDGPU_GEM_CREATE_SHADOW);
 
-	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
+	r = amdgpu_bo_create(adev,
+			     amdgpu_vm_bo_size(adev, adev->vm_manager.root_level),
+			     align, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
 			     flags,
 			     NULL, NULL, init_pde_value, &vm->root.base.bo);
@@ -2744,7 +2366,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	vm->root.base.bo = NULL;
 
 error_free_sched_entity:
-	amd_sched_entity_fini(&ring->sched, &vm->entity);
+	drm_sched_entity_fini(&ring->sched, &vm->entity);
 
 	return r;
 }
@@ -2752,26 +2374,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 /**
  * amdgpu_vm_free_levels - free PD/PT levels
  *
- * @level: PD/PT starting level to free
+ * @adev: amdgpu device structure
+ * @parent: PD/PT starting level to free
+ * @level: level of parent structure
  *
  * Free the page directory or page table level and all sub levels.
  */
-static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
+static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
+				  struct amdgpu_vm_pt *parent,
+				  unsigned level)
 {
-	unsigned i;
+	unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
 
-	if (level->base.bo) {
-		list_del(&level->base.bo_list);
-		list_del(&level->base.vm_status);
-		amdgpu_bo_unref(&level->base.bo->shadow);
-		amdgpu_bo_unref(&level->base.bo);
+	if (parent->base.bo) {
+		list_del(&parent->base.bo_list);
+		list_del(&parent->base.vm_status);
+		amdgpu_bo_unref(&parent->base.bo->shadow);
+		amdgpu_bo_unref(&parent->base.bo);
 	}
 
-	if (level->entries)
-		for (i = 0; i <= level->last_entry_used; i++)
-			amdgpu_vm_free_levels(&level->entries[i]);
+	if (parent->entries)
+		for (i = 0; i < num_entries; i++)
+			amdgpu_vm_free_levels(adev, &parent->entries[i],
+					      level + 1);
 
-	kvfree(level->entries);
+	kvfree(parent->entries);
 }
 
 /**
@@ -2803,7 +2430,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 	}
 
-	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
+	drm_sched_entity_fini(vm->entity.sched, &vm->entity);
 
 	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
 		dev_err(adev->dev, "still active bo inside vm\n");
@@ -2829,13 +2456,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	if (r) {
 		dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
 	} else {
-		amdgpu_vm_free_levels(&vm->root);
+		amdgpu_vm_free_levels(adev, &vm->root,
+				      adev->vm_manager.root_level);
 		amdgpu_bo_unreserve(root);
 	}
 	amdgpu_bo_unref(&root);
 	dma_fence_put(vm->last_update);
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
-		amdgpu_vm_free_reserved_vmid(adev, vm, i);
+		amdgpu_vmid_free_reserved(adev, vm, i);
 }
 
 /**
@@ -2854,17 +2482,21 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
 
 	spin_lock(&adev->vm_manager.pasid_lock);
 	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
-	spin_unlock(&adev->vm_manager.pasid_lock);
-	if (!vm)
+	if (!vm) {
 		/* VM not found, can't track fault credit */
+		spin_unlock(&adev->vm_manager.pasid_lock);
 		return true;
+	}
 
 	/* No lock needed. only accessed by IRQ handler */
-	if (!vm->fault_credit)
+	if (!vm->fault_credit) {
 		/* Too many faults in this VM */
+		spin_unlock(&adev->vm_manager.pasid_lock);
 		return false;
+	}
 
 	vm->fault_credit--;
+	spin_unlock(&adev->vm_manager.pasid_lock);
 	return true;
 }
 
@@ -2877,23 +2509,9 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
  */
 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 {
-	unsigned i, j;
-
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vm_id_manager *id_mgr =
-			&adev->vm_manager.id_mgr[i];
+	unsigned i;
 
-		mutex_init(&id_mgr->lock);
-		INIT_LIST_HEAD(&id_mgr->ids_lru);
-		atomic_set(&id_mgr->reserved_vmid_num, 0);
-
-		/* skip over VMID 0, since it is the system VM */
-		for (j = 1; j < id_mgr->num_ids; ++j) {
-			amdgpu_vm_reset_id(adev, i, j);
-			amdgpu_sync_create(&id_mgr->ids[i].active);
-			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
-		}
-	}
+	amdgpu_vmid_mgr_init(adev);
 
 	adev->vm_manager.fence_context =
 		dma_fence_context_alloc(AMDGPU_MAX_RINGS);
@@ -2901,7 +2519,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 		adev->vm_manager.seqno[i] = 0;
 
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
-	atomic64_set(&adev->vm_manager.client_counter, 0);
 	spin_lock_init(&adev->vm_manager.prt_lock);
 	atomic_set(&adev->vm_manager.num_prt_users, 0);
 
@@ -2934,24 +2551,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
  */
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 {
-	unsigned i, j;
-
 	WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
 	idr_destroy(&adev->vm_manager.pasid_idr);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vm_id_manager *id_mgr =
-			&adev->vm_manager.id_mgr[i];
-
-		mutex_destroy(&id_mgr->lock);
-		for (j = 0; j < AMDGPU_NUM_VM; ++j) {
-			struct amdgpu_vm_id *id = &id_mgr->ids[j];
-
-			amdgpu_sync_free(&id->active);
-			dma_fence_put(id->flushed_updates);
-			dma_fence_put(id->last_flush);
-		}
-	}
+	amdgpu_vmid_mgr_fini(adev);
 }
 
 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -2964,13 +2567,12 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	switch (args->in.op) {
 	case AMDGPU_VM_OP_RESERVE_VMID:
 		/* current, we only have requirement to reserve vmid from gfxhub */
-		r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm,
-						  AMDGPU_GFXHUB);
+		r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
 		if (r)
 			return r;
 		break;
 	case AMDGPU_VM_OP_UNRESERVE_VMID:
-		amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB);
+		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 43ea131dd411b8626816e7df608e203a5b929fe2..21a80f1bb2b9cd9779d475db133b9d70a77ebcec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -24,12 +24,14 @@
 #ifndef __AMDGPU_VM_H__
 #define __AMDGPU_VM_H__
 
-#include <linux/rbtree.h>
 #include <linux/idr.h>
+#include <linux/kfifo.h>
+#include <linux/rbtree.h>
+#include <drm/gpu_scheduler.h>
 
-#include "gpu_scheduler.h"
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
+#include "amdgpu_ids.h"
 
 struct amdgpu_bo_va;
 struct amdgpu_job;
@@ -39,9 +41,6 @@ struct amdgpu_bo_list_entry;
  * GPUVM handling
  */
 
-/* maximum number of VMIDs */
-#define AMDGPU_NUM_VM	16
-
 /* Maximum number of PTEs the hardware can write with one command */
 #define AMDGPU_VM_MAX_UPDATE_SIZE	0x3FFFF
 
@@ -69,6 +68,12 @@ struct amdgpu_bo_list_entry;
 /* PDE is handled as PTE for VEGA10 */
 #define AMDGPU_PDE_PTE		(1ULL << 54)
 
+/* PTE is handled as PDE for VEGA10 (Translate Further) */
+#define AMDGPU_PTE_TF		(1ULL << 56)
+
+/* PDE Block Fragment Size for VEGA10 */
+#define AMDGPU_PDE_BFS(a)	((uint64_t)a << 59)
+
 /* VEGA10 only */
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
@@ -119,6 +124,16 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
 #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
 
+/* VMPT level enumerate, and the hiberachy is:
+ * PDB2->PDB1->PDB0->PTB
+ */
+enum amdgpu_vm_level {
+	AMDGPU_VM_PDB2,
+	AMDGPU_VM_PDB1,
+	AMDGPU_VM_PDB0,
+	AMDGPU_VM_PTB
+};
+
 /* base structure for tracking BO usage in a VM */
 struct amdgpu_vm_bo_base {
 	/* constant after initialization */
@@ -137,11 +152,10 @@ struct amdgpu_vm_bo_base {
 
 struct amdgpu_vm_pt {
 	struct amdgpu_vm_bo_base	base;
-	uint64_t			addr;
+	bool				huge;
 
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt		*entries;
-	unsigned			last_entry_used;
 };
 
 #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
@@ -175,13 +189,11 @@ struct amdgpu_vm {
 	spinlock_t		freed_lock;
 
 	/* Scheduler entity for page table updates */
-	struct amd_sched_entity	entity;
+	struct drm_sched_entity	entity;
 
-	/* client id and PASID (TODO: replace client_id with PASID) */
-	u64                     client_id;
 	unsigned int		pasid;
 	/* dedicated to vm */
-	struct amdgpu_vm_id	*reserved_vmid[AMDGPU_MAX_VMHUBS];
+	struct amdgpu_vmid	*reserved_vmid[AMDGPU_MAX_VMHUBS];
 
 	/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
 	bool                    use_cpu_for_update;
@@ -196,37 +208,9 @@ struct amdgpu_vm {
 	unsigned int		fault_credit;
 };
 
-struct amdgpu_vm_id {
-	struct list_head	list;
-	struct amdgpu_sync	active;
-	struct dma_fence		*last_flush;
-	atomic64_t		owner;
-
-	uint64_t		pd_gpu_addr;
-	/* last flushed PD/PT update */
-	struct dma_fence		*flushed_updates;
-
-	uint32_t                current_gpu_reset_count;
-
-	uint32_t		gds_base;
-	uint32_t		gds_size;
-	uint32_t		gws_base;
-	uint32_t		gws_size;
-	uint32_t		oa_base;
-	uint32_t		oa_size;
-};
-
-struct amdgpu_vm_id_manager {
-	struct mutex		lock;
-	unsigned		num_ids;
-	struct list_head	ids_lru;
-	struct amdgpu_vm_id	ids[AMDGPU_NUM_VM];
-	atomic_t		reserved_vmid_num;
-};
-
 struct amdgpu_vm_manager {
 	/* Handling of VMIDs */
-	struct amdgpu_vm_id_manager		id_mgr[AMDGPU_MAX_VMHUBS];
+	struct amdgpu_vmid_mgr			id_mgr[AMDGPU_MAX_VMHUBS];
 
 	/* Handling of VM fences */
 	u64					fence_context;
@@ -236,6 +220,7 @@ struct amdgpu_vm_manager {
 	uint32_t				num_level;
 	uint32_t				block_size;
 	uint32_t				fragment_size;
+	enum amdgpu_vm_level			root_level;
 	/* vram base address for page table entry  */
 	u64					vram_base_offset;
 	/* vm pte handling */
@@ -243,8 +228,6 @@ struct amdgpu_vm_manager {
 	struct amdgpu_ring                      *vm_pte_rings[AMDGPU_MAX_RINGS];
 	unsigned				vm_pte_num_rings;
 	atomic_t				vm_pte_next_ring;
-	/* client id counter */
-	atomic64_t				client_counter;
 
 	/* partial resident texture handling */
 	spinlock_t				prt_lock;
@@ -263,8 +246,6 @@ struct amdgpu_vm_manager {
 	spinlock_t				pasid_lock;
 };
 
-int amdgpu_vm_alloc_pasid(unsigned int bits);
-void amdgpu_vm_free_pasid(unsigned int pasid);
 void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@@ -282,13 +263,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 			struct amdgpu_vm *vm,
 			uint64_t saddr, uint64_t size);
-int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		      struct amdgpu_sync *sync, struct dma_fence *fence,
-		      struct amdgpu_job *job);
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
-void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
-			unsigned vmid);
-void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index f11c0aacf19f5a34652da90522a6d80264c09cc0..a0943aa8d1d370fd43bb4f8f111659663e0326fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -891,12 +891,12 @@ static void ci_dpm_powergate_uvd(void *handle, bool gate)
 
 	if (gate) {
 		/* stop the UVD block */
-		amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							AMD_PG_STATE_GATE);
+		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+						       AMD_PG_STATE_GATE);
 		ci_update_uvd_dpm(adev, gate);
 	} else {
-		amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							AMD_PG_STATE_UNGATE);
+		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+						       AMD_PG_STATE_UNGATE);
 		ci_update_uvd_dpm(adev, gate);
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 8ba056a2a5da9ede328b0bc5541d0b745b445df6..8e59e65efd448891fc9d84a7db6e1669c2b892eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -755,74 +755,74 @@ static void cik_init_golden_registers(struct amdgpu_device *adev)
 
 	switch (adev->asic_type) {
 	case CHIP_BONAIRE:
-		amdgpu_program_register_sequence(adev,
-						 bonaire_mgcg_cgcg_init,
-						 ARRAY_SIZE(bonaire_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 bonaire_golden_registers,
-						 ARRAY_SIZE(bonaire_golden_registers));
-		amdgpu_program_register_sequence(adev,
-						 bonaire_golden_common_registers,
-						 ARRAY_SIZE(bonaire_golden_common_registers));
-		amdgpu_program_register_sequence(adev,
-						 bonaire_golden_spm_registers,
-						 ARRAY_SIZE(bonaire_golden_spm_registers));
+		amdgpu_device_program_register_sequence(adev,
+							bonaire_mgcg_cgcg_init,
+							ARRAY_SIZE(bonaire_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							bonaire_golden_registers,
+							ARRAY_SIZE(bonaire_golden_registers));
+		amdgpu_device_program_register_sequence(adev,
+							bonaire_golden_common_registers,
+							ARRAY_SIZE(bonaire_golden_common_registers));
+		amdgpu_device_program_register_sequence(adev,
+							bonaire_golden_spm_registers,
+							ARRAY_SIZE(bonaire_golden_spm_registers));
 		break;
 	case CHIP_KABINI:
-		amdgpu_program_register_sequence(adev,
-						 kalindi_mgcg_cgcg_init,
-						 ARRAY_SIZE(kalindi_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 kalindi_golden_registers,
-						 ARRAY_SIZE(kalindi_golden_registers));
-		amdgpu_program_register_sequence(adev,
-						 kalindi_golden_common_registers,
-						 ARRAY_SIZE(kalindi_golden_common_registers));
-		amdgpu_program_register_sequence(adev,
-						 kalindi_golden_spm_registers,
-						 ARRAY_SIZE(kalindi_golden_spm_registers));
+		amdgpu_device_program_register_sequence(adev,
+							kalindi_mgcg_cgcg_init,
+							ARRAY_SIZE(kalindi_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							kalindi_golden_registers,
+							ARRAY_SIZE(kalindi_golden_registers));
+		amdgpu_device_program_register_sequence(adev,
+							kalindi_golden_common_registers,
+							ARRAY_SIZE(kalindi_golden_common_registers));
+		amdgpu_device_program_register_sequence(adev,
+							kalindi_golden_spm_registers,
+							ARRAY_SIZE(kalindi_golden_spm_registers));
 		break;
 	case CHIP_MULLINS:
-		amdgpu_program_register_sequence(adev,
-						 kalindi_mgcg_cgcg_init,
-						 ARRAY_SIZE(kalindi_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 godavari_golden_registers,
-						 ARRAY_SIZE(godavari_golden_registers));
-		amdgpu_program_register_sequence(adev,
-						 kalindi_golden_common_registers,
-						 ARRAY_SIZE(kalindi_golden_common_registers));
-		amdgpu_program_register_sequence(adev,
-						 kalindi_golden_spm_registers,
-						 ARRAY_SIZE(kalindi_golden_spm_registers));
+		amdgpu_device_program_register_sequence(adev,
+							kalindi_mgcg_cgcg_init,
+							ARRAY_SIZE(kalindi_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							godavari_golden_registers,
+							ARRAY_SIZE(godavari_golden_registers));
+		amdgpu_device_program_register_sequence(adev,
+							kalindi_golden_common_registers,
+							ARRAY_SIZE(kalindi_golden_common_registers));
+		amdgpu_device_program_register_sequence(adev,
+							kalindi_golden_spm_registers,
+							ARRAY_SIZE(kalindi_golden_spm_registers));
 		break;
 	case CHIP_KAVERI:
-		amdgpu_program_register_sequence(adev,
-						 spectre_mgcg_cgcg_init,
-						 ARRAY_SIZE(spectre_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 spectre_golden_registers,
-						 ARRAY_SIZE(spectre_golden_registers));
-		amdgpu_program_register_sequence(adev,
-						 spectre_golden_common_registers,
-						 ARRAY_SIZE(spectre_golden_common_registers));
-		amdgpu_program_register_sequence(adev,
-						 spectre_golden_spm_registers,
-						 ARRAY_SIZE(spectre_golden_spm_registers));
+		amdgpu_device_program_register_sequence(adev,
+							spectre_mgcg_cgcg_init,
+							ARRAY_SIZE(spectre_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							spectre_golden_registers,
+							ARRAY_SIZE(spectre_golden_registers));
+		amdgpu_device_program_register_sequence(adev,
+							spectre_golden_common_registers,
+							ARRAY_SIZE(spectre_golden_common_registers));
+		amdgpu_device_program_register_sequence(adev,
+							spectre_golden_spm_registers,
+							ARRAY_SIZE(spectre_golden_spm_registers));
 		break;
 	case CHIP_HAWAII:
-		amdgpu_program_register_sequence(adev,
-						 hawaii_mgcg_cgcg_init,
-						 ARRAY_SIZE(hawaii_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 hawaii_golden_registers,
-						 ARRAY_SIZE(hawaii_golden_registers));
-		amdgpu_program_register_sequence(adev,
-						 hawaii_golden_common_registers,
-						 ARRAY_SIZE(hawaii_golden_common_registers));
-		amdgpu_program_register_sequence(adev,
-						 hawaii_golden_spm_registers,
-						 ARRAY_SIZE(hawaii_golden_spm_registers));
+		amdgpu_device_program_register_sequence(adev,
+							hawaii_mgcg_cgcg_init,
+							ARRAY_SIZE(hawaii_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							hawaii_golden_registers,
+							ARRAY_SIZE(hawaii_golden_registers));
+		amdgpu_device_program_register_sequence(adev,
+							hawaii_golden_common_registers,
+							ARRAY_SIZE(hawaii_golden_common_registers));
+		amdgpu_device_program_register_sequence(adev,
+							hawaii_golden_spm_registers,
+							ARRAY_SIZE(hawaii_golden_spm_registers));
 		break;
 	default:
 		break;
@@ -1246,7 +1246,7 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 	/* disable BM */
 	pci_clear_master(adev->pdev);
 	/* reset */
-	amdgpu_pci_config_reset(adev);
+	amdgpu_device_pci_config_reset(adev);
 
 	udelay(100);
 
@@ -1866,7 +1866,7 @@ static int cik_common_early_init(void *handle)
 
 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 
-	amdgpu_get_pcie_info(adev);
+	amdgpu_device_get_pcie_info(adev);
 
 	return 0;
 }
@@ -1974,77 +1974,77 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 
 	switch (adev->asic_type) {
 	case CHIP_BONAIRE:
-		amdgpu_ip_block_add(adev, &cik_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block);
-		amdgpu_ip_block_add(adev, &cik_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display)
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
-			amdgpu_ip_block_add(adev, &dm_ip_block);
+			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 #endif
 		else
-			amdgpu_ip_block_add(adev, &dce_v8_2_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v7_2_ip_block);
-		amdgpu_ip_block_add(adev, &cik_sdma_ip_block);
-		amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block);
-		amdgpu_ip_block_add(adev, &vce_v2_0_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_v8_2_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+		amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
+		amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
 		break;
 	case CHIP_HAWAII:
-		amdgpu_ip_block_add(adev, &cik_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block);
-		amdgpu_ip_block_add(adev, &cik_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display)
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
-			amdgpu_ip_block_add(adev, &dm_ip_block);
+			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 #endif
 		else
-			amdgpu_ip_block_add(adev, &dce_v8_5_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v7_3_ip_block);
-		amdgpu_ip_block_add(adev, &cik_sdma_ip_block);
-		amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block);
-		amdgpu_ip_block_add(adev, &vce_v2_0_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_v8_5_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+		amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
+		amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
 		break;
 	case CHIP_KAVERI:
-		amdgpu_ip_block_add(adev, &cik_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block);
-		amdgpu_ip_block_add(adev, &cik_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display)
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
-			amdgpu_ip_block_add(adev, &dm_ip_block);
+			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 #endif
 		else
-			amdgpu_ip_block_add(adev, &dce_v8_1_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v7_1_ip_block);
-		amdgpu_ip_block_add(adev, &cik_sdma_ip_block);
-		amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block);
-		amdgpu_ip_block_add(adev, &vce_v2_0_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_v8_1_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+		amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
+		amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
 		break;
 	case CHIP_KABINI:
 	case CHIP_MULLINS:
-		amdgpu_ip_block_add(adev, &cik_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block);
-		amdgpu_ip_block_add(adev, &cik_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display)
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
-			amdgpu_ip_block_add(adev, &dm_ip_block);
+			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 #endif
 		else
-			amdgpu_ip_block_add(adev, &dce_v8_3_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v7_2_ip_block);
-		amdgpu_ip_block_add(adev, &cik_sdma_ip_block);
-		amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block);
-		amdgpu_ip_block_add(adev, &vce_v2_0_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_v8_3_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+		amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
+		amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
 		break;
 	default:
 		/* FIXME: not supported yet */
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index a870b354e3f7babc8e24f36929f9792d1cb65f36..d5a05c19708fc9f4fdde684d7d7a19d282ffa45c 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -280,7 +280,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
 	entry->src_id = dw[0] & 0xff;
 	entry->src_data[0] = dw[1] & 0xfffffff;
 	entry->ring_id = dw[2] & 0xff;
-	entry->vm_id = (dw[2] >> 8) & 0xff;
+	entry->vmid = (dw[2] >> 8) & 0xff;
 	entry->pas_id = (dw[2] >> 16) & 0xffff;
 
 	/* wptr/rptr are in bytes! */
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index ed26dcbc4f795c188dd9b008f3d2280df463276a..6e8278e689b18ef35bd67428573cc8dd15c5dcbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -221,9 +221,9 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
  */
 static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
 				  struct amdgpu_ib *ib,
-				  unsigned vm_id, bool ctx_switch)
+				  unsigned vmid, bool ctx_switch)
 {
-	u32 extra_bits = vm_id & 0xf;
+	u32 extra_bits = vmid & 0xf;
 
 	/* IB packet must end on a 8 DW boundary */
 	cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
@@ -626,7 +626,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
 	u32 tmp;
 	u64 gpu_addr;
 
-	r = amdgpu_wb_get(adev, &index);
+	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
 		return r;
@@ -639,7 +639,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
 	r = amdgpu_ring_alloc(ring, 5);
 	if (r) {
 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		amdgpu_wb_free(adev, index);
+		amdgpu_device_wb_free(adev, index);
 		return r;
 	}
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
@@ -663,7 +663,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
 			  ring->idx, tmp);
 		r = -EINVAL;
 	}
-	amdgpu_wb_free(adev, index);
+	amdgpu_device_wb_free(adev, index);
 
 	return r;
 }
@@ -686,7 +686,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	u64 gpu_addr;
 	long r;
 
-	r = amdgpu_wb_get(adev, &index);
+	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 		return r;
@@ -735,7 +735,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	amdgpu_ib_free(adev, &ib, NULL);
 	dma_fence_put(f);
 err0:
-	amdgpu_wb_free(adev, index);
+	amdgpu_device_wb_free(adev, index);
 	return r;
 }
 
@@ -880,23 +880,23 @@ static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
  * using sDMA (CIK).
  */
 static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					unsigned vm_id, uint64_t pd_addr)
+					unsigned vmid, uint64_t pd_addr)
 {
 	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
 			  SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
 
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	if (vm_id < 8) {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
+	if (vmid < 8) {
+		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
 	} else {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
+		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
 	}
 	amdgpu_ring_write(ring, pd_addr >> 12);
 
 	/* flush TLB */
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
-	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vmid);
 
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index 6a9e38a3d2a0bbee4265da23f889b4ebf3710b5c..cee6e8a3ad9c9227c81dcfe2f568daf508686219 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -562,7 +562,7 @@
 #define	PRIVATE_BASE(x)	((x) << 0) /* scratch */
 #define	SHARED_BASE(x)	((x) << 16) /* LDS */
 
-#define KFD_CIK_SDMA_QUEUE_OFFSET	0x200
+#define KFD_CIK_SDMA_QUEUE_OFFSET (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL)
 
 /* valid for both DEFAULT_MTYPE and APE1_MTYPE */
 enum {
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
index 003a131bad474db5d28584735c0e399b1def20d6..567a904804bc694caed08e3072128078fb4cee4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
@@ -48,7 +48,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_1[] =
     0x00000000, // DB_STENCIL_WRITE_BASE
     0x00000000, // DB_STENCIL_WRITE_BASE_HI
     0x00000000, // DB_DFSM_CONTROL
-    0x00000000, // DB_RENDER_FILTER
+    0, // HOLE
     0x00000000, // DB_Z_INFO2
     0x00000000, // DB_STENCIL_INFO2
     0, // HOLE
@@ -259,8 +259,8 @@ static const unsigned int gfx9_SECT_CONTEXT_def_2[] =
     0x00000000, // PA_SC_RIGHT_VERT_GRID
     0x00000000, // PA_SC_LEFT_VERT_GRID
     0x00000000, // PA_SC_HORIZ_GRID
-    0x00000000, // PA_SC_FOV_WINDOW_LR
-    0x00000000, // PA_SC_FOV_WINDOW_TB
+    0, // HOLE
+    0, // HOLE
     0, // HOLE
     0, // HOLE
     0, // HOLE
@@ -701,7 +701,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_7[] =
 {
     0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP
     0x00000000, // VGT_DRAW_PAYLOAD_CNTL
-    0x00000000, // VGT_INDEX_PAYLOAD_CNTL
+    0, // HOLE
     0x00000000, // VGT_INSTANCE_STEP_RATE_0
     0x00000000, // VGT_INSTANCE_STEP_RATE_1
     0, // HOLE
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index fa61d649bb44a5c89126bd7cd680ad88c39ce52f..f576e9cbbc61c613551770fee2f4159dba9f4f8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -259,7 +259,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
 	entry->src_id = dw[0] & 0xff;
 	entry->src_data[0] = dw[1] & 0xfffffff;
 	entry->ring_id = dw[2] & 0xff;
-	entry->vm_id = (dw[2] >> 8) & 0xff;
+	entry->vmid = (dw[2] >> 8) & 0xff;
 	entry->pas_id = (dw[2] >> 16) & 0xffff;
 
 	/* wptr/rptr are in bytes! */
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index a397111c2cedb9d34aa2d4de292af9bbe19e3499..f34bc68aadfb119380e5f6ff1a279a996d4f453a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -145,20 +145,20 @@ static void dce_v10_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
-		amdgpu_program_register_sequence(adev,
-						 fiji_mgcg_cgcg_init,
-						 ARRAY_SIZE(fiji_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_fiji_a10,
-						 ARRAY_SIZE(golden_settings_fiji_a10));
+		amdgpu_device_program_register_sequence(adev,
+							fiji_mgcg_cgcg_init,
+							ARRAY_SIZE(fiji_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_fiji_a10,
+							ARRAY_SIZE(golden_settings_fiji_a10));
 		break;
 	case CHIP_TONGA:
-		amdgpu_program_register_sequence(adev,
-						 tonga_mgcg_cgcg_init,
-						 ARRAY_SIZE(tonga_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_tonga_a11,
-						 ARRAY_SIZE(golden_settings_tonga_a11));
+		amdgpu_device_program_register_sequence(adev,
+							tonga_mgcg_cgcg_init,
+							ARRAY_SIZE(tonga_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_tonga_a11,
+							ARRAY_SIZE(golden_settings_tonga_a11));
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 67e670989e81539e9de3ceac09a4d2be99a22407..26378bd6aba45a86e18e1d7ac122182a7730a811 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -154,28 +154,28 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_CARRIZO:
-		amdgpu_program_register_sequence(adev,
-						 cz_mgcg_cgcg_init,
-						 ARRAY_SIZE(cz_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 cz_golden_settings_a11,
-						 ARRAY_SIZE(cz_golden_settings_a11));
+		amdgpu_device_program_register_sequence(adev,
+							cz_mgcg_cgcg_init,
+							ARRAY_SIZE(cz_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							cz_golden_settings_a11,
+							ARRAY_SIZE(cz_golden_settings_a11));
 		break;
 	case CHIP_STONEY:
-		amdgpu_program_register_sequence(adev,
-						 stoney_golden_settings_a11,
-						 ARRAY_SIZE(stoney_golden_settings_a11));
+		amdgpu_device_program_register_sequence(adev,
+							stoney_golden_settings_a11,
+							ARRAY_SIZE(stoney_golden_settings_a11));
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
-		amdgpu_program_register_sequence(adev,
-						 polaris11_golden_settings_a11,
-						 ARRAY_SIZE(polaris11_golden_settings_a11));
+		amdgpu_device_program_register_sequence(adev,
+							polaris11_golden_settings_a11,
+							ARRAY_SIZE(polaris11_golden_settings_a11));
 		break;
 	case CHIP_POLARIS10:
-		amdgpu_program_register_sequence(adev,
-						 polaris10_golden_settings_a11,
-						 ARRAY_SIZE(polaris10_golden_settings_a11));
+		amdgpu_device_program_register_sequence(adev,
+							polaris10_golden_settings_a11,
+							ARRAY_SIZE(polaris10_golden_settings_a11));
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index edef17d93527a585a8379ce4bc4c29f5a14b14b2..9870d83b68c17448df2df0ba770801d4c2329132 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1874,7 +1874,7 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 
 static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
 				  struct amdgpu_ib *ib,
-				  unsigned vm_id, bool ctx_switch)
+				  unsigned vmid, bool ctx_switch)
 {
 	u32 header, control = 0;
 
@@ -1889,7 +1889,7 @@ static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
 	else
 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-	control |= ib->length_dw | (vm_id << 24);
+	control |= ib->length_dw | (vmid << 24);
 
 	amdgpu_ring_write(ring, header);
 	amdgpu_ring_write(ring,
@@ -2354,7 +2354,7 @@ static void gfx_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 }
 
 static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					unsigned vm_id, uint64_t pd_addr)
+					unsigned vmid, uint64_t pd_addr)
 {
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 
@@ -2362,10 +2362,10 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
 				 WRITE_DATA_DST_SEL(0)));
-	if (vm_id < 8) {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id ));
+	if (vmid < 8) {
+		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid ));
 	} else {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8)));
+		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8)));
 	}
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, pd_addr >> 12);
@@ -2376,7 +2376,7 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 				 WRITE_DATA_DST_SEL(0)));
 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
 	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vmid);
 
 	/* wait for the invalidate to complete */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 83d94c23aa782846367a90a828d7137d7a464aeb..a066c5eda135a782d8e01f730cfc10172210c7c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -48,6 +48,8 @@
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
+#define NUM_SIMD_PER_CU 0x4 /* missing from the gfx_7 IP headers */
+
 #define GFX7_NUM_GFX_RINGS     1
 #define GFX7_MEC_HPD_SIZE      2048
 
@@ -2252,7 +2254,7 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
  */
 static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 				      struct amdgpu_ib *ib,
-				      unsigned vm_id, bool ctx_switch)
+				      unsigned vmid, bool ctx_switch)
 {
 	u32 header, control = 0;
 
@@ -2267,7 +2269,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 	else
 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-	control |= ib->length_dw | (vm_id << 24);
+	control |= ib->length_dw | (vmid << 24);
 
 	amdgpu_ring_write(ring, header);
 	amdgpu_ring_write(ring,
@@ -2281,9 +2283,9 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 
 static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 					  struct amdgpu_ib *ib,
-					  unsigned vm_id, bool ctx_switch)
+					  unsigned vmid, bool ctx_switch)
 {
-	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
+	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
@@ -3237,19 +3239,19 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
  * using the CP (CIK).
  */
 static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					unsigned vm_id, uint64_t pd_addr)
+					unsigned vmid, uint64_t pd_addr)
 {
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)));
-	if (vm_id < 8) {
+	if (vmid < 8) {
 		amdgpu_ring_write(ring,
-				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
+				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
 	} else {
 		amdgpu_ring_write(ring,
-				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
+				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
 	}
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, pd_addr >> 12);
@@ -3260,7 +3262,7 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 				 WRITE_DATA_DST_SEL(0)));
 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
 	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vmid);
 
 	/* wait for the invalidate to complete */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -5277,6 +5279,11 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 
 	cu_info->number = active_cu_number;
 	cu_info->ao_cu_mask = ao_cu_mask;
+	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+	cu_info->max_waves_per_simd = 10;
+	cu_info->max_scratch_slots_per_cu = 32;
+	cu_info->wave_front_size = 64;
+	cu_info->lds_size = 64;
 }
 
 const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index d02493cf91759a9e497454dde0dc7a811aebe041..4e694ae9f3082442658af492a430d350722d3f0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -679,55 +679,55 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
-		amdgpu_program_register_sequence(adev,
-						 iceland_mgcg_cgcg_init,
-						 ARRAY_SIZE(iceland_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_iceland_a11,
-						 ARRAY_SIZE(golden_settings_iceland_a11));
-		amdgpu_program_register_sequence(adev,
-						 iceland_golden_common_all,
-						 ARRAY_SIZE(iceland_golden_common_all));
+		amdgpu_device_program_register_sequence(adev,
+							iceland_mgcg_cgcg_init,
+							ARRAY_SIZE(iceland_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_iceland_a11,
+							ARRAY_SIZE(golden_settings_iceland_a11));
+		amdgpu_device_program_register_sequence(adev,
+							iceland_golden_common_all,
+							ARRAY_SIZE(iceland_golden_common_all));
 		break;
 	case CHIP_FIJI:
-		amdgpu_program_register_sequence(adev,
-						 fiji_mgcg_cgcg_init,
-						 ARRAY_SIZE(fiji_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_fiji_a10,
-						 ARRAY_SIZE(golden_settings_fiji_a10));
-		amdgpu_program_register_sequence(adev,
-						 fiji_golden_common_all,
-						 ARRAY_SIZE(fiji_golden_common_all));
+		amdgpu_device_program_register_sequence(adev,
+							fiji_mgcg_cgcg_init,
+							ARRAY_SIZE(fiji_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_fiji_a10,
+							ARRAY_SIZE(golden_settings_fiji_a10));
+		amdgpu_device_program_register_sequence(adev,
+							fiji_golden_common_all,
+							ARRAY_SIZE(fiji_golden_common_all));
 		break;
 
 	case CHIP_TONGA:
-		amdgpu_program_register_sequence(adev,
-						 tonga_mgcg_cgcg_init,
-						 ARRAY_SIZE(tonga_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_tonga_a11,
-						 ARRAY_SIZE(golden_settings_tonga_a11));
-		amdgpu_program_register_sequence(adev,
-						 tonga_golden_common_all,
-						 ARRAY_SIZE(tonga_golden_common_all));
+		amdgpu_device_program_register_sequence(adev,
+							tonga_mgcg_cgcg_init,
+							ARRAY_SIZE(tonga_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_tonga_a11,
+							ARRAY_SIZE(golden_settings_tonga_a11));
+		amdgpu_device_program_register_sequence(adev,
+							tonga_golden_common_all,
+							ARRAY_SIZE(tonga_golden_common_all));
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_polaris11_a11,
-						 ARRAY_SIZE(golden_settings_polaris11_a11));
-		amdgpu_program_register_sequence(adev,
-						 polaris11_golden_common_all,
-						 ARRAY_SIZE(polaris11_golden_common_all));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_polaris11_a11,
+							ARRAY_SIZE(golden_settings_polaris11_a11));
+		amdgpu_device_program_register_sequence(adev,
+							polaris11_golden_common_all,
+							ARRAY_SIZE(polaris11_golden_common_all));
 		break;
 	case CHIP_POLARIS10:
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_polaris10_a11,
-						 ARRAY_SIZE(golden_settings_polaris10_a11));
-		amdgpu_program_register_sequence(adev,
-						 polaris10_golden_common_all,
-						 ARRAY_SIZE(polaris10_golden_common_all));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_polaris10_a11,
+							ARRAY_SIZE(golden_settings_polaris10_a11));
+		amdgpu_device_program_register_sequence(adev,
+							polaris10_golden_common_all,
+							ARRAY_SIZE(polaris10_golden_common_all));
 		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
 		if (adev->pdev->revision == 0xc7 &&
 		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
@@ -738,26 +738,26 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 		}
 		break;
 	case CHIP_CARRIZO:
-		amdgpu_program_register_sequence(adev,
-						 cz_mgcg_cgcg_init,
-						 ARRAY_SIZE(cz_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 cz_golden_settings_a11,
-						 ARRAY_SIZE(cz_golden_settings_a11));
-		amdgpu_program_register_sequence(adev,
-						 cz_golden_common_all,
-						 ARRAY_SIZE(cz_golden_common_all));
+		amdgpu_device_program_register_sequence(adev,
+							cz_mgcg_cgcg_init,
+							ARRAY_SIZE(cz_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							cz_golden_settings_a11,
+							ARRAY_SIZE(cz_golden_settings_a11));
+		amdgpu_device_program_register_sequence(adev,
+							cz_golden_common_all,
+							ARRAY_SIZE(cz_golden_common_all));
 		break;
 	case CHIP_STONEY:
-		amdgpu_program_register_sequence(adev,
-						 stoney_mgcg_cgcg_init,
-						 ARRAY_SIZE(stoney_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 stoney_golden_settings_a11,
-						 ARRAY_SIZE(stoney_golden_settings_a11));
-		amdgpu_program_register_sequence(adev,
-						 stoney_golden_common_all,
-						 ARRAY_SIZE(stoney_golden_common_all));
+		amdgpu_device_program_register_sequence(adev,
+							stoney_mgcg_cgcg_init,
+							ARRAY_SIZE(stoney_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							stoney_golden_settings_a11,
+							ARRAY_SIZE(stoney_golden_settings_a11));
+		amdgpu_device_program_register_sequence(adev,
+							stoney_golden_common_all,
+							ARRAY_SIZE(stoney_golden_common_all));
 		break;
 	default:
 		break;
@@ -5062,8 +5062,9 @@ static int gfx_v8_0_hw_fini(void *handle)
 	gfx_v8_0_cp_enable(adev, false);
 	gfx_v8_0_rlc_stop(adev);
 
-	amdgpu_set_powergating_state(adev,
-			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
+	amdgpu_device_ip_set_powergating_state(adev,
+					       AMD_IP_BLOCK_TYPE_GFX,
+					       AMD_PG_STATE_UNGATE);
 
 	return 0;
 }
@@ -5480,8 +5481,9 @@ static int gfx_v8_0_late_init(void *handle)
 	if (r)
 		return r;
 
-	amdgpu_set_powergating_state(adev,
-			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
+	amdgpu_device_ip_set_powergating_state(adev,
+					       AMD_IP_BLOCK_TYPE_GFX,
+					       AMD_PG_STATE_GATE);
 
 	return 0;
 }
@@ -5492,10 +5494,10 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade
 	if ((adev->asic_type == CHIP_POLARIS11) ||
 	    (adev->asic_type == CHIP_POLARIS12))
 		/* Send msg to SMU via Powerplay */
-		amdgpu_set_powergating_state(adev,
-					     AMD_IP_BLOCK_TYPE_SMC,
-					     enable ?
-					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
+		amdgpu_device_ip_set_powergating_state(adev,
+						       AMD_IP_BLOCK_TYPE_SMC,
+						       enable ?
+						       AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
 
 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
 }
@@ -6243,7 +6245,7 @@ static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
 
 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 				      struct amdgpu_ib *ib,
-				      unsigned vm_id, bool ctx_switch)
+				      unsigned vmid, bool ctx_switch)
 {
 	u32 header, control = 0;
 
@@ -6252,7 +6254,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 	else
 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-	control |= ib->length_dw | (vm_id << 24);
+	control |= ib->length_dw | (vmid << 24);
 
 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
 		control |= INDIRECT_BUFFER_PRE_ENB(1);
@@ -6273,9 +6275,9 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 
 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 					  struct amdgpu_ib *ib,
-					  unsigned vm_id, bool ctx_switch)
+					  unsigned vmid, bool ctx_switch)
 {
-	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
+	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
@@ -6326,7 +6328,7 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 }
 
 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					unsigned vm_id, uint64_t pd_addr)
+					unsigned vmid, uint64_t pd_addr)
 {
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 
@@ -6334,12 +6336,12 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)) |
 				 WR_CONFIRM);
-	if (vm_id < 8) {
+	if (vmid < 8) {
 		amdgpu_ring_write(ring,
-				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
+				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
 	} else {
 		amdgpu_ring_write(ring,
-				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
+				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
 	}
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, pd_addr >> 12);
@@ -6351,7 +6353,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 				 WRITE_DATA_DST_SEL(0)));
 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
 	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vmid);
 
 	/* wait for the invalidate to complete */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -6472,10 +6474,10 @@ static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
 	mutex_unlock(&adev->srbm_mutex);
 }
 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
-					       enum amd_sched_priority priority)
+					       enum drm_sched_priority priority)
 {
 	struct amdgpu_device *adev = ring->adev;
-	bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW;
+	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
 
 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
 		return;
@@ -7114,6 +7116,11 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 
 	cu_info->number = active_cu_number;
 	cu_info->ao_cu_mask = ao_cu_mask;
+	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+	cu_info->max_waves_per_simd = 10;
+	cu_info->max_scratch_slots_per_cu = 32;
+	cu_info->wave_front_size = 64;
+	cu_info->lds_size = 64;
 }
 
 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6c5289ae67be3f912edeac55c472e7374bef989a..c06479615e8a4c69fc3baa2f4802ffcb9dbefdb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -28,7 +28,6 @@
 #include "soc15.h"
 #include "soc15d.h"
 
-#include "soc15ip.h"
 #include "gc/gc_9_0_offset.h"
 #include "gc/gc_9_0_sh_mask.h"
 #include "vega10_enum.h"
@@ -65,152 +64,84 @@ MODULE_FIRMWARE("amdgpu/raven_mec.bin");
 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
 
-static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
-{
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)},
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14) },
-	{ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15),
-	  SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15) }
+static const struct soc15_reg_golden golden_settings_gc_9_0[] =
+{
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
 };
 
-static const u32 golden_settings_gc_9_0[] =
-{
-	SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420,
-	SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000,
-	SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024,
-	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001,
-	SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000,
-	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), 0x00001000, 0x00001000,
-	SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x01000107,
-	SOC15_REG_OFFSET(GC, 0, mmSQC_CONFIG), 0x03000000, 0x020a2000,
-	SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000,
-	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68,
-	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197,
-	SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000,
-	SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff,
-	SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080
-};
-
-static const u32 golden_settings_gc_9_0_vg10[] =
+static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
 {
-	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0x0000f000, 0x00012107,
-	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000,
-	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x2a114042,
-	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042,
-	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000,
-	SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000,
-	SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800)
 };
 
-static const u32 golden_settings_gc_9_1[] =
-{
-	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0xfffdf3cf, 0x00014104,
-	SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420,
-	SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000,
-	SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024,
-	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001,
-	SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000,
-	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080,
-	SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000,
-	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x00000000,
-	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0x00003120,
-	SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000,
-	SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000000ff,
-	SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080
+static const struct soc15_reg_golden golden_settings_gc_9_1[] =
+{
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
 };
 
-static const u32 golden_settings_gc_9_1_rv1[] =
+static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
 {
-	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000,
-	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x24000042,
-	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x24000042,
-	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0xffffffff, 0x04048000,
-	SOC15_REG_OFFSET(GC, 0, mmPA_SC_MODE_CNTL_1), 0x06000000, 0x06000000,
-	SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000,
-	SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x01bd9f33, 0x00000800
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
 };
 
-static const u32 golden_settings_gc_9_x_common[] =
+static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
 {
-	SOC15_REG_OFFSET(GC, 0, mmGRBM_CAM_INDEX), 0xffffffff, 0x00000000,
-	SOC15_REG_OFFSET(GC, 0, mmGRBM_CAM_DATA), 0xffffffff, 0x2544c382
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
 };
 
 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
@@ -230,18 +161,18 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
-		amdgpu_program_register_sequence(adev,
+		soc15_program_register_sequence(adev,
 						 golden_settings_gc_9_0,
 						 ARRAY_SIZE(golden_settings_gc_9_0));
-		amdgpu_program_register_sequence(adev,
+		soc15_program_register_sequence(adev,
 						 golden_settings_gc_9_0_vg10,
 						 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
 		break;
 	case CHIP_RAVEN:
-		amdgpu_program_register_sequence(adev,
+		soc15_program_register_sequence(adev,
 						 golden_settings_gc_9_1,
 						 ARRAY_SIZE(golden_settings_gc_9_1));
-		amdgpu_program_register_sequence(adev,
+		soc15_program_register_sequence(adev,
 						 golden_settings_gc_9_1_rv1,
 						 ARRAY_SIZE(golden_settings_gc_9_1_rv1));
 		break;
@@ -249,7 +180,7 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 		break;
 	}
 
-	amdgpu_program_register_sequence(adev, golden_settings_gc_9_x_common,
+	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
 					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
 }
 
@@ -1137,8 +1068,8 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
 	adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
 	adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
-	adev->gfx.ngg.gds_reserve_addr = amdgpu_gds_reg_offset[0].mem_base;
-	adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size;
+	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
+	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
 
 	/* Primitive Buffer */
 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
@@ -1243,23 +1174,24 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
 	}
 
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
-				   amdgpu_gds_reg_offset[0].mem_size,
+				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
 			           (adev->gds.mem.total_size +
 				    adev->gfx.ngg.gds_reserve_size) >>
 				   AMDGPU_GDS_SHIFT);
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
+				PACKET3_DMA_DATA_DST_SEL(1) |
 				PACKET3_DMA_DATA_SRC_SEL(2)));
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
 	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size);
-
+	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
+				adev->gfx.ngg.gds_reserve_size);
 
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
-				   amdgpu_gds_reg_offset[0].mem_size, 0);
+				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
 
 	amdgpu_ring_commit(ring);
 
@@ -1595,14 +1527,21 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
 	/* XXX SH_MEM regs */
 	/* where to put LDS, scratch, GPUVM in FSA64 space */
 	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < 16; i++) {
+	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
 		soc15_grbm_select(adev, 0, 0, 0, i);
 		/* CP and shaders */
-		tmp = 0;
-		tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
-				    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
-		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
-		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
+		if (i == 0) {
+			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
+			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
+		} else {
+			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
+			tmp = adev->mc.shared_aperture_start >> 48;
+			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
+		}
 	}
 	soc15_grbm_select(adev, 0, 0, 0, 0);
 
@@ -2474,7 +2413,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
-				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */
+				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
@@ -3146,6 +3085,8 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 					  uint32_t gws_base, uint32_t gws_size,
 					  uint32_t oa_base, uint32_t oa_size)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
 
@@ -3157,22 +3098,22 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 
 	/* GDS Base */
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
-				   amdgpu_gds_reg_offset[vmid].mem_base,
+				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
 				   gds_base);
 
 	/* GDS Size */
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
-				   amdgpu_gds_reg_offset[vmid].mem_size,
+				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
 				   gds_size);
 
 	/* GWS */
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
-				   amdgpu_gds_reg_offset[vmid].gws,
+				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
 
 	/* OA */
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
-				   amdgpu_gds_reg_offset[vmid].oa,
+				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
 }
 
@@ -3617,13 +3558,9 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
 
 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
+	struct amdgpu_device *adev = ring->adev;
 	u32 ref_and_mask, reg_mem_engine;
-	const struct nbio_hdp_flush_reg *nbio_hf_reg;
-
-	if (ring->adev->flags & AMD_IS_APU)
-		nbio_hf_reg = &nbio_v7_0_hdp_flush_reg;
-	else
-		nbio_hf_reg = &nbio_v6_1_hdp_flush_reg;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
 
 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
 		switch (ring->me) {
@@ -3643,20 +3580,22 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	}
 
 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
-			      nbio_hf_reg->hdp_flush_req_offset,
-			      nbio_hf_reg->hdp_flush_done_offset,
+			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
+			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
 			      ref_and_mask, ref_and_mask, 0x20);
 }
 
 static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	gfx_v9_0_write_data_to_reg(ring, 0, true,
 				   SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
 }
 
 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
                                       struct amdgpu_ib *ib,
-                                      unsigned vm_id, bool ctx_switch)
+                                      unsigned vmid, bool ctx_switch)
 {
 	u32 header, control = 0;
 
@@ -3665,7 +3604,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 	else
 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-	control |= ib->length_dw | (vm_id << 24);
+	control |= ib->length_dw | (vmid << 24);
 
 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
 		control |= INDIRECT_BUFFER_PRE_ENB(1);
@@ -3687,9 +3626,9 @@ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
 
 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                           struct amdgpu_ib *ib,
-                                          unsigned vm_id, bool ctx_switch)
+                                          unsigned vmid, bool ctx_switch)
 {
-        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
+        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
@@ -3745,22 +3684,23 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 }
 
 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					unsigned vm_id, uint64_t pd_addr)
+					unsigned vmid, uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
-	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
 	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-				   hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
+				   hub->ctx0_ptb_addr_lo32 + (2 * vmid),
 				   lower_32_bits(pd_addr));
 
 	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-				   hub->ctx0_ptb_addr_hi32 + (2 * vm_id),
+				   hub->ctx0_ptb_addr_hi32 + (2 * vmid),
 				   upper_32_bits(pd_addr));
 
 	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
@@ -3768,7 +3708,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 
 	/* wait for the invalidate to complete */
 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
-			      eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
+			      eng, 0, 1 << vmid, 1 << vmid, 0x20);
 
 	/* compute doesn't have PFP */
 	if (usepfp) {
@@ -3811,6 +3751,8 @@ static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
 					 u64 seq, unsigned int flags)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	/* we only allocate 32bit for each seq wb address */
 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index f1effadfbaa68645b4eccd78e77879aad7ff4d0d..56f5fe4e2feefdc5b0c5a17620e48fa26b903989 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -23,7 +23,6 @@
 #include "amdgpu.h"
 #include "gfxhub_v1_0.h"
 
-#include "soc15ip.h"
 #include "gc/gc_9_0_offset.h"
 #include "gc/gc_9_0_sh_mask.h"
 #include "gc/gc_9_0_default.h"
@@ -144,8 +143,15 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 	WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
 
 	tmp = mmVM_L2_CNTL3_DEFAULT;
-	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
-	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+	if (adev->mc.translate_further) {
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+	} else {
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+				    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+	}
 	WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
 
 	tmp = mmVM_L2_CNTL4_DEFAULT;
@@ -183,31 +189,40 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
 
 static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
 {
-	int i;
+	unsigned num_level, block_size;
 	uint32_t tmp;
+	int i;
+
+	num_level = adev->vm_manager.num_level;
+	block_size = adev->vm_manager.block_size;
+	if (adev->mc.translate_further)
+		num_level -= 1;
+	else
+		block_size -= 9;
 
 	for (i = 0; i <= 14; i++) {
 		tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
-				    adev->vm_manager.num_level);
+				    num_level);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				PAGE_TABLE_BLOCK_SIZE,
-				adev->vm_manager.block_size - 9);
+				    PAGE_TABLE_BLOCK_SIZE,
+				    block_size);
 		/* Send no-retry XNACK on fault to suppress VM fault storm. */
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
 				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 468281f10e8d21b880e7e043bc97d015c14f9cdd..8e28270d1ea969e96b37d6c18bd7e9f2bc50c748 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -222,8 +222,8 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
 	u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
 	base <<= 24;
 
-	amdgpu_vram_location(adev, &adev->mc, base);
-	amdgpu_gart_location(adev, mc);
+	amdgpu_device_vram_location(adev, &adev->mc, base);
+	amdgpu_device_gart_location(adev, mc);
 }
 
 static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
@@ -395,10 +395,10 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
-static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
+				uint64_t *addr, uint64_t *flags)
 {
-	BUG_ON(addr & 0xFFFFFF0000000FFFULL);
-	return addr;
+	BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
 }
 
 static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
@@ -956,7 +956,7 @@ static int gmc_v6_0_resume(void *handle)
 	if (r)
 		return r;
 
-	amdgpu_vm_reset_all_ids(adev);
+	amdgpu_vmid_reset_all(adev);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 68a85051f4b705a39977d050003e36d575fb4418..86e9d682c59e8760d0751d38908a4c072764e123 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -67,12 +67,12 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
-		amdgpu_program_register_sequence(adev,
-						 iceland_mgcg_cgcg_init,
-						 ARRAY_SIZE(iceland_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_iceland_a11,
-						 ARRAY_SIZE(golden_settings_iceland_a11));
+		amdgpu_device_program_register_sequence(adev,
+							iceland_mgcg_cgcg_init,
+							ARRAY_SIZE(iceland_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_iceland_a11,
+							ARRAY_SIZE(golden_settings_iceland_a11));
 		break;
 	default:
 		break;
@@ -240,8 +240,8 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
 	u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
 	base <<= 24;
 
-	amdgpu_vram_location(adev, &adev->mc, base);
-	amdgpu_gart_location(adev, mc);
+	amdgpu_device_vram_location(adev, &adev->mc, base);
+	amdgpu_device_gart_location(adev, mc);
 }
 
 /**
@@ -480,10 +480,10 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
-static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
+				uint64_t *addr, uint64_t *flags)
 {
-	BUG_ON(addr & 0xFFFFFF0000000FFFULL);
-	return addr;
+	BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
 }
 
 /**
@@ -1107,7 +1107,7 @@ static int gmc_v7_0_resume(void *handle)
 	if (r)
 		return r;
 
-	amdgpu_vm_reset_all_ids(adev);
+	amdgpu_vmid_reset_all(adev);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 46ec97e70e5c175412b7eee2a55dc7b800c315f7..9a813d834f1a21e72908cea34a0641343c848e30 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -120,44 +120,44 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
-		amdgpu_program_register_sequence(adev,
-						 fiji_mgcg_cgcg_init,
-						 ARRAY_SIZE(fiji_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_fiji_a10,
-						 ARRAY_SIZE(golden_settings_fiji_a10));
+		amdgpu_device_program_register_sequence(adev,
+							fiji_mgcg_cgcg_init,
+							ARRAY_SIZE(fiji_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_fiji_a10,
+							ARRAY_SIZE(golden_settings_fiji_a10));
 		break;
 	case CHIP_TONGA:
-		amdgpu_program_register_sequence(adev,
-						 tonga_mgcg_cgcg_init,
-						 ARRAY_SIZE(tonga_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_tonga_a11,
-						 ARRAY_SIZE(golden_settings_tonga_a11));
+		amdgpu_device_program_register_sequence(adev,
+							tonga_mgcg_cgcg_init,
+							ARRAY_SIZE(tonga_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_tonga_a11,
+							ARRAY_SIZE(golden_settings_tonga_a11));
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_polaris11_a11,
-						 ARRAY_SIZE(golden_settings_polaris11_a11));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_polaris11_a11,
+							ARRAY_SIZE(golden_settings_polaris11_a11));
 		break;
 	case CHIP_POLARIS10:
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_polaris10_a11,
-						 ARRAY_SIZE(golden_settings_polaris10_a11));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_polaris10_a11,
+							ARRAY_SIZE(golden_settings_polaris10_a11));
 		break;
 	case CHIP_CARRIZO:
-		amdgpu_program_register_sequence(adev,
-						 cz_mgcg_cgcg_init,
-						 ARRAY_SIZE(cz_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							cz_mgcg_cgcg_init,
+							ARRAY_SIZE(cz_mgcg_cgcg_init));
 		break;
 	case CHIP_STONEY:
-		amdgpu_program_register_sequence(adev,
-						 stoney_mgcg_cgcg_init,
-						 ARRAY_SIZE(stoney_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_stoney_common,
-						 ARRAY_SIZE(golden_settings_stoney_common));
+		amdgpu_device_program_register_sequence(adev,
+							stoney_mgcg_cgcg_init,
+							ARRAY_SIZE(stoney_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_stoney_common,
+							ARRAY_SIZE(golden_settings_stoney_common));
 		break;
 	default:
 		break;
@@ -405,8 +405,8 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
 		base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
 	base <<= 24;
 
-	amdgpu_vram_location(adev, &adev->mc, base);
-	amdgpu_gart_location(adev, mc);
+	amdgpu_device_vram_location(adev, &adev->mc, base);
+	amdgpu_device_gart_location(adev, mc);
 }
 
 /**
@@ -677,10 +677,10 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
-static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
+				uint64_t *addr, uint64_t *flags)
 {
-	BUG_ON(addr & 0xFFFFFF0000000FFFULL);
-	return addr;
+	BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
 }
 
 /**
@@ -1212,7 +1212,7 @@ static int gmc_v8_0_resume(void *handle)
 	if (r)
 		return r;
 
-	amdgpu_vm_reset_all_ids(adev);
+	amdgpu_vmid_reset_all(adev);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index cc972153d401ce1bc507a0e196e5148d22acdbc8..2719937e09d6bf00a4f78c47cd970ce5f5a51000 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -25,7 +25,6 @@
 #include "gmc_v9_0.h"
 #include "amdgpu_atomfirmware.h"
 
-#include "soc15ip.h"
 #include "hdp/hdp_4_0_offset.h"
 #include "hdp/hdp_4_0_sh_mask.h"
 #include "gc/gc_9_0_sh_mask.h"
@@ -35,11 +34,10 @@
 #include "mmhub/mmhub_1_0_offset.h"
 #include "athub/athub_1_0_offset.h"
 
+#include "soc15.h"
 #include "soc15_common.h"
 #include "umc/umc_6_0_sh_mask.h"
 
-#include "nbio_v6_1.h"
-#include "nbio_v7_0.h"
 #include "gfxhub_v1_0.h"
 #include "mmhub_v1_0.h"
 
@@ -74,16 +72,16 @@ static const u32 golden_settings_vega10_hdp[] =
 	0xf6e, 0x0fffffff, 0x00000000,
 };
 
-static const u32 golden_settings_mmhub_1_0_0[] =
+static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
 {
-	SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_WRCLI2), 0x00000007, 0xfe5fe0fa,
-	SOC15_REG_OFFSET(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0), 0x00000030, 0x55555565
+	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
+	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
 };
 
-static const u32 golden_settings_athub_1_0_0[] =
+static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
 {
-	SOC15_REG_OFFSET(ATHUB, 0, mmRPB_ARB_CNTL), 0x0000ff00, 0x00000800,
-	SOC15_REG_OFFSET(ATHUB, 0, mmRPB_ARB_CNTL2), 0x00ff00ff, 0x00080008
+	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
 };
 
 /* Ecc related register addresses, (BASE + reg offset) */
@@ -250,7 +248,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 				struct amdgpu_irq_src *source,
 				struct amdgpu_iv_entry *entry)
 {
-	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vm_id_src];
+	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
 	uint32_t status = 0;
 	u64 addr;
 
@@ -264,9 +262,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 
 	if (printk_ratelimit()) {
 		dev_err(adev->dev,
-			"[%s] VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u)\n",
-			entry->vm_id_src ? "mmhub" : "gfxhub",
-			entry->src_id, entry->ring_id, entry->vm_id,
+			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pas_id:%u)\n",
+			entry->vmid_src ? "mmhub" : "gfxhub",
+			entry->src_id, entry->ring_id, entry->vmid,
 			entry->pas_id);
 		dev_err(adev->dev, "  at page 0x%016llx from %d\n",
 			addr, entry->client_id);
@@ -290,13 +288,13 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
 	adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
 }
 
-static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vm_id)
+static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
 {
 	u32 req = 0;
 
-	/* invalidate using legacy mode on vm_id*/
+	/* invalidate using legacy mode on vmid*/
 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
-			    PER_VMID_INVALIDATE_REQ, 1 << vm_id);
+			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
@@ -332,10 +330,7 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
 	unsigned i, j;
 
 	/* flush hdp cache */
-	if (adev->flags & AMD_IS_APU)
-		nbio_v7_0_hdp_flush(adev);
-	else
-		nbio_v6_1_hdp_flush(adev);
+	adev->nbio_funcs->hdp_flush(adev);
 
 	spin_lock(&adev->mc.invalidate_lock);
 
@@ -474,11 +469,28 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
-static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
+static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
+				uint64_t *addr, uint64_t *flags)
 {
-	addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start;
-	BUG_ON(addr & 0xFFFF00000000003FULL);
-	return addr;
+	if (!(*flags & AMDGPU_PDE_PTE))
+		*addr = adev->vm_manager.vram_base_offset + *addr -
+			adev->mc.vram_start;
+	BUG_ON(*addr & 0xFFFF00000000003FULL);
+
+	if (!adev->mc.translate_further)
+		return;
+
+	if (level == AMDGPU_VM_PDB1) {
+		/* Set the block fragment size */
+		if (!(*flags & AMDGPU_PDE_PTE))
+			*flags |= AMDGPU_PDE_BFS(0x9);
+
+	} else if (level == AMDGPU_VM_PDB0) {
+		if (*flags & AMDGPU_PDE_PTE)
+			*flags &= ~AMDGPU_PDE_PTE;
+		else
+			*flags |= AMDGPU_PTE_TF;
+	}
 }
 
 static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
@@ -502,6 +514,14 @@ static int gmc_v9_0_early_init(void *handle)
 	gmc_v9_0_set_gart_funcs(adev);
 	gmc_v9_0_set_irq_funcs(adev);
 
+	adev->mc.shared_aperture_start = 0x2000000000000000ULL;
+	adev->mc.shared_aperture_end =
+		adev->mc.shared_aperture_start + (4ULL << 30) - 1;
+	adev->mc.private_aperture_start =
+		adev->mc.shared_aperture_end + 1;
+	adev->mc.private_aperture_end =
+		adev->mc.private_aperture_start + (4ULL << 30) - 1;
+
 	return 0;
 }
 
@@ -614,14 +634,16 @@ static int gmc_v9_0_late_init(void *handle)
 	for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
 		BUG_ON(vm_inv_eng[i] > 16);
 
-	r = gmc_v9_0_ecc_available(adev);
-	if (r == 1) {
-		DRM_INFO("ECC is active.\n");
-	} else if (r == 0) {
-		DRM_INFO("ECC is not present.\n");
-	} else {
-		DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r);
-		return r;
+	if (adev->asic_type == CHIP_VEGA10) {
+		r = gmc_v9_0_ecc_available(adev);
+		if (r == 1) {
+			DRM_INFO("ECC is active.\n");
+		} else if (r == 0) {
+			DRM_INFO("ECC is not present.\n");
+		} else {
+			DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r);
+			return r;
+		}
 	}
 
 	return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
@@ -633,8 +655,8 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
 	u64 base = 0;
 	if (!amdgpu_sriov_vf(adev))
 		base = mmhub_v1_0_get_fb_location(adev);
-	amdgpu_vram_location(adev, &adev->mc, base);
-	amdgpu_gart_location(adev, mc);
+	amdgpu_device_vram_location(adev, &adev->mc, base);
+	amdgpu_device_gart_location(adev, mc);
 	/* base offset of vram pages */
 	if (adev->flags & AMD_IS_APU)
 		adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
@@ -700,8 +722,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 
 	/* size in MB on si */
 	adev->mc.mc_vram_size =
-		((adev->flags & AMD_IS_APU) ? nbio_v7_0_get_memsize(adev) :
-		 nbio_v6_1_get_memsize(adev)) * 1024ULL * 1024ULL;
+		adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
 	adev->mc.real_vram_size = adev->mc.mc_vram_size;
 
 	if (!(adev->flags & AMD_IS_APU)) {
@@ -769,11 +790,14 @@ static int gmc_v9_0_sw_init(void *handle)
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
 		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
-		if (adev->rev_id == 0x0 || adev->rev_id == 0x1)
+		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
 			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
-		else
-			/* vm_size is 64GB for legacy 2-level page support */
-			amdgpu_vm_adjust_size(adev, 64, 9, 1, 48);
+		} else {
+			/* vm_size is 128TB + 512GB for legacy 3-level page support */
+			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
+			adev->mc.translate_further =
+				adev->vm_manager.num_level > 1;
+		}
 		break;
 	case CHIP_VEGA10:
 		/* XXX Don't know how to get VRAM type yet. */
@@ -883,17 +907,18 @@ static int gmc_v9_0_sw_fini(void *handle)
 
 static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
 {
+
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
-		amdgpu_program_register_sequence(adev,
+		soc15_program_register_sequence(adev,
 						golden_settings_mmhub_1_0_0,
 						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
-		amdgpu_program_register_sequence(adev,
+		soc15_program_register_sequence(adev,
 						golden_settings_athub_1_0_0,
 						ARRAY_SIZE(golden_settings_athub_1_0_0));
 		break;
 	case CHIP_RAVEN:
-		amdgpu_program_register_sequence(adev,
+		soc15_program_register_sequence(adev,
 						golden_settings_athub_1_0_0,
 						ARRAY_SIZE(golden_settings_athub_1_0_0));
 		break;
@@ -913,9 +938,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 	bool value;
 	u32 tmp;
 
-	amdgpu_program_register_sequence(adev,
-		golden_settings_vega10_hdp,
-		ARRAY_SIZE(golden_settings_vega10_hdp));
+	amdgpu_device_program_register_sequence(adev,
+						golden_settings_vega10_hdp,
+						ARRAY_SIZE(golden_settings_vega10_hdp));
 
 	if (adev->gart.robj == NULL) {
 		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
@@ -948,10 +973,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
 
 	/* After HDP is initialized, flush HDP.*/
-	if (adev->flags & AMD_IS_APU)
-		nbio_v7_0_hdp_flush(adev);
-	else
-		nbio_v6_1_hdp_flush(adev);
+	adev->nbio_funcs->hdp_flush(adev);
 
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
 		value = false;
@@ -1036,7 +1058,7 @@ static int gmc_v9_0_resume(void *handle)
 	if (r)
 		return r;
 
-	amdgpu_vm_reset_all_ids(adev);
+	amdgpu_vmid_reset_all(adev);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index bd592cb39f3708d72c645aecda67ca09fd521272..c4e4be3dd31d87de5af944b50ec97e2e2bac628e 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -259,7 +259,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev,
 	entry->src_id = dw[0] & 0xff;
 	entry->src_data[0] = dw[1] & 0xfffffff;
 	entry->ring_id = dw[2] & 0xff;
-	entry->vm_id = (dw[2] >> 8) & 0xff;
+	entry->vmid = (dw[2] >> 8) & 0xff;
 	entry->pas_id = (dw[2] >> 16) & 0xffff;
 
 	/* wptr/rptr are in bytes! */
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index f33d1ffdb20b2dd438ff014de4ce5e509ba0488d..d9e9e52a0defab36a12057ed2354b4bf7aebdd73 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -1682,8 +1682,8 @@ static void kv_dpm_powergate_uvd(void *handle, bool gate)
 
 	if (gate) {
 		/* stop the UVD block */
-		ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							AMD_PG_STATE_GATE);
+		ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							     AMD_PG_STATE_GATE);
 		kv_update_uvd_dpm(adev, gate);
 		if (pi->caps_uvd_pg)
 			/* power off the UVD block */
@@ -1695,8 +1695,8 @@ static void kv_dpm_powergate_uvd(void *handle, bool gate)
 			/* re-init the UVD block */
 		kv_update_uvd_dpm(adev, gate);
 
-		ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							AMD_PG_STATE_UNGATE);
+		ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							     AMD_PG_STATE_UNGATE);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index bd160d8700e0fcb816f44e276fa0ead1391c0fa5..ffd5b7ee49c46d7413d0b225db7fbc39f3ef5a93 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -23,7 +23,6 @@
 #include "amdgpu.h"
 #include "mmhub_v1_0.h"
 
-#include "soc15ip.h"
 #include "mmhub/mmhub_1_0_offset.h"
 #include "mmhub/mmhub_1_0_sh_mask.h"
 #include "mmhub/mmhub_1_0_default.h"
@@ -156,10 +155,15 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
 	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
 
-	tmp = mmVM_L2_CNTL3_DEFAULT;
-	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
-	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
-	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
+	if (adev->mc.translate_further) {
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+	} else {
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+				    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+	}
 
 	tmp = mmVM_L2_CNTL4_DEFAULT;
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
@@ -197,32 +201,40 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
 
 static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
 {
-	int i;
+	unsigned num_level, block_size;
 	uint32_t tmp;
+	int i;
+
+	num_level = adev->vm_manager.num_level;
+	block_size = adev->vm_manager.block_size;
+	if (adev->mc.translate_further)
+		num_level -= 1;
+	else
+		block_size -= 9;
 
 	for (i = 0; i <= 14; i++) {
 		tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
+		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+				    num_level);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				ENABLE_CONTEXT, 1);
-		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				PAGE_TABLE_DEPTH, adev->vm_manager.num_level);
-		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				PAGE_TABLE_BLOCK_SIZE,
-				adev->vm_manager.block_size - 9);
+				    PAGE_TABLE_BLOCK_SIZE,
+				    block_size);
 		/* Send no-retry XNACK on fault to suppress VM fault storm. */
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
 				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index ad9054e3903c3b1577670c6734e7b56d426ea03d..271452d3999a1d04b571a8ebc3e075a11b1f5db9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -22,7 +22,6 @@
  */
 
 #include "amdgpu.h"
-#include "soc15ip.h"
 #include "nbio/nbio_6_1_offset.h"
 #include "nbio/nbio_6_1_sh_mask.h"
 #include "gc/gc_9_0_offset.h"
@@ -254,7 +253,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	}
 
 	/* Trigger recovery due to world switch failure */
-	amdgpu_gpu_recover(adev, NULL);
+	amdgpu_device_gpu_recover(adev, NULL, false);
 }
 
 static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -278,7 +277,7 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
 	int r;
 
 	/* trigger gpu-reset by hypervisor only if TDR disbaled */
-	if (amdgpu_lockup_timeout == 0) {
+	if (!amdgpu_gpu_recovery) {
 		/* see what event we get */
 		r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index df52824c0cd416154251847b247b4ef638c99c03..9fc1c37344cebeaa468941d15552fe5e15b102e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -279,32 +279,32 @@ void xgpu_vi_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
-		amdgpu_program_register_sequence(adev,
-						 xgpu_fiji_mgcg_cgcg_init,
-						 ARRAY_SIZE(
-						 xgpu_fiji_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 xgpu_fiji_golden_settings_a10,
-						 ARRAY_SIZE(
-						 xgpu_fiji_golden_settings_a10));
-		amdgpu_program_register_sequence(adev,
-						 xgpu_fiji_golden_common_all,
-						 ARRAY_SIZE(
-						 xgpu_fiji_golden_common_all));
+		amdgpu_device_program_register_sequence(adev,
+							xgpu_fiji_mgcg_cgcg_init,
+							ARRAY_SIZE(
+								xgpu_fiji_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							xgpu_fiji_golden_settings_a10,
+							ARRAY_SIZE(
+								xgpu_fiji_golden_settings_a10));
+		amdgpu_device_program_register_sequence(adev,
+							xgpu_fiji_golden_common_all,
+							ARRAY_SIZE(
+								xgpu_fiji_golden_common_all));
 		break;
 	case CHIP_TONGA:
-		amdgpu_program_register_sequence(adev,
-						 xgpu_tonga_mgcg_cgcg_init,
-						 ARRAY_SIZE(
-						 xgpu_tonga_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 xgpu_tonga_golden_settings_a11,
-						 ARRAY_SIZE(
-						 xgpu_tonga_golden_settings_a11));
-		amdgpu_program_register_sequence(adev,
-						 xgpu_tonga_golden_common_all,
-						 ARRAY_SIZE(
-						 xgpu_tonga_golden_common_all));
+		amdgpu_device_program_register_sequence(adev,
+							xgpu_tonga_mgcg_cgcg_init,
+							ARRAY_SIZE(
+								xgpu_tonga_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							xgpu_tonga_golden_settings_a11,
+							ARRAY_SIZE(
+								xgpu_tonga_golden_settings_a11));
+		amdgpu_device_program_register_sequence(adev,
+							xgpu_tonga_golden_common_all,
+							ARRAY_SIZE(
+								xgpu_tonga_golden_common_all));
 		break;
 	default:
 		BUG_ON("Doesn't support chip type.\n");
@@ -521,7 +521,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
 	}
 
 	/* Trigger recovery due to world switch failure */
-	amdgpu_gpu_recover(adev, NULL);
+	amdgpu_device_gpu_recover(adev, NULL, false);
 }
 
 static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -545,7 +545,7 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
 	int r;
 
 	/* trigger gpu-reset by hypervisor only if TDR disbaled */
-	if (amdgpu_lockup_timeout == 0) {
+	if (!amdgpu_gpu_recovery) {
 		/* see what event we get */
 		r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 76db711097c7c0d51d7b8b83f4f1c1211a5a6a0a..d4da663d5eb0c7aed8826c72302008b757cd8d68 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -24,7 +24,6 @@
 #include "amdgpu_atombios.h"
 #include "nbio_v6_1.h"
 
-#include "soc15ip.h"
 #include "nbio/nbio_6_1_default.h"
 #include "nbio/nbio_6_1_offset.h"
 #include "nbio/nbio_6_1_sh_mask.h"
@@ -34,7 +33,7 @@
 #define smnPCIE_CNTL2                                                                                   0x11180070
 #define smnPCIE_CONFIG_CNTL                                                                             0x11180044
 
-u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev)
+static u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev)
 {
         u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
 
@@ -44,19 +43,7 @@ u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev)
 	return tmp;
 }
 
-u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev,
-					uint32_t idx)
-{
-	return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx);
-}
-
-void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev,
-					 uint32_t idx, uint32_t val)
-{
-	WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val);
-}
-
-void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable)
+static void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable)
 {
 	if (enable)
 		WREG32_SOC15(NBIO, 0, mmBIF_FB_EN,
@@ -66,26 +53,23 @@ void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable)
 		WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
 }
 
-void nbio_v6_1_hdp_flush(struct amdgpu_device *adev)
+static void nbio_v6_1_hdp_flush(struct amdgpu_device *adev)
 {
 	WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
 }
 
-u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev)
+static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev)
 {
 	return RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE);
 }
 
-static const u32 nbio_sdma_doorbell_range_reg[] =
-{
-	SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE),
-	SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE)
-};
-
-void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
 				  bool use_doorbell, int doorbell_index)
 {
-	u32 doorbell_range = RREG32(nbio_sdma_doorbell_range_reg[instance]);
+	u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
+			SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
+
+	u32 doorbell_range = RREG32(reg);
 
 	if (use_doorbell) {
 		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
@@ -93,17 +77,18 @@ void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
 	} else
 		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
 
-	WREG32(nbio_sdma_doorbell_range_reg[instance], doorbell_range);
+	WREG32(reg, doorbell_range);
+
 }
 
-void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev,
-					bool enable)
+static void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev,
+					       bool enable)
 {
 	WREG32_FIELD15(NBIO, 0, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0);
 }
 
-void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
-					bool enable)
+static void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+							bool enable)
 {
 	u32 tmp = 0;
 
@@ -122,8 +107,8 @@ void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
 }
 
 
-void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev,
-				bool use_doorbell, int doorbell_index)
+static void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev,
+					bool use_doorbell, int doorbell_index)
 {
 	u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE);
 
@@ -136,7 +121,7 @@ void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev,
 	WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range);
 }
 
-void nbio_v6_1_ih_control(struct amdgpu_device *adev)
+static void nbio_v6_1_ih_control(struct amdgpu_device *adev)
 {
 	u32 interrupt_cntl;
 
@@ -152,8 +137,8 @@ void nbio_v6_1_ih_control(struct amdgpu_device *adev)
 	WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
 }
 
-void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
-						bool enable)
+static void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+						       bool enable)
 {
 	uint32_t def, data;
 
@@ -180,8 +165,8 @@ void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 		WREG32_PCIE(smnCPM_CONTROL, data);
 }
 
-void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
-					       bool enable)
+static void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+						      bool enable)
 {
 	uint32_t def, data;
 
@@ -200,7 +185,8 @@ void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
 		WREG32_PCIE(smnPCIE_CNTL2, data);
 }
 
-void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags)
+static void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev,
+					    u32 *flags)
 {
 	int data;
 
@@ -215,9 +201,27 @@ void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags)
 		*flags |= AMD_CG_SUPPORT_BIF_LS;
 }
 
-const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
-	.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ),
-	.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE),
+static u32 nbio_v6_1_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+	return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v6_1_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+	return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v6_1_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+	return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX);
+}
+
+static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+	return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA);
+}
+
+static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
 	.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
 	.ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
 	.ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
@@ -232,12 +236,7 @@ const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
 	.ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK
 };
 
-const struct nbio_pcie_index_data nbio_v6_1_pcie_index_data = {
-	.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX),
-	.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA),
-};
-
-void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev)
+static void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev)
 {
 	uint32_t reg;
 
@@ -254,7 +253,7 @@ void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev)
 	}
 }
 
-void nbio_v6_1_init_registers(struct amdgpu_device *adev)
+static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
 {
 	uint32_t def, data;
 
@@ -265,3 +264,25 @@ void nbio_v6_1_init_registers(struct amdgpu_device *adev)
 	if (def != data)
 		WREG32_PCIE(smnPCIE_CONFIG_CNTL, data);
 }
+
+const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
+	.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg,
+	.get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset,
+	.get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset,
+	.get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset,
+	.get_pcie_data_offset = nbio_v6_1_get_pcie_data_offset,
+	.get_rev_id = nbio_v6_1_get_rev_id,
+	.mc_access_enable = nbio_v6_1_mc_access_enable,
+	.hdp_flush = nbio_v6_1_hdp_flush,
+	.get_memsize = nbio_v6_1_get_memsize,
+	.sdma_doorbell_range = nbio_v6_1_sdma_doorbell_range,
+	.enable_doorbell_aperture = nbio_v6_1_enable_doorbell_aperture,
+	.enable_doorbell_selfring_aperture = nbio_v6_1_enable_doorbell_selfring_aperture,
+	.ih_doorbell_range = nbio_v6_1_ih_doorbell_range,
+	.update_medium_grain_clock_gating = nbio_v6_1_update_medium_grain_clock_gating,
+	.update_medium_grain_light_sleep = nbio_v6_1_update_medium_grain_light_sleep,
+	.get_clockgating_state = nbio_v6_1_get_clockgating_state,
+	.ih_control = nbio_v6_1_ih_control,
+	.init_registers = nbio_v6_1_init_registers,
+	.detect_hw_virt = nbio_v6_1_detect_hw_virt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
index 14ca8d45a46c6cb059eaeb2ccad6ca365e93a0bd..0743a6f016f37cfb793166c54c78050c8d199d67 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
@@ -26,30 +26,6 @@
 
 #include "soc15_common.h"
 
-extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg;
-extern const struct nbio_pcie_index_data nbio_v6_1_pcie_index_data;
-int nbio_v6_1_init(struct amdgpu_device *adev);
-u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev,
-                                        uint32_t idx);
-void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev,
-                                         uint32_t idx, uint32_t val);
-void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable);
-void nbio_v6_1_hdp_flush(struct amdgpu_device *adev);
-u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev);
-void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
-				  bool use_doorbell, int doorbell_index);
-void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev,
-					bool enable);
-void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
-					bool enable);
-void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev,
-				bool use_doorbell, int doorbell_index);
-void nbio_v6_1_ih_control(struct amdgpu_device *adev);
-u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev);
-void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable);
-void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable);
-void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags);
-void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev);
-void nbio_v6_1_init_registers(struct amdgpu_device *adev);
+extern const struct amdgpu_nbio_funcs nbio_v6_1_funcs;
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 1fb77174e02c5a1df43a97bbf286ccb8fb35e463..17a9131a459846b9a6075d517af548b4c1efd05e 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -24,7 +24,6 @@
 #include "amdgpu_atombios.h"
 #include "nbio_v7_0.h"
 
-#include "soc15ip.h"
 #include "nbio/nbio_7_0_default.h"
 #include "nbio/nbio_7_0_offset.h"
 #include "nbio/nbio_7_0_sh_mask.h"
@@ -32,7 +31,10 @@
 
 #define smnNBIF_MGCG_CTRL_LCLK	0x1013a05c
 
-u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
+#define smnCPM_CONTROL                                                                                  0x11180460
+#define smnPCIE_CNTL2                                                                                   0x11180070
+
+static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
 {
         u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
 
@@ -42,19 +44,7 @@ u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
 	return tmp;
 }
 
-u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev,
-					uint32_t idx)
-{
-	return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx);
-}
-
-void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev,
-					 uint32_t idx, uint32_t val)
-{
-	WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val);
-}
-
-void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable)
+static void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable)
 {
 	if (enable)
 		WREG32_SOC15(NBIO, 0, mmBIF_FB_EN,
@@ -63,26 +53,23 @@ void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable)
 		WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
 }
 
-void nbio_v7_0_hdp_flush(struct amdgpu_device *adev)
+static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev)
 {
 	WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
 }
 
-u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
+static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
 {
 	return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE);
 }
 
-static const u32 nbio_sdma_doorbell_range_reg[] =
+static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+					  bool use_doorbell, int doorbell_index)
 {
-	SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE),
-	SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE)
-};
+	u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
+			SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
 
-void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
-				  bool use_doorbell, int doorbell_index)
-{
-	u32 doorbell_range = RREG32(nbio_sdma_doorbell_range_reg[instance]);
+	u32 doorbell_range = RREG32(reg);
 
 	if (use_doorbell) {
 		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
@@ -90,17 +77,23 @@ void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
 	} else
 		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
 
-	WREG32(nbio_sdma_doorbell_range_reg[instance], doorbell_range);
+	WREG32(reg, doorbell_range);
 }
 
-void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev,
-					bool enable)
+static void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev,
+					       bool enable)
 {
 	WREG32_FIELD15(NBIO, 0, RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0);
 }
 
-void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev,
-				bool use_doorbell, int doorbell_index)
+static void nbio_v7_0_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+							bool enable)
+{
+
+}
+
+static void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev,
+					bool use_doorbell, int doorbell_index)
 {
 	u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE);
 
@@ -130,8 +123,8 @@ static void nbio_7_0_write_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t o
 	WREG32_SOC15(NBIO, 0, mmSYSHUB_DATA, data);
 }
 
-void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
-						bool enable)
+static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+						       bool enable)
 {
 	uint32_t def, data;
 
@@ -169,7 +162,43 @@ void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 		nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK, data);
 }
 
-void nbio_v7_0_ih_control(struct amdgpu_device *adev)
+static void nbio_v7_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+						      bool enable)
+{
+	uint32_t def, data;
+
+	def = data = RREG32_PCIE(smnPCIE_CNTL2);
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
+		data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+			 PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+			 PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+	} else {
+		data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+			  PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+			  PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+	}
+
+	if (def != data)
+		WREG32_PCIE(smnPCIE_CNTL2, data);
+}
+
+static void nbio_v7_0_get_clockgating_state(struct amdgpu_device *adev,
+					    u32 *flags)
+{
+	int data;
+
+	/* AMD_CG_SUPPORT_BIF_MGCG */
+	data = RREG32_PCIE(smnCPM_CONTROL);
+	if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+		*flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+	/* AMD_CG_SUPPORT_BIF_LS */
+	data = RREG32_PCIE(smnPCIE_CNTL2);
+	if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+static void nbio_v7_0_ih_control(struct amdgpu_device *adev)
 {
 	u32 interrupt_cntl;
 
@@ -185,9 +214,27 @@ void nbio_v7_0_ih_control(struct amdgpu_device *adev)
 	WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
 }
 
+static u32 nbio_v7_0_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+	return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v7_0_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+	return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v7_0_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+	return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
+}
+
+static u32 nbio_v7_0_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+	return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
+}
+
 const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
-	.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ),
-	.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE),
 	.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
 	.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
 	.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
@@ -202,7 +249,35 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
 	.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
 };
 
-const struct nbio_pcie_index_data nbio_v7_0_pcie_index_data = {
-	.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2),
-	.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2)
+static void nbio_v7_0_detect_hw_virt(struct amdgpu_device *adev)
+{
+	if (is_virtual_machine())	/* passthrough mode exclus sriov mod */
+		adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
+}
+
+static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
+{
+
+}
+
+const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
+	.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg,
+	.get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset,
+	.get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset,
+	.get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset,
+	.get_pcie_data_offset = nbio_v7_0_get_pcie_data_offset,
+	.get_rev_id = nbio_v7_0_get_rev_id,
+	.mc_access_enable = nbio_v7_0_mc_access_enable,
+	.hdp_flush = nbio_v7_0_hdp_flush,
+	.get_memsize = nbio_v7_0_get_memsize,
+	.sdma_doorbell_range = nbio_v7_0_sdma_doorbell_range,
+	.enable_doorbell_aperture = nbio_v7_0_enable_doorbell_aperture,
+	.enable_doorbell_selfring_aperture = nbio_v7_0_enable_doorbell_selfring_aperture,
+	.ih_doorbell_range = nbio_v7_0_ih_doorbell_range,
+	.update_medium_grain_clock_gating = nbio_v7_0_update_medium_grain_clock_gating,
+	.update_medium_grain_light_sleep = nbio_v7_0_update_medium_grain_light_sleep,
+	.get_clockgating_state = nbio_v7_0_get_clockgating_state,
+	.ih_control = nbio_v7_0_ih_control,
+	.init_registers = nbio_v7_0_init_registers,
+	.detect_hw_virt = nbio_v7_0_detect_hw_virt,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
index df8fa90f40d7f6a4ed8ef05611233f7171ae44ee..508d549c50291fc89a40539b2a33642275b74a0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
@@ -26,24 +26,6 @@
 
 #include "soc15_common.h"
 
-extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
-extern const struct nbio_pcie_index_data nbio_v7_0_pcie_index_data;
-int nbio_v7_0_init(struct amdgpu_device *adev);
-u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev,
-                                        uint32_t idx);
-void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev,
-                                         uint32_t idx, uint32_t val);
-void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable);
-void nbio_v7_0_hdp_flush(struct amdgpu_device *adev);
-u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev);
-void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
-				  bool use_doorbell, int doorbell_index);
-void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev,
-					bool enable);
-void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev,
-				bool use_doorbell, int doorbell_index);
-void nbio_v7_0_ih_control(struct amdgpu_device *adev);
-u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev);
-void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
-						bool enable);
+extern const struct amdgpu_nbio_funcs nbio_v7_0_funcs;
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 78fe3f2917a08f47bb92651fc2705fef30bc128c..5a9fe24697f93832f0c7fd9e8e700760f250be91 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -30,7 +30,6 @@
 #include "soc15_common.h"
 #include "psp_v10_0.h"
 
-#include "soc15ip.h"
 #include "mp/mp_10_0_offset.h"
 #include "gc/gc_9_1_offset.h"
 #include "sdma0/sdma0_4_1_offset.h"
@@ -298,9 +297,10 @@ int psp_v10_0_cmd_submit(struct psp_context *psp,
 }
 
 static int
-psp_v10_0_sram_map(unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
-		  unsigned int *sram_data_reg_offset,
-		  enum AMDGPU_UCODE_ID ucode_id)
+psp_v10_0_sram_map(struct amdgpu_device *adev,
+		unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
+		unsigned int *sram_data_reg_offset,
+		enum AMDGPU_UCODE_ID ucode_id)
 {
 	int ret = 0;
 
@@ -395,7 +395,7 @@ bool psp_v10_0_compare_sram_data(struct psp_context *psp,
 	uint32_t *ucode_mem = NULL;
 	struct amdgpu_device *adev = psp->adev;
 
-	err = psp_v10_0_sram_map(&fw_sram_reg_val, &fw_sram_addr_reg_offset,
+	err = psp_v10_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset,
 				&fw_sram_data_reg_offset, ucode_type);
 	if (err)
 		return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index e75a23d858ef1a2ffc068955ad6954563025b16e..19bd1934e63d7896acf88d50995e92011b69dcf5 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -31,7 +31,6 @@
 #include "soc15_common.h"
 #include "psp_v3_1.h"
 
-#include "soc15ip.h"
 #include "mp/mp_9_0_offset.h"
 #include "mp/mp_9_0_sh_mask.h"
 #include "gc/gc_9_0_offset.h"
@@ -410,9 +409,10 @@ int psp_v3_1_cmd_submit(struct psp_context *psp,
 }
 
 static int
-psp_v3_1_sram_map(unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
-		  unsigned int *sram_data_reg_offset,
-		  enum AMDGPU_UCODE_ID ucode_id)
+psp_v3_1_sram_map(struct amdgpu_device *adev,
+		unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
+		unsigned int *sram_data_reg_offset,
+		enum AMDGPU_UCODE_ID ucode_id)
 {
 	int ret = 0;
 
@@ -507,7 +507,7 @@ bool psp_v3_1_compare_sram_data(struct psp_context *psp,
 	uint32_t *ucode_mem = NULL;
 	struct amdgpu_device *adev = psp->adev;
 
-	err = psp_v3_1_sram_map(&fw_sram_reg_val, &fw_sram_addr_reg_offset,
+	err = psp_v3_1_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset,
 				&fw_sram_data_reg_offset, ucode_type);
 	if (err)
 		return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 121e628e7cdb0ee16902733994f501d491ef539f..d4787ad4d346b0592f8e90103410df3cf860c59e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -93,12 +93,12 @@ static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
-		amdgpu_program_register_sequence(adev,
-						 iceland_mgcg_cgcg_init,
-						 ARRAY_SIZE(iceland_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_iceland_a11,
-						 ARRAY_SIZE(golden_settings_iceland_a11));
+		amdgpu_device_program_register_sequence(adev,
+							iceland_mgcg_cgcg_init,
+							ARRAY_SIZE(iceland_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_iceland_a11,
+							ARRAY_SIZE(golden_settings_iceland_a11));
 		break;
 	default:
 		break;
@@ -246,15 +246,13 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
  */
 static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
 				   struct amdgpu_ib *ib,
-				   unsigned vm_id, bool ctx_switch)
+				   unsigned vmid, bool ctx_switch)
 {
-	u32 vmid = vm_id & 0xf;
-
 	/* IB packet must end on a 8 DW boundary */
 	sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
-			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
+			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
 	/* base must be 32 byte aligned */
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
@@ -600,7 +598,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
 	u32 tmp;
 	u64 gpu_addr;
 
-	r = amdgpu_wb_get(adev, &index);
+	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
 		return r;
@@ -613,7 +611,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
 	r = amdgpu_ring_alloc(ring, 5);
 	if (r) {
 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		amdgpu_wb_free(adev, index);
+		amdgpu_device_wb_free(adev, index);
 		return r;
 	}
 
@@ -639,7 +637,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
 			  ring->idx, tmp);
 		r = -EINVAL;
 	}
-	amdgpu_wb_free(adev, index);
+	amdgpu_device_wb_free(adev, index);
 
 	return r;
 }
@@ -662,7 +660,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	u64 gpu_addr;
 	long r;
 
-	r = amdgpu_wb_get(adev, &index);
+	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 		return r;
@@ -715,7 +713,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	amdgpu_ib_free(adev, &ib, NULL);
 	dma_fence_put(f);
 err0:
-	amdgpu_wb_free(adev, index);
+	amdgpu_device_wb_free(adev, index);
 	return r;
 }
 
@@ -861,14 +859,14 @@ static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
  * using sDMA (VI).
  */
 static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					 unsigned vm_id, uint64_t pd_addr)
+					 unsigned vmid, uint64_t pd_addr)
 {
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-	if (vm_id < 8) {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
+	if (vmid < 8) {
+		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
 	} else {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
+		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
 	}
 	amdgpu_ring_write(ring, pd_addr >> 12);
 
@@ -876,7 +874,7 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
-	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vmid);
 
 	/* wait for flush */
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index c8c93f9dac2143c2fdbf49a0087eb55271f49cb4..521978c4053744abae0061ea04d41af362a9f443 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -192,47 +192,47 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
-		amdgpu_program_register_sequence(adev,
-						 fiji_mgcg_cgcg_init,
-						 ARRAY_SIZE(fiji_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_fiji_a10,
-						 ARRAY_SIZE(golden_settings_fiji_a10));
+		amdgpu_device_program_register_sequence(adev,
+							fiji_mgcg_cgcg_init,
+							ARRAY_SIZE(fiji_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_fiji_a10,
+							ARRAY_SIZE(golden_settings_fiji_a10));
 		break;
 	case CHIP_TONGA:
-		amdgpu_program_register_sequence(adev,
-						 tonga_mgcg_cgcg_init,
-						 ARRAY_SIZE(tonga_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_tonga_a11,
-						 ARRAY_SIZE(golden_settings_tonga_a11));
+		amdgpu_device_program_register_sequence(adev,
+							tonga_mgcg_cgcg_init,
+							ARRAY_SIZE(tonga_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_tonga_a11,
+							ARRAY_SIZE(golden_settings_tonga_a11));
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_polaris11_a11,
-						 ARRAY_SIZE(golden_settings_polaris11_a11));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_polaris11_a11,
+							ARRAY_SIZE(golden_settings_polaris11_a11));
 		break;
 	case CHIP_POLARIS10:
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_polaris10_a11,
-						 ARRAY_SIZE(golden_settings_polaris10_a11));
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_polaris10_a11,
+							ARRAY_SIZE(golden_settings_polaris10_a11));
 		break;
 	case CHIP_CARRIZO:
-		amdgpu_program_register_sequence(adev,
-						 cz_mgcg_cgcg_init,
-						 ARRAY_SIZE(cz_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 cz_golden_settings_a11,
-						 ARRAY_SIZE(cz_golden_settings_a11));
+		amdgpu_device_program_register_sequence(adev,
+							cz_mgcg_cgcg_init,
+							ARRAY_SIZE(cz_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							cz_golden_settings_a11,
+							ARRAY_SIZE(cz_golden_settings_a11));
 		break;
 	case CHIP_STONEY:
-		amdgpu_program_register_sequence(adev,
-						 stoney_mgcg_cgcg_init,
-						 ARRAY_SIZE(stoney_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 stoney_golden_settings_a11,
-						 ARRAY_SIZE(stoney_golden_settings_a11));
+		amdgpu_device_program_register_sequence(adev,
+							stoney_mgcg_cgcg_init,
+							ARRAY_SIZE(stoney_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							stoney_golden_settings_a11,
+							ARRAY_SIZE(stoney_golden_settings_a11));
 		break;
 	default:
 		break;
@@ -355,7 +355,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
 	struct amdgpu_device *adev = ring->adev;
 	u32 wptr;
 
-	if (ring->use_doorbell) {
+	if (ring->use_doorbell || ring->use_pollmem) {
 		/* XXX check if swapping is necessary on BE */
 		wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
 	} else {
@@ -380,10 +380,13 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
 
 	if (ring->use_doorbell) {
 		u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
-
 		/* XXX check if swapping is necessary on BE */
 		WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
+	} else if (ring->use_pollmem) {
+		u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
+
+		WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
 	} else {
 		int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
 
@@ -414,15 +417,13 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
  */
 static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
 				   struct amdgpu_ib *ib,
-				   unsigned vm_id, bool ctx_switch)
+				   unsigned vmid, bool ctx_switch)
 {
-	u32 vmid = vm_id & 0xf;
-
 	/* IB packet must end on a 8 DW boundary */
 	sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
-			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
+			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
 	/* base must be 32 byte aligned */
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
@@ -718,10 +719,14 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
 		WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i],
 		       upper_32_bits(wptr_gpu_addr));
 		wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
-		if (amdgpu_sriov_vf(adev))
-			wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
+		if (ring->use_pollmem)
+			wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+						       SDMA0_GFX_RB_WPTR_POLL_CNTL,
+						       ENABLE, 1);
 		else
-			wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
+			wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+						       SDMA0_GFX_RB_WPTR_POLL_CNTL,
+						       ENABLE, 0);
 		WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl);
 
 		/* enable DMA RB */
@@ -860,7 +865,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
 	u32 tmp;
 	u64 gpu_addr;
 
-	r = amdgpu_wb_get(adev, &index);
+	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
 		return r;
@@ -873,7 +878,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
 	r = amdgpu_ring_alloc(ring, 5);
 	if (r) {
 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		amdgpu_wb_free(adev, index);
+		amdgpu_device_wb_free(adev, index);
 		return r;
 	}
 
@@ -899,7 +904,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
 			  ring->idx, tmp);
 		r = -EINVAL;
 	}
-	amdgpu_wb_free(adev, index);
+	amdgpu_device_wb_free(adev, index);
 
 	return r;
 }
@@ -922,7 +927,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	u64 gpu_addr;
 	long r;
 
-	r = amdgpu_wb_get(adev, &index);
+	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 		return r;
@@ -974,7 +979,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	amdgpu_ib_free(adev, &ib, NULL);
 	dma_fence_put(f);
 err0:
-	amdgpu_wb_free(adev, index);
+	amdgpu_device_wb_free(adev, index);
 	return r;
 }
 
@@ -1120,14 +1125,14 @@ static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
  * using sDMA (VI).
  */
 static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					 unsigned vm_id, uint64_t pd_addr)
+					 unsigned vmid, uint64_t pd_addr)
 {
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-	if (vm_id < 8) {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
+	if (vmid < 8) {
+		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
 	} else {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
+		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
 	}
 	amdgpu_ring_write(ring, pd_addr >> 12);
 
@@ -1135,7 +1140,7 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
-	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vmid);
 
 	/* wait for flush */
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
@@ -1203,9 +1208,13 @@ static int sdma_v3_0_sw_init(void *handle)
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		ring = &adev->sdma.instance[i].ring;
 		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = (i == 0) ?
-			AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1;
+		if (!amdgpu_sriov_vf(adev)) {
+			ring->use_doorbell = true;
+			ring->doorbell_index = (i == 0) ?
+				AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1;
+		} else {
+			ring->use_pollmem = true;
+		}
 
 		sprintf(ring->name, "sdma%d", i);
 		r = amdgpu_ring_init(adev, ring, 1024,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 4c55f21e37a8b78172b51e7cf2d899f521c61d48..e92fb372bc99738dad957334ba40d3a138c57636 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -27,7 +27,6 @@
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
 
-#include "soc15ip.h"
 #include "sdma0/sdma0_4_0_offset.h"
 #include "sdma0/sdma0_4_0_sh_mask.h"
 #include "sdma1/sdma1_4_0_offset.h"
@@ -53,95 +52,83 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
 
-static const u32 golden_settings_sdma_4[] = {
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831d07,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL), 0x800f0100, 0x00000100,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0x003ff006, 0x0003c000,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0100, 0x00000100,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0100, 0x00000100,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CHICKEN_BITS), 0xfe931f07, 0x02831f07,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), 0xffffffff, 0x3f000100,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_IB_CNTL), 0x800f0100, 0x00000100,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL), 0x800f0100, 0x00000100,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), 0x003ff000, 0x0003c000,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL), 0x800f0100, 0x00000100,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL), 0x800f0100, 0x00000100,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_UTCL1_PAGE), 0x000003ff, 0x000003c0
+static const struct soc15_reg_golden golden_settings_sdma_4[] = {
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xff000ff0, 0x3f000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0100, 0x00000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL, 0x800f0100, 0x00000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003ff006, 0x0003c000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0100, 0x00000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL, 0x800f0100, 0x00000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_POWER_CNTL, 0x003ff000, 0x0003c000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL, 0x800f0100, 0x00000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0)
 };
 
-static const u32 golden_settings_sdma_vg10[] = {
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00104002,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG), 0x0018773f, 0x00104002,
-	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002
+static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002)
 };
 
-static const u32 golden_settings_sdma_4_1[] =
+static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
 {
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831d07,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xffffffff, 0x3f000100,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0111, 0x00000100,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0xfc3fffff, 0x40000051,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0111, 0x00000100,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0111, 0x00000100,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0xfc3fffff, 0x40000051),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0)
 };
 
-static const u32 golden_settings_sdma_rv1[] =
+static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
 {
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00000002,
-	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00000002
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002)
 };
 
-static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset)
+static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
+		u32 instance, u32 offset)
 {
-	u32 base = 0;
-
-	switch (instance) {
-	case 0:
-		base = SDMA0_BASE.instance[0].segment[0];
-		break;
-	case 1:
-		base = SDMA1_BASE.instance[0].segment[0];
-		break;
-	default:
-		BUG();
-		break;
-	}
-
-	return base + internal_offset;
+	return ( 0 == instance ? (adev->reg_offset[SDMA0_HWIP][0][0] + offset) :
+			(adev->reg_offset[SDMA1_HWIP][0][0] + offset));
 }
 
 static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
-		amdgpu_program_register_sequence(adev,
+		soc15_program_register_sequence(adev,
 						 golden_settings_sdma_4,
 						 ARRAY_SIZE(golden_settings_sdma_4));
-		amdgpu_program_register_sequence(adev,
+		soc15_program_register_sequence(adev,
 						 golden_settings_sdma_vg10,
 						 ARRAY_SIZE(golden_settings_sdma_vg10));
 		break;
 	case CHIP_RAVEN:
-		amdgpu_program_register_sequence(adev,
+		soc15_program_register_sequence(adev,
 						 golden_settings_sdma_4_1,
 						 ARRAY_SIZE(golden_settings_sdma_4_1));
-		amdgpu_program_register_sequence(adev,
+		soc15_program_register_sequence(adev,
 						 golden_settings_sdma_rv1,
 						 ARRAY_SIZE(golden_settings_sdma_rv1));
 		break;
@@ -265,8 +252,8 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
 		int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
 
 		wptr = &local_wptr;
-		lowbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR)) >> 2;
-		highbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
+		lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2;
+		highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
 
 		DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
 				me, highbit, lowbit);
@@ -315,8 +302,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
 				lower_32_bits(ring->wptr << 2),
 				me,
 				upper_32_bits(ring->wptr << 2));
-		WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
-		WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+		WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+		WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
 	}
 }
 
@@ -343,15 +330,13 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
  */
 static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
 					struct amdgpu_ib *ib,
-					unsigned vm_id, bool ctx_switch)
+					unsigned vmid, bool ctx_switch)
 {
-	u32 vmid = vm_id & 0xf;
-
 	/* IB packet must end on a 8 DW boundary */
 	sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
-			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
+			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
 	/* base must be 32 byte aligned */
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
@@ -370,13 +355,9 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
  */
 static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
+	struct amdgpu_device *adev = ring->adev;
 	u32 ref_and_mask = 0;
-	const struct nbio_hdp_flush_reg *nbio_hf_reg;
-
-	if (ring->adev->flags & AMD_IS_APU)
-		nbio_hf_reg = &nbio_v7_0_hdp_flush_reg;
-	else
-		nbio_hf_reg = &nbio_v6_1_hdp_flush_reg;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
 
 	if (ring == &ring->adev->sdma.instance[0].ring)
 		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
@@ -386,8 +367,8 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
 			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
 			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
-	amdgpu_ring_write(ring, nbio_hf_reg->hdp_flush_done_offset << 2);
-	amdgpu_ring_write(ring, nbio_hf_reg->hdp_flush_req_offset << 2);
+	amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2);
+	amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2);
 	amdgpu_ring_write(ring, ref_and_mask); /* reference */
 	amdgpu_ring_write(ring, ref_and_mask); /* mask */
 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
@@ -396,6 +377,8 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 
 static void sdma_v4_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
 	amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE));
@@ -460,12 +443,12 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
 		amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		rb_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL));
+		rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
-		ib_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL));
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+		ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
 	}
 
 	sdma0->ready = false;
@@ -522,18 +505,18 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 	}
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL));
+		f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
 		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
 				AUTO_CTXSW_ENABLE, enable ? 1 : 0);
 		if (enable && amdgpu_sdma_phase_quantum) {
-			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE0_QUANTUM),
+			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
 			       phase_quantum);
-			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE1_QUANTUM),
+			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
 			       phase_quantum);
-			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE2_QUANTUM),
+			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
 			       phase_quantum);
 		}
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), f32_cntl);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
 	}
 
 }
@@ -557,9 +540,9 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
 	}
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL));
+		f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
 		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), f32_cntl);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
 	}
 }
 
@@ -587,48 +570,48 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
 		ring = &adev->sdma.instance[i].ring;
 		wb_offset = (ring->rptr_offs * 4);
 
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
 
 		/* Set ring buffer size in dwords */
 		rb_bufsz = order_base_2(ring->ring_size / 4);
-		rb_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL));
+		rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
 #ifdef __BIG_ENDIAN
 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
 					RPTR_WRITEBACK_SWAP_ENABLE, 1);
 #endif
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
 
 		/* Initialize the ring buffer's read and write pointers */
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR), 0);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_HI), 0);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), 0);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
 
 		/* set the wb address whether it's enabled or not */
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
 		       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
 		       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
 
 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
 
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
 
 		ring->wptr = 0;
 
 		/* before programing wptr to a less value, need set minor_ptr_update first */
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
 
 		if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
-			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
-			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
 		}
 
-		doorbell = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL));
-		doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET));
+		doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+		doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
 
 		if (ring->use_doorbell) {
 			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
@@ -637,55 +620,53 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
 		} else {
 			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
 		}
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL), doorbell);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
-		if (adev->flags & AMD_IS_APU)
-			nbio_v7_0_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index);
-		else
-			nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+		adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+						      ring->doorbell_index);
 
 		if (amdgpu_sriov_vf(adev))
 			sdma_v4_0_ring_set_wptr(ring);
 
 		/* set minor_ptr_update to 0 after wptr programed */
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
 
 		/* set utc l1 enable flag always to 1 */
-		temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL));
+		temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
 		temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), temp);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
 
 		if (!amdgpu_sriov_vf(adev)) {
 			/* unhalt engine */
-			temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL));
+			temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
 			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
-			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), temp);
+			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
 		}
 
 		/* setup the wptr shadow polling */
 		wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
 		       lower_32_bits(wptr_gpu_addr));
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
 		       upper_32_bits(wptr_gpu_addr));
-		wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+		wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
 		if (amdgpu_sriov_vf(adev))
 			wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
 		else
 			wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
 
 		/* enable DMA RB */
 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
 
-		ib_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL));
+		ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
 #ifdef __BIG_ENDIAN
 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
 #endif
 		/* enable DMA IBs */
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
 
 		ring->ready = true;
 
@@ -816,12 +797,12 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
 			(adev->sdma.instance[i].fw->data +
 				le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), 0);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
 
 		for (j = 0; j < fw_size; j++)
-			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
+			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
 
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
 	}
 
 	return 0;
@@ -886,7 +867,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
 	u32 tmp;
 	u64 gpu_addr;
 
-	r = amdgpu_wb_get(adev, &index);
+	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
 		return r;
@@ -899,7 +880,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
 	r = amdgpu_ring_alloc(ring, 5);
 	if (r) {
 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		amdgpu_wb_free(adev, index);
+		amdgpu_device_wb_free(adev, index);
 		return r;
 	}
 
@@ -925,7 +906,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
 			  ring->idx, tmp);
 		r = -EINVAL;
 	}
-	amdgpu_wb_free(adev, index);
+	amdgpu_device_wb_free(adev, index);
 
 	return r;
 }
@@ -948,7 +929,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	u32 tmp = 0;
 	u64 gpu_addr;
 
-	r = amdgpu_wb_get(adev, &index);
+	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 		return r;
@@ -1000,7 +981,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	amdgpu_ib_free(adev, &ib, NULL);
 	dma_fence_put(f);
 err0:
-	amdgpu_wb_free(adev, index);
+	amdgpu_device_wb_free(adev, index);
 	return r;
 }
 
@@ -1152,23 +1133,24 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
  * using sDMA (VEGA10).
  */
 static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					 unsigned vm_id, uint64_t pd_addr)
+					 unsigned vmid, uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-	amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
+	amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2);
 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-	amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
+	amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vmid * 2);
 	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
 
 	/* flush TLB */
@@ -1183,8 +1165,8 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
 	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
 	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 1 << vm_id); /* reference */
-	amdgpu_ring_write(ring, 1 << vm_id); /* mask */
+	amdgpu_ring_write(ring, 1 << vmid); /* reference */
+	amdgpu_ring_write(ring, 1 << vmid); /* mask */
 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
 }
@@ -1317,7 +1299,7 @@ static bool sdma_v4_0_is_idle(void *handle)
 	u32 i;
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		u32 tmp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_STATUS_REG));
+		u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
 
 		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
 			return false;
@@ -1333,8 +1315,8 @@ static int sdma_v4_0_wait_for_idle(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	for (i = 0; i < adev->usec_timeout; i++) {
-		sdma0 = RREG32(sdma_v4_0_get_reg_offset(0, mmSDMA0_STATUS_REG));
-		sdma1 = RREG32(sdma_v4_0_get_reg_offset(1, mmSDMA0_STATUS_REG));
+		sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
+		sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
 
 		if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
 			return 0;
@@ -1358,8 +1340,8 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
 	u32 sdma_cntl;
 
 	u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ?
-		sdma_v4_0_get_reg_offset(0, mmSDMA0_CNTL) :
-		sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL);
+		sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
+		sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
 
 	sdma_cntl = RREG32(reg_offset);
 	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 49eef3090f085013e51ba6fcafcf75cb7ee0eecc..543101d5a5edd053c83beea6de7db5adbf9844eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1390,65 +1390,65 @@ static void si_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_TAHITI:
-		amdgpu_program_register_sequence(adev,
-						 tahiti_golden_registers,
-						 ARRAY_SIZE(tahiti_golden_registers));
-		amdgpu_program_register_sequence(adev,
-						 tahiti_golden_rlc_registers,
-						 ARRAY_SIZE(tahiti_golden_rlc_registers));
-		amdgpu_program_register_sequence(adev,
-						 tahiti_mgcg_cgcg_init,
-						 ARRAY_SIZE(tahiti_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 tahiti_golden_registers2,
-						 ARRAY_SIZE(tahiti_golden_registers2));
+		amdgpu_device_program_register_sequence(adev,
+							tahiti_golden_registers,
+							ARRAY_SIZE(tahiti_golden_registers));
+		amdgpu_device_program_register_sequence(adev,
+							tahiti_golden_rlc_registers,
+							ARRAY_SIZE(tahiti_golden_rlc_registers));
+		amdgpu_device_program_register_sequence(adev,
+							tahiti_mgcg_cgcg_init,
+							ARRAY_SIZE(tahiti_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							tahiti_golden_registers2,
+							ARRAY_SIZE(tahiti_golden_registers2));
 		break;
 	case CHIP_PITCAIRN:
-		amdgpu_program_register_sequence(adev,
-						 pitcairn_golden_registers,
-						 ARRAY_SIZE(pitcairn_golden_registers));
-		amdgpu_program_register_sequence(adev,
-						 pitcairn_golden_rlc_registers,
-						 ARRAY_SIZE(pitcairn_golden_rlc_registers));
-		amdgpu_program_register_sequence(adev,
-						 pitcairn_mgcg_cgcg_init,
-						 ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							pitcairn_golden_registers,
+							ARRAY_SIZE(pitcairn_golden_registers));
+		amdgpu_device_program_register_sequence(adev,
+							pitcairn_golden_rlc_registers,
+							ARRAY_SIZE(pitcairn_golden_rlc_registers));
+		amdgpu_device_program_register_sequence(adev,
+							pitcairn_mgcg_cgcg_init,
+							ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
 		break;
 	case CHIP_VERDE:
-		amdgpu_program_register_sequence(adev,
-						 verde_golden_registers,
-						 ARRAY_SIZE(verde_golden_registers));
-		amdgpu_program_register_sequence(adev,
-						 verde_golden_rlc_registers,
-						 ARRAY_SIZE(verde_golden_rlc_registers));
-		amdgpu_program_register_sequence(adev,
-						 verde_mgcg_cgcg_init,
-						 ARRAY_SIZE(verde_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 verde_pg_init,
-						 ARRAY_SIZE(verde_pg_init));
+		amdgpu_device_program_register_sequence(adev,
+							verde_golden_registers,
+							ARRAY_SIZE(verde_golden_registers));
+		amdgpu_device_program_register_sequence(adev,
+							verde_golden_rlc_registers,
+							ARRAY_SIZE(verde_golden_rlc_registers));
+		amdgpu_device_program_register_sequence(adev,
+							verde_mgcg_cgcg_init,
+							ARRAY_SIZE(verde_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							verde_pg_init,
+							ARRAY_SIZE(verde_pg_init));
 		break;
 	case CHIP_OLAND:
-		amdgpu_program_register_sequence(adev,
-						 oland_golden_registers,
-						 ARRAY_SIZE(oland_golden_registers));
-		amdgpu_program_register_sequence(adev,
-						 oland_golden_rlc_registers,
-						 ARRAY_SIZE(oland_golden_rlc_registers));
-		amdgpu_program_register_sequence(adev,
-						 oland_mgcg_cgcg_init,
-						 ARRAY_SIZE(oland_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							oland_golden_registers,
+							ARRAY_SIZE(oland_golden_registers));
+		amdgpu_device_program_register_sequence(adev,
+							oland_golden_rlc_registers,
+							ARRAY_SIZE(oland_golden_rlc_registers));
+		amdgpu_device_program_register_sequence(adev,
+							oland_mgcg_cgcg_init,
+							ARRAY_SIZE(oland_mgcg_cgcg_init));
 		break;
 	case CHIP_HAINAN:
-		amdgpu_program_register_sequence(adev,
-						 hainan_golden_registers,
-						 ARRAY_SIZE(hainan_golden_registers));
-		amdgpu_program_register_sequence(adev,
-						 hainan_golden_registers2,
-						 ARRAY_SIZE(hainan_golden_registers2));
-		amdgpu_program_register_sequence(adev,
-						 hainan_mgcg_cgcg_init,
-						 ARRAY_SIZE(hainan_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							hainan_golden_registers,
+							ARRAY_SIZE(hainan_golden_registers));
+		amdgpu_device_program_register_sequence(adev,
+							hainan_golden_registers2,
+							ARRAY_SIZE(hainan_golden_registers2));
+		amdgpu_device_program_register_sequence(adev,
+							hainan_mgcg_cgcg_init,
+							ARRAY_SIZE(hainan_mgcg_cgcg_init));
 		break;
 
 
@@ -1959,42 +1959,42 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
 	case CHIP_VERDE:
 	case CHIP_TAHITI:
 	case CHIP_PITCAIRN:
-		amdgpu_ip_block_add(adev, &si_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v6_0_ip_block);
-		amdgpu_ip_block_add(adev, &si_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &si_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display)
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 		else
-			amdgpu_ip_block_add(adev, &dce_v6_0_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v6_0_ip_block);
-		amdgpu_ip_block_add(adev, &si_dma_ip_block);
-		/* amdgpu_ip_block_add(adev, &uvd_v3_1_ip_block); */
-		/* amdgpu_ip_block_add(adev, &vce_v1_0_ip_block); */
+			amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
+		/* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */
+		/* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
 		break;
 	case CHIP_OLAND:
-		amdgpu_ip_block_add(adev, &si_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v6_0_ip_block);
-		amdgpu_ip_block_add(adev, &si_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &si_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display)
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 		else
-			amdgpu_ip_block_add(adev, &dce_v6_4_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v6_0_ip_block);
-		amdgpu_ip_block_add(adev, &si_dma_ip_block);
-		/* amdgpu_ip_block_add(adev, &uvd_v3_1_ip_block); */
-		/* amdgpu_ip_block_add(adev, &vce_v1_0_ip_block); */
+			amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
+		/* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */
+		/* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
 		break;
 	case CHIP_HAINAN:
-		amdgpu_ip_block_add(adev, &si_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v6_0_ip_block);
-		amdgpu_ip_block_add(adev, &si_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &si_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display)
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v6_0_ip_block);
-		amdgpu_ip_block_add(adev, &si_dma_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
 		break;
 	default:
 		BUG();
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index ee469a906cd371f37ebf1cad85c51afd567584ee..9a29c13990918116a2bbfdaca9a9043f757dbc3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -61,14 +61,14 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
 
 static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
 				struct amdgpu_ib *ib,
-				unsigned vm_id, bool ctx_switch)
+				unsigned vmid, bool ctx_switch)
 {
 	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
 	 * Pad as necessary with NOPs.
 	 */
 	while ((lower_32_bits(ring->wptr) & 7) != 5)
 		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
-	amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
+	amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0));
 	amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
 	amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
 
@@ -221,7 +221,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
 	u32 tmp;
 	u64 gpu_addr;
 
-	r = amdgpu_wb_get(adev, &index);
+	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
 		return r;
@@ -234,7 +234,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
 	r = amdgpu_ring_alloc(ring, 4);
 	if (r) {
 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		amdgpu_wb_free(adev, index);
+		amdgpu_device_wb_free(adev, index);
 		return r;
 	}
 
@@ -258,7 +258,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
 			  ring->idx, tmp);
 		r = -EINVAL;
 	}
-	amdgpu_wb_free(adev, index);
+	amdgpu_device_wb_free(adev, index);
 
 	return r;
 }
@@ -281,7 +281,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	u64 gpu_addr;
 	long r;
 
-	r = amdgpu_wb_get(adev, &index);
+	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 		return r;
@@ -328,7 +328,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	amdgpu_ib_free(adev, &ib, NULL);
 	dma_fence_put(f);
 err0:
-	amdgpu_wb_free(adev, index);
+	amdgpu_device_wb_free(adev, index);
 	return r;
 }
 
@@ -473,25 +473,25 @@ static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
  * using sDMA (VI).
  */
 static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
-				      unsigned vm_id, uint64_t pd_addr)
+				      unsigned vmid, uint64_t pd_addr)
 {
 	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
-	if (vm_id < 8)
-		amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
+	if (vmid < 8)
+		amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
 	else
-		amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8)));
+		amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8)));
 	amdgpu_ring_write(ring, pd_addr >> 12);
 
 	/* bits 0-7 are the VM contexts0-7 */
 	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
 	amdgpu_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST));
-	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vmid);
 
 	/* wait for invalidate to complete */
 	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
 	amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST);
 	amdgpu_ring_write(ring, 0xff << 16); /* retry */
-	amdgpu_ring_write(ring, 1 << vm_id); /* mask */
+	amdgpu_ring_write(ring, 1 << vmid); /* mask */
 	amdgpu_ring_write(ring, 0); /* value */
 	amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 299cb3161b2cc641751798895c0d3bb8b9616f11..ce675a7f179a6a0f3b9f7b254589694541aea3cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -3464,6 +3464,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
 		    (adev->pdev->device == 0x6667)) {
 			max_sclk = 75000;
 		}
+		if ((adev->pdev->revision == 0xC3) ||
+		    (adev->pdev->device == 0x6665)) {
+			max_sclk = 60000;
+			max_mclk = 80000;
+		}
 	} else if (adev->asic_type == CHIP_OLAND) {
 		if ((adev->pdev->revision == 0xC7) ||
 		    (adev->pdev->revision == 0x80) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index d2c6b80309c8db3560968bc80577a0628ee675b0..60dad63098a2aa4db47de4804174a992fb78039e 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -146,7 +146,7 @@ static void si_ih_decode_iv(struct amdgpu_device *adev,
 	entry->src_id = dw[0] & 0xff;
 	entry->src_data[0] = dw[1] & 0xfffffff;
 	entry->ring_id = dw[2] & 0xff;
-	entry->vm_id = (dw[2] >> 8) & 0xff;
+	entry->vmid = (dw[2] >> 8) & 0xff;
 
 	adev->irq.ih.rptr += 16;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index f134ca0c093cf8f33d600396c0045fcf2df3e5c3..a04a033f57de0121e7da812f66d8c8978fc81e32 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -34,7 +34,6 @@
 #include "atom.h"
 #include "amd_pcie.h"
 
-#include "soc15ip.h"
 #include "uvd/uvd_7_0_offset.h"
 #include "gc/gc_9_0_offset.h"
 #include "gc/gc_9_0_sh_mask.h"
@@ -101,15 +100,8 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 {
 	unsigned long flags, address, data;
 	u32 r;
-	const struct nbio_pcie_index_data *nbio_pcie_id;
-
-	if (adev->flags & AMD_IS_APU)
-		nbio_pcie_id = &nbio_v7_0_pcie_index_data;
-	else
-		nbio_pcie_id = &nbio_v6_1_pcie_index_data;
-
-	address = nbio_pcie_id->index_offset;
-	data = nbio_pcie_id->data_offset;
+	address = adev->nbio_funcs->get_pcie_index_offset(adev);
+	data = adev->nbio_funcs->get_pcie_data_offset(adev);
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, reg);
@@ -122,15 +114,9 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 {
 	unsigned long flags, address, data;
-	const struct nbio_pcie_index_data *nbio_pcie_id;
 
-	if (adev->flags & AMD_IS_APU)
-		nbio_pcie_id = &nbio_v7_0_pcie_index_data;
-	else
-		nbio_pcie_id = &nbio_v6_1_pcie_index_data;
-
-	address = nbio_pcie_id->index_offset;
-	data = nbio_pcie_id->data_offset;
+	address = adev->nbio_funcs->get_pcie_index_offset(adev);
+	data = adev->nbio_funcs->get_pcie_data_offset(adev);
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, reg);
@@ -242,41 +228,9 @@ static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 
 static u32 soc15_get_config_memsize(struct amdgpu_device *adev)
 {
-	if (adev->flags & AMD_IS_APU)
-		return nbio_v7_0_get_memsize(adev);
-	else
-		return nbio_v6_1_get_memsize(adev);
+	return adev->nbio_funcs->get_memsize(adev);
 }
 
-static const u32 vega10_golden_init[] =
-{
-};
-
-static const u32 raven_golden_init[] =
-{
-};
-
-static void soc15_init_golden_registers(struct amdgpu_device *adev)
-{
-	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
-	mutex_lock(&adev->grbm_idx_mutex);
-
-	switch (adev->asic_type) {
-	case CHIP_VEGA10:
-		amdgpu_program_register_sequence(adev,
-						 vega10_golden_init,
-						 ARRAY_SIZE(vega10_golden_init));
-		break;
-	case CHIP_RAVEN:
-		amdgpu_program_register_sequence(adev,
-						 raven_golden_init,
-						 ARRAY_SIZE(raven_golden_init));
-		break;
-	default:
-		break;
-	}
-	mutex_unlock(&adev->grbm_idx_mutex);
-}
 static u32 soc15_get_xclk(struct amdgpu_device *adev)
 {
 	return adev->clock.spll.reference_freq;
@@ -332,25 +286,34 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev,
 	return true;
 }
 
-static struct amdgpu_allowed_register_entry soc15_allowed_read_registers[] = {
-	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)},
-	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)},
-	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0)},
-	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1)},
-	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2)},
-	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3)},
-	{ SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_STATUS_REG)},
-	{ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_STATUS_REG)},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_STAT)},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1)},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2)},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3)},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT)},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1)},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS)},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1)},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS)},
-	{ SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)},
+struct soc15_allowed_register_entry {
+	uint32_t hwip;
+	uint32_t inst;
+	uint32_t seg;
+	uint32_t reg_offset;
+	bool grbm_indexed;
+};
+
+
+static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = {
+	{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)},
+	{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)},
+	{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE0)},
+	{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE1)},
+	{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE2)},
+	{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE3)},
+	{ SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_STATUS_REG)},
+	{ SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_STATUS_REG)},
+	{ SOC15_REG_ENTRY(GC, 0, mmCP_STAT)},
+	{ SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT1)},
+	{ SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT2)},
+	{ SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT3)},
+	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)},
+	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)},
+	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)},
+	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
+	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
+	{ SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
 };
 
 static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
@@ -377,12 +340,9 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev,
 	if (indexed) {
 		return soc15_read_indexed_register(adev, se_num, sh_num, reg_offset);
 	} else {
-		switch (reg_offset) {
-		case SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG):
+		if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG))
 			return adev->gfx.config.gb_addr_config;
-		default:
-			return RREG32(reg_offset);
-		}
+		return RREG32(reg_offset);
 	}
 }
 
@@ -390,10 +350,13 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
 			    u32 sh_num, u32 reg_offset, u32 *value)
 {
 	uint32_t i;
+	struct soc15_allowed_register_entry  *en;
 
 	*value = 0;
 	for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) {
-		if (reg_offset != soc15_allowed_read_registers[i].reg_offset)
+		en = &soc15_allowed_read_registers[i];
+		if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+					+ en->reg_offset))
 			continue;
 
 		*value = soc15_get_register_value(adev,
@@ -404,6 +367,43 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
 	return -EINVAL;
 }
 
+
+/**
+ * soc15_program_register_sequence - program an array of registers.
+ *
+ * @adev: amdgpu_device pointer
+ * @regs: pointer to the register array
+ * @array_size: size of the register array
+ *
+ * Programs an array or registers with and and or masks.
+ * This is a helper for setting golden registers.
+ */
+
+void soc15_program_register_sequence(struct amdgpu_device *adev,
+					     const struct soc15_reg_golden *regs,
+					     const u32 array_size)
+{
+	const struct soc15_reg_golden *entry;
+	u32 tmp, reg;
+	int i;
+
+	for (i = 0; i < array_size; ++i) {
+		entry = &regs[i];
+		reg =  adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+
+		if (entry->and_mask == 0xffffffff) {
+			tmp = entry->or_mask;
+		} else {
+			tmp = RREG32(reg);
+			tmp &= ~(entry->and_mask);
+			tmp |= entry->or_mask;
+		}
+		WREG32(reg, tmp);
+	}
+
+}
+
+
 static int soc15_asic_reset(struct amdgpu_device *adev)
 {
 	u32 i;
@@ -428,9 +428,8 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 
 	/* wait for asic to come out of reset */
 	for (i = 0; i < adev->usec_timeout; i++) {
-		u32 memsize = (adev->flags & AMD_IS_APU) ?
-			nbio_v7_0_get_memsize(adev) :
-			nbio_v6_1_get_memsize(adev);
+		u32 memsize = adev->nbio_funcs->get_memsize(adev);
+
 		if (memsize != 0xffffffff)
 			break;
 		udelay(1);
@@ -495,14 +494,10 @@ static void soc15_program_aspm(struct amdgpu_device *adev)
 }
 
 static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev,
-					bool enable)
+					   bool enable)
 {
-	if (adev->flags & AMD_IS_APU) {
-		nbio_v7_0_enable_doorbell_aperture(adev, enable);
-	} else {
-		nbio_v6_1_enable_doorbell_aperture(adev, enable);
-		nbio_v6_1_enable_doorbell_selfring_aperture(adev, enable);
-	}
+	adev->nbio_funcs->enable_doorbell_aperture(adev, enable);
+	adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable);
 }
 
 static const struct amdgpu_ip_block_version vega10_common_ip_block =
@@ -516,50 +511,65 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block =
 
 int soc15_set_ip_blocks(struct amdgpu_device *adev)
 {
-	nbio_v6_1_detect_hw_virt(adev);
+	/* Set IP register base before any HW register access */
+	switch (adev->asic_type) {
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		vega10_reg_base_init(adev);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (adev->flags & AMD_IS_APU)
+		adev->nbio_funcs = &nbio_v7_0_funcs;
+	else
+		adev->nbio_funcs = &nbio_v6_1_funcs;
+
+	adev->nbio_funcs->detect_hw_virt(adev);
 
 	if (amdgpu_sriov_vf(adev))
 		adev->virt.ops = &xgpu_ai_virt_ops;
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
-		amdgpu_ip_block_add(adev, &vega10_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block);
-		amdgpu_ip_block_add(adev, &vega10_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
 		if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1)
-			amdgpu_ip_block_add(adev, &psp_v3_1_ip_block);
+			amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
 		if (!amdgpu_sriov_vf(adev))
-			amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+			amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
-			amdgpu_ip_block_add(adev, &dm_ip_block);
+			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 #else
 #	warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15."
 #endif
-		amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block);
-		amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block);
-		amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block);
-		amdgpu_ip_block_add(adev, &vce_v4_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block);
 		break;
 	case CHIP_RAVEN:
-		amdgpu_ip_block_add(adev, &vega10_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block);
-		amdgpu_ip_block_add(adev, &vega10_ih_ip_block);
-		amdgpu_ip_block_add(adev, &psp_v10_0_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
-			amdgpu_ip_block_add(adev, &dm_ip_block);
+			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 #else
 #	warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15."
 #endif
-		amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block);
-		amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block);
-		amdgpu_ip_block_add(adev, &vcn_v1_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block);
 		break;
 	default:
 		return -EINVAL;
@@ -570,10 +580,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 
 static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
 {
-	if (adev->flags & AMD_IS_APU)
-		return nbio_v7_0_get_rev_id(adev);
-	else
-		return nbio_v6_1_get_rev_id(adev);
+	return adev->nbio_funcs->get_rev_id(adev);
 }
 
 static const struct amdgpu_asic_funcs soc15_asic_funcs =
@@ -609,8 +616,8 @@ static int soc15_common_early_init(void *handle)
 
 	adev->asic_funcs = &soc15_asic_funcs;
 
-	if (amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) &&
-		(amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP)))
+	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) &&
+	    (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP)))
 		psp_enabled = true;
 
 	adev->rev_id = soc15_get_rev_id(adev);
@@ -659,8 +666,8 @@ static int soc15_common_early_init(void *handle)
 			AMD_CG_SUPPORT_MC_LS |
 			AMD_CG_SUPPORT_SDMA_MGCG |
 			AMD_CG_SUPPORT_SDMA_LS;
-		adev->pg_flags = AMD_PG_SUPPORT_SDMA |
-				 AMD_PG_SUPPORT_MMHUB;
+		adev->pg_flags = AMD_PG_SUPPORT_SDMA;
+
 		adev->external_rev_id = 0x1;
 		break;
 	default:
@@ -675,7 +682,7 @@ static int soc15_common_early_init(void *handle)
 
 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 
-	amdgpu_get_pcie_info(adev);
+	amdgpu_device_get_pcie_info(adev);
 
 	return 0;
 }
@@ -709,15 +716,12 @@ static int soc15_common_hw_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	/* move the golden regs per IP block */
-	soc15_init_golden_registers(adev);
 	/* enable pcie gen2/3 link */
 	soc15_pcie_gen3_enable(adev);
 	/* enable aspm */
 	soc15_program_aspm(adev);
 	/* setup nbio registers */
-	if (!(adev->flags & AMD_IS_APU))
-		nbio_v6_1_init_registers(adev);
+	adev->nbio_funcs->init_registers(adev);
 	/* enable the doorbell aperture */
 	soc15_enable_doorbell_aperture(adev, true);
 
@@ -878,9 +882,9 @@ static int soc15_common_set_clockgating_state(void *handle,
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
-		nbio_v6_1_update_medium_grain_clock_gating(adev,
+		adev->nbio_funcs->update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
-		nbio_v6_1_update_medium_grain_light_sleep(adev,
+		adev->nbio_funcs->update_medium_grain_light_sleep(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
 		soc15_update_hdp_light_sleep(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
@@ -894,9 +898,9 @@ static int soc15_common_set_clockgating_state(void *handle,
 				state == AMD_CG_STATE_GATE ? true : false);
 		break;
 	case CHIP_RAVEN:
-		nbio_v7_0_update_medium_grain_clock_gating(adev,
+		adev->nbio_funcs->update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
-		nbio_v6_1_update_medium_grain_light_sleep(adev,
+		adev->nbio_funcs->update_medium_grain_light_sleep(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
 		soc15_update_hdp_light_sleep(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
@@ -921,7 +925,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
 	if (amdgpu_sriov_vf(adev))
 		*flags = 0;
 
-	nbio_v6_1_get_clockgating_state(adev, flags);
+	adev->nbio_funcs->get_clockgating_state(adev, flags);
 
 	/* AMD_CG_SUPPORT_HDP_LS */
 	data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index acb3cdb119f24c2daa6ee23a97a1f4b8e7365b54..26b3feac5d062328d253e69ef212721447463813 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -29,8 +29,28 @@
 
 extern const struct amd_ip_funcs soc15_common_ip_funcs;
 
+struct soc15_reg_golden {
+	u32	hwip;
+	u32	instance;
+	u32	segment;
+	u32	reg;
+	u32	and_mask;
+	u32	or_mask;
+};
+
+#define SOC15_REG_ENTRY(ip, inst, reg)	ip##_HWIP, inst, reg##_BASE_IDX, reg
+
+#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
+	{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
+
 void soc15_grbm_select(struct amdgpu_device *adev,
 		    u32 me, u32 pipe, u32 queue, u32 vmid);
 int soc15_set_ip_blocks(struct amdgpu_device *adev);
 
+void soc15_program_register_sequence(struct amdgpu_device *adev,
+					     const struct soc15_reg_golden *registers,
+					     const u32 array_size);
+
+int vega10_reg_base_init(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 7a8e4e28abb2a9bf3b68933f80c21596920c4781..def865067edd2b2cadc820824442b0006b057b2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -24,72 +24,28 @@
 #ifndef __SOC15_COMMON_H__
 #define __SOC15_COMMON_H__
 
-struct nbio_hdp_flush_reg {
-	u32 hdp_flush_req_offset;
-	u32 hdp_flush_done_offset;
-	u32 ref_and_mask_cp0;
-	u32 ref_and_mask_cp1;
-	u32 ref_and_mask_cp2;
-	u32 ref_and_mask_cp3;
-	u32 ref_and_mask_cp4;
-	u32 ref_and_mask_cp5;
-	u32 ref_and_mask_cp6;
-	u32 ref_and_mask_cp7;
-	u32 ref_and_mask_cp8;
-	u32 ref_and_mask_cp9;
-	u32 ref_and_mask_sdma0;
-	u32 ref_and_mask_sdma1;
-};
-
-struct nbio_pcie_index_data {
-	u32 index_offset;
-	u32 data_offset;
-};
-
 /* Register Access Macros */
-#define SOC15_REG_OFFSET(ip, inst, reg)       (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
-                                                (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
-                                                    (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
-                                                        (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
-                                                            (ip##_BASE__INST##inst##_SEG4 + reg)))))
+#define SOC15_REG_OFFSET(ip, inst, reg)	(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
 
 #define WREG32_FIELD15(ip, idx, reg, field, val)	\
-	WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+	WREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg,	\
+	(RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg)	\
+	& ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
 
 #define RREG32_SOC15(ip, inst, reg) \
-	RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
-		(1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
-		(2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
-		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
-		(ip##_BASE__INST##inst##_SEG4 + reg))))))
+	RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
 
 #define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \
-	RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
-		(1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
-		(2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
-		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
-		(ip##_BASE__INST##inst##_SEG4 + reg))))) + offset)
+	RREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset)
 
 #define WREG32_SOC15(ip, inst, reg, value) \
-	WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
-		(1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
-		(2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
-		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
-		(ip##_BASE__INST##inst##_SEG4 + reg))))), value)
+	WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value)
 
 #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
-	WREG32_NO_KIQ( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
-		(1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
-		(2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
-		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
-		(ip##_BASE__INST##inst##_SEG4 + reg))))), value)
+	WREG32_NO_KIQ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value)
 
 #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \
-	WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
-		(1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
-		(2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
-		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
-		(ip##_BASE__INST##inst##_SEG4 + reg))))) + offset, value)
+	WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value)
 
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index aa4e320e31f8b82178aaa7d2ed21c74b03209149..5995ffc183de0612c1f64fd6aa76e6874b71efdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -270,7 +270,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev,
 	entry->src_id = dw[0] & 0xff;
 	entry->src_data[0] = dw[1] & 0xfffffff;
 	entry->ring_id = dw[2] & 0xff;
-	entry->vm_id = (dw[2] >> 8) & 0xff;
+	entry->vmid = (dw[2] >> 8) & 0xff;
 	entry->pas_id = (dw[2] >> 16) & 0xffff;
 
 	/* wptr/rptr are in bytes! */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index b13ae34be1c2f6d0466ffe905be74472a1faaed5..8ab10c220910c6a176ff7fe36e1c945eda3ecfe0 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -541,7 +541,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
  */
 static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
 				  struct amdgpu_ib *ib,
-				  unsigned vm_id, bool ctx_switch)
+				  unsigned vmid, bool ctx_switch)
 {
 	amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0));
 	amdgpu_ring_write(ring, ib->gpu_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index a4b0f1d842b734aedd2c74f3361bdbbd24879889..c1fe30cdba32021532123fa2174ac89ee745436e 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -556,7 +556,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
  */
 static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
 				  struct amdgpu_ib *ib,
-				  unsigned vm_id, bool ctx_switch)
+				  unsigned vmid, bool ctx_switch)
 {
 	amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 0e8b887cf03e78ddb236d590ad48bcdda61f9180..b2bfedaf57f197b38e98dce034d12c1d3359ca64 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -37,6 +37,9 @@
 #include "gmc/gmc_8_1_d.h"
 #include "vi.h"
 
+/* Polaris10/11/12 firmware version */
+#define FW_1_130_16 ((1 << 24) | (130 << 16) | (16 << 8))
+
 static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev);
 static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev);
 
@@ -58,7 +61,9 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
 */
 static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev)
 {
-	return ((adev->asic_type >= CHIP_POLARIS10) && (adev->asic_type <= CHIP_POLARIS12));
+	return ((adev->asic_type >= CHIP_POLARIS10) &&
+			(adev->asic_type <= CHIP_POLARIS12) &&
+			(!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16));
 }
 
 /**
@@ -411,11 +416,19 @@ static int uvd_v6_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	if (uvd_v6_0_enc_support(adev)) {
-		struct amd_sched_rq *rq;
+	if (!uvd_v6_0_enc_support(adev)) {
+		for (i = 0; i < adev->uvd.num_enc_rings; ++i)
+			adev->uvd.ring_enc[i].funcs = NULL;
+
+		adev->uvd.irq.num_types = 1;
+		adev->uvd.num_enc_rings = 0;
+
+		DRM_INFO("UVD ENC is disabled\n");
+	} else {
+		struct drm_sched_rq *rq;
 		ring = &adev->uvd.ring_enc[0];
-		rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
-		r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity_enc,
+		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+		r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc,
 					  rq, amdgpu_sched_jobs, NULL);
 		if (r) {
 			DRM_ERROR("Failed setting up UVD ENC run queue.\n");
@@ -456,7 +469,7 @@ static int uvd_v6_0_sw_fini(void *handle)
 		return r;
 
 	if (uvd_v6_0_enc_support(adev)) {
-		amd_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
+		drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
 
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i)
 			amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
@@ -1028,10 +1041,10 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
  */
 static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
 				  struct amdgpu_ib *ib,
-				  unsigned vm_id, bool ctx_switch)
+				  unsigned vmid, bool ctx_switch)
 {
 	amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0));
-	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, vmid);
 
 	amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
@@ -1050,24 +1063,24 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
  * Write enc ring commands to execute the indirect buffer
  */
 static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
-		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
+		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
 {
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
-	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, ib->length_dw);
 }
 
 static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					 unsigned vm_id, uint64_t pd_addr)
+					 unsigned vmid, uint64_t pd_addr)
 {
 	uint32_t reg;
 
-	if (vm_id < 8)
-		reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id;
+	if (vmid < 8)
+		reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
 	else
-		reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8;
+		reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8;
 
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
 	amdgpu_ring_write(ring, reg << 2);
@@ -1079,7 +1092,7 @@ static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
-	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vmid);
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
 	amdgpu_ring_write(ring, 0x8);
 
@@ -1088,7 +1101,7 @@ static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8, 0));
-	amdgpu_ring_write(ring, 1 << vm_id); /* mask */
+	amdgpu_ring_write(ring, 1 << vmid); /* mask */
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
 	amdgpu_ring_write(ring, 0xC);
 }
@@ -1127,14 +1140,14 @@ static void uvd_v6_0_enc_ring_insert_end(struct amdgpu_ring *ring)
 }
 
 static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
-        unsigned int vm_id, uint64_t pd_addr)
+        unsigned int vmid, uint64_t pd_addr)
 {
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_UPDATE_PTB);
-	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, pd_addr >> 12);
 
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_FLUSH_TLB);
-	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, vmid);
 }
 
 static bool uvd_v6_0_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 660fa41dc877aa6c920184532c6e212fe6077bfb..6b95f4f344b5315cb295a28a0868f008b8652acb 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -29,7 +29,6 @@
 #include "soc15_common.h"
 #include "mmsch_v1_0.h"
 
-#include "soc15ip.h"
 #include "uvd/uvd_7_0_offset.h"
 #include "uvd/uvd_7_0_sh_mask.h"
 #include "vce/vce_4_0_offset.h"
@@ -385,7 +384,7 @@ static int uvd_v7_0_early_init(void *handle)
 static int uvd_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
-	struct amd_sched_rq *rq;
+	struct drm_sched_rq *rq;
 	int i, r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
@@ -416,8 +415,8 @@ static int uvd_v7_0_sw_init(void *handle)
 	}
 
 	ring = &adev->uvd.ring_enc[0];
-	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
-	r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity_enc,
+	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+	r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc,
 				  rq, amdgpu_sched_jobs, NULL);
 	if (r) {
 		DRM_ERROR("Failed setting up UVD ENC run queue.\n");
@@ -472,7 +471,7 @@ static int uvd_v7_0_sw_fini(void *handle)
 	if (r)
 		return r;
 
-	amd_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
+	drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
 
 	for (i = 0; i < adev->uvd.num_enc_rings; ++i)
 		amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
@@ -1086,6 +1085,8 @@ static void uvd_v7_0_stop(struct amdgpu_device *adev)
 static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
 				     unsigned flags)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
 
 	amdgpu_ring_write(ring,
@@ -1123,6 +1124,7 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 			u64 seq, unsigned flags)
 {
+
 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
 
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_FENCE);
@@ -1141,6 +1143,8 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
  */
 static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(NBIF, 0,
 		mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0));
 	amdgpu_ring_write(ring, 0);
@@ -1155,6 +1159,8 @@ static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
  */
 static void uvd_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0));
 	amdgpu_ring_write(ring, 1);
 }
@@ -1212,11 +1218,13 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
  */
 static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
 				  struct amdgpu_ib *ib,
-				  unsigned vm_id, bool ctx_switch)
+				  unsigned vmid, bool ctx_switch)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
-	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, vmid);
 
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
@@ -1238,10 +1246,10 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
  * Write enc ring commands to execute the indirect buffer
  */
 static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
-		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
+		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
 {
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
-	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, ib->length_dw);
@@ -1250,6 +1258,8 @@ static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
 static void uvd_v7_0_vm_reg_write(struct amdgpu_ring *ring,
 				uint32_t data0, uint32_t data1)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
 	amdgpu_ring_write(ring, data0);
@@ -1264,6 +1274,8 @@ static void uvd_v7_0_vm_reg_write(struct amdgpu_ring *ring,
 static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring,
 				uint32_t data0, uint32_t data1, uint32_t mask)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
 	amdgpu_ring_write(ring, data0);
@@ -1279,25 +1291,26 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring,
 }
 
 static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					unsigned vm_id, uint64_t pd_addr)
+					unsigned vmid, uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	uint32_t data0, data1, mask;
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
+	uint32_t data0, data1, mask;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
-	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
+	data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2;
 	data1 = upper_32_bits(pd_addr);
 	uvd_v7_0_vm_reg_write(ring, data0, data1);
 
-	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
 	data1 = lower_32_bits(pd_addr);
 	uvd_v7_0_vm_reg_write(ring, data0, data1);
 
-	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
 	data1 = lower_32_bits(pd_addr);
 	mask = 0xffffffff;
 	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
@@ -1309,36 +1322,47 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 
 	/* wait for flush */
 	data0 = (hub->vm_inv_eng0_ack + eng) << 2;
-	data1 = 1 << vm_id;
-	mask =  1 << vm_id;
+	data1 = 1 << vmid;
+	mask =  1 << vmid;
 	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
 }
 
+static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	int i;
+	struct amdgpu_device *adev = ring->adev;
+
+	for (i = 0; i < count; i++)
+		amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
+
+}
+
 static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
 {
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
 }
 
 static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
-			 unsigned int vm_id, uint64_t pd_addr)
+			 unsigned int vmid, uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2);
 	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
 
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
 
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
-	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
 	amdgpu_ring_write(ring, 0xffffffff);
 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
 
@@ -1350,8 +1374,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	/* wait for flush */
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
 	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-	amdgpu_ring_write(ring, 1 << vm_id);
-	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vmid);
+	amdgpu_ring_write(ring, 1 << vmid);
 }
 
 #if 0
@@ -1681,7 +1705,7 @@ const struct amd_ip_funcs uvd_v7_0_ip_funcs = {
 static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_UVD,
 	.align_mask = 0xf,
-	.nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0),
+	.nop = PACKET0(0x81ff, 0),
 	.support_64bit_ptrs = false,
 	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = uvd_v7_0_ring_get_rptr,
@@ -1700,7 +1724,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 	.emit_hdp_invalidate = uvd_v7_0_ring_emit_hdp_invalidate,
 	.test_ring = uvd_v7_0_ring_test_ring,
 	.test_ib = amdgpu_uvd_ring_test_ib,
-	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_nop = uvd_v7_0_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.begin_use = amdgpu_uvd_ring_begin_use,
 	.end_use = amdgpu_uvd_ring_end_use,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index cf81065e3c5ab95030f24f4a883924f27c71bc90..a5355eb689f145a81ec8296d8cfdcc27600b0012 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -834,24 +834,24 @@ static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
 }
 
 static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
-		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
+		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
 {
 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
-	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, ib->length_dw);
 }
 
 static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
-			 unsigned int vm_id, uint64_t pd_addr)
+			 unsigned int vmid, uint64_t pd_addr)
 {
 	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
-	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, pd_addr >> 12);
 
 	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
-	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, VCE_CMD_END);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index f2f7136500744cc5cabf64a6814db31856e25bb2..7cf2eef68cf24d32c1dc2b50dacd78ccc58d9b3b 100755
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -32,7 +32,6 @@
 #include "soc15_common.h"
 #include "mmsch_v1_0.h"
 
-#include "soc15ip.h"
 #include "vce/vce_4_0_offset.h"
 #include "vce/vce_4_0_default.h"
 #include "vce/vce_4_0_sh_mask.h"
@@ -424,7 +423,7 @@ static int vce_v4_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
+	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 		size += VCE_V4_0_FW_SIZE;
 
@@ -939,10 +938,10 @@ static int vce_v4_0_set_powergating_state(void *handle,
 #endif
 
 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
-		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
+		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
 {
 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
-	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, ib->length_dw);
@@ -966,25 +965,26 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
 }
 
 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
-			 unsigned int vm_id, uint64_t pd_addr)
+			 unsigned int vmid, uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
-	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2);
 	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
 
 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
-	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
 
 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
-	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
 	amdgpu_ring_write(ring, 0xffffffff);
 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
 
@@ -996,8 +996,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 	/* wait for flush */
 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
 	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-	amdgpu_ring_write(ring, 1 << vm_id);
-	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vmid);
+	amdgpu_ring_write(ring, 1 << vmid);
 }
 
 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index e4673f792545b1491bce7d4da9cb9881e469c461..b99e15c43e45cf847b4699f86e5e1c24187fe279 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -28,7 +28,6 @@
 #include "soc15d.h"
 #include "soc15_common.h"
 
-#include "soc15ip.h"
 #include "vcn/vcn_1_0_offset.h"
 #include "vcn/vcn_1_0_sh_mask.h"
 #include "hdp/hdp_4_0_offset.h"
@@ -744,6 +743,8 @@ static void vcn_v1_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
  */
 static void vcn_v1_0_dec_ring_insert_start(struct amdgpu_ring *ring)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
 	amdgpu_ring_write(ring, 0);
@@ -761,6 +762,8 @@ static void vcn_v1_0_dec_ring_insert_start(struct amdgpu_ring *ring)
  */
 static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
 	amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1);
@@ -777,6 +780,8 @@ static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring)
 static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
 				     unsigned flags)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
 
 	amdgpu_ring_write(ring,
@@ -812,6 +817,8 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
  */
 static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0));
 	amdgpu_ring_write(ring, 1);
 }
@@ -826,11 +833,13 @@ static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
  */
 static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
 				  struct amdgpu_ib *ib,
-				  unsigned vm_id, bool ctx_switch)
+				  unsigned vmid, bool ctx_switch)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
-	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, vmid);
 
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
@@ -846,6 +855,8 @@ static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
 static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring,
 				uint32_t data0, uint32_t data1)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
 	amdgpu_ring_write(ring, data0);
@@ -860,6 +871,8 @@ static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring,
 static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring,
 				uint32_t data0, uint32_t data1, uint32_t mask)
 {
+	struct amdgpu_device *adev = ring->adev;
+
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
 	amdgpu_ring_write(ring, data0);
@@ -875,25 +888,26 @@ static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring,
 }
 
 static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					unsigned vm_id, uint64_t pd_addr)
+					unsigned vmid, uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	uint32_t data0, data1, mask;
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
+	uint32_t data0, data1, mask;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
-	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
+	data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2;
 	data1 = upper_32_bits(pd_addr);
 	vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
 
-	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
 	data1 = lower_32_bits(pd_addr);
 	vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
 
-	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
 	data1 = lower_32_bits(pd_addr);
 	mask = 0xffffffff;
 	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
@@ -905,8 +919,8 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
 
 	/* wait for flush */
 	data0 = (hub->vm_inv_eng0_ack + eng) << 2;
-	data1 = 1 << vm_id;
-	mask =  1 << vm_id;
+	data1 = 1 << vmid;
+	mask =  1 << vmid;
 	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
 }
 
@@ -997,38 +1011,39 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring)
  * Write enc ring commands to execute the indirect buffer
  */
 static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
-		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
+		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
 {
 	amdgpu_ring_write(ring, VCN_ENC_CMD_IB);
-	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, ib->length_dw);
 }
 
 static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
-			 unsigned int vm_id, uint64_t pd_addr)
+			 unsigned int vmid, uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
 	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
 	amdgpu_ring_write(ring,
-			  (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
+			  (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2);
 	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
 
 	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
 	amdgpu_ring_write(ring,
-			  (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+			  (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
 
 	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
 	amdgpu_ring_write(ring,
-			  (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+			  (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
 	amdgpu_ring_write(ring, 0xffffffff);
 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
 
@@ -1040,8 +1055,8 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	/* wait for flush */
 	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
 	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-	amdgpu_ring_write(ring, 1 << vm_id);
-	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vmid);
+	amdgpu_ring_write(ring, 1 << vmid);
 }
 
 static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -1077,6 +1092,17 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void vcn_v1_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	int i;
+	struct amdgpu_device *adev = ring->adev;
+
+	for (i = 0; i < count; i++)
+		amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
+
+}
+
+
 static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
 	.name = "vcn_v1_0",
 	.early_init = vcn_v1_0_early_init,
@@ -1100,7 +1126,7 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
 static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_VCN_DEC,
 	.align_mask = 0xf,
-	.nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0),
+	.nop = PACKET0(0x81ff, 0),
 	.support_64bit_ptrs = false,
 	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = vcn_v1_0_dec_ring_get_rptr,
@@ -1118,7 +1144,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
 	.emit_hdp_invalidate = vcn_v1_0_dec_ring_emit_hdp_invalidate,
 	.test_ring = amdgpu_vcn_dec_ring_test_ring,
 	.test_ib = amdgpu_vcn_dec_ring_test_ib,
-	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_nop = vcn_v1_0_ring_insert_nop,
 	.insert_start = vcn_v1_0_dec_ring_insert_start,
 	.insert_end = vcn_v1_0_dec_ring_insert_end,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index ca778cd4e6e84c1d4e0005dc5e8545207de1c787..b69ceafb78883e648609dab5927ed7049c720690 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -25,8 +25,6 @@
 #include "amdgpu_ih.h"
 #include "soc15.h"
 
-
-#include "soc15ip.h"
 #include "oss/osssys_4_0_offset.h"
 #include "oss/osssys_4_0_sh_mask.h"
 
@@ -97,10 +95,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 	/* disable irqs */
 	vega10_ih_disable_interrupts(adev);
 
-	if (adev->flags & AMD_IS_APU)
-		nbio_v7_0_ih_control(adev);
-	else
-		nbio_v6_1_ih_control(adev);
+	adev->nbio_funcs->ih_control(adev);
 
 	ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL);
 	/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
@@ -151,10 +146,8 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 						 ENABLE, 0);
 	}
 	WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr);
-	if (adev->flags & AMD_IS_APU)
-		nbio_v7_0_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index);
-	else
-		nbio_v6_1_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index);
+	adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+					    adev->irq.ih.doorbell_index);
 
 	tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL);
 	tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
@@ -334,8 +327,8 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev,
 	entry->client_id = dw[0] & 0xff;
 	entry->src_id = (dw[0] >> 8) & 0xff;
 	entry->ring_id = (dw[0] >> 16) & 0xff;
-	entry->vm_id = (dw[0] >> 24) & 0xf;
-	entry->vm_id_src = (dw[0] >> 31);
+	entry->vmid = (dw[0] >> 24) & 0xf;
+	entry->vmid_src = (dw[0] >> 31);
 	entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
 	entry->timestamp_src = dw[2] >> 31;
 	entry->pas_id = dw[3] & 0xffff;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
new file mode 100644
index 0000000000000000000000000000000000000000..b7bdd04793d69afb52aff348dc49cf369f1f7ad7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "soc15ip.h"
+
+int vega10_reg_base_init(struct amdgpu_device *adev)
+{
+	/* HW has more IP blocks,  only initialized the blocke beend by our driver  */
+	uint32_t i;
+	for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+		adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+		adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+		adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+		adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+		adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+		adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+		adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
+		adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i]));
+		adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
+		adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+		adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCE_BASE.instance[i]));
+		adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+		adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
+		adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
+		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+		adev->reg_offset[PWR_HWIP][i] = (uint32_t *)(&(PWR_BASE.instance[i]));
+		adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIF_BASE.instance[i]));
+
+	}
+	return 0;
+}
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index bb8ca9489546aed8be038d8c67dd86b388289cd0..da2b99c2d95f06953d285b0e6bbd4093e768f423 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -282,29 +282,29 @@ static void vi_init_golden_registers(struct amdgpu_device *adev)
 
 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
-		amdgpu_program_register_sequence(adev,
-						 iceland_mgcg_cgcg_init,
-						 ARRAY_SIZE(iceland_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							iceland_mgcg_cgcg_init,
+							ARRAY_SIZE(iceland_mgcg_cgcg_init));
 		break;
 	case CHIP_FIJI:
-		amdgpu_program_register_sequence(adev,
-						 fiji_mgcg_cgcg_init,
-						 ARRAY_SIZE(fiji_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							fiji_mgcg_cgcg_init,
+							ARRAY_SIZE(fiji_mgcg_cgcg_init));
 		break;
 	case CHIP_TONGA:
-		amdgpu_program_register_sequence(adev,
-						 tonga_mgcg_cgcg_init,
-						 ARRAY_SIZE(tonga_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							tonga_mgcg_cgcg_init,
+							ARRAY_SIZE(tonga_mgcg_cgcg_init));
 		break;
 	case CHIP_CARRIZO:
-		amdgpu_program_register_sequence(adev,
-						 cz_mgcg_cgcg_init,
-						 ARRAY_SIZE(cz_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							cz_mgcg_cgcg_init,
+							ARRAY_SIZE(cz_mgcg_cgcg_init));
 		break;
 	case CHIP_STONEY:
-		amdgpu_program_register_sequence(adev,
-						 stoney_mgcg_cgcg_init,
-						 ARRAY_SIZE(stoney_mgcg_cgcg_init));
+		amdgpu_device_program_register_sequence(adev,
+							stoney_mgcg_cgcg_init,
+							ARRAY_SIZE(stoney_mgcg_cgcg_init));
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
@@ -449,14 +449,18 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev,
 
 static void vi_detect_hw_virtualization(struct amdgpu_device *adev)
 {
-	uint32_t reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
-	/* bit0: 0 means pf and 1 means vf */
-	/* bit31: 0 means disable IOV and 1 means enable */
-	if (reg & 1)
-		adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
-
-	if (reg & 0x80000000)
-		adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
+	uint32_t reg = 0;
+
+	if (adev->asic_type == CHIP_TONGA ||
+	    adev->asic_type == CHIP_FIJI) {
+	       reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
+	       /* bit0: 0 means pf and 1 means vf */
+	       if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER))
+		       adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
+	       /* bit31: 0 means disable IOV and 1 means enable */
+	       if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, IOV_ENABLE))
+		       adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
+	}
 
 	if (reg == 0) {
 		if (is_virtual_machine()) /* passthrough mode exclus sr-iov mode */
@@ -667,7 +671,7 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev)
 	/* disable BM */
 	pci_clear_master(adev->pdev);
 	/* reset */
-	amdgpu_pci_config_reset(adev);
+	amdgpu_device_pci_config_reset(adev);
 
 	udelay(100);
 
@@ -891,8 +895,8 @@ static int vi_common_early_init(void *handle)
 
 	adev->asic_funcs = &vi_asic_funcs;
 
-	if (amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SMC) &&
-		(amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC)))
+	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SMC) &&
+	    (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC)))
 		smc_enabled = true;
 
 	adev->rev_id = vi_get_rev_id(adev);
@@ -1074,7 +1078,7 @@ static int vi_common_early_init(void *handle)
 	/* vi use smc load by default */
 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 
-	amdgpu_get_pcie_info(adev);
+	amdgpu_device_get_pcie_info(adev);
 
 	return 0;
 }
@@ -1487,115 +1491,115 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
 		/* topaz has no DCE, UVD, VCE */
-		amdgpu_ip_block_add(adev, &vi_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v7_4_ip_block);
-		amdgpu_ip_block_add(adev, &iceland_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v7_4_ip_block);
+		amdgpu_device_ip_block_add(adev, &iceland_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display)
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block);
-		amdgpu_ip_block_add(adev, &sdma_v2_4_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &sdma_v2_4_ip_block);
 		break;
 	case CHIP_FIJI:
-		amdgpu_ip_block_add(adev, &vi_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v8_5_ip_block);
-		amdgpu_ip_block_add(adev, &tonga_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v8_5_ip_block);
+		amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
-			amdgpu_ip_block_add(adev, &dm_ip_block);
+			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 #endif
 		else
-			amdgpu_ip_block_add(adev, &dce_v10_1_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block);
-		amdgpu_ip_block_add(adev, &sdma_v3_0_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_v10_1_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
 		if (!amdgpu_sriov_vf(adev)) {
-			amdgpu_ip_block_add(adev, &uvd_v6_0_ip_block);
-			amdgpu_ip_block_add(adev, &vce_v3_0_ip_block);
+			amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block);
+			amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block);
 		}
 		break;
 	case CHIP_TONGA:
-		amdgpu_ip_block_add(adev, &vi_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v8_0_ip_block);
-		amdgpu_ip_block_add(adev, &tonga_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
-			amdgpu_ip_block_add(adev, &dm_ip_block);
+			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 #endif
 		else
-			amdgpu_ip_block_add(adev, &dce_v10_0_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block);
-		amdgpu_ip_block_add(adev, &sdma_v3_0_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_v10_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
 		if (!amdgpu_sriov_vf(adev)) {
-			amdgpu_ip_block_add(adev, &uvd_v5_0_ip_block);
-			amdgpu_ip_block_add(adev, &vce_v3_0_ip_block);
+			amdgpu_device_ip_block_add(adev, &uvd_v5_0_ip_block);
+			amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block);
 		}
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS12:
-		amdgpu_ip_block_add(adev, &vi_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v8_1_ip_block);
-		amdgpu_ip_block_add(adev, &tonga_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block);
+		amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display)
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
-			amdgpu_ip_block_add(adev, &dm_ip_block);
+			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 #endif
 		else
-			amdgpu_ip_block_add(adev, &dce_v11_2_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block);
-		amdgpu_ip_block_add(adev, &sdma_v3_1_ip_block);
-		amdgpu_ip_block_add(adev, &uvd_v6_3_ip_block);
-		amdgpu_ip_block_add(adev, &vce_v3_4_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_v11_2_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &sdma_v3_1_ip_block);
+		amdgpu_device_ip_block_add(adev, &uvd_v6_3_ip_block);
+		amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block);
 		break;
 	case CHIP_CARRIZO:
-		amdgpu_ip_block_add(adev, &vi_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v8_0_ip_block);
-		amdgpu_ip_block_add(adev, &cz_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &cz_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display)
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
-			amdgpu_ip_block_add(adev, &dm_ip_block);
+			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 #endif
 		else
-			amdgpu_ip_block_add(adev, &dce_v11_0_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block);
-		amdgpu_ip_block_add(adev, &sdma_v3_0_ip_block);
-		amdgpu_ip_block_add(adev, &uvd_v6_0_ip_block);
-		amdgpu_ip_block_add(adev, &vce_v3_1_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &vce_v3_1_ip_block);
 #if defined(CONFIG_DRM_AMD_ACP)
-		amdgpu_ip_block_add(adev, &acp_ip_block);
+		amdgpu_device_ip_block_add(adev, &acp_ip_block);
 #endif
 		break;
 	case CHIP_STONEY:
-		amdgpu_ip_block_add(adev, &vi_common_ip_block);
-		amdgpu_ip_block_add(adev, &gmc_v8_0_ip_block);
-		amdgpu_ip_block_add(adev, &cz_ih_ip_block);
-		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
+		amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &cz_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display)
-			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
-			amdgpu_ip_block_add(adev, &dm_ip_block);
+			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 #endif
 		else
-			amdgpu_ip_block_add(adev, &dce_v11_0_ip_block);
-		amdgpu_ip_block_add(adev, &gfx_v8_1_ip_block);
-		amdgpu_ip_block_add(adev, &sdma_v3_0_ip_block);
-		amdgpu_ip_block_add(adev, &uvd_v6_2_ip_block);
-		amdgpu_ip_block_add(adev, &vce_v3_4_ip_block);
+			amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v8_1_ip_block);
+		amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &uvd_v6_2_ip_block);
+		amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block);
 #if defined(CONFIG_DRM_AMD_ACP)
-		amdgpu_ip_block_add(adev, &acp_ip_block);
+		amdgpu_device_ip_block_add(adev, &acp_ip_block);
 #endif
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index dbf3703cbd1b837cc1734ed7c0e7db8e16217ff6..19ddd2312e00d4c8f0178d4fe6130d8b80fb09b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -27,6 +27,8 @@
 #define SDMA1_REGISTER_OFFSET                             0x200 /* not a register */
 #define SDMA_MAX_INSTANCE 2
 
+#define KFD_VI_SDMA_QUEUE_OFFSET                      0x80 /* not a register */
+
 /* crtc instance offsets */
 #define CRTC0_REGISTER_OFFSET                 (0x1b9c - 0x1b9c)
 #define CRTC1_REGISTER_OFFSET                 (0x1d9c - 0x1b9c)
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 7bb0bc0ca3d6aaa9ff330d41c896089ce2121011..a317e76ffb5e4aae53ba208ed63c290d7b093dbc 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -1,4 +1,24 @@
-# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
 #
 # Makefile for Heterogenous System Architecture support for AMD GPU devices
 #
@@ -15,6 +35,8 @@ amdkfd-y	:= kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
 		kfd_process_queue_manager.o kfd_device_queue_manager.o \
 		kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
 		kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
-		kfd_dbgdev.o kfd_dbgmgr.o
+		kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
+
+amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
 
 obj-$(CONFIG_HSA_AMD)	+= amdkfd.o
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
new file mode 100644
index 0000000000000000000000000000000000000000..997a383dcb8b775c6ac27250d214da6b4129b304
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
@@ -0,0 +1,1384 @@
+/*
+ * Copyright 2015-2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if 0
+HW (VI) source code for CWSR trap handler
+#Version 18 + multiple trap handler
+
+// this performance-optimal version was originally from Seven Xu at SRDC
+
+// Revison #18   --...
+/* Rev History
+** #1. Branch from gc dv.   //gfxip/gfx8/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV)
+** #4. SR Memory Layout:
+**             1. VGPR-SGPR-HWREG-{LDS}
+**             2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern..
+** #5. Update: 1. Accurate g8sr_ts_save_d timestamp
+** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation)
+** #7. Update: 1. don't barrier if noLDS
+** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version
+**             2. Fix SQ issue by s_sleep 2
+** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last
+**             2. optimize s_buffer save by burst 16sgprs...
+** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs.
+** #11. Update 1. Add 2 more timestamp for debug version
+** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance
+** #13. Integ  1. Always use MUBUF for PV trap shader...
+** #14. Update 1. s_buffer_store soft clause...
+** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot.
+** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree
+** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part]
+**             2. PERF - Save LDS before save VGPR to cover LDS save long latency...
+** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32
+**             2. FUNC - Handle non-CWSR traps
+*/
+
+var G8SR_WDMEM_HWREG_OFFSET = 0
+var G8SR_WDMEM_SGPR_OFFSET  = 128  // in bytes
+
+// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore.
+
+var G8SR_DEBUG_TIMESTAMP = 0
+var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4  // ts_save_d timestamp offset relative to SGPR_SR_memory_offset
+var s_g8sr_ts_save_s    = s[34:35]   // save start
+var s_g8sr_ts_sq_save_msg  = s[36:37]   // The save shader send SAVEWAVE msg to spi
+var s_g8sr_ts_spi_wrexec   = s[38:39]   // the SPI write the sr address to SQ
+var s_g8sr_ts_save_d    = s[40:41]   // save end
+var s_g8sr_ts_restore_s = s[42:43]   // restore start
+var s_g8sr_ts_restore_d = s[44:45]   // restore end
+
+var G8SR_VGPR_SR_IN_DWX4 = 0
+var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000    // DWx4 stride is 4*4Bytes
+var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4  = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4
+
+
+/*************************************************************************/
+/*                  control on how to run the shader                     */
+/*************************************************************************/
+//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run)
+var EMU_RUN_HACK                    =   0
+var EMU_RUN_HACK_RESTORE_NORMAL     =   0
+var EMU_RUN_HACK_SAVE_NORMAL_EXIT   =   0
+var EMU_RUN_HACK_SAVE_SINGLE_WAVE   =   0
+var EMU_RUN_HACK_SAVE_FIRST_TIME    =   0                   //for interrupted restore in which the first save is through EMU_RUN_HACK
+var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO =   0                   //for interrupted restore in which the first save is through EMU_RUN_HACK
+var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI =   0                   //for interrupted restore in which the first save is through EMU_RUN_HACK
+var SAVE_LDS                        =   1
+var WG_BASE_ADDR_LO                 =   0x9000a000
+var WG_BASE_ADDR_HI                 =   0x0
+var WAVE_SPACE                      =   0x5000              //memory size that each wave occupies in workgroup state mem
+var CTX_SAVE_CONTROL                =   0x0
+var CTX_RESTORE_CONTROL             =   CTX_SAVE_CONTROL
+var SIM_RUN_HACK                    =   0                   //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run)
+var SGPR_SAVE_USE_SQC               =   1                   //use SQC D$ to do the write
+var USE_MTBUF_INSTEAD_OF_MUBUF      =   0                   //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes
+var SWIZZLE_EN                      =   0                   //whether we use swizzled buffer addressing
+
+/**************************************************************************/
+/*                      variables                                         */
+/**************************************************************************/
+var SQ_WAVE_STATUS_INST_ATC_SHIFT  = 23
+var SQ_WAVE_STATUS_INST_ATC_MASK   = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_MASK   = 0x00000006
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT    = 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE     = 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT   = 8
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE    = 6
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT   = 24
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE    = 3                     //FIXME  sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
+
+var SQ_WAVE_TRAPSTS_SAVECTX_MASK    =   0x400
+var SQ_WAVE_TRAPSTS_EXCE_MASK       =   0x1FF                   // Exception mask
+var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT   =   10
+var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK   =   0x100
+var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT  =   8
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK    =   0x3FF
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT   =   0x0
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE    =   10
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK   =   0xFFFFF800
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT  =   11
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE   =   21
+
+var SQ_WAVE_IB_STS_RCNT_SHIFT           =   16                  //FIXME
+var SQ_WAVE_IB_STS_RCNT_SIZE            =   4                   //FIXME
+var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT   =   15                  //FIXME
+var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE    =   1                   //FIXME
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG   = 0x00007FFF    //FIXME
+
+var SQ_BUF_RSRC_WORD1_ATC_SHIFT     =   24
+var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT   =   27
+
+
+/*      Save        */
+var S_SAVE_BUF_RSRC_WORD1_STRIDE        =   0x00040000          //stride is 4 bytes
+var S_SAVE_BUF_RSRC_WORD3_MISC          =   0x00807FAC          //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+
+var S_SAVE_SPI_INIT_ATC_MASK            =   0x08000000          //bit[27]: ATC bit
+var S_SAVE_SPI_INIT_ATC_SHIFT           =   27
+var S_SAVE_SPI_INIT_MTYPE_MASK          =   0x70000000          //bit[30:28]: Mtype
+var S_SAVE_SPI_INIT_MTYPE_SHIFT         =   28
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK     =   0x04000000          //bit[26]: FirstWaveInTG
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT    =   26
+
+var S_SAVE_PC_HI_RCNT_SHIFT             =   28                  //FIXME  check with Brian to ensure all fields other than PC[47:0] can be used
+var S_SAVE_PC_HI_RCNT_MASK              =   0xF0000000          //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT     =   27                  //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_MASK      =   0x08000000          //FIXME
+
+var s_save_spi_init_lo              =   exec_lo
+var s_save_spi_init_hi              =   exec_hi
+
+                                                //tba_lo and tba_hi need to be saved/restored
+var s_save_pc_lo            =   ttmp0           //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+var s_save_pc_hi            =   ttmp1
+var s_save_exec_lo          =   ttmp2
+var s_save_exec_hi          =   ttmp3
+var s_save_status           =   ttmp4
+var s_save_trapsts          =   ttmp5           //not really used until the end of the SAVE routine
+var s_save_xnack_mask_lo    =   ttmp6
+var s_save_xnack_mask_hi    =   ttmp7
+var s_save_buf_rsrc0        =   ttmp8
+var s_save_buf_rsrc1        =   ttmp9
+var s_save_buf_rsrc2        =   ttmp10
+var s_save_buf_rsrc3        =   ttmp11
+
+var s_save_mem_offset       =   tma_lo
+var s_save_alloc_size       =   s_save_trapsts          //conflict
+var s_save_tmp              =   s_save_buf_rsrc2        //shared with s_save_buf_rsrc2  (conflict: should not use mem access with s_save_tmp at the same time)
+var s_save_m0               =   tma_hi
+
+/*      Restore     */
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE         =   S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC           =   S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_ATC_MASK             =   0x08000000          //bit[27]: ATC bit
+var S_RESTORE_SPI_INIT_ATC_SHIFT            =   27
+var S_RESTORE_SPI_INIT_MTYPE_MASK           =   0x70000000          //bit[30:28]: Mtype
+var S_RESTORE_SPI_INIT_MTYPE_SHIFT          =   28
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK      =   0x04000000          //bit[26]: FirstWaveInTG
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT     =   26
+
+var S_RESTORE_PC_HI_RCNT_SHIFT              =   S_SAVE_PC_HI_RCNT_SHIFT
+var S_RESTORE_PC_HI_RCNT_MASK               =   S_SAVE_PC_HI_RCNT_MASK
+var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT      =   S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+var S_RESTORE_PC_HI_FIRST_REPLAY_MASK       =   S_SAVE_PC_HI_FIRST_REPLAY_MASK
+
+var s_restore_spi_init_lo                   =   exec_lo
+var s_restore_spi_init_hi                   =   exec_hi
+
+var s_restore_mem_offset        =   ttmp2
+var s_restore_alloc_size        =   ttmp3
+var s_restore_tmp               =   ttmp6               //tba_lo/hi need to be restored
+var s_restore_mem_offset_save   =   s_restore_tmp       //no conflict
+
+var s_restore_m0            =   s_restore_alloc_size    //no conflict
+
+var s_restore_mode          =   ttmp7
+
+var s_restore_pc_lo         =   ttmp0
+var s_restore_pc_hi         =   ttmp1
+var s_restore_exec_lo       =   tma_lo                  //no conflict
+var s_restore_exec_hi       =   tma_hi                  //no conflict
+var s_restore_status        =   ttmp4
+var s_restore_trapsts       =   ttmp5
+var s_restore_xnack_mask_lo =   xnack_mask_lo
+var s_restore_xnack_mask_hi =   xnack_mask_hi
+var s_restore_buf_rsrc0     =   ttmp8
+var s_restore_buf_rsrc1     =   ttmp9
+var s_restore_buf_rsrc2     =   ttmp10
+var s_restore_buf_rsrc3     =   ttmp11
+
+/**************************************************************************/
+/*                      trap handler entry points                         */
+/**************************************************************************/
+/* Shader Main*/
+
+shader main
+  asic(VI)
+  type(CS)
+
+
+    if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))                   //hack to use trap_id for determining save/restore
+        //FIXME VCCZ un-init assertion s_getreg_b32     s_save_status, hwreg(HW_REG_STATUS)         //save STATUS since we will change SCC
+        s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000              //change SCC
+        s_cmp_eq_u32 s_save_tmp, 0x007e0000                         //Save: trap_id = 0x7e. Restore: trap_id = 0x7f.
+        s_cbranch_scc0 L_JUMP_TO_RESTORE                            //do not need to recover STATUS here  since we are going to RESTORE
+        //FIXME  s_setreg_b32   hwreg(HW_REG_STATUS),   s_save_status       //need to recover STATUS since we are going to SAVE
+        s_branch L_SKIP_RESTORE                                     //NOT restore, SAVE actually
+    else
+        s_branch L_SKIP_RESTORE                                     //NOT restore. might be a regular trap or save
+    end
+
+L_JUMP_TO_RESTORE:
+    s_branch L_RESTORE                                              //restore
+
+L_SKIP_RESTORE:
+
+    s_getreg_b32    s_save_status, hwreg(HW_REG_STATUS)                             //save STATUS since we will change SCC
+    s_andn2_b32     s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK      //check whether this is for save
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    s_and_b32       s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK    //check whether this is for save
+    s_cbranch_scc1  L_SAVE                                      //this is the operation for save
+
+    // *********    Handle non-CWSR traps       *******************
+if (!EMU_RUN_HACK)
+    /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */
+    s_load_dwordx4  [ttmp8,ttmp9,ttmp10, ttmp11], [tma_lo,tma_hi], 0
+    s_waitcnt lgkmcnt(0)
+    s_or_b32        ttmp7, ttmp8, ttmp9
+    s_cbranch_scc0  L_NO_NEXT_TRAP //next level trap handler not been set
+    s_setreg_b32    hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC)
+    s_setpc_b64     [ttmp8,ttmp9] //jump to next level trap handler
+
+L_NO_NEXT_TRAP:
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    s_and_b32       s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
+    s_cbranch_scc1  L_EXCP_CASE   // Exception, jump back to the shader program directly.
+    s_add_u32       ttmp0, ttmp0, 4   // S_TRAP case, add 4 to ttmp0
+    s_addc_u32  ttmp1, ttmp1, 0
+L_EXCP_CASE:
+    s_and_b32   ttmp1, ttmp1, 0xFFFF
+    s_setreg_b32    hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC)
+    s_rfe_b64       [ttmp0, ttmp1]
+end
+    // *********        End handling of non-CWSR traps   *******************
+
+/**************************************************************************/
+/*                      save routine                                      */
+/**************************************************************************/
+
+L_SAVE:
+
+if G8SR_DEBUG_TIMESTAMP
+        s_memrealtime   s_g8sr_ts_save_s
+        s_waitcnt lgkmcnt(0)         //FIXME, will cause xnack??
+end
+
+    //check whether there is mem_viol
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    s_and_b32   s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
+    s_cbranch_scc0  L_NO_PC_REWIND
+
+    //if so, need rewind PC assuming GDS operation gets NACKed
+    s_mov_b32       s_save_tmp, 0                                                           //clear mem_viol bit
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp    //clear mem_viol bit
+    s_and_b32       s_save_pc_hi, s_save_pc_hi, 0x0000ffff    //pc[47:32]
+    s_sub_u32       s_save_pc_lo, s_save_pc_lo, 8             //pc[31:0]-8
+    s_subb_u32      s_save_pc_hi, s_save_pc_hi, 0x0           // -scc
+
+L_NO_PC_REWIND:
+    s_mov_b32       s_save_tmp, 0                                                           //clear saveCtx bit
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp     //clear saveCtx bit
+
+    s_mov_b32       s_save_xnack_mask_lo,   xnack_mask_lo                                   //save XNACK_MASK
+    s_mov_b32       s_save_xnack_mask_hi,   xnack_mask_hi    //save XNACK must before any memory operation
+    s_getreg_b32    s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)                   //save RCNT
+    s_lshl_b32      s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
+    s_or_b32        s_save_pc_hi, s_save_pc_hi, s_save_tmp
+    s_getreg_b32    s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE)   //save FIRST_REPLAY
+    s_lshl_b32      s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+    s_or_b32        s_save_pc_hi, s_save_pc_hi, s_save_tmp
+    s_getreg_b32    s_save_tmp, hwreg(HW_REG_IB_STS)                                        //clear RCNT and FIRST_REPLAY in IB_STS
+    s_and_b32       s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
+
+    s_setreg_b32    hwreg(HW_REG_IB_STS), s_save_tmp
+
+    /*      inform SPI the readiness and wait for SPI's go signal */
+    s_mov_b32       s_save_exec_lo, exec_lo                                                 //save EXEC and use EXEC for the go signal from SPI
+    s_mov_b32       s_save_exec_hi, exec_hi
+    s_mov_b64       exec,   0x0                                                             //clear EXEC to get ready to receive
+
+if G8SR_DEBUG_TIMESTAMP
+        s_memrealtime  s_g8sr_ts_sq_save_msg
+        s_waitcnt lgkmcnt(0)
+end
+
+    if (EMU_RUN_HACK)
+
+    else
+        s_sendmsg   sendmsg(MSG_SAVEWAVE)  //send SPI a message and wait for SPI's write to EXEC
+    end
+
+  L_SLEEP:
+    s_sleep 0x2                // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0
+
+    if (EMU_RUN_HACK)
+
+    else
+        s_cbranch_execz L_SLEEP
+    end
+
+if G8SR_DEBUG_TIMESTAMP
+        s_memrealtime  s_g8sr_ts_spi_wrexec
+        s_waitcnt lgkmcnt(0)
+end
+
+    /*      setup Resource Contants    */
+    if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+        //calculate wd_addr using absolute thread id
+        v_readlane_b32 s_save_tmp, v9, 0
+        s_lshr_b32 s_save_tmp, s_save_tmp, 6
+        s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE
+        s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+        s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+        s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+    else
+    end
+    if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+        s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+        s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+        s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+    else
+    end
+
+
+    s_mov_b32       s_save_buf_rsrc0,   s_save_spi_init_lo                                                      //base_addr_lo
+    s_and_b32       s_save_buf_rsrc1,   s_save_spi_init_hi, 0x0000FFFF                                          //base_addr_hi
+    s_or_b32        s_save_buf_rsrc1,   s_save_buf_rsrc1,  S_SAVE_BUF_RSRC_WORD1_STRIDE
+    s_mov_b32       s_save_buf_rsrc2,   0                                                                       //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
+    s_mov_b32       s_save_buf_rsrc3,   S_SAVE_BUF_RSRC_WORD3_MISC
+    s_and_b32       s_save_tmp,         s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
+    s_lshr_b32      s_save_tmp,         s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)         //get ATC bit into position
+    s_or_b32        s_save_buf_rsrc3,   s_save_buf_rsrc3,  s_save_tmp                                           //or ATC
+    s_and_b32       s_save_tmp,         s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
+    s_lshr_b32      s_save_tmp,         s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)     //get MTYPE bits into position
+    s_or_b32        s_save_buf_rsrc3,   s_save_buf_rsrc3,  s_save_tmp                                           //or MTYPE
+
+    //FIXME  right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi  (might need to save them before using them?)
+    s_mov_b32       s_save_m0,          m0                                                                  //save M0
+
+    /*      global mem offset           */
+    s_mov_b32       s_save_mem_offset,  0x0                                                                     //mem offset initial value = 0
+
+
+
+
+    /*      save HW registers   */
+    //////////////////////////////
+
+  L_SAVE_HWREG:
+        // HWREG SR memory offset : size(VGPR)+size(SGPR)
+       get_vgpr_size_bytes(s_save_mem_offset)
+       get_sgpr_size_bytes(s_save_tmp)
+       s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+
+
+    s_mov_b32       s_save_buf_rsrc2, 0x4                               //NUM_RECORDS   in bytes
+    if (SWIZZLE_EN)
+        s_add_u32       s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0                     //FIXME need to use swizzle to enable bounds checking?
+    else
+        s_mov_b32       s_save_buf_rsrc2,  0x1000000                                //NUM_RECORDS in bytes
+    end
+
+
+    write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)                  //M0
+
+    if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME))
+        s_add_u32 s_save_pc_lo, s_save_pc_lo, 4             //pc[31:0]+4
+        s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0          //carry bit over
+        s_mov_b32   tba_lo, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO
+        s_mov_b32   tba_hi, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI
+    end
+
+    write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)                   //PC
+    write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+    write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)             //EXEC
+    write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
+    write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)              //STATUS
+
+    //s_save_trapsts conflicts with s_save_alloc_size
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset)             //TRAPSTS
+
+    write_hwreg_to_mem(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset)           //XNACK_MASK_LO
+    write_hwreg_to_mem(s_save_xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset)           //XNACK_MASK_HI
+
+    //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2
+    s_getreg_b32    s_save_m0, hwreg(HW_REG_MODE)                                                   //MODE
+    write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+    write_hwreg_to_mem(tba_lo, s_save_buf_rsrc0, s_save_mem_offset)                     //TBA_LO
+    write_hwreg_to_mem(tba_hi, s_save_buf_rsrc0, s_save_mem_offset)                     //TBA_HI
+
+
+
+    /*      the first wave in the threadgroup    */
+        // save fist_wave bits in tba_hi unused bit.26
+    s_and_b32       s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK     // extract fisrt wave bit
+    //s_or_b32        tba_hi, s_save_tmp, tba_hi                                        // save first wave bit to tba_hi.bits[26]
+    s_mov_b32        s_save_exec_hi, 0x0
+    s_or_b32         s_save_exec_hi, s_save_tmp, s_save_exec_hi                          // save first wave bit to s_save_exec_hi.bits[26]
+
+
+    /*          save SGPRs      */
+        // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
+    //////////////////////////////
+
+    // SGPR SR memory offset : size(VGPR)
+    get_vgpr_size_bytes(s_save_mem_offset)
+    // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)               //spgr_size
+    s_add_u32       s_save_alloc_size, s_save_alloc_size, 1
+    s_lshl_b32      s_save_alloc_size, s_save_alloc_size, 4                         //Number of SGPRs = (sgpr_size + 1) * 16   (non-zero value)
+
+    if (SGPR_SAVE_USE_SQC)
+        s_lshl_b32      s_save_buf_rsrc2,   s_save_alloc_size, 2                    //NUM_RECORDS in bytes
+    else
+        s_lshl_b32      s_save_buf_rsrc2,   s_save_alloc_size, 8                    //NUM_RECORDS in bytes (64 threads)
+    end
+
+    if (SWIZZLE_EN)
+        s_add_u32       s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0                     //FIXME need to use swizzle to enable bounds checking?
+    else
+        s_mov_b32       s_save_buf_rsrc2,  0x1000000                                //NUM_RECORDS in bytes
+    end
+
+
+    // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
+    //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
+    s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0
+    s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
+    s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+
+    s_mov_b32       m0, 0x0                         //SGPR initial index value =0
+  L_SAVE_SGPR_LOOP:
+    // SGPR is allocated in 16 SGPR granularity
+    s_movrels_b64   s0, s0     //s0 = s[0+m0], s1 = s[1+m0]
+    s_movrels_b64   s2, s2     //s2 = s[2+m0], s3 = s[3+m0]
+    s_movrels_b64   s4, s4     //s4 = s[4+m0], s5 = s[5+m0]
+    s_movrels_b64   s6, s6     //s6 = s[6+m0], s7 = s[7+m0]
+    s_movrels_b64   s8, s8     //s8 = s[8+m0], s9 = s[9+m0]
+    s_movrels_b64   s10, s10   //s10 = s[10+m0], s11 = s[11+m0]
+    s_movrels_b64   s12, s12   //s12 = s[12+m0], s13 = s[13+m0]
+    s_movrels_b64   s14, s14   //s14 = s[14+m0], s15 = s[15+m0]
+
+    write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4
+    s_add_u32       m0, m0, 16                                                      //next sgpr index
+    s_cmp_lt_u32    m0, s_save_alloc_size                                           //scc = (m0 < s_save_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_SAVE_SGPR_LOOP                                    //SGPR save is complete?
+    // restore s_save_buf_rsrc0,1
+    //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo
+    s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo
+
+
+
+
+    /*          save first 4 VGPR, then LDS save could use   */
+        // each wave will alloc 4 vgprs at least...
+    /////////////////////////////////////////////////////////////////////////////////////
+
+    s_mov_b32       s_save_mem_offset, 0
+    s_mov_b32       exec_lo, 0xFFFFFFFF                                             //need every thread from now on
+    s_mov_b32       exec_hi, 0xFFFFFFFF
+
+    if (SWIZZLE_EN)
+        s_add_u32       s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0                     //FIXME need to use swizzle to enable bounds checking?
+    else
+        s_mov_b32       s_save_buf_rsrc2,  0x1000000                                //NUM_RECORDS in bytes
+    end
+
+
+    // VGPR Allocated in 4-GPR granularity
+
+if G8SR_VGPR_SR_IN_DWX4
+        // the const stride for DWx4 is 4*4 bytes
+        s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF   // reset const stride to 0
+        s_or_b32  s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4  // const stride to 4*4 bytes
+
+        buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+        s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF   // reset const stride to 0
+        s_or_b32  s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE  // reset const stride to 4 bytes
+else
+        buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+        buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256
+        buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256*2
+        buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256*3
+end
+
+
+
+    /*          save LDS        */
+    //////////////////////////////
+
+  L_SAVE_LDS:
+
+        // Change EXEC to all threads...
+    s_mov_b32       exec_lo, 0xFFFFFFFF   //need every thread from now on
+    s_mov_b32       exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)             //lds_size
+    s_and_b32       s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF                //lds_size is zero?
+    s_cbranch_scc0  L_SAVE_LDS_DONE                                                                            //no lds used? jump to L_SAVE_DONE
+
+    s_barrier               //LDS is used? wait for other waves in the same TG
+    //s_and_b32     s_save_tmp, tba_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK                //exec is still used here
+    s_and_b32       s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK                //exec is still used here
+    s_cbranch_scc0  L_SAVE_LDS_DONE
+
+        // first wave do LDS save;
+
+    s_lshl_b32      s_save_alloc_size, s_save_alloc_size, 6                         //LDS size in dwords = lds_size * 64dw
+    s_lshl_b32      s_save_alloc_size, s_save_alloc_size, 2                         //LDS size in bytes
+    s_mov_b32       s_save_buf_rsrc2,  s_save_alloc_size                            //NUM_RECORDS in bytes
+
+    // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+    //
+    get_vgpr_size_bytes(s_save_mem_offset)
+    get_sgpr_size_bytes(s_save_tmp)
+    s_add_u32  s_save_mem_offset, s_save_mem_offset, s_save_tmp
+    s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+
+    if (SWIZZLE_EN)
+        s_add_u32       s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0       //FIXME need to use swizzle to enable bounds checking?
+    else
+        s_mov_b32       s_save_buf_rsrc2,  0x1000000                  //NUM_RECORDS in bytes
+    end
+
+    s_mov_b32       m0, 0x0                                               //lds_offset initial value = 0
+
+
+var LDS_DMA_ENABLE = 0
+var UNROLL = 0
+if UNROLL==0 && LDS_DMA_ENABLE==1
+        s_mov_b32  s3, 256*2
+        s_nop 0
+        s_nop 0
+        s_nop 0
+  L_SAVE_LDS_LOOP:
+        //TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.???
+    if (SAVE_LDS)     //SPI always alloc LDS space in 128DW granularity
+            buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1            // first 64DW
+            buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
+    end
+
+    s_add_u32       m0, m0, s3                                          //every buffer_store_lds does 256 bytes
+    s_add_u32       s_save_mem_offset, s_save_mem_offset, s3                            //mem offset increased by 256 bytes
+    s_cmp_lt_u32    m0, s_save_alloc_size                                               //scc=(m0 < s_save_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_SAVE_LDS_LOOP                                                     //LDS save is complete?
+
+elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL  , has ichace miss
+      // store from higest LDS address to lowest
+      s_mov_b32  s3, 256*2
+      s_sub_u32  m0, s_save_alloc_size, s3
+      s_add_u32 s_save_mem_offset, s_save_mem_offset, m0
+      s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9   // how many 128 trunks...
+      s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size   // store from higheset addr to lowest
+      s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4   // PC offset increment,  each LDS save block cost 6*4 Bytes instruction
+      s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4   //2is the below 2 inst...//s_addc and s_setpc
+      s_nop 0
+      s_nop 0
+      s_nop 0   //pad 3 dw to let LDS_DMA align with 64Bytes
+      s_getpc_b64 s[0:1]                              // reuse s[0:1], since s[0:1] already saved
+      s_add_u32   s0, s0,s_save_alloc_size
+      s_addc_u32  s1, s1, 0
+      s_setpc_b64 s[0:1]
+
+
+       for var i =0; i< 128; i++
+            // be careful to make here a 64Byte aligned address, which could improve performance...
+            buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0           // first 64DW
+            buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256           // second 64DW
+
+        if i!=127
+        s_sub_u32  m0, m0, s3      // use a sgpr to shrink 2DW-inst to 1DW inst to improve performance , i.e.  pack more LDS_DMA inst to one Cacheline
+            s_sub_u32  s_save_mem_offset, s_save_mem_offset,  s3
+            end
+       end
+
+else   // BUFFER_STORE
+      v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0
+      v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2     // tid
+      v_mul_i32_i24 v2, v3, 8   // tid*8
+      v_mov_b32 v3, 256*2
+      s_mov_b32 m0, 0x10000
+      s_mov_b32 s0, s_save_buf_rsrc3
+      s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF    // disable add_tid
+      s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000   //DFMT
+
+L_SAVE_LDS_LOOP_VECTOR:
+      ds_read_b64 v[0:1], v2    //x =LDS[a], byte address
+      s_waitcnt lgkmcnt(0)
+      buffer_store_dwordx2  v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1  glc:1  slc:1
+//      s_waitcnt vmcnt(0)
+      v_add_u32 v2, vcc[0:1], v2, v3
+      v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+      s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
+
+      // restore rsrc3
+      s_mov_b32 s_save_buf_rsrc3, s0
+
+end
+
+L_SAVE_LDS_DONE:
+
+
+    /*          save VGPRs  - set the Rest VGPRs        */
+    //////////////////////////////////////////////////////////////////////////////////////
+  L_SAVE_VGPR:
+    // VGPR SR memory offset: 0
+    // TODO rearrange the RSRC words to use swizzle for VGPR save...
+
+    s_mov_b32       s_save_mem_offset, (0+256*4)                                    // for the rest VGPRs
+    s_mov_b32       exec_lo, 0xFFFFFFFF                                             //need every thread from now on
+    s_mov_b32       exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)                   //vpgr_size
+    s_add_u32       s_save_alloc_size, s_save_alloc_size, 1
+    s_lshl_b32      s_save_alloc_size, s_save_alloc_size, 2                         //Number of VGPRs = (vgpr_size + 1) * 4    (non-zero value)   //FIXME for GFX, zero is possible
+    s_lshl_b32      s_save_buf_rsrc2,  s_save_alloc_size, 8                         //NUM_RECORDS in bytes (64 threads*4)
+    if (SWIZZLE_EN)
+        s_add_u32       s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0                     //FIXME need to use swizzle to enable bounds checking?
+    else
+        s_mov_b32       s_save_buf_rsrc2,  0x1000000                                //NUM_RECORDS in bytes
+    end
+
+
+    // VGPR Allocated in 4-GPR granularity
+
+if G8SR_VGPR_SR_IN_DWX4
+        // the const stride for DWx4 is 4*4 bytes
+        s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF   // reset const stride to 0
+        s_or_b32  s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4  // const stride to 4*4 bytes
+
+        s_mov_b32         m0, 4     // skip first 4 VGPRs
+        s_cmp_lt_u32      m0, s_save_alloc_size
+        s_cbranch_scc0    L_SAVE_VGPR_LOOP_END      // no more vgprs
+
+        s_set_gpr_idx_on  m0, 0x1   // This will change M0
+        s_add_u32         s_save_alloc_size, s_save_alloc_size, 0x1000  // because above inst change m0
+L_SAVE_VGPR_LOOP:
+        v_mov_b32         v0, v0   // v0 = v[0+m0]
+        v_mov_b32         v1, v1
+        v_mov_b32         v2, v2
+        v_mov_b32         v3, v3
+
+
+        buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+        s_add_u32         m0, m0, 4
+        s_add_u32         s_save_mem_offset, s_save_mem_offset, 256*4
+        s_cmp_lt_u32      m0, s_save_alloc_size
+    s_cbranch_scc1  L_SAVE_VGPR_LOOP                                                //VGPR save is complete?
+    s_set_gpr_idx_off
+L_SAVE_VGPR_LOOP_END:
+
+        s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF   // reset const stride to 0
+        s_or_b32  s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE  // reset const stride to 4 bytes
+else
+    // VGPR store using dw burst
+    s_mov_b32         m0, 0x4   //VGPR initial index value =0
+    s_cmp_lt_u32      m0, s_save_alloc_size
+    s_cbranch_scc0    L_SAVE_VGPR_END
+
+
+    s_set_gpr_idx_on    m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
+    s_add_u32       s_save_alloc_size, s_save_alloc_size, 0x1000                    //add 0x1000 since we compare m0 against it later
+
+  L_SAVE_VGPR_LOOP:
+    v_mov_b32       v0, v0              //v0 = v[0+m0]
+    v_mov_b32       v1, v1              //v0 = v[0+m0]
+    v_mov_b32       v2, v2              //v0 = v[0+m0]
+    v_mov_b32       v3, v3              //v0 = v[0+m0]
+
+    if(USE_MTBUF_INSTEAD_OF_MUBUF)
+        tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+    else
+        buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+        buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256
+        buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256*2
+        buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256*3
+    end
+
+    s_add_u32       m0, m0, 4                                                       //next vgpr index
+    s_add_u32       s_save_mem_offset, s_save_mem_offset, 256*4                     //every buffer_store_dword does 256 bytes
+    s_cmp_lt_u32    m0, s_save_alloc_size                                           //scc = (m0 < s_save_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_SAVE_VGPR_LOOP                                                //VGPR save is complete?
+    s_set_gpr_idx_off
+end
+
+L_SAVE_VGPR_END:
+
+
+
+
+
+
+    /*     S_PGM_END_SAVED  */                              //FIXME  graphics ONLY
+    if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT))
+        s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff    //pc[47:32]
+        s_add_u32 s_save_pc_lo, s_save_pc_lo, 4             //pc[31:0]+4
+        s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0          //carry bit over
+        s_rfe_b64 s_save_pc_lo                              //Return to the main shader program
+    else
+    end
+
+// Save Done timestamp
+if G8SR_DEBUG_TIMESTAMP
+        s_memrealtime   s_g8sr_ts_save_d
+        // SGPR SR memory offset : size(VGPR)
+        get_vgpr_size_bytes(s_save_mem_offset)
+        s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET
+        s_waitcnt lgkmcnt(0)         //FIXME, will cause xnack??
+        // Need reset rsrc2??
+        s_mov_b32 m0, s_save_mem_offset
+        s_mov_b32 s_save_buf_rsrc2,  0x1000000                                  //NUM_RECORDS in bytes
+        s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0       glc:1
+end
+
+
+    s_branch    L_END_PGM
+
+
+
+/**************************************************************************/
+/*                      restore routine                                   */
+/**************************************************************************/
+
+L_RESTORE:
+    /*      Setup Resource Contants    */
+    if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+        //calculate wd_addr using absolute thread id
+        v_readlane_b32 s_restore_tmp, v9, 0
+        s_lshr_b32 s_restore_tmp, s_restore_tmp, 6
+        s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE
+        s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO
+        s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI
+        s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL
+    else
+    end
+
+if G8SR_DEBUG_TIMESTAMP
+        s_memrealtime   s_g8sr_ts_restore_s
+        s_waitcnt lgkmcnt(0)         //FIXME, will cause xnack??
+        // tma_lo/hi are sgpr 110, 111, which will not used for 112 SGPR allocated case...
+        s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0]
+        s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1]   //backup ts to ttmp0/1, sicne exec will be finally restored..
+end
+
+
+
+    s_mov_b32       s_restore_buf_rsrc0,    s_restore_spi_init_lo                                                           //base_addr_lo
+    s_and_b32       s_restore_buf_rsrc1,    s_restore_spi_init_hi, 0x0000FFFF                                               //base_addr_hi
+    s_or_b32        s_restore_buf_rsrc1,    s_restore_buf_rsrc1,  S_RESTORE_BUF_RSRC_WORD1_STRIDE
+    s_mov_b32       s_restore_buf_rsrc2,    0                                                                               //NUM_RECORDS initial value = 0 (in bytes)
+    s_mov_b32       s_restore_buf_rsrc3,    S_RESTORE_BUF_RSRC_WORD3_MISC
+    s_and_b32       s_restore_tmp,          s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
+    s_lshr_b32      s_restore_tmp,          s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)       //get ATC bit into position
+    s_or_b32        s_restore_buf_rsrc3,    s_restore_buf_rsrc3,  s_restore_tmp                                             //or ATC
+    s_and_b32       s_restore_tmp,          s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
+    s_lshr_b32      s_restore_tmp,          s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)   //get MTYPE bits into position
+    s_or_b32        s_restore_buf_rsrc3,    s_restore_buf_rsrc3,  s_restore_tmp                                             //or MTYPE
+
+    /*      global mem offset           */
+//  s_mov_b32       s_restore_mem_offset, 0x0                               //mem offset initial value = 0
+
+    /*      the first wave in the threadgroup    */
+    s_and_b32       s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+    s_cbranch_scc0  L_RESTORE_VGPR
+
+    /*          restore LDS     */
+    //////////////////////////////
+  L_RESTORE_LDS:
+
+    s_mov_b32       exec_lo, 0xFFFFFFFF                                                     //need every thread from now on   //be consistent with SAVE although can be moved ahead
+    s_mov_b32       exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)              //lds_size
+    s_and_b32       s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF                  //lds_size is zero?
+    s_cbranch_scc0  L_RESTORE_VGPR                                                          //no lds used? jump to L_RESTORE_VGPR
+    s_lshl_b32      s_restore_alloc_size, s_restore_alloc_size, 6                           //LDS size in dwords = lds_size * 64dw
+    s_lshl_b32      s_restore_alloc_size, s_restore_alloc_size, 2                           //LDS size in bytes
+    s_mov_b32       s_restore_buf_rsrc2,    s_restore_alloc_size                            //NUM_RECORDS in bytes
+
+    // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+    //
+    get_vgpr_size_bytes(s_restore_mem_offset)
+    get_sgpr_size_bytes(s_restore_tmp)
+    s_add_u32  s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+    s_add_u32  s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()            //FIXME, Check if offset overflow???
+
+
+    if (SWIZZLE_EN)
+        s_add_u32       s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0                       //FIXME need to use swizzle to enable bounds checking?
+    else
+        s_mov_b32       s_restore_buf_rsrc2,  0x1000000                                     //NUM_RECORDS in bytes
+    end
+    s_mov_b32       m0, 0x0                                                                 //lds_offset initial value = 0
+
+  L_RESTORE_LDS_LOOP:
+    if (SAVE_LDS)
+        buffer_load_dword   v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1                    // first 64DW
+        buffer_load_dword   v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256         // second 64DW
+    end
+    s_add_u32       m0, m0, 256*2                                               // 128 DW
+    s_add_u32       s_restore_mem_offset, s_restore_mem_offset, 256*2           //mem offset increased by 128DW
+    s_cmp_lt_u32    m0, s_restore_alloc_size                                    //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_RESTORE_LDS_LOOP                                                      //LDS restore is complete?
+
+
+    /*          restore VGPRs       */
+    //////////////////////////////
+  L_RESTORE_VGPR:
+        // VGPR SR memory offset : 0
+    s_mov_b32       s_restore_mem_offset, 0x0
+    s_mov_b32       exec_lo, 0xFFFFFFFF                                                     //need every thread from now on   //be consistent with SAVE although can be moved ahead
+    s_mov_b32       exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)    //vpgr_size
+    s_add_u32       s_restore_alloc_size, s_restore_alloc_size, 1
+    s_lshl_b32      s_restore_alloc_size, s_restore_alloc_size, 2                           //Number of VGPRs = (vgpr_size + 1) * 4    (non-zero value)
+    s_lshl_b32      s_restore_buf_rsrc2,  s_restore_alloc_size, 8                           //NUM_RECORDS in bytes (64 threads*4)
+    if (SWIZZLE_EN)
+        s_add_u32       s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0                       //FIXME need to use swizzle to enable bounds checking?
+    else
+        s_mov_b32       s_restore_buf_rsrc2,  0x1000000                                     //NUM_RECORDS in bytes
+    end
+
+if G8SR_VGPR_SR_IN_DWX4
+     get_vgpr_size_bytes(s_restore_mem_offset)
+     s_sub_u32         s_restore_mem_offset, s_restore_mem_offset, 256*4
+
+     // the const stride for DWx4 is 4*4 bytes
+     s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF   // reset const stride to 0
+     s_or_b32  s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4  // const stride to 4*4 bytes
+
+     s_mov_b32         m0, s_restore_alloc_size
+     s_set_gpr_idx_on  m0, 0x8    // Note.. This will change m0
+
+L_RESTORE_VGPR_LOOP:
+     buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+     s_waitcnt vmcnt(0)
+     s_sub_u32         m0, m0, 4
+     v_mov_b32         v0, v0   // v[0+m0] = v0
+     v_mov_b32         v1, v1
+     v_mov_b32         v2, v2
+     v_mov_b32         v3, v3
+     s_sub_u32         s_restore_mem_offset, s_restore_mem_offset, 256*4
+     s_cmp_eq_u32      m0, 0x8000
+     s_cbranch_scc0    L_RESTORE_VGPR_LOOP
+     s_set_gpr_idx_off
+
+     s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF   // reset const stride to 0
+     s_or_b32  s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE  // const stride to 4*4 bytes
+
+else
+    // VGPR load using dw burst
+    s_mov_b32       s_restore_mem_offset_save, s_restore_mem_offset     // restore start with v1, v0 will be the last
+    s_add_u32       s_restore_mem_offset, s_restore_mem_offset, 256*4
+    s_mov_b32       m0, 4                               //VGPR initial index value = 1
+    s_set_gpr_idx_on  m0, 0x8                       //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
+    s_add_u32       s_restore_alloc_size, s_restore_alloc_size, 0x8000                      //add 0x8000 since we compare m0 against it later
+
+  L_RESTORE_VGPR_LOOP:
+    if(USE_MTBUF_INSTEAD_OF_MUBUF)
+        tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+    else
+        buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+        buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
+        buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
+        buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
+    end
+    s_waitcnt       vmcnt(0)                                                                //ensure data ready
+    v_mov_b32       v0, v0                                                                  //v[0+m0] = v0
+    v_mov_b32       v1, v1
+    v_mov_b32       v2, v2
+    v_mov_b32       v3, v3
+    s_add_u32       m0, m0, 4                                                               //next vgpr index
+    s_add_u32       s_restore_mem_offset, s_restore_mem_offset, 256*4                           //every buffer_load_dword does 256 bytes
+    s_cmp_lt_u32    m0, s_restore_alloc_size                                                //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_RESTORE_VGPR_LOOP                                                     //VGPR restore (except v0) is complete?
+    s_set_gpr_idx_off
+                                                                                            /* VGPR restore on v0 */
+    if(USE_MTBUF_INSTEAD_OF_MUBUF)
+        tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+    else
+        buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save    slc:1 glc:1
+        buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save    slc:1 glc:1 offset:256
+        buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save    slc:1 glc:1 offset:256*2
+        buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save    slc:1 glc:1 offset:256*3
+    end
+
+end
+
+    /*          restore SGPRs       */
+    //////////////////////////////
+
+    // SGPR SR memory offset : size(VGPR)
+    get_vgpr_size_bytes(s_restore_mem_offset)
+    get_sgpr_size_bytes(s_restore_tmp)
+    s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+    s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4     // restore SGPR from S[n] to S[0], by 16 sgprs group
+    // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)                //spgr_size
+    s_add_u32       s_restore_alloc_size, s_restore_alloc_size, 1
+    s_lshl_b32      s_restore_alloc_size, s_restore_alloc_size, 4                           //Number of SGPRs = (sgpr_size + 1) * 16   (non-zero value)
+
+    if (SGPR_SAVE_USE_SQC)
+        s_lshl_b32      s_restore_buf_rsrc2,    s_restore_alloc_size, 2                     //NUM_RECORDS in bytes
+    else
+        s_lshl_b32      s_restore_buf_rsrc2,    s_restore_alloc_size, 8                     //NUM_RECORDS in bytes (64 threads)
+    end
+    if (SWIZZLE_EN)
+        s_add_u32       s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0                       //FIXME need to use swizzle to enable bounds checking?
+    else
+        s_mov_b32       s_restore_buf_rsrc2,  0x1000000                                     //NUM_RECORDS in bytes
+    end
+
+    /* If 112 SGPRs ar allocated, 4 sgprs are not used TBA(108,109),TMA(110,111),
+       However, we are safe to restore these 4 SGPRs anyway, since TBA,TMA will later be restored by HWREG
+    */
+    s_mov_b32 m0, s_restore_alloc_size
+
+ L_RESTORE_SGPR_LOOP:
+    read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)  //PV: further performance improvement can be made
+    s_waitcnt       lgkmcnt(0)                                                              //ensure data ready
+
+    s_sub_u32 m0, m0, 16    // Restore from S[n] to S[0]
+
+    s_movreld_b64   s0, s0      //s[0+m0] = s0
+    s_movreld_b64   s2, s2
+    s_movreld_b64   s4, s4
+    s_movreld_b64   s6, s6
+    s_movreld_b64   s8, s8
+    s_movreld_b64   s10, s10
+    s_movreld_b64   s12, s12
+    s_movreld_b64   s14, s14
+
+    s_cmp_eq_u32    m0, 0               //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+    s_cbranch_scc0  L_RESTORE_SGPR_LOOP             //SGPR restore (except s0) is complete?
+
+    /*      restore HW registers    */
+    //////////////////////////////
+  L_RESTORE_HWREG:
+
+
+if G8SR_DEBUG_TIMESTAMP
+      s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo
+      s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi
+end
+
+    // HWREG SR memory offset : size(VGPR)+size(SGPR)
+    get_vgpr_size_bytes(s_restore_mem_offset)
+    get_sgpr_size_bytes(s_restore_tmp)
+    s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+
+
+    s_mov_b32       s_restore_buf_rsrc2, 0x4                                                //NUM_RECORDS   in bytes
+    if (SWIZZLE_EN)
+        s_add_u32       s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0                       //FIXME need to use swizzle to enable bounds checking?
+    else
+        s_mov_b32       s_restore_buf_rsrc2,  0x1000000                                     //NUM_RECORDS in bytes
+    end
+
+    read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)                    //M0
+    read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)             //PC
+    read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+    read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset)               //EXEC
+    read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+    read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset)                //STATUS
+    read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset)               //TRAPSTS
+    read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset)                   //XNACK_MASK_LO
+    read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset)                   //XNACK_MASK_HI
+    read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)              //MODE
+    read_hwreg_from_mem(tba_lo, s_restore_buf_rsrc0, s_restore_mem_offset)                      //TBA_LO
+    read_hwreg_from_mem(tba_hi, s_restore_buf_rsrc0, s_restore_mem_offset)                      //TBA_HI
+
+    s_waitcnt       lgkmcnt(0)                                                                                      //from now on, it is safe to restore STATUS and IB_STS
+
+    s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff      //pc[47:32]        //Do it here in order not to affect STATUS
+
+    //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
+    if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+        s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8            //pc[31:0]+8     //two back-to-back s_trap are used (first for save and second for restore)
+        s_addc_u32  s_restore_pc_hi, s_restore_pc_hi, 0x0        //carry bit over
+    end
+    if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL))
+        s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4            //pc[31:0]+4     // save is hack through s_trap but restore is normal
+        s_addc_u32  s_restore_pc_hi, s_restore_pc_hi, 0x0        //carry bit over
+    end
+
+    s_mov_b32       m0,         s_restore_m0
+    s_mov_b32       exec_lo,    s_restore_exec_lo
+    s_mov_b32       exec_hi,    s_restore_exec_hi
+
+    s_and_b32       s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
+    s_and_b32       s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
+    s_lshr_b32      s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
+    //s_setreg_b32  hwreg(HW_REG_TRAPSTS),  s_restore_trapsts      //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
+    s_setreg_b32    hwreg(HW_REG_MODE),     s_restore_mode
+    //reuse s_restore_m0 as a temp register
+    s_and_b32       s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
+    s_lshr_b32      s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
+    s_lshl_b32      s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
+    s_mov_b32       s_restore_tmp, 0x0                                                                              //IB_STS is zero
+    s_or_b32        s_restore_tmp, s_restore_tmp, s_restore_m0
+    s_and_b32       s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
+    s_lshr_b32      s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+    s_lshl_b32      s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
+    s_or_b32        s_restore_tmp, s_restore_tmp, s_restore_m0
+    s_and_b32       s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
+    s_lshr_b32      s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
+    s_setreg_b32    hwreg(HW_REG_IB_STS),   s_restore_tmp
+
+    s_and_b64    exec, exec, exec  // Restore STATUS.EXECZ, not writable by s_setreg_b32
+    s_and_b64    vcc, vcc, vcc  // Restore STATUS.VCCZ, not writable by s_setreg_b32
+    s_setreg_b32    hwreg(HW_REG_STATUS),   s_restore_status     // SCC is included, which is changed by previous salu
+
+    s_barrier                                                   //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
+
+if G8SR_DEBUG_TIMESTAMP
+    s_memrealtime s_g8sr_ts_restore_d
+    s_waitcnt lgkmcnt(0)
+end
+
+//  s_rfe_b64 s_restore_pc_lo                                   //Return to the main shader program and resume execution
+    s_rfe_restore_b64  s_restore_pc_lo, s_restore_m0            // s_restore_m0[0] is used to set STATUS.inst_atc
+
+
+/**************************************************************************/
+/*                      the END                                           */
+/**************************************************************************/
+L_END_PGM:
+    s_endpgm
+
+end
+
+
+/**************************************************************************/
+/*                      the helper functions                              */
+/**************************************************************************/
+
+//Only for save hwreg to mem
+function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+        s_mov_b32 exec_lo, m0                   //assuming exec_lo is not needed anymore from this point on
+        s_mov_b32 m0, s_mem_offset
+        s_buffer_store_dword s, s_rsrc, m0      glc:1
+        s_add_u32       s_mem_offset, s_mem_offset, 4
+        s_mov_b32   m0, exec_lo
+end
+
+
+// HWREG are saved before SGPRs, so all HWREG could be use.
+function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+
+        s_buffer_store_dwordx4 s[0], s_rsrc, 0  glc:1
+        s_buffer_store_dwordx4 s[4], s_rsrc, 16  glc:1
+        s_buffer_store_dwordx4 s[8], s_rsrc, 32  glc:1
+        s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+        s_add_u32       s_rsrc[0], s_rsrc[0], 4*16
+        s_addc_u32      s_rsrc[1], s_rsrc[1], 0x0             // +scc
+end
+
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+    s_buffer_load_dword s, s_rsrc, s_mem_offset     glc:1
+    s_add_u32       s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+    s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset      glc:1
+    s_sub_u32       s_mem_offset, s_mem_offset, 4*16
+end
+
+
+
+function get_lds_size_bytes(s_lds_size_byte)
+    // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW
+    s_getreg_b32   s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)          // lds_size
+    s_lshl_b32     s_lds_size_byte, s_lds_size_byte, 8                      //LDS size in dwords = lds_size * 64 *4Bytes    // granularity 64DW
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte)
+    s_getreg_b32   s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)  //vpgr_size
+    s_add_u32      s_vgpr_size_byte, s_vgpr_size_byte, 1
+    s_lshl_b32     s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4   (non-zero value)   //FIXME for GFX, zero is possible
+end
+
+function get_sgpr_size_bytes(s_sgpr_size_byte)
+    s_getreg_b32   s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)  //spgr_size
+    s_add_u32      s_sgpr_size_byte, s_sgpr_size_byte, 1
+    s_lshl_b32     s_sgpr_size_byte, s_sgpr_size_byte, 6 //Number of SGPRs = (sgpr_size + 1) * 16 *4   (non-zero value)
+end
+
+function get_hwreg_size_bytes
+    return 128 //HWREG size 128 bytes
+end
+
+
+#endif
+
+static const uint32_t cwsr_trap_gfx8_hex[] = {
+	0xbf820001, 0xbf820123,
+	0xb8f4f802, 0x89748674,
+	0xb8f5f803, 0x8675ff75,
+	0x00000400, 0xbf850011,
+	0xc00a1e37, 0x00000000,
+	0xbf8c007f, 0x87777978,
+	0xbf840002, 0xb974f802,
+	0xbe801d78, 0xb8f5f803,
+	0x8675ff75, 0x000001ff,
+	0xbf850002, 0x80708470,
+	0x82718071, 0x8671ff71,
+	0x0000ffff, 0xb974f802,
+	0xbe801f70, 0xb8f5f803,
+	0x8675ff75, 0x00000100,
+	0xbf840006, 0xbefa0080,
+	0xb97a0203, 0x8671ff71,
+	0x0000ffff, 0x80f08870,
+	0x82f18071, 0xbefa0080,
+	0xb97a0283, 0xbef60068,
+	0xbef70069, 0xb8fa1c07,
+	0x8e7a9c7a, 0x87717a71,
+	0xb8fa03c7, 0x8e7a9b7a,
+	0x87717a71, 0xb8faf807,
+	0x867aff7a, 0x00007fff,
+	0xb97af807, 0xbef2007e,
+	0xbef3007f, 0xbefe0180,
+	0xbf900004, 0xbf8e0002,
+	0xbf88fffe, 0xbef8007e,
+	0x8679ff7f, 0x0000ffff,
+	0x8779ff79, 0x00040000,
+	0xbefa0080, 0xbefb00ff,
+	0x00807fac, 0x867aff7f,
+	0x08000000, 0x8f7a837a,
+	0x877b7a7b, 0x867aff7f,
+	0x70000000, 0x8f7a817a,
+	0x877b7a7b, 0xbeef007c,
+	0xbeee0080, 0xb8ee2a05,
+	0x806e816e, 0x8e6e8a6e,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x806e7a6e,
+	0xbefa0084, 0xbefa00ff,
+	0x01000000, 0xbefe007c,
+	0xbefc006e, 0xc0611bfc,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611c3c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611c7c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611cbc,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611cfc,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611d3c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xb8f5f803,
+	0xbefe007c, 0xbefc006e,
+	0xc0611d7c, 0x0000007c,
+	0x806e846e, 0xbefc007e,
+	0xbefe007c, 0xbefc006e,
+	0xc0611dbc, 0x0000007c,
+	0x806e846e, 0xbefc007e,
+	0xbefe007c, 0xbefc006e,
+	0xc0611dfc, 0x0000007c,
+	0x806e846e, 0xbefc007e,
+	0xb8eff801, 0xbefe007c,
+	0xbefc006e, 0xc0611bfc,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611b3c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611b7c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0x867aff7f,
+	0x04000000, 0xbef30080,
+	0x8773737a, 0xb8ee2a05,
+	0x806e816e, 0x8e6e8a6e,
+	0xb8f51605, 0x80758175,
+	0x8e758475, 0x8e7a8275,
+	0xbefa00ff, 0x01000000,
+	0xbef60178, 0x80786e78,
+	0x82798079, 0xbefc0080,
+	0xbe802b00, 0xbe822b02,
+	0xbe842b04, 0xbe862b06,
+	0xbe882b08, 0xbe8a2b0a,
+	0xbe8c2b0c, 0xbe8e2b0e,
+	0xc06b003c, 0x00000000,
+	0xc06b013c, 0x00000010,
+	0xc06b023c, 0x00000020,
+	0xc06b033c, 0x00000030,
+	0x8078c078, 0x82798079,
+	0x807c907c, 0xbf0a757c,
+	0xbf85ffeb, 0xbef80176,
+	0xbeee0080, 0xbefe00c1,
+	0xbeff00c1, 0xbefa00ff,
+	0x01000000, 0xe0724000,
+	0x6e1e0000, 0xe0724100,
+	0x6e1e0100, 0xe0724200,
+	0x6e1e0200, 0xe0724300,
+	0x6e1e0300, 0xbefe00c1,
+	0xbeff00c1, 0xb8f54306,
+	0x8675c175, 0xbf84002c,
+	0xbf8a0000, 0x867aff73,
+	0x04000000, 0xbf840028,
+	0x8e758675, 0x8e758275,
+	0xbefa0075, 0xb8ee2a05,
+	0x806e816e, 0x8e6e8a6e,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x806e7a6e,
+	0x806eff6e, 0x00000080,
+	0xbefa00ff, 0x01000000,
+	0xbefc0080, 0xd28c0002,
+	0x000100c1, 0xd28d0003,
+	0x000204c1, 0xd1060002,
+	0x00011103, 0x7e0602ff,
+	0x00000200, 0xbefc00ff,
+	0x00010000, 0xbe80007b,
+	0x867bff7b, 0xff7fffff,
+	0x877bff7b, 0x00058000,
+	0xd8ec0000, 0x00000002,
+	0xbf8c007f, 0xe0765000,
+	0x6e1e0002, 0x32040702,
+	0xd0c9006a, 0x0000eb02,
+	0xbf87fff7, 0xbefb0000,
+	0xbeee00ff, 0x00000400,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8f52a05, 0x80758175,
+	0x8e758275, 0x8e7a8875,
+	0xbefa00ff, 0x01000000,
+	0xbefc0084, 0xbf0a757c,
+	0xbf840015, 0xbf11017c,
+	0x8075ff75, 0x00001000,
+	0x7e000300, 0x7e020301,
+	0x7e040302, 0x7e060303,
+	0xe0724000, 0x6e1e0000,
+	0xe0724100, 0x6e1e0100,
+	0xe0724200, 0x6e1e0200,
+	0xe0724300, 0x6e1e0300,
+	0x807c847c, 0x806eff6e,
+	0x00000400, 0xbf0a757c,
+	0xbf85ffef, 0xbf9c0000,
+	0xbf8200ca, 0xbef8007e,
+	0x8679ff7f, 0x0000ffff,
+	0x8779ff79, 0x00040000,
+	0xbefa0080, 0xbefb00ff,
+	0x00807fac, 0x8676ff7f,
+	0x08000000, 0x8f768376,
+	0x877b767b, 0x8676ff7f,
+	0x70000000, 0x8f768176,
+	0x877b767b, 0x8676ff7f,
+	0x04000000, 0xbf84001e,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8f34306, 0x8673c173,
+	0xbf840019, 0x8e738673,
+	0x8e738273, 0xbefa0073,
+	0xb8f22a05, 0x80728172,
+	0x8e728a72, 0xb8f61605,
+	0x80768176, 0x8e768676,
+	0x80727672, 0x8072ff72,
+	0x00000080, 0xbefa00ff,
+	0x01000000, 0xbefc0080,
+	0xe0510000, 0x721e0000,
+	0xe0510100, 0x721e0000,
+	0x807cff7c, 0x00000200,
+	0x8072ff72, 0x00000200,
+	0xbf0a737c, 0xbf85fff6,
+	0xbef20080, 0xbefe00c1,
+	0xbeff00c1, 0xb8f32a05,
+	0x80738173, 0x8e738273,
+	0x8e7a8873, 0xbefa00ff,
+	0x01000000, 0xbef60072,
+	0x8072ff72, 0x00000400,
+	0xbefc0084, 0xbf11087c,
+	0x8073ff73, 0x00008000,
+	0xe0524000, 0x721e0000,
+	0xe0524100, 0x721e0100,
+	0xe0524200, 0x721e0200,
+	0xe0524300, 0x721e0300,
+	0xbf8c0f70, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0x807c847c,
+	0x8072ff72, 0x00000400,
+	0xbf0a737c, 0xbf85ffee,
+	0xbf9c0000, 0xe0524000,
+	0x761e0000, 0xe0524100,
+	0x761e0100, 0xe0524200,
+	0x761e0200, 0xe0524300,
+	0x761e0300, 0xb8f22a05,
+	0x80728172, 0x8e728a72,
+	0xb8f61605, 0x80768176,
+	0x8e768676, 0x80727672,
+	0x80f2c072, 0xb8f31605,
+	0x80738173, 0x8e738473,
+	0x8e7a8273, 0xbefa00ff,
+	0x01000000, 0xbefc0073,
+	0xc031003c, 0x00000072,
+	0x80f2c072, 0xbf8c007f,
+	0x80fc907c, 0xbe802d00,
+	0xbe822d02, 0xbe842d04,
+	0xbe862d06, 0xbe882d08,
+	0xbe8a2d0a, 0xbe8c2d0c,
+	0xbe8e2d0e, 0xbf06807c,
+	0xbf84fff1, 0xb8f22a05,
+	0x80728172, 0x8e728a72,
+	0xb8f61605, 0x80768176,
+	0x8e768676, 0x80727672,
+	0xbefa0084, 0xbefa00ff,
+	0x01000000, 0xc0211cfc,
+	0x00000072, 0x80728472,
+	0xc0211c3c, 0x00000072,
+	0x80728472, 0xc0211c7c,
+	0x00000072, 0x80728472,
+	0xc0211bbc, 0x00000072,
+	0x80728472, 0xc0211bfc,
+	0x00000072, 0x80728472,
+	0xc0211d3c, 0x00000072,
+	0x80728472, 0xc0211d7c,
+	0x00000072, 0x80728472,
+	0xc0211a3c, 0x00000072,
+	0x80728472, 0xc0211a7c,
+	0x00000072, 0x80728472,
+	0xc0211dfc, 0x00000072,
+	0x80728472, 0xc0211b3c,
+	0x00000072, 0x80728472,
+	0xc0211b7c, 0x00000072,
+	0x80728472, 0xbf8c007f,
+	0x8671ff71, 0x0000ffff,
+	0xbefc0073, 0xbefe006e,
+	0xbeff006f, 0x867375ff,
+	0x000003ff, 0xb9734803,
+	0x867375ff, 0xfffff800,
+	0x8f738b73, 0xb973a2c3,
+	0xb977f801, 0x8673ff71,
+	0xf0000000, 0x8f739c73,
+	0x8e739073, 0xbef60080,
+	0x87767376, 0x8673ff71,
+	0x08000000, 0x8f739b73,
+	0x8e738f73, 0x87767376,
+	0x8673ff74, 0x00800000,
+	0x8f739773, 0xb976f807,
+	0x86fe7e7e, 0x86ea6a6a,
+	0xb974f802, 0xbf8a0000,
+	0x95807370, 0xbf810000,
+};
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 505d39156acdb1daa5300cc3bdbfd353d6085299..62c3d9cd6ef18807e754fa100f3f35c63b873a4d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -117,7 +117,7 @@ static int kfd_open(struct inode *inode, struct file *filep)
 		return -EPERM;
 	}
 
-	process = kfd_create_process(current);
+	process = kfd_create_process(filep);
 	if (IS_ERR(process))
 		return PTR_ERR(process);
 
@@ -206,6 +206,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 	q_properties->ctx_save_restore_area_address =
 			args->ctx_save_restore_address;
 	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
+	q_properties->ctl_stack_size = args->ctl_stack_size;
 	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
 		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
@@ -431,6 +432,38 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
 	return err;
 }
 
+static int kfd_ioctl_set_trap_handler(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_set_trap_handler_args *args = data;
+	struct kfd_dev *dev;
+	int err = 0;
+	struct kfd_process_device *pdd;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = -ESRCH;
+		goto out;
+	}
+
+	if (dev->dqm->ops.set_trap_handler(dev->dqm,
+					&pdd->qpd,
+					args->tba_addr,
+					args->tma_addr))
+		err = -EINVAL;
+
+out:
+	mutex_unlock(&p->mutex);
+
+	return err;
+}
+
 static int kfd_ioctl_dbg_register(struct file *filep,
 				struct kfd_process *p, void *data)
 {
@@ -493,7 +526,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep,
 	long status;
 
 	dev = kfd_device_by_id(args->gpu_id);
-	if (!dev)
+	if (!dev || !dev->dbgmgr)
 		return -EINVAL;
 
 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
@@ -979,7 +1012,10 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 			kfd_ioctl_set_scratch_backing_va, 0),
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
-			kfd_ioctl_get_tile_config, 0)
+			kfd_ioctl_get_tile_config, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
+			kfd_ioctl_set_trap_handler, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
@@ -1088,6 +1124,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
 			KFD_MMAP_EVENTS_MASK) {
 		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
 		return kfd_event_mmap(process, vma);
+	} else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
+			KFD_MMAP_RESERVED_MEM_MASK) {
+		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
+		return kfd_reserved_mem_mmap(process, vma);
 	}
 
 	return -EFAULT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
new file mode 100644
index 0000000000000000000000000000000000000000..2bc2816767a7bbda4ea85fce39c83e05c9215058
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -0,0 +1,1267 @@
+/*
+ * Copyright 2015-2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <linux/amd-iommu.h>
+#include "kfd_crat.h"
+#include "kfd_priv.h"
+#include "kfd_topology.h"
+
+/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
+ * GPU processor ID are expressed with Bit[31]=1.
+ * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
+ * used in the CRAT.
+ */
+static uint32_t gpu_processor_id_low = 0x80001000;
+
+/* Return the next available gpu_processor_id and increment it for next GPU
+ *	@total_cu_count - Total CUs present in the GPU including ones
+ *			  masked off
+ */
+static inline unsigned int get_and_inc_gpu_processor_id(
+				unsigned int total_cu_count)
+{
+	int current_id = gpu_processor_id_low;
+
+	gpu_processor_id_low += total_cu_count;
+	return current_id;
+}
+
+/* Static table to describe GPU Cache information */
+struct kfd_gpu_cache_info {
+	uint32_t	cache_size;
+	uint32_t	cache_level;
+	uint32_t	flags;
+	/* Indicates how many Compute Units share this cache
+	 * Value = 1 indicates the cache is not shared
+	 */
+	uint32_t	num_cu_shared;
+};
+
+static struct kfd_gpu_cache_info kaveri_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+
+	},
+	{
+		/* Scalar L1 Instruction Cache (in SQC module) per bank */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache (in SQC module) per bank */
+		.cache_size = 8,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+
+	/* TODO: Add L2 Cache information */
+};
+
+
+static struct kfd_gpu_cache_info carrizo_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache (in SQC module) per bank */
+		.cache_size = 8,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 4,
+	},
+	{
+		/* Scalar L1 Data Cache (in SQC module) per bank. */
+		.cache_size = 4,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 4,
+	},
+
+	/* TODO: Add L2 Cache information */
+};
+
+/* NOTE: In future if more information is added to struct kfd_gpu_cache_info
+ * the following ASICs may need a separate table.
+ */
+#define hawaii_cache_info kaveri_cache_info
+#define tonga_cache_info carrizo_cache_info
+#define fiji_cache_info  carrizo_cache_info
+#define polaris10_cache_info carrizo_cache_info
+#define polaris11_cache_info carrizo_cache_info
+
+static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
+		struct crat_subtype_computeunit *cu)
+{
+	dev->node_props.cpu_cores_count = cu->num_cpu_cores;
+	dev->node_props.cpu_core_id_base = cu->processor_id_low;
+	if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
+		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
+
+	pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
+			cu->processor_id_low);
+}
+
+static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
+		struct crat_subtype_computeunit *cu)
+{
+	dev->node_props.simd_id_base = cu->processor_id_low;
+	dev->node_props.simd_count = cu->num_simd_cores;
+	dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
+	dev->node_props.max_waves_per_simd = cu->max_waves_simd;
+	dev->node_props.wave_front_size = cu->wave_front_size;
+	dev->node_props.array_count = cu->array_count;
+	dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
+	dev->node_props.simd_per_cu = cu->num_simd_per_cu;
+	dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
+	if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
+		dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
+	pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);
+}
+
+/* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
+ * topology device present in the device_list
+ */
+static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
+				struct list_head *device_list)
+{
+	struct kfd_topology_device *dev;
+
+	pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
+			cu->proximity_domain, cu->hsa_capability);
+	list_for_each_entry(dev, device_list, list) {
+		if (cu->proximity_domain == dev->proximity_domain) {
+			if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
+				kfd_populated_cu_info_cpu(dev, cu);
+
+			if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
+				kfd_populated_cu_info_gpu(dev, cu);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
+ * topology device present in the device_list
+ */
+static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
+				struct list_head *device_list)
+{
+	struct kfd_mem_properties *props;
+	struct kfd_topology_device *dev;
+
+	pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
+			mem->proximity_domain);
+	list_for_each_entry(dev, device_list, list) {
+		if (mem->proximity_domain == dev->proximity_domain) {
+			props = kfd_alloc_struct(props);
+			if (!props)
+				return -ENOMEM;
+
+			/* We're on GPU node */
+			if (dev->node_props.cpu_cores_count == 0) {
+				/* APU */
+				if (mem->visibility_type == 0)
+					props->heap_type =
+						HSA_MEM_HEAP_TYPE_FB_PRIVATE;
+				/* dGPU */
+				else
+					props->heap_type = mem->visibility_type;
+			} else
+				props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
+
+			if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
+				props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
+			if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
+				props->flags |= HSA_MEM_FLAGS_NON_VOLATILE;
+
+			props->size_in_bytes =
+				((uint64_t)mem->length_high << 32) +
+							mem->length_low;
+			props->width = mem->width;
+
+			dev->node_props.mem_banks_count++;
+			list_add_tail(&props->list, &dev->mem_props);
+
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
+ * topology device present in the device_list
+ */
+static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
+			struct list_head *device_list)
+{
+	struct kfd_cache_properties *props;
+	struct kfd_topology_device *dev;
+	uint32_t id;
+	uint32_t total_num_of_cu;
+
+	id = cache->processor_id_low;
+
+	pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id);
+	list_for_each_entry(dev, device_list, list) {
+		total_num_of_cu = (dev->node_props.array_count *
+					dev->node_props.cu_per_simd_array);
+
+		/* Cache infomration in CRAT doesn't have proximity_domain
+		 * information as it is associated with a CPU core or GPU
+		 * Compute Unit. So map the cache using CPU core Id or SIMD
+		 * (GPU) ID.
+		 * TODO: This works because currently we can safely assume that
+		 *  Compute Units are parsed before caches are parsed. In
+		 *  future, remove this dependency
+		 */
+		if ((id >= dev->node_props.cpu_core_id_base &&
+			id <= dev->node_props.cpu_core_id_base +
+				dev->node_props.cpu_cores_count) ||
+			(id >= dev->node_props.simd_id_base &&
+			id < dev->node_props.simd_id_base +
+				total_num_of_cu)) {
+			props = kfd_alloc_struct(props);
+			if (!props)
+				return -ENOMEM;
+
+			props->processor_id_low = id;
+			props->cache_level = cache->cache_level;
+			props->cache_size = cache->cache_size;
+			props->cacheline_size = cache->cache_line_size;
+			props->cachelines_per_tag = cache->lines_per_tag;
+			props->cache_assoc = cache->associativity;
+			props->cache_latency = cache->cache_latency;
+			memcpy(props->sibling_map, cache->sibling_map,
+					sizeof(props->sibling_map));
+
+			if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+				props->cache_type |= HSA_CACHE_TYPE_DATA;
+			if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
+				props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
+			if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
+				props->cache_type |= HSA_CACHE_TYPE_CPU;
+			if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
+				props->cache_type |= HSA_CACHE_TYPE_HSACU;
+
+			dev->cache_count++;
+			dev->node_props.caches_count++;
+			list_add_tail(&props->list, &dev->cache_props);
+
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
+ * topology device present in the device_list
+ */
+static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
+					struct list_head *device_list)
+{
+	struct kfd_iolink_properties *props = NULL, *props2;
+	struct kfd_topology_device *dev, *cpu_dev;
+	uint32_t id_from;
+	uint32_t id_to;
+
+	id_from = iolink->proximity_domain_from;
+	id_to = iolink->proximity_domain_to;
+
+	pr_debug("Found IO link entry in CRAT table with id_from=%d\n",
+			id_from);
+	list_for_each_entry(dev, device_list, list) {
+		if (id_from == dev->proximity_domain) {
+			props = kfd_alloc_struct(props);
+			if (!props)
+				return -ENOMEM;
+
+			props->node_from = id_from;
+			props->node_to = id_to;
+			props->ver_maj = iolink->version_major;
+			props->ver_min = iolink->version_minor;
+			props->iolink_type = iolink->io_interface_type;
+
+			if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
+				props->weight = 20;
+			else
+				props->weight = node_distance(id_from, id_to);
+
+			props->min_latency = iolink->minimum_latency;
+			props->max_latency = iolink->maximum_latency;
+			props->min_bandwidth = iolink->minimum_bandwidth_mbs;
+			props->max_bandwidth = iolink->maximum_bandwidth_mbs;
+			props->rec_transfer_size =
+					iolink->recommended_transfer_size;
+
+			dev->io_link_count++;
+			dev->node_props.io_links_count++;
+			list_add_tail(&props->list, &dev->io_link_props);
+			break;
+		}
+	}
+
+	/* CPU topology is created before GPUs are detected, so CPU->GPU
+	 * links are not built at that time. If a PCIe type is discovered, it
+	 * means a GPU is detected and we are adding GPU->CPU to the topology.
+	 * At this time, also add the corresponded CPU->GPU link.
+	 */
+	if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) {
+		cpu_dev = kfd_topology_device_by_proximity_domain(id_to);
+		if (!cpu_dev)
+			return -ENODEV;
+		/* same everything but the other direction */
+		props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
+		props2->node_from = id_to;
+		props2->node_to = id_from;
+		props2->kobj = NULL;
+		cpu_dev->io_link_count++;
+		cpu_dev->node_props.io_links_count++;
+		list_add_tail(&props2->list, &cpu_dev->io_link_props);
+	}
+
+	return 0;
+}
+
+/* kfd_parse_subtype - parse subtypes and attach it to correct topology device
+ * present in the device_list
+ *	@sub_type_hdr - subtype section of crat_image
+ *	@device_list - list of topology devices present in this crat_image
+ */
+static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
+				struct list_head *device_list)
+{
+	struct crat_subtype_computeunit *cu;
+	struct crat_subtype_memory *mem;
+	struct crat_subtype_cache *cache;
+	struct crat_subtype_iolink *iolink;
+	int ret = 0;
+
+	switch (sub_type_hdr->type) {
+	case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
+		cu = (struct crat_subtype_computeunit *)sub_type_hdr;
+		ret = kfd_parse_subtype_cu(cu, device_list);
+		break;
+	case CRAT_SUBTYPE_MEMORY_AFFINITY:
+		mem = (struct crat_subtype_memory *)sub_type_hdr;
+		ret = kfd_parse_subtype_mem(mem, device_list);
+		break;
+	case CRAT_SUBTYPE_CACHE_AFFINITY:
+		cache = (struct crat_subtype_cache *)sub_type_hdr;
+		ret = kfd_parse_subtype_cache(cache, device_list);
+		break;
+	case CRAT_SUBTYPE_TLB_AFFINITY:
+		/*
+		 * For now, nothing to do here
+		 */
+		pr_debug("Found TLB entry in CRAT table (not processing)\n");
+		break;
+	case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
+		/*
+		 * For now, nothing to do here
+		 */
+		pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
+		break;
+	case CRAT_SUBTYPE_IOLINK_AFFINITY:
+		iolink = (struct crat_subtype_iolink *)sub_type_hdr;
+		ret = kfd_parse_subtype_iolink(iolink, device_list);
+		break;
+	default:
+		pr_warn("Unknown subtype %d in CRAT\n",
+				sub_type_hdr->type);
+	}
+
+	return ret;
+}
+
+/* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
+ * create a kfd_topology_device and add in to device_list. Also parse
+ * CRAT subtypes and attach it to appropriate kfd_topology_device
+ *	@crat_image - input image containing CRAT
+ *	@device_list - [OUT] list of kfd_topology_device generated after
+ *		       parsing crat_image
+ *	@proximity_domain - Proximity domain of the first device in the table
+ *
+ *	Return - 0 if successful else -ve value
+ */
+int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
+			 uint32_t proximity_domain)
+{
+	struct kfd_topology_device *top_dev = NULL;
+	struct crat_subtype_generic *sub_type_hdr;
+	uint16_t node_id;
+	int ret = 0;
+	struct crat_header *crat_table = (struct crat_header *)crat_image;
+	uint16_t num_nodes;
+	uint32_t image_len;
+
+	if (!crat_image)
+		return -EINVAL;
+
+	if (!list_empty(device_list)) {
+		pr_warn("Error device list should be empty\n");
+		return -EINVAL;
+	}
+
+	num_nodes = crat_table->num_domains;
+	image_len = crat_table->length;
+
+	pr_info("Parsing CRAT table with %d nodes\n", num_nodes);
+
+	for (node_id = 0; node_id < num_nodes; node_id++) {
+		top_dev = kfd_create_topology_device(device_list);
+		if (!top_dev)
+			break;
+		top_dev->proximity_domain = proximity_domain++;
+	}
+
+	if (!top_dev) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
+	memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
+			CRAT_OEMTABLEID_LENGTH);
+	top_dev->oem_revision = crat_table->oem_revision;
+
+	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
+	while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
+			((char *)crat_image) + image_len) {
+		if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
+			ret = kfd_parse_subtype(sub_type_hdr, device_list);
+			if (ret)
+				break;
+		}
+
+		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+				sub_type_hdr->length);
+	}
+
+err:
+	if (ret)
+		kfd_release_topology_device_list(device_list);
+
+	return ret;
+}
+
+/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+static int fill_in_pcache(struct crat_subtype_cache *pcache,
+				struct kfd_gpu_cache_info *pcache_info,
+				struct kfd_cu_info *cu_info,
+				int mem_available,
+				int cu_bitmask,
+				int cache_type, unsigned int cu_processor_id,
+				int cu_block)
+{
+	unsigned int cu_sibling_map_mask;
+	int first_active_cu;
+
+	/* First check if enough memory is available */
+	if (sizeof(struct crat_subtype_cache) > mem_available)
+		return -ENOMEM;
+
+	cu_sibling_map_mask = cu_bitmask;
+	cu_sibling_map_mask >>= cu_block;
+	cu_sibling_map_mask &=
+		((1 << pcache_info[cache_type].num_cu_shared) - 1);
+	first_active_cu = ffs(cu_sibling_map_mask);
+
+	/* CU could be inactive. In case of shared cache find the first active
+	 * CU. and incase of non-shared cache check if the CU is inactive. If
+	 * inactive active skip it
+	 */
+	if (first_active_cu) {
+		memset(pcache, 0, sizeof(struct crat_subtype_cache));
+		pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
+		pcache->length = sizeof(struct crat_subtype_cache);
+		pcache->flags = pcache_info[cache_type].flags;
+		pcache->processor_id_low = cu_processor_id
+					 + (first_active_cu - 1);
+		pcache->cache_level = pcache_info[cache_type].cache_level;
+		pcache->cache_size = pcache_info[cache_type].cache_size;
+
+		/* Sibling map is w.r.t processor_id_low, so shift out
+		 * inactive CU
+		 */
+		cu_sibling_map_mask =
+			cu_sibling_map_mask >> (first_active_cu - 1);
+
+		pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
+		pcache->sibling_map[1] =
+				(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+		pcache->sibling_map[2] =
+				(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+		pcache->sibling_map[3] =
+				(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+		return 0;
+	}
+	return 1;
+}
+
+/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
+ * tables
+ *
+ *	@kdev - [IN] GPU device
+ *	@gpu_processor_id - [IN] GPU processor ID to which these caches
+ *			    associate
+ *	@available_size - [IN] Amount of memory available in pcache
+ *	@cu_info - [IN] Compute Unit info obtained from KGD
+ *	@pcache - [OUT] memory into which cache data is to be filled in.
+ *	@size_filled - [OUT] amount of data used up in pcache.
+ *	@num_of_entries - [OUT] number of caches added
+ */
+static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
+			int gpu_processor_id,
+			int available_size,
+			struct kfd_cu_info *cu_info,
+			struct crat_subtype_cache *pcache,
+			int *size_filled,
+			int *num_of_entries)
+{
+	struct kfd_gpu_cache_info *pcache_info;
+	int num_of_cache_types = 0;
+	int i, j, k;
+	int ct = 0;
+	int mem_available = available_size;
+	unsigned int cu_processor_id;
+	int ret;
+
+	switch (kdev->device_info->asic_family) {
+	case CHIP_KAVERI:
+		pcache_info = kaveri_cache_info;
+		num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
+		break;
+	case CHIP_HAWAII:
+		pcache_info = hawaii_cache_info;
+		num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
+		break;
+	case CHIP_CARRIZO:
+		pcache_info = carrizo_cache_info;
+		num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
+		break;
+	case CHIP_TONGA:
+		pcache_info = tonga_cache_info;
+		num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
+		break;
+	case CHIP_FIJI:
+		pcache_info = fiji_cache_info;
+		num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
+		break;
+	case CHIP_POLARIS10:
+		pcache_info = polaris10_cache_info;
+		num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
+		break;
+	case CHIP_POLARIS11:
+		pcache_info = polaris11_cache_info;
+		num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*size_filled = 0;
+	*num_of_entries = 0;
+
+	/* For each type of cache listed in the kfd_gpu_cache_info table,
+	 * go through all available Compute Units.
+	 * The [i,j,k] loop will
+	 *		if kfd_gpu_cache_info.num_cu_shared = 1
+	 *			will parse through all available CU
+	 *		If (kfd_gpu_cache_info.num_cu_shared != 1)
+	 *			then it will consider only one CU from
+	 *			the shared unit
+	 */
+
+	for (ct = 0; ct < num_of_cache_types; ct++) {
+		cu_processor_id = gpu_processor_id;
+		for (i = 0; i < cu_info->num_shader_engines; i++) {
+			for (j = 0; j < cu_info->num_shader_arrays_per_engine;
+				j++) {
+				for (k = 0; k < cu_info->num_cu_per_sh;
+					k += pcache_info[ct].num_cu_shared) {
+
+					ret = fill_in_pcache(pcache,
+						pcache_info,
+						cu_info,
+						mem_available,
+						cu_info->cu_bitmap[i][j],
+						ct,
+						cu_processor_id,
+						k);
+
+					if (ret < 0)
+						break;
+
+					if (!ret) {
+						pcache++;
+						(*num_of_entries)++;
+						mem_available -=
+							sizeof(*pcache);
+						(*size_filled) +=
+							sizeof(*pcache);
+					}
+
+					/* Move to next CU block */
+					cu_processor_id +=
+						pcache_info[ct].num_cu_shared;
+				}
+			}
+		}
+	}
+
+	pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
+
+	return 0;
+}
+
+/*
+ * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
+ * copies CRAT from ACPI (if available).
+ * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
+ *
+ *	@crat_image: CRAT read from ACPI. If no CRAT in ACPI then
+ *		     crat_image will be NULL
+ *	@size: [OUT] size of crat_image
+ *
+ *	Return 0 if successful else return error code
+ */
+int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
+{
+	struct acpi_table_header *crat_table;
+	acpi_status status;
+	void *pcrat_image;
+
+	if (!crat_image)
+		return -EINVAL;
+
+	*crat_image = NULL;
+
+	/* Fetch the CRAT table from ACPI */
+	status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
+	if (status == AE_NOT_FOUND) {
+		pr_warn("CRAT table not found\n");
+		return -ENODATA;
+	} else if (ACPI_FAILURE(status)) {
+		const char *err = acpi_format_exception(status);
+
+		pr_err("CRAT table error: %s\n", err);
+		return -EINVAL;
+	}
+
+	if (ignore_crat) {
+		pr_info("CRAT table disabled by module option\n");
+		return -ENODATA;
+	}
+
+	pcrat_image = kmalloc(crat_table->length, GFP_KERNEL);
+	if (!pcrat_image)
+		return -ENOMEM;
+
+	memcpy(pcrat_image, crat_table, crat_table->length);
+
+	*crat_image = pcrat_image;
+	*size = crat_table->length;
+
+	return 0;
+}
+
+/* Memory required to create Virtual CRAT.
+ * Since there is no easy way to predict the amount of memory required, the
+ * following amount are allocated for CPU and GPU Virtual CRAT. This is
+ * expected to cover all known conditions. But to be safe additional check
+ * is put in the code to ensure we don't overwrite.
+ */
+#define VCRAT_SIZE_FOR_CPU	(2 * PAGE_SIZE)
+#define VCRAT_SIZE_FOR_GPU	(3 * PAGE_SIZE)
+
+/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
+ *
+ *	@numa_node_id: CPU NUMA node id
+ *	@avail_size: Available size in the memory
+ *	@sub_type_hdr: Memory into which compute info will be filled in
+ *
+ *	Return 0 if successful else return -ve value
+ */
+static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
+				int proximity_domain,
+				struct crat_subtype_computeunit *sub_type_hdr)
+{
+	const struct cpumask *cpumask;
+
+	*avail_size -= sizeof(struct crat_subtype_computeunit);
+	if (*avail_size < 0)
+		return -ENOMEM;
+
+	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
+
+	/* Fill in subtype header data */
+	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
+	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
+	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+	cpumask = cpumask_of_node(numa_node_id);
+
+	/* Fill in CU data */
+	sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
+	sub_type_hdr->proximity_domain = proximity_domain;
+	sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
+	if (sub_type_hdr->processor_id_low == -1)
+		return -EINVAL;
+
+	sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);
+
+	return 0;
+}
+
+/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
+ *
+ *	@numa_node_id: CPU NUMA node id
+ *	@avail_size: Available size in the memory
+ *	@sub_type_hdr: Memory into which compute info will be filled in
+ *
+ *	Return 0 if successful else return -ve value
+ */
+static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
+			int proximity_domain,
+			struct crat_subtype_memory *sub_type_hdr)
+{
+	uint64_t mem_in_bytes = 0;
+	pg_data_t *pgdat;
+	int zone_type;
+
+	*avail_size -= sizeof(struct crat_subtype_memory);
+	if (*avail_size < 0)
+		return -ENOMEM;
+
+	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
+
+	/* Fill in subtype header data */
+	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
+	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
+	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+	/* Fill in Memory Subunit data */
+
+	/* Unlike si_meminfo, si_meminfo_node is not exported. So
+	 * the following lines are duplicated from si_meminfo_node
+	 * function
+	 */
+	pgdat = NODE_DATA(numa_node_id);
+	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+		mem_in_bytes += pgdat->node_zones[zone_type].managed_pages;
+	mem_in_bytes <<= PAGE_SHIFT;
+
+	sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
+	sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
+	sub_type_hdr->proximity_domain = proximity_domain;
+
+	return 0;
+}
+
+static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
+				uint32_t *num_entries,
+				struct crat_subtype_iolink *sub_type_hdr)
+{
+	int nid;
+	struct cpuinfo_x86 *c = &cpu_data(0);
+	uint8_t link_type;
+
+	if (c->x86_vendor == X86_VENDOR_AMD)
+		link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
+	else
+		link_type = CRAT_IOLINK_TYPE_QPI_1_1;
+
+	*num_entries = 0;
+
+	/* Create IO links from this node to other CPU nodes */
+	for_each_online_node(nid) {
+		if (nid == numa_node_id) /* node itself */
+			continue;
+
+		*avail_size -= sizeof(struct crat_subtype_iolink);
+		if (*avail_size < 0)
+			return -ENOMEM;
+
+		memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
+
+		/* Fill in subtype header data */
+		sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+		sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
+		sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+		/* Fill in IO link data */
+		sub_type_hdr->proximity_domain_from = numa_node_id;
+		sub_type_hdr->proximity_domain_to = nid;
+		sub_type_hdr->io_interface_type = link_type;
+
+		(*num_entries)++;
+		sub_type_hdr++;
+	}
+
+	return 0;
+}
+
+/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
+ *
+ *	@pcrat_image: Fill in VCRAT for CPU
+ *	@size:	[IN] allocated size of crat_image.
+ *		[OUT] actual size of data filled in crat_image
+ */
+static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
+{
+	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
+	struct acpi_table_header *acpi_table;
+	acpi_status status;
+	struct crat_subtype_generic *sub_type_hdr;
+	int avail_size = *size;
+	int numa_node_id;
+	uint32_t entries = 0;
+	int ret = 0;
+
+	if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU)
+		return -EINVAL;
+
+	/* Fill in CRAT Header.
+	 * Modify length and total_entries as subunits are added.
+	 */
+	avail_size -= sizeof(struct crat_header);
+	if (avail_size < 0)
+		return -ENOMEM;
+
+	memset(crat_table, 0, sizeof(struct crat_header));
+	memcpy(&crat_table->signature, CRAT_SIGNATURE,
+			sizeof(crat_table->signature));
+	crat_table->length = sizeof(struct crat_header);
+
+	status = acpi_get_table("DSDT", 0, &acpi_table);
+	if (status == AE_NOT_FOUND)
+		pr_warn("DSDT table not found for OEM information\n");
+	else {
+		crat_table->oem_revision = acpi_table->revision;
+		memcpy(crat_table->oem_id, acpi_table->oem_id,
+				CRAT_OEMID_LENGTH);
+		memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
+				CRAT_OEMTABLEID_LENGTH);
+	}
+	crat_table->total_entries = 0;
+	crat_table->num_domains = 0;
+
+	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
+
+	for_each_online_node(numa_node_id) {
+		if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
+			continue;
+
+		/* Fill in Subtype: Compute Unit */
+		ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
+			crat_table->num_domains,
+			(struct crat_subtype_computeunit *)sub_type_hdr);
+		if (ret < 0)
+			return ret;
+		crat_table->length += sub_type_hdr->length;
+		crat_table->total_entries++;
+
+		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+			sub_type_hdr->length);
+
+		/* Fill in Subtype: Memory */
+		ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
+			crat_table->num_domains,
+			(struct crat_subtype_memory *)sub_type_hdr);
+		if (ret < 0)
+			return ret;
+		crat_table->length += sub_type_hdr->length;
+		crat_table->total_entries++;
+
+		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+			sub_type_hdr->length);
+
+		/* Fill in Subtype: IO Link */
+		ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
+				&entries,
+				(struct crat_subtype_iolink *)sub_type_hdr);
+		if (ret < 0)
+			return ret;
+		crat_table->length += (sub_type_hdr->length * entries);
+		crat_table->total_entries += entries;
+
+		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+				sub_type_hdr->length * entries);
+
+		crat_table->num_domains++;
+	}
+
+	/* TODO: Add cache Subtype for CPU.
+	 * Currently, CPU cache information is available in function
+	 * detect_cache_attributes(cpu) defined in the file
+	 * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
+	 * exported and to get the same information the code needs to be
+	 * duplicated.
+	 */
+
+	*size = crat_table->length;
+	pr_info("Virtual CRAT table created for CPU\n");
+
+	return 0;
+}
+
+static int kfd_fill_gpu_memory_affinity(int *avail_size,
+		struct kfd_dev *kdev, uint8_t type, uint64_t size,
+		struct crat_subtype_memory *sub_type_hdr,
+		uint32_t proximity_domain,
+		const struct kfd_local_mem_info *local_mem_info)
+{
+	*avail_size -= sizeof(struct crat_subtype_memory);
+	if (*avail_size < 0)
+		return -ENOMEM;
+
+	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
+	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
+	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
+	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+
+	sub_type_hdr->proximity_domain = proximity_domain;
+
+	pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
+			type, size);
+
+	sub_type_hdr->length_low = lower_32_bits(size);
+	sub_type_hdr->length_high = upper_32_bits(size);
+
+	sub_type_hdr->width = local_mem_info->vram_width;
+	sub_type_hdr->visibility_type = type;
+
+	return 0;
+}
+
+/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
+ * to its NUMA node
+ *	@avail_size: Available size in the memory
+ *	@kdev - [IN] GPU device
+ *	@sub_type_hdr: Memory into which io link info will be filled in
+ *	@proximity_domain - proximity domain of the GPU node
+ *
+ *	Return 0 if successful else return -ve value
+ */
+static int kfd_fill_gpu_direct_io_link(int *avail_size,
+			struct kfd_dev *kdev,
+			struct crat_subtype_iolink *sub_type_hdr,
+			uint32_t proximity_domain)
+{
+	*avail_size -= sizeof(struct crat_subtype_iolink);
+	if (*avail_size < 0)
+		return -ENOMEM;
+
+	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
+
+	/* Fill in subtype header data */
+	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
+	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+
+	/* Fill in IOLINK subtype.
+	 * TODO: Fill-in other fields of iolink subtype
+	 */
+	sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
+	sub_type_hdr->proximity_domain_from = proximity_domain;
+#ifdef CONFIG_NUMA
+	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+		sub_type_hdr->proximity_domain_to = 0;
+	else
+		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
+#else
+	sub_type_hdr->proximity_domain_to = 0;
+#endif
+	return 0;
+}
+
+/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU
+ *
+ *	@pcrat_image: Fill in VCRAT for GPU
+ *	@size:	[IN] allocated size of crat_image.
+ *		[OUT] actual size of data filled in crat_image
+ */
+static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+				      size_t *size, struct kfd_dev *kdev,
+				      uint32_t proximity_domain)
+{
+	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
+	struct crat_subtype_generic *sub_type_hdr;
+	struct crat_subtype_computeunit *cu;
+	struct kfd_cu_info cu_info;
+	struct amd_iommu_device_info iommu_info;
+	int avail_size = *size;
+	uint32_t total_num_of_cu;
+	int num_of_cache_entries = 0;
+	int cache_mem_filled = 0;
+	int ret = 0;
+	const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
+					 AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
+					 AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
+	struct kfd_local_mem_info local_mem_info;
+
+	if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
+		return -EINVAL;
+
+	/* Fill the CRAT Header.
+	 * Modify length and total_entries as subunits are added.
+	 */
+	avail_size -= sizeof(struct crat_header);
+	if (avail_size < 0)
+		return -ENOMEM;
+
+	memset(crat_table, 0, sizeof(struct crat_header));
+
+	memcpy(&crat_table->signature, CRAT_SIGNATURE,
+			sizeof(crat_table->signature));
+	/* Change length as we add more subtypes*/
+	crat_table->length = sizeof(struct crat_header);
+	crat_table->num_domains = 1;
+	crat_table->total_entries = 0;
+
+	/* Fill in Subtype: Compute Unit
+	 * First fill in the sub type header and then sub type data
+	 */
+	avail_size -= sizeof(struct crat_subtype_computeunit);
+	if (avail_size < 0)
+		return -ENOMEM;
+
+	sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
+	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
+
+	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
+	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
+	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+	/* Fill CU subtype data */
+	cu = (struct crat_subtype_computeunit *)sub_type_hdr;
+	cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
+	cu->proximity_domain = proximity_domain;
+
+	kdev->kfd2kgd->get_cu_info(kdev->kgd, &cu_info);
+	cu->num_simd_per_cu = cu_info.simd_per_cu;
+	cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
+	cu->max_waves_simd = cu_info.max_waves_per_simd;
+
+	cu->wave_front_size = cu_info.wave_front_size;
+	cu->array_count = cu_info.num_shader_arrays_per_engine *
+		cu_info.num_shader_engines;
+	total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
+	cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
+	cu->num_cu_per_array = cu_info.num_cu_per_sh;
+	cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
+	cu->num_banks = cu_info.num_shader_engines;
+	cu->lds_size_in_kb = cu_info.lds_size;
+
+	cu->hsa_capability = 0;
+
+	/* Check if this node supports IOMMU. During parsing this flag will
+	 * translate to HSA_CAP_ATS_PRESENT
+	 */
+	iommu_info.flags = 0;
+	if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) {
+		if ((iommu_info.flags & required_iommu_flags) ==
+				required_iommu_flags)
+			cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
+	}
+
+	crat_table->length += sub_type_hdr->length;
+	crat_table->total_entries++;
+
+	/* Fill in Subtype: Memory. Only on systems with large BAR (no
+	 * private FB), report memory as public. On other systems
+	 * report the total FB size (public+private) as a single
+	 * private heap.
+	 */
+	kdev->kfd2kgd->get_local_mem_info(kdev->kgd, &local_mem_info);
+	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+			sub_type_hdr->length);
+
+	if (local_mem_info.local_mem_size_private == 0)
+		ret = kfd_fill_gpu_memory_affinity(&avail_size,
+				kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
+				local_mem_info.local_mem_size_public,
+				(struct crat_subtype_memory *)sub_type_hdr,
+				proximity_domain,
+				&local_mem_info);
+	else
+		ret = kfd_fill_gpu_memory_affinity(&avail_size,
+				kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE,
+				local_mem_info.local_mem_size_public +
+				local_mem_info.local_mem_size_private,
+				(struct crat_subtype_memory *)sub_type_hdr,
+				proximity_domain,
+				&local_mem_info);
+	if (ret < 0)
+		return ret;
+
+	crat_table->length += sizeof(struct crat_subtype_memory);
+	crat_table->total_entries++;
+
+	/* TODO: Fill in cache information. This information is NOT readily
+	 * available in KGD
+	 */
+	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+		sub_type_hdr->length);
+	ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,
+				avail_size,
+				&cu_info,
+				(struct crat_subtype_cache *)sub_type_hdr,
+				&cache_mem_filled,
+				&num_of_cache_entries);
+
+	if (ret < 0)
+		return ret;
+
+	crat_table->length += cache_mem_filled;
+	crat_table->total_entries += num_of_cache_entries;
+	avail_size -= cache_mem_filled;
+
+	/* Fill in Subtype: IO_LINKS
+	 *  Only direct links are added here which is Link from GPU to
+	 *  to its NUMA node. Indirect links are added by userspace.
+	 */
+	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+		cache_mem_filled);
+	ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev,
+		(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
+
+	if (ret < 0)
+		return ret;
+
+	crat_table->length += sub_type_hdr->length;
+	crat_table->total_entries++;
+
+	*size = crat_table->length;
+	pr_info("Virtual CRAT table created for GPU\n");
+
+	return ret;
+}
+
+/* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
+ *		creates a Virtual CRAT (VCRAT) image
+ *
+ * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
+ *
+ *	@crat_image: VCRAT image created because ACPI does not have a
+ *		     CRAT for this device
+ *	@size: [OUT] size of virtual crat_image
+ *	@flags:	COMPUTE_UNIT_CPU - Create VCRAT for CPU device
+ *		COMPUTE_UNIT_GPU - Create VCRAT for GPU
+ *		(COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
+ *			-- this option is not currently implemented.
+ *			The assumption is that all AMD APUs will have CRAT
+ *	@kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
+ *
+ *	Return 0 if successful else return -ve value
+ */
+int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+				  int flags, struct kfd_dev *kdev,
+				  uint32_t proximity_domain)
+{
+	void *pcrat_image = NULL;
+	int ret = 0;
+
+	if (!crat_image)
+		return -EINVAL;
+
+	*crat_image = NULL;
+
+	/* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and
+	 * VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover
+	 * all the current conditions. A check is put not to overwrite beyond
+	 * allocated size
+	 */
+	switch (flags) {
+	case COMPUTE_UNIT_CPU:
+		pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL);
+		if (!pcrat_image)
+			return -ENOMEM;
+		*size = VCRAT_SIZE_FOR_CPU;
+		ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
+		break;
+	case COMPUTE_UNIT_GPU:
+		if (!kdev)
+			return -EINVAL;
+		pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
+		if (!pcrat_image)
+			return -ENOMEM;
+		*size = VCRAT_SIZE_FOR_GPU;
+		ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev,
+						 proximity_domain);
+		break;
+	case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
+		/* TODO: */
+		ret = -EINVAL;
+		pr_err("VCRAT not implemented for APU\n");
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (!ret)
+		*crat_image = pcrat_image;
+	else
+		kfree(pcrat_image);
+
+	return ret;
+}
+
+
+/* kfd_destroy_crat_image
+ *
+ *	@crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
+ *
+ */
+void kfd_destroy_crat_image(void *crat_image)
+{
+	kfree(crat_image);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index a374fa3d3ee6f28ad009cc30d4217df003248caf..b5cd182b9edd22fa08beb113396714b7e6e17b4a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -44,6 +44,10 @@
 
 #define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)
 
+/* Compute Unit flags */
+#define COMPUTE_UNIT_CPU	(1 << 0)  /* Create Virtual CRAT for CPU */
+#define COMPUTE_UNIT_GPU	(1 << 1)  /* Create Virtual CRAT for GPU */
+
 struct crat_header {
 	uint32_t	signature;
 	uint32_t	length;
@@ -105,7 +109,7 @@ struct crat_subtype_computeunit {
 	uint8_t		wave_front_size;
 	uint8_t		num_banks;
 	uint16_t	micro_engine_id;
-	uint8_t		num_arrays;
+	uint8_t		array_count;
 	uint8_t		num_cu_per_array;
 	uint8_t		num_simd_per_cu;
 	uint8_t		max_slots_scatch_cu;
@@ -127,13 +131,14 @@ struct crat_subtype_memory {
 	uint8_t		length;
 	uint16_t	reserved;
 	uint32_t	flags;
-	uint32_t	promixity_domain;
+	uint32_t	proximity_domain;
 	uint32_t	base_addr_low;
 	uint32_t	base_addr_high;
 	uint32_t	length_low;
 	uint32_t	length_high;
 	uint32_t	width;
-	uint8_t		reserved2[CRAT_MEMORY_RESERVED_LENGTH];
+	uint8_t		visibility_type; /* for virtual (dGPU) CRAT */
+	uint8_t		reserved2[CRAT_MEMORY_RESERVED_LENGTH - 1];
 };
 
 /*
@@ -222,9 +227,12 @@ struct crat_subtype_ccompute {
 /*
  * HSA IO Link Affinity structure and definitions
  */
-#define CRAT_IOLINK_FLAGS_ENABLED	0x00000001
-#define CRAT_IOLINK_FLAGS_COHERENCY	0x00000002
-#define CRAT_IOLINK_FLAGS_RESERVED	0xfffffffc
+#define CRAT_IOLINK_FLAGS_ENABLED		(1 << 0)
+#define CRAT_IOLINK_FLAGS_NON_COHERENT		(1 << 1)
+#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT	(1 << 2)
+#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT	(1 << 3)
+#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA	(1 << 4)
+#define CRAT_IOLINK_FLAGS_RESERVED_MASK		0xffffffe0
 
 /*
  * IO interface types
@@ -232,10 +240,18 @@ struct crat_subtype_ccompute {
 #define CRAT_IOLINK_TYPE_UNDEFINED	0
 #define CRAT_IOLINK_TYPE_HYPERTRANSPORT	1
 #define CRAT_IOLINK_TYPE_PCIEXPRESS	2
-#define CRAT_IOLINK_TYPE_OTHER		3
+#define CRAT_IOLINK_TYPE_AMBA		3
+#define CRAT_IOLINK_TYPE_MIPI		4
+#define CRAT_IOLINK_TYPE_QPI_1_1	5
+#define CRAT_IOLINK_TYPE_RESERVED1	6
+#define CRAT_IOLINK_TYPE_RESERVED2	7
+#define CRAT_IOLINK_TYPE_RAPID_IO	8
+#define CRAT_IOLINK_TYPE_INFINIBAND	9
+#define CRAT_IOLINK_TYPE_RESERVED3	10
+#define CRAT_IOLINK_TYPE_OTHER		11
 #define CRAT_IOLINK_TYPE_MAX		255
 
-#define CRAT_IOLINK_RESERVED_LENGTH 24
+#define CRAT_IOLINK_RESERVED_LENGTH	24
 
 struct crat_subtype_iolink {
 	uint8_t		type;
@@ -291,4 +307,14 @@ struct cdit_header {
 
 #pragma pack()
 
+struct kfd_dev;
+
+int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
+void kfd_destroy_crat_image(void *crat_image);
+int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
+			 uint32_t proximity_domain);
+int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+				  int flags, struct kfd_dev *kdev,
+				  uint32_t proximity_domain);
+
 #endif /* KFD_CRAT_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index c407f6bd99565c9e4d7c7147a65cdfa8c41417b8..afb26f205d2978717bdeec174603caa0a83d4712 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -95,7 +95,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
 
 	ib_packet->control = (1 << 23) | (1 << 31) |
-			((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
+			((size_in_bytes / 4) & 0xfffff);
 
 	ib_packet->bitfields5.pasid = pasid;
 
@@ -126,8 +126,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 
 	rm_packet->header.opcode = IT_RELEASE_MEM;
 	rm_packet->header.type = PM4_TYPE_3;
-	rm_packet->header.count = sizeof(struct pm4__release_mem) /
-					sizeof(unsigned int) - 2;
+	rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;
 
 	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
 	rm_packet->bitfields2.event_index =
@@ -652,8 +651,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
 	packets_vec[0].header.type = PM4_TYPE_3;
 	packets_vec[0].bitfields2.reg_offset =
-			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
-				USERCONFIG_REG_BASE;
+			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
 
 	packets_vec[0].bitfields2.insert_vmid = 0;
 	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
@@ -661,8 +659,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 	packets_vec[1].header.count = 1;
 	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
 	packets_vec[1].header.type = PM4_TYPE_3;
-	packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
-						AMD_CONFIG_REG_BASE;
+	packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
 
 	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
 	packets_vec[1].bitfields2.insert_vmid = 1;
@@ -678,8 +675,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 
 	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
 	packets_vec[2].bitfields2.reg_offset =
-				GRBM_GFX_INDEX / (sizeof(uint32_t)) -
-					USERCONFIG_REG_BASE;
+				GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
 
 	packets_vec[2].bitfields2.insert_vmid = 0;
 	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
new file mode 100644
index 0000000000000000000000000000000000000000..4bd6ebfaf425bcc88947bf41cc6b2ad832bc84b9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2016-2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/debugfs.h>
+#include "kfd_priv.h"
+
+static struct dentry *debugfs_root;
+
+static int kfd_debugfs_open(struct inode *inode, struct file *file)
+{
+	int (*show)(struct seq_file *, void *) = inode->i_private;
+
+	return single_open(file, show, NULL);
+}
+
+static const struct file_operations kfd_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = kfd_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kfd_debugfs_init(void)
+{
+	struct dentry *ent;
+
+	debugfs_root = debugfs_create_dir("kfd", NULL);
+	if (!debugfs_root || debugfs_root == ERR_PTR(-ENODEV)) {
+		pr_warn("Failed to create kfd debugfs dir\n");
+		return;
+	}
+
+	ent = debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root,
+				  kfd_debugfs_mqds_by_process,
+				  &kfd_debugfs_fops);
+	if (!ent)
+		pr_warn("Failed to create mqds in kfd debugfs\n");
+
+	ent = debugfs_create_file("hqds", S_IFREG | 0444, debugfs_root,
+				  kfd_debugfs_hqds_by_device,
+				  &kfd_debugfs_fops);
+	if (!ent)
+		pr_warn("Failed to create hqds in kfd debugfs\n");
+
+	ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root,
+				  kfd_debugfs_rls_by_device,
+				  &kfd_debugfs_fops);
+	if (!ent)
+		pr_warn("Failed to create rls in kfd debugfs\n");
+}
+
+void kfd_debugfs_fini(void)
+{
+	debugfs_remove_recursive(debugfs_root);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 621a3b53a0384e1ff3aa367eeb242aef21b6dbf2..a8fa33a08de301fde244f1584e8a9edeeea40775 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -27,6 +27,7 @@
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_pm4_headers_vi.h"
+#include "cwsr_trap_handler_gfx8.asm"
 
 #define MQD_SIZE_ALIGNED 768
 
@@ -38,7 +39,8 @@ static const struct kfd_device_info kaveri_device_info = {
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
-	.mqd_size_aligned = MQD_SIZE_ALIGNED
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cwsr = false,
 };
 
 static const struct kfd_device_info carrizo_device_info = {
@@ -49,7 +51,8 @@ static const struct kfd_device_info carrizo_device_info = {
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
-	.mqd_size_aligned = MQD_SIZE_ALIGNED
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cwsr = true,
 };
 
 struct kfd_deviceid {
@@ -212,6 +215,17 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
 	return AMD_IOMMU_INV_PRI_RSP_INVALID;
 }
 
+static void kfd_cwsr_init(struct kfd_dev *kfd)
+{
+	if (cwsr_enable && kfd->device_info->supports_cwsr) {
+		BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+
+		kfd->cwsr_isa = cwsr_trap_gfx8_hex;
+		kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
+		kfd->cwsr_enabled = true;
+	}
+}
+
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			 const struct kgd2kfd_shared_resources *gpu_resources)
 {
@@ -224,6 +238,17 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
 			- kfd->vm_info.first_vmid_kfd + 1;
 
+	/* Verify module parameters regarding mapped process number*/
+	if ((hws_max_conc_proc < 0)
+			|| (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
+		dev_err(kfd_device,
+			"hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
+			hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
+			kfd->vm_info.vmid_num_kfd);
+		kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
+	} else
+		kfd->max_proc_per_quantum = hws_max_conc_proc;
+
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
 			kfd->device_info->mqd_size_aligned;
@@ -286,6 +311,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 		goto device_iommu_pasid_error;
 	}
 
+	kfd_cwsr_init(kfd);
+
 	if (kfd_resume(kfd))
 		goto kfd_resume_error;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e202921c150e37c6e816ab49ce277c54c3304e8e..b21285afa4eac19f243ba1c22b3a486e1828eeae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -149,8 +149,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 
 static int create_queue_nocpsch(struct device_queue_manager *dqm,
 				struct queue *q,
-				struct qcm_process_device *qpd,
-				int *allocated_vmid)
+				struct qcm_process_device *qpd)
 {
 	int retval;
 
@@ -170,9 +169,11 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 		if (retval)
 			goto out_unlock;
 	}
-	*allocated_vmid = qpd->vmid;
 	q->properties.vmid = qpd->vmid;
 
+	q->properties.tba_addr = qpd->tba_addr;
+	q->properties.tma_addr = qpd->tma_addr;
+
 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
 		retval = create_compute_queue_nocpsch(dqm, q, qpd);
 	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
@@ -181,10 +182,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 		retval = -EINVAL;
 
 	if (retval) {
-		if (list_empty(&qpd->queues_list)) {
+		if (list_empty(&qpd->queues_list))
 			deallocate_vmid(dqm, qpd, q);
-			*allocated_vmid = 0;
-		}
 		goto out_unlock;
 	}
 
@@ -809,16 +808,13 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
 }
 
 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
-			struct qcm_process_device *qpd, int *allocate_vmid)
+			struct qcm_process_device *qpd)
 {
 	int retval;
 	struct mqd_manager *mqd;
 
 	retval = 0;
 
-	if (allocate_vmid)
-		*allocate_vmid = 0;
-
 	mutex_lock(&dqm->lock);
 
 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
@@ -846,6 +842,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	}
 
 	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+
+	q->properties.tba_addr = qpd->tba_addr;
+	q->properties.tma_addr = qpd->tma_addr;
 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
 	if (retval)
@@ -1014,13 +1013,13 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
 	list_del(&q->list);
 	qpd->queue_count--;
-	if (q->properties.is_active)
+	if (q->properties.is_active) {
 		dqm->queue_count--;
-
-	retval = execute_queues_cpsch(dqm,
+		retval = execute_queues_cpsch(dqm,
 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
-	if (retval == -ETIME)
-		qpd->reset_wavefronts = true;
+		if (retval == -ETIME)
+			qpd->reset_wavefronts = true;
+	}
 
 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 
@@ -1034,7 +1033,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
 	mutex_unlock(&dqm->lock);
 
-	return 0;
+	return retval;
 
 failed:
 failed_try_destroy_debugged_queue:
@@ -1110,6 +1109,26 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
 	return retval;
 }
 
+static int set_trap_handler(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				uint64_t tba_addr,
+				uint64_t tma_addr)
+{
+	uint64_t *tma;
+
+	if (dqm->dev->cwsr_enabled) {
+		/* Jump from CWSR trap handler to user trap */
+		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
+		tma[0] = tba_addr;
+		tma[1] = tma_addr;
+	} else {
+		qpd->tba_addr = tba_addr;
+		qpd->tma_addr = tma_addr;
+	}
+
+	return 0;
+}
+
 static int process_termination_nocpsch(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd)
 {
@@ -1241,6 +1260,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
 		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+		dqm->ops.set_trap_handler = set_trap_handler;
 		dqm->ops.process_termination = process_termination_cpsch;
 		break;
 	case KFD_SCHED_POLICY_NO_HWS:
@@ -1256,6 +1276,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		dqm->ops.initialize = initialize_nocpsch;
 		dqm->ops.uninitialize = uninitialize;
 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+		dqm->ops.set_trap_handler = set_trap_handler;
 		dqm->ops.process_termination = process_termination_nocpsch;
 		break;
 	default:
@@ -1290,3 +1311,74 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
 	dqm->ops.uninitialize(dqm);
 	kfree(dqm);
 }
+
+#if defined(CONFIG_DEBUG_FS)
+
+static void seq_reg_dump(struct seq_file *m,
+			 uint32_t (*dump)[2], uint32_t n_regs)
+{
+	uint32_t i, count;
+
+	for (i = 0, count = 0; i < n_regs; i++) {
+		if (count == 0 ||
+		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
+			seq_printf(m, "%s    %08x: %08x",
+				   i ? "\n" : "",
+				   dump[i][0], dump[i][1]);
+			count = 7;
+		} else {
+			seq_printf(m, " %08x", dump[i][1]);
+			count--;
+		}
+	}
+
+	seq_puts(m, "\n");
+}
+
+int dqm_debugfs_hqds(struct seq_file *m, void *data)
+{
+	struct device_queue_manager *dqm = data;
+	uint32_t (*dump)[2], n_regs;
+	int pipe, queue;
+	int r = 0;
+
+	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
+		int pipe_offset = pipe * get_queues_per_pipe(dqm);
+
+		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
+			if (!test_bit(pipe_offset + queue,
+				      dqm->dev->shared_resources.queue_bitmap))
+				continue;
+
+			r = dqm->dev->kfd2kgd->hqd_dump(
+				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
+			if (r)
+				break;
+
+			seq_printf(m, "  CP Pipe %d, Queue %d\n",
+				  pipe, queue);
+			seq_reg_dump(m, dump, n_regs);
+
+			kfree(dump);
+		}
+	}
+
+	for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) {
+		for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) {
+			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
+				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
+			if (r)
+				break;
+
+			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
+				  pipe, queue);
+			seq_reg_dump(m, dump, n_regs);
+
+			kfree(dump);
+		}
+	}
+
+	return r;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 5b77cb69f732d3e3c4ac4a1761da12ecdf07a983..c61b693bfa8c3a6aac0af58e5c45bff4d9a3cc42 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -84,8 +84,7 @@ struct device_process_node {
 struct device_queue_manager_ops {
 	int	(*create_queue)(struct device_queue_manager *dqm,
 				struct queue *q,
-				struct qcm_process_device *qpd,
-				int *allocate_vmid);
+				struct qcm_process_device *qpd);
 
 	int	(*destroy_queue)(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
@@ -123,6 +122,11 @@ struct device_queue_manager_ops {
 					   void __user *alternate_aperture_base,
 					   uint64_t alternate_aperture_size);
 
+	int	(*set_trap_handler)(struct device_queue_manager *dqm,
+				    struct qcm_process_device *qpd,
+				    uint64_t tba_addr,
+				    uint64_t tma_addr);
+
 	int (*process_termination)(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd);
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index feb76c235b1a6ff70f3d28297744d408bb895091..ebb4da14e3df7a3fa1efeaf60551b211f7285298 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -116,8 +116,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
 	pr_debug("doorbell aperture size  == 0x%08lX\n",
 			kfd->shared_resources.doorbell_aperture_size);
 
-	pr_debug("doorbell kernel address == 0x%08lX\n",
-			(uintptr_t)kfd->doorbell_kernel_ptr);
+	pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
 
 	return 0;
 }
@@ -194,8 +193,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 
 	pr_debug("Get kernel queue doorbell\n"
 			 "     doorbell offset   == 0x%08X\n"
-			 "     kernel address    == 0x%08lX\n",
-		*doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
+			 "     kernel address    == %p\n",
+		*doorbell_off, (kfd->doorbell_kernel_ptr + inx));
 
 	return kfd->doorbell_kernel_ptr + inx;
 }
@@ -215,7 +214,7 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
 {
 	if (db) {
 		writel(value, db);
-		pr_debug("Writing %d to doorbell address 0x%p\n", value, db);
+		pr_debug("Writing %d to doorbell address %p\n", value, db);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index cb92d4b72400953c453f92357b82fbd6ad4ef1fc..93aae5c1e78bf7a37a4e885a262f9de0b21981a4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -441,7 +441,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 	/*
 	 * Because we are called from arbitrary context (workqueue) as opposed
 	 * to process context, kfd_process could attempt to exit while we are
-	 * running so the lookup function returns a locked process.
+	 * running so the lookup function increments the process ref count.
 	 */
 	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
 
@@ -493,7 +493,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 	}
 
 	mutex_unlock(&p->event_mutex);
-	mutex_unlock(&p->mutex);
+	kfd_unref_process(p);
 }
 
 static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
@@ -847,7 +847,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
 	/*
 	 * Because we are called from arbitrary context (workqueue) as opposed
 	 * to process context, kfd_process could attempt to exit while we are
-	 * running so the lookup function returns a locked process.
+	 * running so the lookup function increments the process ref count.
 	 */
 	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
 	struct mm_struct *mm;
@@ -860,7 +860,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
 	 */
 	mm = get_task_mm(p->lead_thread);
 	if (!mm) {
-		mutex_unlock(&p->mutex);
+		kfd_unref_process(p);
 		return; /* Process is exiting */
 	}
 
@@ -903,7 +903,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
 			&memory_exception_data);
 
 	mutex_unlock(&p->event_mutex);
-	mutex_unlock(&p->mutex);
+	kfd_unref_process(p);
 }
 
 void kfd_signal_hw_exception_event(unsigned int pasid)
@@ -911,7 +911,7 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
 	/*
 	 * Because we are called from arbitrary context (workqueue) as opposed
 	 * to process context, kfd_process could attempt to exit while we are
-	 * running so the lookup function returns a locked process.
+	 * running so the lookup function increments the process ref count.
 	 */
 	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
 
@@ -924,5 +924,5 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
 	lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
 
 	mutex_unlock(&p->event_mutex);
-	mutex_unlock(&p->mutex);
+	kfd_unref_process(p);
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index c59384bbbc5fc6befa302f99d10b6fdc0b30af94..7377513050e663901071486492ce9a1881a65353 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -300,9 +300,14 @@ int kfd_init_apertures(struct kfd_process *process)
 	struct kfd_process_device *pdd;
 
 	/*Iterating over all devices*/
-	while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL &&
+	while (kfd_topology_enum_kfd_devices(id, &dev) == 0 &&
 		id < NUM_OF_SUPPORTED_GPUS) {
 
+		if (!dev) {
+			id++; /* Skip non GPU devices */
+			continue;
+		}
+
 		pdd = kfd_create_process_device_data(dev, process);
 		if (!pdd) {
 			pr_err("Failed to create process device data\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 8b0c0645d7c05ed8c95b9ef59556f7c8645da5bd..5dc6567d4a138b9de087c8d543e72e90a879b40e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -218,7 +218,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
 	rptr = *kq->rptr_kernel;
 	wptr = *kq->wptr_kernel;
 	queue_address = (unsigned int *)kq->pq_kernel_addr;
-	queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t);
+	queue_size_dwords = kq->queue->properties.queue_size / 4;
 
 	pr_debug("rptr: %d\n", rptr);
 	pr_debug("wptr: %d\n", wptr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index f744caeaee049587520c75524f3ee6e580b8a63e..3ac72bed4f310a803b426a2483e40161020d5e41 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -50,6 +50,15 @@ module_param(sched_policy, int, 0444);
 MODULE_PARM_DESC(sched_policy,
 	"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
 
+int hws_max_conc_proc = 8;
+module_param(hws_max_conc_proc, int, 0444);
+MODULE_PARM_DESC(hws_max_conc_proc,
+	"Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
+
+int cwsr_enable = 1;
+module_param(cwsr_enable, int, 0444);
+MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
+
 int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
 module_param(max_num_of_queues_per_device, int, 0444);
 MODULE_PARM_DESC(max_num_of_queues_per_device,
@@ -60,6 +69,11 @@ module_param(send_sigterm, int, 0444);
 MODULE_PARM_DESC(send_sigterm,
 	"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
 
+int ignore_crat;
+module_param(ignore_crat, int, 0444);
+MODULE_PARM_DESC(ignore_crat,
+	"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
+
 static int amdkfd_init_completed;
 
 int kgd2kfd_init(unsigned int interface_version,
@@ -114,6 +128,8 @@ static int __init kfd_module_init(void)
 
 	kfd_process_create_wq();
 
+	kfd_debugfs_init();
+
 	amdkfd_init_completed = 1;
 
 	dev_info(kfd_device, "Initialized module\n");
@@ -130,6 +146,7 @@ static void __exit kfd_module_exit(void)
 {
 	amdkfd_init_completed = 0;
 
+	kfd_debugfs_fini();
 	kfd_process_destroy_wq();
 	kfd_topology_shutdown();
 	kfd_chardev_exit();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 1f3a6ba7eed21e624cf80ccfc068dc7f6ef73929..8972bcfbf701c269c14c3bb6804d439429b2bb08 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -85,6 +85,10 @@ struct mqd_manager {
 				uint64_t queue_address,	uint32_t pipe_id,
 				uint32_t queue_id);
 
+#if defined(CONFIG_DEBUG_FS)
+	int	(*debugfs_show_mqd)(struct seq_file *m, void *data);
+#endif
+
 	struct mutex	mqd_mutex;
 	struct kfd_dev	*dev;
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 4728fad3fd7425ca2e0ef2fbb805dc145d078df7..f8ef4a051e08282e2cb6b629a6f8d9f75162829b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -36,6 +36,11 @@ static inline struct cik_mqd *get_mqd(void *mqd)
 	return (struct cik_mqd *)mqd;
 }
 
+static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+	return (struct cik_sdma_rlc_registers *)mqd;
+}
+
 static int init_mqd(struct mqd_manager *mm, void **mqd,
 		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
 		struct queue_properties *q)
@@ -149,7 +154,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
 {
 	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
 	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
-	uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+	uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
 
 	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
 					  (uint32_t __user *)p->write_ptr,
@@ -160,7 +165,9 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
 			 uint32_t pipe_id, uint32_t queue_id,
 			 struct queue_properties *p, struct mm_struct *mms)
 {
-	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
+	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+					       (uint32_t __user *)p->write_ptr,
+					       mms);
 }
 
 static int update_mqd(struct mqd_manager *mm, void *mqd,
@@ -176,8 +183,7 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
 	 * Calculating queue size which is log base 2 of actual queue size -1
 	 * dwords and another -1 for ffs
 	 */
-	m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
-								- 1 - 1;
+	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
@@ -202,7 +208,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
 	struct cik_sdma_rlc_registers *m;
 
 	m = get_sdma_mqd(mqd);
-	m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+	m->sdma_rlc_rb_cntl = order_base_2(q->queue_size / 4)
 			<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
 			q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
 			1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
@@ -343,8 +349,7 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
 	 * Calculating queue size which is log base 2 of actual queue
 	 * size -1 dwords
 	 */
-	m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
-								- 1 - 1;
+	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
@@ -360,15 +365,25 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
 	return 0;
 }
 
-struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
-{
-	struct cik_sdma_rlc_registers *m;
+#if defined(CONFIG_DEBUG_FS)
 
-	m = (struct cik_sdma_rlc_registers *)mqd;
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct cik_mqd), false);
+	return 0;
+}
 
-	return m;
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct cik_sdma_rlc_registers), false);
+	return 0;
 }
 
+#endif
+
+
 struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 		struct kfd_dev *dev)
 {
@@ -392,6 +407,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
 		break;
 	case KFD_MQD_TYPE_HIQ:
 		mqd->init_mqd = init_mqd_hiq;
@@ -400,6 +418,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 		mqd->update_mqd = update_mqd_hiq;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
 		break;
 	case KFD_MQD_TYPE_SDMA:
 		mqd->init_mqd = init_mqd_sdma;
@@ -408,6 +429,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 		mqd->update_mqd = update_mqd_sdma;
 		mqd->destroy_mqd = destroy_mqd_sdma;
 		mqd->is_occupied = is_occupied_sdma;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
 		break;
 	default:
 		kfree(mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 4ea854f9007b7a1f26045b8d57e0b5a306bd6619..971aec0637dc7fe3c2c04f1550c79505930926b8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -30,7 +30,7 @@
 #include "vi_structs.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_enum.h"
-
+#include "oss/oss_3_0_sh_mask.h"
 #define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
 
 static inline struct vi_mqd *get_mqd(void *mqd)
@@ -38,6 +38,11 @@ static inline struct vi_mqd *get_mqd(void *mqd)
 	return (struct vi_mqd *)mqd;
 }
 
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	return (struct vi_sdma_mqd *)mqd;
+}
+
 static int init_mqd(struct mqd_manager *mm, void **mqd,
 			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
 			struct queue_properties *q)
@@ -84,6 +89,28 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
 	if (q->format == KFD_QUEUE_FORMAT_AQL)
 		m->cp_hqd_iq_rptr = 1;
 
+	if (q->tba_addr) {
+		m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
+		m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
+		m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
+		m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
+		m->compute_pgm_rsrc2 |=
+			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
+	}
+
+	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
+		m->cp_hqd_persistent_state |=
+			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+		m->cp_hqd_ctx_save_base_addr_lo =
+			lower_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_base_addr_hi =
+			upper_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+	}
+
 	*mqd = m;
 	if (gart_addr)
 		*gart_addr = addr;
@@ -98,7 +125,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
 {
 	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
 	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
-	uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+	uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
 
 	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
 					  (uint32_t __user *)p->write_ptr,
@@ -116,8 +143,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
 			atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT |
 			mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
-	m->cp_hqd_pq_control |=
-			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+	m->cp_hqd_pq_control |=	order_base_2(q->queue_size / 4) - 1;
 	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
 
 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
@@ -147,7 +173,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 	 * is safe, giving a maximum field value of 0xA.
 	 */
 	m->cp_hqd_eop_control |= min(0xA,
-		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+		order_base_2(q->eop_ring_buffer_size / 4) - 1);
 	m->cp_hqd_eop_base_addr_lo =
 			lower_32_bits(q->eop_ring_buffer_address >> 8);
 	m->cp_hqd_eop_base_addr_hi =
@@ -163,6 +189,11 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
 	}
 
+	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
+		m->cp_hqd_ctx_save_control =
+			atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
+			mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
+
 	q->is_active = (q->queue_size > 0 &&
 			q->queue_address != 0 &&
 			q->queue_percent > 0);
@@ -234,6 +265,117 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
 	return retval;
 }
 
+static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	int retval;
+	struct vi_sdma_mqd *m;
+
+
+	retval = kfd_gtt_sa_allocate(mm->dev,
+			sizeof(struct vi_sdma_mqd),
+			mqd_mem_obj);
+
+	if (retval != 0)
+		return -ENOMEM;
+
+	m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
+
+	memset(m, 0, sizeof(struct vi_sdma_mqd));
+
+	*mqd = m;
+	if (gart_addr != NULL)
+		*gart_addr = (*mqd_mem_obj)->gpu_addr;
+
+	retval = mm->update_mqd(mm, m, q);
+
+	return retval;
+}
+
+static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		struct kfd_mem_obj *mqd_mem_obj)
+{
+	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		uint32_t pipe_id, uint32_t queue_id,
+		struct queue_properties *p, struct mm_struct *mms)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+					       (uint32_t __user *)p->write_ptr,
+					       mms);
+}
+
+static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		struct queue_properties *q)
+{
+	struct vi_sdma_mqd *m;
+
+	m = get_sdma_mqd(mqd);
+	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
+		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_doorbell =
+		q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+	m->sdmax_rlcx_virtual_addr = q->sdma_vm_addr;
+
+	m->sdma_engine_id = q->sdma_engine_id;
+	m->sdma_queue_id = q->sdma_queue_id;
+
+	q->is_active = (q->queue_size > 0 &&
+			q->queue_address != 0 &&
+			q->queue_percent > 0);
+
+	return 0;
+}
+
+/*
+ *  * preempt type here is ignored because there is only one way
+ *  * to preempt sdma queue
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		enum kfd_preempt_type type,
+		unsigned int timeout, uint32_t pipe_id,
+		uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+}
+
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+		uint64_t queue_address, uint32_t pipe_id,
+		uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct vi_mqd), false);
+	return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct vi_sdma_mqd), false);
+	return 0;
+}
+
+#endif
+
 struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 		struct kfd_dev *dev)
 {
@@ -257,6 +399,9 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
 		break;
 	case KFD_MQD_TYPE_HIQ:
 		mqd->init_mqd = init_mqd_hiq;
@@ -265,8 +410,20 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 		mqd->update_mqd = update_mqd_hiq;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
 		break;
 	case KFD_MQD_TYPE_SDMA:
+		mqd->init_mqd = init_mqd_sdma;
+		mqd->uninit_mqd = uninit_mqd_sdma;
+		mqd->load_mqd = load_mqd_sdma;
+		mqd->update_mqd = update_mqd_sdma;
+		mqd->destroy_mqd = destroy_mqd_sdma;
+		mqd->is_occupied = is_occupied_sdma;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
 		break;
 	default:
 		kfree(mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 16da8ad02d8beb32dcc7da884d4894c9b1b3ac43..0ecbd1f9b606bfffd6302e6b72ef4af60e9ea419 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -45,7 +45,7 @@ static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
 
 	header.u32All = 0;
 	header.opcode = opcode;
-	header.count = packet_size/sizeof(uint32_t) - 2;
+	header.count = packet_size / 4 - 2;
 	header.type = PM4_TYPE_3;
 
 	return header.u32All;
@@ -55,15 +55,27 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 				unsigned int *rlib_size,
 				bool *over_subscription)
 {
-	unsigned int process_count, queue_count;
+	unsigned int process_count, queue_count, compute_queue_count;
 	unsigned int map_queue_size;
+	unsigned int max_proc_per_quantum = 1;
+	struct kfd_dev *dev = pm->dqm->dev;
 
 	process_count = pm->dqm->processes_count;
 	queue_count = pm->dqm->queue_count;
+	compute_queue_count = queue_count - pm->dqm->sdma_queue_count;
 
-	/* check if there is over subscription*/
+	/* check if there is over subscription
+	 * Note: the arbitration between the number of VMIDs and
+	 * hws_max_conc_proc has been done in
+	 * kgd2kfd_device_init().
+	 */
 	*over_subscription = false;
-	if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) {
+
+	if (dev->max_proc_per_quantum > 1)
+		max_proc_per_quantum = dev->max_proc_per_quantum;
+
+	if ((process_count > max_proc_per_quantum) ||
+	    compute_queue_count > get_queues_num(pm->dqm)) {
 		*over_subscription = true;
 		pr_debug("Over subscribed runlist\n");
 	}
@@ -116,10 +128,24 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
 			uint64_t ib, size_t ib_size_in_dwords, bool chain)
 {
 	struct pm4_mes_runlist *packet;
+	int concurrent_proc_cnt = 0;
+	struct kfd_dev *kfd = pm->dqm->dev;
 
 	if (WARN_ON(!ib))
 		return -EFAULT;
 
+	/* Determine the number of processes to map together to HW:
+	 * it can not exceed the number of VMIDs available to the
+	 * scheduler, and it is determined by the smaller of the number
+	 * of processes in the runlist and kfd module parameter
+	 * hws_max_conc_proc.
+	 * Note: the arbitration between the number of VMIDs and
+	 * hws_max_conc_proc has been done in
+	 * kgd2kfd_device_init().
+	 */
+	concurrent_proc_cnt = min(pm->dqm->processes_count,
+			kfd->max_proc_per_quantum);
+
 	packet = (struct pm4_mes_runlist *)buffer;
 
 	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
@@ -130,6 +156,7 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
 	packet->bitfields4.chain = chain ? 1 : 0;
 	packet->bitfields4.offload_polling = 0;
 	packet->bitfields4.valid = 1;
+	packet->bitfields4.process_cnt = concurrent_proc_cnt;
 	packet->ordinal2 = lower_32_bits(ib);
 	packet->bitfields3.ib_base_hi = upper_32_bits(ib);
 
@@ -251,6 +278,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 		return retval;
 
 	*rl_size_bytes = alloc_size_bytes;
+	pm->ib_size_bytes = alloc_size_bytes;
 
 	pr_debug("Building runlist ib process count: %d queues count %d\n",
 		pm->dqm->processes_count, pm->dqm->queue_count);
@@ -564,3 +592,26 @@ void pm_release_ib(struct packet_manager *pm)
 	}
 	mutex_unlock(&pm->lock);
 }
+
+#if defined(CONFIG_DEBUG_FS)
+
+int pm_debugfs_runlist(struct seq_file *m, void *data)
+{
+	struct packet_manager *pm = data;
+
+	mutex_lock(&pm->lock);
+
+	if (!pm->allocated) {
+		seq_puts(m, "  No active runlist\n");
+		goto out;
+	}
+
+	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
+		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);
+
+out:
+	mutex_unlock(&pm->lock);
+	return 0;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
index d6a796144269dce5888c2c7737cfb52e73232b7c..15fff4420e534fbd79c1cc1844244f715d348935 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -59,7 +59,7 @@ unsigned int kfd_pasid_alloc(void)
 		struct kfd_dev *dev = NULL;
 		unsigned int i = 0;
 
-		while ((dev = kfd_topology_enum_kfd_devices(i)) != NULL) {
+		while ((kfd_topology_enum_kfd_devices(i, &dev)) == 0) {
 			if (dev && dev->kfd2kgd) {
 				kfd2kgd = dev->kfd2kgd;
 				break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 9e4134c5b48196d0e9dfb87b2e6a0be37243be84..0bedcf9cc08c7945deaf86e6019505ed43aab6d1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -33,14 +33,17 @@
 #include <linux/kfd_ioctl.h>
 #include <linux/idr.h>
 #include <linux/kfifo.h>
+#include <linux/seq_file.h>
+#include <linux/kref.h>
 #include <kgd_kfd_interface.h>
 
 #include "amd_shared.h"
 
 #define KFD_SYSFS_FILE_MODE 0444
 
-#define KFD_MMAP_DOORBELL_MASK 0x8000000000000
-#define KFD_MMAP_EVENTS_MASK 0x4000000000000
+#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull
+#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull
+#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull
 
 /*
  * When working with cp scheduler we should assign the HIQ manually or via
@@ -62,6 +65,15 @@
 #define KFD_MAX_NUM_OF_PROCESSES 512
 #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
 
+/*
+ * Size of the per-process TBA+TMA buffer: 2 pages
+ *
+ * The first page is the TBA used for the CWSR ISA code. The second
+ * page is used as TMA for daisy changing a user-mode trap handler.
+ */
+#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
+#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
+
 /*
  * Kernel module parameter to specify maximum number of supported queues per
  * device
@@ -78,12 +90,26 @@ extern int max_num_of_queues_per_device;
 /* Kernel module parameter to specify the scheduling policy */
 extern int sched_policy;
 
+/*
+ * Kernel module parameter to specify the maximum process
+ * number per HW scheduler
+ */
+extern int hws_max_conc_proc;
+
+extern int cwsr_enable;
+
 /*
  * Kernel module parameter to specify whether to send sigterm to HSA process on
  * unhandled exception
  */
 extern int send_sigterm;
 
+/*
+ * Ignore CRAT table during KFD initialization, can be used to work around
+ * broken CRAT tables on some AMD systems
+ */
+extern int ignore_crat;
+
 /**
  * enum kfd_sched_policy
  *
@@ -131,6 +157,7 @@ struct kfd_device_info {
 	size_t ih_ring_entry_size;
 	uint8_t num_of_watch_points;
 	uint16_t mqd_size_aligned;
+	bool supports_cwsr;
 };
 
 struct kfd_mem_obj {
@@ -200,6 +227,14 @@ struct kfd_dev {
 
 	/* Debug manager */
 	struct kfd_dbgmgr           *dbgmgr;
+
+	/* Maximum process number mapped to HW scheduler */
+	unsigned int max_proc_per_quantum;
+
+	/* CWSR */
+	bool cwsr_enabled;
+	const void *cwsr_isa;
+	unsigned int cwsr_isa_size;
 };
 
 /* KGD2KFD callbacks */
@@ -332,6 +367,9 @@ struct queue_properties {
 	uint32_t eop_ring_buffer_size;
 	uint64_t ctx_save_restore_area_address;
 	uint32_t ctx_save_restore_area_size;
+	uint32_t ctl_stack_size;
+	uint64_t tba_addr;
+	uint64_t tma_addr;
 };
 
 /**
@@ -439,6 +477,11 @@ struct qcm_process_device {
 	uint32_t num_gws;
 	uint32_t num_oac;
 	uint32_t sh_hidden_private_base;
+
+	/* CWSR memory */
+	void *cwsr_kaddr;
+	uint64_t tba_addr;
+	uint64_t tma_addr;
 };
 
 
@@ -501,6 +544,9 @@ struct kfd_process {
 	 */
 	void *mm;
 
+	struct kref ref;
+	struct work_struct release_work;
+
 	struct mutex mutex;
 
 	/*
@@ -563,9 +609,10 @@ struct amdkfd_ioctl_desc {
 
 void kfd_process_create_wq(void);
 void kfd_process_destroy_wq(void);
-struct kfd_process *kfd_create_process(const struct task_struct *);
+struct kfd_process *kfd_create_process(struct file *filep);
 struct kfd_process *kfd_get_process(const struct task_struct *);
 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
+void kfd_unref_process(struct kfd_process *p);
 
 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
 						struct kfd_process *p);
@@ -577,6 +624,9 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 							struct kfd_process *p);
 
+int kfd_reserved_mem_mmap(struct kfd_process *process,
+			  struct vm_area_struct *vma);
+
 /* Process device data iterator */
 struct kfd_process_device *kfd_get_first_process_device_data(
 							struct kfd_process *p);
@@ -624,9 +674,12 @@ int kfd_topology_init(void);
 void kfd_topology_shutdown(void);
 int kfd_topology_add_device(struct kfd_dev *gpu);
 int kfd_topology_remove_device(struct kfd_dev *gpu);
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+						uint32_t proximity_domain);
 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
-struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
+int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
+int kfd_numa_node_to_apic_id(int numa_node_id);
 
 /* Interrupts */
 int kfd_interrupt_init(struct kfd_dev *dev);
@@ -643,8 +696,6 @@ int kgd2kfd_resume(struct kfd_dev *kfd);
 int kfd_init_apertures(struct kfd_process *process);
 
 /* Queue Context Management */
-struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd);
-
 int init_queue(struct queue **q, const struct queue_properties *properties);
 void uninit_queue(struct queue *q);
 void print_queue_properties(struct queue_properties *q);
@@ -699,6 +750,7 @@ struct packet_manager {
 	struct mutex lock;
 	bool allocated;
 	struct kfd_mem_obj *ib_buffer_obj;
+	unsigned int ib_size_bytes;
 };
 
 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
@@ -745,4 +797,23 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
 
 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
 
+/* Debugfs */
+#if defined(CONFIG_DEBUG_FS)
+
+void kfd_debugfs_init(void);
+void kfd_debugfs_fini(void);
+int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data);
+int pqm_debugfs_mqds(struct seq_file *m, void *data);
+int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data);
+int dqm_debugfs_hqds(struct seq_file *m, void *data);
+int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
+int pm_debugfs_runlist(struct seq_file *m, void *data);
+
+#else
+
+static inline void kfd_debugfs_init(void) {}
+static inline void kfd_debugfs_fini(void) {}
+
+#endif
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1f5ccd28bd41464932089e2916535d7152a0e942..4ff5f0fe6db83d49138a7f60bcfdce721e911d0d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -24,10 +24,12 @@
 #include <linux/log2.h>
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
+#include <linux/sched/task.h>
 #include <linux/slab.h>
 #include <linux/amd-iommu.h>
 #include <linux/notifier.h>
 #include <linux/compat.h>
+#include <linux/mman.h>
 
 struct mm_struct;
 
@@ -46,13 +48,12 @@ DEFINE_STATIC_SRCU(kfd_processes_srcu);
 
 static struct workqueue_struct *kfd_process_wq;
 
-struct kfd_process_release_work {
-	struct work_struct kfd_work;
-	struct kfd_process *p;
-};
-
 static struct kfd_process *find_process(const struct task_struct *thread);
-static struct kfd_process *create_process(const struct task_struct *thread);
+static void kfd_process_ref_release(struct kref *ref);
+static struct kfd_process *create_process(const struct task_struct *thread,
+					struct file *filep);
+static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
+
 
 void kfd_process_create_wq(void)
 {
@@ -68,9 +69,10 @@ void kfd_process_destroy_wq(void)
 	}
 }
 
-struct kfd_process *kfd_create_process(const struct task_struct *thread)
+struct kfd_process *kfd_create_process(struct file *filep)
 {
 	struct kfd_process *process;
+	struct task_struct *thread = current;
 
 	if (!thread->mm)
 		return ERR_PTR(-EINVAL);
@@ -79,9 +81,6 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread)
 	if (thread->group_leader->mm != thread->mm)
 		return ERR_PTR(-EINVAL);
 
-	/* Take mmap_sem because we call __mmu_notifier_register inside */
-	down_write(&thread->mm->mmap_sem);
-
 	/*
 	 * take kfd processes mutex before starting of process creation
 	 * so there won't be a case where two threads of the same process
@@ -93,14 +92,11 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread)
 	process = find_process(thread);
 	if (process)
 		pr_debug("Process already found\n");
-
-	if (!process)
-		process = create_process(thread);
+	else
+		process = create_process(thread, filep);
 
 	mutex_unlock(&kfd_processes_mutex);
 
-	up_write(&thread->mm->mmap_sem);
-
 	return process;
 }
 
@@ -144,63 +140,75 @@ static struct kfd_process *find_process(const struct task_struct *thread)
 	return p;
 }
 
-static void kfd_process_wq_release(struct work_struct *work)
+void kfd_unref_process(struct kfd_process *p)
+{
+	kref_put(&p->ref, kfd_process_ref_release);
+}
+
+static void kfd_process_destroy_pdds(struct kfd_process *p)
 {
-	struct kfd_process_release_work *my_work;
 	struct kfd_process_device *pdd, *temp;
-	struct kfd_process *p;
 
-	my_work = (struct kfd_process_release_work *) work;
+	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
+				 per_device_list) {
+		pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
+				pdd->dev->id, p->pasid);
 
-	p = my_work->p;
+		list_del(&pdd->per_device_list);
 
-	pr_debug("Releasing process (pasid %d) in workqueue\n",
-			p->pasid);
+		if (pdd->qpd.cwsr_kaddr)
+			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
+				get_order(KFD_CWSR_TBA_TMA_SIZE));
 
-	mutex_lock(&p->mutex);
+		kfree(pdd);
+	}
+}
 
-	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
-							per_device_list) {
-		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
-				pdd->dev->id, p->pasid);
+/* No process locking is needed in this function, because the process
+ * is not findable any more. We must assume that no other thread is
+ * using it any more, otherwise we couldn't safely free the process
+ * structure in the end.
+ */
+static void kfd_process_wq_release(struct work_struct *work)
+{
+	struct kfd_process *p = container_of(work, struct kfd_process,
+					     release_work);
+	struct kfd_process_device *pdd;
 
+	pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid);
+
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
 		if (pdd->bound == PDD_BOUND)
 			amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
-
-		list_del(&pdd->per_device_list);
-		kfree(pdd);
 	}
 
+	kfd_process_destroy_pdds(p);
+
 	kfd_event_free_process(p);
 
 	kfd_pasid_free(p->pasid);
 	kfd_free_process_doorbells(p);
 
-	mutex_unlock(&p->mutex);
-
 	mutex_destroy(&p->mutex);
 
-	kfree(p);
+	put_task_struct(p->lead_thread);
 
-	kfree(work);
+	kfree(p);
 }
 
-static void kfd_process_destroy_delayed(struct rcu_head *rcu)
+static void kfd_process_ref_release(struct kref *ref)
 {
-	struct kfd_process_release_work *work;
-	struct kfd_process *p;
-
-	p = container_of(rcu, struct kfd_process, rcu);
+	struct kfd_process *p = container_of(ref, struct kfd_process, ref);
 
-	mmdrop(p->mm);
+	INIT_WORK(&p->release_work, kfd_process_wq_release);
+	queue_work(kfd_process_wq, &p->release_work);
+}
 
-	work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);
+static void kfd_process_destroy_delayed(struct rcu_head *rcu)
+{
+	struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);
 
-	if (work) {
-		INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
-		work->p = p;
-		queue_work(kfd_process_wq, (struct work_struct *) work);
-	}
+	kfd_unref_process(p);
 }
 
 static void kfd_process_notifier_release(struct mmu_notifier *mn,
@@ -244,15 +252,12 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 	kfd_process_dequeue_from_all_devices(p);
 	pqm_uninit(&p->pqm);
 
+	/* Indicate to other users that MM is no longer valid */
+	p->mm = NULL;
+
 	mutex_unlock(&p->mutex);
 
-	/*
-	 * Because we drop mm_count inside kfd_process_destroy_delayed
-	 * and because the mmu_notifier_unregister function also drop
-	 * mm_count we need to take an extra count here.
-	 */
-	mmgrab(p->mm);
-	mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
+	mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
 	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
 }
 
@@ -260,7 +265,44 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
 	.release = kfd_process_notifier_release,
 };
 
-static struct kfd_process *create_process(const struct task_struct *thread)
+static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
+{
+	unsigned long  offset;
+	struct kfd_process_device *pdd = NULL;
+	struct kfd_dev *dev = NULL;
+	struct qcm_process_device *qpd = NULL;
+
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+		dev = pdd->dev;
+		qpd = &pdd->qpd;
+		if (!dev->cwsr_enabled || qpd->cwsr_kaddr)
+			continue;
+		offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
+		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
+			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
+			MAP_SHARED, offset);
+
+		if (IS_ERR_VALUE(qpd->tba_addr)) {
+			int err = qpd->tba_addr;
+
+			pr_err("Failure to set tba address. error %d.\n", err);
+			qpd->tba_addr = 0;
+			qpd->cwsr_kaddr = NULL;
+			return err;
+		}
+
+		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
+
+		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
+		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
+			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
+	}
+
+	return 0;
+}
+
+static struct kfd_process *create_process(const struct task_struct *thread,
+					struct file *filep)
 {
 	struct kfd_process *process;
 	int err = -ENOMEM;
@@ -277,13 +319,15 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 	if (kfd_alloc_process_doorbells(process) < 0)
 		goto err_alloc_doorbells;
 
+	kref_init(&process->ref);
+
 	mutex_init(&process->mutex);
 
 	process->mm = thread->mm;
 
 	/* register notifier */
 	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
-	err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
+	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
 	if (err)
 		goto err_mmu_notifier;
 
@@ -291,6 +335,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 			(uintptr_t)process->mm);
 
 	process->lead_thread = thread->group_leader;
+	get_task_struct(process->lead_thread);
 
 	INIT_LIST_HEAD(&process->per_device_data);
 
@@ -306,8 +351,14 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 	if (err != 0)
 		goto err_init_apertures;
 
+	err = kfd_process_init_cwsr(process, filep);
+	if (err)
+		goto err_init_cwsr;
+
 	return process;
 
+err_init_cwsr:
+	kfd_process_destroy_pdds(process);
 err_init_apertures:
 	pqm_uninit(&process->pqm);
 err_process_pqm_init:
@@ -343,16 +394,18 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	struct kfd_process_device *pdd = NULL;
 
 	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
-	if (pdd != NULL) {
-		pdd->dev = dev;
-		INIT_LIST_HEAD(&pdd->qpd.queues_list);
-		INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
-		pdd->qpd.dqm = dev->dqm;
-		pdd->process = p;
-		pdd->bound = PDD_UNBOUND;
-		pdd->already_dequeued = false;
-		list_add(&pdd->per_device_list, &p->per_device_data);
-	}
+	if (!pdd)
+		return NULL;
+
+	pdd->dev = dev;
+	INIT_LIST_HEAD(&pdd->qpd.queues_list);
+	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
+	pdd->qpd.dqm = dev->dqm;
+	pdd->qpd.pqm = &p->pqm;
+	pdd->process = p;
+	pdd->bound = PDD_UNBOUND;
+	pdd->already_dequeued = false;
+	list_add(&pdd->per_device_list, &p->per_device_data);
 
 	return pdd;
 }
@@ -408,7 +461,8 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
 		mutex_lock(&p->mutex);
 		pdd = kfd_get_process_device_data(dev, p);
-		if (pdd->bound != PDD_BOUND_SUSPENDED) {
+
+		if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
 			mutex_unlock(&p->mutex);
 			continue;
 		}
@@ -448,6 +502,11 @@ void kfd_unbind_processes_from_device(struct kfd_dev *dev)
 		mutex_lock(&p->mutex);
 		pdd = kfd_get_process_device_data(dev, p);
 
+		if (WARN_ON(!pdd)) {
+			mutex_unlock(&p->mutex);
+			continue;
+		}
+
 		if (pdd->bound == PDD_BOUND)
 			pdd->bound = PDD_BOUND_SUSPENDED;
 		mutex_unlock(&p->mutex);
@@ -483,6 +542,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
 
 	mutex_unlock(kfd_get_dbgmgr_mutex());
 
+	mutex_lock(&p->mutex);
+
 	pdd = kfd_get_process_device_data(dev, p);
 	if (pdd)
 		/* For GPU relying on IOMMU, we need to dequeue here
@@ -491,6 +552,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
 		kfd_process_dequeue_from_device(pdd);
 
 	mutex_unlock(&p->mutex);
+
+	kfd_unref_process(p);
 }
 
 struct kfd_process_device *kfd_get_first_process_device_data(
@@ -515,22 +578,86 @@ bool kfd_has_process_device_data(struct kfd_process *p)
 	return !(list_empty(&p->per_device_data));
 }
 
-/* This returns with process->mutex locked. */
+/* This increments the process->ref counter. */
 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
 {
-	struct kfd_process *p;
+	struct kfd_process *p, *ret_p = NULL;
 	unsigned int temp;
 
 	int idx = srcu_read_lock(&kfd_processes_srcu);
 
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
 		if (p->pasid == pasid) {
-			mutex_lock(&p->mutex);
+			kref_get(&p->ref);
+			ret_p = p;
 			break;
 		}
 	}
 
 	srcu_read_unlock(&kfd_processes_srcu, idx);
 
-	return p;
+	return ret_p;
+}
+
+int kfd_reserved_mem_mmap(struct kfd_process *process,
+			  struct vm_area_struct *vma)
+{
+	struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
+	struct kfd_process_device *pdd;
+	struct qcm_process_device *qpd;
+
+	if (!dev)
+		return -EINVAL;
+	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
+		pr_err("Incorrect CWSR mapping size.\n");
+		return -EINVAL;
+	}
+
+	pdd = kfd_get_process_device_data(dev, process);
+	if (!pdd)
+		return -EINVAL;
+	qpd = &pdd->qpd;
+
+	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					get_order(KFD_CWSR_TBA_TMA_SIZE));
+	if (!qpd->cwsr_kaddr) {
+		pr_err("Error allocating per process CWSR buffer.\n");
+		return -ENOMEM;
+	}
+
+	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
+		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
+	/* Mapping pages to user process */
+	return remap_pfn_range(vma, vma->vm_start,
+			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
+			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
+{
+	struct kfd_process *p;
+	unsigned int temp;
+	int r = 0;
+
+	int idx = srcu_read_lock(&kfd_processes_srcu);
+
+	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+		seq_printf(m, "Process %d PASID %d:\n",
+			   p->lead_thread->tgid, p->pasid);
+
+		mutex_lock(&p->mutex);
+		r = pqm_debugfs_mqds(m, &p->pqm);
+		mutex_unlock(&p->mutex);
+
+		if (r)
+			break;
+	}
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+
+	return r;
 }
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index a3f1e62c60ba9d80b7d0244be3b3e3301a64557c..8763806326682c4c059359ef0b44dc9855e88ef5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -178,10 +178,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		return retval;
 
 	if (list_empty(&pdd->qpd.queues_list) &&
-	    list_empty(&pdd->qpd.priv_queue_list)) {
-		pdd->qpd.pqm = pqm;
+	    list_empty(&pdd->qpd.priv_queue_list))
 		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
-	}
 
 	pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
 	if (!pqn) {
@@ -203,8 +201,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			goto err_create_queue;
 		pqn->q = q;
 		pqn->kq = NULL;
-		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
-						&q->properties.vmid);
+		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
 		pr_debug("DQM returned %d for create_queue\n", retval);
 		print_queue(q);
 		break;
@@ -224,8 +221,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			goto err_create_queue;
 		pqn->q = q;
 		pqn->kq = NULL;
-		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
-						&q->properties.vmid);
+		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
 		pr_debug("DQM returned %d for create_queue\n", retval);
 		print_queue(q);
 		break;
@@ -315,6 +311,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 	if (pqn->q) {
 		dqm = pqn->q->device->dqm;
 		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
+		if (retval) {
+			pr_debug("Destroy queue failed, returned %d\n", retval);
+			goto err_destroy_queue;
+		}
 		uninit_queue(pqn->q);
 	}
 
@@ -326,6 +326,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 	    list_empty(&pdd->qpd.priv_queue_list))
 		dqm->ops.unregister_process(dqm, &pdd->qpd);
 
+err_destroy_queue:
 	return retval;
 }
 
@@ -367,4 +368,67 @@ struct kernel_queue *pqm_get_kernel_queue(
 	return NULL;
 }
 
+#if defined(CONFIG_DEBUG_FS)
+
+int pqm_debugfs_mqds(struct seq_file *m, void *data)
+{
+	struct process_queue_manager *pqm = data;
+	struct process_queue_node *pqn;
+	struct queue *q;
+	enum KFD_MQD_TYPE mqd_type;
+	struct mqd_manager *mqd_manager;
+	int r = 0;
+
+	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+		if (pqn->q) {
+			q = pqn->q;
+			switch (q->properties.type) {
+			case KFD_QUEUE_TYPE_SDMA:
+				seq_printf(m, "  SDMA queue on device %x\n",
+					   q->device->id);
+				mqd_type = KFD_MQD_TYPE_SDMA;
+				break;
+			case KFD_QUEUE_TYPE_COMPUTE:
+				seq_printf(m, "  Compute queue on device %x\n",
+					   q->device->id);
+				mqd_type = KFD_MQD_TYPE_CP;
+				break;
+			default:
+				seq_printf(m,
+				"  Bad user queue type %d on device %x\n",
+					   q->properties.type, q->device->id);
+				continue;
+			}
+			mqd_manager = q->device->dqm->ops.get_mqd_manager(
+				q->device->dqm, mqd_type);
+		} else if (pqn->kq) {
+			q = pqn->kq->queue;
+			mqd_manager = pqn->kq->mqd;
+			switch (q->properties.type) {
+			case KFD_QUEUE_TYPE_DIQ:
+				seq_printf(m, "  DIQ on device %x\n",
+					   pqn->kq->dev->id);
+				mqd_type = KFD_MQD_TYPE_HIQ;
+				break;
+			default:
+				seq_printf(m,
+				"  Bad kernel queue type %d on device %x\n",
+					   q->properties.type,
+					   pqn->kq->dev->id);
+				continue;
+			}
+		} else {
+			seq_printf(m,
+		"  Weird: Queue node with neither kernel nor user queue\n");
+			continue;
+		}
+
+		r = mqd_manager->debugfs_show_mqd(m, q->mqd);
+		if (r != 0)
+			break;
+	}
+
+	return r;
+}
 
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 19ce59028d6bdb93844ac1582d237a11202fea73..c6a76090a725055df2bc61cccc50e34056ea8574 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -28,27 +28,32 @@
 #include <linux/hash.h>
 #include <linux/cpufreq.h>
 #include <linux/log2.h>
+#include <linux/dmi.h>
+#include <linux/atomic.h>
 
 #include "kfd_priv.h"
 #include "kfd_crat.h"
 #include "kfd_topology.h"
+#include "kfd_device_queue_manager.h"
 
+/* topology_device_list - Master list of all topology devices */
 static struct list_head topology_device_list;
-static int topology_crat_parsed;
 static struct kfd_system_properties sys_props;
 
 static DECLARE_RWSEM(topology_lock);
+static atomic_t topology_crat_proximity_domain;
 
-struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+						uint32_t proximity_domain)
 {
 	struct kfd_topology_device *top_dev;
-	struct kfd_dev *device = NULL;
+	struct kfd_topology_device *device = NULL;
 
 	down_read(&topology_lock);
 
 	list_for_each_entry(top_dev, &topology_device_list, list)
-		if (top_dev->gpu_id == gpu_id) {
-			device = top_dev->gpu;
+		if (top_dev->proximity_domain == proximity_domain) {
+			device = top_dev;
 			break;
 		}
 
@@ -57,7 +62,7 @@ struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
 	return device;
 }
 
-struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
+struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
 {
 	struct kfd_topology_device *top_dev;
 	struct kfd_dev *device = NULL;
@@ -65,7 +70,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
 	down_read(&topology_lock);
 
 	list_for_each_entry(top_dev, &topology_device_list, list)
-		if (top_dev->gpu->pdev == pdev) {
+		if (top_dev->gpu_id == gpu_id) {
 			device = top_dev->gpu;
 			break;
 		}
@@ -75,282 +80,31 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
 	return device;
 }
 
-static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size)
-{
-	struct acpi_table_header *crat_table;
-	acpi_status status;
-
-	if (!size)
-		return -EINVAL;
-
-	/*
-	 * Fetch the CRAT table from ACPI
-	 */
-	status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
-	if (status == AE_NOT_FOUND) {
-		pr_warn("CRAT table not found\n");
-		return -ENODATA;
-	} else if (ACPI_FAILURE(status)) {
-		const char *err = acpi_format_exception(status);
-
-		pr_err("CRAT table error: %s\n", err);
-		return -EINVAL;
-	}
-
-	if (*size >= crat_table->length && crat_image != NULL)
-		memcpy(crat_image, crat_table, crat_table->length);
-
-	*size = crat_table->length;
-
-	return 0;
-}
-
-static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
-		struct crat_subtype_computeunit *cu)
-{
-	dev->node_props.cpu_cores_count = cu->num_cpu_cores;
-	dev->node_props.cpu_core_id_base = cu->processor_id_low;
-	if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
-		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
-
-	pr_info("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
-			cu->processor_id_low);
-}
-
-static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
-		struct crat_subtype_computeunit *cu)
-{
-	dev->node_props.simd_id_base = cu->processor_id_low;
-	dev->node_props.simd_count = cu->num_simd_cores;
-	dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
-	dev->node_props.max_waves_per_simd = cu->max_waves_simd;
-	dev->node_props.wave_front_size = cu->wave_front_size;
-	dev->node_props.mem_banks_count = cu->num_banks;
-	dev->node_props.array_count = cu->num_arrays;
-	dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
-	dev->node_props.simd_per_cu = cu->num_simd_per_cu;
-	dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
-	if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
-		dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
-	pr_info("CU GPU: simds=%d id_base=%d\n", cu->num_simd_cores,
-				cu->processor_id_low);
-}
-
-/* kfd_parse_subtype_cu is called when the topology mutex is already acquired */
-static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu)
-{
-	struct kfd_topology_device *dev;
-	int i = 0;
-
-	pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
-			cu->proximity_domain, cu->hsa_capability);
-	list_for_each_entry(dev, &topology_device_list, list) {
-		if (cu->proximity_domain == i) {
-			if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
-				kfd_populated_cu_info_cpu(dev, cu);
-
-			if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
-				kfd_populated_cu_info_gpu(dev, cu);
-			break;
-		}
-		i++;
-	}
-
-	return 0;
-}
-
-/*
- * kfd_parse_subtype_mem is called when the topology mutex is
- * already acquired
- */
-static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem)
-{
-	struct kfd_mem_properties *props;
-	struct kfd_topology_device *dev;
-	int i = 0;
-
-	pr_info("Found memory entry in CRAT table with proximity_domain=%d\n",
-			mem->promixity_domain);
-	list_for_each_entry(dev, &topology_device_list, list) {
-		if (mem->promixity_domain == i) {
-			props = kfd_alloc_struct(props);
-			if (props == NULL)
-				return -ENOMEM;
-
-			if (dev->node_props.cpu_cores_count == 0)
-				props->heap_type = HSA_MEM_HEAP_TYPE_FB_PRIVATE;
-			else
-				props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
-
-			if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
-				props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
-			if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
-				props->flags |= HSA_MEM_FLAGS_NON_VOLATILE;
-
-			props->size_in_bytes =
-				((uint64_t)mem->length_high << 32) +
-							mem->length_low;
-			props->width = mem->width;
-
-			dev->mem_bank_count++;
-			list_add_tail(&props->list, &dev->mem_props);
-
-			break;
-		}
-		i++;
-	}
-
-	return 0;
-}
-
-/*
- * kfd_parse_subtype_cache is called when the topology mutex
- * is already acquired
- */
-static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache)
-{
-	struct kfd_cache_properties *props;
-	struct kfd_topology_device *dev;
-	uint32_t id;
-
-	id = cache->processor_id_low;
-
-	pr_info("Found cache entry in CRAT table with processor_id=%d\n", id);
-	list_for_each_entry(dev, &topology_device_list, list)
-		if (id == dev->node_props.cpu_core_id_base ||
-		    id == dev->node_props.simd_id_base) {
-			props = kfd_alloc_struct(props);
-			if (props == NULL)
-				return -ENOMEM;
-
-			props->processor_id_low = id;
-			props->cache_level = cache->cache_level;
-			props->cache_size = cache->cache_size;
-			props->cacheline_size = cache->cache_line_size;
-			props->cachelines_per_tag = cache->lines_per_tag;
-			props->cache_assoc = cache->associativity;
-			props->cache_latency = cache->cache_latency;
-
-			if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
-				props->cache_type |= HSA_CACHE_TYPE_DATA;
-			if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
-				props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
-			if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
-				props->cache_type |= HSA_CACHE_TYPE_CPU;
-			if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
-				props->cache_type |= HSA_CACHE_TYPE_HSACU;
-
-			dev->cache_count++;
-			dev->node_props.caches_count++;
-			list_add_tail(&props->list, &dev->cache_props);
-
-			break;
-		}
-
-	return 0;
-}
-
-/*
- * kfd_parse_subtype_iolink is called when the topology mutex
- * is already acquired
- */
-static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink)
+struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
 {
-	struct kfd_iolink_properties *props;
-	struct kfd_topology_device *dev;
-	uint32_t i = 0;
-	uint32_t id_from;
-	uint32_t id_to;
-
-	id_from = iolink->proximity_domain_from;
-	id_to = iolink->proximity_domain_to;
+	struct kfd_topology_device *top_dev;
+	struct kfd_dev *device = NULL;
 
-	pr_info("Found IO link entry in CRAT table with id_from=%d\n", id_from);
-	list_for_each_entry(dev, &topology_device_list, list) {
-		if (id_from == i) {
-			props = kfd_alloc_struct(props);
-			if (props == NULL)
-				return -ENOMEM;
-
-			props->node_from = id_from;
-			props->node_to = id_to;
-			props->ver_maj = iolink->version_major;
-			props->ver_min = iolink->version_minor;
-
-			/*
-			 * weight factor (derived from CDIR), currently always 1
-			 */
-			props->weight = 1;
-
-			props->min_latency = iolink->minimum_latency;
-			props->max_latency = iolink->maximum_latency;
-			props->min_bandwidth = iolink->minimum_bandwidth_mbs;
-			props->max_bandwidth = iolink->maximum_bandwidth_mbs;
-			props->rec_transfer_size =
-					iolink->recommended_transfer_size;
-
-			dev->io_link_count++;
-			dev->node_props.io_links_count++;
-			list_add_tail(&props->list, &dev->io_link_props);
+	down_read(&topology_lock);
 
+	list_for_each_entry(top_dev, &topology_device_list, list)
+		if (top_dev->gpu->pdev == pdev) {
+			device = top_dev->gpu;
 			break;
 		}
-		i++;
-	}
 
-	return 0;
-}
-
-static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr)
-{
-	struct crat_subtype_computeunit *cu;
-	struct crat_subtype_memory *mem;
-	struct crat_subtype_cache *cache;
-	struct crat_subtype_iolink *iolink;
-	int ret = 0;
-
-	switch (sub_type_hdr->type) {
-	case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
-		cu = (struct crat_subtype_computeunit *)sub_type_hdr;
-		ret = kfd_parse_subtype_cu(cu);
-		break;
-	case CRAT_SUBTYPE_MEMORY_AFFINITY:
-		mem = (struct crat_subtype_memory *)sub_type_hdr;
-		ret = kfd_parse_subtype_mem(mem);
-		break;
-	case CRAT_SUBTYPE_CACHE_AFFINITY:
-		cache = (struct crat_subtype_cache *)sub_type_hdr;
-		ret = kfd_parse_subtype_cache(cache);
-		break;
-	case CRAT_SUBTYPE_TLB_AFFINITY:
-		/*
-		 * For now, nothing to do here
-		 */
-		pr_info("Found TLB entry in CRAT table (not processing)\n");
-		break;
-	case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
-		/*
-		 * For now, nothing to do here
-		 */
-		pr_info("Found CCOMPUTE entry in CRAT table (not processing)\n");
-		break;
-	case CRAT_SUBTYPE_IOLINK_AFFINITY:
-		iolink = (struct crat_subtype_iolink *)sub_type_hdr;
-		ret = kfd_parse_subtype_iolink(iolink);
-		break;
-	default:
-		pr_warn("Unknown subtype (%d) in CRAT\n",
-				sub_type_hdr->type);
-	}
+	up_read(&topology_lock);
 
-	return ret;
+	return device;
 }
 
+/* Called with write topology_lock acquired */
 static void kfd_release_topology_device(struct kfd_topology_device *dev)
 {
 	struct kfd_mem_properties *mem;
 	struct kfd_cache_properties *cache;
 	struct kfd_iolink_properties *iolink;
+	struct kfd_perf_properties *perf;
 
 	list_del(&dev->list);
 
@@ -375,25 +129,35 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
 		kfree(iolink);
 	}
 
-	kfree(dev);
+	while (dev->perf_props.next != &dev->perf_props) {
+		perf = container_of(dev->perf_props.next,
+				struct kfd_perf_properties, list);
+		list_del(&perf->list);
+		kfree(perf);
+	}
 
-	sys_props.num_devices--;
+	kfree(dev);
 }
 
-static void kfd_release_live_view(void)
+void kfd_release_topology_device_list(struct list_head *device_list)
 {
 	struct kfd_topology_device *dev;
 
-	while (topology_device_list.next != &topology_device_list) {
-		dev = container_of(topology_device_list.next,
-				 struct kfd_topology_device, list);
+	while (!list_empty(device_list)) {
+		dev = list_first_entry(device_list,
+				       struct kfd_topology_device, list);
 		kfd_release_topology_device(dev);
+	}
 }
 
+static void kfd_release_live_view(void)
+{
+	kfd_release_topology_device_list(&topology_device_list);
 	memset(&sys_props, 0, sizeof(sys_props));
 }
 
-static struct kfd_topology_device *kfd_create_topology_device(void)
+struct kfd_topology_device *kfd_create_topology_device(
+				struct list_head *device_list)
 {
 	struct kfd_topology_device *dev;
 
@@ -406,65 +170,13 @@ static struct kfd_topology_device *kfd_create_topology_device(void)
 	INIT_LIST_HEAD(&dev->mem_props);
 	INIT_LIST_HEAD(&dev->cache_props);
 	INIT_LIST_HEAD(&dev->io_link_props);
+	INIT_LIST_HEAD(&dev->perf_props);
 
-	list_add_tail(&dev->list, &topology_device_list);
-	sys_props.num_devices++;
+	list_add_tail(&dev->list, device_list);
 
 	return dev;
 }
 
-static int kfd_parse_crat_table(void *crat_image)
-{
-	struct kfd_topology_device *top_dev;
-	struct crat_subtype_generic *sub_type_hdr;
-	uint16_t node_id;
-	int ret;
-	struct crat_header *crat_table = (struct crat_header *)crat_image;
-	uint16_t num_nodes;
-	uint32_t image_len;
-
-	if (!crat_image)
-		return -EINVAL;
-
-	num_nodes = crat_table->num_domains;
-	image_len = crat_table->length;
-
-	pr_info("Parsing CRAT table with %d nodes\n", num_nodes);
-
-	for (node_id = 0; node_id < num_nodes; node_id++) {
-		top_dev = kfd_create_topology_device();
-		if (!top_dev) {
-			kfd_release_live_view();
-			return -ENOMEM;
-		}
-	}
-
-	sys_props.platform_id =
-		(*((uint64_t *)crat_table->oem_id)) & CRAT_OEMID_64BIT_MASK;
-	sys_props.platform_oem = *((uint64_t *)crat_table->oem_table_id);
-	sys_props.platform_rev = crat_table->revision;
-
-	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
-	while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
-			((char *)crat_image) + image_len) {
-		if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
-			ret = kfd_parse_subtype(sub_type_hdr);
-			if (ret != 0) {
-				kfd_release_live_view();
-				return ret;
-			}
-		}
-
-		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
-				sub_type_hdr->length);
-	}
-
-	sys_props.generation_count++;
-	topology_crat_parsed = 1;
-
-	return 0;
-}
-
 
 #define sysfs_show_gen_prop(buffer, fmt, ...) \
 		snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__)
@@ -501,11 +213,17 @@ static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
+static void kfd_topology_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
 static const struct sysfs_ops sysprops_ops = {
 	.show = sysprops_show,
 };
 
 static struct kobj_type sysprops_type = {
+	.release = kfd_topology_kobj_release,
 	.sysfs_ops = &sysprops_ops,
 };
 
@@ -541,6 +259,7 @@ static const struct sysfs_ops iolink_ops = {
 };
 
 static struct kobj_type iolink_type = {
+	.release = kfd_topology_kobj_release,
 	.sysfs_ops = &iolink_ops,
 };
 
@@ -568,6 +287,7 @@ static const struct sysfs_ops mem_ops = {
 };
 
 static struct kobj_type mem_type = {
+	.release = kfd_topology_kobj_release,
 	.sysfs_ops = &mem_ops,
 };
 
@@ -575,7 +295,7 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
 		char *buffer)
 {
 	ssize_t ret;
-	uint32_t i;
+	uint32_t i, j;
 	struct kfd_cache_properties *cache;
 
 	/* Making sure that the buffer is an empty string */
@@ -593,12 +313,18 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
 	sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
 	sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
 	snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
-	for (i = 0; i < KFD_TOPOLOGY_CPU_SIBLINGS; i++)
-		ret = snprintf(buffer, PAGE_SIZE, "%s%d%s",
-				buffer, cache->sibling_map[i],
-				(i == KFD_TOPOLOGY_CPU_SIBLINGS-1) ?
-						"\n" : ",");
-
+	for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
+		for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) {
+			/* Check each bit */
+			if (cache->sibling_map[i] & (1 << j))
+				ret = snprintf(buffer, PAGE_SIZE,
+					 "%s%d%s", buffer, 1, ",");
+			else
+				ret = snprintf(buffer, PAGE_SIZE,
+					 "%s%d%s", buffer, 0, ",");
+		}
+	/* Replace the last "," with end of line */
+	*(buffer + strlen(buffer) - 1) = 0xA;
 	return ret;
 }
 
@@ -607,9 +333,43 @@ static const struct sysfs_ops cache_ops = {
 };
 
 static struct kobj_type cache_type = {
+	.release = kfd_topology_kobj_release,
 	.sysfs_ops = &cache_ops,
 };
 
+/****** Sysfs of Performance Counters ******/
+
+struct kfd_perf_attr {
+	struct kobj_attribute attr;
+	uint32_t data;
+};
+
+static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
+			char *buf)
+{
+	struct kfd_perf_attr *attr;
+
+	buf[0] = 0;
+	attr = container_of(attrs, struct kfd_perf_attr, attr);
+	if (!attr->data) /* invalid data for PMC */
+		return 0;
+	else
+		return sysfs_show_32bit_val(buf, attr->data);
+}
+
+#define KFD_PERF_DESC(_name, _data)			\
+{							\
+	.attr  = __ATTR(_name, 0444, perf_show, NULL),	\
+	.data = _data,					\
+}
+
+static struct kfd_perf_attr perf_attr_iommu[] = {
+	KFD_PERF_DESC(max_concurrent, 0),
+	KFD_PERF_DESC(num_counters, 0),
+	KFD_PERF_DESC(counter_ids, 0),
+};
+/****************************************/
+
 static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 		char *buffer)
 {
@@ -646,18 +406,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.cpu_cores_count);
 	sysfs_show_32bit_prop(buffer, "simd_count",
 			dev->node_props.simd_count);
-
-	if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
-		pr_info_once("mem_banks_count truncated from %d to %d\n",
-				dev->node_props.mem_banks_count,
-				dev->mem_bank_count);
-		sysfs_show_32bit_prop(buffer, "mem_banks_count",
-				dev->mem_bank_count);
-	} else {
-		sysfs_show_32bit_prop(buffer, "mem_banks_count",
-				dev->node_props.mem_banks_count);
-	}
-
+	sysfs_show_32bit_prop(buffer, "mem_banks_count",
+			dev->node_props.mem_banks_count);
 	sysfs_show_32bit_prop(buffer, "caches_count",
 			dev->node_props.caches_count);
 	sysfs_show_32bit_prop(buffer, "io_links_count",
@@ -705,9 +455,12 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 				HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
 		}
 
+		if (dev->gpu->device_info->asic_family == CHIP_TONGA)
+			dev->node_props.capability |=
+					HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
+
 		sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
-			dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(
-					dev->gpu->kgd));
+			dev->node_props.max_engine_clk_fcompute);
 
 		sysfs_show_64bit_prop(buffer, "local_mem_size",
 				(unsigned long long int) 0);
@@ -729,6 +482,7 @@ static const struct sysfs_ops node_ops = {
 };
 
 static struct kobj_type node_type = {
+	.release = kfd_topology_kobj_release,
 	.sysfs_ops = &node_ops,
 };
 
@@ -744,6 +498,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
 	struct kfd_iolink_properties *iolink;
 	struct kfd_cache_properties *cache;
 	struct kfd_mem_properties *mem;
+	struct kfd_perf_properties *perf;
 
 	if (dev->kobj_iolink) {
 		list_for_each_entry(iolink, &dev->io_link_props, list)
@@ -780,6 +535,16 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
 		dev->kobj_mem = NULL;
 	}
 
+	if (dev->kobj_perf) {
+		list_for_each_entry(perf, &dev->perf_props, list) {
+			kfree(perf->attr_group);
+			perf->attr_group = NULL;
+		}
+		kobject_del(dev->kobj_perf);
+		kobject_put(dev->kobj_perf);
+		dev->kobj_perf = NULL;
+	}
+
 	if (dev->kobj_node) {
 		sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
 		sysfs_remove_file(dev->kobj_node, &dev->attr_name);
@@ -796,8 +561,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
 	struct kfd_iolink_properties *iolink;
 	struct kfd_cache_properties *cache;
 	struct kfd_mem_properties *mem;
+	struct kfd_perf_properties *perf;
 	int ret;
-	uint32_t i;
+	uint32_t i, num_attrs;
+	struct attribute **attrs;
 
 	if (WARN_ON(dev->kobj_node))
 		return -EEXIST;
@@ -826,6 +593,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
 	if (!dev->kobj_iolink)
 		return -ENOMEM;
 
+	dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
+	if (!dev->kobj_perf)
+		return -ENOMEM;
+
 	/*
 	 * Creating sysfs files for node properties
 	 */
@@ -903,11 +674,38 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
 		if (ret < 0)
 			return ret;
 		i++;
-}
+	}
+
+	/* All hardware blocks have the same number of attributes. */
+	num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr);
+	list_for_each_entry(perf, &dev->perf_props, list) {
+		perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
+			* num_attrs + sizeof(struct attribute_group),
+			GFP_KERNEL);
+		if (!perf->attr_group)
+			return -ENOMEM;
+
+		attrs = (struct attribute **)(perf->attr_group + 1);
+		if (!strcmp(perf->block_name, "iommu")) {
+		/* Information of IOMMU's num_counters and counter_ids is shown
+		 * under /sys/bus/event_source/devices/amd_iommu. We don't
+		 * duplicate here.
+		 */
+			perf_attr_iommu[0].data = perf->max_concurrent;
+			for (i = 0; i < num_attrs; i++)
+				attrs[i] = &perf_attr_iommu[i].attr.attr;
+		}
+		perf->attr_group->name = perf->block_name;
+		perf->attr_group->attrs = attrs;
+		ret = sysfs_create_group(dev->kobj_perf, perf->attr_group);
+		if (ret < 0)
+			return ret;
+	}
 
 	return 0;
 }
 
+/* Called with write topology lock acquired */
 static int kfd_build_sysfs_node_tree(void)
 {
 	struct kfd_topology_device *dev;
@@ -924,6 +722,7 @@ static int kfd_build_sysfs_node_tree(void)
 	return 0;
 }
 
+/* Called with write topology lock acquired */
 static void kfd_remove_sysfs_node_tree(void)
 {
 	struct kfd_topology_device *dev;
@@ -995,75 +794,246 @@ static void kfd_topology_release_sysfs(void)
 	}
 }
 
+/* Called with write topology_lock acquired */
+static void kfd_topology_update_device_list(struct list_head *temp_list,
+					struct list_head *master_list)
+{
+	while (!list_empty(temp_list)) {
+		list_move_tail(temp_list->next, master_list);
+		sys_props.num_devices++;
+	}
+}
+
+static void kfd_debug_print_topology(void)
+{
+	struct kfd_topology_device *dev;
+
+	down_read(&topology_lock);
+
+	dev = list_last_entry(&topology_device_list,
+			struct kfd_topology_device, list);
+	if (dev) {
+		if (dev->node_props.cpu_cores_count &&
+				dev->node_props.simd_count) {
+			pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
+				dev->node_props.device_id,
+				dev->node_props.vendor_id);
+		} else if (dev->node_props.cpu_cores_count)
+			pr_info("Topology: Add CPU node\n");
+		else if (dev->node_props.simd_count)
+			pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
+				dev->node_props.device_id,
+				dev->node_props.vendor_id);
+	}
+	up_read(&topology_lock);
+}
+
+/* Helper function for intializing platform_xx members of
+ * kfd_system_properties. Uses OEM info from the last CPU/APU node.
+ */
+static void kfd_update_system_properties(void)
+{
+	struct kfd_topology_device *dev;
+
+	down_read(&topology_lock);
+	dev = list_last_entry(&topology_device_list,
+			struct kfd_topology_device, list);
+	if (dev) {
+		sys_props.platform_id =
+			(*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
+		sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
+		sys_props.platform_rev = dev->oem_revision;
+	}
+	up_read(&topology_lock);
+}
+
+static void find_system_memory(const struct dmi_header *dm,
+	void *private)
+{
+	struct kfd_mem_properties *mem;
+	u16 mem_width, mem_clock;
+	struct kfd_topology_device *kdev =
+		(struct kfd_topology_device *)private;
+	const u8 *dmi_data = (const u8 *)(dm + 1);
+
+	if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
+		mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
+		mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
+		list_for_each_entry(mem, &kdev->mem_props, list) {
+			if (mem_width != 0xFFFF && mem_width != 0)
+				mem->width = mem_width;
+			if (mem_clock != 0)
+				mem->mem_clk_max = mem_clock;
+		}
+	}
+}
+
+/*
+ * Performance counters information is not part of CRAT but we would like to
+ * put them in the sysfs under topology directory for Thunk to get the data.
+ * This function is called before updating the sysfs.
+ */
+static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
+{
+	struct kfd_perf_properties *props;
+
+	if (amd_iommu_pc_supported()) {
+		props = kfd_alloc_struct(props);
+		if (!props)
+			return -ENOMEM;
+		strcpy(props->block_name, "iommu");
+		props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
+			amd_iommu_pc_get_max_counters(0); /* assume one iommu */
+		list_add_tail(&props->list, &kdev->perf_props);
+	}
+
+	return 0;
+}
+
+/* kfd_add_non_crat_information - Add information that is not currently
+ *	defined in CRAT but is necessary for KFD topology
+ * @dev - topology device to which addition info is added
+ */
+static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
+{
+	/* Check if CPU only node. */
+	if (!kdev->gpu) {
+		/* Add system memory information */
+		dmi_walk(find_system_memory, kdev);
+	}
+	/* TODO: For GPU node, rearrange code from kfd_topology_add_device */
+}
+
+/* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices.
+ *	Ignore CRAT for all other devices. AMD APU is identified if both CPU
+ *	and GPU cores are present.
+ * @device_list - topology device list created by parsing ACPI CRAT table.
+ * @return - TRUE if invalid, FALSE is valid.
+ */
+static bool kfd_is_acpi_crat_invalid(struct list_head *device_list)
+{
+	struct kfd_topology_device *dev;
+
+	list_for_each_entry(dev, device_list, list) {
+		if (dev->node_props.cpu_cores_count &&
+			dev->node_props.simd_count)
+			return false;
+	}
+	pr_info("Ignoring ACPI CRAT on non-APU system\n");
+	return true;
+}
+
 int kfd_topology_init(void)
 {
 	void *crat_image = NULL;
 	size_t image_size = 0;
 	int ret;
-
-	/*
-	 * Initialize the head for the topology device list
+	struct list_head temp_topology_device_list;
+	int cpu_only_node = 0;
+	struct kfd_topology_device *kdev;
+	int proximity_domain;
+
+	/* topology_device_list - Master list of all topology devices
+	 * temp_topology_device_list - temporary list created while parsing CRAT
+	 * or VCRAT. Once parsing is complete the contents of list is moved to
+	 * topology_device_list
 	 */
+
+	/* Initialize the head for the both the lists */
 	INIT_LIST_HEAD(&topology_device_list);
+	INIT_LIST_HEAD(&temp_topology_device_list);
 	init_rwsem(&topology_lock);
-	topology_crat_parsed = 0;
 
 	memset(&sys_props, 0, sizeof(sys_props));
 
+	/* Proximity domains in ACPI CRAT tables start counting at
+	 * 0. The same should be true for virtual CRAT tables created
+	 * at this stage. GPUs added later in kfd_topology_add_device
+	 * use a counter.
+	 */
+	proximity_domain = 0;
+
 	/*
-	 * Get the CRAT image from the ACPI
+	 * Get the CRAT image from the ACPI. If ACPI doesn't have one
+	 * or if ACPI CRAT is invalid create a virtual CRAT.
+	 * NOTE: The current implementation expects all AMD APUs to have
+	 *	CRAT. If no CRAT is available, it is assumed to be a CPU
 	 */
-	ret = kfd_topology_get_crat_acpi(crat_image, &image_size);
-	if (ret == 0 && image_size > 0) {
-		pr_info("Found CRAT image with size=%zd\n", image_size);
-		crat_image = kmalloc(image_size, GFP_KERNEL);
-		if (!crat_image) {
-			ret = -ENOMEM;
-			pr_err("No memory for allocating CRAT image\n");
-			goto err;
+	ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
+	if (!ret) {
+		ret = kfd_parse_crat_table(crat_image,
+					   &temp_topology_device_list,
+					   proximity_domain);
+		if (ret ||
+		    kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
+			kfd_release_topology_device_list(
+				&temp_topology_device_list);
+			kfd_destroy_crat_image(crat_image);
+			crat_image = NULL;
 		}
-		ret = kfd_topology_get_crat_acpi(crat_image, &image_size);
-
-		if (ret == 0) {
-			down_write(&topology_lock);
-			ret = kfd_parse_crat_table(crat_image);
-			if (ret == 0)
-				ret = kfd_topology_update_sysfs();
-			up_write(&topology_lock);
-		} else {
-			pr_err("Couldn't get CRAT table size from ACPI\n");
+	}
+
+	if (!crat_image) {
+		ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
+						    COMPUTE_UNIT_CPU, NULL,
+						    proximity_domain);
+		cpu_only_node = 1;
+		if (ret) {
+			pr_err("Error creating VCRAT table for CPU\n");
+			return ret;
 		}
-		kfree(crat_image);
-	} else if (ret == -ENODATA) {
-		ret = 0;
-	} else {
-		pr_err("Couldn't get CRAT table size from ACPI\n");
+
+		ret = kfd_parse_crat_table(crat_image,
+					   &temp_topology_device_list,
+					   proximity_domain);
+		if (ret) {
+			pr_err("Error parsing VCRAT table for CPU\n");
+			goto err;
+		}
+	}
+
+	kdev = list_first_entry(&temp_topology_device_list,
+				struct kfd_topology_device, list);
+	kfd_add_perf_to_topology(kdev);
+
+	down_write(&topology_lock);
+	kfd_topology_update_device_list(&temp_topology_device_list,
+					&topology_device_list);
+	atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
+	ret = kfd_topology_update_sysfs();
+	up_write(&topology_lock);
+
+	if (!ret) {
+		sys_props.generation_count++;
+		kfd_update_system_properties();
+		kfd_debug_print_topology();
+		pr_info("Finished initializing topology\n");
+	} else
+		pr_err("Failed to update topology in sysfs ret=%d\n", ret);
+
+	/* For nodes with GPU, this information gets added
+	 * when GPU is detected (kfd_topology_add_device).
+	 */
+	if (cpu_only_node) {
+		/* Add additional information to CPU only node created above */
+		down_write(&topology_lock);
+		kdev = list_first_entry(&topology_device_list,
+				struct kfd_topology_device, list);
+		up_write(&topology_lock);
+		kfd_add_non_crat_information(kdev);
 	}
 
 err:
-	pr_info("Finished initializing topology ret=%d\n", ret);
+	kfd_destroy_crat_image(crat_image);
 	return ret;
 }
 
 void kfd_topology_shutdown(void)
 {
+	down_write(&topology_lock);
 	kfd_topology_release_sysfs();
 	kfd_release_live_view();
-}
-
-static void kfd_debug_print_topology(void)
-{
-	struct kfd_topology_device *dev;
-	uint32_t i = 0;
-
-	pr_info("DEBUG PRINT OF TOPOLOGY:");
-	list_for_each_entry(dev, &topology_device_list, list) {
-		pr_info("Node: %d\n", i);
-		pr_info("\tGPU assigned: %s\n", (dev->gpu ? "yes" : "no"));
-		pr_info("\tCPU count: %d\n", dev->node_props.cpu_cores_count);
-		pr_info("\tSIMD count: %d", dev->node_props.simd_count);
-		i++;
-	}
+	up_write(&topology_lock);
 }
 
 static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
@@ -1072,11 +1042,15 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
 	uint32_t buf[7];
 	uint64_t local_mem_size;
 	int i;
+	struct kfd_local_mem_info local_mem_info;
 
 	if (!gpu)
 		return 0;
 
-	local_mem_size = gpu->kfd2kgd->get_vmem_size(gpu->kgd);
+	gpu->kfd2kgd->get_local_mem_info(gpu->kgd, &local_mem_info);
+
+	local_mem_size = local_mem_info.local_mem_size_private +
+			local_mem_info.local_mem_size_public;
 
 	buf[0] = gpu->pdev->devfn;
 	buf[1] = gpu->pdev->subsystem_vendor;
@@ -1091,19 +1065,26 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
 
 	return hashout;
 }
-
+/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
+ *		the GPU device is not already present in the topology device
+ *		list then return NULL. This means a new topology device has to
+ *		be created for this GPU.
+ * TODO: Rather than assiging @gpu to first topology device withtout
+ *		gpu attached, it will better to have more stringent check.
+ */
 static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
 {
 	struct kfd_topology_device *dev;
 	struct kfd_topology_device *out_dev = NULL;
 
+	down_write(&topology_lock);
 	list_for_each_entry(dev, &topology_device_list, list)
 		if (!dev->gpu && (dev->node_props.simd_count > 0)) {
 			dev->gpu = gpu;
 			out_dev = dev;
 			break;
 		}
-
+	up_write(&topology_lock);
 	return out_dev;
 }
 
@@ -1115,84 +1096,196 @@ static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
 	 */
 }
 
+/* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
+ *		patch this after CRAT parsing.
+ */
+static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
+{
+	struct kfd_mem_properties *mem;
+	struct kfd_local_mem_info local_mem_info;
+
+	if (!dev)
+		return;
+
+	/* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with
+	 * single bank of VRAM local memory.
+	 * for dGPUs - VCRAT reports only one bank of Local Memory
+	 * for APUs - If CRAT from ACPI reports more than one bank, then
+	 *	all the banks will report the same mem_clk_max information
+	 */
+	dev->gpu->kfd2kgd->get_local_mem_info(dev->gpu->kgd,
+		&local_mem_info);
+
+	list_for_each_entry(mem, &dev->mem_props, list)
+		mem->mem_clk_max = local_mem_info.mem_clk_max;
+}
+
+static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
+{
+	struct kfd_iolink_properties *link;
+
+	if (!dev || !dev->gpu)
+		return;
+
+	/* GPU only creates direck links so apply flags setting to all */
+	if (dev->gpu->device_info->asic_family == CHIP_HAWAII)
+		list_for_each_entry(link, &dev->io_link_props, list)
+			link->flags = CRAT_IOLINK_FLAGS_ENABLED |
+				CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+				CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+}
+
 int kfd_topology_add_device(struct kfd_dev *gpu)
 {
 	uint32_t gpu_id;
 	struct kfd_topology_device *dev;
-	int res;
+	struct kfd_cu_info cu_info;
+	int res = 0;
+	struct list_head temp_topology_device_list;
+	void *crat_image = NULL;
+	size_t image_size = 0;
+	int proximity_domain;
+
+	INIT_LIST_HEAD(&temp_topology_device_list);
 
 	gpu_id = kfd_generate_gpu_id(gpu);
 
 	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
 
-	down_write(&topology_lock);
-	/*
-	 * Try to assign the GPU to existing topology device (generated from
-	 * CRAT table
+	proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
+
+	/* Check to see if this gpu device exists in the topology_device_list.
+	 * If so, assign the gpu to that device,
+	 * else create a Virtual CRAT for this gpu device and then parse that
+	 * CRAT to create a new topology device. Once created assign the gpu to
+	 * that topology device
 	 */
 	dev = kfd_assign_gpu(gpu);
 	if (!dev) {
-		pr_info("GPU was not found in the current topology. Extending.\n");
-		kfd_debug_print_topology();
-		dev = kfd_create_topology_device();
-		if (!dev) {
-			res = -ENOMEM;
+		res = kfd_create_crat_image_virtual(&crat_image, &image_size,
+						    COMPUTE_UNIT_GPU, gpu,
+						    proximity_domain);
+		if (res) {
+			pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
+			       gpu_id);
+			return res;
+		}
+		res = kfd_parse_crat_table(crat_image,
+					   &temp_topology_device_list,
+					   proximity_domain);
+		if (res) {
+			pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
+			       gpu_id);
 			goto err;
 		}
-		dev->gpu = gpu;
 
-		/*
-		 * TODO: Make a call to retrieve topology information from the
-		 * GPU vBIOS
-		 */
+		down_write(&topology_lock);
+		kfd_topology_update_device_list(&temp_topology_device_list,
+			&topology_device_list);
 
 		/* Update the SYSFS tree, since we added another topology
 		 * device
 		 */
-		if (kfd_topology_update_sysfs() < 0)
-			kfd_topology_release_sysfs();
-
+		res = kfd_topology_update_sysfs();
+		up_write(&topology_lock);
+
+		if (!res)
+			sys_props.generation_count++;
+		else
+			pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
+						gpu_id, res);
+		dev = kfd_assign_gpu(gpu);
+		if (WARN_ON(!dev)) {
+			res = -ENODEV;
+			goto err;
+		}
 	}
 
 	dev->gpu_id = gpu_id;
 	gpu->id = gpu_id;
+
+	/* TODO: Move the following lines to function
+	 *	kfd_add_non_crat_information
+	 */
+
+	/* Fill-in additional information that is not available in CRAT but
+	 * needed for the topology
+	 */
+
+	dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info);
+	dev->node_props.simd_arrays_per_engine =
+		cu_info.num_shader_arrays_per_engine;
+
 	dev->node_props.vendor_id = gpu->pdev->vendor;
 	dev->node_props.device_id = gpu->pdev->device;
-	dev->node_props.location_id = (gpu->pdev->bus->number << 24) +
-			(gpu->pdev->devfn & 0xffffff);
-	/*
-	 * TODO: Retrieve max engine clock values from KGD
-	 */
+	dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number,
+		gpu->pdev->devfn);
+	dev->node_props.max_engine_clk_fcompute =
+		dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd);
+	dev->node_props.max_engine_clk_ccompute =
+		cpufreq_quick_get_max(0) / 1000;
+
+	kfd_fill_mem_clk_max_info(dev);
+	kfd_fill_iolink_non_crat_info(dev);
+
+	switch (dev->gpu->device_info->asic_family) {
+	case CHIP_KAVERI:
+	case CHIP_HAWAII:
+	case CHIP_TONGA:
+		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 <<
+			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+		break;
+	case CHIP_CARRIZO:
+	case CHIP_FIJI:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+		pr_debug("Adding doorbell packet type capability\n");
+		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
+			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+		break;
+	default:
+		WARN(1, "Unexpected ASIC family %u",
+		     dev->gpu->device_info->asic_family);
+	}
 
+	/* Fix errors in CZ CRAT.
+	 * simd_count: Carrizo CRAT reports wrong simd_count, probably
+	 *		because it doesn't consider masked out CUs
+	 * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
+	 * capability flag: Carrizo CRAT doesn't report IOMMU flags
+	 */
 	if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
-		dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE;
-		pr_info("Adding doorbell packet type capability\n");
+		dev->node_props.simd_count =
+			cu_info.simd_per_cu * cu_info.cu_active_number;
+		dev->node_props.max_waves_per_simd = 10;
+		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
 	}
 
-	res = 0;
-
-err:
-	up_write(&topology_lock);
+	kfd_debug_print_topology();
 
-	if (res == 0)
+	if (!res)
 		kfd_notify_gpu_change(gpu_id, 1);
-
+err:
+	kfd_destroy_crat_image(crat_image);
 	return res;
 }
 
 int kfd_topology_remove_device(struct kfd_dev *gpu)
 {
-	struct kfd_topology_device *dev;
+	struct kfd_topology_device *dev, *tmp;
 	uint32_t gpu_id;
 	int res = -ENODEV;
 
 	down_write(&topology_lock);
 
-	list_for_each_entry(dev, &topology_device_list, list)
+	list_for_each_entry_safe(dev, tmp, &topology_device_list, list)
 		if (dev->gpu == gpu) {
 			gpu_id = dev->gpu_id;
 			kfd_remove_sysfs_node_entry(dev);
 			kfd_release_topology_device(dev);
+			sys_props.num_devices--;
 			res = 0;
 			if (kfd_topology_update_sysfs() < 0)
 				kfd_topology_release_sysfs();
@@ -1201,28 +1294,32 @@ int kfd_topology_remove_device(struct kfd_dev *gpu)
 
 	up_write(&topology_lock);
 
-	if (res == 0)
+	if (!res)
 		kfd_notify_gpu_change(gpu_id, 0);
 
 	return res;
 }
 
-/*
- * When idx is out of bounds, the function will return NULL
+/* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
+ *	topology. If GPU device is found @idx, then valid kfd_dev pointer is
+ *	returned through @kdev
+ * Return -	0: On success (@kdev will be NULL for non GPU nodes)
+ *		-1: If end of list
  */
-struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx)
+int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
 {
 
 	struct kfd_topology_device *top_dev;
-	struct kfd_dev *device = NULL;
 	uint8_t device_idx = 0;
 
+	*kdev = NULL;
 	down_read(&topology_lock);
 
 	list_for_each_entry(top_dev, &topology_device_list, list) {
 		if (device_idx == idx) {
-			device = top_dev->gpu;
-			break;
+			*kdev = top_dev->gpu;
+			up_read(&topology_lock);
+			return 0;
 		}
 
 		device_idx++;
@@ -1230,6 +1327,88 @@ struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx)
 
 	up_read(&topology_lock);
 
-	return device;
+	return -1;
+
+}
+
+static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
+{
+	const struct cpuinfo_x86 *cpuinfo;
+	int first_cpu_of_numa_node;
+
+	if (!cpumask || cpumask == cpu_none_mask)
+		return -1;
+	first_cpu_of_numa_node = cpumask_first(cpumask);
+	if (first_cpu_of_numa_node >= nr_cpu_ids)
+		return -1;
+	cpuinfo = &cpu_data(first_cpu_of_numa_node);
 
+	return cpuinfo->apicid;
 }
+
+/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
+ *	of the given NUMA node (numa_node_id)
+ * Return -1 on failure
+ */
+int kfd_numa_node_to_apic_id(int numa_node_id)
+{
+	if (numa_node_id == -1) {
+		pr_warn("Invalid NUMA Node. Use online CPU mask\n");
+		return kfd_cpumask_to_apic_id(cpu_online_mask);
+	}
+	return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
+{
+	struct kfd_topology_device *dev;
+	unsigned int i = 0;
+	int r = 0;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(dev, &topology_device_list, list) {
+		if (!dev->gpu) {
+			i++;
+			continue;
+		}
+
+		seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
+		r = dqm_debugfs_hqds(m, dev->gpu->dqm);
+		if (r)
+			break;
+	}
+
+	up_read(&topology_lock);
+
+	return r;
+}
+
+int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
+{
+	struct kfd_topology_device *dev;
+	unsigned int i = 0;
+	int r = 0;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(dev, &topology_device_list, list) {
+		if (!dev->gpu) {
+			i++;
+			continue;
+		}
+
+		seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
+		r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets);
+		if (r)
+			break;
+	}
+
+	up_read(&topology_lock);
+
+	return r;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index c3ddb9b95ff8de6a52196f581d9eee1c1ccd3cb0..53fca1f4540141d7148e2423f941819d8aac66d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -39,8 +39,13 @@
 #define HSA_CAP_WATCH_POINTS_SUPPORTED		0x00000080
 #define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK	0x00000f00
 #define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT	8
-#define HSA_CAP_RESERVED			0xfffff000
-#define HSA_CAP_DOORBELL_PACKET_TYPE		0x00001000
+#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK	0x00003000
+#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT	12
+#define HSA_CAP_RESERVED			0xffffc000
+
+#define HSA_CAP_DOORBELL_TYPE_PRE_1_0		0x0
+#define HSA_CAP_DOORBELL_TYPE_1_0		0x1
+#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP		0x00004000
 
 struct kfd_node_properties {
 	uint32_t cpu_cores_count;
@@ -91,8 +96,6 @@ struct kfd_mem_properties {
 	struct attribute	attr;
 };
 
-#define KFD_TOPOLOGY_CPU_SIBLINGS 256
-
 #define HSA_CACHE_TYPE_DATA		0x00000001
 #define HSA_CACHE_TYPE_INSTRUCTION	0x00000002
 #define HSA_CACHE_TYPE_CPU		0x00000004
@@ -109,7 +112,7 @@ struct kfd_cache_properties {
 	uint32_t		cache_assoc;
 	uint32_t		cache_latency;
 	uint32_t		cache_type;
-	uint8_t			sibling_map[KFD_TOPOLOGY_CPU_SIBLINGS];
+	uint8_t			sibling_map[CRAT_SIBLINGMAP_SIZE];
 	struct kobject		*kobj;
 	struct attribute	attr;
 };
@@ -132,24 +135,36 @@ struct kfd_iolink_properties {
 	struct attribute	attr;
 };
 
+struct kfd_perf_properties {
+	struct list_head	list;
+	char			block_name[16];
+	uint32_t		max_concurrent;
+	struct attribute_group	*attr_group;
+};
+
 struct kfd_topology_device {
 	struct list_head		list;
 	uint32_t			gpu_id;
+	uint32_t			proximity_domain;
 	struct kfd_node_properties	node_props;
-	uint32_t			mem_bank_count;
 	struct list_head		mem_props;
 	uint32_t			cache_count;
 	struct list_head		cache_props;
 	uint32_t			io_link_count;
 	struct list_head		io_link_props;
+	struct list_head		perf_props;
 	struct kfd_dev			*gpu;
 	struct kobject			*kobj_node;
 	struct kobject			*kobj_mem;
 	struct kobject			*kobj_cache;
 	struct kobject			*kobj_iolink;
+	struct kobject			*kobj_perf;
 	struct attribute		attr_gpuid;
 	struct attribute		attr_name;
 	struct attribute		attr_props;
+	uint8_t				oem_id[CRAT_OEMID_LENGTH];
+	uint8_t				oem_table_id[CRAT_OEMTABLEID_LENGTH];
+	uint32_t			oem_revision;
 };
 
 struct kfd_system_properties {
@@ -164,6 +179,12 @@ struct kfd_system_properties {
 	struct attribute	attr_props;
 };
 
+struct kfd_topology_device *kfd_create_topology_device(
+		struct list_head *device_list);
+void kfd_release_topology_device_list(struct list_head *device_list);
 
+extern bool amd_iommu_pc_supported(void);
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
 
 #endif /* __KFD_TOPOLOGY_H__ */
diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile
index 8ba37dd9cf7fce68104ffdd8a637642d4507290a..c27c81cdeed3b815eb4f0630b12a412cb9eabb27 100644
--- a/drivers/gpu/drm/amd/display/Makefile
+++ b/drivers/gpu/drm/amd/display/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the DAL (Display Abstract Layer), which is a  sub-component
 # of the AMDGPU drm driver.
 # It provides the HW control for display related functionalities.
diff --git a/drivers/gpu/drm/amd/display/TODO b/drivers/gpu/drm/amd/display/TODO
index 46464678f2b317b9c0a588f81a5abe2ceb3ac36a..357d596484016a8f92cd9afd655cece66857b606 100644
--- a/drivers/gpu/drm/amd/display/TODO
+++ b/drivers/gpu/drm/amd/display/TODO
@@ -105,3 +105,6 @@ useless with filtering output. dynamic debug printing might be an option.
 20. Use kernel i2c device to program HDMI retimer. Some boards have an HDMI
 retimer that we need to program to pass PHY compliance. Currently that's
 bypassing the i2c device and goes directly to HW. This should be changed.
+
+21. Remove vector.c from dc/basics. It's used in DDC code which can probably
+be simplified enough to no longer need a vector implementation.
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index 4699e47aa76b00969bd0a7a7cc1f074df709563c..2b72009844f8316f92ed9dd2bf9bfac187219520 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'dm' sub-component of DAL.
 # It provides the control and status of dm blocks.
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index ccbf10e3bbb6879dcf2f1cebfd3db1dcc085ec23..1ce4c98385e3a17385a00b076a699cd64462d4dd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -431,9 +431,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	adev->dm.dc = dc_create(&init_data);
 
 	if (adev->dm.dc) {
-		DRM_INFO("Display Core initialized!\n");
+		DRM_INFO("Display Core initialized with v%s!\n", DC_VER);
 	} else {
-		DRM_INFO("Display Core failed to initialize!\n");
+		DRM_INFO("Display Core failed to initialize with v%s!\n", DC_VER);
 		goto error;
 	}
 
@@ -2351,7 +2351,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		       const struct dm_connector_state *dm_state)
 {
 	struct drm_display_mode *preferred_mode = NULL;
-	const struct drm_connector *drm_connector;
+	struct drm_connector *drm_connector;
 	struct dc_stream_state *stream = NULL;
 	struct drm_display_mode mode = *drm_mode;
 	bool native_mode_found = false;
@@ -2370,11 +2370,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
 	if (!aconnector->dc_sink) {
 		/*
-		 * Exclude MST from creating fake_sink
-		 * TODO: need to enable MST into fake_sink feature
+		 * Create dc_sink when necessary to MST
+		 * Don't apply fake_sink to MST
 		 */
-		if (aconnector->mst_port)
-			goto stream_create_fail;
+		if (aconnector->mst_port) {
+			dm_dp_mst_dc_sink_create(drm_connector);
+			goto mst_dc_sink_create_done;
+		}
 
 		if (create_fake_sink(aconnector))
 			goto stream_create_fail;
@@ -2425,6 +2427,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 stream_create_fail:
 dm_state_null:
 drm_connector_null:
+mst_dc_sink_create_done:
 	return stream;
 }
 
@@ -2725,8 +2728,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector)
 	};
 	struct edid *edid;
 
-	if (!aconnector->base.edid_blob_ptr ||
-		!aconnector->base.edid_blob_ptr->data) {
+	if (!aconnector->base.edid_blob_ptr) {
 		DRM_ERROR("No EDID firmware found on connector: %s ,forcing to OFF!\n",
 				aconnector->base.name);
 
@@ -4514,18 +4516,15 @@ static int dm_update_crtcs_state(struct dc *dc,
 						__func__, acrtc->base.base.id);
 				break;
 			}
-		}
 
-		if (enable && dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) &&
-				dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) {
-
-			new_crtc_state->mode_changed = false;
-
-			DRM_DEBUG_DRIVER("Mode change not required, setting mode_changed to %d",
-				         new_crtc_state->mode_changed);
+			if (dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) &&
+			    dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) {
+				new_crtc_state->mode_changed = false;
+				DRM_DEBUG_DRIVER("Mode change not required, setting mode_changed to %d",
+						 new_crtc_state->mode_changed);
+			}
 		}
 
-
 		if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
 			goto next_crtc;
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 8a1e4f5dbd648cbdc58ce8c62c44a55192f89f57..2faa77a7eedaef72246c49e7ae893050a45e06d4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -189,6 +189,8 @@ struct amdgpu_dm_connector {
 	struct mutex hpd_lock;
 
 	bool fake_enable;
+
+	bool mst_connected;
 };
 
 #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 707928b88448cf13fb41caaab84d999e31be3891..f3d87f418d2efffd349358fd08b3b52e2a3cca5d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -180,6 +180,42 @@ static int dm_connector_update_modes(struct drm_connector *connector,
 	return drm_add_edid_modes(connector, edid);
 }
 
+void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
+{
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct edid *edid;
+	struct dc_sink *dc_sink;
+	struct dc_sink_init_data init_params = {
+			.link = aconnector->dc_link,
+			.sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
+
+	edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
+
+	if (!edid) {
+		drm_mode_connector_update_edid_property(
+			&aconnector->base,
+			NULL);
+		return;
+	}
+
+	aconnector->edid = edid;
+
+	dc_sink = dc_link_add_remote_sink(
+		aconnector->dc_link,
+		(uint8_t *)aconnector->edid,
+		(aconnector->edid->extensions + 1) * EDID_LENGTH,
+		&init_params);
+
+	dc_sink->priv = aconnector;
+	aconnector->dc_sink = dc_sink;
+
+	amdgpu_dm_add_sink_to_freesync_module(
+			connector, aconnector->edid);
+
+	drm_mode_connector_update_edid_property(
+					&aconnector->base, aconnector->edid);
+}
+
 static int dm_dp_mst_get_modes(struct drm_connector *connector)
 {
 	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
@@ -306,6 +342,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 			drm_mode_connector_set_path_property(connector, pathprop);
 
 			drm_connector_list_iter_end(&conn_iter);
+			aconnector->mst_connected = true;
 			return &aconnector->base;
 		}
 	}
@@ -358,6 +395,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 	 */
 	amdgpu_dm_connector_funcs_reset(connector);
 
+	aconnector->mst_connected = true;
+
 	DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n",
 			aconnector, connector->base.id, aconnector->mst_port);
 
@@ -389,6 +428,8 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 	drm_mode_connector_update_edid_property(
 			&aconnector->base,
 			NULL);
+
+	aconnector->mst_connected = false;
 }
 
 static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
@@ -399,10 +440,18 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
 	drm_kms_helper_hotplug_event(dev);
 }
 
+static void dm_dp_mst_link_status_reset(struct drm_connector *connector)
+{
+	mutex_lock(&connector->dev->mode_config.mutex);
+	drm_mode_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD);
+	mutex_unlock(&connector->dev->mode_config.mutex);
+}
+
 static void dm_dp_mst_register_connector(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
 
 	if (adev->mode_info.rfbdev)
 		drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector);
@@ -411,6 +460,8 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector)
 
 	drm_connector_register(connector);
 
+	if (aconnector->mst_connected)
+		dm_dp_mst_link_status_reset(connector);
 }
 
 static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
index 2da851b40042aee9b79eb2c666d45c0f5061fee0..8cf51da26657e29e72062b34aeed7e5d827f9e21 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
@@ -31,5 +31,6 @@ struct amdgpu_dm_connector;
 
 void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
 				       struct amdgpu_dm_connector *aconnector);
+void dm_dp_mst_dc_sink_create(struct drm_connector *connector);
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index 4f83e3011743f14469401d867f9892a2dbb311bd..aed538a4d1bace016fb37526d099656227b81348 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for Display Core (dc) component.
 #
 
diff --git a/drivers/gpu/drm/amd/display/dc/basics/Makefile b/drivers/gpu/drm/amd/display/dc/basics/Makefile
index 43c5ccdeeb724e65729b5d93b6aea5cc5cf2b81f..bca33bd9a0d250d55734cc0d440839937dcbd71d 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/basics/Makefile
@@ -1,9 +1,30 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'utils' sub-component of DAL.
 # It provides the general basic services required by other DAL
 # subcomponents.
 
-BASICS = conversion.o fixpt31_32.o fixpt32_32.o grph_object_id.o \
+BASICS = conversion.o fixpt31_32.o fixpt32_32.o \
 	logger.o log_helpers.o vector.o
 
 AMD_DAL_BASICS = $(addprefix $(AMDDALPATH)/dc/basics/,$(BASICS))
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
index 23c9a0ec01817c6a23be8fdbbd5ecbdf5816c585..310964915a83801c9cc914af3d5291a231ab7c38 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
@@ -46,7 +46,7 @@ uint16_t fixed_point_to_int_frac(
 			arg));
 
 	if (d <= (uint16_t)(1 << integer_bits) - (1 / (uint16_t)divisor))
-		numerator = (uint16_t)dal_fixed31_32_floor(
+		numerator = (uint16_t)dal_fixed31_32_round(
 			dal_fixed31_32_mul_int(
 				arg,
 				divisor));
diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
index 26936892c6f59c7702b34351bfa63a4db6dfd643..011a97f82fb625f3c00e042d1a4acc2a0d2e5b89 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
@@ -554,6 +554,22 @@ static inline uint32_t ux_dy(
 	return result | fractional_part;
 }
 
+static inline uint32_t clamp_ux_dy(
+	int64_t value,
+	uint32_t integer_bits,
+	uint32_t fractional_bits,
+	uint32_t min_clamp)
+{
+	uint32_t truncated_val = ux_dy(value, integer_bits, fractional_bits);
+
+	if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
+		return (1 << (integer_bits + fractional_bits)) - 1;
+	else if (truncated_val > min_clamp)
+		return truncated_val;
+	else
+		return min_clamp;
+}
+
 uint32_t dal_fixed31_32_u2d19(
 	struct fixed31_32 arg)
 {
@@ -565,3 +581,15 @@ uint32_t dal_fixed31_32_u0d19(
 {
 	return ux_dy(arg.value, 0, 19);
 }
+
+uint32_t dal_fixed31_32_clamp_u0d14(
+	struct fixed31_32 arg)
+{
+	return clamp_ux_dy(arg.value, 0, 14, 1);
+}
+
+uint32_t dal_fixed31_32_clamp_u0d10(
+	struct fixed31_32 arg)
+{
+	return clamp_ux_dy(arg.value, 0, 10, 1);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/Makefile b/drivers/gpu/drm/amd/display/dc/bios/Makefile
index 6ec815dce9ccc116c330cfebdfbefcc29ae91b20..239e86bbec5a17e318f7d82bad3d607bcbcd43d3 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/bios/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'bios' sub-component of DAL.
 # It provides the parsing and executing controls for atom bios image.
 
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index 86e6438c5cf35a6c0f981524454d58af417a183d..c00e405b63e89e8ef93bd23f90fa9596184edc9d 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -190,6 +190,7 @@ static struct graphics_object_id bios_parser_get_connector_id(
 	struct bios_parser *bp = BP_FROM_DCB(dcb);
 	struct graphics_object_id object_id = dal_graphics_object_id_init(
 		0, ENUM_ID_UNKNOWN, OBJECT_TYPE_UNKNOWN);
+	uint16_t id;
 
 	uint32_t connector_table_offset = bp->object_info_tbl_offset
 		+ le16_to_cpu(bp->object_info_tbl.v1_1->usConnectorObjectTableOffset);
@@ -197,12 +198,19 @@ static struct graphics_object_id bios_parser_get_connector_id(
 	ATOM_OBJECT_TABLE *tbl =
 		GET_IMAGE(ATOM_OBJECT_TABLE, connector_table_offset);
 
-	if (tbl && tbl->ucNumberOfObjects > i) {
-		const uint16_t id = le16_to_cpu(tbl->asObjects[i].usObjectID);
+	if (!tbl) {
+		dm_error("Can't get connector table from atom bios.\n");
+		return object_id;
+	}
 
-		object_id = object_id_from_bios_object_id(id);
+	if (tbl->ucNumberOfObjects <= i) {
+		dm_error("Can't find connector id %d in connector table of size %d.\n",
+			 i, tbl->ucNumberOfObjects);
+		return object_id;
 	}
 
+	id = le16_to_cpu(tbl->asObjects[i].usObjectID);
+	object_id = object_id_from_bios_object_id(id);
 	return object_id;
 }
 
@@ -2254,6 +2262,52 @@ static enum bp_result get_gpio_i2c_info(struct bios_parser *bp,
 	return BP_RESULT_OK;
 }
 
+static bool dal_graphics_object_id_is_valid(struct graphics_object_id id)
+{
+	bool rc = true;
+
+	switch (id.type) {
+	case OBJECT_TYPE_UNKNOWN:
+		rc = false;
+		break;
+	case OBJECT_TYPE_GPU:
+	case OBJECT_TYPE_ENGINE:
+		/* do NOT check for id.id == 0 */
+		if (id.enum_id == ENUM_ID_UNKNOWN)
+			rc = false;
+		break;
+	default:
+		if (id.id == 0 || id.enum_id == ENUM_ID_UNKNOWN)
+			rc = false;
+		break;
+	}
+
+	return rc;
+}
+
+static bool dal_graphics_object_id_is_equal(
+	struct graphics_object_id id1,
+	struct graphics_object_id id2)
+{
+	if (false == dal_graphics_object_id_is_valid(id1)) {
+		dm_output_to_console(
+		"%s: Warning: comparing invalid object 'id1'!\n", __func__);
+		return false;
+	}
+
+	if (false == dal_graphics_object_id_is_valid(id2)) {
+		dm_output_to_console(
+		"%s: Warning: comparing invalid object 'id2'!\n", __func__);
+		return false;
+	}
+
+	if (id1.id == id2.id && id1.enum_id == id2.enum_id
+		&& id1.type == id2.type)
+		return true;
+
+	return false;
+}
+
 static ATOM_OBJECT *get_bios_object(struct bios_parser *bp,
 	struct graphics_object_id id)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
index 3f7b2dabc2b06c839d43a510585175620bb19aab..4b5fdd577848a059c9e14d4ed22914284499fa9c 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
@@ -387,6 +387,7 @@ static void init_transmitter_control(struct bios_parser *bp)
 		bp->cmd_tbl.transmitter_control = transmitter_control_v1_6;
 		break;
 	default:
+		dm_output_to_console("Don't have transmitter_control for v%d\n", crev);
 		bp->cmd_tbl.transmitter_control = NULL;
 		break;
 	}
@@ -910,6 +911,8 @@ static void init_set_pixel_clock(struct bios_parser *bp)
 		bp->cmd_tbl.set_pixel_clock = set_pixel_clock_v7;
 		break;
 	default:
+		dm_output_to_console("Don't have set_pixel_clock for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(SetPixelClock));
 		bp->cmd_tbl.set_pixel_clock = NULL;
 		break;
 	}
@@ -1227,6 +1230,8 @@ static void init_enable_spread_spectrum_on_ppll(struct bios_parser *bp)
 				enable_spread_spectrum_on_ppll_v3;
 		break;
 	default:
+		dm_output_to_console("Don't have enable_spread_spectrum_on_ppll for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(EnableSpreadSpectrumOnPPLL));
 		bp->cmd_tbl.enable_spread_spectrum_on_ppll = NULL;
 		break;
 	}
@@ -1422,6 +1427,8 @@ static void init_adjust_display_pll(struct bios_parser *bp)
 		bp->cmd_tbl.adjust_display_pll = adjust_display_pll_v3;
 		break;
 	default:
+		dm_output_to_console("Don't have adjust_display_pll for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(AdjustDisplayPll));
 		bp->cmd_tbl.adjust_display_pll = NULL;
 		break;
 	}
@@ -1695,6 +1702,8 @@ static void init_set_crtc_timing(struct bios_parser *bp)
 					set_crtc_using_dtd_timing_v3;
 			break;
 		default:
+			dm_output_to_console("Don't have set_crtc_timing for dtd v%d\n",
+				 dtd_version);
 			bp->cmd_tbl.set_crtc_timing = NULL;
 			break;
 		}
@@ -1704,6 +1713,8 @@ static void init_set_crtc_timing(struct bios_parser *bp)
 			bp->cmd_tbl.set_crtc_timing = set_crtc_timing_v1;
 			break;
 		default:
+			dm_output_to_console("Don't have set_crtc_timing for v%d\n",
+				 BIOS_CMD_TABLE_PARA_REVISION(SetCRTC_Timing));
 			bp->cmd_tbl.set_crtc_timing = NULL;
 			break;
 		}
@@ -1890,6 +1901,8 @@ static void init_select_crtc_source(struct bios_parser *bp)
 		bp->cmd_tbl.select_crtc_source = select_crtc_source_v3;
 		break;
 	default:
+		dm_output_to_console("Don't select_crtc_source enable_crtc for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(SelectCRTC_Source));
 		bp->cmd_tbl.select_crtc_source = NULL;
 		break;
 	}
@@ -1997,6 +2010,8 @@ static void init_enable_crtc(struct bios_parser *bp)
 		bp->cmd_tbl.enable_crtc = enable_crtc_v1;
 		break;
 	default:
+		dm_output_to_console("Don't have enable_crtc for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(EnableCRTC));
 		bp->cmd_tbl.enable_crtc = NULL;
 		break;
 	}
@@ -2103,6 +2118,8 @@ static void init_program_clock(struct bios_parser *bp)
 		bp->cmd_tbl.program_clock = program_clock_v6;
 		break;
 	default:
+		dm_output_to_console("Don't have program_clock for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(SetPixelClock));
 		bp->cmd_tbl.program_clock = NULL;
 		break;
 	}
@@ -2324,6 +2341,8 @@ static void init_enable_disp_power_gating(
 				enable_disp_power_gating_v2_1;
 		break;
 	default:
+		dm_output_to_console("Don't enable_disp_power_gating enable_crtc for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(EnableDispPowerGating));
 		bp->cmd_tbl.enable_disp_power_gating = NULL;
 		break;
 	}
@@ -2371,6 +2390,8 @@ static void init_set_dce_clock(struct bios_parser *bp)
 		bp->cmd_tbl.set_dce_clock = set_dce_clock_v2_1;
 		break;
 	default:
+		dm_output_to_console("Don't have set_dce_clock for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(SetDCEClock));
 		bp->cmd_tbl.set_dce_clock = NULL;
 		break;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index ba68693758a795c889765d17444b6eb232805611..fea5e83736fd9cc42a5b8604ec1f9c6807e94abd 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -118,6 +118,7 @@ static void init_dig_encoder_control(struct bios_parser *bp)
 		bp->cmd_tbl.dig_encoder_control = encoder_control_digx_v1_5;
 		break;
 	default:
+		dm_output_to_console("Don't have dig_encoder_control for v%d\n", version);
 		bp->cmd_tbl.dig_encoder_control = NULL;
 		break;
 	}
@@ -205,6 +206,7 @@ static void init_transmitter_control(struct bios_parser *bp)
 		bp->cmd_tbl.transmitter_control = transmitter_control_v1_6;
 		break;
 	default:
+		dm_output_to_console("Don't have transmitter_control for v%d\n", crev);
 		bp->cmd_tbl.transmitter_control = NULL;
 		break;
 	}
@@ -268,6 +270,8 @@ static void init_set_pixel_clock(struct bios_parser *bp)
 		bp->cmd_tbl.set_pixel_clock = set_pixel_clock_v7;
 		break;
 	default:
+		dm_output_to_console("Don't have set_pixel_clock for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(setpixelclock));
 		bp->cmd_tbl.set_pixel_clock = NULL;
 		break;
 	}
@@ -379,6 +383,7 @@ static void init_set_crtc_timing(struct bios_parser *bp)
 			set_crtc_using_dtd_timing_v3;
 		break;
 	default:
+		dm_output_to_console("Don't have set_crtc_timing for v%d\n", dtd_version);
 		bp->cmd_tbl.set_crtc_timing = NULL;
 		break;
 	}
@@ -498,6 +503,8 @@ static void init_select_crtc_source(struct bios_parser *bp)
 		bp->cmd_tbl.select_crtc_source = select_crtc_source_v3;
 		break;
 	default:
+		dm_output_to_console("Don't select_crtc_source enable_crtc for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(selectcrtc_source));
 		bp->cmd_tbl.select_crtc_source = NULL;
 		break;
 	}
@@ -565,6 +572,8 @@ static void init_enable_crtc(struct bios_parser *bp)
 		bp->cmd_tbl.enable_crtc = enable_crtc_v1;
 		break;
 	default:
+		dm_output_to_console("Don't have enable_crtc for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(enablecrtc));
 		bp->cmd_tbl.enable_crtc = NULL;
 		break;
 	}
@@ -661,6 +670,8 @@ static void init_enable_disp_power_gating(
 				enable_disp_power_gating_v2_1;
 		break;
 	default:
+		dm_output_to_console("Don't enable_disp_power_gating enable_crtc for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(enabledisppowergating));
 		bp->cmd_tbl.enable_disp_power_gating = NULL;
 		break;
 	}
@@ -710,6 +721,8 @@ static void init_set_dce_clock(struct bios_parser *bp)
 		bp->cmd_tbl.set_dce_clock = set_dce_clock_v2_1;
 		break;
 	default:
+		dm_output_to_console("Don't have set_dce_clock for v%d\n",
+			 BIOS_CMD_TABLE_PARA_REVISION(setdceclock));
 		bp->cmd_tbl.set_dce_clock = NULL;
 		break;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
index 41ef35995b029e5586e404a4b8632ef689d315c7..7959e382ed28f0b4aa833edace0ec3938dbefe34 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'calcs' sub-component of DAL.
 # It calculates Bandwidth and Watermarks values for HW programming
 #
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
index 6347712db834b76af9dc8a0069884f8fff64a907..2e11fac2a63ddfac42eeb726cc66e27f3541e41d 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
@@ -29,6 +29,15 @@
 #include "core_types.h"
 #include "dal_asic_id.h"
 
+/*
+ * NOTE:
+ *   This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
 /*******************************************************************************
  * Private Functions
  ******************************************************************************/
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c
index 626f9cf8aad282f49af70516b612397705b54ab8..5e2ea12fbb731f71da889ca3e8338d008d377385 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c
@@ -27,6 +27,15 @@
 #include "dcn_calc_auto.h"
 #include "dcn_calc_math.h"
 
+/*
+ * NOTE:
+ *   This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
 /*REVISION#250*/
 void scaler_settings_calculation(struct dcn_bw_internal_vars *v)
 {
@@ -773,11 +782,11 @@ void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v)
 					v->dst_y_after_scaler = 0.0;
 				}
 				v->time_calc = 24.0 / v->projected_dcfclk_deep_sleep;
-				v->v_update_offset[k] =dcn_bw_ceil2(v->htotal[k] / 4.0, 1.0);
+				v->v_update_offset[k][j] = dcn_bw_ceil2(v->htotal[k] / 4.0, 1.0);
 				v->total_repeater_delay = v->max_inter_dcn_tile_repeaters * (2.0 / (v->required_dispclk[i][j] / (j + 1)) + 3.0 / v->required_dispclk[i][j]);
-				v->v_update_width[k] = (14.0 / v->projected_dcfclk_deep_sleep + 12.0 / (v->required_dispclk[i][j] / (j + 1)) + v->total_repeater_delay) * v->pixel_clock[k];
-				v->v_ready_offset[k] =dcn_bw_max2(150.0 / (v->required_dispclk[i][j] / (j + 1)), v->total_repeater_delay + 20.0 / v->projected_dcfclk_deep_sleep + 10.0 / (v->required_dispclk[i][j] / (j + 1))) * v->pixel_clock[k];
-				v->time_setup = (v->v_update_offset[k] + v->v_update_width[k] + v->v_ready_offset[k]) / v->pixel_clock[k];
+				v->v_update_width[k][j] = (14.0 / v->projected_dcfclk_deep_sleep + 12.0 / (v->required_dispclk[i][j] / (j + 1)) + v->total_repeater_delay) * v->pixel_clock[k];
+				v->v_ready_offset[k][j] = dcn_bw_max2(150.0 / (v->required_dispclk[i][j] / (j + 1)), v->total_repeater_delay + 20.0 / v->projected_dcfclk_deep_sleep + 10.0 / (v->required_dispclk[i][j] / (j + 1))) * v->pixel_clock[k];
+				v->time_setup = (v->v_update_offset[k][j] + v->v_update_width[k][j] + v->v_ready_offset[k][j]) / v->pixel_clock[k];
 				v->extra_latency = v->urgent_round_trip_and_out_of_order_latency_per_state[i] + (v->total_number_of_active_dpp[i][j] * v->pixel_chunk_size_in_kbyte + v->total_number_of_dcc_active_dpp[i][j] * v->meta_chunk_size) * 1024.0 / v->return_bw_per_state[i];
 				if (v->pte_enable == dcn_bw_yes) {
 					v->extra_latency = v->extra_latency + v->total_number_of_active_dpp[i][j] * v->pte_chunk_size * 1024.0 / v->return_bw_per_state[i];
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.c
index b6abe0f3bb152e3b89966d09988bccc3bb840ff5..7600a4a4abc7fd8bd03c707f6df8bf9d9b9aa3a7 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.c
@@ -25,37 +25,44 @@
 
 #include "dcn_calc_math.h"
 
+#define isNaN(number) ((number) != (number))
+
+/*
+ * NOTE:
+ *   This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
 float dcn_bw_mod(const float arg1, const float arg2)
 {
-	if (arg1 != arg1)
+	if (isNaN(arg1))
 		return arg2;
-	if (arg2 != arg2)
+	if (isNaN(arg2))
 		return arg1;
 	return arg1 - arg1 * ((int) (arg1 / arg2));
 }
 
 float dcn_bw_min2(const float arg1, const float arg2)
 {
-	if (arg1 != arg1)
+	if (isNaN(arg1))
 		return arg2;
-	if (arg2 != arg2)
+	if (isNaN(arg2))
 		return arg1;
 	return arg1 < arg2 ? arg1 : arg2;
 }
 
 unsigned int dcn_bw_max(const unsigned int arg1, const unsigned int arg2)
 {
-	if (arg1 != arg1)
-		return arg2;
-	if (arg2 != arg2)
-		return arg1;
 	return arg1 > arg2 ? arg1 : arg2;
 }
 float dcn_bw_max2(const float arg1, const float arg2)
 {
-	if (arg1 != arg1)
+	if (isNaN(arg1))
 		return arg2;
-	if (arg2 != arg2)
+	if (isNaN(arg2))
 		return arg1;
 	return arg1 > arg2 ? arg1 : arg2;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
index a4fbca34bcdf873283ef34e19202d85211daa772..331891c2c71a2aacd39221ef62fce6f4cf4604c9 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
@@ -33,6 +33,15 @@
 #include "dcn10/dcn10_resource.h"
 #include "dcn_calc_math.h"
 
+/*
+ * NOTE:
+ *   This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
 /* Defaults from spreadsheet rev#247 */
 const struct dcn_soc_bounding_box dcn10_soc_defaults = {
 		/* latencies */
@@ -878,6 +887,17 @@ bool dcn_validate_bandwidth(
 						+ pipe->bottom_pipe->plane_res.scl_data.recout.width;
 			}
 
+			if (pipe->plane_state->rotation % 2 == 0) {
+				ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value
+					|| v->scaler_rec_out_width[input_idx] == v->viewport_width[input_idx]);
+				ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value
+					|| v->scaler_recout_height[input_idx] == v->viewport_height[input_idx]);
+			} else {
+				ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value
+					|| v->scaler_recout_height[input_idx] == v->viewport_width[input_idx]);
+				ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value
+					|| v->scaler_rec_out_width[input_idx] == v->viewport_height[input_idx]);
+			}
 			v->dcc_enable[input_idx] = pipe->plane_state->dcc.enable ? dcn_bw_yes : dcn_bw_no;
 			v->source_pixel_format[input_idx] = tl_pixel_format_to_bw_defs(
 					pipe->plane_state->format);
@@ -888,6 +908,15 @@ bool dcn_validate_bandwidth(
 			v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps;
 			v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c;
 			v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c;
+			/*
+			 * Spreadsheet doesn't handle taps_c is one properly,
+			 * need to force Chroma to always be scaled to pass
+			 * bandwidth validation.
+			 */
+			if (v->override_hta_pschroma[input_idx] == 1)
+				v->override_hta_pschroma[input_idx] = 2;
+			if (v->override_vta_pschroma[input_idx] == 1)
+				v->override_vta_pschroma[input_idx] = 2;
 			v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor;
 		}
 		if (v->is_line_buffer_bpp_fixed == dcn_bw_yes)
@@ -985,9 +1014,9 @@ bool dcn_validate_bandwidth(
 			if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state)
 				continue;
 
-			pipe->pipe_dlg_param.vupdate_width = v->v_update_width[input_idx];
-			pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset[input_idx];
-			pipe->pipe_dlg_param.vready_offset = v->v_ready_offset[input_idx];
+			pipe->pipe_dlg_param.vupdate_width = v->v_update_width[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0];
+			pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0];
+			pipe->pipe_dlg_param.vready_offset = v->v_ready_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0];
 			pipe->pipe_dlg_param.vstartup_start = v->v_startup[input_idx];
 
 			pipe->pipe_dlg_param.htotal = pipe->stream->timing.h_total;
@@ -1026,9 +1055,9 @@ bool dcn_validate_bandwidth(
 					 TIMING_3D_FORMAT_SIDE_BY_SIDE))) {
 					if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) {
 						/* update previously split pipe */
-						hsplit_pipe->pipe_dlg_param.vupdate_width = v->v_update_width[input_idx];
-						hsplit_pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset[input_idx];
-						hsplit_pipe->pipe_dlg_param.vready_offset = v->v_ready_offset[input_idx];
+						hsplit_pipe->pipe_dlg_param.vupdate_width = v->v_update_width[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0];
+						hsplit_pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0];
+						hsplit_pipe->pipe_dlg_param.vready_offset = v->v_ready_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0];
 						hsplit_pipe->pipe_dlg_param.vstartup_start = v->v_startup[input_idx];
 
 						hsplit_pipe->pipe_dlg_param.htotal = pipe->stream->timing.h_total;
@@ -1556,35 +1585,6 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc)
 			dc->dcn_ip->can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one,
 			dc->dcn_ip->bug_forcing_luma_and_chroma_request_to_same_size_fixed,
 			dc->dcn_ip->dcfclk_cstate_latency);
-	dc->dml.soc.vmin.socclk_mhz = dc->dcn_soc->socclk;
-	dc->dml.soc.vmid.socclk_mhz = dc->dcn_soc->socclk;
-	dc->dml.soc.vnom.socclk_mhz = dc->dcn_soc->socclk;
-	dc->dml.soc.vmax.socclk_mhz = dc->dcn_soc->socclk;
-
-	dc->dml.soc.vmin.dcfclk_mhz = dc->dcn_soc->dcfclkv_min0p65;
-	dc->dml.soc.vmid.dcfclk_mhz = dc->dcn_soc->dcfclkv_mid0p72;
-	dc->dml.soc.vnom.dcfclk_mhz = dc->dcn_soc->dcfclkv_nom0p8;
-	dc->dml.soc.vmax.dcfclk_mhz = dc->dcn_soc->dcfclkv_max0p9;
-
-	dc->dml.soc.vmin.dispclk_mhz = dc->dcn_soc->max_dispclk_vmin0p65;
-	dc->dml.soc.vmid.dispclk_mhz = dc->dcn_soc->max_dispclk_vmid0p72;
-	dc->dml.soc.vnom.dispclk_mhz = dc->dcn_soc->max_dispclk_vnom0p8;
-	dc->dml.soc.vmax.dispclk_mhz = dc->dcn_soc->max_dispclk_vmax0p9;
-
-	dc->dml.soc.vmin.dppclk_mhz = dc->dcn_soc->max_dppclk_vmin0p65;
-	dc->dml.soc.vmid.dppclk_mhz = dc->dcn_soc->max_dppclk_vmid0p72;
-	dc->dml.soc.vnom.dppclk_mhz = dc->dcn_soc->max_dppclk_vnom0p8;
-	dc->dml.soc.vmax.dppclk_mhz = dc->dcn_soc->max_dppclk_vmax0p9;
-
-	dc->dml.soc.vmin.phyclk_mhz = dc->dcn_soc->phyclkv_min0p65;
-	dc->dml.soc.vmid.phyclk_mhz = dc->dcn_soc->phyclkv_mid0p72;
-	dc->dml.soc.vnom.phyclk_mhz = dc->dcn_soc->phyclkv_nom0p8;
-	dc->dml.soc.vmax.phyclk_mhz = dc->dcn_soc->phyclkv_max0p9;
-
-	dc->dml.soc.vmin.dram_bw_per_chan_gbps = dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65;
-	dc->dml.soc.vmid.dram_bw_per_chan_gbps = dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72;
-	dc->dml.soc.vnom.dram_bw_per_chan_gbps = dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8;
-	dc->dml.soc.vmax.dram_bw_per_chan_gbps = dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9;
 
 	dc->dml.soc.sr_exit_time_us = dc->dcn_soc->sr_exit_time;
 	dc->dml.soc.sr_enter_plus_exit_time_us = dc->dcn_soc->sr_enter_plus_exit_time;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index d1488d5ee02816488155680f7b853877aef7167b..35e84ed031de08f5edfadbfb71e550291ae8ebef 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -283,19 +283,17 @@ static bool construct(struct dc *dc,
 		const struct dc_init_data *init_params)
 {
 	struct dal_logger *logger;
-	struct dc_context *dc_ctx = kzalloc(sizeof(*dc_ctx), GFP_KERNEL);
-	struct bw_calcs_dceip *dc_dceip = kzalloc(sizeof(*dc_dceip),
-						  GFP_KERNEL);
-	struct bw_calcs_vbios *dc_vbios = kzalloc(sizeof(*dc_vbios),
-						  GFP_KERNEL);
+	struct dc_context *dc_ctx;
+	struct bw_calcs_dceip *dc_dceip;
+	struct bw_calcs_vbios *dc_vbios;
 #ifdef CONFIG_DRM_AMD_DC_DCN1_0
-	struct dcn_soc_bounding_box *dcn_soc = kzalloc(sizeof(*dcn_soc),
-						       GFP_KERNEL);
-	struct dcn_ip_params *dcn_ip = kzalloc(sizeof(*dcn_ip), GFP_KERNEL);
+	struct dcn_soc_bounding_box *dcn_soc;
+	struct dcn_ip_params *dcn_ip;
 #endif
 
 	enum dce_version dc_version = DCE_VERSION_UNKNOWN;
 
+	dc_dceip = kzalloc(sizeof(*dc_dceip), GFP_KERNEL);
 	if (!dc_dceip) {
 		dm_error("%s: failed to create dceip\n", __func__);
 		goto fail;
@@ -303,6 +301,7 @@ static bool construct(struct dc *dc,
 
 	dc->bw_dceip = dc_dceip;
 
+	dc_vbios = kzalloc(sizeof(*dc_vbios), GFP_KERNEL);
 	if (!dc_vbios) {
 		dm_error("%s: failed to create vbios\n", __func__);
 		goto fail;
@@ -310,6 +309,7 @@ static bool construct(struct dc *dc,
 
 	dc->bw_vbios = dc_vbios;
 #ifdef CONFIG_DRM_AMD_DC_DCN1_0
+	dcn_soc = kzalloc(sizeof(*dcn_soc), GFP_KERNEL);
 	if (!dcn_soc) {
 		dm_error("%s: failed to create dcn_soc\n", __func__);
 		goto fail;
@@ -317,6 +317,7 @@ static bool construct(struct dc *dc,
 
 	dc->dcn_soc = dcn_soc;
 
+	dcn_ip = kzalloc(sizeof(*dcn_ip), GFP_KERNEL);
 	if (!dcn_ip) {
 		dm_error("%s: failed to create dcn_ip\n", __func__);
 		goto fail;
@@ -325,11 +326,18 @@ static bool construct(struct dc *dc,
 	dc->dcn_ip = dcn_ip;
 #endif
 
+	dc_ctx = kzalloc(sizeof(*dc_ctx), GFP_KERNEL);
 	if (!dc_ctx) {
 		dm_error("%s: failed to create ctx\n", __func__);
 		goto fail;
 	}
 
+	dc_ctx->cgs_device = init_params->cgs_device;
+	dc_ctx->driver_context = init_params->driver;
+	dc_ctx->dc = dc;
+	dc_ctx->asic_id = init_params->asic_id;
+	dc->ctx = dc_ctx;
+
 	dc->current_state = dc_create_state();
 
 	if (!dc->current_state) {
@@ -337,11 +345,6 @@ static bool construct(struct dc *dc,
 		goto fail;
 	}
 
-	dc_ctx->cgs_device = init_params->cgs_device;
-	dc_ctx->driver_context = init_params->driver;
-	dc_ctx->dc = dc;
-	dc_ctx->asic_id = init_params->asic_id;
-
 	/* Create logger */
 	logger = dal_logger_create(dc_ctx, init_params->log_mask);
 
@@ -351,11 +354,10 @@ static bool construct(struct dc *dc,
 		goto fail;
 	}
 	dc_ctx->logger = logger;
-	dc->ctx = dc_ctx;
-	dc->ctx->dce_environment = init_params->dce_environment;
+	dc_ctx->dce_environment = init_params->dce_environment;
 
 	dc_version = resource_parse_asic_id(init_params->asic_id);
-	dc->ctx->dce_version = dc_version;
+	dc_ctx->dce_version = dc_version;
 
 #if defined(CONFIG_DRM_AMD_DC_FBC)
 	dc->ctx->fbc_gpu_addr = init_params->fbc_gpu_addr;
@@ -578,7 +580,7 @@ static void program_timing_sync(
 		for (j = 0; j < group_size; j++) {
 			struct pipe_ctx *temp;
 
-			if (!pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg)) {
+			if (pipe_set[j]->stream_res.tg->funcs->is_blanked && !pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg)) {
 				if (j == 0)
 					break;
 
@@ -591,7 +593,7 @@ static void program_timing_sync(
 
 		/* remove any other unblanked pipes as they have already been synced */
 		for (j = j + 1; j < group_size; j++) {
-			if (!pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg)) {
+			if (pipe_set[j]->stream_res.tg->funcs->is_blanked && !pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg)) {
 				group_size--;
 				pipe_set[j] = pipe_set[group_size];
 				j--;
@@ -786,6 +788,8 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc)
 			dc->hwss.disable_plane(dc, &context->res_ctx.pipe_ctx[i]);
 		}
 
+	dc->optimized_required = false;
+
 	/* 3rd param should be true, temp w/a for RV*/
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	dc->hwss.set_bandwidth(dc, context, dc->ctx->dce_version < DCN_VERSION_1_0);
@@ -981,6 +985,11 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa
 	if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha)
 		update_flags->bits.per_pixel_alpha_change = 1;
 
+	if (u->plane_info->dcc.enable != u->surface->dcc.enable
+			|| u->plane_info->dcc.grph.independent_64b_blks != u->surface->dcc.grph.independent_64b_blks
+			|| u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch)
+		update_flags->bits.dcc_change = 1;
+
 	if (pixel_format_to_bpp(u->plane_info->format) !=
 			pixel_format_to_bpp(u->surface->format))
 		/* different bytes per element will require full bandwidth
@@ -1178,12 +1187,6 @@ static void commit_planes_for_stream(struct dc *dc,
 	if (update_type == UPDATE_TYPE_FULL) {
 		dc->hwss.set_bandwidth(dc, context, false);
 		context_clock_trace(dc, context);
-
-		for (j = 0; j < dc->res_pool->pipe_count; j++) {
-			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
-
-			dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe_ctx);
-		}
 	}
 
 	if (surface_count == 0) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
index 2e509382935fecf7366f0d086c030d061071f008..1babac07bcc9eaee38651e179099d30f660996b4 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
@@ -1,3 +1,25 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
 /*
  * dc_debug.c
  *
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 71993d5983bfa907182f14148709eda4cfeefacd..ebc96b7200835ede9c64202ea0c7a44e69c9edac 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -28,6 +28,8 @@
 #include "timing_generator.h"
 #include "hw_sequencer.h"
 
+#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
+
 /* used as index in array of black_color_format */
 enum black_color_format {
 	BLACK_COLOR_FORMAT_RGB_FULLRANGE = 0,
@@ -38,6 +40,15 @@ enum black_color_format {
 	BLACK_COLOR_FORMAT_DEBUG,
 };
 
+enum dc_color_space_type {
+	COLOR_SPACE_RGB_TYPE,
+	COLOR_SPACE_RGB_LIMITED_TYPE,
+	COLOR_SPACE_YCBCR601_TYPE,
+	COLOR_SPACE_YCBCR709_TYPE,
+	COLOR_SPACE_YCBCR601_LIMITED_TYPE,
+	COLOR_SPACE_YCBCR709_LIMITED_TYPE
+};
+
 static const struct tg_color black_color_format[] = {
 	/* BlackColorFormat_RGB_FullRange */
 	{0, 0, 0},
@@ -53,6 +64,140 @@ static const struct tg_color black_color_format[] = {
 	{0xff, 0xff, 0},
 };
 
+struct out_csc_color_matrix_type {
+	enum dc_color_space_type color_space_type;
+	uint16_t regval[12];
+};
+
+static const struct out_csc_color_matrix_type output_csc_matrix[] = {
+	{ COLOR_SPACE_RGB_TYPE,
+		{ 0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} },
+	{ COLOR_SPACE_RGB_LIMITED_TYPE,
+		{ 0x1B67, 0, 0, 0x201, 0, 0x1B67, 0, 0x201, 0, 0, 0x1B67, 0x201} },
+	{ COLOR_SPACE_YCBCR601_TYPE,
+		{ 0xE04, 0xF444, 0xFDB9, 0x1004, 0x831, 0x1016, 0x320, 0x201, 0xFB45,
+				0xF6B7, 0xE04, 0x1004} },
+	{ COLOR_SPACE_YCBCR709_TYPE,
+		{ 0xE04, 0xF345, 0xFEB7, 0x1004, 0x5D3, 0x1399, 0x1FA,
+				0x201, 0xFCCA, 0xF533, 0xE04, 0x1004} },
+
+	/* TODO: correct values below */
+	{ COLOR_SPACE_YCBCR601_LIMITED_TYPE,
+		{ 0xE00, 0xF447, 0xFDB9, 0x1000, 0x991,
+				0x12C9, 0x3A6, 0x200, 0xFB47, 0xF6B9, 0xE00, 0x1000} },
+	{ COLOR_SPACE_YCBCR709_LIMITED_TYPE,
+		{ 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3,
+				0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} },
+};
+
+static bool is_rgb_type(
+		enum dc_color_space color_space)
+{
+	bool ret = false;
+
+	if (color_space == COLOR_SPACE_SRGB			||
+		color_space == COLOR_SPACE_XR_RGB		||
+		color_space == COLOR_SPACE_MSREF_SCRGB		||
+		color_space == COLOR_SPACE_2020_RGB_FULLRANGE	||
+		color_space == COLOR_SPACE_ADOBERGB		||
+		color_space == COLOR_SPACE_DCIP3	||
+		color_space == COLOR_SPACE_DOLBYVISION)
+		ret = true;
+	return ret;
+}
+
+static bool is_rgb_limited_type(
+		enum dc_color_space color_space)
+{
+	bool ret = false;
+
+	if (color_space == COLOR_SPACE_SRGB_LIMITED		||
+		color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE)
+		ret = true;
+	return ret;
+}
+
+static bool is_ycbcr601_type(
+		enum dc_color_space color_space)
+{
+	bool ret = false;
+
+	if (color_space == COLOR_SPACE_YCBCR601	||
+		color_space == COLOR_SPACE_XV_YCC_601)
+		ret = true;
+	return ret;
+}
+
+static bool is_ycbcr601_limited_type(
+		enum dc_color_space color_space)
+{
+	bool ret = false;
+
+	if (color_space == COLOR_SPACE_YCBCR601_LIMITED)
+		ret = true;
+	return ret;
+}
+
+static bool is_ycbcr709_type(
+		enum dc_color_space color_space)
+{
+	bool ret = false;
+
+	if (color_space == COLOR_SPACE_YCBCR709	||
+		color_space == COLOR_SPACE_XV_YCC_709)
+		ret = true;
+	return ret;
+}
+
+static bool is_ycbcr709_limited_type(
+		enum dc_color_space color_space)
+{
+	bool ret = false;
+
+	if (color_space == COLOR_SPACE_YCBCR709_LIMITED)
+		ret = true;
+	return ret;
+}
+enum dc_color_space_type get_color_space_type(enum dc_color_space color_space)
+{
+	enum dc_color_space_type type = COLOR_SPACE_RGB_TYPE;
+
+	if (is_rgb_type(color_space))
+		type = COLOR_SPACE_RGB_TYPE;
+	else if (is_rgb_limited_type(color_space))
+		type = COLOR_SPACE_RGB_LIMITED_TYPE;
+	else if (is_ycbcr601_type(color_space))
+		type = COLOR_SPACE_YCBCR601_TYPE;
+	else if (is_ycbcr709_type(color_space))
+		type = COLOR_SPACE_YCBCR709_TYPE;
+	else if (is_ycbcr601_limited_type(color_space))
+		type = COLOR_SPACE_YCBCR601_LIMITED_TYPE;
+	else if (is_ycbcr709_limited_type(color_space))
+		type = COLOR_SPACE_YCBCR709_LIMITED_TYPE;
+
+	return type;
+}
+
+const uint16_t *find_color_matrix(enum dc_color_space color_space,
+							uint32_t *array_size)
+{
+	int i;
+	enum dc_color_space_type type;
+	const uint16_t *val = NULL;
+	int arr_size = NUM_ELEMENTS(output_csc_matrix);
+
+	type = get_color_space_type(color_space);
+	for (i = 0; i < arr_size; i++)
+		if (output_csc_matrix[i].color_space_type == type) {
+			val = output_csc_matrix[i].regval;
+			*array_size = 12;
+			break;
+		}
+
+	return val;
+}
+
+
 void color_space_to_black_color(
 	const struct dc *dc,
 	enum dc_color_space colorspace,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 7b0e43c0685cc56044d4e36d3f1d8e6d8ecb55c2..a3742827157361ac0c3c57733eb845cd34ea4720 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -938,8 +938,9 @@ static bool construct(
 	link->link_id = bios->funcs->get_connector_id(bios, init_params->connector_index);
 
 	if (link->link_id.type != OBJECT_TYPE_CONNECTOR) {
-		dm_error("%s: Invalid Connector ObjectID from Adapter Service for connector index:%d!\n",
-				__func__, init_params->connector_index);
+		dm_error("%s: Invalid Connector ObjectID from Adapter Service for connector index:%d! type %d expected %d\n",
+			 __func__, init_params->connector_index,
+			 link->link_id.type, OBJECT_TYPE_CONNECTOR);
 		goto create_fail;
 	}
 
@@ -1271,6 +1272,24 @@ static enum dc_status enable_link_dp(
 	return status;
 }
 
+static enum dc_status enable_link_edp(
+		struct dc_state *state,
+		struct pipe_ctx *pipe_ctx)
+{
+	enum dc_status status;
+	struct dc_stream_state *stream = pipe_ctx->stream;
+	struct dc_link *link = stream->sink->link;
+
+	link->dc->hwss.edp_power_control(link, true);
+	link->dc->hwss.edp_wait_for_hpd_ready(link, true);
+
+	status = enable_link_dp(state, pipe_ctx);
+
+	link->dc->hwss.edp_backlight_control(link, true);
+
+	return status;
+}
+
 static enum dc_status enable_link_dp_mst(
 		struct dc_state *state,
 		struct pipe_ctx *pipe_ctx)
@@ -1746,9 +1765,11 @@ static enum dc_status enable_link(
 	enum dc_status status = DC_ERROR_UNEXPECTED;
 	switch (pipe_ctx->stream->signal) {
 	case SIGNAL_TYPE_DISPLAY_PORT:
-	case SIGNAL_TYPE_EDP:
 		status = enable_link_dp(state, pipe_ctx);
 		break;
+	case SIGNAL_TYPE_EDP:
+		status = enable_link_edp(state, pipe_ctx);
+		break;
 	case SIGNAL_TYPE_DISPLAY_PORT_MST:
 		status = enable_link_dp_mst(state, pipe_ctx);
 		msleep(200);
@@ -1801,7 +1822,7 @@ static void disable_link(struct dc_link *link, enum signal_type signal)
 		link->link_enc->funcs->disable_output(link->link_enc, signal);
 }
 
-bool dp_active_dongle_validate_timing(
+static bool dp_active_dongle_validate_timing(
 		const struct dc_crtc_timing *timing,
 		const struct dc_dongle_caps *dongle_caps)
 {
@@ -1833,6 +1854,8 @@ bool dp_active_dongle_validate_timing(
 	/* Check Color Depth and Pixel Clock */
 	if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
 		required_pix_clk /= 2;
+	else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
+		required_pix_clk = required_pix_clk * 2 / 3;
 
 	switch (timing->display_color_depth) {
 	case COLOR_DEPTH_666:
@@ -1907,12 +1930,18 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level,
 {
 	struct dc  *core_dc = link->ctx->dc;
 	struct abm *abm = core_dc->res_pool->abm;
+	struct dmcu *dmcu = core_dc->res_pool->dmcu;
 	unsigned int controller_id = 0;
+	bool use_smooth_brightness = true;
 	int i;
 
-	if ((abm == NULL) || (abm->funcs->set_backlight_level == NULL))
+	if ((dmcu == NULL) ||
+		(abm == NULL) ||
+		(abm->funcs->set_backlight_level == NULL))
 		return false;
 
+	use_smooth_brightness = dmcu->funcs->is_dmcu_initialized(dmcu);
+
 	dm_logger_write(link->ctx->logger, LOG_BACKLIGHT,
 			"New Backlight level: %d (0x%X)\n", level, level);
 
@@ -1935,7 +1964,8 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level,
 				abm,
 				level,
 				frame_ramp,
-				controller_id);
+				controller_id,
+				use_smooth_brightness);
 	}
 
 	return true;
@@ -1952,144 +1982,6 @@ bool dc_link_set_psr_enable(const struct dc_link *link, bool enable, bool wait)
 	return true;
 }
 
-bool dc_link_get_psr_state(const struct dc_link *link, uint32_t *psr_state)
-{
-	struct dc  *core_dc = link->ctx->dc;
-	struct dmcu *dmcu = core_dc->res_pool->dmcu;
-
-	if (dmcu != NULL && link->psr_enabled)
-		dmcu->funcs->get_psr_state(dmcu, psr_state);
-
-	return true;
-}
-
-bool dc_link_setup_psr(struct dc_link *link,
-		const struct dc_stream_state *stream, struct psr_config *psr_config,
-		struct psr_context *psr_context)
-{
-	struct dc  *core_dc = link->ctx->dc;
-	struct dmcu *dmcu = core_dc->res_pool->dmcu;
-	int i;
-
-	psr_context->controllerId = CONTROLLER_ID_UNDEFINED;
-
-	if (link != NULL &&
-		dmcu != NULL) {
-		/* updateSinkPsrDpcdConfig*/
-		union dpcd_psr_configuration psr_configuration;
-
-		memset(&psr_configuration, 0, sizeof(psr_configuration));
-
-		psr_configuration.bits.ENABLE                    = 1;
-		psr_configuration.bits.CRC_VERIFICATION          = 1;
-		psr_configuration.bits.FRAME_CAPTURE_INDICATION  =
-				psr_config->psr_frame_capture_indication_req;
-
-		/* Check for PSR v2*/
-		if (psr_config->psr_version == 0x2) {
-			/* For PSR v2 selective update.
-			 * Indicates whether sink should start capturing
-			 * immediately following active scan line,
-			 * or starting with the 2nd active scan line.
-			 */
-			psr_configuration.bits.LINE_CAPTURE_INDICATION = 0;
-			/*For PSR v2, determines whether Sink should generate
-			 * IRQ_HPD when CRC mismatch is detected.
-			 */
-			psr_configuration.bits.IRQ_HPD_WITH_CRC_ERROR    = 1;
-		}
-
-		dm_helpers_dp_write_dpcd(
-			link->ctx,
-			link,
-			368,
-			&psr_configuration.raw,
-			sizeof(psr_configuration.raw));
-
-		psr_context->channel = link->ddc->ddc_pin->hw_info.ddc_channel;
-		psr_context->transmitterId = link->link_enc->transmitter;
-		psr_context->engineId = link->link_enc->preferred_engine;
-
-		for (i = 0; i < MAX_PIPES; i++) {
-			if (core_dc->current_state->res_ctx.pipe_ctx[i].stream
-					== stream) {
-				/* dmcu -1 for all controller id values,
-				 * therefore +1 here
-				 */
-				psr_context->controllerId =
-					core_dc->current_state->res_ctx.
-					pipe_ctx[i].stream_res.tg->inst + 1;
-				break;
-			}
-		}
-
-		/* Hardcoded for now.  Can be Pcie or Uniphy (or Unknown)*/
-		psr_context->phyType = PHY_TYPE_UNIPHY;
-		/*PhyId is associated with the transmitter id*/
-		psr_context->smuPhyId = link->link_enc->transmitter;
-
-		psr_context->crtcTimingVerticalTotal = stream->timing.v_total;
-		psr_context->vsyncRateHz = div64_u64(div64_u64((stream->
-						timing.pix_clk_khz * 1000),
-						stream->timing.v_total),
-						stream->timing.h_total);
-
-		psr_context->psrSupportedDisplayConfig = true;
-		psr_context->psrExitLinkTrainingRequired =
-			psr_config->psr_exit_link_training_required;
-		psr_context->sdpTransmitLineNumDeadline =
-			psr_config->psr_sdp_transmit_line_num_deadline;
-		psr_context->psrFrameCaptureIndicationReq =
-			psr_config->psr_frame_capture_indication_req;
-
-		psr_context->skipPsrWaitForPllLock = 0; /* only = 1 in KV */
-
-		psr_context->numberOfControllers =
-				link->dc->res_pool->res_cap->num_timing_generator;
-
-		psr_context->rfb_update_auto_en = true;
-
-		/* 2 frames before enter PSR. */
-		psr_context->timehyst_frames = 2;
-		/* half a frame
-		 * (units in 100 lines, i.e. a value of 1 represents 100 lines)
-		 */
-		psr_context->hyst_lines = stream->timing.v_total / 2 / 100;
-		psr_context->aux_repeats = 10;
-
-		psr_context->psr_level.u32all = 0;
-
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
-		/*skip power down the single pipe since it blocks the cstate*/
-		if (ASIC_REV_IS_RAVEN(link->ctx->asic_id.hw_internal_rev))
-			psr_context->psr_level.bits.SKIP_CRTC_DISABLE = true;
-#endif
-
-		/* SMU will perform additional powerdown sequence.
-		 * For unsupported ASICs, set psr_level flag to skip PSR
-		 *  static screen notification to SMU.
-		 *  (Always set for DAL2, did not check ASIC)
-		 */
-		psr_context->psr_level.bits.SKIP_SMU_NOTIFICATION = 1;
-
-		/* Complete PSR entry before aborting to prevent intermittent
-		 * freezes on certain eDPs
-		 */
-		psr_context->psr_level.bits.DISABLE_PSR_ENTRY_ABORT = 1;
-
-		/* Controls additional delay after remote frame capture before
-		 * continuing power down, default = 0
-		 */
-		psr_context->frame_delay = 0;
-
-		link->psr_enabled = true;
-		dmcu->funcs->setup_psr(dmcu, link, psr_context);
-		return true;
-	} else
-		return false;
-
-}
-
 const struct dc_link_status *dc_link_get_status(const struct dc_link *link)
 {
 	return &link->link_status;
@@ -2418,6 +2310,9 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option)
 	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 		deallocate_mst_payload(pipe_ctx);
 
+	if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP)
+		core_dc->hwss.edp_backlight_control(pipe_ctx->stream->sink->link, false);
+
 	core_dc->hwss.disable_stream(pipe_ctx, option);
 
 	disable_link(pipe_ctx->stream->sink->link, pipe_ctx->stream->signal);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 00528b214a9fefa0be61429537670375335bdef8..61e8c3e02d1696bf08f53bb24cefdc074a74cf10 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -1470,6 +1470,12 @@ void decide_link_settings(struct dc_stream_state *stream,
 		return;
 	}
 
+	/* EDP use the link cap setting */
+	if (stream->sink->sink_signal == SIGNAL_TYPE_EDP) {
+		*link_setting = link->verified_link_cap;
+		return;
+	}
+
 	/* search for the minimum link setting that:
 	 * 1. is supported according to the link training result
 	 * 2. could support the b/w requested by the timing
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
index f2902569be2ef92138bfbd186ff7ee7b06bdb888..2096f2a179f2c5e3509ac1c60115049f3a6e9ee5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
@@ -88,15 +88,7 @@ void dp_enable_link_phy(
 	}
 
 	if (dc_is_dp_sst_signal(signal)) {
-		if (signal == SIGNAL_TYPE_EDP) {
-			link->dc->hwss.edp_power_control(link, true);
-			link_enc->funcs->enable_dp_output(
-						link_enc,
-						link_settings,
-						clock_source);
-			link->dc->hwss.edp_backlight_control(link, true);
-		} else
-			link_enc->funcs->enable_dp_output(
+		link_enc->funcs->enable_dp_output(
 						link_enc,
 						link_settings,
 						clock_source);
@@ -138,7 +130,6 @@ void dp_disable_link_phy(struct dc_link *link, enum signal_type signal)
 		dp_receiver_power_ctrl(link, false);
 
 	if (signal == SIGNAL_TYPE_EDP) {
-		link->dc->hwss.edp_backlight_control(link, false);
 		edp_receiver_ready_T9(link);
 		link->link_enc->funcs->disable_output(link->link_enc, signal);
 		link->dc->hwss.edp_power_control(link, false);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 9c5e879f18b36da0c991a7bcf86450ce487bba97..95b8dd0e53c6951a1e28e988fd4cb13dcaf04f0d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1,5 +1,5 @@
 /*
-* Copyright 2012-15 Advanced Micro Devices, Inc.
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -498,26 +498,15 @@ static void calculate_viewport(struct pipe_ctx *pipe_ctx)
 	data->viewport_c.height = (data->viewport.height + vpc_div - 1) / vpc_div;
 
 	/* Handle hsplit */
-	if (pri_split || sec_split) {
-		/* HMirror XOR Secondary_pipe XOR Rotation_180 */
-		bool right_view = (sec_split != plane_state->horizontal_mirror) !=
-					(plane_state->rotation == ROTATION_ANGLE_180);
-
-		if (plane_state->rotation == ROTATION_ANGLE_90
-				|| plane_state->rotation == ROTATION_ANGLE_270)
-			/* Secondary_pipe XOR Rotation_270 */
-			right_view = (plane_state->rotation == ROTATION_ANGLE_270) != sec_split;
-
-		if (right_view) {
-			data->viewport.x +=  data->viewport.width / 2;
-			data->viewport_c.x +=  data->viewport_c.width / 2;
-			/* Ceil offset pipe */
-			data->viewport.width = (data->viewport.width + 1) / 2;
-			data->viewport_c.width = (data->viewport_c.width + 1) / 2;
-		} else {
-			data->viewport.width /= 2;
-			data->viewport_c.width /= 2;
-		}
+	if (sec_split) {
+		data->viewport.x +=  data->viewport.width / 2;
+		data->viewport_c.x +=  data->viewport_c.width / 2;
+		/* Ceil offset pipe */
+		data->viewport.width = (data->viewport.width + 1) / 2;
+		data->viewport_c.width = (data->viewport_c.width + 1) / 2;
+	} else if (pri_split) {
+		data->viewport.width /= 2;
+		data->viewport_c.width /= 2;
 	}
 
 	if (plane_state->rotation == ROTATION_ANGLE_90 ||
@@ -534,6 +523,11 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct view *recout_skip
 	struct rect surf_src = plane_state->src_rect;
 	struct rect surf_clip = plane_state->clip_rect;
 	int recout_full_x, recout_full_y;
+	bool pri_split = pipe_ctx->bottom_pipe &&
+			pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state;
+	bool sec_split = pipe_ctx->top_pipe &&
+			pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state;
+	bool top_bottom_split = stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM;
 
 	if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 ||
 			pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270)
@@ -568,33 +562,43 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct view *recout_skip
 						- pipe_ctx->plane_res.scl_data.recout.y;
 
 	/* Handle h & vsplit */
-	if (pipe_ctx->top_pipe && pipe_ctx->top_pipe->plane_state ==
-		pipe_ctx->plane_state) {
-		if (stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) {
-			pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height / 2;
-			/* Floor primary pipe, ceil 2ndary pipe */
-			pipe_ctx->plane_res.scl_data.recout.height = (pipe_ctx->plane_res.scl_data.recout.height + 1) / 2;
+	if (sec_split && top_bottom_split) {
+		pipe_ctx->plane_res.scl_data.recout.y +=
+				pipe_ctx->plane_res.scl_data.recout.height / 2;
+		/* Floor primary pipe, ceil 2ndary pipe */
+		pipe_ctx->plane_res.scl_data.recout.height =
+				(pipe_ctx->plane_res.scl_data.recout.height + 1) / 2;
+	} else if (pri_split && top_bottom_split)
+		pipe_ctx->plane_res.scl_data.recout.height /= 2;
+	else if (pri_split || sec_split) {
+		/* HMirror XOR Secondary_pipe XOR Rotation_180 */
+		bool right_view = (sec_split != plane_state->horizontal_mirror) !=
+					(plane_state->rotation == ROTATION_ANGLE_180);
+
+		if (plane_state->rotation == ROTATION_ANGLE_90
+				|| plane_state->rotation == ROTATION_ANGLE_270)
+			/* Secondary_pipe XOR Rotation_270 */
+			right_view = (plane_state->rotation == ROTATION_ANGLE_270) != sec_split;
+
+		if (right_view) {
+			pipe_ctx->plane_res.scl_data.recout.x +=
+					pipe_ctx->plane_res.scl_data.recout.width / 2;
+			/* Ceil offset pipe */
+			pipe_ctx->plane_res.scl_data.recout.width =
+					(pipe_ctx->plane_res.scl_data.recout.width + 1) / 2;
 		} else {
-			pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width / 2;
-			pipe_ctx->plane_res.scl_data.recout.width = (pipe_ctx->plane_res.scl_data.recout.width + 1) / 2;
-		}
-	} else if (pipe_ctx->bottom_pipe &&
-			pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state) {
-		if (stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM)
-			pipe_ctx->plane_res.scl_data.recout.height /= 2;
-		else
 			pipe_ctx->plane_res.scl_data.recout.width /= 2;
+		}
 	}
-
 	/* Unclipped recout offset = stream dst offset + ((surf dst offset - stream surf_src offset)
 	 * 				* 1/ stream scaling ratio) - (surf surf_src offset * 1/ full scl
 	 * 				ratio)
 	 */
-	recout_full_x = stream->dst.x + (plane_state->dst_rect.x -  stream->src.x)
+	recout_full_x = stream->dst.x + (plane_state->dst_rect.x - stream->src.x)
 					* stream->dst.width / stream->src.width -
 			surf_src.x * plane_state->dst_rect.width / surf_src.width
 					* stream->dst.width / stream->src.width;
-	recout_full_y = stream->dst.y + (plane_state->dst_rect.y -  stream->src.y)
+	recout_full_y = stream->dst.y + (plane_state->dst_rect.y - stream->src.y)
 					* stream->dst.height / stream->src.height -
 			surf_src.y * plane_state->dst_rect.height / surf_src.height
 					* stream->dst.height / stream->src.height;
@@ -650,7 +654,20 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
 	struct rect src = pipe_ctx->plane_state->src_rect;
 	int vpc_div = (data->format == PIXEL_FORMAT_420BPP8
 			|| data->format == PIXEL_FORMAT_420BPP10) ? 2 : 1;
+	bool flip_vert_scan_dir = false, flip_horz_scan_dir = false;
 
+	/*
+	 * Need to calculate the scan direction for viewport to make adjustments
+	 */
+	if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_180) {
+		flip_vert_scan_dir = true;
+		flip_horz_scan_dir = true;
+	} else if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90)
+		flip_vert_scan_dir = true;
+	else if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270)
+		flip_horz_scan_dir = true;
+	if (pipe_ctx->plane_state->horizontal_mirror)
+		flip_horz_scan_dir = !flip_horz_scan_dir;
 
 	if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 ||
 			pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) {
@@ -715,7 +732,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
 	}
 
 	/* Adjust for non-0 viewport offset */
-	if (data->viewport.x) {
+	if (data->viewport.x && !flip_horz_scan_dir) {
 		int int_part;
 
 		data->inits.h = dal_fixed31_32_add(data->inits.h, dal_fixed31_32_mul_int(
@@ -736,7 +753,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
 		data->inits.h = dal_fixed31_32_add_int(data->inits.h, int_part);
 	}
 
-	if (data->viewport_c.x) {
+	if (data->viewport_c.x && !flip_horz_scan_dir) {
 		int int_part;
 
 		data->inits.h_c = dal_fixed31_32_add(data->inits.h_c, dal_fixed31_32_mul_int(
@@ -757,7 +774,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
 		data->inits.h_c = dal_fixed31_32_add_int(data->inits.h_c, int_part);
 	}
 
-	if (data->viewport.y) {
+	if (data->viewport.y && !flip_vert_scan_dir) {
 		int int_part;
 
 		data->inits.v = dal_fixed31_32_add(data->inits.v, dal_fixed31_32_mul_int(
@@ -778,7 +795,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
 		data->inits.v = dal_fixed31_32_add_int(data->inits.v, int_part);
 	}
 
-	if (data->viewport_c.y) {
+	if (data->viewport_c.y && !flip_vert_scan_dir) {
 		int int_part;
 
 		data->inits.v_c = dal_fixed31_32_add(data->inits.v_c, dal_fixed31_32_mul_int(
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 375fb457e22353c0714464aa1142dc867879d190..261811e0c094a81a0a1c0c1c6420f83d4db4a3bc 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -226,7 +226,7 @@ bool dc_stream_set_cursor_attributes(
 		if (pipe_ctx->plane_res.dpp != NULL &&
 				pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes != NULL)
 			pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes(
-				pipe_ctx->plane_res.dpp, attributes);
+				pipe_ctx->plane_res.dpp, attributes->color_format);
 	}
 
 	stream->cursor_attributes = *attributes;
@@ -301,6 +301,8 @@ bool dc_stream_set_cursor_position(
 
 	}
 
+	stream->cursor_position = *position;
+
 	return true;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index c99ed85ba9a263ffda28ca0a242ada1bffc2edeb..e2e3c9df79ea0f976dbbc6f6bf889169ccce0484 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -38,7 +38,7 @@
 #include "inc/compressor.h"
 #include "dml/display_mode_lib.h"
 
-#define DC_VER "3.1.20"
+#define DC_VER "3.1.27"
 
 #define MAX_SURFACES 3
 #define MAX_STREAMS 6
@@ -250,6 +250,8 @@ struct dc {
 	 */
 	struct dm_pp_display_configuration prev_display_config;
 
+	bool optimized_required;
+
 	/* FBC compressor */
 #if defined(CONFIG_DRM_AMD_DC_FBC)
 	struct compressor *fbc_compressor;
@@ -340,7 +342,7 @@ struct dc_hdr_static_metadata {
 enum dc_transfer_func_type {
 	TF_TYPE_PREDEFINED,
 	TF_TYPE_DISTRIBUTED_POINTS,
-	TF_TYPE_BYPASS
+	TF_TYPE_BYPASS,
 };
 
 struct dc_transfer_func_distributed_points {
@@ -359,6 +361,7 @@ enum dc_transfer_func_predefined {
 	TRANSFER_FUNCTION_BT709,
 	TRANSFER_FUNCTION_PQ,
 	TRANSFER_FUNCTION_LINEAR,
+	TRANSFER_FUNCTION_UNITY,
 };
 
 struct dc_transfer_func {
@@ -385,6 +388,7 @@ union surface_update_flags {
 
 	struct {
 		/* Medium updates */
+		uint32_t dcc_change:1;
 		uint32_t color_space_change:1;
 		uint32_t input_tf_change:1;
 		uint32_t horizontal_mirror_change:1;
@@ -436,6 +440,7 @@ struct dc_plane_state {
 	enum dc_rotation_angle rotation;
 	enum plane_stereo_format stereo_format;
 
+	bool is_tiling_rotated;
 	bool per_pixel_alpha;
 	bool visible;
 	bool flip_immediate;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index c584252669fde5a73813aee0b5a79ab51aeb9c76..48e1fcf53d43499d56980f36c35dd87ac8fb521d 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -1,3 +1,25 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
 /*
  * dc_helper.c
  *
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index 587c0bb3d4ac5193b6dd0f21e6005f6320b98425..03029f72dc3f31f902438edeafabf73f502a57c9 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -579,8 +579,6 @@ enum dc_timing_standard {
 	TIMING_STANDARD_MAX
 };
 
-
-
 enum dc_color_depth {
 	COLOR_DEPTH_UNDEFINED,
 	COLOR_DEPTH_666,
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index fed0e5ea96256fc74f86c84b0e78904dc02b5f1d..01c60f11b2bdeec208f5df9e5cfd2092a614eac5 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -86,6 +86,7 @@ struct dc_stream_state {
 	struct dc_stream_status status;
 
 	struct dc_cursor_attributes cursor_attributes;
+	struct dc_cursor_position cursor_position;
 
 	/* from stream struct */
 	struct kref refcount;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 9291a60126ad3926df576288bb93cfbb27c0dcfd..9faddfae241dd2d382b7b9b0027264624b9e5e3b 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -218,6 +218,7 @@ struct dc_edid_caps {
 	bool lte_340mcsc_scramble;
 
 	bool edid_hdmi;
+	bool hdr_supported;
 };
 
 struct view {
diff --git a/drivers/gpu/drm/amd/display/dc/dce/Makefile b/drivers/gpu/drm/amd/display/dc/dce/Makefile
index 8abec0bed379c15c4c3fb52f620af53ee4af6fb7..11401fd8e5356c8e4866f6660df34ff2da1e155b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for common 'dce' logic
 # HW object file under this folder follow similar pattern for HW programming
 #   - register offset and/or shift + mask stored in the dec_hw struct
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index 3fe8e697483fff2c9b33d7c5e3f948c7cc68ac38..b48190f549079bb113dfc7d370fa8c7ff0c59439 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -385,21 +385,12 @@ static bool dce_abm_init_backlight(struct abm *abm)
 	return true;
 }
 
-static bool is_dmcu_initialized(struct abm *abm)
-{
-	struct dce_abm *abm_dce = TO_DCE_ABM(abm);
-	unsigned int dmcu_uc_reset;
-
-	REG_GET(DMCU_STATUS, UC_IN_RESET, &dmcu_uc_reset);
-
-	return !dmcu_uc_reset;
-}
-
 static bool dce_abm_set_backlight_level(
 		struct abm *abm,
 		unsigned int backlight_level,
 		unsigned int frame_ramp,
-		unsigned int controller_id)
+		unsigned int controller_id,
+		bool use_smooth_brightness)
 {
 	struct dce_abm *abm_dce = TO_DCE_ABM(abm);
 
@@ -408,7 +399,7 @@ static bool dce_abm_set_backlight_level(
 			backlight_level, backlight_level);
 
 	/* If DMCU is in reset state, DMCU is uninitialized */
-	if (is_dmcu_initialized(abm))
+	if (use_smooth_brightness)
 		dmcu_set_backlight_level(abm_dce,
 				backlight_level,
 				frame_ramp,
@@ -425,8 +416,7 @@ static const struct abm_funcs dce_funcs = {
 	.init_backlight = dce_abm_init_backlight,
 	.set_backlight_level = dce_abm_set_backlight_level,
 	.get_current_backlight_8_bit = dce_abm_get_current_backlight_8_bit,
-	.set_abm_immediate_disable = dce_abm_immediate_disable,
-	.is_dmcu_initialized = is_dmcu_initialized
+	.set_abm_immediate_disable = dce_abm_immediate_disable
 };
 
 static void dce_abm_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
index 59e909ec88f22778d9917fb754875a9e79da0c59..ff9436966041098ed84e586240cf779854301dbe 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
@@ -37,8 +37,7 @@
 	SR(LVTMA_PWRSEQ_REF_DIV), \
 	SR(MASTER_COMM_CNTL_REG), \
 	SR(MASTER_COMM_CMD_REG), \
-	SR(MASTER_COMM_DATA_REG1), \
-	SR(DMCU_STATUS)
+	SR(MASTER_COMM_DATA_REG1)
 
 #define ABM_DCE110_COMMON_REG_LIST() \
 	ABM_COMMON_REG_LIST_DCE_BASE(), \
@@ -84,8 +83,7 @@
 	ABM_SF(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, mask_sh), \
 	ABM_SF(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0, mask_sh), \
 	ABM_SF(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE1, mask_sh), \
-	ABM_SF(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE2, mask_sh), \
-	ABM_SF(DMCU_STATUS, UC_IN_RESET, mask_sh)
+	ABM_SF(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE2, mask_sh)
 
 #define ABM_MASK_SH_LIST_DCE110(mask_sh) \
 	ABM_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh), \
@@ -174,7 +172,6 @@
 	type MASTER_COMM_CMD_REG_BYTE2; \
 	type BL_PWM_REF_DIV; \
 	type BL_PWM_EN; \
-	type UC_IN_RESET; \
 	type BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN; \
 	type BL_PWM_GRP1_REG_LOCK; \
 	type BL_PWM_GRP1_REG_UPDATE_PENDING
@@ -206,7 +203,6 @@ struct dce_abm_registers {
 	uint32_t MASTER_COMM_CMD_REG;
 	uint32_t MASTER_COMM_DATA_REG1;
 	uint32_t BIOS_SCRATCH_2;
-	uint32_t DMCU_STATUS;
 	uint32_t BL_PWM_GRP1_REG_LOCK;
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c
index 9031d22285eab98578db34ea956f73bb6f6b300e..9e98a5f39a6dbbec76a9bb296f6327b20ef34fd0 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c
@@ -29,7 +29,6 @@
 #include "fixed32_32.h"
 #include "bios_parser_interface.h"
 #include "dc.h"
-#include "dce_abm.h"
 #include "dmcu.h"
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 #include "dcn_calcs.h"
@@ -384,7 +383,6 @@ static int dce112_set_clock(
 	struct bp_set_dce_clock_parameters dce_clk_params;
 	struct dc_bios *bp = clk->ctx->dc_bios;
 	struct dc *core_dc = clk->ctx->dc;
-	struct abm *abm =  core_dc->res_pool->abm;
 	struct dmcu *dmcu = core_dc->res_pool->dmcu;
 	int actual_clock = requested_clk_khz;
 	/* Prepare to program display clock*/
@@ -417,7 +415,7 @@ static int dce112_set_clock(
 
 	bp->funcs->set_dce_clock(bp, &dce_clk_params);
 
-	if (abm->funcs->is_dmcu_initialized(abm) && clk_dce->dfs_bypass_disp_clk != actual_clock)
+	if (clk_dce->dfs_bypass_disp_clk != actual_clock)
 		dmcu->funcs->set_psr_wait_loop(dmcu,
 				actual_clock / 1000 / 7);
 	clk_dce->dfs_bypass_disp_clk = actual_clock;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
index a6de99db0444deb729c9da06ca212b9d70109204..f663adb335849746af6c420d69cb424684c30bb5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
@@ -263,15 +263,35 @@ static void dce_dmcu_setup_psr(struct dmcu *dmcu,
 	REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1);
 }
 
+static bool dce_is_dmcu_initialized(struct dmcu *dmcu)
+{
+	struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu);
+	unsigned int dmcu_uc_reset;
+
+	/* microcontroller is not running */
+	REG_GET(DMCU_STATUS, UC_IN_RESET, &dmcu_uc_reset);
+
+	/* DMCU is not running */
+	if (dmcu_uc_reset)
+		return false;
+
+	return true;
+}
+
 static void dce_psr_wait_loop(
 	struct dmcu *dmcu,
 	unsigned int wait_loop_number)
 {
 	struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu);
 	union dce_dmcu_psr_config_data_wait_loop_reg1 masterCmdData1;
+
 	if (dmcu->cached_wait_loop_number == wait_loop_number)
 		return;
 
+	/* DMCU is not running */
+	if (!dce_is_dmcu_initialized(dmcu))
+		return;
+
 	/* waitDMCUReadyForCmd */
 	REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 1, 10000);
 
@@ -691,6 +711,14 @@ static void dcn10_get_psr_wait_loop(
 	return;
 }
 
+static bool dcn10_is_dmcu_initialized(struct dmcu *dmcu)
+{
+	/* microcontroller is not running */
+	if (dmcu->dmcu_state != DMCU_RUNNING)
+		return false;
+	return true;
+}
+
 #endif
 
 static const struct dmcu_funcs dce_funcs = {
@@ -700,7 +728,8 @@ static const struct dmcu_funcs dce_funcs = {
 	.setup_psr = dce_dmcu_setup_psr,
 	.get_psr_state = dce_get_dmcu_psr_state,
 	.set_psr_wait_loop = dce_psr_wait_loop,
-	.get_psr_wait_loop = dce_get_psr_wait_loop
+	.get_psr_wait_loop = dce_get_psr_wait_loop,
+	.is_dmcu_initialized = dce_is_dmcu_initialized
 };
 
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
@@ -711,7 +740,8 @@ static const struct dmcu_funcs dcn10_funcs = {
 	.setup_psr = dcn10_dmcu_setup_psr,
 	.get_psr_state = dcn10_get_dmcu_psr_state,
 	.set_psr_wait_loop = dcn10_psr_wait_loop,
-	.get_psr_wait_loop = dcn10_get_psr_wait_loop
+	.get_psr_wait_loop = dcn10_get_psr_wait_loop,
+	.is_dmcu_initialized = dcn10_is_dmcu_initialized
 };
 #endif
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h
index 4c25e2dd28f8ee4a6bebcca0e272992ea26a181c..1d4546f2313506a23449fccb137a834526bf0d23 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h
@@ -62,6 +62,8 @@
 			DMCU_ENABLE, mask_sh), \
 	DMCU_SF(DMCU_STATUS, \
 			UC_IN_STOP_MODE, mask_sh), \
+	DMCU_SF(DMCU_STATUS, \
+			UC_IN_RESET, mask_sh), \
 	DMCU_SF(DMCU_RAM_ACCESS_CTRL, \
 			IRAM_HOST_ACCESS_EN, mask_sh), \
 	DMCU_SF(DMCU_RAM_ACCESS_CTRL, \
@@ -98,6 +100,7 @@
 	type IRAM_RD_ADDR_AUTO_INC; \
 	type DMCU_ENABLE; \
 	type UC_IN_STOP_MODE; \
+	type UC_IN_RESET; \
 	type MASTER_COMM_CMD_REG_BYTE0; \
 	type MASTER_COMM_INTERRUPT; \
 	type DPHY_RX_FAST_TRAINING_CAPABLE; \
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
index 3b0db253ac22f7b0afc841ad00fa7c829f477cf5..b73db9e784375618f87422f143cf36d00e1ce552 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
@@ -582,7 +582,8 @@ struct dce_hwseq_registers {
 	type DOMAIN7_PGFSM_PWR_STATUS; \
 	type DCFCLK_GATE_DIS; \
 	type DCHUBBUB_GLOBAL_TIMER_REFDIV; \
-	type DENTIST_DPPCLK_WDIVIDER;
+	type DENTIST_DPPCLK_WDIVIDER; \
+	type DENTIST_DISPCLK_WDIVIDER;
 
 struct dce_hwseq_shift {
 	HWSEQ_REG_FIELD_LIST(uint8_t)
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
index bad70c6b3aad5214ad07d0c8a3fc4378c6bb15dc..a266e3f5e75fd7ae82a46f7d3e09f900660c72cc 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
@@ -1072,21 +1072,6 @@ void dce110_link_encoder_disable_output(
 	/* disable encoder */
 	if (dc_is_dp_signal(signal))
 		link_encoder_disable(enc110);
-
-	/*
-	 * TODO: Power control cause regression, we should implement
-	 * it properly, for now just comment it.
-	 */
-//	if (enc110->base.connector.id == CONNECTOR_ID_EDP) {
-//		/* power down eDP panel */
-//		link_encoder_edp_wait_for_hpd_ready(
-//				enc,
-//				enc->connector,
-//				false);
-//
-//		link_encoder_edp_power_control(
-//				enc, false);
-//	}
 }
 
 void dce110_link_encoder_dp_set_lane_settings(
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/Makefile b/drivers/gpu/drm/amd/display/dc/dce100/Makefile
index ea40870624b3884b41b2369987ecc850919f4f29..a822d4e2a1693881f090d776f6fbc11ab68d5f24 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce100/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'controller' sub-component of DAL.
 # It provides the control and status of HW CRTC block.
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
index 90911258bdb3ac92a490846199dfd85ba701ca1a..3ea43e2a9450ce562cd01b8a5c18a9ee060d2a62 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
@@ -1,5 +1,5 @@
 /*
-* Copyright 2012-15 Advanced Micro Devices, Inc.
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
index de8fdf438f9b34125d7179aa6af941f68f18453a..2f366d66635d8f11ed566e025a69fafa6df96711 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
@@ -1,3 +1,26 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
 /*
  * dce100_resource.h
  *
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
index 98d956e2f218f7d2e4142f586513276b48f5a108..d564c0eb8b045393ae235c6169b6c85b6af94693 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'controller' sub-component of DAL.
 # It provides the control and status of HW CRTC block.
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index e650bdcd9423b22e3d07cfb95b4e052059867ad1..86cdd7b4811fb7f1195ce7d8e73db9e8e42c6c3a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -354,8 +354,8 @@ static bool convert_to_custom_float(struct pwl_result_data *rgb_resulted,
 		return false;
 	}
 
-	if (!convert_to_custom_float_format(arr_points[2].slope, &fmt,
-					    &arr_points[2].custom_float_slope)) {
+	if (!convert_to_custom_float_format(arr_points[1].slope, &fmt,
+					    &arr_points[1].custom_float_slope)) {
 		BREAK_TO_DEBUGGER();
 		return false;
 	}
@@ -870,8 +870,6 @@ void hwss_edp_power_control(
 				"%s: Skipping Panel Power action: %s\n",
 				__func__, (power_up ? "On":"Off"));
 	}
-
-	hwss_edp_wait_for_hpd_ready(link, true);
 }
 
 /*todo: cloned in stream enc, fix*/
@@ -972,11 +970,9 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option)
 	}
 
 	/* blank at encoder level */
-	if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
-		if (pipe_ctx->stream->sink->link->connector_signal == SIGNAL_TYPE_EDP)
-			hwss_edp_backlight_control(link, false);
+	if (dc_is_dp_signal(pipe_ctx->stream->signal))
 		pipe_ctx->stream_res.stream_enc->funcs->dp_blank(pipe_ctx->stream_res.stream_enc);
-	}
+
 	link->link_enc->funcs->connect_dig_be_to_fe(
 			link->link_enc,
 			pipe_ctx->stream_res.stream_enc->id,
@@ -988,15 +984,12 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx,
 		struct dc_link_settings *link_settings)
 {
 	struct encoder_unblank_param params = { { 0 } };
-	struct dc_link *link = pipe_ctx->stream->sink->link;
 
 	/* only 3 items below are used by unblank */
 	params.pixel_clk_khz =
 		pipe_ctx->stream->timing.pix_clk_khz;
 	params.link_settings.link_rate = link_settings->link_rate;
 	pipe_ctx->stream_res.stream_enc->funcs->dp_unblank(pipe_ctx->stream_res.stream_enc, &params);
-	if (link->connector_signal == SIGNAL_TYPE_EDP)
-		hwss_edp_backlight_control(link, true);
 }
 
 
@@ -1342,10 +1335,8 @@ static void power_down_encoders(struct dc *dc)
 
 			if (!dc->links[i]->wa_flags.dp_keep_receiver_powered)
 				dp_receiver_power_ctrl(dc->links[i], false);
-			if (connector_id == CONNECTOR_ID_EDP) {
+			if (connector_id == CONNECTOR_ID_EDP)
 				signal = SIGNAL_TYPE_EDP;
-				hwss_edp_backlight_control(dc->links[i], false);
-			}
 		}
 
 		dc->links[i]->link_enc->funcs->disable_output(
@@ -1698,60 +1689,54 @@ static void apply_min_clocks(
 /*
  *  Check if FBC can be enabled
  */
-static enum dc_status validate_fbc(struct dc *dc,
-		struct dc_state *context)
+static bool should_enable_fbc(struct dc *dc,
+			      struct dc_state *context)
 {
-	struct pipe_ctx *pipe_ctx =
-			      &context->res_ctx.pipe_ctx[0];
+	struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[0];
 
 	ASSERT(dc->fbc_compressor);
 
 	/* FBC memory should be allocated */
 	if (!dc->ctx->fbc_gpu_addr)
-		return DC_ERROR_UNEXPECTED;
+		return false;
 
 	/* Only supports single display */
 	if (context->stream_count != 1)
-		return DC_ERROR_UNEXPECTED;
+		return false;
 
 	/* Only supports eDP */
 	if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP)
-		return DC_ERROR_UNEXPECTED;
+		return false;
 
 	/* PSR should not be enabled */
 	if (pipe_ctx->stream->sink->link->psr_enabled)
-		return DC_ERROR_UNEXPECTED;
+		return false;
 
 	/* Nothing to compress */
 	if (!pipe_ctx->plane_state)
-		return DC_ERROR_UNEXPECTED;
+		return false;
 
 	/* Only for non-linear tiling */
 	if (pipe_ctx->plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL)
-		return DC_ERROR_UNEXPECTED;
+		return false;
 
-	return DC_OK;
+	return true;
 }
 
 /*
  *  Enable FBC
  */
-static enum dc_status enable_fbc(struct dc *dc,
-		struct dc_state *context)
+static void enable_fbc(struct dc *dc,
+		       struct dc_state *context)
 {
-	enum dc_status status = validate_fbc(dc, context);
-
-	if (status == DC_OK) {
+	if (should_enable_fbc(dc, context)) {
 		/* Program GRPH COMPRESSED ADDRESS and PITCH */
 		struct compr_addr_and_pitch_params params = {0, 0, 0};
 		struct compressor *compr = dc->fbc_compressor;
-		struct pipe_ctx *pipe_ctx =
-				      &context->res_ctx.pipe_ctx[0];
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[0];
 
-		params.source_view_width =
-				pipe_ctx->stream->timing.h_addressable;
-		params.source_view_height =
-				pipe_ctx->stream->timing.v_addressable;
+		params.source_view_width = pipe_ctx->stream->timing.h_addressable;
+		params.source_view_height = pipe_ctx->stream->timing.v_addressable;
 
 		compr->compr_surface_address.quad_part = dc->ctx->fbc_gpu_addr;
 
@@ -1760,7 +1745,6 @@ static enum dc_status enable_fbc(struct dc *dc,
 
 		compr->funcs->enable_fbc(compr, &params);
 	}
-	return status;
 }
 #endif
 
@@ -2026,8 +2010,7 @@ enum dc_status dce110_apply_ctx_to_hw(
 		if (pipe_ctx->stream == pipe_ctx_old->stream)
 			continue;
 
-		if (pipe_ctx->stream && pipe_ctx_old->stream
-				&& !pipe_need_reprogram(pipe_ctx_old, pipe_ctx))
+		if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx))
 			continue;
 
 		if (pipe_ctx->top_pipe)
@@ -2063,9 +2046,6 @@ enum dc_status dce110_apply_ctx_to_hw(
 				context,
 				dc);
 
-		if (dc->hwss.enable_plane)
-			dc->hwss.enable_plane(dc, pipe_ctx, context);
-
 		if (DC_OK != status)
 			return status;
 	}
@@ -2095,16 +2075,8 @@ static void set_default_colors(struct pipe_ctx *pipe_ctx)
 	struct default_adjustment default_adjust = { 0 };
 
 	default_adjust.force_hw_default = false;
-	if (pipe_ctx->plane_state == NULL)
-		default_adjust.in_color_space = COLOR_SPACE_SRGB;
-	else
-		default_adjust.in_color_space =
-				pipe_ctx->plane_state->color_space;
-	if (pipe_ctx->stream == NULL)
-		default_adjust.out_color_space = COLOR_SPACE_SRGB;
-	else
-		default_adjust.out_color_space =
-				pipe_ctx->stream->output_color_space;
+	default_adjust.in_color_space = pipe_ctx->plane_state->color_space;
+	default_adjust.out_color_space = pipe_ctx->stream->output_color_space;
 	default_adjust.csc_adjust_type = GRAPHICS_CSC_ADJUST_TYPE_SW;
 	default_adjust.surface_pixel_format = pipe_ctx->plane_res.scl_data.format;
 
@@ -2872,13 +2844,12 @@ static void dce110_apply_ctx_for_surface(
 			continue;
 
 		/* Need to allocate mem before program front end for Fiji */
-		if (pipe_ctx->plane_res.mi != NULL)
-			pipe_ctx->plane_res.mi->funcs->allocate_mem_input(
-					pipe_ctx->plane_res.mi,
-					pipe_ctx->stream->timing.h_total,
-					pipe_ctx->stream->timing.v_total,
-					pipe_ctx->stream->timing.pix_clk_khz,
-					context->stream_count);
+		pipe_ctx->plane_res.mi->funcs->allocate_mem_input(
+				pipe_ctx->plane_res.mi,
+				pipe_ctx->stream->timing.h_total,
+				pipe_ctx->stream->timing.v_total,
+				pipe_ctx->stream->timing.pix_clk_khz,
+				context->stream_count);
 
 		dce110_program_front_end_for_pipe(dc, pipe_ctx);
 
@@ -2985,6 +2956,7 @@ static const struct hw_sequencer_funcs dce110_funcs = {
 	.pplib_apply_display_requirements = pplib_apply_display_requirements,
 	.edp_backlight_control = hwss_edp_backlight_control,
 	.edp_power_control = hwss_edp_power_control,
+	.edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready,
 };
 
 void dce110_hw_sequencer_construct(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h
index 2dd6ac63757292365ebd69e65591c241b0440ba4..fc637647f64331a568d3c62f1a3ed7e73d07afc2 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h
@@ -77,5 +77,9 @@ void hwss_edp_backlight_control(
 	struct dc_link *link,
 	bool enable);
 
+void hwss_edp_wait_for_hpd_ready(
+		struct dc_link *link,
+		bool power_up);
+
 #endif /* __DC_HWSS_DCE110_H__ */
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index 5228ee78f7e6dc38384cb96f0bc733531c5cce68..7c4779578fb76528caef31a1a531f73214c86059 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -1,5 +1,5 @@
 /*
-* Copyright 2012-15 Advanced Micro Devices, Inc.
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
index 07d9303d54772581a42fc1888b3267c0f44e728d..59b4cd3297155129096e7a16c4edd4ece401fd5a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
@@ -1,3 +1,26 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
 #include "dm_services.h"
 
 /* include DCE11 register header files */
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
index 265ac4310d855cd8ad8493ec7cb6d7de7bdd750f..8e090446d511937361db77ed56471a44d6ed897b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'controller' sub-component of DAL.
 # It provides the control and status of HW CRTC block.
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
index 1779b963525cda70d14c855a0e0acd0eb336cd63..37db1f8d45ea547862b07b2503c4a55d6485c8ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'controller' sub-component of DAL.
 # It provides the control and status of HW CRTC block.
 
@@ -8,4 +29,4 @@ dce120_hw_sequencer.o
 
 AMD_DAL_DCE120 = $(addprefix $(AMDDALPATH)/dc/dce120/,$(DCE120))
 
-AMD_DISPLAY_FILES += $(AMD_DAL_DCE120)
\ No newline at end of file
+AMD_DISPLAY_FILES += $(AMD_DAL_DCE120)
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
index c1105895e5facd2cf3619e5432ba620123b926d5..bc388aa4b2f50cdd5c376b1f1833493dda24f550 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'controller' sub-component of DAL.
 # It provides the control and status of HW CRTC block.
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index a6ca1f97f7486cd51f7084dc1767be67409b2d5d..5469bdfe19f35125842b50bed2e169ab71df822d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -1,8 +1,29 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for DCN.
 
 DCN10 = dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \
-		dcn10_dpp.o dcn10_opp.o dcn10_timing_generator.o \
+		dcn10_dpp.o dcn10_opp.o dcn10_optc.o \
 		dcn10_hubp.o dcn10_mpc.o \
 		dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \
 		dcn10_hubbub.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
index 7f579cb19f4bb08ad2af104c9b693e1d5e250e78..53ba3600ee6ac9622ef5c54201b3dbc17df4de04 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
@@ -22,11 +22,12 @@
  * Authors: AMD
  *
  */
-
+#include "dc.h"
 #include "reg_helper.h"
 #include "dcn10_dpp.h"
 
 #include "dcn10_cm_common.h"
+#include "custom_float.h"
 
 #define REG(reg) reg
 
@@ -121,3 +122,294 @@ void cm_helper_program_xfer_func(
 	}
 
 }
+
+
+
+bool cm_helper_convert_to_custom_float(
+		struct pwl_result_data *rgb_resulted,
+		struct curve_points *arr_points,
+		uint32_t hw_points_num,
+		bool fixpoint)
+{
+	struct custom_float_format fmt;
+
+	struct pwl_result_data *rgb = rgb_resulted;
+
+	uint32_t i = 0;
+
+	fmt.exponenta_bits = 6;
+	fmt.mantissa_bits = 12;
+	fmt.sign = false;
+
+	if (!convert_to_custom_float_format(arr_points[0].x, &fmt,
+					    &arr_points[0].custom_float_x)) {
+		BREAK_TO_DEBUGGER();
+		return false;
+	}
+
+	if (!convert_to_custom_float_format(arr_points[0].offset, &fmt,
+					    &arr_points[0].custom_float_offset)) {
+		BREAK_TO_DEBUGGER();
+		return false;
+	}
+
+	if (!convert_to_custom_float_format(arr_points[0].slope, &fmt,
+					    &arr_points[0].custom_float_slope)) {
+		BREAK_TO_DEBUGGER();
+		return false;
+	}
+
+	fmt.mantissa_bits = 10;
+	fmt.sign = false;
+
+	if (!convert_to_custom_float_format(arr_points[1].x, &fmt,
+					    &arr_points[1].custom_float_x)) {
+		BREAK_TO_DEBUGGER();
+		return false;
+	}
+
+	if (fixpoint == true)
+		arr_points[1].custom_float_y = dal_fixed31_32_clamp_u0d14(arr_points[1].y);
+	else if (!convert_to_custom_float_format(arr_points[1].y, &fmt,
+		&arr_points[1].custom_float_y)) {
+		BREAK_TO_DEBUGGER();
+		return false;
+	}
+
+	if (!convert_to_custom_float_format(arr_points[1].slope, &fmt,
+					    &arr_points[1].custom_float_slope)) {
+		BREAK_TO_DEBUGGER();
+		return false;
+	}
+
+	if (hw_points_num == 0 || rgb_resulted == NULL || fixpoint == true)
+		return true;
+
+	fmt.mantissa_bits = 12;
+	fmt.sign = true;
+
+	while (i != hw_points_num) {
+		if (!convert_to_custom_float_format(rgb->red, &fmt,
+						    &rgb->red_reg)) {
+			BREAK_TO_DEBUGGER();
+			return false;
+		}
+
+		if (!convert_to_custom_float_format(rgb->green, &fmt,
+						    &rgb->green_reg)) {
+			BREAK_TO_DEBUGGER();
+			return false;
+		}
+
+		if (!convert_to_custom_float_format(rgb->blue, &fmt,
+						    &rgb->blue_reg)) {
+			BREAK_TO_DEBUGGER();
+			return false;
+		}
+
+		if (!convert_to_custom_float_format(rgb->delta_red, &fmt,
+						    &rgb->delta_red_reg)) {
+			BREAK_TO_DEBUGGER();
+			return false;
+		}
+
+		if (!convert_to_custom_float_format(rgb->delta_green, &fmt,
+						    &rgb->delta_green_reg)) {
+			BREAK_TO_DEBUGGER();
+			return false;
+		}
+
+		if (!convert_to_custom_float_format(rgb->delta_blue, &fmt,
+						    &rgb->delta_blue_reg)) {
+			BREAK_TO_DEBUGGER();
+			return false;
+		}
+
+		++rgb;
+		++i;
+	}
+
+	return true;
+}
+
+
+#define MAX_REGIONS_NUMBER 34
+#define MAX_LOW_POINT      25
+#define NUMBER_SEGMENTS    32
+
+bool cm_helper_translate_curve_to_hw_format(
+				const struct dc_transfer_func *output_tf,
+				struct pwl_params *lut_params, bool fixpoint)
+{
+	struct curve_points *arr_points;
+	struct pwl_result_data *rgb_resulted;
+	struct pwl_result_data *rgb;
+	struct pwl_result_data *rgb_plus_1;
+	struct fixed31_32 y_r;
+	struct fixed31_32 y_g;
+	struct fixed31_32 y_b;
+	struct fixed31_32 y1_min;
+	struct fixed31_32 y3_max;
+
+	int32_t segment_start, segment_end;
+	int32_t i;
+	uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points;
+
+	if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS)
+		return false;
+
+	PERF_TRACE();
+
+	arr_points = lut_params->arr_points;
+	rgb_resulted = lut_params->rgb_resulted;
+	hw_points = 0;
+
+	memset(lut_params, 0, sizeof(struct pwl_params));
+	memset(seg_distr, 0, sizeof(seg_distr));
+
+	if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
+		/* 32 segments
+		 * segments are from 2^-25 to 2^7
+		 */
+		for (i = 0; i < 32 ; i++)
+			seg_distr[i] = 3;
+
+		segment_start = -25;
+		segment_end   = 7;
+	} else {
+		/* 10 segments
+		 * segment is from 2^-10 to 2^0
+		 * There are less than 256 points, for optimization
+		 */
+		seg_distr[0] = 3;
+		seg_distr[1] = 4;
+		seg_distr[2] = 4;
+		seg_distr[3] = 4;
+		seg_distr[4] = 4;
+		seg_distr[5] = 4;
+		seg_distr[6] = 4;
+		seg_distr[7] = 4;
+		seg_distr[8] = 5;
+		seg_distr[9] = 5;
+
+		segment_start = -10;
+		segment_end = 0;
+	}
+
+	for (i = segment_end - segment_start; i < MAX_REGIONS_NUMBER ; i++)
+		seg_distr[i] = -1;
+
+	for (k = 0; k < MAX_REGIONS_NUMBER; k++) {
+		if (seg_distr[k] != -1)
+			hw_points += (1 << seg_distr[k]);
+	}
+
+	j = 0;
+	for (k = 0; k < (segment_end - segment_start); k++) {
+		increment = NUMBER_SEGMENTS / (1 << seg_distr[k]);
+		start_index = (segment_start + k + MAX_LOW_POINT) * NUMBER_SEGMENTS;
+		for (i = start_index; i < start_index + NUMBER_SEGMENTS; i += increment) {
+			if (j == hw_points - 1)
+				break;
+			rgb_resulted[j].red = output_tf->tf_pts.red[i];
+			rgb_resulted[j].green = output_tf->tf_pts.green[i];
+			rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
+			j++;
+		}
+	}
+
+	/* last point */
+	start_index = (segment_end + MAX_LOW_POINT) * NUMBER_SEGMENTS;
+	rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
+	rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
+	rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
+
+	arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
+					     dal_fixed31_32_from_int(segment_start));
+	arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
+					     dal_fixed31_32_from_int(segment_end));
+
+	y_r = rgb_resulted[0].red;
+	y_g = rgb_resulted[0].green;
+	y_b = rgb_resulted[0].blue;
+
+	y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b));
+
+	arr_points[0].y = y1_min;
+	arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y, arr_points[0].x);
+	y_r = rgb_resulted[hw_points - 1].red;
+	y_g = rgb_resulted[hw_points - 1].green;
+	y_b = rgb_resulted[hw_points - 1].blue;
+
+	/* see comment above, m_arrPoints[1].y should be the Y value for the
+	 * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
+	 */
+	y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b));
+
+	arr_points[1].y = y3_max;
+
+	arr_points[1].slope = dal_fixed31_32_zero;
+
+	if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
+		/* for PQ, we want to have a straight line from last HW X point,
+		 * and the slope to be such that we hit 1.0 at 10000 nits.
+		 */
+		const struct fixed31_32 end_value =
+				dal_fixed31_32_from_int(125);
+
+		arr_points[1].slope = dal_fixed31_32_div(
+			dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y),
+			dal_fixed31_32_sub(end_value, arr_points[1].x));
+	}
+
+	lut_params->hw_points_num = hw_points;
+
+	i = 1;
+	for (k = 0; k < MAX_REGIONS_NUMBER && i < MAX_REGIONS_NUMBER; k++) {
+		if (seg_distr[k] != -1) {
+			lut_params->arr_curve_points[k].segments_num =
+					seg_distr[k];
+			lut_params->arr_curve_points[i].offset =
+					lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]);
+		}
+		i++;
+	}
+
+	if (seg_distr[k] != -1)
+		lut_params->arr_curve_points[k].segments_num = seg_distr[k];
+
+	rgb = rgb_resulted;
+	rgb_plus_1 = rgb_resulted + 1;
+
+	i = 1;
+	while (i != hw_points + 1) {
+		if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red))
+			rgb_plus_1->red = rgb->red;
+		if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green))
+			rgb_plus_1->green = rgb->green;
+		if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue))
+			rgb_plus_1->blue = rgb->blue;
+
+		rgb->delta_red   = dal_fixed31_32_sub(rgb_plus_1->red,   rgb->red);
+		rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green);
+		rgb->delta_blue  = dal_fixed31_32_sub(rgb_plus_1->blue,  rgb->blue);
+
+		if (fixpoint == true) {
+			rgb->delta_red_reg   = dal_fixed31_32_clamp_u0d10(rgb->delta_red);
+			rgb->delta_green_reg = dal_fixed31_32_clamp_u0d10(rgb->delta_green);
+			rgb->delta_blue_reg  = dal_fixed31_32_clamp_u0d10(rgb->delta_blue);
+			rgb->red_reg         = dal_fixed31_32_clamp_u0d14(rgb->red);
+			rgb->green_reg       = dal_fixed31_32_clamp_u0d14(rgb->green);
+			rgb->blue_reg        = dal_fixed31_32_clamp_u0d14(rgb->blue);
+		}
+
+		++rgb_plus_1;
+		++rgb;
+		++i;
+	}
+	cm_helper_convert_to_custom_float(rgb_resulted,
+						lut_params->arr_points,
+						hw_points, fixpoint);
+
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h
index 64836dcf21f2296da5d673e400ca4267b905a813..64e476b83bcba88528c81151bb935725069bc561 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h
@@ -96,4 +96,14 @@ void cm_helper_program_xfer_func(
 		const struct pwl_params *params,
 		const struct xfer_func_reg *reg);
 
+bool cm_helper_convert_to_custom_float(
+		struct pwl_result_data *rgb_resulted,
+		struct curve_points *arr_points,
+		uint32_t hw_points_num,
+		bool fixpoint);
+
+bool cm_helper_translate_curve_to_hw_format(
+		const struct dc_transfer_func *output_tf,
+		struct pwl_params *lut_params, bool fixpoint);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index 8df3945370cf1d8fe052aee667862658df4db6f4..f2a08b156cf0018f2de676fb65d9c0d0ce837db3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -159,11 +159,10 @@ bool dpp_get_optimal_number_of_taps(
 			scl_data->taps.h_taps = 1;
 		if (IDENTITY_RATIO(scl_data->ratios.vert))
 			scl_data->taps.v_taps = 1;
-		/*
-		 * Spreadsheet doesn't handle taps_c is one properly,
-		 * need to force Chroma to always be scaled to pass
-		 * bandwidth validation.
-		 */
+		if (IDENTITY_RATIO(scl_data->ratios.horz_c))
+			scl_data->taps.h_taps_c = 1;
+		if (IDENTITY_RATIO(scl_data->ratios.vert_c))
+			scl_data->taps.v_taps_c = 1;
 	}
 
 	return true;
@@ -386,10 +385,9 @@ void dpp1_cnv_setup (
 
 void dpp1_set_cursor_attributes(
 		struct dpp *dpp_base,
-		const struct dc_cursor_attributes *attr)
+		enum dc_cursor_color_format color_format)
 {
 	struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
-	enum dc_cursor_color_format color_format = attr->color_format;
 
 	REG_UPDATE_2(CURSOR0_CONTROL,
 			CUR0_MODE, color_format,
@@ -402,13 +400,6 @@ void dpp1_set_cursor_attributes(
 		REG_UPDATE(CURSOR0_COLOR1,
 				CUR0_COLOR1, 0xFFFFFFFF);
 	}
-
-	/* TODO: Fixed vs float */
-
-	REG_UPDATE_3(FORMAT_CONTROL,
-				CNVC_BYPASS, 0,
-				FORMAT_CONTROL__ALPHA_EN, 1,
-				FORMAT_EXPANSION_MODE, 0);
 }
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
index ad71fb50f8a51793c6673c0f3e3a4d4d3a421570..f56ee4d08d893f1fc908804e997e87589a6fef70 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
@@ -730,8 +730,9 @@
 	type CM_BLNDGAM_RAMA_EXP_REGION33_NUM_SEGMENTS; \
 	type CM_BLNDGAM_LUT_WRITE_EN_MASK; \
 	type CM_BLNDGAM_LUT_WRITE_SEL; \
+	type CM_BLNDGAM_CONFIG_STATUS; \
 	type CM_BLNDGAM_LUT_INDEX; \
-	type CM_BLNDGAM_LUT_DATA; \
+	type BLNDGAM_MEM_PWR_FORCE; \
 	type CM_3DLUT_MODE; \
 	type CM_3DLUT_SIZE; \
 	type CM_3DLUT_INDEX; \
@@ -905,6 +906,7 @@
 	type CM_SHAPER_RAMA_EXP_REGION33_LUT_OFFSET; \
 	type CM_SHAPER_RAMA_EXP_REGION33_NUM_SEGMENTS; \
 	type CM_SHAPER_LUT_WRITE_EN_MASK; \
+	type CM_SHAPER_CONFIG_STATUS; \
 	type CM_SHAPER_LUT_WRITE_SEL; \
 	type CM_SHAPER_LUT_INDEX; \
 	type CM_SHAPER_LUT_DATA; \
@@ -1005,258 +1007,255 @@
 	type CM_BYPASS; \
 	type FORMAT_CONTROL__ALPHA_EN; \
 	type CUR0_COLOR0; \
-	type CUR0_COLOR1
-
-
+	type CUR0_COLOR1;
 
 struct dcn_dpp_shift {
-	TF_REG_FIELD_LIST(uint8_t);
+	TF_REG_FIELD_LIST(uint8_t)
 };
 
 struct dcn_dpp_mask {
-	TF_REG_FIELD_LIST(uint32_t);
+	TF_REG_FIELD_LIST(uint32_t)
 };
 
-
-
+#define DPP_COMMON_REG_VARIABLE_LIST \
+	uint32_t DSCL_EXT_OVERSCAN_LEFT_RIGHT; \
+	uint32_t DSCL_EXT_OVERSCAN_TOP_BOTTOM; \
+	uint32_t OTG_H_BLANK; \
+	uint32_t OTG_V_BLANK; \
+	uint32_t SCL_MODE; \
+	uint32_t LB_DATA_FORMAT; \
+	uint32_t LB_MEMORY_CTRL; \
+	uint32_t DSCL_AUTOCAL; \
+	uint32_t SCL_BLACK_OFFSET; \
+	uint32_t SCL_TAP_CONTROL; \
+	uint32_t SCL_COEF_RAM_TAP_SELECT; \
+	uint32_t SCL_COEF_RAM_TAP_DATA; \
+	uint32_t DSCL_2TAP_CONTROL; \
+	uint32_t MPC_SIZE; \
+	uint32_t SCL_HORZ_FILTER_SCALE_RATIO; \
+	uint32_t SCL_VERT_FILTER_SCALE_RATIO; \
+	uint32_t SCL_HORZ_FILTER_SCALE_RATIO_C; \
+	uint32_t SCL_VERT_FILTER_SCALE_RATIO_C; \
+	uint32_t SCL_HORZ_FILTER_INIT; \
+	uint32_t SCL_HORZ_FILTER_INIT_C; \
+	uint32_t SCL_VERT_FILTER_INIT; \
+	uint32_t SCL_VERT_FILTER_INIT_BOT; \
+	uint32_t SCL_VERT_FILTER_INIT_C; \
+	uint32_t SCL_VERT_FILTER_INIT_BOT_C; \
+	uint32_t RECOUT_START; \
+	uint32_t RECOUT_SIZE; \
+	uint32_t CM_GAMUT_REMAP_CONTROL; \
+	uint32_t CM_GAMUT_REMAP_C11_C12; \
+	uint32_t CM_GAMUT_REMAP_C33_C34; \
+	uint32_t CM_COMA_C11_C12; \
+	uint32_t CM_COMA_C33_C34; \
+	uint32_t CM_COMB_C11_C12; \
+	uint32_t CM_COMB_C33_C34; \
+	uint32_t CM_OCSC_CONTROL; \
+	uint32_t CM_OCSC_C11_C12; \
+	uint32_t CM_OCSC_C33_C34; \
+	uint32_t CM_MEM_PWR_CTRL; \
+	uint32_t CM_RGAM_LUT_DATA; \
+	uint32_t CM_RGAM_LUT_WRITE_EN_MASK; \
+	uint32_t CM_RGAM_LUT_INDEX; \
+	uint32_t CM_RGAM_RAMB_START_CNTL_B; \
+	uint32_t CM_RGAM_RAMB_START_CNTL_G; \
+	uint32_t CM_RGAM_RAMB_START_CNTL_R; \
+	uint32_t CM_RGAM_RAMB_SLOPE_CNTL_B; \
+	uint32_t CM_RGAM_RAMB_SLOPE_CNTL_G; \
+	uint32_t CM_RGAM_RAMB_SLOPE_CNTL_R; \
+	uint32_t CM_RGAM_RAMB_END_CNTL1_B; \
+	uint32_t CM_RGAM_RAMB_END_CNTL2_B; \
+	uint32_t CM_RGAM_RAMB_END_CNTL1_G; \
+	uint32_t CM_RGAM_RAMB_END_CNTL2_G; \
+	uint32_t CM_RGAM_RAMB_END_CNTL1_R; \
+	uint32_t CM_RGAM_RAMB_END_CNTL2_R; \
+	uint32_t CM_RGAM_RAMB_REGION_0_1; \
+	uint32_t CM_RGAM_RAMB_REGION_32_33; \
+	uint32_t CM_RGAM_RAMA_START_CNTL_B; \
+	uint32_t CM_RGAM_RAMA_START_CNTL_G; \
+	uint32_t CM_RGAM_RAMA_START_CNTL_R; \
+	uint32_t CM_RGAM_RAMA_SLOPE_CNTL_B; \
+	uint32_t CM_RGAM_RAMA_SLOPE_CNTL_G; \
+	uint32_t CM_RGAM_RAMA_SLOPE_CNTL_R; \
+	uint32_t CM_RGAM_RAMA_END_CNTL1_B; \
+	uint32_t CM_RGAM_RAMA_END_CNTL2_B; \
+	uint32_t CM_RGAM_RAMA_END_CNTL1_G; \
+	uint32_t CM_RGAM_RAMA_END_CNTL2_G; \
+	uint32_t CM_RGAM_RAMA_END_CNTL1_R; \
+	uint32_t CM_RGAM_RAMA_END_CNTL2_R; \
+	uint32_t CM_RGAM_RAMA_REGION_0_1; \
+	uint32_t CM_RGAM_RAMA_REGION_32_33; \
+	uint32_t CM_RGAM_CONTROL; \
+	uint32_t CM_CMOUT_CONTROL; \
+	uint32_t CM_BLNDGAM_LUT_WRITE_EN_MASK; \
+	uint32_t CM_BLNDGAM_CONTROL; \
+	uint32_t CM_BLNDGAM_RAMB_START_CNTL_B; \
+	uint32_t CM_BLNDGAM_RAMB_START_CNTL_G; \
+	uint32_t CM_BLNDGAM_RAMB_START_CNTL_R; \
+	uint32_t CM_BLNDGAM_RAMB_SLOPE_CNTL_B; \
+	uint32_t CM_BLNDGAM_RAMB_SLOPE_CNTL_G; \
+	uint32_t CM_BLNDGAM_RAMB_SLOPE_CNTL_R; \
+	uint32_t CM_BLNDGAM_RAMB_END_CNTL1_B; \
+	uint32_t CM_BLNDGAM_RAMB_END_CNTL2_B; \
+	uint32_t CM_BLNDGAM_RAMB_END_CNTL1_G; \
+	uint32_t CM_BLNDGAM_RAMB_END_CNTL2_G; \
+	uint32_t CM_BLNDGAM_RAMB_END_CNTL1_R; \
+	uint32_t CM_BLNDGAM_RAMB_END_CNTL2_R; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_0_1; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_2_3; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_4_5; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_6_7; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_8_9; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_10_11; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_12_13; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_14_15; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_16_17; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_18_19; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_20_21; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_22_23; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_24_25; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_26_27; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_28_29; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_30_31; \
+	uint32_t CM_BLNDGAM_RAMB_REGION_32_33; \
+	uint32_t CM_BLNDGAM_RAMA_START_CNTL_B; \
+	uint32_t CM_BLNDGAM_RAMA_START_CNTL_G; \
+	uint32_t CM_BLNDGAM_RAMA_START_CNTL_R; \
+	uint32_t CM_BLNDGAM_RAMA_SLOPE_CNTL_B; \
+	uint32_t CM_BLNDGAM_RAMA_SLOPE_CNTL_G; \
+	uint32_t CM_BLNDGAM_RAMA_SLOPE_CNTL_R; \
+	uint32_t CM_BLNDGAM_RAMA_END_CNTL1_B; \
+	uint32_t CM_BLNDGAM_RAMA_END_CNTL2_B; \
+	uint32_t CM_BLNDGAM_RAMA_END_CNTL1_G; \
+	uint32_t CM_BLNDGAM_RAMA_END_CNTL2_G; \
+	uint32_t CM_BLNDGAM_RAMA_END_CNTL1_R; \
+	uint32_t CM_BLNDGAM_RAMA_END_CNTL2_R; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_0_1; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_2_3; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_4_5; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_6_7; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_8_9; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_10_11; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_12_13; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_14_15; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_16_17; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_18_19; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_20_21; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_22_23; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_24_25; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_26_27; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_28_29; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_30_31; \
+	uint32_t CM_BLNDGAM_RAMA_REGION_32_33; \
+	uint32_t CM_BLNDGAM_LUT_INDEX; \
+	uint32_t CM_3DLUT_MODE; \
+	uint32_t CM_3DLUT_INDEX; \
+	uint32_t CM_3DLUT_DATA; \
+	uint32_t CM_3DLUT_DATA_30BIT; \
+	uint32_t CM_3DLUT_READ_WRITE_CONTROL; \
+	uint32_t CM_SHAPER_LUT_WRITE_EN_MASK; \
+	uint32_t CM_SHAPER_CONTROL; \
+	uint32_t CM_SHAPER_RAMB_START_CNTL_B; \
+	uint32_t CM_SHAPER_RAMB_START_CNTL_G; \
+	uint32_t CM_SHAPER_RAMB_START_CNTL_R; \
+	uint32_t CM_SHAPER_RAMB_END_CNTL_B; \
+	uint32_t CM_SHAPER_RAMB_END_CNTL_G; \
+	uint32_t CM_SHAPER_RAMB_END_CNTL_R; \
+	uint32_t CM_SHAPER_RAMB_REGION_0_1; \
+	uint32_t CM_SHAPER_RAMB_REGION_2_3; \
+	uint32_t CM_SHAPER_RAMB_REGION_4_5; \
+	uint32_t CM_SHAPER_RAMB_REGION_6_7; \
+	uint32_t CM_SHAPER_RAMB_REGION_8_9; \
+	uint32_t CM_SHAPER_RAMB_REGION_10_11; \
+	uint32_t CM_SHAPER_RAMB_REGION_12_13; \
+	uint32_t CM_SHAPER_RAMB_REGION_14_15; \
+	uint32_t CM_SHAPER_RAMB_REGION_16_17; \
+	uint32_t CM_SHAPER_RAMB_REGION_18_19; \
+	uint32_t CM_SHAPER_RAMB_REGION_20_21; \
+	uint32_t CM_SHAPER_RAMB_REGION_22_23; \
+	uint32_t CM_SHAPER_RAMB_REGION_24_25; \
+	uint32_t CM_SHAPER_RAMB_REGION_26_27; \
+	uint32_t CM_SHAPER_RAMB_REGION_28_29; \
+	uint32_t CM_SHAPER_RAMB_REGION_30_31; \
+	uint32_t CM_SHAPER_RAMB_REGION_32_33; \
+	uint32_t CM_SHAPER_RAMA_START_CNTL_B; \
+	uint32_t CM_SHAPER_RAMA_START_CNTL_G; \
+	uint32_t CM_SHAPER_RAMA_START_CNTL_R; \
+	uint32_t CM_SHAPER_RAMA_END_CNTL_B; \
+	uint32_t CM_SHAPER_RAMA_END_CNTL_G; \
+	uint32_t CM_SHAPER_RAMA_END_CNTL_R; \
+	uint32_t CM_SHAPER_RAMA_REGION_0_1; \
+	uint32_t CM_SHAPER_RAMA_REGION_2_3; \
+	uint32_t CM_SHAPER_RAMA_REGION_4_5; \
+	uint32_t CM_SHAPER_RAMA_REGION_6_7; \
+	uint32_t CM_SHAPER_RAMA_REGION_8_9; \
+	uint32_t CM_SHAPER_RAMA_REGION_10_11; \
+	uint32_t CM_SHAPER_RAMA_REGION_12_13; \
+	uint32_t CM_SHAPER_RAMA_REGION_14_15; \
+	uint32_t CM_SHAPER_RAMA_REGION_16_17; \
+	uint32_t CM_SHAPER_RAMA_REGION_18_19; \
+	uint32_t CM_SHAPER_RAMA_REGION_20_21; \
+	uint32_t CM_SHAPER_RAMA_REGION_22_23; \
+	uint32_t CM_SHAPER_RAMA_REGION_24_25; \
+	uint32_t CM_SHAPER_RAMA_REGION_26_27; \
+	uint32_t CM_SHAPER_RAMA_REGION_28_29; \
+	uint32_t CM_SHAPER_RAMA_REGION_30_31; \
+	uint32_t CM_SHAPER_RAMA_REGION_32_33; \
+	uint32_t CM_SHAPER_LUT_INDEX; \
+	uint32_t CM_SHAPER_LUT_DATA; \
+	uint32_t CM_ICSC_CONTROL; \
+	uint32_t CM_ICSC_C11_C12; \
+	uint32_t CM_ICSC_C33_C34; \
+	uint32_t CM_BNS_VALUES_R; \
+	uint32_t CM_BNS_VALUES_G; \
+	uint32_t CM_BNS_VALUES_B; \
+	uint32_t CM_DGAM_RAMB_START_CNTL_B; \
+	uint32_t CM_DGAM_RAMB_START_CNTL_G; \
+	uint32_t CM_DGAM_RAMB_START_CNTL_R; \
+	uint32_t CM_DGAM_RAMB_SLOPE_CNTL_B; \
+	uint32_t CM_DGAM_RAMB_SLOPE_CNTL_G; \
+	uint32_t CM_DGAM_RAMB_SLOPE_CNTL_R; \
+	uint32_t CM_DGAM_RAMB_END_CNTL1_B; \
+	uint32_t CM_DGAM_RAMB_END_CNTL2_B; \
+	uint32_t CM_DGAM_RAMB_END_CNTL1_G; \
+	uint32_t CM_DGAM_RAMB_END_CNTL2_G; \
+	uint32_t CM_DGAM_RAMB_END_CNTL1_R; \
+	uint32_t CM_DGAM_RAMB_END_CNTL2_R; \
+	uint32_t CM_DGAM_RAMB_REGION_0_1; \
+	uint32_t CM_DGAM_RAMB_REGION_14_15; \
+	uint32_t CM_DGAM_RAMA_START_CNTL_B; \
+	uint32_t CM_DGAM_RAMA_START_CNTL_G; \
+	uint32_t CM_DGAM_RAMA_START_CNTL_R; \
+	uint32_t CM_DGAM_RAMA_SLOPE_CNTL_B; \
+	uint32_t CM_DGAM_RAMA_SLOPE_CNTL_G; \
+	uint32_t CM_DGAM_RAMA_SLOPE_CNTL_R; \
+	uint32_t CM_DGAM_RAMA_END_CNTL1_B; \
+	uint32_t CM_DGAM_RAMA_END_CNTL2_B; \
+	uint32_t CM_DGAM_RAMA_END_CNTL1_G; \
+	uint32_t CM_DGAM_RAMA_END_CNTL2_G; \
+	uint32_t CM_DGAM_RAMA_END_CNTL1_R; \
+	uint32_t CM_DGAM_RAMA_END_CNTL2_R; \
+	uint32_t CM_DGAM_RAMA_REGION_0_1; \
+	uint32_t CM_DGAM_RAMA_REGION_14_15; \
+	uint32_t CM_DGAM_LUT_WRITE_EN_MASK; \
+	uint32_t CM_DGAM_LUT_INDEX; \
+	uint32_t CM_DGAM_LUT_DATA; \
+	uint32_t CM_CONTROL; \
+	uint32_t CM_DGAM_CONTROL; \
+	uint32_t CM_IGAM_CONTROL; \
+	uint32_t CM_IGAM_LUT_RW_CONTROL; \
+	uint32_t CM_IGAM_LUT_RW_INDEX; \
+	uint32_t CM_IGAM_LUT_SEQ_COLOR; \
+	uint32_t FORMAT_CONTROL; \
+	uint32_t CNVC_SURFACE_PIXEL_FORMAT; \
+	uint32_t CURSOR_CONTROL; \
+	uint32_t CURSOR0_CONTROL; \
+	uint32_t CURSOR0_COLOR0; \
+	uint32_t CURSOR0_COLOR1;
 
 struct dcn_dpp_registers {
-	uint32_t DSCL_EXT_OVERSCAN_LEFT_RIGHT;
-	uint32_t DSCL_EXT_OVERSCAN_TOP_BOTTOM;
-	uint32_t OTG_H_BLANK;
-	uint32_t OTG_V_BLANK;
-	uint32_t SCL_MODE;
-	uint32_t LB_DATA_FORMAT;
-	uint32_t LB_MEMORY_CTRL;
-	uint32_t DSCL_AUTOCAL;
-	uint32_t SCL_BLACK_OFFSET;
-	uint32_t SCL_TAP_CONTROL;
-	uint32_t SCL_COEF_RAM_TAP_SELECT;
-	uint32_t SCL_COEF_RAM_TAP_DATA;
-	uint32_t DSCL_2TAP_CONTROL;
-	uint32_t MPC_SIZE;
-	uint32_t SCL_HORZ_FILTER_SCALE_RATIO;
-	uint32_t SCL_VERT_FILTER_SCALE_RATIO;
-	uint32_t SCL_HORZ_FILTER_SCALE_RATIO_C;
-	uint32_t SCL_VERT_FILTER_SCALE_RATIO_C;
-	uint32_t SCL_HORZ_FILTER_INIT;
-	uint32_t SCL_HORZ_FILTER_INIT_C;
-	uint32_t SCL_VERT_FILTER_INIT;
-	uint32_t SCL_VERT_FILTER_INIT_BOT;
-	uint32_t SCL_VERT_FILTER_INIT_C;
-	uint32_t SCL_VERT_FILTER_INIT_BOT_C;
-	uint32_t RECOUT_START;
-	uint32_t RECOUT_SIZE;
-	uint32_t CM_GAMUT_REMAP_CONTROL;
-	uint32_t CM_GAMUT_REMAP_C11_C12;
-	uint32_t CM_GAMUT_REMAP_C33_C34;
-	uint32_t CM_COMA_C11_C12;
-	uint32_t CM_COMA_C33_C34;
-	uint32_t CM_COMB_C11_C12;
-	uint32_t CM_COMB_C33_C34;
-	uint32_t CM_OCSC_CONTROL;
-	uint32_t CM_OCSC_C11_C12;
-	uint32_t CM_OCSC_C33_C34;
-	uint32_t CM_MEM_PWR_CTRL;
-	uint32_t CM_RGAM_LUT_DATA;
-	uint32_t CM_RGAM_LUT_WRITE_EN_MASK;
-	uint32_t CM_RGAM_LUT_INDEX;
-	uint32_t CM_RGAM_RAMB_START_CNTL_B;
-	uint32_t CM_RGAM_RAMB_START_CNTL_G;
-	uint32_t CM_RGAM_RAMB_START_CNTL_R;
-	uint32_t CM_RGAM_RAMB_SLOPE_CNTL_B;
-	uint32_t CM_RGAM_RAMB_SLOPE_CNTL_G;
-	uint32_t CM_RGAM_RAMB_SLOPE_CNTL_R;
-	uint32_t CM_RGAM_RAMB_END_CNTL1_B;
-	uint32_t CM_RGAM_RAMB_END_CNTL2_B;
-	uint32_t CM_RGAM_RAMB_END_CNTL1_G;
-	uint32_t CM_RGAM_RAMB_END_CNTL2_G;
-	uint32_t CM_RGAM_RAMB_END_CNTL1_R;
-	uint32_t CM_RGAM_RAMB_END_CNTL2_R;
-	uint32_t CM_RGAM_RAMB_REGION_0_1;
-	uint32_t CM_RGAM_RAMB_REGION_32_33;
-	uint32_t CM_RGAM_RAMA_START_CNTL_B;
-	uint32_t CM_RGAM_RAMA_START_CNTL_G;
-	uint32_t CM_RGAM_RAMA_START_CNTL_R;
-	uint32_t CM_RGAM_RAMA_SLOPE_CNTL_B;
-	uint32_t CM_RGAM_RAMA_SLOPE_CNTL_G;
-	uint32_t CM_RGAM_RAMA_SLOPE_CNTL_R;
-	uint32_t CM_RGAM_RAMA_END_CNTL1_B;
-	uint32_t CM_RGAM_RAMA_END_CNTL2_B;
-	uint32_t CM_RGAM_RAMA_END_CNTL1_G;
-	uint32_t CM_RGAM_RAMA_END_CNTL2_G;
-	uint32_t CM_RGAM_RAMA_END_CNTL1_R;
-	uint32_t CM_RGAM_RAMA_END_CNTL2_R;
-	uint32_t CM_RGAM_RAMA_REGION_0_1;
-	uint32_t CM_RGAM_RAMA_REGION_32_33;
-	uint32_t CM_RGAM_CONTROL;
-	uint32_t CM_CMOUT_CONTROL;
-	uint32_t CM_BLNDGAM_LUT_WRITE_EN_MASK;
-	uint32_t CM_BLNDGAM_CONTROL;
-	uint32_t CM_BLNDGAM_RAMB_START_CNTL_B;
-	uint32_t CM_BLNDGAM_RAMB_START_CNTL_G;
-	uint32_t CM_BLNDGAM_RAMB_START_CNTL_R;
-	uint32_t CM_BLNDGAM_RAMB_SLOPE_CNTL_B;
-	uint32_t CM_BLNDGAM_RAMB_SLOPE_CNTL_G;
-	uint32_t CM_BLNDGAM_RAMB_SLOPE_CNTL_R;
-	uint32_t CM_BLNDGAM_RAMB_END_CNTL1_B;
-	uint32_t CM_BLNDGAM_RAMB_END_CNTL2_B;
-	uint32_t CM_BLNDGAM_RAMB_END_CNTL1_G;
-	uint32_t CM_BLNDGAM_RAMB_END_CNTL2_G;
-	uint32_t CM_BLNDGAM_RAMB_END_CNTL1_R;
-	uint32_t CM_BLNDGAM_RAMB_END_CNTL2_R;
-	uint32_t CM_BLNDGAM_RAMB_REGION_0_1;
-	uint32_t CM_BLNDGAM_RAMB_REGION_2_3;
-	uint32_t CM_BLNDGAM_RAMB_REGION_4_5;
-	uint32_t CM_BLNDGAM_RAMB_REGION_6_7;
-	uint32_t CM_BLNDGAM_RAMB_REGION_8_9;
-	uint32_t CM_BLNDGAM_RAMB_REGION_10_11;
-	uint32_t CM_BLNDGAM_RAMB_REGION_12_13;
-	uint32_t CM_BLNDGAM_RAMB_REGION_14_15;
-	uint32_t CM_BLNDGAM_RAMB_REGION_16_17;
-	uint32_t CM_BLNDGAM_RAMB_REGION_18_19;
-	uint32_t CM_BLNDGAM_RAMB_REGION_20_21;
-	uint32_t CM_BLNDGAM_RAMB_REGION_22_23;
-	uint32_t CM_BLNDGAM_RAMB_REGION_24_25;
-	uint32_t CM_BLNDGAM_RAMB_REGION_26_27;
-	uint32_t CM_BLNDGAM_RAMB_REGION_28_29;
-	uint32_t CM_BLNDGAM_RAMB_REGION_30_31;
-	uint32_t CM_BLNDGAM_RAMB_REGION_32_33;
-	uint32_t CM_BLNDGAM_RAMA_START_CNTL_B;
-	uint32_t CM_BLNDGAM_RAMA_START_CNTL_G;
-	uint32_t CM_BLNDGAM_RAMA_START_CNTL_R;
-	uint32_t CM_BLNDGAM_RAMA_SLOPE_CNTL_B;
-	uint32_t CM_BLNDGAM_RAMA_SLOPE_CNTL_G;
-	uint32_t CM_BLNDGAM_RAMA_SLOPE_CNTL_R;
-	uint32_t CM_BLNDGAM_RAMA_END_CNTL1_B;
-	uint32_t CM_BLNDGAM_RAMA_END_CNTL2_B;
-	uint32_t CM_BLNDGAM_RAMA_END_CNTL1_G;
-	uint32_t CM_BLNDGAM_RAMA_END_CNTL2_G;
-	uint32_t CM_BLNDGAM_RAMA_END_CNTL1_R;
-	uint32_t CM_BLNDGAM_RAMA_END_CNTL2_R;
-	uint32_t CM_BLNDGAM_RAMA_REGION_0_1;
-	uint32_t CM_BLNDGAM_RAMA_REGION_2_3;
-	uint32_t CM_BLNDGAM_RAMA_REGION_4_5;
-	uint32_t CM_BLNDGAM_RAMA_REGION_6_7;
-	uint32_t CM_BLNDGAM_RAMA_REGION_8_9;
-	uint32_t CM_BLNDGAM_RAMA_REGION_10_11;
-	uint32_t CM_BLNDGAM_RAMA_REGION_12_13;
-	uint32_t CM_BLNDGAM_RAMA_REGION_14_15;
-	uint32_t CM_BLNDGAM_RAMA_REGION_16_17;
-	uint32_t CM_BLNDGAM_RAMA_REGION_18_19;
-	uint32_t CM_BLNDGAM_RAMA_REGION_20_21;
-	uint32_t CM_BLNDGAM_RAMA_REGION_22_23;
-	uint32_t CM_BLNDGAM_RAMA_REGION_24_25;
-	uint32_t CM_BLNDGAM_RAMA_REGION_26_27;
-	uint32_t CM_BLNDGAM_RAMA_REGION_28_29;
-	uint32_t CM_BLNDGAM_RAMA_REGION_30_31;
-	uint32_t CM_BLNDGAM_RAMA_REGION_32_33;
-	uint32_t CM_BLNDGAM_LUT_INDEX;
-	uint32_t CM_BLNDGAM_LUT_DATA;
-	uint32_t CM_3DLUT_MODE;
-	uint32_t CM_3DLUT_INDEX;
-	uint32_t CM_3DLUT_DATA;
-	uint32_t CM_3DLUT_DATA_30BIT;
-	uint32_t CM_3DLUT_READ_WRITE_CONTROL;
-	uint32_t CM_SHAPER_LUT_WRITE_EN_MASK;
-	uint32_t CM_SHAPER_CONTROL;
-	uint32_t CM_SHAPER_RAMB_START_CNTL_B;
-	uint32_t CM_SHAPER_RAMB_START_CNTL_G;
-	uint32_t CM_SHAPER_RAMB_START_CNTL_R;
-	uint32_t CM_SHAPER_RAMB_END_CNTL_B;
-	uint32_t CM_SHAPER_RAMB_END_CNTL_G;
-	uint32_t CM_SHAPER_RAMB_END_CNTL_R;
-	uint32_t CM_SHAPER_RAMB_REGION_0_1;
-	uint32_t CM_SHAPER_RAMB_REGION_2_3;
-	uint32_t CM_SHAPER_RAMB_REGION_4_5;
-	uint32_t CM_SHAPER_RAMB_REGION_6_7;
-	uint32_t CM_SHAPER_RAMB_REGION_8_9;
-	uint32_t CM_SHAPER_RAMB_REGION_10_11;
-	uint32_t CM_SHAPER_RAMB_REGION_12_13;
-	uint32_t CM_SHAPER_RAMB_REGION_14_15;
-	uint32_t CM_SHAPER_RAMB_REGION_16_17;
-	uint32_t CM_SHAPER_RAMB_REGION_18_19;
-	uint32_t CM_SHAPER_RAMB_REGION_20_21;
-	uint32_t CM_SHAPER_RAMB_REGION_22_23;
-	uint32_t CM_SHAPER_RAMB_REGION_24_25;
-	uint32_t CM_SHAPER_RAMB_REGION_26_27;
-	uint32_t CM_SHAPER_RAMB_REGION_28_29;
-	uint32_t CM_SHAPER_RAMB_REGION_30_31;
-	uint32_t CM_SHAPER_RAMB_REGION_32_33;
-	uint32_t CM_SHAPER_RAMA_START_CNTL_B;
-	uint32_t CM_SHAPER_RAMA_START_CNTL_G;
-	uint32_t CM_SHAPER_RAMA_START_CNTL_R;
-	uint32_t CM_SHAPER_RAMA_END_CNTL_B;
-	uint32_t CM_SHAPER_RAMA_END_CNTL_G;
-	uint32_t CM_SHAPER_RAMA_END_CNTL_R;
-	uint32_t CM_SHAPER_RAMA_REGION_0_1;
-	uint32_t CM_SHAPER_RAMA_REGION_2_3;
-	uint32_t CM_SHAPER_RAMA_REGION_4_5;
-	uint32_t CM_SHAPER_RAMA_REGION_6_7;
-	uint32_t CM_SHAPER_RAMA_REGION_8_9;
-	uint32_t CM_SHAPER_RAMA_REGION_10_11;
-	uint32_t CM_SHAPER_RAMA_REGION_12_13;
-	uint32_t CM_SHAPER_RAMA_REGION_14_15;
-	uint32_t CM_SHAPER_RAMA_REGION_16_17;
-	uint32_t CM_SHAPER_RAMA_REGION_18_19;
-	uint32_t CM_SHAPER_RAMA_REGION_20_21;
-	uint32_t CM_SHAPER_RAMA_REGION_22_23;
-	uint32_t CM_SHAPER_RAMA_REGION_24_25;
-	uint32_t CM_SHAPER_RAMA_REGION_26_27;
-	uint32_t CM_SHAPER_RAMA_REGION_28_29;
-	uint32_t CM_SHAPER_RAMA_REGION_30_31;
-	uint32_t CM_SHAPER_RAMA_REGION_32_33;
-	uint32_t CM_SHAPER_LUT_INDEX;
-	uint32_t CM_SHAPER_LUT_DATA;
-	uint32_t CM_ICSC_CONTROL;
-	uint32_t CM_ICSC_C11_C12;
-	uint32_t CM_ICSC_C33_C34;
-	uint32_t CM_BNS_VALUES_R;
-	uint32_t CM_BNS_VALUES_G;
-	uint32_t CM_BNS_VALUES_B;
-	uint32_t CM_DGAM_RAMB_START_CNTL_B;
-	uint32_t CM_DGAM_RAMB_START_CNTL_G;
-	uint32_t CM_DGAM_RAMB_START_CNTL_R;
-	uint32_t CM_DGAM_RAMB_SLOPE_CNTL_B;
-	uint32_t CM_DGAM_RAMB_SLOPE_CNTL_G;
-	uint32_t CM_DGAM_RAMB_SLOPE_CNTL_R;
-	uint32_t CM_DGAM_RAMB_END_CNTL1_B;
-	uint32_t CM_DGAM_RAMB_END_CNTL2_B;
-	uint32_t CM_DGAM_RAMB_END_CNTL1_G;
-	uint32_t CM_DGAM_RAMB_END_CNTL2_G;
-	uint32_t CM_DGAM_RAMB_END_CNTL1_R;
-	uint32_t CM_DGAM_RAMB_END_CNTL2_R;
-	uint32_t CM_DGAM_RAMB_REGION_0_1;
-	uint32_t CM_DGAM_RAMB_REGION_14_15;
-	uint32_t CM_DGAM_RAMA_START_CNTL_B;
-	uint32_t CM_DGAM_RAMA_START_CNTL_G;
-	uint32_t CM_DGAM_RAMA_START_CNTL_R;
-	uint32_t CM_DGAM_RAMA_SLOPE_CNTL_B;
-	uint32_t CM_DGAM_RAMA_SLOPE_CNTL_G;
-	uint32_t CM_DGAM_RAMA_SLOPE_CNTL_R;
-	uint32_t CM_DGAM_RAMA_END_CNTL1_B;
-	uint32_t CM_DGAM_RAMA_END_CNTL2_B;
-	uint32_t CM_DGAM_RAMA_END_CNTL1_G;
-	uint32_t CM_DGAM_RAMA_END_CNTL2_G;
-	uint32_t CM_DGAM_RAMA_END_CNTL1_R;
-	uint32_t CM_DGAM_RAMA_END_CNTL2_R;
-	uint32_t CM_DGAM_RAMA_REGION_0_1;
-	uint32_t CM_DGAM_RAMA_REGION_14_15;
-	uint32_t CM_DGAM_LUT_WRITE_EN_MASK;
-	uint32_t CM_DGAM_LUT_INDEX;
-	uint32_t CM_DGAM_LUT_DATA;
-	uint32_t CM_CONTROL;
-	uint32_t CM_DGAM_CONTROL;
-	uint32_t CM_IGAM_CONTROL;
-	uint32_t CM_IGAM_LUT_RW_CONTROL;
-	uint32_t CM_IGAM_LUT_RW_INDEX;
-	uint32_t CM_IGAM_LUT_SEQ_COLOR;
-	uint32_t FORMAT_CONTROL;
-	uint32_t CNVC_SURFACE_PIXEL_FORMAT;
-	uint32_t CURSOR_CONTROL;
-	uint32_t CURSOR0_CONTROL;
-	uint32_t CURSOR0_COLOR0;
-	uint32_t CURSOR0_COLOR1;
+	DPP_COMMON_REG_VARIABLE_LIST
 };
 
 struct dcn10_dpp {
@@ -1284,6 +1283,10 @@ enum dcn10_input_csc_select {
 	INPUT_CSC_SELECT_COMA
 };
 
+void dpp1_set_cursor_attributes(
+		struct dpp *dpp_base,
+		enum dc_cursor_color_format color_format);
+
 bool dpp1_dscl_is_lb_conf_valid(
 		int ceil_vratio,
 		int num_partitions,
@@ -1371,7 +1374,7 @@ void dpp1_cm_program_regamma_lutb_settings(
 		const struct pwl_params *params);
 void dpp1_cm_set_output_csc_adjustment(
 		struct dpp *dpp_base,
-		const struct out_csc_color_matrix *tbl_entry);
+		const uint16_t *regval);
 
 void dpp1_cm_set_output_csc_default(
 		struct dpp *dpp_base,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
index 4c90043e7b8c9bf919ee2c8f8667ebe95c4731b8..a5b099023652a4ecfc32fdae06d1045bf2fd650c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
@@ -49,6 +49,8 @@
 #define FN(reg_name, field_name) \
 	dpp->tf_shift->field_name, dpp->tf_mask->field_name
 
+#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
+
 struct dcn10_input_csc_matrix {
 	enum dc_color_space color_space;
 	uint16_t regval[12];
@@ -223,18 +225,18 @@ void dpp1_cm_set_gamut_remap(
 
 static void dpp1_cm_program_color_matrix(
 		struct dcn10_dpp *dpp,
-		const struct out_csc_color_matrix *tbl_entry)
+		const uint16_t *regval)
 {
 	uint32_t mode;
 	struct color_matrices_reg gam_regs;
 
 	REG_GET(CM_OCSC_CONTROL, CM_OCSC_MODE, &mode);
 
-	if (tbl_entry == NULL) {
+	if (regval == NULL) {
 		BREAK_TO_DEBUGGER();
 		return;
 	}
-
+	mode = 4;
 	gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_OCSC_C11;
 	gam_regs.masks.csc_c11  = dpp->tf_mask->CM_OCSC_C11;
 	gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_OCSC_C12;
@@ -247,7 +249,7 @@ static void dpp1_cm_program_color_matrix(
 
 		cm_helper_program_color_matrices(
 				dpp->base.ctx,
-				tbl_entry->regval,
+				regval,
 				&gam_regs);
 
 	} else {
@@ -257,7 +259,7 @@ static void dpp1_cm_program_color_matrix(
 
 		cm_helper_program_color_matrices(
 				dpp->base.ctx,
-				tbl_entry->regval,
+				regval,
 				&gam_regs);
 	}
 }
@@ -266,24 +268,18 @@ void dpp1_cm_set_output_csc_default(
 		struct dpp *dpp_base,
 		enum dc_color_space colorspace)
 {
-
 	struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
-	struct out_csc_color_matrix tbl_entry;
-	int i, j;
-	int arr_size = sizeof(output_csc_matrix) / sizeof(struct output_csc_matrix);
+	const uint16_t *regval = NULL;
+	int arr_size;
 	uint32_t ocsc_mode = 4;
 
-	tbl_entry.color_space = colorspace;
-
-	for (i = 0; i < arr_size; i++)
-		if (output_csc_matrix[i].color_space == colorspace) {
-			for (j = 0; j < 12; j++)
-				tbl_entry.regval[j] = output_csc_matrix[i].regval[j];
-			break;
-		}
-
+	regval = find_color_matrix(colorspace, &arr_size);
+	if (regval == NULL) {
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+	dpp1_cm_program_color_matrix(dpp, regval);
 	REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode);
-	dpp1_cm_program_color_matrix(dpp, &tbl_entry);
 }
 
 static void dpp1_cm_get_reg_field(
@@ -315,41 +311,12 @@ static void dpp1_cm_get_reg_field(
 
 void dpp1_cm_set_output_csc_adjustment(
 		struct dpp *dpp_base,
-		const struct out_csc_color_matrix *tbl_entry)
+		const uint16_t *regval)
 {
 	struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
-	//enum csc_color_mode config = CSC_COLOR_MODE_GRAPHICS_OUTPUT_CSC;
 	uint32_t ocsc_mode = 4;
-
-	/**
-	*if (tbl_entry != NULL) {
-	*	switch (tbl_entry->color_space) {
-	*	case COLOR_SPACE_SRGB:
-	*	case COLOR_SPACE_2020_RGB_FULLRANGE:
-	*		ocsc_mode = 0;
-	*		break;
-	*	case COLOR_SPACE_SRGB_LIMITED:
-	*	case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
-	*		ocsc_mode = 1;
-	*		break;
-	*	case COLOR_SPACE_YCBCR601:
-	*	case COLOR_SPACE_YCBCR601_LIMITED:
-	*		ocsc_mode = 2;
-	*		break;
-	*	case COLOR_SPACE_YCBCR709:
-	*	case COLOR_SPACE_YCBCR709_LIMITED:
-	*	case COLOR_SPACE_2020_YCBCR:
-	*		ocsc_mode = 3;
-	*		break;
-	*	case COLOR_SPACE_UNKNOWN:
-	*	default:
-	*		break;
-	*	}
-	*}
-	*/
-
+	dpp1_cm_program_color_matrix(dpp, regval);
 	REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode);
-	dpp1_cm_program_color_matrix(dpp, tbl_entry);
 }
 
 void dpp1_cm_power_on_regamma_lut(struct dpp *dpp_base,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index 584e82cc5df364a12ce9cecb7330e8b0387b9fa9..585b33384002364d031372adeab0918238821ec8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -48,9 +48,20 @@ void hubp1_set_blank(struct hubp *hubp, bool blank)
 			HUBP_TTU_DISABLE, blank_en);
 
 	if (blank) {
-		REG_WAIT(DCHUBP_CNTL,
-				HUBP_NO_OUTSTANDING_REQ, 1,
-				1, 200);
+		uint32_t reg_val = REG_READ(DCHUBP_CNTL);
+
+		if (reg_val) {
+			/* init sequence workaround: in case HUBP is
+			 * power gated, this wait would timeout.
+			 *
+			 * we just wrote reg_val to non-0, if it stay 0
+			 * it means HUBP is gated
+			 */
+			REG_WAIT(DCHUBP_CNTL,
+					HUBP_NO_OUTSTANDING_REQ, 1,
+					1, 200);
+		}
+
 		hubp->mpcc_id = 0xf;
 		hubp->opp_id = 0xf;
 	}
@@ -96,10 +107,12 @@ static void hubp1_vready_workaround(struct hubp *hubp,
 }
 
 void hubp1_program_tiling(
-	struct dcn10_hubp *hubp1,
+	struct hubp *hubp,
 	const union dc_tiling_info *info,
 	const enum surface_pixel_format pixel_format)
 {
+	struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
+
 	REG_UPDATE_6(DCSURF_ADDR_CONFIG,
 			NUM_PIPES, log_2(info->gfx9.num_pipes),
 			NUM_BANKS, log_2(info->gfx9.num_banks),
@@ -116,13 +129,14 @@ void hubp1_program_tiling(
 }
 
 void hubp1_program_size_and_rotation(
-	struct dcn10_hubp *hubp1,
+	struct hubp *hubp,
 	enum dc_rotation_angle rotation,
 	enum surface_pixel_format format,
 	const union plane_size *plane_size,
 	struct dc_plane_dcc_param *dcc,
 	bool horizontal_mirror)
 {
+	struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
 	uint32_t pitch, meta_pitch, pitch_c, meta_pitch_c, mirror;
 
 	/* Program data and meta surface pitch (calculation from addrlib)
@@ -178,9 +192,10 @@ void hubp1_program_size_and_rotation(
 }
 
 void hubp1_program_pixel_format(
-	struct dcn10_hubp *hubp1,
+	struct hubp *hubp,
 	enum surface_pixel_format format)
 {
+	struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
 	uint32_t red_bar = 3;
 	uint32_t blue_bar = 2;
 
@@ -424,13 +439,11 @@ void hubp1_program_surface_config(
 	struct dc_plane_dcc_param *dcc,
 	bool horizontal_mirror)
 {
-	struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
-
 	hubp1_dcc_control(hubp, dcc->enable, dcc->grph.independent_64b_blks);
-	hubp1_program_tiling(hubp1, tiling_info, format);
+	hubp1_program_tiling(hubp, tiling_info, format);
 	hubp1_program_size_and_rotation(
-			hubp1, rotation, format, plane_size, dcc, horizontal_mirror);
-	hubp1_program_pixel_format(hubp1, format);
+			hubp, rotation, format, plane_size, dcc, horizontal_mirror);
+	hubp1_program_pixel_format(hubp, format);
 }
 
 void hubp1_program_requestor(
@@ -765,42 +778,7 @@ void hubp1_read_state(struct dcn10_hubp *hubp1,
 			QoS_LEVEL_HIGH_WM, &s->qos_level_high_wm);
 }
 
-enum cursor_pitch {
-	CURSOR_PITCH_64_PIXELS = 0,
-	CURSOR_PITCH_128_PIXELS,
-	CURSOR_PITCH_256_PIXELS
-};
-
-enum cursor_lines_per_chunk {
-	CURSOR_LINE_PER_CHUNK_2 = 1,
-	CURSOR_LINE_PER_CHUNK_4,
-	CURSOR_LINE_PER_CHUNK_8,
-	CURSOR_LINE_PER_CHUNK_16
-};
-
-static bool ippn10_cursor_program_control(
-		struct dcn10_hubp *hubp1,
-		bool pixel_data_invert,
-		enum dc_cursor_color_format color_format)
-{
-	if (REG(CURSOR_SETTINS))
-		REG_SET_2(CURSOR_SETTINS, 0,
-				/* no shift of the cursor HDL schedule */
-				CURSOR0_DST_Y_OFFSET, 0,
-				 /* used to shift the cursor chunk request deadline */
-				CURSOR0_CHUNK_HDL_ADJUST, 3);
-	else
-		REG_SET_2(CURSOR_SETTINGS, 0,
-				/* no shift of the cursor HDL schedule */
-				CURSOR0_DST_Y_OFFSET, 0,
-				 /* used to shift the cursor chunk request deadline */
-				CURSOR0_CHUNK_HDL_ADJUST, 3);
-
-	return true;
-}
-
-static enum cursor_pitch ippn10_get_cursor_pitch(
-		unsigned int pitch)
+enum cursor_pitch hubp1_get_cursor_pitch(unsigned int pitch)
 {
 	enum cursor_pitch hw_pitch;
 
@@ -823,7 +801,7 @@ static enum cursor_pitch ippn10_get_cursor_pitch(
 	return hw_pitch;
 }
 
-static enum cursor_lines_per_chunk ippn10_get_lines_per_chunk(
+static enum cursor_lines_per_chunk hubp1_get_lines_per_chunk(
 		unsigned int cur_width,
 		enum dc_cursor_color_format format)
 {
@@ -849,8 +827,8 @@ void hubp1_cursor_set_attributes(
 		const struct dc_cursor_attributes *attr)
 {
 	struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
-	enum cursor_pitch hw_pitch = ippn10_get_cursor_pitch(attr->pitch);
-	enum cursor_lines_per_chunk lpc = ippn10_get_lines_per_chunk(
+	enum cursor_pitch hw_pitch = hubp1_get_cursor_pitch(attr->pitch);
+	enum cursor_lines_per_chunk lpc = hubp1_get_lines_per_chunk(
 			attr->width, attr->color_format);
 
 	hubp->curs_attr = *attr;
@@ -863,13 +841,17 @@ void hubp1_cursor_set_attributes(
 	REG_UPDATE_2(CURSOR_SIZE,
 			CURSOR_WIDTH, attr->width,
 			CURSOR_HEIGHT, attr->height);
+
 	REG_UPDATE_3(CURSOR_CONTROL,
 			CURSOR_MODE, attr->color_format,
 			CURSOR_PITCH, hw_pitch,
 			CURSOR_LINES_PER_CHUNK, lpc);
-	ippn10_cursor_program_control(hubp1,
-			attr->attribute_flags.bits.INVERT_PIXEL_DATA,
-			attr->color_format);
+
+	REG_SET_2(CURSOR_SETTINS, 0,
+			/* no shift of the cursor HDL schedule */
+			CURSOR0_DST_Y_OFFSET, 0,
+			 /* used to shift the cursor chunk request deadline */
+			CURSOR0_CHUNK_HDL_ADJUST, 3);
 }
 
 void hubp1_cursor_set_position(
@@ -909,7 +891,8 @@ void hubp1_cursor_set_position(
 		cur_en = 0;  /* not visible beyond left edge*/
 
 	if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0)
-		hubp1_cursor_set_attributes(hubp, &hubp->curs_attr);
+		hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr);
+
 	REG_UPDATE(CURSOR_CONTROL,
 			CURSOR_ENABLE, cur_en);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
index a7834dd507163eec24cf165862b7d006f0ce3441..33e91d9c010f8bb48f18fe67cfbdfc4b35575c81 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
@@ -127,113 +127,110 @@
 	SRI(CURSOR_HOT_SPOT, CURSOR, id), \
 	SRI(CURSOR_DST_OFFSET, CURSOR, id)
 
-
-
-struct dcn_mi_registers {
-	uint32_t DCHUBP_CNTL;
-	uint32_t HUBPREQ_DEBUG_DB;
-	uint32_t DCSURF_ADDR_CONFIG;
-	uint32_t DCSURF_TILING_CONFIG;
-	uint32_t DCSURF_SURFACE_PITCH;
-	uint32_t DCSURF_SURFACE_PITCH_C;
-	uint32_t DCSURF_SURFACE_CONFIG;
-	uint32_t DCSURF_FLIP_CONTROL;
-	uint32_t DCSURF_PRI_VIEWPORT_DIMENSION;
-	uint32_t DCSURF_PRI_VIEWPORT_START;
-	uint32_t DCSURF_SEC_VIEWPORT_DIMENSION;
-	uint32_t DCSURF_SEC_VIEWPORT_START;
-	uint32_t DCSURF_PRI_VIEWPORT_DIMENSION_C;
-	uint32_t DCSURF_PRI_VIEWPORT_START_C;
-	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH;
-	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS;
-	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH;
-	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS;
-	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH;
-	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS;
-	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH;
-	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS;
-	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C;
-	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C;
-	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C;
-	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C;
-	uint32_t DCSURF_SURFACE_INUSE;
-	uint32_t DCSURF_SURFACE_INUSE_HIGH;
-	uint32_t DCSURF_SURFACE_INUSE_C;
-	uint32_t DCSURF_SURFACE_INUSE_HIGH_C;
-	uint32_t DCSURF_SURFACE_EARLIEST_INUSE;
-	uint32_t DCSURF_SURFACE_EARLIEST_INUSE_HIGH;
-	uint32_t DCSURF_SURFACE_EARLIEST_INUSE_C;
-	uint32_t DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C;
-	uint32_t DCSURF_SURFACE_CONTROL;
-	uint32_t HUBPRET_CONTROL;
-	uint32_t DCN_EXPANSION_MODE;
-	uint32_t DCHUBP_REQ_SIZE_CONFIG;
-	uint32_t DCHUBP_REQ_SIZE_CONFIG_C;
-	uint32_t BLANK_OFFSET_0;
-	uint32_t BLANK_OFFSET_1;
-	uint32_t DST_DIMENSIONS;
-	uint32_t DST_AFTER_SCALER;
-	uint32_t PREFETCH_SETTINS;
-	uint32_t PREFETCH_SETTINGS;
-	uint32_t VBLANK_PARAMETERS_0;
-	uint32_t REF_FREQ_TO_PIX_FREQ;
-	uint32_t VBLANK_PARAMETERS_1;
-	uint32_t VBLANK_PARAMETERS_3;
-	uint32_t NOM_PARAMETERS_0;
-	uint32_t NOM_PARAMETERS_1;
-	uint32_t NOM_PARAMETERS_4;
-	uint32_t NOM_PARAMETERS_5;
-	uint32_t PER_LINE_DELIVERY_PRE;
-	uint32_t PER_LINE_DELIVERY;
-	uint32_t PREFETCH_SETTINS_C;
-	uint32_t PREFETCH_SETTINGS_C;
-	uint32_t VBLANK_PARAMETERS_2;
-	uint32_t VBLANK_PARAMETERS_4;
-	uint32_t NOM_PARAMETERS_2;
-	uint32_t NOM_PARAMETERS_3;
-	uint32_t NOM_PARAMETERS_6;
-	uint32_t NOM_PARAMETERS_7;
-	uint32_t DCN_TTU_QOS_WM;
-	uint32_t DCN_GLOBAL_TTU_CNTL;
-	uint32_t DCN_SURF0_TTU_CNTL0;
-	uint32_t DCN_SURF0_TTU_CNTL1;
-	uint32_t DCN_SURF1_TTU_CNTL0;
-	uint32_t DCN_SURF1_TTU_CNTL1;
-	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_MSB;
-	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LSB;
-	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_START_ADDR_MSB;
-	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LSB;
-	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_END_ADDR_MSB;
-	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LSB;
-	uint32_t DCN_VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR_MSB;
-	uint32_t DCN_VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR_LSB;
-	uint32_t DCN_VM_MX_L1_TLB_CNTL;
-	uint32_t DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB;
-	uint32_t DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB;
-	uint32_t DCN_VM_SYSTEM_APERTURE_LOW_ADDR_MSB;
-	uint32_t DCN_VM_SYSTEM_APERTURE_LOW_ADDR_LSB;
-	uint32_t DCN_VM_SYSTEM_APERTURE_HIGH_ADDR_MSB;
-	uint32_t DCN_VM_SYSTEM_APERTURE_HIGH_ADDR_LSB;
-	uint32_t DCN_VM_SYSTEM_APERTURE_LOW_ADDR;
-	uint32_t DCN_VM_SYSTEM_APERTURE_HIGH_ADDR;
-	uint32_t DCHUBBUB_SDPIF_FB_BASE;
-	uint32_t DCHUBBUB_SDPIF_FB_OFFSET;
-	uint32_t DCN_VM_FB_LOCATION_TOP;
-	uint32_t DCN_VM_FB_LOCATION_BASE;
-	uint32_t DCN_VM_FB_OFFSET;
-	uint32_t DCN_VM_AGP_BASE;
-	uint32_t DCN_VM_AGP_BOT;
-	uint32_t DCN_VM_AGP_TOP;
-	uint32_t CURSOR_SETTINS;
-	uint32_t CURSOR_SETTINGS;
-	uint32_t CURSOR_SURFACE_ADDRESS_HIGH;
-	uint32_t CURSOR_SURFACE_ADDRESS;
-	uint32_t CURSOR_SIZE;
-	uint32_t CURSOR_CONTROL;
-	uint32_t CURSOR_POSITION;
-	uint32_t CURSOR_HOT_SPOT;
-	uint32_t CURSOR_DST_OFFSET;
-};
+#define HUBP_COMMON_REG_VARIABLE_LIST \
+	uint32_t DCHUBP_CNTL; \
+	uint32_t HUBPREQ_DEBUG_DB; \
+	uint32_t DCSURF_ADDR_CONFIG; \
+	uint32_t DCSURF_TILING_CONFIG; \
+	uint32_t DCSURF_SURFACE_PITCH; \
+	uint32_t DCSURF_SURFACE_PITCH_C; \
+	uint32_t DCSURF_SURFACE_CONFIG; \
+	uint32_t DCSURF_FLIP_CONTROL; \
+	uint32_t DCSURF_PRI_VIEWPORT_DIMENSION; \
+	uint32_t DCSURF_PRI_VIEWPORT_START; \
+	uint32_t DCSURF_SEC_VIEWPORT_DIMENSION; \
+	uint32_t DCSURF_SEC_VIEWPORT_START; \
+	uint32_t DCSURF_PRI_VIEWPORT_DIMENSION_C; \
+	uint32_t DCSURF_PRI_VIEWPORT_START_C; \
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; \
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS; \
+	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH; \
+	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS; \
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH; \
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS; \
+	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH; \
+	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS; \
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; \
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; \
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C; \
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C; \
+	uint32_t DCSURF_SURFACE_INUSE; \
+	uint32_t DCSURF_SURFACE_INUSE_HIGH; \
+	uint32_t DCSURF_SURFACE_INUSE_C; \
+	uint32_t DCSURF_SURFACE_INUSE_HIGH_C; \
+	uint32_t DCSURF_SURFACE_EARLIEST_INUSE; \
+	uint32_t DCSURF_SURFACE_EARLIEST_INUSE_HIGH; \
+	uint32_t DCSURF_SURFACE_EARLIEST_INUSE_C; \
+	uint32_t DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C; \
+	uint32_t DCSURF_SURFACE_CONTROL; \
+	uint32_t HUBPRET_CONTROL; \
+	uint32_t DCN_EXPANSION_MODE; \
+	uint32_t DCHUBP_REQ_SIZE_CONFIG; \
+	uint32_t DCHUBP_REQ_SIZE_CONFIG_C; \
+	uint32_t BLANK_OFFSET_0; \
+	uint32_t BLANK_OFFSET_1; \
+	uint32_t DST_DIMENSIONS; \
+	uint32_t DST_AFTER_SCALER; \
+	uint32_t PREFETCH_SETTINS; \
+	uint32_t PREFETCH_SETTINGS; \
+	uint32_t VBLANK_PARAMETERS_0; \
+	uint32_t REF_FREQ_TO_PIX_FREQ; \
+	uint32_t VBLANK_PARAMETERS_1; \
+	uint32_t VBLANK_PARAMETERS_3; \
+	uint32_t NOM_PARAMETERS_0; \
+	uint32_t NOM_PARAMETERS_1; \
+	uint32_t NOM_PARAMETERS_4; \
+	uint32_t NOM_PARAMETERS_5; \
+	uint32_t PER_LINE_DELIVERY_PRE; \
+	uint32_t PER_LINE_DELIVERY; \
+	uint32_t PREFETCH_SETTINS_C; \
+	uint32_t PREFETCH_SETTINGS_C; \
+	uint32_t VBLANK_PARAMETERS_2; \
+	uint32_t VBLANK_PARAMETERS_4; \
+	uint32_t NOM_PARAMETERS_2; \
+	uint32_t NOM_PARAMETERS_3; \
+	uint32_t NOM_PARAMETERS_6; \
+	uint32_t NOM_PARAMETERS_7; \
+	uint32_t DCN_TTU_QOS_WM; \
+	uint32_t DCN_GLOBAL_TTU_CNTL; \
+	uint32_t DCN_SURF0_TTU_CNTL0; \
+	uint32_t DCN_SURF0_TTU_CNTL1; \
+	uint32_t DCN_SURF1_TTU_CNTL0; \
+	uint32_t DCN_SURF1_TTU_CNTL1; \
+	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_MSB; \
+	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LSB; \
+	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_START_ADDR_MSB; \
+	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LSB; \
+	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_END_ADDR_MSB; \
+	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LSB; \
+	uint32_t DCN_VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR_MSB; \
+	uint32_t DCN_VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR_LSB; \
+	uint32_t DCN_VM_MX_L1_TLB_CNTL; \
+	uint32_t DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB; \
+	uint32_t DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB; \
+	uint32_t DCN_VM_SYSTEM_APERTURE_LOW_ADDR_MSB; \
+	uint32_t DCN_VM_SYSTEM_APERTURE_LOW_ADDR_LSB; \
+	uint32_t DCN_VM_SYSTEM_APERTURE_HIGH_ADDR_MSB; \
+	uint32_t DCN_VM_SYSTEM_APERTURE_HIGH_ADDR_LSB; \
+	uint32_t DCN_VM_SYSTEM_APERTURE_LOW_ADDR; \
+	uint32_t DCN_VM_SYSTEM_APERTURE_HIGH_ADDR; \
+	uint32_t DCHUBBUB_SDPIF_FB_BASE; \
+	uint32_t DCHUBBUB_SDPIF_FB_OFFSET; \
+	uint32_t DCN_VM_FB_LOCATION_TOP; \
+	uint32_t DCN_VM_FB_LOCATION_BASE; \
+	uint32_t DCN_VM_FB_OFFSET; \
+	uint32_t DCN_VM_AGP_BASE; \
+	uint32_t DCN_VM_AGP_BOT; \
+	uint32_t DCN_VM_AGP_TOP; \
+	uint32_t CURSOR_SETTINS; \
+	uint32_t CURSOR_SETTINGS; \
+	uint32_t CURSOR_SURFACE_ADDRESS_HIGH; \
+	uint32_t CURSOR_SURFACE_ADDRESS; \
+	uint32_t CURSOR_SIZE; \
+	uint32_t CURSOR_CONTROL; \
+	uint32_t CURSOR_POSITION; \
+	uint32_t CURSOR_HOT_SPOT; \
+	uint32_t CURSOR_DST_OFFSET
 
 #define HUBP_SF(reg_name, field_name, post_fix)\
 	.field_name = reg_name ## __ ## field_name ## post_fix
@@ -397,7 +394,6 @@ struct dcn_mi_registers {
 	HUBP_SF(CURSOR0_CURSOR_HOT_SPOT, CURSOR_HOT_SPOT_Y, mask_sh), \
 	HUBP_SF(CURSOR0_CURSOR_DST_OFFSET, CURSOR_DST_X_OFFSET, mask_sh)
 
-
 #define DCN_HUBP_REG_FIELD_LIST(type) \
 	type HUBP_BLANK_EN;\
 	type HUBP_TTU_DISABLE;\
@@ -577,6 +573,10 @@ struct dcn_mi_registers {
 	type CURSOR_DST_X_OFFSET; \
 	type OUTPUT_FP
 
+struct dcn_mi_registers {
+	HUBP_COMMON_REG_VARIABLE_LIST;
+};
+
 struct dcn_mi_shift {
 	DCN_HUBP_REG_FIELD_LIST(uint8_t);
 };
@@ -611,11 +611,11 @@ void hubp1_program_requestor(
 		struct _vcs_dpi_display_rq_regs_st *rq_regs);
 
 void hubp1_program_pixel_format(
-	struct dcn10_hubp *hubp,
+	struct hubp *hubp,
 	enum surface_pixel_format format);
 
 void hubp1_program_size_and_rotation(
-	struct dcn10_hubp *hubp,
+	struct hubp *hubp,
 	enum dc_rotation_angle rotation,
 	enum surface_pixel_format format,
 	const union plane_size *plane_size,
@@ -623,7 +623,7 @@ void hubp1_program_size_and_rotation(
 	bool horizontal_mirror);
 
 void hubp1_program_tiling(
-	struct dcn10_hubp *hubp,
+	struct hubp *hubp,
 	const union dc_tiling_info *info,
 	const enum surface_pixel_format pixel_format);
 
@@ -681,4 +681,6 @@ struct dcn_hubp_state {
 void hubp1_read_state(struct dcn10_hubp *hubp1,
 		struct dcn_hubp_state *s);
 
+enum cursor_pitch hubp1_get_cursor_pitch(unsigned int pitch);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 8e2ddbc2129cd3ead334f672d7cf6afb15e7711b..82572863acab747759e3d8c2787a1e2cffc1f8df 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -32,7 +32,7 @@
 #include "dce/dce_hwseq.h"
 #include "abm.h"
 #include "dmcu.h"
-#include "dcn10/dcn10_timing_generator.h"
+#include "dcn10_optc.h"
 #include "dcn10/dcn10_dpp.h"
 #include "dcn10/dcn10_mpc.h"
 #include "timing_generator.h"
@@ -43,6 +43,7 @@
 #include "custom_float.h"
 #include "dcn10_hubp.h"
 #include "dcn10_hubbub.h"
+#include "dcn10_cm_common.h"
 
 #define CTX \
 	hws->ctx
@@ -158,7 +159,7 @@ void dcn10_log_hw_state(struct dc *dc)
 		struct timing_generator *tg = pool->timing_generators[i];
 		struct dcn_otg_state s = {0};
 
-		tgn10_read_otg_state(DCN10TG_FROM_TG(tg), &s);
+		optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s);
 
 		//only print if OTG master is enabled
 		if ((s.otg_enabled & 1) == 0)
@@ -425,6 +426,34 @@ static void bios_golden_init(struct dc *dc)
 	}
 }
 
+static void false_optc_underflow_wa(
+		struct dc *dc,
+		const struct dc_stream_state *stream,
+		struct timing_generator *tg)
+{
+	int i;
+	bool underflow;
+
+	if (!dc->hwseq->wa.false_optc_underflow)
+		return;
+
+	underflow = tg->funcs->is_optc_underflow_occurred(tg);
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+		if (old_pipe_ctx->stream != stream)
+			continue;
+
+		dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, old_pipe_ctx);
+	}
+
+	tg->funcs->set_blank_data_double_buffer(tg, true);
+
+	if (tg->funcs->is_optc_underflow_occurred(tg) && !underflow)
+		tg->funcs->clear_optc_underflow(tg);
+}
+
 static enum dc_status dcn10_prog_pixclk_crtc_otg(
 		struct pipe_ctx *pipe_ctx,
 		struct dc_state *context,
@@ -433,9 +462,6 @@ static enum dc_status dcn10_prog_pixclk_crtc_otg(
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	enum dc_color_space color_space;
 	struct tg_color black_color = {0};
-	bool enableStereo    = stream->timing.timing_3d_format == TIMING_3D_FORMAT_NONE ?
-			false:true;
-	bool rightEyePolarity = stream->timing.flags.RIGHT_EYE_3D_POLARITY;
 
 	/* by upper caller loop, pipe0 is parent pipe and be called first.
 	 * back end is set up by for pipe0. Other children pipe share back end
@@ -470,11 +496,6 @@ static enum dc_status dcn10_prog_pixclk_crtc_otg(
 			&stream->timing,
 			true);
 
-	pipe_ctx->stream_res.opp->funcs->opp_set_stereo_polarity(
-				pipe_ctx->stream_res.opp,
-				enableStereo,
-				rightEyePolarity);
-
 #if 0 /* move to after enable_crtc */
 	/* TODO: OPP FMT, ABM. etc. should be done here. */
 	/* or FPGA now. instance 0 only. TODO: move to opp.c */
@@ -489,12 +510,18 @@ static enum dc_status dcn10_prog_pixclk_crtc_otg(
 	/* program otg blank color */
 	color_space = stream->output_color_space;
 	color_space_to_black_color(dc, color_space, &black_color);
-	pipe_ctx->stream_res.tg->funcs->set_blank_color(
-			pipe_ctx->stream_res.tg,
-			&black_color);
 
-	pipe_ctx->stream_res.tg->funcs->set_blank(pipe_ctx->stream_res.tg, true);
-	hwss_wait_for_blank_complete(pipe_ctx->stream_res.tg);
+	if (pipe_ctx->stream_res.tg->funcs->set_blank_color)
+		pipe_ctx->stream_res.tg->funcs->set_blank_color(
+				pipe_ctx->stream_res.tg,
+				&black_color);
+
+	if (pipe_ctx->stream_res.tg->funcs->is_blanked &&
+			!pipe_ctx->stream_res.tg->funcs->is_blanked(pipe_ctx->stream_res.tg)) {
+		pipe_ctx->stream_res.tg->funcs->set_blank(pipe_ctx->stream_res.tg, true);
+		hwss_wait_for_blank_complete(pipe_ctx->stream_res.tg);
+		false_optc_underflow_wa(dc, pipe_ctx->stream, pipe_ctx->stream_res.tg);
+	}
 
 	/* VTG is  within DCHUB command block. DCFCLK is always on */
 	if (false == pipe_ctx->stream_res.tg->funcs->enable_crtc(pipe_ctx->stream_res.tg)) {
@@ -573,41 +600,34 @@ static void plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	int fe_idx = pipe_ctx->pipe_idx;
 	struct hubp *hubp = dc->res_pool->hubps[fe_idx];
 	struct mpc *mpc = dc->res_pool->mpc;
-	int opp_id, z_idx;
-	int mpcc_id = -1;
+	int opp_id;
+	struct mpc_tree *mpc_tree_params;
+	struct mpcc *mpcc_to_remove = NULL;
 
 	/* look at tree rather than mi here to know if we already reset */
 	for (opp_id = 0; opp_id < dc->res_pool->pipe_count; opp_id++) {
 		struct output_pixel_processor *opp = dc->res_pool->opps[opp_id];
 
-		for (z_idx = 0; z_idx < opp->mpc_tree.num_pipes; z_idx++) {
-			if (opp->mpc_tree.dpp[z_idx] == fe_idx) {
-				mpcc_id = opp->mpc_tree.mpcc[z_idx];
-				break;
-			}
-		}
-		if (mpcc_id != -1)
+		mpc_tree_params = &(opp->mpc_tree_params);
+		mpcc_to_remove = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, fe_idx);
+		if (mpcc_to_remove != NULL)
 			break;
 	}
+
 	/*Already reset*/
 	if (opp_id == dc->res_pool->pipe_count)
 		return;
 
-	mpc->funcs->remove(mpc, &(dc->res_pool->opps[opp_id]->mpc_tree),
-					dc->res_pool->opps[opp_id]->inst, fe_idx);
+	mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove);
+	dc->res_pool->opps[opp_id]->mpcc_disconnect_pending[fe_idx] = true;
+
+	dc->optimized_required = true;
 
 	if (hubp->funcs->hubp_disconnect)
 		hubp->funcs->hubp_disconnect(hubp);
 
 	if (dc->debug.sanity_checks)
 		dcn10_verify_allow_pstate_change_high(dc);
-
-	pipe_ctx->stream = NULL;
-	memset(&pipe_ctx->stream_res, 0, sizeof(pipe_ctx->stream_res));
-	memset(&pipe_ctx->plane_res, 0, sizeof(pipe_ctx->plane_res));
-	pipe_ctx->top_pipe = NULL;
-	pipe_ctx->bottom_pipe = NULL;
-	pipe_ctx->plane_state = NULL;
 }
 
 static void plane_atomic_power_down(struct dc *dc, int fe_idx)
@@ -636,29 +656,30 @@ static void plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	int fe_idx = pipe_ctx->pipe_idx;
 	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = dc->res_pool->hubps[fe_idx];
-	struct mpc *mpc = dc->res_pool->mpc;
 	int opp_id = hubp->opp_id;
-	struct output_pixel_processor *opp;
 
-	if (opp_id != 0xf) {
-		mpc->funcs->wait_for_idle(mpc, hubp->mpcc_id);
-		opp = dc->res_pool->opps[hubp->opp_id];
-		opp->mpcc_disconnect_pending[hubp->mpcc_id] = false;
-		hubp->funcs->set_blank(hubp, true);
-	}
+	dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe_ctx);
 
 	REG_UPDATE(HUBP_CLK_CNTL[fe_idx],
 			HUBP_CLOCK_ENABLE, 0);
 	REG_UPDATE(DPP_CONTROL[fe_idx],
 			DPP_CLOCK_ENABLE, 0);
 
-	if (opp_id != 0xf && dc->res_pool->opps[opp_id]->mpc_tree.num_pipes == 0)
+	if (opp_id != 0xf && dc->res_pool->opps[opp_id]->mpc_tree_params.opp_list == NULL)
 		REG_UPDATE(OPP_PIPE_CONTROL[opp_id],
 				OPP_PIPE_CLOCK_EN, 0);
 
 	hubp->power_gated = true;
+	dc->optimized_required = false; /* We're powering off, no need to optimize */
 
 	plane_atomic_power_down(dc, fe_idx);
+
+	pipe_ctx->stream = NULL;
+	memset(&pipe_ctx->stream_res, 0, sizeof(pipe_ctx->stream_res));
+	memset(&pipe_ctx->plane_res, 0, sizeof(pipe_ctx->plane_res));
+	pipe_ctx->top_pipe = NULL;
+	pipe_ctx->bottom_pipe = NULL;
+	pipe_ctx->plane_state = NULL;
 }
 
 static void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
@@ -740,25 +761,27 @@ static void dcn10_init_hw(struct dc *dc)
 		}
 	}
 
+	/* Reset all MPCC muxes */
+	dc->res_pool->mpc->funcs->mpc_init(dc->res_pool->mpc);
+
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct timing_generator *tg = dc->res_pool->timing_generators[i];
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-		struct output_pixel_processor *opp = dc->res_pool->opps[i];
-		struct mpc_tree_cfg *mpc_tree = &opp->mpc_tree;
 		struct hubp *hubp = dc->res_pool->hubps[i];
 
-		mpc_tree->dpp[0] = i;
-		mpc_tree->mpcc[0] = i;
-		mpc_tree->num_pipes = 1;
-
 		pipe_ctx->stream_res.tg = tg;
 		pipe_ctx->pipe_idx = i;
 
 		pipe_ctx->plane_res.hubp = hubp;
 		hubp->mpcc_id = i;
-		hubp->opp_id = dc->res_pool->mpc->funcs->get_opp_id(dc->res_pool->mpc, i);
+		hubp->opp_id = 0xf;
 		hubp->power_gated = false;
 
+		dc->res_pool->opps[i]->mpc_tree_params.opp_id = dc->res_pool->opps[i]->inst;
+		dc->res_pool->opps[i]->mpc_tree_params.opp_list = NULL;
+		dc->res_pool->opps[i]->mpcc_disconnect_pending[i] = true;
+		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
+
 		plane_atomic_disconnect(dc, pipe_ctx);
 	}
 
@@ -929,280 +952,10 @@ static bool dcn10_set_input_transfer_func(struct pipe_ctx *pipe_ctx,
 
 	return result;
 }
-/*modify the method to handle rgb for arr_points*/
-static bool convert_to_custom_float(
-		struct pwl_result_data *rgb_resulted,
-		struct curve_points *arr_points,
-		uint32_t hw_points_num)
-{
-	struct custom_float_format fmt;
-
-	struct pwl_result_data *rgb = rgb_resulted;
-
-	uint32_t i = 0;
-
-	fmt.exponenta_bits = 6;
-	fmt.mantissa_bits = 12;
-	fmt.sign = false;
-
-	if (!convert_to_custom_float_format(arr_points[0].x, &fmt,
-					    &arr_points[0].custom_float_x)) {
-		BREAK_TO_DEBUGGER();
-		return false;
-	}
-
-	if (!convert_to_custom_float_format(arr_points[0].offset, &fmt,
-					    &arr_points[0].custom_float_offset)) {
-		BREAK_TO_DEBUGGER();
-		return false;
-	}
-
-	if (!convert_to_custom_float_format(arr_points[0].slope, &fmt,
-					    &arr_points[0].custom_float_slope)) {
-		BREAK_TO_DEBUGGER();
-		return false;
-	}
-
-	fmt.mantissa_bits = 10;
-	fmt.sign = false;
-
-	if (!convert_to_custom_float_format(arr_points[1].x, &fmt,
-					    &arr_points[1].custom_float_x)) {
-		BREAK_TO_DEBUGGER();
-		return false;
-	}
-
-	if (!convert_to_custom_float_format(arr_points[1].y, &fmt,
-					    &arr_points[1].custom_float_y)) {
-		BREAK_TO_DEBUGGER();
-		return false;
-	}
-
-	if (!convert_to_custom_float_format(arr_points[1].slope, &fmt,
-					    &arr_points[1].custom_float_slope)) {
-		BREAK_TO_DEBUGGER();
-		return false;
-	}
 
-	fmt.mantissa_bits = 12;
-	fmt.sign = true;
 
-	while (i != hw_points_num) {
-		if (!convert_to_custom_float_format(rgb->red, &fmt,
-						    &rgb->red_reg)) {
-			BREAK_TO_DEBUGGER();
-			return false;
-		}
-
-		if (!convert_to_custom_float_format(rgb->green, &fmt,
-						    &rgb->green_reg)) {
-			BREAK_TO_DEBUGGER();
-			return false;
-		}
 
-		if (!convert_to_custom_float_format(rgb->blue, &fmt,
-						    &rgb->blue_reg)) {
-			BREAK_TO_DEBUGGER();
-			return false;
-		}
 
-		if (!convert_to_custom_float_format(rgb->delta_red, &fmt,
-						    &rgb->delta_red_reg)) {
-			BREAK_TO_DEBUGGER();
-			return false;
-		}
-
-		if (!convert_to_custom_float_format(rgb->delta_green, &fmt,
-						    &rgb->delta_green_reg)) {
-			BREAK_TO_DEBUGGER();
-			return false;
-		}
-
-		if (!convert_to_custom_float_format(rgb->delta_blue, &fmt,
-						    &rgb->delta_blue_reg)) {
-			BREAK_TO_DEBUGGER();
-			return false;
-		}
-
-		++rgb;
-		++i;
-	}
-
-	return true;
-}
-#define MAX_REGIONS_NUMBER 34
-#define MAX_LOW_POINT      25
-#define NUMBER_SEGMENTS    32
-
-static bool
-dcn10_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
-				     struct pwl_params *regamma_params)
-{
-	struct curve_points *arr_points;
-	struct pwl_result_data *rgb_resulted;
-	struct pwl_result_data *rgb;
-	struct pwl_result_data *rgb_plus_1;
-	struct fixed31_32 y_r;
-	struct fixed31_32 y_g;
-	struct fixed31_32 y_b;
-	struct fixed31_32 y1_min;
-	struct fixed31_32 y3_max;
-
-	int32_t segment_start, segment_end;
-	int32_t i;
-	uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points;
-
-	if (output_tf == NULL || regamma_params == NULL || output_tf->type == TF_TYPE_BYPASS)
-		return false;
-
-	PERF_TRACE();
-
-	arr_points = regamma_params->arr_points;
-	rgb_resulted = regamma_params->rgb_resulted;
-	hw_points = 0;
-
-	memset(regamma_params, 0, sizeof(struct pwl_params));
-	memset(seg_distr, 0, sizeof(seg_distr));
-
-	if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
-		/* 32 segments
-		 * segments are from 2^-25 to 2^7
-		 */
-		for (i = 0; i < 32 ; i++)
-			seg_distr[i] = 3;
-
-		segment_start = -25;
-		segment_end   = 7;
-	} else {
-		/* 10 segments
-		 * segment is from 2^-10 to 2^0
-		 * There are less than 256 points, for optimization
-		 */
-		seg_distr[0] = 3;
-		seg_distr[1] = 4;
-		seg_distr[2] = 4;
-		seg_distr[3] = 4;
-		seg_distr[4] = 4;
-		seg_distr[5] = 4;
-		seg_distr[6] = 4;
-		seg_distr[7] = 4;
-		seg_distr[8] = 5;
-		seg_distr[9] = 5;
-
-		segment_start = -10;
-		segment_end = 0;
-	}
-
-	for (i = segment_end - segment_start; i < MAX_REGIONS_NUMBER ; i++)
-		seg_distr[i] = -1;
-
-	for (k = 0; k < MAX_REGIONS_NUMBER; k++) {
-		if (seg_distr[k] != -1)
-			hw_points += (1 << seg_distr[k]);
-	}
-
-	j = 0;
-	for (k = 0; k < (segment_end - segment_start); k++) {
-		increment = NUMBER_SEGMENTS / (1 << seg_distr[k]);
-		start_index = (segment_start + k + MAX_LOW_POINT) * NUMBER_SEGMENTS;
-		for (i = start_index; i < start_index + NUMBER_SEGMENTS; i += increment) {
-			if (j == hw_points - 1)
-				break;
-			rgb_resulted[j].red = output_tf->tf_pts.red[i];
-			rgb_resulted[j].green = output_tf->tf_pts.green[i];
-			rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
-			j++;
-		}
-	}
-
-	/* last point */
-	start_index = (segment_end + MAX_LOW_POINT) * NUMBER_SEGMENTS;
-	rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
-	rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
-	rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
-
-	arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
-					     dal_fixed31_32_from_int(segment_start));
-	arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
-					     dal_fixed31_32_from_int(segment_end));
-
-	y_r = rgb_resulted[0].red;
-	y_g = rgb_resulted[0].green;
-	y_b = rgb_resulted[0].blue;
-
-	y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b));
-
-	arr_points[0].y = y1_min;
-	arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y, arr_points[0].x);
-	y_r = rgb_resulted[hw_points - 1].red;
-	y_g = rgb_resulted[hw_points - 1].green;
-	y_b = rgb_resulted[hw_points - 1].blue;
-
-	/* see comment above, m_arrPoints[1].y should be the Y value for the
-	 * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
-	 */
-	y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b));
-
-	arr_points[1].y = y3_max;
-
-	arr_points[1].slope = dal_fixed31_32_zero;
-
-	if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
-		/* for PQ, we want to have a straight line from last HW X point,
-		 * and the slope to be such that we hit 1.0 at 10000 nits.
-		 */
-		const struct fixed31_32 end_value =
-				dal_fixed31_32_from_int(125);
-
-		arr_points[1].slope = dal_fixed31_32_div(
-			dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y),
-			dal_fixed31_32_sub(end_value, arr_points[1].x));
-	}
-
-	regamma_params->hw_points_num = hw_points;
-
-	i = 1;
-	for (k = 0; k < MAX_REGIONS_NUMBER && i < MAX_REGIONS_NUMBER; k++) {
-		if (seg_distr[k] != -1) {
-			regamma_params->arr_curve_points[k].segments_num =
-					seg_distr[k];
-			regamma_params->arr_curve_points[i].offset =
-					regamma_params->arr_curve_points[k].offset + (1 << seg_distr[k]);
-		}
-		i++;
-	}
-
-	if (seg_distr[k] != -1)
-		regamma_params->arr_curve_points[k].segments_num = seg_distr[k];
-
-	rgb = rgb_resulted;
-	rgb_plus_1 = rgb_resulted + 1;
-
-	i = 1;
-
-	while (i != hw_points + 1) {
-		if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red))
-			rgb_plus_1->red = rgb->red;
-		if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green))
-			rgb_plus_1->green = rgb->green;
-		if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue))
-			rgb_plus_1->blue = rgb->blue;
-
-		rgb->delta_red = dal_fixed31_32_sub(rgb_plus_1->red, rgb->red);
-		rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green);
-		rgb->delta_blue = dal_fixed31_32_sub(rgb_plus_1->blue, rgb->blue);
-
-		++rgb_plus_1;
-		++rgb;
-		++i;
-	}
-
-	convert_to_custom_float(rgb_resulted, arr_points, hw_points);
-
-	PERF_TRACE();
-
-	return true;
-}
 
 static bool
 dcn10_set_output_transfer_func(struct pipe_ctx *pipe_ctx,
@@ -1223,9 +976,9 @@ dcn10_set_output_transfer_func(struct pipe_ctx *pipe_ctx,
 	/* dcn10_translate_regamma_to_hw_format takes 750us, only do it when full
 	 * update.
 	 */
-	else if (dcn10_translate_regamma_to_hw_format(
+	else if (cm_helper_translate_curve_to_hw_format(
 			stream->out_transfer_func,
-			&dpp->regamma_params)) {
+			&dpp->regamma_params, false)) {
 		dpp->funcs->dpp_program_regamma_pwl(
 				dpp,
 				&dpp->regamma_params, OPP_REGAMMA_USER);
@@ -1579,7 +1332,6 @@ static void dcn10_enable_plane(
 	/* make sure OPP_PIPE_CLOCK_EN = 1 */
 	REG_UPDATE(OPP_PIPE_CONTROL[pipe_ctx->stream_res.tg->inst],
 			OPP_PIPE_CLOCK_EN, 1);
-	/*TODO: REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_WDIVIDER, 0x1f);*/
 
 /* TODO: enable/disable in dm as per update type.
 	if (plane_state) {
@@ -1672,60 +1424,15 @@ static void program_csc_matrix(struct pipe_ctx *pipe_ctx,
 		enum dc_color_space colorspace,
 		uint16_t *matrix)
 {
-	int i;
-	struct out_csc_color_matrix tbl_entry;
-
 	if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) {
-			enum dc_color_space color_space =
-				pipe_ctx->stream->output_color_space;
-
-			//uint16_t matrix[12];
-			for (i = 0; i < 12; i++)
-				tbl_entry.regval[i] = pipe_ctx->stream->csc_color_matrix.matrix[i];
-
-			tbl_entry.color_space = color_space;
-			//tbl_entry.regval = matrix;
-
 			if (pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment != NULL)
-				pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment(pipe_ctx->plane_res.dpp, &tbl_entry);
+				pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment(pipe_ctx->plane_res.dpp, matrix);
 	} else {
 		if (pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_default != NULL)
 			pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_default(pipe_ctx->plane_res.dpp, colorspace);
 	}
 }
 
-static void set_mpc_output_csc(struct dc *dc,
-		struct pipe_ctx *pipe_ctx,
-		enum dc_color_space colorspace,
-		uint16_t *matrix,
-		int opp_id)
-{
-	struct mpc *mpc = dc->res_pool->mpc;
-	int i;
-	struct out_csc_color_matrix tbl_entry;
-	enum mpc_output_csc_mode ocsc_mode = MPC_OUTPUT_CSC_COEF_A;
-
-
-	if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) {
-		//uint16_t matrix[12];
-		for (i = 0; i < 12; i++)
-			tbl_entry.regval[i] = matrix[i];
-		tbl_entry.color_space = colorspace;
-
-		if (mpc->funcs->set_output_csc != NULL)
-			mpc->funcs->set_output_csc(mpc,
-					opp_id,
-					&tbl_entry,
-					ocsc_mode);
-	} else {
-		if (mpc->funcs->set_ocsc_default != NULL)
-			mpc->funcs->set_ocsc_default(mpc,
-					opp_id,
-					colorspace,
-					ocsc_mode);
-	}
-}
-
 static void program_output_csc(struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
 		enum dc_color_space colorspace,
@@ -1736,13 +1443,6 @@ static void program_output_csc(struct dc *dc,
 		program_csc_matrix(pipe_ctx,
 				colorspace,
 				matrix);
-	else
-		set_mpc_output_csc(dc,
-			pipe_ctx,
-			colorspace,
-			matrix,
-			opp_id);
-
 }
 
 static bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
@@ -1914,35 +1614,73 @@ static void update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state)
 
 static void update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
-	struct mpcc_cfg mpcc_cfg = {0};
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
-	struct pipe_ctx *top_pipe;
-	bool per_pixel_alpha =
-			pipe_ctx->plane_state->per_pixel_alpha && pipe_ctx->bottom_pipe;
+	struct mpcc_blnd_cfg blnd_cfg;
+	bool per_pixel_alpha = pipe_ctx->plane_state->per_pixel_alpha && pipe_ctx->bottom_pipe;
+	int mpcc_id;
+	struct mpcc *new_mpcc;
+	struct mpc *mpc = dc->res_pool->mpc;
+	struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params);
 
 	/* TODO: proper fix once fpga works */
 
-	mpcc_cfg.dpp_id = hubp->inst;
-	mpcc_cfg.opp_id = pipe_ctx->stream_res.opp->inst;
-	mpcc_cfg.tree_cfg = &(pipe_ctx->stream_res.opp->mpc_tree);
-	for (top_pipe = pipe_ctx->top_pipe; top_pipe; top_pipe = top_pipe->top_pipe)
-		mpcc_cfg.z_index++;
 	if (dc->debug.surface_visual_confirm)
 		dcn10_get_surface_visual_confirm_color(
-				pipe_ctx, &mpcc_cfg.black_color);
+				pipe_ctx, &blnd_cfg.black_color);
 	else
 		color_space_to_black_color(
 			dc, pipe_ctx->stream->output_color_space,
-			&mpcc_cfg.black_color);
-	mpcc_cfg.per_pixel_alpha = per_pixel_alpha;
+			&blnd_cfg.black_color);
+
+	if (per_pixel_alpha)
+		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
+	else
+		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
+
+	blnd_cfg.overlap_only = false;
+	blnd_cfg.global_alpha = 0xff;
+	blnd_cfg.global_gain = 0xff;
+
 	/* DCN1.0 has output CM before MPC which seems to screw with
 	 * pre-multiplied alpha.
 	 */
-	mpcc_cfg.pre_multiplied_alpha = is_rgb_cspace(
+	blnd_cfg.pre_multiplied_alpha = is_rgb_cspace(
 			pipe_ctx->stream->output_color_space)
 					&& per_pixel_alpha;
-	hubp->mpcc_id = dc->res_pool->mpc->funcs->add(dc->res_pool->mpc, &mpcc_cfg);
-	hubp->opp_id = mpcc_cfg.opp_id;
+
+	/*
+	 * TODO: remove hack
+	 * Note: currently there is a bug in init_hw such that
+	 * on resume from hibernate, BIOS sets up MPCC0, and
+	 * we do mpcc_remove but the mpcc cannot go to idle
+	 * after remove. This cause us to pick mpcc1 here,
+	 * which causes a pstate hang for yet unknown reason.
+	 */
+	mpcc_id = hubp->inst;
+
+	/* check if this MPCC is already being used */
+	new_mpcc = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, mpcc_id);
+	/* remove MPCC if being used */
+	if (new_mpcc != NULL)
+		mpc->funcs->remove_mpcc(mpc, mpc_tree_params, new_mpcc);
+	else
+		if (dc->debug.sanity_checks)
+			mpc->funcs->assert_mpcc_idle_before_connect(
+					dc->res_pool->mpc, mpcc_id);
+
+	/* Call MPC to insert new plane */
+	new_mpcc = mpc->funcs->insert_plane(dc->res_pool->mpc,
+			mpc_tree_params,
+			&blnd_cfg,
+			NULL,
+			NULL,
+			hubp->inst,
+			mpcc_id);
+
+	ASSERT(new_mpcc != NULL);
+
+	hubp->opp_id = pipe_ctx->stream_res.opp->inst;
+	hubp->mpcc_id = mpcc_id;
 }
 
 static void update_scaler(struct pipe_ctx *pipe_ctx)
@@ -1971,7 +1709,7 @@ static void update_dchubp_dpp(
 	union plane_size size = plane_state->plane_size;
 
 	/* depends on DML calculation, DPP clock value may change dynamically */
-	if (pipe_ctx->plane_state->update_flags.raw != 0) {
+	if (plane_state->update_flags.bits.full_update) {
 		enable_dppclk(
 			dc->hwseq,
 			pipe_ctx->pipe_idx,
@@ -2015,7 +1753,8 @@ static void update_dchubp_dpp(
 	}
 
 	if (plane_state->update_flags.bits.full_update ||
-		plane_state->update_flags.bits.scaling_change) {
+		plane_state->update_flags.bits.scaling_change ||
+		plane_state->update_flags.bits.position_change) {
 		hubp->funcs->mem_program_viewport(
 			hubp,
 			&pipe_ctx->plane_res.scl_data.viewport,
@@ -2037,7 +1776,9 @@ static void update_dchubp_dpp(
 		plane_state->update_flags.bits.horizontal_mirror_change ||
 		plane_state->update_flags.bits.rotation_change ||
 		plane_state->update_flags.bits.swizzle_change ||
-		plane_state->update_flags.bits.bpp_change) {
+		plane_state->update_flags.bits.dcc_change ||
+		plane_state->update_flags.bits.bpp_change ||
+		plane_state->update_flags.bits.scaling_change) {
 		hubp->funcs->hubp_program_surface_config(
 			hubp,
 			plane_state->format,
@@ -2062,6 +1803,7 @@ static void program_all_pipe_in_tree(
 		struct pipe_ctx *pipe_ctx,
 		struct dc_state *context)
 {
+
 	if (pipe_ctx->top_pipe == NULL) {
 
 		pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset;
@@ -2072,7 +1814,11 @@ static void program_all_pipe_in_tree(
 
 		pipe_ctx->stream_res.tg->funcs->program_global_sync(
 				pipe_ctx->stream_res.tg);
-		pipe_ctx->stream_res.tg->funcs->set_blank(pipe_ctx->stream_res.tg, !is_pipe_tree_visible(pipe_ctx));
+
+		if (pipe_ctx->stream_res.tg->funcs->set_blank)
+			pipe_ctx->stream_res.tg->funcs->set_blank(
+					pipe_ctx->stream_res.tg,
+					!is_pipe_tree_visible(pipe_ctx));
 	}
 
 	if (pipe_ctx->plane_state != NULL) {
@@ -2179,6 +1925,7 @@ static void dcn10_apply_ctx_for_surface(
 {
 	int i;
 	struct timing_generator *tg;
+	struct output_pixel_processor *opp;
 	bool removed_pipe[4] = { false };
 	unsigned int ref_clk_mhz = dc->res_pool->ref_clock_inKhz/1000;
 	bool program_water_mark = false;
@@ -2189,6 +1936,8 @@ static void dcn10_apply_ctx_for_surface(
 	if (!top_pipe_to_program)
 		return;
 
+	opp = top_pipe_to_program->stream_res.opp;
+
 	tg = top_pipe_to_program->stream_res.tg;
 
 	tg->funcs->lock(tg);
@@ -2196,7 +1945,8 @@ static void dcn10_apply_ctx_for_surface(
 	if (num_planes == 0) {
 
 		/* OTG blank before remove all front end */
-		tg->funcs->set_blank(tg, true);
+		if (tg->funcs->set_blank)
+			tg->funcs->set_blank(tg, true);
 	}
 
 	/* Disconnect unused mpcc */
@@ -2236,24 +1986,14 @@ static void dcn10_apply_ctx_for_surface(
 		}
 	}
 
-	if (num_planes > 0) {
+	if (num_planes > 0)
 		program_all_pipe_in_tree(dc, top_pipe_to_program, context);
 
-		/* TODO: this is a hack w/a for switching from mpo to pipe split */
-		if (stream->cursor_attributes.address.quad_part != 0) {
-			struct dc_cursor_position position = { 0 };
-
-			dc_stream_set_cursor_position(
-				(struct dc_stream_state *)stream,
-				&position);
-			dc_stream_set_cursor_attributes(
-				(struct dc_stream_state *)stream,
-				&stream->cursor_attributes);
-		}
-	}
-
 	tg->funcs->unlock(tg);
 
+	if (num_planes == 0)
+		false_optc_underflow_wa(dc, stream, tg);
+
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *old_pipe_ctx =
 				&dc->current_state->res_ctx.pipe_ctx[i];
@@ -2264,7 +2004,7 @@ static void dcn10_apply_ctx_for_surface(
 			pipe_ctx->plane_state->update_flags.bits.full_update)
 			program_water_mark = true;
 
-		if (removed_pipe[i] && num_planes == 0)
+		if (removed_pipe[i])
 			dcn10_disable_plane(dc, old_pipe_ctx);
 	}
 
@@ -2273,6 +2013,7 @@ static void dcn10_apply_ctx_for_surface(
 			/* pstate stuck check after watermark update */
 			dcn10_verify_allow_pstate_change_high(dc);
 		}
+
 		/* watermark is for all pipes */
 		hubbub1_program_watermarks(dc->res_pool->hubbub,
 				&context->bw.dcn.watermarks, ref_clk_mhz);
@@ -2502,10 +2243,10 @@ static void dcn10_setup_stereo(struct pipe_ctx *pipe_ctx, struct dc *dc)
 
 	dcn10_config_stereo_parameters(stream, &flags);
 
-	pipe_ctx->stream_res.opp->funcs->opp_set_stereo_polarity(
+	pipe_ctx->stream_res.opp->funcs->opp_program_stereo(
 		pipe_ctx->stream_res.opp,
 		flags.PROGRAM_STEREO == 1 ? true:false,
-		stream->timing.flags.RIGHT_EYE_3D_POLARITY == 1 ? true:false);
+		&stream->timing);
 
 	pipe_ctx->stream_res.tg->funcs->program_stereo(
 		pipe_ctx->stream_res.tg,
@@ -2619,7 +2360,8 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
 	.pplib_apply_display_requirements =
 			dcn10_pplib_apply_display_requirements,
 	.edp_backlight_control = hwss_edp_backlight_control,
-	.edp_power_control = hwss_edp_power_control
+	.edp_power_control = hwss_edp_power_control,
+	.edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready,
 };
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
index b016f4cbd45c74c5931ad9693d8cd3b5a3da69ce..179890b1a8c4e5029501a0caf42ed8acad1c4099 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
@@ -25,8 +25,6 @@
 
 #include "reg_helper.h"
 #include "dcn10_mpc.h"
-#include "dc.h"
-#include "mem_input.h"
 
 #define REG(reg)\
 	mpc10->mpc_regs->reg
@@ -38,17 +36,13 @@
 #define FN(reg_name, field_name) \
 	mpc10->mpc_shift->field_name, mpc10->mpc_mask->field_name
 
-#define MODE_TOP_ONLY 1
-#define MODE_BLEND 3
-#define BLND_PP_ALPHA 0
-#define BLND_GLOBAL_ALPHA 2
 
-
-static void mpc10_set_bg_color(
-		struct dcn10_mpc *mpc10,
+void mpc1_set_bg_color(struct mpc *mpc,
 		struct tg_color *bg_color,
-		int id)
+		int mpcc_id)
 {
+	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
+
 	/* mpc color is 12 bit.  tg_color is 10 bit */
 	/* todo: might want to use 16 bit to represent color and have each
 	 * hw block translate to correct color depth.
@@ -57,15 +51,47 @@ static void mpc10_set_bg_color(
 	uint32_t bg_g_y = bg_color->color_g_y << 2;
 	uint32_t bg_b_cb = bg_color->color_b_cb << 2;
 
-	REG_SET(MPCC_BG_R_CR[id], 0,
+	REG_SET(MPCC_BG_R_CR[mpcc_id], 0,
 			MPCC_BG_R_CR, bg_r_cr);
-	REG_SET(MPCC_BG_G_Y[id], 0,
+	REG_SET(MPCC_BG_G_Y[mpcc_id], 0,
 			MPCC_BG_G_Y, bg_g_y);
-	REG_SET(MPCC_BG_B_CB[id], 0,
+	REG_SET(MPCC_BG_B_CB[mpcc_id], 0,
 			MPCC_BG_B_CB, bg_b_cb);
 }
 
-void mpc10_assert_idle_mpcc(struct mpc *mpc, int id)
+static void mpc1_update_blending(
+	struct mpc *mpc,
+	struct mpcc_blnd_cfg *blnd_cfg,
+	int mpcc_id)
+{
+	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
+
+	REG_UPDATE_5(MPCC_CONTROL[mpcc_id],
+			MPCC_ALPHA_BLND_MODE,		blnd_cfg->alpha_mode,
+			MPCC_ALPHA_MULTIPLIED_MODE,	blnd_cfg->pre_multiplied_alpha,
+			MPCC_BLND_ACTIVE_OVERLAP_ONLY,	blnd_cfg->overlap_only,
+			MPCC_GLOBAL_ALPHA,		blnd_cfg->global_alpha,
+			MPCC_GLOBAL_GAIN,		blnd_cfg->global_gain);
+
+	mpc1_set_bg_color(mpc, &blnd_cfg->black_color, mpcc_id);
+}
+
+void mpc1_update_stereo_mix(
+	struct mpc *mpc,
+	struct mpcc_sm_cfg *sm_cfg,
+	int mpcc_id)
+{
+	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
+
+	REG_UPDATE_6(MPCC_SM_CONTROL[mpcc_id],
+			MPCC_SM_EN,			sm_cfg->enable,
+			MPCC_SM_MODE,			sm_cfg->sm_mode,
+			MPCC_SM_FRAME_ALT,		sm_cfg->frame_alt,
+			MPCC_SM_FIELD_ALT,		sm_cfg->field_alt,
+			MPCC_SM_FORCE_NEXT_FRAME_POL,	sm_cfg->force_next_frame_porlarity,
+			MPCC_SM_FORCE_NEXT_TOP_POL,	sm_cfg->force_next_field_polarity);
+}
+void mpc1_assert_idle_mpcc(struct mpc *mpc, int id)
 {
 	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
 
@@ -75,39 +101,52 @@ void mpc10_assert_idle_mpcc(struct mpc *mpc, int id)
 			1, 100000);
 }
 
-static int mpc10_get_idle_mpcc_id(struct dcn10_mpc *mpc10)
+struct mpcc *mpc1_get_mpcc(struct mpc *mpc, int mpcc_id)
 {
-	int i;
-	int last_free_mpcc_id = -1;
+	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
 
-	for (i = 0; i < mpc10->num_mpcc; i++) {
-		uint32_t is_idle = 0;
+	ASSERT(mpcc_id < mpc10->num_mpcc);
+	return &(mpc->mpcc_array[mpcc_id]);
+}
 
-		if (mpc10->mpcc_in_use_mask & 1 << i)
-			continue;
+struct mpcc *mpc1_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
+{
+	struct mpcc *tmp_mpcc = tree->opp_list;
 
-		last_free_mpcc_id = i;
-		REG_GET(MPCC_STATUS[i], MPCC_IDLE, &is_idle);
-		if (is_idle)
-			return i;
+	while (tmp_mpcc != NULL) {
+		if (tmp_mpcc->dpp_id == dpp_id)
+			return tmp_mpcc;
+		tmp_mpcc = tmp_mpcc->mpcc_bot;
 	}
+	return NULL;
+}
 
-	/* This assert should never trigger, we have mpcc leak if it does */
-	ASSERT(last_free_mpcc_id != -1);
-
-	mpc10_assert_idle_mpcc(&mpc10->base, last_free_mpcc_id);
-	return last_free_mpcc_id;
+bool mpc1_is_mpcc_idle(struct mpc *mpc, int mpcc_id)
+{
+	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
+	unsigned int top_sel;
+	unsigned int opp_id;
+	unsigned int idle;
+
+	REG_GET(MPCC_TOP_SEL[mpcc_id], MPCC_TOP_SEL, &top_sel);
+	REG_GET(MPCC_OPP_ID[mpcc_id],  MPCC_OPP_ID, &opp_id);
+	REG_GET(MPCC_STATUS[mpcc_id],  MPCC_IDLE,   &idle);
+	if (top_sel == 0xf && opp_id == 0xf && idle)
+		return true;
+	else
+		return false;
 }
 
-static void mpc10_assert_mpcc_idle_before_connect(struct dcn10_mpc *mpc10, int id)
+void mpc1_assert_mpcc_idle_before_connect(struct mpc *mpc, int mpcc_id)
 {
+	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
 	unsigned int top_sel, mpc_busy, mpc_idle;
 
-	REG_GET(MPCC_TOP_SEL[id],
+	REG_GET(MPCC_TOP_SEL[mpcc_id],
 			MPCC_TOP_SEL, &top_sel);
 
 	if (top_sel == 0xf) {
-		REG_GET_2(MPCC_STATUS[id],
+		REG_GET_2(MPCC_STATUS[mpcc_id],
 				MPCC_BUSY, &mpc_busy,
 				MPCC_IDLE, &mpc_idle);
 
@@ -116,241 +155,269 @@ static void mpc10_assert_mpcc_idle_before_connect(struct dcn10_mpc *mpc10, int i
 	}
 }
 
-void mpc10_mpcc_remove(
-		struct mpc *mpc,
-		struct mpc_tree_cfg *tree_cfg,
-		int opp_id,
-		int dpp_id)
-{
-	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
-	int mpcc_id, z_idx;
-
-	/* find z_idx for the dpp to be removed */
-	for (z_idx = 0; z_idx < tree_cfg->num_pipes; z_idx++)
-		if (tree_cfg->dpp[z_idx] == dpp_id)
-			break;
-
-	if (z_idx == tree_cfg->num_pipes) {
-		/* In case of resume from S3/S4, remove mpcc from bios left over */
-		REG_SET(MPCC_OPP_ID[dpp_id], 0,
-				MPCC_OPP_ID, 0xf);
-		REG_SET(MPCC_TOP_SEL[dpp_id], 0,
-				MPCC_TOP_SEL, 0xf);
-		REG_SET(MPCC_BOT_SEL[dpp_id], 0,
-				MPCC_BOT_SEL, 0xf);
-		return;
-	}
-
-	mpcc_id = tree_cfg->mpcc[z_idx];
-
-	REG_SET(MPCC_OPP_ID[mpcc_id], 0,
-			MPCC_OPP_ID, 0xf);
-	REG_SET(MPCC_TOP_SEL[mpcc_id], 0,
-			MPCC_TOP_SEL, 0xf);
-	REG_SET(MPCC_BOT_SEL[mpcc_id], 0,
-			MPCC_BOT_SEL, 0xf);
-
-	if (z_idx > 0) {
-		int top_mpcc_id = tree_cfg->mpcc[z_idx - 1];
-
-		if (z_idx + 1 < tree_cfg->num_pipes)
-			/* mpcc to be removed is in the middle of the tree */
-			REG_SET(MPCC_BOT_SEL[top_mpcc_id], 0,
-					MPCC_BOT_SEL, tree_cfg->mpcc[z_idx + 1]);
-		else {
-			/* mpcc to be removed is at the bottom of the tree */
-			REG_SET(MPCC_BOT_SEL[top_mpcc_id], 0,
-					MPCC_BOT_SEL, 0xf);
-			REG_UPDATE(MPCC_CONTROL[top_mpcc_id],
-					MPCC_MODE, MODE_TOP_ONLY);
-		}
-	} else if (tree_cfg->num_pipes > 1)
-		/* mpcc to be removed is at the top of the tree */
-		REG_SET(MUX[opp_id], 0,
-				MPC_OUT_MUX, tree_cfg->mpcc[z_idx + 1]);
-	else
-		/* mpcc to be removed is the only one in the tree */
-		REG_SET(MUX[opp_id], 0, MPC_OUT_MUX, 0xf);
-
-	/* mark this mpcc as not in use */
-	mpc10->mpcc_in_use_mask &= ~(1 << mpcc_id);
-	tree_cfg->num_pipes--;
-	for (; z_idx < tree_cfg->num_pipes; z_idx++) {
-		tree_cfg->dpp[z_idx] = tree_cfg->dpp[z_idx + 1];
-		tree_cfg->mpcc[z_idx] = tree_cfg->mpcc[z_idx + 1];
-	}
-	tree_cfg->dpp[tree_cfg->num_pipes] = 0xdeadbeef;
-	tree_cfg->mpcc[tree_cfg->num_pipes] = 0xdeadbeef;
-}
-
-static void mpc10_add_to_tree_cfg(
+/*
+ * Insert DPP into MPC tree based on specified blending position.
+ * Only used for planes that are part of blending chain for OPP output
+ *
+ * Parameters:
+ * [in/out] mpc		- MPC context.
+ * [in/out] tree	- MPC tree structure that plane will be added to.
+ * [in]	blnd_cfg	- MPCC blending configuration for the new blending layer.
+ * [in]	sm_cfg		- MPCC stereo mix configuration for the new blending layer.
+ *			  stereo mix must disable for the very bottom layer of the tree config.
+ * [in]	insert_above_mpcc - Insert new plane above this MPCC.  If NULL, insert as bottom plane.
+ * [in]	dpp_id		- DPP instance for the plane to be added.
+ * [in]	mpcc_id		- The MPCC physical instance to use for blending.
+ *
+ * Return:  struct mpcc* - MPCC that was added.
+ */
+struct mpcc *mpc1_insert_plane(
 	struct mpc *mpc,
-	struct mpcc_cfg *cfg,
+	struct mpc_tree *tree,
+	struct mpcc_blnd_cfg *blnd_cfg,
+	struct mpcc_sm_cfg *sm_cfg,
+	struct mpcc *insert_above_mpcc,
+	int dpp_id,
 	int mpcc_id)
 {
 	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
-	int mpcc_mode = MODE_TOP_ONLY;
-	int position = cfg->z_index;
-	struct mpc_tree_cfg *tree_cfg = cfg->tree_cfg;
-	int alpha_blnd_mode = cfg->per_pixel_alpha ?
-			BLND_PP_ALPHA : BLND_GLOBAL_ALPHA;
-	int z_idx;
+	struct mpcc *new_mpcc = NULL;
 
-	REG_SET(MPCC_OPP_ID[mpcc_id], 0,
-			MPCC_OPP_ID, cfg->opp_id);
+	/* sanity check parameters */
+	ASSERT(mpcc_id < mpc10->num_mpcc);
+	ASSERT(!(mpc10->mpcc_in_use_mask & 1 << mpcc_id));
 
-	REG_SET(MPCC_TOP_SEL[mpcc_id], 0,
-			MPCC_TOP_SEL, cfg->dpp_id);
+	if (insert_above_mpcc) {
+		/* check insert_above_mpcc exist in tree->opp_list */
+		struct mpcc *temp_mpcc = tree->opp_list;
 
-	if (position == 0) {
-		/* idle dpp/mpcc is added to the top layer of tree */
+		while (temp_mpcc && temp_mpcc->mpcc_bot != insert_above_mpcc)
+			temp_mpcc = temp_mpcc->mpcc_bot;
+		if (temp_mpcc == NULL)
+			return NULL;
+	}
 
-		if (tree_cfg->num_pipes > 0) {
-			/* get instance of previous top mpcc */
-			int prev_top_mpcc_id = tree_cfg->mpcc[0];
+	/* Get and update MPCC struct parameters */
+	new_mpcc = mpc1_get_mpcc(mpc, mpcc_id);
+	new_mpcc->dpp_id = dpp_id;
 
-			REG_SET(MPCC_BOT_SEL[mpcc_id], 0,
-					MPCC_BOT_SEL, prev_top_mpcc_id);
-			mpcc_mode = MODE_BLEND;
+	/* program mux and MPCC_MODE */
+	if (insert_above_mpcc) {
+		new_mpcc->mpcc_bot = insert_above_mpcc;
+		REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, insert_above_mpcc->mpcc_id);
+		REG_UPDATE(MPCC_CONTROL[mpcc_id], MPCC_MODE, MPCC_BLEND_MODE_TOP_BOT_BLENDING);
+	} else {
+		new_mpcc->mpcc_bot = NULL;
+		REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf);
+		REG_UPDATE(MPCC_CONTROL[mpcc_id], MPCC_MODE, MPCC_BLEND_MODE_TOP_LAYER_PASSTHROUGH);
+	}
+	REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, dpp_id);
+	REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, tree->opp_id);
+
+	/* update mpc tree mux setting */
+	if (tree->opp_list == insert_above_mpcc) {
+		/* insert the toppest mpcc */
+		tree->opp_list = new_mpcc;
+		REG_UPDATE(MUX[tree->opp_id], MPC_OUT_MUX, mpcc_id);
+	} else {
+		/* find insert position */
+		struct mpcc *temp_mpcc = tree->opp_list;
+
+		while (temp_mpcc && temp_mpcc->mpcc_bot != insert_above_mpcc)
+			temp_mpcc = temp_mpcc->mpcc_bot;
+		if (temp_mpcc && temp_mpcc->mpcc_bot == insert_above_mpcc) {
+			REG_SET(MPCC_BOT_SEL[temp_mpcc->mpcc_id], 0, MPCC_BOT_SEL, mpcc_id);
+			temp_mpcc->mpcc_bot = new_mpcc;
+			if (!insert_above_mpcc)
+				REG_UPDATE(MPCC_CONTROL[temp_mpcc->mpcc_id],
+						MPCC_MODE, MPCC_BLEND_MODE_TOP_BOT_BLENDING);
 		}
+	}
 
-		/* opp will get new output. from new added mpcc */
-		REG_SET(MUX[cfg->opp_id], 0, MPC_OUT_MUX, mpcc_id);
-
-	} else if (position == tree_cfg->num_pipes) {
-		/* idle dpp/mpcc is added to the bottom layer of tree */
-
-		/* get instance of previous bottom mpcc, set to middle layer */
-		int prev_bot_mpcc_id = tree_cfg->mpcc[tree_cfg->num_pipes - 1];
-
-		REG_SET(MPCC_BOT_SEL[prev_bot_mpcc_id], 0,
-				MPCC_BOT_SEL, mpcc_id);
-		REG_UPDATE(MPCC_CONTROL[prev_bot_mpcc_id],
-				MPCC_MODE, MODE_BLEND);
-
-		/* mpcc_id become new bottom mpcc*/
-		REG_SET(MPCC_BOT_SEL[mpcc_id], 0,
-				MPCC_BOT_SEL, 0xf);
+	/* update the blending configuration */
+	new_mpcc->blnd_cfg = *blnd_cfg;
+	mpc->funcs->update_blending(mpc, &new_mpcc->blnd_cfg, mpcc_id);
 
-	} else {
-		/* idle dpp/mpcc is added to middle of tree */
-		int above_mpcc_id = tree_cfg->mpcc[position - 1];
-		int below_mpcc_id = tree_cfg->mpcc[position];
-
-		/* mpcc above new mpcc_id has new bottom mux*/
-		REG_SET(MPCC_BOT_SEL[above_mpcc_id], 0,
-				MPCC_BOT_SEL, mpcc_id);
-		REG_UPDATE(MPCC_CONTROL[above_mpcc_id],
-				MPCC_MODE, MODE_BLEND);
-
-		/* mpcc_id bottom mux is from below mpcc*/
-		REG_SET(MPCC_BOT_SEL[mpcc_id], 0,
-				MPCC_BOT_SEL, below_mpcc_id);
-		mpcc_mode = MODE_BLEND;
+	/* update the stereo mix settings, if provided */
+	if (sm_cfg != NULL) {
+		new_mpcc->sm_cfg = *sm_cfg;
+		mpc1_update_stereo_mix(mpc, sm_cfg, mpcc_id);
 	}
 
-	REG_SET_4(MPCC_CONTROL[mpcc_id], 0xffffffff,
-		MPCC_MODE, mpcc_mode,
-		MPCC_ALPHA_BLND_MODE, alpha_blnd_mode,
-		MPCC_ALPHA_MULTIPLIED_MODE, cfg->pre_multiplied_alpha,
-		MPCC_BLND_ACTIVE_OVERLAP_ONLY, false);
+	/* mark this mpcc as in use */
+	mpc10->mpcc_in_use_mask |= 1 << mpcc_id;
 
-	/* update mpc_tree_cfg with new mpcc */
-	for (z_idx = tree_cfg->num_pipes; z_idx > position; z_idx--) {
-		tree_cfg->dpp[z_idx] = tree_cfg->dpp[z_idx - 1];
-		tree_cfg->mpcc[z_idx] = tree_cfg->mpcc[z_idx - 1];
-	}
-	tree_cfg->dpp[position] = cfg->dpp_id;
-	tree_cfg->mpcc[position] = mpcc_id;
-	tree_cfg->num_pipes++;
+	return new_mpcc;
 }
 
-int mpc10_mpcc_add(struct mpc *mpc, struct mpcc_cfg *cfg)
+/*
+ * Remove a specified MPCC from the MPC tree.
+ *
+ * Parameters:
+ * [in/out] mpc		- MPC context.
+ * [in/out] tree	- MPC tree structure that plane will be removed from.
+ * [in/out] mpcc	- MPCC to be removed from tree.
+ *
+ * Return:  void
+ */
+void mpc1_remove_mpcc(
+	struct mpc *mpc,
+	struct mpc_tree *tree,
+	struct mpcc *mpcc_to_remove)
 {
 	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
-	int mpcc_id, z_idx;
-
-	ASSERT(cfg->z_index < mpc10->num_mpcc);
-
-	/* check in dpp already exists in mpc tree */
-	for (z_idx = 0; z_idx < cfg->tree_cfg->num_pipes; z_idx++)
-		if (cfg->tree_cfg->dpp[z_idx] == cfg->dpp_id)
-			break;
-	if (z_idx == cfg->tree_cfg->num_pipes) {
-		ASSERT(cfg->z_index <= cfg->tree_cfg->num_pipes);
-		mpcc_id = mpc10_get_idle_mpcc_id(mpc10);
-
-		/*
-		 * TODO: remove hack
-		 * Note: currently there is a bug in init_hw such that
-		 * on resume from hibernate, BIOS sets up MPCC0, and
-		 * we do mpcc_remove but the mpcc cannot go to idle
-		 * after remove. This cause us to pick mpcc1 here,
-		 * which causes a pstate hang for yet unknown reason.
-		 */
-		mpcc_id = cfg->dpp_id;
-		/* end hack*/
-
-		ASSERT(!(mpc10->mpcc_in_use_mask & 1 << mpcc_id));
-
-		if (mpc->ctx->dc->debug.sanity_checks)
-			mpc10_assert_mpcc_idle_before_connect(mpc10, mpcc_id);
+	bool found = false;
+	int mpcc_id = mpcc_to_remove->mpcc_id;
+
+	if (tree->opp_list == mpcc_to_remove) {
+		found = true;
+		/* remove MPCC from top of tree */
+		if (mpcc_to_remove->mpcc_bot) {
+			/* set the next MPCC in list to be the top MPCC */
+			tree->opp_list = mpcc_to_remove->mpcc_bot;
+			REG_UPDATE(MUX[tree->opp_id], MPC_OUT_MUX, tree->opp_list->mpcc_id);
+		} else {
+			/* there are no other MPCC is list */
+			tree->opp_list = NULL;
+			REG_UPDATE(MUX[tree->opp_id], MPC_OUT_MUX, 0xf);
+		}
 	} else {
-		ASSERT(cfg->z_index < cfg->tree_cfg->num_pipes);
-		mpcc_id = cfg->tree_cfg->mpcc[z_idx];
-		mpc10_mpcc_remove(mpc, cfg->tree_cfg, cfg->opp_id, cfg->dpp_id);
+		/* find mpcc to remove MPCC list */
+		struct mpcc *temp_mpcc = tree->opp_list;
+
+		while (temp_mpcc && temp_mpcc->mpcc_bot != mpcc_to_remove)
+			temp_mpcc = temp_mpcc->mpcc_bot;
+
+		if (temp_mpcc && temp_mpcc->mpcc_bot == mpcc_to_remove) {
+			found = true;
+			temp_mpcc->mpcc_bot = mpcc_to_remove->mpcc_bot;
+			if (mpcc_to_remove->mpcc_bot) {
+				/* remove MPCC in middle of list */
+				REG_SET(MPCC_BOT_SEL[temp_mpcc->mpcc_id], 0,
+						MPCC_BOT_SEL, mpcc_to_remove->mpcc_bot->mpcc_id);
+			} else {
+				/* remove MPCC from bottom of list */
+				REG_SET(MPCC_BOT_SEL[temp_mpcc->mpcc_id], 0,
+						MPCC_BOT_SEL, 0xf);
+				REG_UPDATE(MPCC_CONTROL[temp_mpcc->mpcc_id],
+						MPCC_MODE, MPCC_BLEND_MODE_TOP_LAYER_PASSTHROUGH);
+			}
+		}
 	}
 
-	/* add dpp/mpcc pair to mpc_tree_cfg and update mpcc registers */
-	mpc10_add_to_tree_cfg(mpc, cfg, mpcc_id);
+	if (found) {
+		/* turn off MPCC mux registers */
+		REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf);
+		REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf);
+		REG_SET(MPCC_OPP_ID[mpcc_id],  0, MPCC_OPP_ID,  0xf);
 
-	/* set background color */
-	mpc10_set_bg_color(mpc10, &cfg->black_color, mpcc_id);
-
-	/* mark this mpcc as in use */
-	mpc10->mpcc_in_use_mask |= 1 << mpcc_id;
+		/* mark this mpcc as not in use */
+		mpc10->mpcc_in_use_mask &= ~(1 << mpcc_id);
+		mpcc_to_remove->dpp_id = 0xf;
+		mpcc_to_remove->mpcc_bot = NULL;
+	} else {
+		/* In case of resume from S3/S4, remove mpcc from bios left over */
+		REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf);
+		REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf);
+		REG_SET(MPCC_OPP_ID[mpcc_id],  0, MPCC_OPP_ID,  0xf);
+	}
+}
 
-	return mpcc_id;
+static void mpc1_init_mpcc(struct mpcc *mpcc, int mpcc_inst)
+{
+	mpcc->mpcc_id = mpcc_inst;
+	mpcc->dpp_id = 0xf;
+	mpcc->mpcc_bot = NULL;
+	mpcc->blnd_cfg.overlap_only = false;
+	mpcc->blnd_cfg.global_alpha = 0xff;
+	mpcc->blnd_cfg.global_gain = 0xff;
+	mpcc->sm_cfg.enable = false;
 }
 
-void mpc10_update_blend_mode(
-		struct mpc *mpc,
-		struct mpcc_cfg *cfg)
+/*
+ * Reset the MPCC HW status by disconnecting all muxes.
+ *
+ * Parameters:
+ * [in/out] mpc		- MPC context.
+ *
+ * Return:  void
+ */
+void mpc1_mpc_init(struct mpc *mpc)
 {
 	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
-	int mpcc_id, z_idx;
-	int alpha_blnd_mode = cfg->per_pixel_alpha ?
-			BLND_PP_ALPHA : BLND_GLOBAL_ALPHA;
+	int mpcc_id;
+	int opp_id;
 
-	/* find z_idx for the dpp that requires blending mode update*/
-	for (z_idx = 0; z_idx < cfg->tree_cfg->num_pipes; z_idx++)
-		if (cfg->tree_cfg->dpp[z_idx] == cfg->dpp_id)
-			break;
+	mpc10->mpcc_in_use_mask = 0;
+	for (mpcc_id = 0; mpcc_id < mpc10->num_mpcc; mpcc_id++) {
+		REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf);
+		REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf);
+		REG_SET(MPCC_OPP_ID[mpcc_id],  0, MPCC_OPP_ID,  0xf);
 
-	ASSERT(z_idx < cfg->tree_cfg->num_pipes);
-	mpcc_id = cfg->tree_cfg->mpcc[z_idx];
+		mpc1_init_mpcc(&(mpc->mpcc_array[mpcc_id]), mpcc_id);
+	}
 
-	REG_UPDATE_2(MPCC_CONTROL[mpcc_id],
-			MPCC_ALPHA_BLND_MODE, alpha_blnd_mode,
-			MPCC_ALPHA_MULTIPLIED_MODE, cfg->pre_multiplied_alpha);
+	for (opp_id = 0; opp_id < MAX_OPP; opp_id++) {
+		if (REG(MUX[opp_id]))
+			REG_UPDATE(MUX[opp_id], MPC_OUT_MUX, 0xf);
+	}
 }
 
-int mpc10_get_opp_id(struct mpc *mpc, int mpcc_id)
+void mpc1_init_mpcc_list_from_hw(
+	struct mpc *mpc,
+	struct mpc_tree *tree)
 {
 	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
-	int opp_id = 0xF;
-
-	REG_GET(MPCC_OPP_ID[mpcc_id], MPCC_OPP_ID, &opp_id);
-
-	return opp_id;
+	unsigned int opp_id;
+	unsigned int top_sel;
+	unsigned int bot_sel;
+	unsigned int out_mux;
+	struct mpcc *mpcc;
+	int mpcc_id;
+	int bot_mpcc_id;
+
+	REG_GET(MUX[tree->opp_id], MPC_OUT_MUX, &out_mux);
+
+	if (out_mux != 0xf) {
+		for (mpcc_id = 0; mpcc_id < mpc10->num_mpcc; mpcc_id++) {
+			REG_GET(MPCC_OPP_ID[mpcc_id],  MPCC_OPP_ID,  &opp_id);
+			REG_GET(MPCC_TOP_SEL[mpcc_id], MPCC_TOP_SEL, &top_sel);
+			REG_GET(MPCC_BOT_SEL[mpcc_id],  MPCC_BOT_SEL, &bot_sel);
+
+			if (bot_sel == mpcc_id)
+				bot_sel = 0xf;
+
+			if ((opp_id == tree->opp_id) && (top_sel != 0xf)) {
+				mpcc = mpc1_get_mpcc(mpc, mpcc_id);
+				mpcc->dpp_id = top_sel;
+				mpc10->mpcc_in_use_mask |= 1 << mpcc_id;
+
+				if (out_mux == mpcc_id)
+					tree->opp_list = mpcc;
+				if (bot_sel != 0xf && bot_sel < mpc10->num_mpcc) {
+					bot_mpcc_id = bot_sel;
+					REG_GET(MPCC_OPP_ID[bot_mpcc_id],  MPCC_OPP_ID,  &opp_id);
+					REG_GET(MPCC_TOP_SEL[bot_mpcc_id], MPCC_TOP_SEL, &top_sel);
+					if ((opp_id == tree->opp_id) && (top_sel != 0xf)) {
+						struct mpcc *mpcc_bottom = mpc1_get_mpcc(mpc, bot_mpcc_id);
+
+						mpcc->mpcc_bot = mpcc_bottom;
+					}
+				}
+			}
+		}
+	}
 }
 
 const struct mpc_funcs dcn10_mpc_funcs = {
-		.add = mpc10_mpcc_add,
-		.remove = mpc10_mpcc_remove,
-		.wait_for_idle = mpc10_assert_idle_mpcc,
-		.update_blend_mode = mpc10_update_blend_mode,
-		.get_opp_id = mpc10_get_opp_id,
+	.insert_plane = mpc1_insert_plane,
+	.remove_mpcc = mpc1_remove_mpcc,
+	.mpc_init = mpc1_mpc_init,
+	.get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp,
+	.wait_for_idle = mpc1_assert_idle_mpcc,
+	.assert_mpcc_idle_before_connect = mpc1_assert_mpcc_idle_before_connect,
+	.init_mpcc_list_from_hw = mpc1_init_mpcc_list_from_hw,
+	.update_blending = mpc1_update_blending,
 };
 
 void dcn10_mpc_construct(struct dcn10_mpc *mpc10,
@@ -360,6 +427,8 @@ void dcn10_mpc_construct(struct dcn10_mpc *mpc10,
 	const struct dcn_mpc_mask *mpc_mask,
 	int num_mpcc)
 {
+	int i;
+
 	mpc10->base.ctx = ctx;
 
 	mpc10->base.funcs = &dcn10_mpc_funcs;
@@ -370,5 +439,8 @@ void dcn10_mpc_construct(struct dcn10_mpc *mpc10,
 
 	mpc10->mpcc_in_use_mask = 0;
 	mpc10->num_mpcc = num_mpcc;
+
+	for (i = 0; i < MAX_MPCC; i++)
+		mpc1_init_mpcc(&mpc10->base.mpcc_array[i], i);
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
index e85e1f342266b7bcad3f7a11ae3c4fb91c2f24b7..267a2995ef6e74b2e05ec492c1a8aff51c4a8bf6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
@@ -30,9 +30,6 @@
 #define TO_DCN10_MPC(mpc_base) \
 	container_of(mpc_base, struct dcn10_mpc, base)
 
-#define MAX_MPCC 6
-#define MAX_OPP 6
-
 #define MPC_COMMON_REG_LIST_DCN1_0(inst) \
 	SRII(MPCC_TOP_SEL, MPCC, inst),\
 	SRII(MPCC_BOT_SEL, MPCC, inst),\
@@ -42,7 +39,8 @@
 	SRII(MPCC_BG_G_Y, MPCC, inst),\
 	SRII(MPCC_BG_R_CR, MPCC, inst),\
 	SRII(MPCC_BG_B_CB, MPCC, inst),\
-	SRII(MPCC_BG_B_CB, MPCC, inst)
+	SRII(MPCC_BG_B_CB, MPCC, inst),\
+	SRII(MPCC_SM_CONTROL, MPCC, inst)
 
 #define MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0(inst) \
 	SRII(MUX, MPC_OUT, inst)
@@ -56,6 +54,7 @@
 	uint32_t MPCC_BG_G_Y[MAX_MPCC]; \
 	uint32_t MPCC_BG_R_CR[MAX_MPCC]; \
 	uint32_t MPCC_BG_B_CB[MAX_MPCC]; \
+	uint32_t MPCC_SM_CONTROL[MAX_MPCC]; \
 	uint32_t MUX[MAX_OPP];
 
 #define MPC_COMMON_MASK_SH_LIST_DCN1_0(mask_sh)\
@@ -65,12 +64,20 @@
 	SF(MPCC0_MPCC_CONTROL, MPCC_ALPHA_BLND_MODE, mask_sh),\
 	SF(MPCC0_MPCC_CONTROL, MPCC_ALPHA_MULTIPLIED_MODE, mask_sh),\
 	SF(MPCC0_MPCC_CONTROL, MPCC_BLND_ACTIVE_OVERLAP_ONLY, mask_sh),\
+	SF(MPCC0_MPCC_CONTROL, MPCC_GLOBAL_ALPHA, mask_sh),\
+	SF(MPCC0_MPCC_CONTROL, MPCC_GLOBAL_GAIN, mask_sh),\
 	SF(MPCC0_MPCC_STATUS, MPCC_IDLE, mask_sh),\
 	SF(MPCC0_MPCC_STATUS, MPCC_BUSY, mask_sh),\
 	SF(MPCC0_MPCC_OPP_ID, MPCC_OPP_ID, mask_sh),\
 	SF(MPCC0_MPCC_BG_G_Y, MPCC_BG_G_Y, mask_sh),\
 	SF(MPCC0_MPCC_BG_R_CR, MPCC_BG_R_CR, mask_sh),\
 	SF(MPCC0_MPCC_BG_B_CB, MPCC_BG_B_CB, mask_sh),\
+	SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_EN, mask_sh),\
+	SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_MODE, mask_sh),\
+	SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FRAME_ALT, mask_sh),\
+	SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FIELD_ALT, mask_sh),\
+	SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FORCE_NEXT_FRAME_POL, mask_sh),\
+	SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FORCE_NEXT_TOP_POL, mask_sh),\
 	SF(MPC_OUT0_MUX, MPC_OUT_MUX, mask_sh)
 
 #define MPC_REG_FIELD_LIST(type) \
@@ -80,12 +87,20 @@
 	type MPCC_ALPHA_BLND_MODE;\
 	type MPCC_ALPHA_MULTIPLIED_MODE;\
 	type MPCC_BLND_ACTIVE_OVERLAP_ONLY;\
+	type MPCC_GLOBAL_ALPHA;\
+	type MPCC_GLOBAL_GAIN;\
 	type MPCC_IDLE;\
 	type MPCC_BUSY;\
 	type MPCC_OPP_ID;\
 	type MPCC_BG_G_Y;\
 	type MPCC_BG_R_CR;\
 	type MPCC_BG_B_CB;\
+	type MPCC_SM_EN;\
+	type MPCC_SM_MODE;\
+	type MPCC_SM_FRAME_ALT;\
+	type MPCC_SM_FIELD_ALT;\
+	type MPCC_SM_FORCE_NEXT_FRAME_POL;\
+	type MPCC_SM_FORCE_NEXT_TOP_POL;\
 	type MPC_OUT_MUX;
 
 struct dcn_mpc_registers {
@@ -117,23 +132,55 @@ void dcn10_mpc_construct(struct dcn10_mpc *mpcc10,
 	const struct dcn_mpc_mask *mpc_mask,
 	int num_mpcc);
 
-int mpc10_mpcc_add(
-		struct mpc *mpc,
-		struct mpcc_cfg *cfg);
-
-void mpc10_mpcc_remove(
-		struct mpc *mpc,
-		struct mpc_tree_cfg *tree_cfg,
-		int opp_id,
-		int dpp_id);
-
-void mpc10_assert_idle_mpcc(
-		struct mpc *mpc,
-		int id);
-
-void mpc10_update_blend_mode(
-		struct mpc *mpc,
-		struct mpcc_cfg *cfg);
-int mpc10_get_opp_id(struct mpc *mpc, int mpcc_id);
+struct mpcc *mpc1_insert_plane(
+	struct mpc *mpc,
+	struct mpc_tree *tree,
+	struct mpcc_blnd_cfg *blnd_cfg,
+	struct mpcc_sm_cfg *sm_cfg,
+	struct mpcc *insert_above_mpcc,
+	int dpp_id,
+	int mpcc_id);
+
+void mpc1_remove_mpcc(
+	struct mpc *mpc,
+	struct mpc_tree *tree,
+	struct mpcc *mpcc);
+
+void mpc1_mpc_init(
+	struct mpc *mpc);
+
+void mpc1_assert_idle_mpcc(
+	struct mpc *mpc,
+	int id);
+
+void mpc1_set_bg_color(
+	struct mpc *mpc,
+	struct tg_color *bg_color,
+	int id);
+
+void mpc1_update_stereo_mix(
+	struct mpc *mpc,
+	struct mpcc_sm_cfg *sm_cfg,
+	int mpcc_id);
+
+bool mpc1_is_mpcc_idle(
+	struct mpc *mpc,
+	int mpcc_id);
+
+void mpc1_assert_mpcc_idle_before_connect(
+	struct mpc *mpc,
+	int mpcc_id);
+
+void mpc1_init_mpcc_list_from_hw(
+	struct mpc *mpc,
+	struct mpc_tree *tree);
+
+struct mpcc *mpc1_get_mpcc(
+	struct mpc *mpc,
+	int mpcc_id);
+
+struct mpcc *mpc1_get_mpcc_for_dpp(
+	struct mpc_tree *tree,
+	int dpp_id);
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c
index 6d6f67b7d30e1bc7533175eb33f2b1c4c3d8f73f..f6ba0eef4489b6f72fb97069eb90ec8b76631c9a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c
@@ -296,13 +296,75 @@ void opp1_program_fmt(
 	return;
 }
 
-void opp1_set_stereo_polarity(
-		struct output_pixel_processor *opp,
-		bool enable, bool rightEyePolarity)
+void opp1_program_stereo(
+	struct output_pixel_processor *opp,
+	bool enable,
+	const struct dc_crtc_timing *timing)
 {
 	struct dcn10_opp *oppn10 = TO_DCN10_OPP(opp);
 
-	REG_UPDATE(FMT_CONTROL, FMT_STEREOSYNC_OVERRIDE, enable);
+	uint32_t active_width = timing->h_addressable - timing->h_border_right - timing->h_border_right;
+	uint32_t space1_size = timing->v_total - timing->v_addressable;
+	/* TODO: confirm computation of space2_size */
+	uint32_t space2_size = timing->v_total - timing->v_addressable;
+
+	if (!enable) {
+		active_width = 0;
+		space1_size = 0;
+		space2_size = 0;
+	}
+
+	/* TODO: for which cases should FMT_STEREOSYNC_OVERRIDE be set? */
+	REG_UPDATE(FMT_CONTROL, FMT_STEREOSYNC_OVERRIDE, 0);
+
+	REG_UPDATE(OPPBUF_CONTROL, OPPBUF_ACTIVE_WIDTH, active_width);
+
+	/* Program OPPBUF_3D_VACT_SPACE1_SIZE and OPPBUF_VACT_SPACE2_SIZE registers
+	 * In 3D progressive frames, Vactive space happens only in between the 2 frames,
+	 * so only need to program OPPBUF_3D_VACT_SPACE1_SIZE
+	 * In 3D alternative frames, left and right frames, top and bottom field.
+	 */
+	if (timing->timing_3d_format == TIMING_3D_FORMAT_FRAME_ALTERNATE)
+		REG_UPDATE(OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE2_SIZE, space2_size);
+	else
+		REG_UPDATE(OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE1_SIZE, space1_size);
+
+	/* TODO: Is programming of OPPBUF_DUMMY_DATA_R/G/B needed? */
+	/*
+	REG_UPDATE(OPPBUF_3D_PARAMETERS_0,
+			OPPBUF_DUMMY_DATA_R, data_r);
+	REG_UPDATE(OPPBUF_3D_PARAMETERS_1,
+			OPPBUF_DUMMY_DATA_G, data_g);
+	REG_UPDATE(OPPBUF_3D_PARAMETERS_1,
+			OPPBUF_DUMMY_DATA_B, _data_b);
+	*/
+}
+
+void opp1_program_oppbuf(
+	struct output_pixel_processor *opp,
+	struct oppbuf_params *oppbuf)
+{
+	struct dcn10_opp *oppn10 = TO_DCN10_OPP(opp);
+
+	/* Program the oppbuf active width to be the frame width from mpc */
+	REG_UPDATE(OPPBUF_CONTROL, OPPBUF_ACTIVE_WIDTH, oppbuf->active_width);
+
+	/* Specifies the number of segments in multi-segment mode (DP-MSO operation)
+	 * description  "In 1/2/4 segment mode, specifies the horizontal active width in pixels of the display panel.
+	 * In 4 segment split left/right mode, specifies the horizontal 1/2 active width in pixels of the display panel.
+	 * Used to determine segment boundaries in multi-segment mode. Used to determine the width of the vertical active space in 3D frame packed modes.
+	 * OPPBUF_ACTIVE_WIDTH must be integer divisible by the total number of segments."
+	 */
+	REG_UPDATE(OPPBUF_CONTROL, OPPBUF_DISPLAY_SEGMENTATION, oppbuf->mso_segmentation);
+
+	/* description  "Specifies the number of overlap pixels (1-8 overlapping pixels supported), used in multi-segment mode (DP-MSO operation)" */
+	REG_UPDATE(OPPBUF_CONTROL, OPPBUF_OVERLAP_PIXEL_NUM, oppbuf->mso_overlap_pixel_num);
+
+	/* description  "Specifies the number of times a pixel is replicated (0-15 pixel replications supported).
+	 * A value of 0 disables replication. The total number of times a pixel is output is OPPBUF_PIXEL_REPETITION + 1."
+	 */
+	REG_UPDATE(OPPBUF_CONTROL, OPPBUF_PIXEL_REPETITION, oppbuf->pixel_repetition);
+
 }
 
 /*****************************************/
@@ -319,7 +381,7 @@ static struct opp_funcs dcn10_opp_funcs = {
 		.opp_set_dyn_expansion = opp1_set_dyn_expansion,
 		.opp_program_fmt = opp1_program_fmt,
 		.opp_program_bit_depth_reduction = opp1_program_bit_depth_reduction,
-		.opp_set_stereo_polarity = opp1_set_stereo_polarity,
+		.opp_program_stereo = opp1_program_stereo,
 		.opp_destroy = opp1_destroy
 };
 
@@ -330,6 +392,7 @@ void dcn10_opp_construct(struct dcn10_opp *oppn10,
 	const struct dcn10_opp_shift *opp_shift,
 	const struct dcn10_opp_mask *opp_mask)
 {
+
 	oppn10->base.ctx = ctx;
 	oppn10->base.inst = inst;
 	oppn10->base.funcs = &dcn10_opp_funcs;
@@ -338,4 +401,3 @@ void dcn10_opp_construct(struct dcn10_opp *oppn10,
 	oppn10->opp_shift = opp_shift;
 	oppn10->opp_mask = opp_mask;
 }
-
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h
index f3c298ec37fb8d222c417a2efdf746f7dbae714b..bc5058af626627d62d9f6bf8266764ff75910a84 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h
@@ -41,7 +41,10 @@
 	SRI(FMT_DITHER_RAND_B_SEED, FMT, id), \
 	SRI(FMT_CLAMP_CNTL, FMT, id), \
 	SRI(FMT_DYNAMIC_EXP_CNTL, FMT, id), \
-	SRI(FMT_MAP420_MEMORY_CONTROL, FMT, id)
+	SRI(FMT_MAP420_MEMORY_CONTROL, FMT, id), \
+	SRI(OPPBUF_CONTROL, OPPBUF, id),\
+	SRI(OPPBUF_3D_PARAMETERS_0, OPPBUF, id), \
+	SRI(OPPBUF_3D_PARAMETERS_1, OPPBUF, id)
 
 #define OPP_REG_LIST_DCN10(id) \
 	OPP_REG_LIST_DCN(id)
@@ -54,7 +57,11 @@
 	uint32_t FMT_DITHER_RAND_B_SEED; \
 	uint32_t FMT_CLAMP_CNTL; \
 	uint32_t FMT_DYNAMIC_EXP_CNTL; \
-	uint32_t FMT_MAP420_MEMORY_CONTROL;
+	uint32_t FMT_MAP420_MEMORY_CONTROL; \
+	uint32_t OPPBUF_CONTROL; \
+	uint32_t OPPBUF_CONTROL1; \
+	uint32_t OPPBUF_3D_PARAMETERS_0; \
+	uint32_t OPPBUF_3D_PARAMETERS_1
 
 #define OPP_MASK_SH_LIST_DCN(mask_sh) \
 	OPP_SF(FMT0_FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, mask_sh), \
@@ -78,10 +85,16 @@
 	OPP_SF(FMT0_FMT_CLAMP_CNTL, FMT_CLAMP_COLOR_FORMAT, mask_sh), \
 	OPP_SF(FMT0_FMT_DYNAMIC_EXP_CNTL, FMT_DYNAMIC_EXP_EN, mask_sh), \
 	OPP_SF(FMT0_FMT_DYNAMIC_EXP_CNTL, FMT_DYNAMIC_EXP_MODE, mask_sh), \
-	OPP_SF(FMT0_FMT_MAP420_MEMORY_CONTROL, FMT_MAP420MEM_PWR_FORCE, mask_sh)
+	OPP_SF(FMT0_FMT_MAP420_MEMORY_CONTROL, FMT_MAP420MEM_PWR_FORCE, mask_sh), \
+	OPP_SF(OPPBUF0_OPPBUF_CONTROL, OPPBUF_ACTIVE_WIDTH, mask_sh),\
+	OPP_SF(OPPBUF0_OPPBUF_CONTROL, OPPBUF_PIXEL_REPETITION, mask_sh),\
+	OPP_SF(OPPBUF0_OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE1_SIZE, mask_sh), \
+	OPP_SF(OPPBUF0_OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE2_SIZE, mask_sh)
 
 #define OPP_MASK_SH_LIST_DCN10(mask_sh) \
-	OPP_MASK_SH_LIST_DCN(mask_sh)
+	OPP_MASK_SH_LIST_DCN(mask_sh), \
+	OPP_SF(OPPBUF0_OPPBUF_CONTROL, OPPBUF_DISPLAY_SEGMENTATION, mask_sh),\
+	OPP_SF(OPPBUF0_OPPBUF_CONTROL, OPPBUF_OVERLAP_PIXEL_NUM, mask_sh)
 
 #define OPP_DCN10_REG_FIELD_LIST(type) \
 	type FMT_TRUNCATE_EN; \
@@ -105,18 +118,25 @@
 	type FMT_DYNAMIC_EXP_EN; \
 	type FMT_DYNAMIC_EXP_MODE; \
 	type FMT_MAP420MEM_PWR_FORCE; \
-	type FMT_STEREOSYNC_OVERRIDE;
+	type FMT_STEREOSYNC_OVERRIDE; \
+	type OPPBUF_ACTIVE_WIDTH;\
+	type OPPBUF_PIXEL_REPETITION;\
+	type OPPBUF_DISPLAY_SEGMENTATION;\
+	type OPPBUF_OVERLAP_PIXEL_NUM;\
+	type OPPBUF_NUM_SEGMENT_PADDED_PIXELS; \
+	type OPPBUF_3D_VACT_SPACE1_SIZE; \
+	type OPPBUF_3D_VACT_SPACE2_SIZE
 
 struct dcn10_opp_registers {
-	OPP_COMMON_REG_VARIABLE_LIST
+	OPP_COMMON_REG_VARIABLE_LIST;
 };
 
 struct dcn10_opp_shift {
-	OPP_DCN10_REG_FIELD_LIST(uint8_t)
+	OPP_DCN10_REG_FIELD_LIST(uint8_t);
 };
 
 struct dcn10_opp_mask {
-	OPP_DCN10_REG_FIELD_LIST(uint32_t)
+	OPP_DCN10_REG_FIELD_LIST(uint32_t);
 };
 
 struct dcn10_opp {
@@ -151,9 +171,10 @@ void opp1_program_bit_depth_reduction(
 	struct output_pixel_processor *opp,
 	const struct bit_depth_reduction_params *params);
 
-void opp1_set_stereo_polarity(
-		struct output_pixel_processor *opp,
-		bool enable, bool rightEyePolarity);
+void opp1_program_stereo(
+	struct output_pixel_processor *opp,
+	bool enable,
+	const struct dc_crtc_timing *timing);
 
 void opp1_destroy(struct output_pixel_processor **opp);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
similarity index 76%
rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c
rename to drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index 73ff78f9cae1cc1868afb23b3b10e34c26bfd54f..4bf64d1b2c60223ff8ad557f2202d6bf1890e346 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -23,19 +23,20 @@
  *
  */
 
+
 #include "reg_helper.h"
-#include "dcn10_timing_generator.h"
+#include "dcn10_optc.h"
 #include "dc.h"
 
 #define REG(reg)\
-	tgn10->tg_regs->reg
+	optc1->tg_regs->reg
 
 #define CTX \
-	tgn10->base.ctx
+	optc1->base.ctx
 
 #undef FN
 #define FN(reg_name, field_name) \
-	tgn10->tg_shift->field_name, tgn10->tg_mask->field_name
+	optc1->tg_shift->field_name, optc1->tg_mask->field_name
 
 #define STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN 0x100
 
@@ -45,8 +46,8 @@
 * This is a workaround for a bug that has existed since R5xx and has not been
 * fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive.
 */
-static void tgn10_apply_front_porch_workaround(
-	struct timing_generator *tg,
+static void optc1_apply_front_porch_workaround(
+	struct timing_generator *optc,
 	struct dc_crtc_timing *timing)
 {
 	if (timing->flags.INTERLACE == 1) {
@@ -58,30 +59,30 @@ static void tgn10_apply_front_porch_workaround(
 	}
 }
 
-static void tgn10_program_global_sync(
-		struct timing_generator *tg)
+void optc1_program_global_sync(
+		struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
-	if (tg->dlg_otg_param.vstartup_start == 0) {
+	if (optc->dlg_otg_param.vstartup_start == 0) {
 		BREAK_TO_DEBUGGER();
 		return;
 	}
 
 	REG_SET(OTG_VSTARTUP_PARAM, 0,
-		VSTARTUP_START, tg->dlg_otg_param.vstartup_start);
+		VSTARTUP_START, optc->dlg_otg_param.vstartup_start);
 
 	REG_SET_2(OTG_VUPDATE_PARAM, 0,
-			VUPDATE_OFFSET, tg->dlg_otg_param.vupdate_offset,
-			VUPDATE_WIDTH, tg->dlg_otg_param.vupdate_width);
+			VUPDATE_OFFSET, optc->dlg_otg_param.vupdate_offset,
+			VUPDATE_WIDTH, optc->dlg_otg_param.vupdate_width);
 
 	REG_SET(OTG_VREADY_PARAM, 0,
-			VREADY_OFFSET, tg->dlg_otg_param.vready_offset);
+			VREADY_OFFSET, optc->dlg_otg_param.vready_offset);
 }
 
-static void tgn10_disable_stereo(struct timing_generator *tg)
+static void optc1_disable_stereo(struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	REG_SET(OTG_STEREO_CONTROL, 0,
 		OTG_STEREO_EN, 0);
@@ -90,11 +91,6 @@ static void tgn10_disable_stereo(struct timing_generator *tg)
 		OTG_3D_STRUCTURE_EN, 0,
 		OTG_3D_STRUCTURE_V_UPDATE_MODE, 0,
 		OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0);
-
-	REG_UPDATE(OPPBUF_CONTROL,
-		OPPBUF_ACTIVE_WIDTH, 0);
-	REG_UPDATE(OPPBUF_3D_PARAMETERS_0,
-		OPPBUF_3D_VACT_SPACE1_SIZE, 0);
 }
 
 /**
@@ -102,8 +98,8 @@ static void tgn10_disable_stereo(struct timing_generator *tg)
  * Program CRTC Timing Registers - OTG_H_*, OTG_V_*, Pixel repetition.
  * Including SYNC. Call BIOS command table to program Timings.
  */
-static void tgn10_program_timing(
-	struct timing_generator *tg,
+void optc1_program_timing(
+	struct timing_generator *optc,
 	const struct dc_crtc_timing *dc_crtc_timing,
 	bool use_vbios)
 {
@@ -121,10 +117,10 @@ static void tgn10_program_timing(
 	uint32_t h_div_2;
 	int32_t vertical_line_start;
 
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	patched_crtc_timing = *dc_crtc_timing;
-	tgn10_apply_front_porch_workaround(tg, &patched_crtc_timing);
+	optc1_apply_front_porch_workaround(optc, &patched_crtc_timing);
 
 	/* Load horizontal timing */
 
@@ -217,7 +213,7 @@ static void tgn10_program_timing(
 	/* Use OTG_VERTICAL_INTERRUPT2 replace VUPDATE interrupt,
 	 * program the reg for interrupt postition.
 	 */
-	vertical_line_start = asic_blank_end - tg->dlg_otg_param.vstartup_start + 1;
+	vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1;
 	if (vertical_line_start < 0) {
 		ASSERT(0);
 		vertical_line_start = 0;
@@ -233,26 +229,25 @@ static void tgn10_program_timing(
 			OTG_V_SYNC_A_POL, v_sync_polarity);
 
 	v_init = asic_blank_start;
-	if (tg->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT ||
-		tg->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
-		tg->dlg_otg_param.signal == SIGNAL_TYPE_EDP) {
+	if (optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT ||
+		optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+		optc->dlg_otg_param.signal == SIGNAL_TYPE_EDP) {
 		start_point = 1;
 		if (patched_crtc_timing.flags.INTERLACE == 1)
 			field_num = 1;
 	}
 	v_fp2 = 0;
-	if (tg->dlg_otg_param.vstartup_start > asic_blank_end)
-		v_fp2 = tg->dlg_otg_param.vstartup_start > asic_blank_end;
+	if (optc->dlg_otg_param.vstartup_start > asic_blank_end)
+		v_fp2 = optc->dlg_otg_param.vstartup_start > asic_blank_end;
 
 	/* Interlace */
 	if (patched_crtc_timing.flags.INTERLACE == 1) {
 		REG_UPDATE(OTG_INTERLACE_CONTROL,
 				OTG_INTERLACE_ENABLE, 1);
 		v_init = v_init / 2;
-		if ((tg->dlg_otg_param.vstartup_start/2)*2 > asic_blank_end)
+		if ((optc->dlg_otg_param.vstartup_start/2)*2 > asic_blank_end)
 			v_fp2 = v_fp2 / 2;
-	}
-	else
+	} else
 		REG_UPDATE(OTG_INTERLACE_CONTROL,
 				OTG_INTERLACE_ENABLE, 0);
 
@@ -270,13 +265,13 @@ static void tgn10_program_timing(
 			OTG_START_POINT_CNTL, start_point,
 			OTG_FIELD_NUMBER_CNTL, field_num);
 
-	tgn10_program_global_sync(tg);
+	optc1_program_global_sync(optc);
 
 	/* TODO
 	 * patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1
 	 * program_horz_count_by_2
 	 * for DVI 30bpp mode, 0 otherwise
-	 * program_horz_count_by_2(tg, &patched_crtc_timing);
+	 * program_horz_count_by_2(optc, &patched_crtc_timing);
 	 */
 
 	/* Enable stereo - only when we need to pack 3D frame. Other types
@@ -290,9 +285,9 @@ static void tgn10_program_timing(
 
 }
 
-static void tgn10_set_blank_data_double_buffer(struct timing_generator *tg, bool enable)
+static void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	uint32_t blank_data_double_buffer_enable = enable ? 1 : 0;
 
@@ -304,9 +299,9 @@ static void tgn10_set_blank_data_double_buffer(struct timing_generator *tg, bool
  * unblank_crtc
  * Call ASIC Control Object to UnBlank CRTC.
  */
-static void tgn10_unblank_crtc(struct timing_generator *tg)
+static void optc1_unblank_crtc(struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 	uint32_t vertical_interrupt_enable = 0;
 
 	REG_GET(OTG_VERTICAL_INTERRUPT2_CONTROL,
@@ -316,7 +311,7 @@ static void tgn10_unblank_crtc(struct timing_generator *tg)
 	 * this check will be removed.
 	 */
 	if (vertical_interrupt_enable)
-		tgn10_set_blank_data_double_buffer(tg, true);
+		optc1_set_blank_data_double_buffer(optc, true);
 
 	REG_UPDATE_2(OTG_BLANK_CONTROL,
 			OTG_BLANK_DATA_EN, 0,
@@ -328,36 +323,29 @@ static void tgn10_unblank_crtc(struct timing_generator *tg)
  * Call ASIC Control Object to Blank CRTC.
  */
 
-static void tgn10_blank_crtc(struct timing_generator *tg)
+static void optc1_blank_crtc(struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	REG_UPDATE_2(OTG_BLANK_CONTROL,
 			OTG_BLANK_DATA_EN, 1,
 			OTG_BLANK_DE_MODE, 0);
 
-	/* todo: why are we waiting for BLANK_DATA_EN?  shouldn't we be waiting
-	 * for status?
-	 */
-	REG_WAIT(OTG_BLANK_CONTROL,
-			OTG_BLANK_DATA_EN, 1,
-			1, 100000);
-
-	tgn10_set_blank_data_double_buffer(tg, false);
+	optc1_set_blank_data_double_buffer(optc, false);
 }
 
-static void tgn10_set_blank(struct timing_generator *tg,
+void optc1_set_blank(struct timing_generator *optc,
 		bool enable_blanking)
 {
 	if (enable_blanking)
-		tgn10_blank_crtc(tg);
+		optc1_blank_crtc(optc);
 	else
-		tgn10_unblank_crtc(tg);
+		optc1_unblank_crtc(optc);
 }
 
-static bool tgn10_is_blanked(struct timing_generator *tg)
+bool optc1_is_blanked(struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 	uint32_t blank_en;
 	uint32_t blank_state;
 
@@ -368,9 +356,9 @@ static bool tgn10_is_blanked(struct timing_generator *tg)
 	return blank_en && blank_state;
 }
 
-static void tgn10_enable_optc_clock(struct timing_generator *tg, bool enable)
+void optc1_enable_optc_clock(struct timing_generator *optc, bool enable)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	if (enable) {
 		REG_UPDATE_2(OPTC_INPUT_CLOCK_CONTROL,
@@ -403,19 +391,19 @@ static void tgn10_enable_optc_clock(struct timing_generator *tg, bool enable)
  * Enable CRTC
  * Enable CRTC - call ASIC Control Object to enable Timing generator.
  */
-static bool tgn10_enable_crtc(struct timing_generator *tg)
+static bool optc1_enable_crtc(struct timing_generator *optc)
 {
 	/* TODO FPGA wait for answer
 	 * OTG_MASTER_UPDATE_MODE != CRTC_MASTER_UPDATE_MODE
 	 * OTG_MASTER_UPDATE_LOCK != CRTC_MASTER_UPDATE_LOCK
 	 */
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	/* opp instance for OTG. For DCN1.0, ODM is remoed.
 	 * OPP and OPTC should 1:1 mapping
 	 */
 	REG_UPDATE(OPTC_DATA_SOURCE_SELECT,
-			OPTC_SRC_SEL, tg->inst);
+			OPTC_SRC_SEL, optc->inst);
 
 	/* VTG enable first is for HW workaround */
 	REG_UPDATE(CONTROL,
@@ -430,9 +418,9 @@ static bool tgn10_enable_crtc(struct timing_generator *tg)
 }
 
 /* disable_crtc - call ASIC Control Object to disable Timing generator. */
-static bool tgn10_disable_crtc(struct timing_generator *tg)
+bool optc1_disable_crtc(struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	/* disable otg request until end of the first line
 	 * in the vertical blank region
@@ -453,11 +441,11 @@ static bool tgn10_disable_crtc(struct timing_generator *tg)
 }
 
 
-static void tgn10_program_blank_color(
-		struct timing_generator *tg,
+void optc1_program_blank_color(
+		struct timing_generator *optc,
 		const struct tg_color *black_color)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	REG_SET_3(OTG_BLACK_COLOR, 0,
 			OTG_BLACK_COLOR_B_CB, black_color->color_b_cb,
@@ -465,15 +453,15 @@ static void tgn10_program_blank_color(
 			OTG_BLACK_COLOR_R_CR, black_color->color_r_cr);
 }
 
-static bool tgn10_validate_timing(
-	struct timing_generator *tg,
+bool optc1_validate_timing(
+	struct timing_generator *optc,
 	const struct dc_crtc_timing *timing)
 {
 	uint32_t interlace_factor;
 	uint32_t v_blank;
 	uint32_t h_blank;
 	uint32_t min_v_blank;
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	ASSERT(timing != NULL);
 
@@ -503,19 +491,19 @@ static bool tgn10_validate_timing(
 	 * needs more than 8192 horizontal and
 	 * more than 8192 vertical total pixels)
 	 */
-	if (timing->h_total > tgn10->max_h_total ||
-		timing->v_total > tgn10->max_v_total)
+	if (timing->h_total > optc1->max_h_total ||
+		timing->v_total > optc1->max_v_total)
 		return false;
 
 
-	if (h_blank < tgn10->min_h_blank)
+	if (h_blank < optc1->min_h_blank)
 		return false;
 
-	if (timing->h_sync_width  < tgn10->min_h_sync_width ||
-		 timing->v_sync_width  < tgn10->min_v_sync_width)
+	if (timing->h_sync_width  < optc1->min_h_sync_width ||
+		 timing->v_sync_width  < optc1->min_v_sync_width)
 		return false;
 
-	min_v_blank = timing->flags.INTERLACE?tgn10->min_v_blank_interlace:tgn10->min_v_blank;
+	min_v_blank = timing->flags.INTERLACE?optc1->min_v_blank_interlace:optc1->min_v_blank;
 
 	if (v_blank < min_v_blank)
 		return false;
@@ -532,15 +520,15 @@ static bool tgn10_validate_timing(
  * holds the counter of frames.
  *
  * @param
- * struct timing_generator *tg - [in] timing generator which controls the
+ * struct timing_generator *optc - [in] timing generator which controls the
  * desired CRTC
  *
  * @return
  * Counter of frames, which should equal to number of vblanks.
  */
-static uint32_t tgn10_get_vblank_counter(struct timing_generator *tg)
+uint32_t optc1_get_vblank_counter(struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 	uint32_t frame_count;
 
 	REG_GET(OTG_STATUS_FRAME_COUNT,
@@ -549,34 +537,34 @@ static uint32_t tgn10_get_vblank_counter(struct timing_generator *tg)
 	return frame_count;
 }
 
-static void tgn10_lock(struct timing_generator *tg)
+void optc1_lock(struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	REG_SET(OTG_GLOBAL_CONTROL0, 0,
-			OTG_MASTER_UPDATE_LOCK_SEL, tg->inst);
+			OTG_MASTER_UPDATE_LOCK_SEL, optc->inst);
 	REG_SET(OTG_MASTER_UPDATE_LOCK, 0,
 			OTG_MASTER_UPDATE_LOCK, 1);
 
 	/* Should be fast, status does not update on maximus */
-	if (tg->ctx->dce_environment != DCE_ENV_FPGA_MAXIMUS)
+	if (optc->ctx->dce_environment != DCE_ENV_FPGA_MAXIMUS)
 		REG_WAIT(OTG_MASTER_UPDATE_LOCK,
 				UPDATE_LOCK_STATUS, 1,
 				1, 10);
 }
 
-static void tgn10_unlock(struct timing_generator *tg)
+void optc1_unlock(struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	REG_SET(OTG_MASTER_UPDATE_LOCK, 0,
 			OTG_MASTER_UPDATE_LOCK, 0);
 }
 
-static void tgn10_get_position(struct timing_generator *tg,
+void optc1_get_position(struct timing_generator *optc,
 		struct crtc_position *position)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	REG_GET_2(OTG_STATUS_POSITION,
 			OTG_HORZ_COUNT, &position->horizontal_count,
@@ -586,12 +574,12 @@ static void tgn10_get_position(struct timing_generator *tg,
 			OTG_VERT_COUNT_NOM, &position->nominal_vcount);
 }
 
-static bool tgn10_is_counter_moving(struct timing_generator *tg)
+bool optc1_is_counter_moving(struct timing_generator *optc)
 {
 	struct crtc_position position1, position2;
 
-	tg->funcs->get_position(tg, &position1);
-	tg->funcs->get_position(tg, &position2);
+	optc->funcs->get_position(optc, &position1);
+	optc->funcs->get_position(optc, &position2);
 
 	if (position1.horizontal_count == position2.horizontal_count &&
 		position1.vertical_count == position2.vertical_count)
@@ -600,10 +588,10 @@ static bool tgn10_is_counter_moving(struct timing_generator *tg)
 		return true;
 }
 
-static bool tgn10_did_triggered_reset_occur(
-	struct timing_generator *tg)
+bool optc1_did_triggered_reset_occur(
+	struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 	uint32_t occurred_force, occurred_vsync;
 
 	REG_GET(OTG_FORCE_COUNT_NOW_CNTL,
@@ -615,9 +603,9 @@ static bool tgn10_did_triggered_reset_occur(
 	return occurred_vsync != 0 || occurred_force != 0;
 }
 
-static void tgn10_disable_reset_trigger(struct timing_generator *tg)
+void optc1_disable_reset_trigger(struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	REG_WRITE(OTG_TRIGA_CNTL, 0);
 
@@ -628,9 +616,9 @@ static void tgn10_disable_reset_trigger(struct timing_generator *tg)
 		OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, 1);
 }
 
-static void tgn10_enable_reset_trigger(struct timing_generator *tg, int source_tg_inst)
+void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 	uint32_t falling_edge;
 
 	REG_GET(OTG_V_SYNC_A_CNTL,
@@ -662,12 +650,12 @@ static void tgn10_enable_reset_trigger(struct timing_generator *tg, int source_t
 			OTG_FORCE_COUNT_NOW_MODE, 2);
 }
 
-void tgn10_enable_crtc_reset(
-		struct timing_generator *tg,
+void optc1_enable_crtc_reset(
+		struct timing_generator *optc,
 		int source_tg_inst,
 		struct crtc_trigger_info *crtc_tp)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 	uint32_t falling_edge = 0;
 	uint32_t rising_edge = 0;
 
@@ -707,10 +695,10 @@ void tgn10_enable_crtc_reset(
 	}
 }
 
-static void tgn10_wait_for_state(struct timing_generator *tg,
+void optc1_wait_for_state(struct timing_generator *optc,
 		enum crtc_state state)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	switch (state) {
 	case CRTC_STATE_VBLANK:
@@ -730,8 +718,8 @@ static void tgn10_wait_for_state(struct timing_generator *tg,
 	}
 }
 
-static void tgn10_set_early_control(
-	struct timing_generator *tg,
+void optc1_set_early_control(
+	struct timing_generator *optc,
 	uint32_t early_cntl)
 {
 	/* asic design change, do not need this control
@@ -740,11 +728,11 @@ static void tgn10_set_early_control(
 }
 
 
-static void tgn10_set_static_screen_control(
-	struct timing_generator *tg,
+void optc1_set_static_screen_control(
+	struct timing_generator *optc,
 	uint32_t value)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	/* Bit 8 is no longer applicable in RV for PSR case,
 	 * set bit 8 to 0 if given
@@ -769,11 +757,11 @@ static void tgn10_set_static_screen_control(
  *
  *****************************************************************************
  */
-static void tgn10_set_drr(
-	struct timing_generator *tg,
+void optc1_set_drr(
+	struct timing_generator *optc,
 	const struct drr_params *params)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	if (params != NULL &&
 		params->vertical_total_max > 0 &&
@@ -806,15 +794,15 @@ static void tgn10_set_drr(
 	}
 }
 
-static void tgn10_set_test_pattern(
-	struct timing_generator *tg,
+static void optc1_set_test_pattern(
+	struct timing_generator *optc,
 	/* TODO: replace 'controller_dp_test_pattern' by 'test_pattern_mode'
 	 * because this is not DP-specific (which is probably somewhere in DP
 	 * encoder) */
 	enum controller_dp_test_pattern test_pattern,
 	enum dc_color_depth color_depth)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 	enum test_pattern_color_format bit_depth;
 	enum test_pattern_dyn_range dyn_range;
 	enum test_pattern_mode mode;
@@ -1065,35 +1053,30 @@ static void tgn10_set_test_pattern(
 	}
 }
 
-static void tgn10_get_crtc_scanoutpos(
-	struct timing_generator *tg,
+void optc1_get_crtc_scanoutpos(
+	struct timing_generator *optc,
 	uint32_t *v_blank_start,
 	uint32_t *v_blank_end,
 	uint32_t *h_position,
 	uint32_t *v_position)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 	struct crtc_position position;
 
 	REG_GET_2(OTG_V_BLANK_START_END,
 			OTG_V_BLANK_START, v_blank_start,
 			OTG_V_BLANK_END, v_blank_end);
 
-	tgn10_get_position(tg, &position);
+	optc1_get_position(optc, &position);
 
 	*h_position = position.horizontal_count;
 	*v_position = position.vertical_count;
 }
 
-
-
-static void tgn10_enable_stereo(struct timing_generator *tg,
+static void optc1_enable_stereo(struct timing_generator *optc,
 	const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
-
-	uint32_t active_width = timing->h_addressable;
-	uint32_t space1_size = timing->v_total - timing->v_addressable;
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	if (flags) {
 		uint32_t stereo_en;
@@ -1121,29 +1104,23 @@ static void tgn10_enable_stereo(struct timing_generator *tg,
 				OTG_3D_STRUCTURE_STEREO_SEL_OVR, flags->FRAME_PACKED);
 
 	}
-
-	REG_UPDATE(OPPBUF_CONTROL,
-		OPPBUF_ACTIVE_WIDTH, active_width);
-
-	REG_UPDATE(OPPBUF_3D_PARAMETERS_0,
-		OPPBUF_3D_VACT_SPACE1_SIZE, space1_size);
 }
 
-static void tgn10_program_stereo(struct timing_generator *tg,
+void optc1_program_stereo(struct timing_generator *optc,
 	const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags)
 {
 	if (flags->PROGRAM_STEREO)
-		tgn10_enable_stereo(tg, timing, flags);
+		optc1_enable_stereo(optc, timing, flags);
 	else
-		tgn10_disable_stereo(tg);
+		optc1_disable_stereo(optc);
 }
 
 
-static bool tgn10_is_stereo_left_eye(struct timing_generator *tg)
+bool optc1_is_stereo_left_eye(struct timing_generator *optc)
 {
 	bool ret = false;
 	uint32_t left_eye = 0;
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	REG_GET(OTG_STEREO_STATUS,
 		OTG_STEREO_CURRENT_EYE, &left_eye);
@@ -1155,7 +1132,7 @@ static bool tgn10_is_stereo_left_eye(struct timing_generator *tg)
 	return ret;
 }
 
-void tgn10_read_otg_state(struct dcn10_timing_generator *tgn10,
+void optc1_read_otg_state(struct optc *optc1,
 		struct dcn_otg_state *s)
 {
 	REG_GET(OTG_CONTROL,
@@ -1199,17 +1176,22 @@ void tgn10_read_otg_state(struct dcn10_timing_generator *tgn10,
 			OPTC_UNDERFLOW_OCCURRED_STATUS, &s->underflow_occurred_status);
 }
 
-static void tgn10_tg_init(struct timing_generator *tg)
+static void optc1_clear_optc_underflow(struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
-	tgn10_set_blank_data_double_buffer(tg, true);
 	REG_UPDATE(OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, 1);
 }
 
-static bool tgn10_is_tg_enabled(struct timing_generator *tg)
+static void optc1_tg_init(struct timing_generator *optc)
+{
+	optc1_set_blank_data_double_buffer(optc, true);
+	optc1_clear_optc_underflow(optc);
+}
+
+static bool optc1_is_tg_enabled(struct timing_generator *optc)
 {
-	struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg);
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 	uint32_t otg_enabled = 0;
 
 	REG_GET(OTG_CONTROL, OTG_MASTER_EN, &otg_enabled);
@@ -1217,50 +1199,65 @@ static bool tgn10_is_tg_enabled(struct timing_generator *tg)
 	return (otg_enabled != 0);
 
 }
+
+static bool optc1_is_optc_underflow_occurred(struct timing_generator *optc)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+	uint32_t underflow_occurred = 0;
+
+	REG_GET(OPTC_INPUT_GLOBAL_CONTROL,
+			OPTC_UNDERFLOW_OCCURRED_STATUS,
+			&underflow_occurred);
+
+	return (underflow_occurred == 1);
+}
+
 static const struct timing_generator_funcs dcn10_tg_funcs = {
-		.validate_timing = tgn10_validate_timing,
-		.program_timing = tgn10_program_timing,
-		.program_global_sync = tgn10_program_global_sync,
-		.enable_crtc = tgn10_enable_crtc,
-		.disable_crtc = tgn10_disable_crtc,
+		.validate_timing = optc1_validate_timing,
+		.program_timing = optc1_program_timing,
+		.program_global_sync = optc1_program_global_sync,
+		.enable_crtc = optc1_enable_crtc,
+		.disable_crtc = optc1_disable_crtc,
 		/* used by enable_timing_synchronization. Not need for FPGA */
-		.is_counter_moving = tgn10_is_counter_moving,
-		.get_position = tgn10_get_position,
-		.get_frame_count = tgn10_get_vblank_counter,
-		.get_scanoutpos = tgn10_get_crtc_scanoutpos,
-		.set_early_control = tgn10_set_early_control,
+		.is_counter_moving = optc1_is_counter_moving,
+		.get_position = optc1_get_position,
+		.get_frame_count = optc1_get_vblank_counter,
+		.get_scanoutpos = optc1_get_crtc_scanoutpos,
+		.set_early_control = optc1_set_early_control,
 		/* used by enable_timing_synchronization. Not need for FPGA */
-		.wait_for_state = tgn10_wait_for_state,
-		.set_blank = tgn10_set_blank,
-		.is_blanked = tgn10_is_blanked,
-		.set_blank_color = tgn10_program_blank_color,
-		.did_triggered_reset_occur = tgn10_did_triggered_reset_occur,
-		.enable_reset_trigger = tgn10_enable_reset_trigger,
-		.enable_crtc_reset = tgn10_enable_crtc_reset,
-		.disable_reset_trigger = tgn10_disable_reset_trigger,
-		.lock = tgn10_lock,
-		.unlock = tgn10_unlock,
-		.enable_optc_clock = tgn10_enable_optc_clock,
-		.set_drr = tgn10_set_drr,
-		.set_static_screen_control = tgn10_set_static_screen_control,
-		.set_test_pattern = tgn10_set_test_pattern,
-		.program_stereo = tgn10_program_stereo,
-		.is_stereo_left_eye = tgn10_is_stereo_left_eye,
-		.set_blank_data_double_buffer = tgn10_set_blank_data_double_buffer,
-		.tg_init = tgn10_tg_init,
-		.is_tg_enabled = tgn10_is_tg_enabled,
+		.wait_for_state = optc1_wait_for_state,
+		.set_blank = optc1_set_blank,
+		.is_blanked = optc1_is_blanked,
+		.set_blank_color = optc1_program_blank_color,
+		.did_triggered_reset_occur = optc1_did_triggered_reset_occur,
+		.enable_reset_trigger = optc1_enable_reset_trigger,
+		.enable_crtc_reset = optc1_enable_crtc_reset,
+		.disable_reset_trigger = optc1_disable_reset_trigger,
+		.lock = optc1_lock,
+		.unlock = optc1_unlock,
+		.enable_optc_clock = optc1_enable_optc_clock,
+		.set_drr = optc1_set_drr,
+		.set_static_screen_control = optc1_set_static_screen_control,
+		.set_test_pattern = optc1_set_test_pattern,
+		.program_stereo = optc1_program_stereo,
+		.is_stereo_left_eye = optc1_is_stereo_left_eye,
+		.set_blank_data_double_buffer = optc1_set_blank_data_double_buffer,
+		.tg_init = optc1_tg_init,
+		.is_tg_enabled = optc1_is_tg_enabled,
+		.is_optc_underflow_occurred = optc1_is_optc_underflow_occurred,
+		.clear_optc_underflow = optc1_clear_optc_underflow,
 };
 
-void dcn10_timing_generator_init(struct dcn10_timing_generator *tgn10)
+void dcn10_timing_generator_init(struct optc *optc1)
 {
-	tgn10->base.funcs = &dcn10_tg_funcs;
+	optc1->base.funcs = &dcn10_tg_funcs;
 
-	tgn10->max_h_total = tgn10->tg_mask->OTG_H_TOTAL + 1;
-	tgn10->max_v_total = tgn10->tg_mask->OTG_V_TOTAL + 1;
+	optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1;
+	optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1;
 
-	tgn10->min_h_blank = 32;
-	tgn10->min_v_blank = 3;
-	tgn10->min_v_blank_interlace = 5;
-	tgn10->min_h_sync_width = 8;
-	tgn10->min_v_sync_width = 1;
+	optc1->min_h_blank = 32;
+	optc1->min_v_blank = 3;
+	optc1->min_v_blank_interlace = 5;
+	optc1->min_h_sync_width = 8;
+	optc1->min_v_sync_width = 1;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
similarity index 83%
rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.h
rename to drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
index bb1cbfdc355476583ed2733a15861b3f6c4c674d..a3c7c2012f057f431f5ce8e9a4d24942f3dc4d61 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
@@ -29,7 +29,7 @@
 #include "timing_generator.h"
 
 #define DCN10TG_FROM_TG(tg)\
-	container_of(tg, struct dcn10_timing_generator, base)
+	container_of(tg, struct optc, base)
 
 #define TG_COMMON_REG_LIST_DCN(inst) \
 	SRI(OTG_VSTARTUP_PARAM, OTG, inst),\
@@ -70,8 +70,6 @@
 	SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\
 	SRI(OPTC_DATA_SOURCE_SELECT, ODM, inst),\
 	SRI(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\
-	SRI(OPPBUF_CONTROL, OPPBUF, inst),\
-	SRI(OPPBUF_3D_PARAMETERS_0, OPPBUF, inst),\
 	SRI(CONTROL, VTG, inst),\
 	SRI(OTG_VERT_SYNC_CONTROL, OTG, inst),\
 	SRI(OTG_MASTER_UPDATE_MODE, OTG, inst),\
@@ -84,7 +82,7 @@
 	SRI(OTG_TEST_PATTERN_COLOR, OTG, inst)
 
 
-struct dcn_tg_registers {
+struct dcn_optc_registers {
 	uint32_t OTG_VERT_SYNC_CONTROL;
 	uint32_t OTG_MASTER_UPDATE_MODE;
 	uint32_t OTG_GSL_CONTROL;
@@ -129,9 +127,11 @@ struct dcn_tg_registers {
 	uint32_t OPTC_INPUT_CLOCK_CONTROL;
 	uint32_t OPTC_DATA_SOURCE_SELECT;
 	uint32_t OPTC_INPUT_GLOBAL_CONTROL;
-	uint32_t OPPBUF_CONTROL;
-	uint32_t OPPBUF_3D_PARAMETERS_0;
 	uint32_t CONTROL;
+	uint32_t OTG_GSL_WINDOW_X;
+	uint32_t OTG_GSL_WINDOW_Y;
+	uint32_t OTG_VUPDATE_KEEPOUT;
+	uint32_t OTG_DSC_START_POSITION;
 };
 
 #define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\
@@ -211,8 +211,6 @@ struct dcn_tg_registers {
 	SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\
 	SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\
 	SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\
-	SF(OPPBUF0_OPPBUF_CONTROL, OPPBUF_ACTIVE_WIDTH, mask_sh),\
-	SF(OPPBUF0_OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE1_SIZE, mask_sh),\
 	SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\
 	SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\
 	SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\
@@ -332,8 +330,6 @@ struct dcn_tg_registers {
 	type OPTC_SEG0_SRC_SEL;\
 	type OPTC_UNDERFLOW_OCCURRED_STATUS;\
 	type OPTC_UNDERFLOW_CLEAR;\
-	type OPPBUF_ACTIVE_WIDTH;\
-	type OPPBUF_3D_VACT_SPACE1_SIZE;\
 	type VTG0_ENABLE;\
 	type VTG0_FP2;\
 	type VTG0_VCOUNT_INIT;\
@@ -346,22 +342,35 @@ struct dcn_tg_registers {
 	type OTG_GSL2_EN;\
 	type OTG_GSL_MASTER_EN;\
 	type OTG_GSL_FORCE_DELAY;\
-	type OTG_GSL_CHECK_ALL_FIELDS;
+	type OTG_GSL_CHECK_ALL_FIELDS;\
+	type OTG_GSL_WINDOW_START_X;\
+	type OTG_GSL_WINDOW_END_X;\
+	type OTG_GSL_WINDOW_START_Y;\
+	type OTG_GSL_WINDOW_END_Y;\
+	type OTG_RANGE_TIMING_DBUF_UPDATE_MODE;\
+	type OTG_GSL_MASTER_MODE;\
+	type OTG_MASTER_UPDATE_LOCK_GSL_EN;\
+	type MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET;\
+	type MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET;\
+	type OTG_DSC_START_POSITION_X;\
+	type OTG_DSC_START_POSITION_LINE_NUM;\
+	type OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN;
 
-struct dcn_tg_shift {
+
+struct dcn_optc_shift {
 	TG_REG_FIELD_LIST(uint8_t)
 };
 
-struct dcn_tg_mask {
+struct dcn_optc_mask {
 	TG_REG_FIELD_LIST(uint32_t)
 };
 
-struct dcn10_timing_generator {
+struct optc {
 	struct timing_generator base;
 
-	const struct dcn_tg_registers *tg_regs;
-	const struct dcn_tg_shift *tg_shift;
-	const struct dcn_tg_mask *tg_mask;
+	const struct dcn_optc_registers *tg_regs;
+	const struct dcn_optc_shift *tg_shift;
+	const struct dcn_optc_mask *tg_mask;
 
 	enum controller_id controller_id;
 
@@ -376,7 +385,7 @@ struct dcn10_timing_generator {
 	uint32_t min_v_blank_interlace;
 };
 
-void dcn10_timing_generator_init(struct dcn10_timing_generator *tg);
+void dcn10_timing_generator_init(struct optc *optc);
 
 struct dcn_otg_state {
 	uint32_t v_blank_start;
@@ -397,7 +406,77 @@ struct dcn_otg_state {
 	uint32_t otg_enabled;
 };
 
-void tgn10_read_otg_state(struct dcn10_timing_generator *tgn10,
+void optc1_read_otg_state(struct optc *optc1,
 		struct dcn_otg_state *s);
 
+bool optc1_validate_timing(
+	struct timing_generator *optc,
+	const struct dc_crtc_timing *timing);
+
+void optc1_program_timing(
+	struct timing_generator *optc,
+	const struct dc_crtc_timing *dc_crtc_timing,
+	bool use_vbios);
+
+void optc1_program_global_sync(
+		struct timing_generator *optc);
+
+bool optc1_disable_crtc(struct timing_generator *optc);
+
+bool optc1_is_counter_moving(struct timing_generator *optc);
+
+void optc1_get_position(struct timing_generator *optc,
+		struct crtc_position *position);
+
+uint32_t optc1_get_vblank_counter(struct timing_generator *optc);
+
+void optc1_get_crtc_scanoutpos(
+	struct timing_generator *optc,
+	uint32_t *v_blank_start,
+	uint32_t *v_blank_end,
+	uint32_t *h_position,
+	uint32_t *v_position);
+
+void optc1_set_early_control(
+	struct timing_generator *optc,
+	uint32_t early_cntl);
+
+void optc1_wait_for_state(struct timing_generator *optc,
+		enum crtc_state state);
+
+void optc1_set_blank(struct timing_generator *optc,
+		bool enable_blanking);
+
+bool optc1_is_blanked(struct timing_generator *optc);
+
+void optc1_program_blank_color(
+		struct timing_generator *optc,
+		const struct tg_color *black_color);
+
+bool optc1_did_triggered_reset_occur(
+	struct timing_generator *optc);
+
+void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst);
+
+void optc1_disable_reset_trigger(struct timing_generator *optc);
+
+void optc1_lock(struct timing_generator *optc);
+
+void optc1_unlock(struct timing_generator *optc);
+
+void optc1_enable_optc_clock(struct timing_generator *optc, bool enable);
+
+void optc1_set_drr(
+	struct timing_generator *optc,
+	const struct drr_params *params);
+
+void optc1_set_static_screen_control(
+	struct timing_generator *optc,
+	uint32_t value);
+
+void optc1_program_stereo(struct timing_generator *optc,
+	const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags);
+
+bool optc1_is_stereo_left_eye(struct timing_generator *optc);
+
 #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 10cce51d31d2a08b87e5e7675d4d8a4664d802fc..44825e2c9ebb5fb3247c86c7ab0213e3984a13c2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -34,7 +34,7 @@
 #include "dcn10/dcn10_mpc.h"
 #include "irq/dcn10/irq_service_dcn10.h"
 #include "dcn10/dcn10_dpp.h"
-#include "dcn10/dcn10_timing_generator.h"
+#include "dcn10_optc.h"
 #include "dcn10/dcn10_hw_sequencer.h"
 #include "dce110/dce110_hw_sequencer.h"
 #include "dcn10/dcn10_opp.h"
@@ -348,18 +348,18 @@ static const struct dcn_mpc_mask mpc_mask = {
 #define tg_regs(id)\
 [id] = {TG_COMMON_REG_LIST_DCN1_0(id)}
 
-static const struct dcn_tg_registers tg_regs[] = {
+static const struct dcn_optc_registers tg_regs[] = {
 	tg_regs(0),
 	tg_regs(1),
 	tg_regs(2),
 	tg_regs(3),
 };
 
-static const struct dcn_tg_shift tg_shift = {
+static const struct dcn_optc_shift tg_shift = {
 	TG_COMMON_MASK_SH_LIST_DCN1_0(__SHIFT)
 };
 
-static const struct dcn_tg_mask tg_mask = {
+static const struct dcn_optc_mask tg_mask = {
 	TG_COMMON_MASK_SH_LIST_DCN1_0(_MASK)
 };
 
@@ -553,8 +553,8 @@ static struct timing_generator *dcn10_timing_generator_create(
 		struct dc_context *ctx,
 		uint32_t instance)
 {
-	struct dcn10_timing_generator *tgn10 =
-		kzalloc(sizeof(struct dcn10_timing_generator), GFP_KERNEL);
+	struct optc *tgn10 =
+		kzalloc(sizeof(struct optc), GFP_KERNEL);
 
 	if (!tgn10)
 		return NULL;
@@ -678,6 +678,7 @@ static struct dce_hwseq *dcn10_hwseq_create(
 		hws->shifts = &hwseq_shift;
 		hws->masks = &hwseq_mask;
 		hws->wa.DEGVIDCN10_253 = true;
+		hws->wa.false_optc_underflow = true;
 	}
 	return hws;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 87bab8e8139fbb05e960a88d989455ef48371ae3..3488af2b5786cd0a299df91da12b8e4b2ed3f843 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'utils' sub-component of DAL.
 # It provides the general basic services required by other DAL
 # subcomponents.
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
index 4c31fa54af3902509707c9e75d33d80fa7e47402..c109b2c34c8fc718f1bc88ddd3871d94fce474f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
@@ -35,35 +35,6 @@ static void set_soc_bounding_box(struct _vcs_dpi_soc_bounding_box_st *soc, enum
 		soc->writeback_latency_us = 12.0;
 		soc->ideal_dram_bw_after_urgent_percent = 80.0;
 		soc->max_request_size_bytes = 256;
-
-		soc->vmin.dcfclk_mhz = 300.0;
-		soc->vmin.dispclk_mhz = 608.0;
-		soc->vmin.dppclk_mhz = 435.0;
-		soc->vmin.dram_bw_per_chan_gbps = 12.8;
-		soc->vmin.phyclk_mhz = 540.0;
-		soc->vmin.socclk_mhz = 208.0;
-
-		soc->vmid.dcfclk_mhz = 600.0;
-		soc->vmid.dispclk_mhz = 661.0;
-		soc->vmid.dppclk_mhz = 661.0;
-		soc->vmid.dram_bw_per_chan_gbps = 12.8;
-		soc->vmid.phyclk_mhz = 540.0;
-		soc->vmid.socclk_mhz = 208.0;
-
-		soc->vnom.dcfclk_mhz = 600.0;
-		soc->vnom.dispclk_mhz = 661.0;
-		soc->vnom.dppclk_mhz = 661.0;
-		soc->vnom.dram_bw_per_chan_gbps = 38.4;
-		soc->vnom.phyclk_mhz = 810;
-		soc->vnom.socclk_mhz = 208.0;
-
-		soc->vmax.dcfclk_mhz = 600.0;
-		soc->vmax.dispclk_mhz = 1086.0;
-		soc->vmax.dppclk_mhz = 661.0;
-		soc->vmax.dram_bw_per_chan_gbps = 38.4;
-		soc->vmax.phyclk_mhz = 810.0;
-		soc->vmax.socclk_mhz = 208.0;
-
 		soc->downspread_percent = 0.5;
 		soc->dram_page_open_time_ns = 50.0;
 		soc->dram_rw_turnaround_time_ns = 17.5;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index 2d9d6298f0d320da80c601425e6c287e6d8f1e30..aeebd8bee6289e1432ed37ca02ccc2f3cc764946 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -79,10 +79,6 @@ struct	_vcs_dpi_soc_bounding_box_st	{
 	double	writeback_latency_us;
 	double	ideal_dram_bw_after_urgent_percent;
 	unsigned int	max_request_size_bytes;
-	struct _vcs_dpi_voltage_scaling_st	vmin;
-	struct _vcs_dpi_voltage_scaling_st	vmid;
-	struct _vcs_dpi_voltage_scaling_st	vnom;
-	struct _vcs_dpi_voltage_scaling_st	vmax;
 	double	downspread_percent;
 	double	dram_page_open_time_ns;
 	double	dram_rw_turnaround_time_ns;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 1f337ecfeab049d9241f65a1b1d2c5b80b95cc76..260e113fcc02682f8e731e2c0ec0ed621b77e0b4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -28,6 +28,15 @@
 
 #include "dml_inline_defs.h"
 
+/*
+ * NOTE:
+ *   This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
 #define BPP_INVALID 0
 #define BPP_BLENDED_PIPE 0xffffffff
 static const unsigned int NumberOfStates = DC__VOLTAGE_STATES;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.c
index 8ba962df42e6057fb51a4d6cb7de299676955668..325dd2b757d6b1d4228acb1c3f73c2c2002d8e80 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.c
@@ -27,6 +27,15 @@
 #include "display_mode_vba.h"
 #include "display_rq_dlg_calc.h"
 
+/*
+ * NOTE:
+ *   This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
 static void calculate_ttu_cursor(struct display_mode_lib *mode_lib,
 		double *refcyc_per_req_delivery_pre_cur,
 		double *refcyc_per_req_delivery_cur,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
index 1e4b1e38340123708b9b009f6b3d580eb3e2bb8e..c2037daa8e6666d7b2dd9b76477e03f7472e9220 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
@@ -28,6 +28,15 @@
 
 #include "dml_inline_defs.h"
 
+/*
+ * NOTE:
+ *   This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
 static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
 {
 	unsigned int ret_val = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/soc_bounding_box.c b/drivers/gpu/drm/amd/display/dc/dml/soc_bounding_box.c
index bc7d8c7072211c94ba10aec24784c157388ce0c7..324239c779583b26c4d781f8673daa81b1f02b42 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/soc_bounding_box.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/soc_bounding_box.c
@@ -27,6 +27,16 @@
 #include "dc_features.h"
 
 #include "dml_inline_defs.h"
+
+/*
+ * NOTE:
+ *   This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
 void dml_socbb_set_latencies(soc_bounding_box_st *to_box, soc_bounding_box_st *from_box)
 {
 	to_box->dram_clock_change_latency_us = from_box->dram_clock_change_latency_us;
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/Makefile b/drivers/gpu/drm/amd/display/dc/gpio/Makefile
index 70d01a9e96760ac2828dd5537cefe63e53ac3427..562ee189d780c4de20d150a16497c27ef44300a8 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/gpio/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'gpio' sub-component of DAL.
 # It provides the control and status of HW GPIO pins.
 
diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/Makefile b/drivers/gpu/drm/amd/display/dc/i2caux/Makefile
index 55603400acd99128c0957ebe7ee19d610283003f..352885cb4d0763dd3bb4912e59722abd9ee4589c 100644
--- a/drivers/gpu/drm/amd/display/dc/i2caux/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/i2caux/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'i2c' sub-component of DAL.
 # It provides the control and status of HW i2c engine of the adapter.
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index d680b565af6fa529bac934ac86f5e58768f60413..d6971054ec07e93f21637701ee81486ea0973387 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -212,7 +212,6 @@ struct pipe_ctx {
 	struct _vcs_dpi_display_rq_regs_st rq_regs;
 	struct _vcs_dpi_display_pipe_dest_params_st pipe_dlg_param;
 #endif
-	struct dwbc *dwbc;
 };
 
 struct resource_context {
@@ -241,6 +240,7 @@ struct dce_bw_output {
 
 struct dcn_bw_clocks {
 	int dispclk_khz;
+	int dppclk_khz;
 	bool dppclk_div;
 	int dcfclk_khz;
 	int dcfclk_deep_sleep_khz;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h
index 1e231f6de73247212914e126312966b9f4588c65..132d18d4b29383c10977d3fc13096b87204dd3b8 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h
@@ -349,10 +349,10 @@ struct dcn_bw_internal_vars {
 	float dst_x_after_scaler;
 	float dst_y_after_scaler;
 	float time_calc;
-	float v_update_offset[number_of_planes_minus_one + 1];
+	float v_update_offset[number_of_planes_minus_one + 1][2];
 	float total_repeater_delay;
-	float v_update_width[number_of_planes_minus_one + 1];
-	float v_ready_offset[number_of_planes_minus_one + 1];
+	float v_update_width[number_of_planes_minus_one + 1][2];
+	float v_ready_offset[number_of_planes_minus_one + 1][2];
 	float time_setup;
 	float extra_latency;
 	float maximum_vstartup;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
index 48217ecfabd418bbcb75bdd6f2504babd95a5746..a83a484946133d00311c8f1219ba2bcf17527805 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
@@ -50,9 +50,9 @@ struct abm_funcs {
 	bool (*set_backlight_level)(struct abm *abm,
 			unsigned int backlight_level,
 			unsigned int frame_ramp,
-			unsigned int controller_id);
+			unsigned int controller_id,
+			bool use_smooth_brightness);
 	unsigned int (*get_current_backlight_8_bit)(struct abm *abm);
-	bool (*is_dmcu_initialized)(struct abm *abm);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h
index b59712b41b8139ee19828c366ce4581569c84185..ce206355461b1e4496b80f76dd5e42a1ad38a11f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h
@@ -63,6 +63,7 @@ struct dmcu_funcs {
 			unsigned int wait_loop_number);
 	void (*get_psr_wait_loop)(struct dmcu *dmcu,
 			unsigned int *psr_wait_loop_number);
+	bool (*is_dmcu_initialized)(struct dmcu *dmcu);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
index ccb4896975c2430c9c00cab50ced81250b3d2ab4..25edbde6163eed65206e07b2d4e594b47210af4c 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
@@ -68,7 +68,7 @@ struct dpp_funcs {
 
 	void (*dpp_set_csc_adjustment)(
 		struct dpp *dpp,
-		const struct out_csc_color_matrix *tbl_entry);
+		const uint16_t *regval);
 
 	void (*dpp_power_on_regamma_lut)(
 		struct dpp *dpp,
@@ -122,7 +122,7 @@ struct dpp_funcs {
 
 	void (*set_cursor_attributes)(
 			struct dpp *dpp_base,
-			const struct dc_cursor_attributes *attr);
+			enum dc_cursor_color_format color_format);
 
 	void (*set_cursor_position)(
 			struct dpp *dpp_base,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
index 49b12f602e79c4f58cb338f4bc472a779480995b..b7c7e70022e43c3a3ca0feb35ac793de0cea1c3a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
@@ -28,6 +28,20 @@
 
 #include "mem_input.h"
 
+
+enum cursor_pitch {
+	CURSOR_PITCH_64_PIXELS = 0,
+	CURSOR_PITCH_128_PIXELS,
+	CURSOR_PITCH_256_PIXELS
+};
+
+enum cursor_lines_per_chunk {
+	CURSOR_LINE_PER_CHUNK_2 = 1,
+	CURSOR_LINE_PER_CHUNK_4,
+	CURSOR_LINE_PER_CHUNK_8,
+	CURSOR_LINE_PER_CHUNK_16
+};
+
 struct hubp {
 	struct hubp_funcs *funcs;
 	struct dc_context *ctx;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
index ddc56700109b36e25280ab5cc90665a21cf03ad1..e3f0b4056318f0bc61de80754db0d6fb6d755721 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
@@ -126,36 +126,12 @@ struct default_adjustment {
 	bool force_hw_default;
 };
 
-struct out_csc_color_matrix {
-	enum dc_color_space color_space;
-	uint16_t regval[12];
-};
 
-struct output_csc_matrix {
+struct out_csc_color_matrix {
 	enum dc_color_space color_space;
 	uint16_t regval[12];
 };
 
-static const struct output_csc_matrix output_csc_matrix[] = {
-	{ COLOR_SPACE_SRGB,
-		{ 0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} },
-	{ COLOR_SPACE_SRGB_LIMITED,
-		{ 0x1B67, 0, 0, 0x201, 0, 0x1B67, 0, 0x201, 0, 0, 0x1B67, 0x201} },
-	{ COLOR_SPACE_YCBCR601,
-		{ 0xE04, 0xF444, 0xFDB9, 0x1004, 0x831, 0x1016, 0x320, 0x201, 0xFB45,
-				0xF6B7, 0xE04, 0x1004} },
-	{ COLOR_SPACE_YCBCR709,
-		{ 0xE04, 0xF345, 0xFEB7, 0x1004, 0x5D3, 0x1399, 0x1FA,
-				0x201, 0xFCCA, 0xF533, 0xE04, 0x1004} },
-
-	/* TODO: correct values below */
-	{ COLOR_SPACE_YCBCR601_LIMITED,
-		{ 0xE00, 0xF447, 0xFDB9, 0x1000, 0x991,
-				0x12C9, 0x3A6, 0x200, 0xFB47, 0xF6B9, 0xE00, 0x1000} },
-	{ COLOR_SPACE_YCBCR709_LIMITED,
-		{ 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3,
-				0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} },
-};
 
 enum opp_regamma {
 	OPP_REGAMMA_BYPASS = 0,
@@ -178,4 +154,41 @@ struct dc_bias_and_scale {
 	uint16_t bias_blue;
 };
 
+enum test_pattern_dyn_range {
+	TEST_PATTERN_DYN_RANGE_VESA = 0,
+	TEST_PATTERN_DYN_RANGE_CEA
+};
+
+enum test_pattern_mode {
+	TEST_PATTERN_MODE_COLORSQUARES_RGB = 0,
+	TEST_PATTERN_MODE_COLORSQUARES_YCBCR601,
+	TEST_PATTERN_MODE_COLORSQUARES_YCBCR709,
+	TEST_PATTERN_MODE_VERTICALBARS,
+	TEST_PATTERN_MODE_HORIZONTALBARS,
+	TEST_PATTERN_MODE_SINGLERAMP_RGB,
+	TEST_PATTERN_MODE_DUALRAMP_RGB
+};
+
+enum test_pattern_color_format {
+	TEST_PATTERN_COLOR_FORMAT_BPC_6 = 0,
+	TEST_PATTERN_COLOR_FORMAT_BPC_8,
+	TEST_PATTERN_COLOR_FORMAT_BPC_10,
+	TEST_PATTERN_COLOR_FORMAT_BPC_12
+};
+
+enum controller_dp_test_pattern {
+	CONTROLLER_DP_TEST_PATTERN_D102 = 0,
+	CONTROLLER_DP_TEST_PATTERN_SYMBOLERROR,
+	CONTROLLER_DP_TEST_PATTERN_PRBS7,
+	CONTROLLER_DP_TEST_PATTERN_COLORSQUARES,
+	CONTROLLER_DP_TEST_PATTERN_VERTICALBARS,
+	CONTROLLER_DP_TEST_PATTERN_HORIZONTALBARS,
+	CONTROLLER_DP_TEST_PATTERN_COLORRAMP,
+	CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
+	CONTROLLER_DP_TEST_PATTERN_RESERVED_8,
+	CONTROLLER_DP_TEST_PATTERN_RESERVED_9,
+	CONTROLLER_DP_TEST_PATTERN_RESERVED_A,
+	CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA
+};
+
 #endif /* __DAL_HW_SHARED_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
index 8a08f0a97f94d35f6ef996f0dc1d9f822eaa8697..0fd329deacd8a047c6302e092075972698191fff 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
@@ -1,3 +1,25 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
 /*
  * link_encoder.h
  *
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
index 72ea33526a5c8851d6e34d6ca89c62da1b5c7cc1..23a8d5e53a89d695136a26c3144529752f17a0f0 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
@@ -26,7 +26,10 @@
 #define __DC_MPCC_H__
 
 #include "dc_hw_types.h"
-#include "opp.h"
+#include "hw_shared.h"
+
+#define MAX_MPCC 6
+#define MAX_OPP 6
 
 enum mpc_output_csc_mode {
 	MPC_OUTPUT_CSC_DISABLE = 0,
@@ -34,45 +37,151 @@ enum mpc_output_csc_mode {
 	MPC_OUTPUT_CSC_COEF_B
 };
 
-struct mpcc_cfg {
-	int dpp_id;
-	int opp_id;
-	struct mpc_tree_cfg *tree_cfg;
-	unsigned int z_index;
 
-	struct tg_color black_color;
-	bool per_pixel_alpha;
-	bool pre_multiplied_alpha;
+enum mpcc_blend_mode {
+	MPCC_BLEND_MODE_BYPASS,
+	MPCC_BLEND_MODE_TOP_LAYER_PASSTHROUGH,
+	MPCC_BLEND_MODE_TOP_LAYER_ONLY,
+	MPCC_BLEND_MODE_TOP_BOT_BLENDING
+};
+
+enum mpcc_alpha_blend_mode {
+	MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA,
+	MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN,
+	MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA
+};
+
+/*
+ * MPCC blending configuration
+ */
+struct mpcc_blnd_cfg {
+	struct tg_color black_color;	/* background color */
+	enum mpcc_alpha_blend_mode alpha_mode;	/* alpha blend mode */
+	bool pre_multiplied_alpha;	/* alpha pre-multiplied mode flag */
+	int global_gain;
+	int global_alpha;
+	bool overlap_only;
+
+};
+
+struct mpcc_sm_cfg {
+	bool enable;
+	/* 0-single plane,2-row subsampling,4-column subsampling,6-checkboard subsampling */
+	int sm_mode;
+	/* 0- disable frame alternate, 1- enable frame alternate */
+	bool frame_alt;
+	/* 0- disable field alternate, 1- enable field alternate */
+	bool field_alt;
+	/* 0-no force,2-force frame polarity from top,3-force frame polarity from bottom */
+	int force_next_frame_porlarity;
+	/* 0-no force,2-force field polarity from top,3-force field polarity from bottom */
+	int force_next_field_polarity;
+};
+
+/*
+ * MPCC connection and blending configuration for a single MPCC instance.
+ * This struct is used as a node in an MPC tree.
+ */
+struct mpcc {
+	int mpcc_id;			/* MPCC physical instance */
+	int dpp_id;			/* DPP input to this MPCC */
+	struct mpcc *mpcc_bot;		/* pointer to bottom layer MPCC.  NULL when not connected */
+	struct mpcc_blnd_cfg blnd_cfg;	/* The blending configuration for this MPCC */
+	struct mpcc_sm_cfg sm_cfg;	/* stereo mix setting for this MPCC */
+};
+
+/*
+ * MPC tree represents all MPCC connections for a pipe.
+ */
+struct mpc_tree {
+	int opp_id;			/* The OPP instance that owns this MPC tree */
+	struct mpcc *opp_list;		/* The top MPCC layer of the MPC tree that outputs to OPP endpoint */
 };
 
 struct mpc {
 	const struct mpc_funcs *funcs;
 	struct dc_context *ctx;
+
+	struct mpcc mpcc_array[MAX_MPCC];
 };
 
 struct mpc_funcs {
-	int (*add)(struct mpc *mpc, struct mpcc_cfg *cfg);
+	/*
+	 * Insert DPP into MPC tree based on specified blending position.
+	 * Only used for planes that are part of blending chain for OPP output
+	 *
+	 * Parameters:
+	 * [in/out] mpc		- MPC context.
+	 * [in/out] tree	- MPC tree structure that plane will be added to.
+	 * [in]	blnd_cfg	- MPCC blending configuration for the new blending layer.
+	 * [in]	sm_cfg		- MPCC stereo mix configuration for the new blending layer.
+	 *			  stereo mix must disable for the very bottom layer of the tree config.
+	 * [in]	insert_above_mpcc - Insert new plane above this MPCC.  If NULL, insert as bottom plane.
+	 * [in]	dpp_id		 - DPP instance for the plane to be added.
+	 * [in]	mpcc_id		 - The MPCC physical instance to use for blending.
+	 *
+	 * Return:  struct mpcc* - MPCC that was added.
+	 */
+	struct mpcc* (*insert_plane)(
+			struct mpc *mpc,
+			struct mpc_tree *tree,
+			struct mpcc_blnd_cfg *blnd_cfg,
+			struct mpcc_sm_cfg *sm_cfg,
+			struct mpcc *insert_above_mpcc,
+			int dpp_id,
+			int mpcc_id);
 
-	void (*remove)(struct mpc *mpc,
-			struct mpc_tree_cfg *tree_cfg,
-			int opp_id,
-			int mpcc_inst);
+	/*
+	 * Remove a specified MPCC from the MPC tree.
+	 *
+	 * Parameters:
+	 * [in/out] mpc		- MPC context.
+	 * [in/out] tree	- MPC tree structure that plane will be removed from.
+	 * [in/out] mpcc	- MPCC to be removed from tree.
+	 *
+	 * Return:  void
+	 */
+	void (*remove_mpcc)(
+			struct mpc *mpc,
+			struct mpc_tree *tree,
+			struct mpcc *mpcc);
 
-	void (*wait_for_idle)(struct mpc *mpc, int id);
+	/*
+	 * Reset the MPCC HW status by disconnecting all muxes.
+	 *
+	 * Parameters:
+	 * [in/out] mpc		- MPC context.
+	 *
+	 * Return:  void
+	 */
+	void (*mpc_init)(struct mpc *mpc);
 
-	void (*update_blend_mode)(struct mpc *mpc, struct mpcc_cfg *cfg);
+	/*
+	 * Update the blending configuration for a specified MPCC.
+	 *
+	 * Parameters:
+	 * [in/out] mpc		- MPC context.
+	 * [in]     blnd_cfg	- MPCC blending configuration.
+	 * [in]     mpcc_id	- The MPCC physical instance.
+	 *
+	 * Return:  void
+	 */
+	void (*update_blending)(
+		struct mpc *mpc,
+		struct mpcc_blnd_cfg *blnd_cfg,
+		int mpcc_id);
 
-	int (*get_opp_id)(struct mpc *mpc, int mpcc_id);
+	struct mpcc* (*get_mpcc_for_dpp)(
+			struct mpc_tree *tree,
+			int dpp_id);
+
+	void (*wait_for_idle)(struct mpc *mpc, int id);
 
-	void (*set_output_csc)(struct mpc *mpc,
-			int opp_id,
-			const struct out_csc_color_matrix *tbl_entry,
-			enum mpc_output_csc_mode ocsc_mode);
+	void (*assert_mpcc_idle_before_connect)(struct mpc *mpc, int mpcc_id);
 
-	void (*set_ocsc_default)(struct mpc *mpc,
-			int opp_id,
-			enum dc_color_space color_space,
-			enum mpc_output_csc_mode ocsc_mode);
+	void (*init_mpcc_list_from_hw)(
+		struct mpc *mpc,
+		struct mpc_tree *tree);
 
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
index 579d1059a3d467aebf79215b789b915cd47d5ec8..ab8fb77f1ae5d36d01c263694860d4d681579239 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
@@ -29,6 +29,7 @@
 #include "hw_shared.h"
 #include "dc_hw_types.h"
 #include "transform.h"
+#include "mpc.h"
 
 struct fixed31_32;
 
@@ -204,7 +205,7 @@ struct output_pixel_processor {
 	struct dc_context *ctx;
 	uint32_t inst;
 	struct pwl_params regamma_params;
-	struct mpc_tree_cfg mpc_tree;
+	struct mpc_tree mpc_tree_params;
 	bool mpcc_disconnect_pending[MAX_PIPES];
 	const struct opp_funcs *funcs;
 };
@@ -248,6 +249,21 @@ enum ovl_csc_adjust_item {
 	OVERLAY_COLOR_TEMPERATURE
 };
 
+enum oppbuf_display_segmentation {
+	OPPBUF_DISPLAY_SEGMENTATION_1_SEGMENT = 0,
+	OPPBUF_DISPLAY_SEGMENTATION_2_SEGMENT = 1,
+	OPPBUF_DISPLAY_SEGMENTATION_4_SEGMENT = 2,
+	OPPBUF_DISPLAY_SEGMENTATION_4_SEGMENT_SPLIT_LEFT = 3,
+	OPPBUF_DISPLAY_SEGMENTATION_4_SEGMENT_SPLIT_RIGHT = 4
+};
+
+struct oppbuf_params {
+	uint32_t active_width;
+	enum oppbuf_display_segmentation mso_segmentation;
+	uint32_t mso_overlap_pixel_num;
+	uint32_t pixel_repetition;
+};
+
 struct opp_funcs {
 
 
@@ -276,26 +292,11 @@ struct opp_funcs {
 
 	void (*opp_destroy)(struct output_pixel_processor **opp);
 
-	void (*opp_set_stereo_polarity)(
-			struct output_pixel_processor *opp,
-			bool enable,
-			bool rightEyePolarity);
-
-	void (*opp_set_test_pattern)(
-			struct output_pixel_processor *opp,
-			bool enable);
-
-	void (*opp_dpg_blank_enable)(
-			struct output_pixel_processor *opp,
-			bool enable,
-			const struct tg_color *color,
-			int width,
-			int height);
-
-	void (*opp_convert_pti)(
+	void (*opp_program_stereo)(
 		struct output_pixel_processor *opp,
 		bool enable,
-		bool polarity);
+		const struct dc_crtc_timing *timing);
+
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
index 3050afe8e8a96c62c8a7eb26ac70e8ed21c7fcbc..b5db1692393c9e5769f43848cc60cc4317691bde 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
@@ -1,3 +1,25 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
 /*
  * stream_encoder.h
  *
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index 860259913d7863336686d2ac1f2e6de17eee79df..ec312f1a3e55e97dc6a5f1f3a4c49718bbd1e2ee 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -26,6 +26,8 @@
 #ifndef __DAL_TIMING_GENERATOR_TYPES_H__
 #define __DAL_TIMING_GENERATOR_TYPES_H__
 
+#include "hw_shared.h"
+
 struct dc_bios;
 
 /* Contains CRTC vertical/horizontal pixel counters */
@@ -40,6 +42,19 @@ struct dcp_gsl_params {
 	int gsl_master;
 };
 
+struct gsl_params {
+	int gsl0_en;
+	int gsl1_en;
+	int gsl2_en;
+	int gsl_master_en;
+	int gsl_master_mode;
+	int master_update_lock_gsl_en;
+	int gsl_window_start_x;
+	int gsl_window_end_x;
+	int gsl_window_start_y;
+	int gsl_window_end_y;
+};
+
 /* define the structure of Dynamic Refresh Mode */
 struct drr_params {
 	uint32_t vertical_total_min;
@@ -50,43 +65,6 @@ struct drr_params {
 #define LEFT_EYE_3D_PRIMARY_SURFACE 1
 #define RIGHT_EYE_3D_PRIMARY_SURFACE 0
 
-enum test_pattern_dyn_range {
-	TEST_PATTERN_DYN_RANGE_VESA = 0,
-	TEST_PATTERN_DYN_RANGE_CEA
-};
-
-enum test_pattern_mode {
-	TEST_PATTERN_MODE_COLORSQUARES_RGB = 0,
-	TEST_PATTERN_MODE_COLORSQUARES_YCBCR601,
-	TEST_PATTERN_MODE_COLORSQUARES_YCBCR709,
-	TEST_PATTERN_MODE_VERTICALBARS,
-	TEST_PATTERN_MODE_HORIZONTALBARS,
-	TEST_PATTERN_MODE_SINGLERAMP_RGB,
-	TEST_PATTERN_MODE_DUALRAMP_RGB
-};
-
-enum test_pattern_color_format {
-	TEST_PATTERN_COLOR_FORMAT_BPC_6 = 0,
-	TEST_PATTERN_COLOR_FORMAT_BPC_8,
-	TEST_PATTERN_COLOR_FORMAT_BPC_10,
-	TEST_PATTERN_COLOR_FORMAT_BPC_12
-};
-
-enum controller_dp_test_pattern {
-	CONTROLLER_DP_TEST_PATTERN_D102 = 0,
-	CONTROLLER_DP_TEST_PATTERN_SYMBOLERROR,
-	CONTROLLER_DP_TEST_PATTERN_PRBS7,
-	CONTROLLER_DP_TEST_PATTERN_COLORSQUARES,
-	CONTROLLER_DP_TEST_PATTERN_VERTICALBARS,
-	CONTROLLER_DP_TEST_PATTERN_HORIZONTALBARS,
-	CONTROLLER_DP_TEST_PATTERN_COLORRAMP,
-	CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
-	CONTROLLER_DP_TEST_PATTERN_RESERVED_8,
-	CONTROLLER_DP_TEST_PATTERN_RESERVED_9,
-	CONTROLLER_DP_TEST_PATTERN_RESERVED_A,
-	CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA
-};
-
 enum crtc_state {
 	CRTC_STATE_VBLANK = 0,
 	CRTC_STATE_VACTIVE
@@ -100,6 +78,12 @@ struct _dlg_otg_param {
 	enum signal_type signal;
 };
 
+struct vupdate_keepout_params {
+	int start_offset;
+	int end_offset;
+	int enable;
+};
+
 struct crtc_stereo_flags {
 	uint8_t PROGRAM_STEREO         : 1;
 	uint8_t PROGRAM_POLARITY       : 1;
@@ -187,6 +171,8 @@ struct timing_generator_funcs {
 
 	void (*tg_init)(struct timing_generator *tg);
 	bool (*is_tg_enabled)(struct timing_generator *tg);
+	bool (*is_optc_underflow_occurred)(struct timing_generator *tg);
+	void (*clear_optc_underflow)(struct timing_generator *tg);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index 5dc4ecf618ff4afe82f8c0e499b08472dc404fcb..4c0aa56f7bae255e42b18db495efba17b9868971 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -28,6 +28,7 @@
 #include "dc_types.h"
 #include "clock_source.h"
 #include "inc/hw/timing_generator.h"
+#include "inc/hw/opp.h"
 #include "inc/hw/link_encoder.h"
 #include "core_status.h"
 
@@ -40,6 +41,7 @@ enum pipe_gating_control {
 struct dce_hwseq_wa {
 	bool blnd_crtc_trigger;
 	bool DEGVIDCN10_253;
+	bool false_optc_underflow;
 };
 
 struct hwseq_wa_state {
@@ -137,10 +139,6 @@ struct hw_sequencer_funcs {
 
 	void (*disable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx);
 
-	void (*enable_plane)(struct dc *dc,
-			struct pipe_ctx *pipe,
-			struct dc_state *context);
-
 	void (*update_info_frame)(struct pipe_ctx *pipe_ctx);
 
 	void (*enable_stream)(struct pipe_ctx *pipe_ctx);
@@ -198,6 +196,7 @@ struct hw_sequencer_funcs {
 	void (*edp_backlight_control)(
 			struct dc_link *link,
 			bool enable);
+	void (*edp_wait_for_hpd_ready)(struct dc_link *link, bool power_up);
 
 };
 
@@ -209,4 +208,8 @@ void color_space_to_black_color(
 bool hwss_wait_for_blank_complete(
 		struct timing_generator *tg);
 
+const uint16_t *find_color_matrix(
+		enum dc_color_space color_space,
+		uint32_t *array_size);
+
 #endif /* __DC_HW_SEQUENCER_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/irq/Makefile b/drivers/gpu/drm/amd/display/dc/irq/Makefile
index c7e93f7223bdabe032436004d6d2dd0304ebed10..498515aad4a50bf35133755d1bded3a387a0817e 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/irq/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'audio' sub-component of DAL.
 # It provides the control and status of HW adapter resources,
 # that are global for the ASIC and sharable between pipes.
diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h
index a87c0329541f584c2c7dcff884b67ad96f4a684f..1fcbc99e63b58f1839be535c95665c682165a942 100644
--- a/drivers/gpu/drm/amd/display/dc/os_types.h
+++ b/drivers/gpu/drm/amd/display/dc/os_types.h
@@ -26,8 +26,6 @@
 #ifndef _OS_TYPES_H_
 #define _OS_TYPES_H_
 
-#if defined __KERNEL__
-
 #include <asm/byteorder.h>
 #include <linux/types.h>
 #include <drm/drmP.h>
@@ -46,14 +44,12 @@
 #undef WRITE
 #undef FRAME_SIZE
 
-#define dm_output_to_console(fmt, ...) DRM_INFO(fmt, ##__VA_ARGS__)
+#define dm_output_to_console(fmt, ...) DRM_DEBUG_KMS(fmt, ##__VA_ARGS__)
 
 #define dm_error(fmt, ...) DRM_ERROR(fmt, ##__VA_ARGS__)
 
-#define dm_debug(fmt, ...) DRM_DEBUG_KMS(fmt, ##__VA_ARGS__)
-
-#define dm_vlog(fmt, args) vprintk(fmt, args)
-
+#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#include <asm/fpu/api.h>
 #endif
 
 /*
@@ -89,8 +85,4 @@
 	BREAK_TO_DEBUGGER(); \
 } while (0)
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
-#include <asm/fpu/api.h>
-#endif
-
 #endif /* _OS_TYPES_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/virtual/Makefile b/drivers/gpu/drm/amd/display/dc/virtual/Makefile
index fc0b7318d9ccb239bc7127a1b735ff1f7324e3aa..07326d244d50a7333b43e0d3e02bc0dedca35f97 100644
--- a/drivers/gpu/drm/amd/display/dc/virtual/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/virtual/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the virtual sub-component of DAL.
 # It provides the control and status of HW CRTC block.
 
diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h
index 3248f699daf2c986b6fb1508f9c9059604630370..4badaedbaaddea8c94278c9d974cd1a57640b21b 100644
--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h
@@ -463,4 +463,11 @@ uint32_t dal_fixed31_32_u2d19(
 uint32_t dal_fixed31_32_u0d19(
 	struct fixed31_32 arg);
 
+
+uint32_t dal_fixed31_32_clamp_u0d14(
+	struct fixed31_32 arg);
+
+uint32_t dal_fixed31_32_clamp_u0d10(
+	struct fixed31_32 arg);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/include/grph_object_id.h b/drivers/gpu/drm/amd/display/include/grph_object_id.h
index 03a7a9ca95eac6de24698679790fefba984a7c2a..c4197432eb7c34542952364fc4027ff4819f63f3 100644
--- a/drivers/gpu/drm/amd/display/include/grph_object_id.h
+++ b/drivers/gpu/drm/amd/display/include/grph_object_id.h
@@ -233,10 +233,6 @@ static inline struct graphics_object_id dal_graphics_object_id_init(
 	return result;
 }
 
-bool dal_graphics_object_id_is_equal(
-	struct graphics_object_id id1,
-	struct graphics_object_id id2);
-
 /* Based on internal data members memory layout */
 static inline uint32_t dal_graphics_object_id_to_uint(
 	struct graphics_object_id id)
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/Makefile b/drivers/gpu/drm/amd/display/modules/freesync/Makefile
index db8e0ff6d7a9c07da7fef4ec5808f73d5bf3a1b9..fb9a499780e8c82c6a833ee424dfe1b36a302fec 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/Makefile
+++ b/drivers/gpu/drm/amd/display/modules/freesync/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for the 'freesync' sub-module of DAL.
 #
 
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_default.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_default.h
index 663d3af35baf120e1636eb293b2d18b81d719762..5bf84c6d0ec362b79b22ae75589c20e679692c3d 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_default.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_default.h
@@ -436,7 +436,6 @@
 #define mmTA_CNTL_DEFAULT                                                        0x8004d850
 #define mmTA_CNTL_AUX_DEFAULT                                                    0x00000000
 #define mmTA_RESERVED_010C_DEFAULT                                               0x00000000
-#define mmTA_GRAD_ADJ_DEFAULT                                                    0x40000040
 #define mmTA_STATUS_DEFAULT                                                      0x00000000
 #define mmTA_SCRATCH_DEFAULT                                                     0x00000000
 
@@ -1700,7 +1699,6 @@
 #define mmDB_STENCIL_WRITE_BASE_DEFAULT                                          0x00000000
 #define mmDB_STENCIL_WRITE_BASE_HI_DEFAULT                                       0x00000000
 #define mmDB_DFSM_CONTROL_DEFAULT                                                0x00000000
-#define mmDB_RENDER_FILTER_DEFAULT                                               0x00000000
 #define mmDB_Z_INFO2_DEFAULT                                                     0x00000000
 #define mmDB_STENCIL_INFO2_DEFAULT                                               0x00000000
 #define mmTA_BC_BASE_ADDR_DEFAULT                                                0x00000000
@@ -1806,8 +1804,6 @@
 #define mmPA_SC_RIGHT_VERT_GRID_DEFAULT                                          0x00000000
 #define mmPA_SC_LEFT_VERT_GRID_DEFAULT                                           0x00000000
 #define mmPA_SC_HORIZ_GRID_DEFAULT                                               0x00000000
-#define mmPA_SC_FOV_WINDOW_LR_DEFAULT                                            0x00000000
-#define mmPA_SC_FOV_WINDOW_TB_DEFAULT                                            0x00000000
 #define mmVGT_MULTI_PRIM_IB_RESET_INDX_DEFAULT                                   0x00000000
 #define mmCB_BLEND_RED_DEFAULT                                                   0x00000000
 #define mmCB_BLEND_GREEN_DEFAULT                                                 0x00000000
@@ -2072,7 +2068,6 @@
 #define mmVGT_EVENT_INITIATOR_DEFAULT                                            0x00000000
 #define mmVGT_GS_MAX_PRIMS_PER_SUBGROUP_DEFAULT                                  0x00000000
 #define mmVGT_DRAW_PAYLOAD_CNTL_DEFAULT                                          0x00000000
-#define mmVGT_INDEX_PAYLOAD_CNTL_DEFAULT                                         0x00000000
 #define mmVGT_INSTANCE_STEP_RATE_0_DEFAULT                                       0x00000000
 #define mmVGT_INSTANCE_STEP_RATE_1_DEFAULT                                       0x00000000
 #define mmVGT_ESGS_RING_ITEMSIZE_DEFAULT                                         0x00000000
@@ -2490,7 +2485,6 @@
 #define mmWD_INDEX_BUF_BASE_DEFAULT                                              0x00000000
 #define mmWD_INDEX_BUF_BASE_HI_DEFAULT                                           0x00000000
 #define mmIA_MULTI_VGT_PARAM_DEFAULT                                             0x006000ff
-#define mmVGT_OBJECT_ID_DEFAULT                                                  0x00000000
 #define mmVGT_INSTANCE_BASE_ID_DEFAULT                                           0x00000000
 #define mmPA_SU_LINE_STIPPLE_VALUE_DEFAULT                                       0x00000000
 #define mmPA_SC_LINE_STIPPLE_STATE_DEFAULT                                       0x00000000
@@ -2534,7 +2528,6 @@
 #define mmSQC_WRITEBACK_DEFAULT                                                  0x00000000
 #define mmTA_CS_BC_BASE_ADDR_DEFAULT                                             0x00000000
 #define mmTA_CS_BC_BASE_ADDR_HI_DEFAULT                                          0x00000000
-#define mmTA_GRAD_ADJ_UCONFIG_DEFAULT                                            0x40000040
 #define mmDB_OCCLUSION_COUNT0_LOW_DEFAULT                                        0x00000000
 #define mmDB_OCCLUSION_COUNT0_HI_DEFAULT                                         0x00000000
 #define mmDB_OCCLUSION_COUNT1_LOW_DEFAULT                                        0x00000000
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
index e6d6171aa8b9c81fbcacd39d3a42139cb068b884..4ce090db7ef776ac1042f4a0f951d2e654402a9c 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
@@ -841,8 +841,6 @@
 #define mmTA_CNTL_AUX_BASE_IDX                                                                         0
 #define mmTA_RESERVED_010C                                                                             0x0543
 #define mmTA_RESERVED_010C_BASE_IDX                                                                    0
-#define mmTA_GRAD_ADJ                                                                                  0x0544
-#define mmTA_GRAD_ADJ_BASE_IDX                                                                         0
 #define mmTA_STATUS                                                                                    0x0548
 #define mmTA_STATUS_BASE_IDX                                                                           0
 #define mmTA_SCRATCH                                                                                   0x0564
@@ -3330,8 +3328,6 @@
 #define mmDB_STENCIL_WRITE_BASE_HI_BASE_IDX                                                            1
 #define mmDB_DFSM_CONTROL                                                                              0x0018
 #define mmDB_DFSM_CONTROL_BASE_IDX                                                                     1
-#define mmDB_RENDER_FILTER                                                                             0x0019
-#define mmDB_RENDER_FILTER_BASE_IDX                                                                    1
 #define mmDB_Z_INFO2                                                                                   0x001a
 #define mmDB_Z_INFO2_BASE_IDX                                                                          1
 #define mmDB_STENCIL_INFO2                                                                             0x001b
@@ -3542,10 +3538,6 @@
 #define mmPA_SC_LEFT_VERT_GRID_BASE_IDX                                                                1
 #define mmPA_SC_HORIZ_GRID                                                                             0x00ea
 #define mmPA_SC_HORIZ_GRID_BASE_IDX                                                                    1
-#define mmPA_SC_FOV_WINDOW_LR                                                                          0x00eb
-#define mmPA_SC_FOV_WINDOW_LR_BASE_IDX                                                                 1
-#define mmPA_SC_FOV_WINDOW_TB                                                                          0x00ec
-#define mmPA_SC_FOV_WINDOW_TB_BASE_IDX                                                                 1
 #define mmVGT_MULTI_PRIM_IB_RESET_INDX                                                                 0x0103
 #define mmVGT_MULTI_PRIM_IB_RESET_INDX_BASE_IDX                                                        1
 #define mmCB_BLEND_RED                                                                                 0x0105
@@ -4074,8 +4066,6 @@
 #define mmVGT_GS_MAX_PRIMS_PER_SUBGROUP_BASE_IDX                                                       1
 #define mmVGT_DRAW_PAYLOAD_CNTL                                                                        0x02a6
 #define mmVGT_DRAW_PAYLOAD_CNTL_BASE_IDX                                                               1
-#define mmVGT_INDEX_PAYLOAD_CNTL                                                                       0x02a7
-#define mmVGT_INDEX_PAYLOAD_CNTL_BASE_IDX                                                              1
 #define mmVGT_INSTANCE_STEP_RATE_0                                                                     0x02a8
 #define mmVGT_INSTANCE_STEP_RATE_0_BASE_IDX                                                            1
 #define mmVGT_INSTANCE_STEP_RATE_1                                                                     0x02a9
@@ -4908,8 +4898,6 @@
 #define mmWD_INDEX_BUF_BASE_HI_BASE_IDX                                                                1
 #define mmIA_MULTI_VGT_PARAM                                                                           0x2258
 #define mmIA_MULTI_VGT_PARAM_BASE_IDX                                                                  1
-#define mmVGT_OBJECT_ID                                                                                0x2259
-#define mmVGT_OBJECT_ID_BASE_IDX                                                                       1
 #define mmVGT_INSTANCE_BASE_ID                                                                         0x225a
 #define mmVGT_INSTANCE_BASE_ID_BASE_IDX                                                                1
 #define mmPA_SU_LINE_STIPPLE_VALUE                                                                     0x2280
@@ -4996,8 +4984,6 @@
 #define mmTA_CS_BC_BASE_ADDR_BASE_IDX                                                                  1
 #define mmTA_CS_BC_BASE_ADDR_HI                                                                        0x2381
 #define mmTA_CS_BC_BASE_ADDR_HI_BASE_IDX                                                               1
-#define mmTA_GRAD_ADJ_UCONFIG                                                                          0x2382
-#define mmTA_GRAD_ADJ_UCONFIG_BASE_IDX                                                                 1
 #define mmDB_OCCLUSION_COUNT0_LOW                                                                      0x23c0
 #define mmDB_OCCLUSION_COUNT0_LOW_BASE_IDX                                                             1
 #define mmDB_OCCLUSION_COUNT0_HI                                                                       0x23c1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h
index 5c5e9b445432d81334a30f617dfbb17b6d671065..2e1214be67a22490284052642d06d962f9ad5438 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h
@@ -4576,15 +4576,6 @@
 //TA_RESERVED_010C
 #define TA_RESERVED_010C__Unused__SHIFT                                                                       0x0
 #define TA_RESERVED_010C__Unused_MASK                                                                         0xFFFFFFFFL
-//TA_GRAD_ADJ
-#define TA_GRAD_ADJ__GRAD_ADJ_0__SHIFT                                                                        0x0
-#define TA_GRAD_ADJ__GRAD_ADJ_1__SHIFT                                                                        0x8
-#define TA_GRAD_ADJ__GRAD_ADJ_2__SHIFT                                                                        0x10
-#define TA_GRAD_ADJ__GRAD_ADJ_3__SHIFT                                                                        0x18
-#define TA_GRAD_ADJ__GRAD_ADJ_0_MASK                                                                          0x000000FFL
-#define TA_GRAD_ADJ__GRAD_ADJ_1_MASK                                                                          0x0000FF00L
-#define TA_GRAD_ADJ__GRAD_ADJ_2_MASK                                                                          0x00FF0000L
-#define TA_GRAD_ADJ__GRAD_ADJ_3_MASK                                                                          0xFF000000L
 //TA_STATUS
 #define TA_STATUS__FG_PFIFO_EMPTYB__SHIFT                                                                     0xc
 #define TA_STATUS__FG_LFIFO_EMPTYB__SHIFT                                                                     0xd
@@ -14459,9 +14450,6 @@
 #define DB_DFSM_CONTROL__PUNCHOUT_MODE_MASK                                                                   0x00000003L
 #define DB_DFSM_CONTROL__POPS_DRAIN_PS_ON_OVERLAP_MASK                                                        0x00000004L
 #define DB_DFSM_CONTROL__DISALLOW_OVERFLOW_MASK                                                               0x00000008L
-//DB_RENDER_FILTER
-#define DB_RENDER_FILTER__PS_INVOKE_MASK__SHIFT                                                               0x0
-#define DB_RENDER_FILTER__PS_INVOKE_MASK_MASK                                                                 0x0000FFFFL
 //DB_Z_INFO2
 #define DB_Z_INFO2__EPITCH__SHIFT                                                                             0x0
 #define DB_Z_INFO2__EPITCH_MASK                                                                               0x0000FFFFL
@@ -14959,11 +14947,9 @@
 #define PA_SC_TILE_STEERING_OVERRIDE__ENABLE__SHIFT                                                           0x0
 #define PA_SC_TILE_STEERING_OVERRIDE__NUM_SE__SHIFT                                                           0x1
 #define PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SE__SHIFT                                                    0x5
-#define PA_SC_TILE_STEERING_OVERRIDE__DISABLE_SRBSL_DB_OPTIMIZED_PACKING__SHIFT                               0x8
 #define PA_SC_TILE_STEERING_OVERRIDE__ENABLE_MASK                                                             0x00000001L
 #define PA_SC_TILE_STEERING_OVERRIDE__NUM_SE_MASK                                                             0x00000006L
 #define PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SE_MASK                                                      0x00000060L
-#define PA_SC_TILE_STEERING_OVERRIDE__DISABLE_SRBSL_DB_OPTIMIZED_PACKING_MASK                                 0x00000100L
 //CP_PERFMON_CNTX_CNTL
 #define CP_PERFMON_CNTX_CNTL__PERFMON_ENABLE__SHIFT                                                           0x1f
 #define CP_PERFMON_CNTX_CNTL__PERFMON_ENABLE_MASK                                                             0x80000000L
@@ -15003,20 +14989,6 @@
 #define PA_SC_HORIZ_GRID__TOP_HALF_MASK                                                                       0x0000FF00L
 #define PA_SC_HORIZ_GRID__BOT_HALF_MASK                                                                       0x00FF0000L
 #define PA_SC_HORIZ_GRID__BOT_QTR_MASK                                                                        0xFF000000L
-//PA_SC_FOV_WINDOW_LR
-#define PA_SC_FOV_WINDOW_LR__LEFT_EYE_FOV_LEFT__SHIFT                                                         0x0
-#define PA_SC_FOV_WINDOW_LR__LEFT_EYE_FOV_RIGHT__SHIFT                                                        0x8
-#define PA_SC_FOV_WINDOW_LR__RIGHT_EYE_FOV_LEFT__SHIFT                                                        0x10
-#define PA_SC_FOV_WINDOW_LR__RIGHT_EYE_FOV_RIGHT__SHIFT                                                       0x18
-#define PA_SC_FOV_WINDOW_LR__LEFT_EYE_FOV_LEFT_MASK                                                           0x000000FFL
-#define PA_SC_FOV_WINDOW_LR__LEFT_EYE_FOV_RIGHT_MASK                                                          0x0000FF00L
-#define PA_SC_FOV_WINDOW_LR__RIGHT_EYE_FOV_LEFT_MASK                                                          0x00FF0000L
-#define PA_SC_FOV_WINDOW_LR__RIGHT_EYE_FOV_RIGHT_MASK                                                         0xFF000000L
-//PA_SC_FOV_WINDOW_TB
-#define PA_SC_FOV_WINDOW_TB__FOV_TOP__SHIFT                                                                   0x0
-#define PA_SC_FOV_WINDOW_TB__FOV_BOT__SHIFT                                                                   0x8
-#define PA_SC_FOV_WINDOW_TB__FOV_TOP_MASK                                                                     0x000000FFL
-#define PA_SC_FOV_WINDOW_TB__FOV_BOT_MASK                                                                     0x0000FF00L
 //VGT_MULTI_PRIM_IB_RESET_INDX
 #define VGT_MULTI_PRIM_IB_RESET_INDX__RESET_INDX__SHIFT                                                       0x0
 #define VGT_MULTI_PRIM_IB_RESET_INDX__RESET_INDX_MASK                                                         0xFFFFFFFFL
@@ -17010,13 +16982,11 @@
 #define PA_SU_SMALL_PRIM_FILTER_CNTL__LINE_FILTER_DISABLE__SHIFT                                              0x2
 #define PA_SU_SMALL_PRIM_FILTER_CNTL__POINT_FILTER_DISABLE__SHIFT                                             0x3
 #define PA_SU_SMALL_PRIM_FILTER_CNTL__RECTANGLE_FILTER_DISABLE__SHIFT                                         0x4
-#define PA_SU_SMALL_PRIM_FILTER_CNTL__SRBSL_ENABLE__SHIFT                                                     0x5
 #define PA_SU_SMALL_PRIM_FILTER_CNTL__SMALL_PRIM_FILTER_ENABLE_MASK                                           0x00000001L
 #define PA_SU_SMALL_PRIM_FILTER_CNTL__TRIANGLE_FILTER_DISABLE_MASK                                            0x00000002L
 #define PA_SU_SMALL_PRIM_FILTER_CNTL__LINE_FILTER_DISABLE_MASK                                                0x00000004L
 #define PA_SU_SMALL_PRIM_FILTER_CNTL__POINT_FILTER_DISABLE_MASK                                               0x00000008L
 #define PA_SU_SMALL_PRIM_FILTER_CNTL__RECTANGLE_FILTER_DISABLE_MASK                                           0x00000010L
-#define PA_SU_SMALL_PRIM_FILTER_CNTL__SRBSL_ENABLE_MASK                                                       0x00000020L
 //PA_CL_OBJPRIM_ID_CNTL
 #define PA_CL_OBJPRIM_ID_CNTL__OBJ_ID_SEL__SHIFT                                                              0x0
 #define PA_CL_OBJPRIM_ID_CNTL__ADD_PIPED_PRIM_ID__SHIFT                                                       0x1
@@ -17345,9 +17315,6 @@
 #define VGT_DRAW_PAYLOAD_CNTL__EN_REG_RT_INDEX_MASK                                                           0x00000002L
 #define VGT_DRAW_PAYLOAD_CNTL__EN_PIPELINE_PRIMID_MASK                                                        0x00000004L
 #define VGT_DRAW_PAYLOAD_CNTL__OBJECT_ID_INST_EN_MASK                                                         0x00000008L
-//VGT_INDEX_PAYLOAD_CNTL
-#define VGT_INDEX_PAYLOAD_CNTL__COMPOUND_INDEX_EN__SHIFT                                                      0x0
-#define VGT_INDEX_PAYLOAD_CNTL__COMPOUND_INDEX_EN_MASK                                                        0x00000001L
 //VGT_INSTANCE_STEP_RATE_0
 #define VGT_INSTANCE_STEP_RATE_0__STEP_RATE__SHIFT                                                            0x0
 #define VGT_INSTANCE_STEP_RATE_0__STEP_RATE_MASK                                                              0xFFFFFFFFL
@@ -19849,9 +19816,6 @@
 #define IA_MULTI_VGT_PARAM__EN_INST_OPT_BASIC_MASK                                                            0x00200000L
 #define IA_MULTI_VGT_PARAM__EN_INST_OPT_ADV_MASK                                                              0x00400000L
 #define IA_MULTI_VGT_PARAM__HW_USE_ONLY_MASK                                                                  0x00800000L
-//VGT_OBJECT_ID
-#define VGT_OBJECT_ID__REG_OBJ_ID__SHIFT                                                                      0x0
-#define VGT_OBJECT_ID__REG_OBJ_ID_MASK                                                                        0xFFFFFFFFL
 //VGT_INSTANCE_BASE_ID
 #define VGT_INSTANCE_BASE_ID__INSTANCE_BASE_ID__SHIFT                                                         0x0
 #define VGT_INSTANCE_BASE_ID__INSTANCE_BASE_ID_MASK                                                           0xFFFFFFFFL
@@ -20067,15 +20031,6 @@
 //TA_CS_BC_BASE_ADDR_HI
 #define TA_CS_BC_BASE_ADDR_HI__ADDRESS__SHIFT                                                                 0x0
 #define TA_CS_BC_BASE_ADDR_HI__ADDRESS_MASK                                                                   0x000000FFL
-//TA_GRAD_ADJ_UCONFIG
-#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_0__SHIFT                                                                0x0
-#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_1__SHIFT                                                                0x8
-#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_2__SHIFT                                                                0x10
-#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_3__SHIFT                                                                0x18
-#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_0_MASK                                                                  0x000000FFL
-#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_1_MASK                                                                  0x0000FF00L
-#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_2_MASK                                                                  0x00FF0000L
-#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_3_MASK                                                                  0xFF000000L
 //DB_OCCLUSION_COUNT0_LOW
 #define DB_OCCLUSION_COUNT0_LOW__COUNT_LOW__SHIFT                                                             0x0
 #define DB_OCCLUSION_COUNT0_LOW__COUNT_LOW_MASK                                                               0xFFFFFFFFL
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_1_offset.h
index db7ef5ede0e52e67607fb3eacef89e2d001b3def..030e0020902b277ff6b2e485d54ac48bb94fc87f 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_1_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_1_offset.h
@@ -815,8 +815,6 @@
 #define mmTA_CNTL_AUX_BASE_IDX                                                                         0
 #define mmTA_RESERVED_010C                                                                             0x0543
 #define mmTA_RESERVED_010C_BASE_IDX                                                                    0
-#define mmTA_GRAD_ADJ                                                                                  0x0544
-#define mmTA_GRAD_ADJ_BASE_IDX                                                                         0
 #define mmTA_STATUS                                                                                    0x0548
 #define mmTA_STATUS_BASE_IDX                                                                           0
 #define mmTA_SCRATCH                                                                                   0x0564
@@ -3617,8 +3615,6 @@
 #define mmDB_STENCIL_WRITE_BASE_HI_BASE_IDX                                                            1
 #define mmDB_DFSM_CONTROL                                                                              0x0018
 #define mmDB_DFSM_CONTROL_BASE_IDX                                                                     1
-#define mmDB_RENDER_FILTER                                                                             0x0019
-#define mmDB_RENDER_FILTER_BASE_IDX                                                                    1
 #define mmDB_Z_INFO2                                                                                   0x001a
 #define mmDB_Z_INFO2_BASE_IDX                                                                          1
 #define mmDB_STENCIL_INFO2                                                                             0x001b
@@ -3829,10 +3825,6 @@
 #define mmPA_SC_LEFT_VERT_GRID_BASE_IDX                                                                1
 #define mmPA_SC_HORIZ_GRID                                                                             0x00ea
 #define mmPA_SC_HORIZ_GRID_BASE_IDX                                                                    1
-#define mmPA_SC_FOV_WINDOW_LR                                                                          0x00eb
-#define mmPA_SC_FOV_WINDOW_LR_BASE_IDX                                                                 1
-#define mmPA_SC_FOV_WINDOW_TB                                                                          0x00ec
-#define mmPA_SC_FOV_WINDOW_TB_BASE_IDX                                                                 1
 #define mmVGT_MULTI_PRIM_IB_RESET_INDX                                                                 0x0103
 #define mmVGT_MULTI_PRIM_IB_RESET_INDX_BASE_IDX                                                        1
 #define mmCB_BLEND_RED                                                                                 0x0105
@@ -4361,8 +4353,6 @@
 #define mmVGT_GS_MAX_PRIMS_PER_SUBGROUP_BASE_IDX                                                       1
 #define mmVGT_DRAW_PAYLOAD_CNTL                                                                        0x02a6
 #define mmVGT_DRAW_PAYLOAD_CNTL_BASE_IDX                                                               1
-#define mmVGT_INDEX_PAYLOAD_CNTL                                                                       0x02a7
-#define mmVGT_INDEX_PAYLOAD_CNTL_BASE_IDX                                                              1
 #define mmVGT_INSTANCE_STEP_RATE_0                                                                     0x02a8
 #define mmVGT_INSTANCE_STEP_RATE_0_BASE_IDX                                                            1
 #define mmVGT_INSTANCE_STEP_RATE_1                                                                     0x02a9
@@ -5195,8 +5185,6 @@
 #define mmWD_INDEX_BUF_BASE_HI_BASE_IDX                                                                1
 #define mmIA_MULTI_VGT_PARAM                                                                           0x2258
 #define mmIA_MULTI_VGT_PARAM_BASE_IDX                                                                  1
-#define mmVGT_OBJECT_ID                                                                                0x2259
-#define mmVGT_OBJECT_ID_BASE_IDX                                                                       1
 #define mmVGT_INSTANCE_BASE_ID                                                                         0x225a
 #define mmVGT_INSTANCE_BASE_ID_BASE_IDX                                                                1
 #define mmPA_SU_LINE_STIPPLE_VALUE                                                                     0x2280
@@ -5283,8 +5271,6 @@
 #define mmTA_CS_BC_BASE_ADDR_BASE_IDX                                                                  1
 #define mmTA_CS_BC_BASE_ADDR_HI                                                                        0x2381
 #define mmTA_CS_BC_BASE_ADDR_HI_BASE_IDX                                                               1
-#define mmTA_GRAD_ADJ_UCONFIG                                                                          0x2382
-#define mmTA_GRAD_ADJ_UCONFIG_BASE_IDX                                                                 1
 #define mmDB_OCCLUSION_COUNT0_LOW                                                                      0x23c0
 #define mmDB_OCCLUSION_COUNT0_LOW_BASE_IDX                                                             1
 #define mmDB_OCCLUSION_COUNT0_HI                                                                       0x23c1
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index f516fd10e6ba7469856a59d039a8e1d715251a0e..a6752bd0c87131e57801786bac66ce9bbe9c6d1e 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -46,6 +46,28 @@ enum kfd_preempt_type {
 	KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
 };
 
+struct kfd_cu_info {
+	uint32_t num_shader_engines;
+	uint32_t num_shader_arrays_per_engine;
+	uint32_t num_cu_per_sh;
+	uint32_t cu_active_number;
+	uint32_t cu_ao_mask;
+	uint32_t simd_per_cu;
+	uint32_t max_waves_per_simd;
+	uint32_t wave_front_size;
+	uint32_t max_scratch_slots_per_cu;
+	uint32_t lds_size;
+	uint32_t cu_bitmap[4][4];
+};
+
+/* For getting GPU local memory information from KGD */
+struct kfd_local_mem_info {
+	uint64_t local_mem_size_private;
+	uint64_t local_mem_size_public;
+	uint32_t vram_width;
+	uint32_t mem_clk_max;
+};
+
 enum kgd_memory_pool {
 	KGD_POOL_SYSTEM_CACHEABLE = 1,
 	KGD_POOL_SYSTEM_WRITECOMBINE = 2,
@@ -106,7 +128,7 @@ struct tile_config {
  *
  * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture
  *
- * @get_vmem_size: Retrieves (physical) size of VRAM
+ * @get_local_mem_info: Retrieves information about GPU local memory
  *
  * @get_gpu_clock_counter: Retrieves GPU clock counter
  *
@@ -131,6 +153,12 @@ struct tile_config {
  * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot.
  * used only for no HWS mode.
  *
+ * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs.
+ * Array is allocated with kmalloc, needs to be freed with kfree by caller.
+ *
+ * @hqd_sdma_dump: Dumps SDMA HQD registers to an array of address-value pairs.
+ * Array is allocated with kmalloc, needs to be freed with kfree by caller.
+ *
  * @hqd_is_occupies: Checks if a hqd slot is occupied.
  *
  * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot.
@@ -147,6 +175,10 @@ struct tile_config {
  *
  * @get_tile_config: Returns GPU-specific tiling mode information
  *
+ * @get_cu_info: Retrieves activated cu info
+ *
+ * @get_vram_usage: Returns current VRAM usage
+ *
  * This structure contains function pointers to services that the kgd driver
  * provides to amdkfd driver.
  *
@@ -158,7 +190,8 @@ struct kfd2kgd_calls {
 
 	void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj);
 
-	uint64_t (*get_vmem_size)(struct kgd_dev *kgd);
+	void (*get_local_mem_info)(struct kgd_dev *kgd,
+			struct kfd_local_mem_info *mem_info);
 	uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd);
 
 	uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd);
@@ -184,7 +217,16 @@ struct kfd2kgd_calls {
 			uint32_t wptr_shift, uint32_t wptr_mask,
 			struct mm_struct *mm);
 
-	int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd);
+	int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm);
+
+	int (*hqd_dump)(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs);
+
+	int (*hqd_sdma_dump)(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs);
 
 	bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id);
@@ -224,6 +266,10 @@ struct kfd2kgd_calls {
 	void (*set_scratch_backing_va)(struct kgd_dev *kgd,
 				uint64_t va, uint32_t vmid);
 	int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config);
+
+	void (*get_cu_info)(struct kgd_dev *kgd,
+			struct kfd_cu_info *cu_info);
+	uint64_t (*get_vram_usage)(struct kgd_dev *kgd);
 };
 
 /**
diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h
index 20234820194bde0c1d9ccd8475bd8c694fcf8211..717fbae1d362011bfee7185fffefc01cba3fcf71 100644
--- a/drivers/gpu/drm/amd/include/vi_structs.h
+++ b/drivers/gpu/drm/amd/include/vi_structs.h
@@ -153,6 +153,8 @@ struct vi_sdma_mqd {
 	uint32_t reserved_125;
 	uint32_t reserved_126;
 	uint32_t reserved_127;
+	uint32_t sdma_engine_id;
+	uint32_t sdma_queue_id;
 };
 
 struct vi_mqd {
diff --git a/drivers/gpu/drm/amd/lib/Makefile b/drivers/gpu/drm/amd/lib/Makefile
index 87cd7009e80f1b59744d4a928c9b66d74c16f5e1..690243001e1aa935be3c29ccf874e0ec82291dad 100644
--- a/drivers/gpu/drm/amd/lib/Makefile
+++ b/drivers/gpu/drm/amd/lib/Makefile
@@ -1,4 +1,25 @@
 #
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
 # Makefile for AMD library routines, which are used by AMD driver
 # components.
 #
diff --git a/drivers/gpu/drm/amd/powerplay/Makefile b/drivers/gpu/drm/amd/powerplay/Makefile
index 8c55c6e254d99eb008231587bb21beb29c09a450..231785a9e24c67bb31695367f7e1294d15f47bb3 100644
--- a/drivers/gpu/drm/amd/powerplay/Makefile
+++ b/drivers/gpu/drm/amd/powerplay/Makefile
@@ -1,4 +1,24 @@
-# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
 
 subdir-ccflags-y += \
 		-I$(FULL_AMD_PATH)/powerplay/inc/  \
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 9d3bdada79d5e74843b58cdea7b7936bd644393d..4c3223a4d62b04d0264686e2eb6e493e6daf7c23 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -389,20 +389,12 @@ static int pp_dpm_force_performance_level(void *handle,
 	if (level == hwmgr->dpm_level)
 		return 0;
 
-	if (hwmgr->hwmgr_func->force_dpm_level == NULL) {
-		pr_info("%s was not implemented.\n", __func__);
-		return 0;
-	}
-
 	mutex_lock(&pp_handle->pp_lock);
 	pp_dpm_en_umd_pstate(hwmgr, &level);
 	hwmgr->request_dpm_level = level;
 	hwmgr_handle_task(pp_handle, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
-	ret = hwmgr->hwmgr_func->force_dpm_level(hwmgr, level);
-	if (!ret)
-		hwmgr->dpm_level = hwmgr->request_dpm_level;
-
 	mutex_unlock(&pp_handle->pp_lock);
+
 	return 0;
 }
 
@@ -726,6 +718,8 @@ static int pp_dpm_get_pp_num_states(void *handle,
 	struct pp_instance *pp_handle = (struct pp_instance *)handle;
 	int ret = 0;
 
+	memset(data, 0, sizeof(*data));
+
 	ret = pp_check(pp_handle);
 
 	if (ret)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile
index 824fb6fe54ae97dc2a1768f49141ee6ff0ce3a49..a212c27f2e17c99915e4bede575f6ed40cb263f0 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile
@@ -1,4 +1,24 @@
-# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
 #
 # Makefile for the 'hw manager' sub-component of powerplay.
 # It provides the hardware management services for the driver.
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
index ad1f6b57884b716620b602e51c8a53e566d61209..b314d09d41af41a14664f79741e066b4013d2670 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
@@ -728,9 +728,6 @@ static int cz_update_sclk_limit(struct pp_hwmgr *hwmgr)
 
 		if (clock < stable_pstate_sclk)
 			clock = stable_pstate_sclk;
-	} else {
-		if (clock < hwmgr->gfx_arbiter.sclk)
-			clock = hwmgr->gfx_arbiter.sclk;
 	}
 
 	if (cz_hwmgr->sclk_dpm.soft_min_clk != clock) {
@@ -1085,14 +1082,8 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 	uint32_t  num_of_active_displays = 0;
 	struct cgs_display_info info = {0};
 
-	cz_ps->evclk = hwmgr->vce_arbiter.evclk;
-	cz_ps->ecclk = hwmgr->vce_arbiter.ecclk;
-
 	cz_ps->need_dfs_bypass = true;
 
-	cz_hwmgr->video_start = (hwmgr->uvd_arbiter.vclk != 0 || hwmgr->uvd_arbiter.dclk != 0 ||
-				hwmgr->vce_arbiter.evclk != 0 || hwmgr->vce_arbiter.ecclk != 0);
-
 	cz_hwmgr->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label);
 
 	clocks.memoryClock = hwmgr->display_config.min_mem_set_clock != 0 ?
@@ -1105,9 +1096,6 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
 		clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk;
 
-	if (clocks.memoryClock < hwmgr->gfx_arbiter.mclk)
-		clocks.memoryClock = hwmgr->gfx_arbiter.mclk;
-
 	force_high = (clocks.memoryClock > cz_hwmgr->sys_info.nbp_memory_clock[CZ_NUM_NBPMEMORYCLOCK - 1])
 			|| (num_of_active_displays >= 3);
 
@@ -1339,22 +1327,13 @@ int  cz_dpm_update_vce_dpm(struct pp_hwmgr *hwmgr)
 				cz_hwmgr->vce_dpm.hard_min_clk,
 				PPSMC_MSG_SetEclkHardMin));
 	} else {
-		/*Program HardMin based on the vce_arbiter.ecclk */
-		if (hwmgr->vce_arbiter.ecclk == 0) {
-			smum_send_msg_to_smc_with_parameter(hwmgr,
-					    PPSMC_MSG_SetEclkHardMin, 0);
+
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+					PPSMC_MSG_SetEclkHardMin, 0);
 		/* disable ECLK DPM 0. Otherwise VCE could hang if
 		 * switching SCLK from DPM 0 to 6/7 */
-			smum_send_msg_to_smc_with_parameter(hwmgr,
+		smum_send_msg_to_smc_with_parameter(hwmgr,
 					PPSMC_MSG_SetEclkSoftMin, 1);
-		} else {
-			cz_hwmgr->vce_dpm.hard_min_clk = hwmgr->vce_arbiter.ecclk;
-			smum_send_msg_to_smc_with_parameter(hwmgr,
-				PPSMC_MSG_SetEclkHardMin,
-				cz_get_eclk_level(hwmgr,
-					cz_hwmgr->vce_dpm.hard_min_clk,
-					PPSMC_MSG_SetEclkHardMin));
-		}
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 623cff90233d450f60a7021b260edc7a7d63a911..2b0c53fe4c8dfb47a5b6353b0c0fd5fbf9dba753 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -112,26 +112,29 @@ int phm_force_dpm_levels(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level
 
 	PHM_FUNC_CHECK(hwmgr);
 
-	if (hwmgr->hwmgr_func->force_dpm_level != NULL) {
+	if (hwmgr->hwmgr_func->force_dpm_level != NULL)
 		ret = hwmgr->hwmgr_func->force_dpm_level(hwmgr, level);
-		if (ret)
-			return ret;
-
-		if (hwmgr->hwmgr_func->set_power_profile_state) {
-			if (hwmgr->current_power_profile == AMD_PP_GFX_PROFILE)
-				ret = hwmgr->hwmgr_func->set_power_profile_state(
-						hwmgr,
-						&hwmgr->gfx_power_profile);
-			else if (hwmgr->current_power_profile == AMD_PP_COMPUTE_PROFILE)
-				ret = hwmgr->hwmgr_func->set_power_profile_state(
-						hwmgr,
-						&hwmgr->compute_power_profile);
-		}
-	}
 
 	return ret;
 }
 
+int phm_reset_power_profile_state(struct pp_hwmgr *hwmgr)
+{
+	int ret = 0;
+
+	if (hwmgr->hwmgr_func->set_power_profile_state) {
+		if (hwmgr->current_power_profile == AMD_PP_GFX_PROFILE)
+			ret = hwmgr->hwmgr_func->set_power_profile_state(
+					hwmgr,
+					&hwmgr->gfx_power_profile);
+		else if (hwmgr->current_power_profile == AMD_PP_COMPUTE_PROFILE)
+			ret = hwmgr->hwmgr_func->set_power_profile_state(
+					hwmgr,
+					&hwmgr->compute_power_profile);
+	}
+	return ret;
+}
+
 int phm_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 				   struct pp_power_state *adjusted_ps,
 			     const struct pp_power_state *current_ps)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index ce59e0e67cb22d632b27c3dce86708bd19a7fa4a..0229f774f7a93262196d4136a3ffa415b6940904 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -149,6 +149,7 @@ int hwmgr_early_init(struct pp_instance *handle)
 	hwmgr->power_source = PP_PowerSource_AC;
 	hwmgr->pp_table_version = PP_TABLE_V1;
 	hwmgr->dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
+	hwmgr->request_dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
 	hwmgr_init_default_caps(hwmgr);
 	hwmgr_set_user_specify_caps(hwmgr);
 	hwmgr->fan_ctrl_is_in_default_mode = true;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c
index 67fae834bc6788680eddd50b8f55a737f4b013ab..8de384bf9a8fb62a12bb90086a17bd4fd04c9de5 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c
@@ -1,4 +1,26 @@
-// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
 #include "pp_overdriver.h"
 #include <linux/errno.h>
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
index ffa44bbb218e4e59004a97e6b148d66f48ae1af4..95ab772e0c3e3c3b1eacd4b95b1844ccbdd1cbbd 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
@@ -244,6 +244,10 @@ int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, bool skip,
 	}
 
 	phm_notify_smc_display_config_after_ps_adjustment(hwmgr);
+	if (!phm_force_dpm_levels(hwmgr, hwmgr->request_dpm_level))
+		hwmgr->dpm_level = hwmgr->request_dpm_level;
+
+	phm_reset_power_profile_state(hwmgr);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
index 3e0b267c74a88991fde93055f7e3460d2a985379..569073e3a5a1372485caf56976d83479e4330fa4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c
@@ -159,7 +159,6 @@ static int rv_construct_boot_state(struct pp_hwmgr *hwmgr)
 
 static int rv_set_clock_limit(struct pp_hwmgr *hwmgr, const void *input)
 {
-	struct rv_hwmgr *rv_data = (struct rv_hwmgr *)(hwmgr->backend);
 	struct PP_Clocks clocks = {0};
 	struct pp_display_clock_request clock_req;
 
@@ -170,39 +169,6 @@ static int rv_set_clock_limit(struct pp_hwmgr *hwmgr, const void *input)
 	PP_ASSERT_WITH_CODE(!rv_display_clock_voltage_request(hwmgr, &clock_req),
 				"Attempt to set DCF Clock Failed!", return -EINVAL);
 
-	if (((hwmgr->uvd_arbiter.vclk_soft_min / 100) != rv_data->vclk_soft_min) ||
-	    ((hwmgr->uvd_arbiter.dclk_soft_min / 100) != rv_data->dclk_soft_min)) {
-		rv_data->vclk_soft_min = hwmgr->uvd_arbiter.vclk_soft_min / 100;
-		rv_data->dclk_soft_min = hwmgr->uvd_arbiter.dclk_soft_min / 100;
-		smum_send_msg_to_smc_with_parameter(hwmgr,
-			PPSMC_MSG_SetSoftMinVcn,
-			(rv_data->vclk_soft_min << 16) | rv_data->vclk_soft_min);
-	}
-
-	if((hwmgr->gfx_arbiter.sclk_hard_min != 0) &&
-		((hwmgr->gfx_arbiter.sclk_hard_min / 100) != rv_data->soc_actual_hard_min_freq)) {
-		smum_send_msg_to_smc_with_parameter(hwmgr,
-					PPSMC_MSG_SetHardMinSocclkByFreq,
-					hwmgr->gfx_arbiter.sclk_hard_min / 100);
-		rv_read_arg_from_smc(hwmgr, &rv_data->soc_actual_hard_min_freq);
-	}
-
-	if ((hwmgr->gfx_arbiter.gfxclk != 0) &&
-		(rv_data->gfx_actual_soft_min_freq != (hwmgr->gfx_arbiter.gfxclk))) {
-		smum_send_msg_to_smc_with_parameter(hwmgr,
-					PPSMC_MSG_SetMinVideoGfxclkFreq,
-					hwmgr->gfx_arbiter.gfxclk / 100);
-		rv_read_arg_from_smc(hwmgr, &rv_data->gfx_actual_soft_min_freq);
-	}
-
-	if ((hwmgr->gfx_arbiter.fclk != 0) &&
-		(rv_data->fabric_actual_soft_min_freq != (hwmgr->gfx_arbiter.fclk / 100))) {
-		smum_send_msg_to_smc_with_parameter(hwmgr,
-					PPSMC_MSG_SetMinVideoFclkFreq,
-					hwmgr->gfx_arbiter.fclk / 100);
-		rv_read_arg_from_smc(hwmgr, &rv_data->fabric_actual_soft_min_freq);
-	}
-
 	return 0;
 }
 
@@ -518,17 +484,161 @@ static int rv_hwmgr_backend_fini(struct pp_hwmgr *hwmgr)
 static int rv_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 				enum amd_dpm_forced_level level)
 {
+	if (hwmgr->smu_version < 0x1E3700) {
+		pr_info("smu firmware version too old, can not set dpm level\n");
+		return 0;
+	}
+
+	switch (level) {
+	case AMD_DPM_FORCED_LEVEL_HIGH:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinGfxClk,
+						RAVEN_UMD_PSTATE_PEAK_GFXCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinFclkByFreq,
+						RAVEN_UMD_PSTATE_PEAK_FCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinSocclkByFreq,
+						RAVEN_UMD_PSTATE_PEAK_SOCCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinVcn,
+						RAVEN_UMD_PSTATE_VCE);
+
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxGfxClk,
+						RAVEN_UMD_PSTATE_PEAK_GFXCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxFclkByFreq,
+						RAVEN_UMD_PSTATE_PEAK_FCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxSocclkByFreq,
+						RAVEN_UMD_PSTATE_PEAK_SOCCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxVcn,
+						RAVEN_UMD_PSTATE_VCE);
+		break;
+	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinGfxClk,
+						RAVEN_UMD_PSTATE_MIN_GFXCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxGfxClk,
+						RAVEN_UMD_PSTATE_MIN_GFXCLK);
+		break;
+	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinFclkByFreq,
+						RAVEN_UMD_PSTATE_MIN_FCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxFclkByFreq,
+						RAVEN_UMD_PSTATE_MIN_FCLK);
+		break;
+	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinGfxClk,
+						RAVEN_UMD_PSTATE_GFXCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinFclkByFreq,
+						RAVEN_UMD_PSTATE_FCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinSocclkByFreq,
+						RAVEN_UMD_PSTATE_SOCCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinVcn,
+						RAVEN_UMD_PSTATE_VCE);
+
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxGfxClk,
+						RAVEN_UMD_PSTATE_GFXCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxFclkByFreq,
+						RAVEN_UMD_PSTATE_FCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxSocclkByFreq,
+						RAVEN_UMD_PSTATE_SOCCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxVcn,
+						RAVEN_UMD_PSTATE_VCE);
+		break;
+	case AMD_DPM_FORCED_LEVEL_AUTO:
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinGfxClk,
+						RAVEN_UMD_PSTATE_MIN_GFXCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinFclkByFreq,
+						RAVEN_UMD_PSTATE_MIN_FCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinSocclkByFreq,
+						RAVEN_UMD_PSTATE_MIN_SOCCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinVcn,
+						RAVEN_UMD_PSTATE_MIN_VCE);
+
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxGfxClk,
+						RAVEN_UMD_PSTATE_PEAK_GFXCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxFclkByFreq,
+						RAVEN_UMD_PSTATE_PEAK_FCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxSocclkByFreq,
+						RAVEN_UMD_PSTATE_PEAK_SOCCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxVcn,
+						RAVEN_UMD_PSTATE_VCE);
+		break;
+	case AMD_DPM_FORCED_LEVEL_LOW:
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinGfxClk,
+						RAVEN_UMD_PSTATE_MIN_GFXCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxGfxClk,
+						RAVEN_UMD_PSTATE_MIN_GFXCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinFclkByFreq,
+						RAVEN_UMD_PSTATE_MIN_FCLK);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxFclkByFreq,
+						RAVEN_UMD_PSTATE_MIN_FCLK);
+		break;
+	case AMD_DPM_FORCED_LEVEL_MANUAL:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
+	default:
+		break;
+	}
 	return 0;
 }
 
 static uint32_t rv_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low)
 {
-	return 0;
+	struct rv_hwmgr *data;
+
+	if (hwmgr == NULL)
+		return -EINVAL;
+
+	data = (struct rv_hwmgr *)(hwmgr->backend);
+
+	if (low)
+		return data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk;
+	else
+		return data->clock_vol_info.vdd_dep_on_fclk->entries[
+			data->clock_vol_info.vdd_dep_on_fclk->count - 1].clk;
 }
 
 static uint32_t rv_dpm_get_sclk(struct pp_hwmgr *hwmgr, bool low)
 {
-	return 0;
+	struct rv_hwmgr *data;
+
+	if (hwmgr == NULL)
+		return -EINVAL;
+
+	data = (struct rv_hwmgr *)(hwmgr->backend);
+
+	if (low)
+		return data->gfx_min_freq_limit;
+	else
+		return data->gfx_max_freq_limit;
 }
 
 static int rv_dpm_patch_boot_state(struct pp_hwmgr *hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.h
index 9dc50305539468929663f5887d000ba9ae5521ef..c3bc311dc59ff3e3894f5ca221faecfcc0768c9f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.h
@@ -304,4 +304,19 @@ struct pp_hwmgr;
 
 int rv_init_function_pointers(struct pp_hwmgr *hwmgr);
 
+/* UMD PState Raven Msg Parameters in MHz */
+#define RAVEN_UMD_PSTATE_GFXCLK                 700
+#define RAVEN_UMD_PSTATE_SOCCLK                 626
+#define RAVEN_UMD_PSTATE_FCLK                   933
+#define RAVEN_UMD_PSTATE_VCE                    0x03C00320
+
+#define RAVEN_UMD_PSTATE_PEAK_GFXCLK            1100
+#define RAVEN_UMD_PSTATE_PEAK_SOCCLK            757
+#define RAVEN_UMD_PSTATE_PEAK_FCLK              1200
+
+#define RAVEN_UMD_PSTATE_MIN_GFXCLK             200
+#define RAVEN_UMD_PSTATE_MIN_FCLK               400
+#define RAVEN_UMD_PSTATE_MIN_SOCCLK             200
+#define RAVEN_UMD_PSTATE_MIN_VCE                0x0190012C
+
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 8edb0c4c3876c8ca141d8603e7256c7172a3f9ae..41e42beff213905837c15db1841800b2021ec502 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -2722,9 +2722,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 		}
 	}
 
-	smu7_ps->vce_clks.evclk = hwmgr->vce_arbiter.evclk;
-	smu7_ps->vce_clks.ecclk = hwmgr->vce_arbiter.ecclk;
-
 	cgs_get_active_displays_info(hwmgr->device, &info);
 
 	minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock;
@@ -2754,38 +2751,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 		minimum_clocks.memoryClock = stable_pstate_mclk;
 	}
 
-	if (minimum_clocks.engineClock < hwmgr->gfx_arbiter.sclk)
-		minimum_clocks.engineClock = hwmgr->gfx_arbiter.sclk;
-
-	if (minimum_clocks.memoryClock < hwmgr->gfx_arbiter.mclk)
-		minimum_clocks.memoryClock = hwmgr->gfx_arbiter.mclk;
-
-	smu7_ps->sclk_threshold = hwmgr->gfx_arbiter.sclk_threshold;
-
-	if (0 != hwmgr->gfx_arbiter.sclk_over_drive) {
-		PP_ASSERT_WITH_CODE((hwmgr->gfx_arbiter.sclk_over_drive <=
-				hwmgr->platform_descriptor.overdriveLimit.engineClock),
-				"Overdrive sclk exceeds limit",
-				hwmgr->gfx_arbiter.sclk_over_drive =
-						hwmgr->platform_descriptor.overdriveLimit.engineClock);
-
-		if (hwmgr->gfx_arbiter.sclk_over_drive >= hwmgr->gfx_arbiter.sclk)
-			smu7_ps->performance_levels[1].engine_clock =
-					hwmgr->gfx_arbiter.sclk_over_drive;
-	}
-
-	if (0 != hwmgr->gfx_arbiter.mclk_over_drive) {
-		PP_ASSERT_WITH_CODE((hwmgr->gfx_arbiter.mclk_over_drive <=
-				hwmgr->platform_descriptor.overdriveLimit.memoryClock),
-				"Overdrive mclk exceeds limit",
-				hwmgr->gfx_arbiter.mclk_over_drive =
-						hwmgr->platform_descriptor.overdriveLimit.memoryClock);
-
-		if (hwmgr->gfx_arbiter.mclk_over_drive >= hwmgr->gfx_arbiter.mclk)
-			smu7_ps->performance_levels[1].memory_clock =
-					hwmgr->gfx_arbiter.mclk_over_drive;
-	}
-
 	disable_mclk_switching_for_frame_lock = phm_cap_enabled(
 				    hwmgr->platform_descriptor.platformCaps,
 				    PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
@@ -4686,6 +4651,25 @@ static int smu7_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
 	return 0;
 }
 
+static int smu7_get_max_high_clocks(struct pp_hwmgr *hwmgr,
+					struct amd_pp_simple_clock_info *clocks)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct smu7_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
+	struct smu7_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
+
+	if (clocks == NULL)
+		return -EINVAL;
+
+	clocks->memory_max_clock = mclk_table->count > 1 ?
+				mclk_table->dpm_levels[mclk_table->count-1].value :
+				mclk_table->dpm_levels[0].value;
+	clocks->engine_max_clock = sclk_table->count > 1 ?
+				sclk_table->dpm_levels[sclk_table->count-1].value :
+				sclk_table->dpm_levels[0].value;
+	return 0;
+}
+
 static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
 	.backend_init = &smu7_hwmgr_backend_init,
 	.backend_fini = &smu7_hwmgr_backend_fini,
@@ -4738,6 +4722,7 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
 	.disable_smc_firmware_ctf = smu7_thermal_disable_alert,
 	.start_thermal_controller = smu7_start_thermal_controller,
 	.notify_cac_buffer_info = smu7_notify_cac_buffer_info,
+	.get_max_high_clocks = smu7_get_max_high_clocks,
 };
 
 uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 07d256d136addf62d3b5bc5c22a9888dceab6050..2d55dabc77d4112d0b1c50bf1e1ae38ac0505dd0 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -426,9 +426,9 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 		data->smu_features[GNLD_VR0HOT].supported = true;
 
 	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetSmuVersion);
-	vega10_read_arg_from_smc(hwmgr, &(data->smu_version));
+	vega10_read_arg_from_smc(hwmgr, &(hwmgr->smu_version));
 		/* ACG firmware has major version 5 */
-	if ((data->smu_version & 0xff000000) == 0x5000000)
+	if ((hwmgr->smu_version & 0xff000000) == 0x5000000)
 		data->smu_features[GNLD_ACG].supported = true;
 
 	if (data->registry_data.didt_support)
@@ -2879,8 +2879,8 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 			"DPM is already running right , skipping re-enablement!",
 			return 0);
 
-	if ((data->smu_version == 0x001c2c00) ||
-			(data->smu_version == 0x001c2d00)) {
+	if ((hwmgr->smu_version == 0x001c2c00) ||
+			(hwmgr->smu_version == 0x001c2d00)) {
 		tmp_result = smum_send_msg_to_smc_with_parameter(hwmgr,
 				PPSMC_MSG_UpdatePkgPwrPidAlpha, 1);
 		PP_ASSERT_WITH_CODE(!tmp_result,
@@ -3124,9 +3124,6 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 		}
 	}
 
-	vega10_ps->vce_clks.evclk = hwmgr->vce_arbiter.evclk;
-	vega10_ps->vce_clks.ecclk = hwmgr->vce_arbiter.ecclk;
-
 	cgs_get_active_displays_info(hwmgr->device, &info);
 
 	/* result = PHM_CheckVBlankTime(hwmgr, &vblankTooShort);*/
@@ -3165,38 +3162,6 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 		minimum_clocks.memoryClock = stable_pstate_mclk;
 	}
 
-	if (minimum_clocks.engineClock < hwmgr->gfx_arbiter.sclk)
-		minimum_clocks.engineClock = hwmgr->gfx_arbiter.sclk;
-
-	if (minimum_clocks.memoryClock < hwmgr->gfx_arbiter.mclk)
-		minimum_clocks.memoryClock = hwmgr->gfx_arbiter.mclk;
-
-	vega10_ps->sclk_threshold = hwmgr->gfx_arbiter.sclk_threshold;
-
-	if (hwmgr->gfx_arbiter.sclk_over_drive) {
-		PP_ASSERT_WITH_CODE((hwmgr->gfx_arbiter.sclk_over_drive <=
-				hwmgr->platform_descriptor.overdriveLimit.engineClock),
-				"Overdrive sclk exceeds limit",
-				hwmgr->gfx_arbiter.sclk_over_drive =
-						hwmgr->platform_descriptor.overdriveLimit.engineClock);
-
-		if (hwmgr->gfx_arbiter.sclk_over_drive >= hwmgr->gfx_arbiter.sclk)
-			vega10_ps->performance_levels[1].gfx_clock =
-					hwmgr->gfx_arbiter.sclk_over_drive;
-	}
-
-	if (hwmgr->gfx_arbiter.mclk_over_drive) {
-		PP_ASSERT_WITH_CODE((hwmgr->gfx_arbiter.mclk_over_drive <=
-				hwmgr->platform_descriptor.overdriveLimit.memoryClock),
-				"Overdrive mclk exceeds limit",
-				hwmgr->gfx_arbiter.mclk_over_drive =
-						hwmgr->platform_descriptor.overdriveLimit.memoryClock);
-
-		if (hwmgr->gfx_arbiter.mclk_over_drive >= hwmgr->gfx_arbiter.mclk)
-			vega10_ps->performance_levels[1].mem_clock =
-					hwmgr->gfx_arbiter.mclk_over_drive;
-	}
-
 	disable_mclk_switching_for_frame_lock = phm_cap_enabled(
 				    hwmgr->platform_descriptor.platformCaps,
 				    PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
@@ -3819,10 +3784,7 @@ static int vega10_update_sclk_threshold(struct pp_hwmgr *hwmgr)
 	uint32_t low_sclk_interrupt_threshold = 0;
 
 	if (PP_CAP(PHM_PlatformCaps_SclkThrottleLowNotification) &&
-	    (hwmgr->gfx_arbiter.sclk_threshold !=
-				data->low_sclk_interrupt_threshold)) {
-		data->low_sclk_interrupt_threshold =
-				hwmgr->gfx_arbiter.sclk_threshold;
+		(data->low_sclk_interrupt_threshold != 0)) {
 		low_sclk_interrupt_threshold =
 				data->low_sclk_interrupt_threshold;
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
index 8f7358cc3327b779a8000d052da6e6e79d622ac3..e8507ff8dbb3b08fddb72f821b2af3d69788a1a1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
@@ -387,7 +387,6 @@ struct vega10_hwmgr {
 	struct vega10_smc_state_table  smc_state_table;
 
 	uint32_t                       config_telemetry;
-	uint32_t                       smu_version;
 	uint32_t                       acg_loop_state;
 	uint32_t                       mem_channels;
 };
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 57a0467b72676c9a3a06bb79db285ec5f063700f..5716b937a6ad41e616b321762d4ebf990b3b91e9 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -437,5 +437,6 @@ extern int phm_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
 
 extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
 extern int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr);
+extern int phm_reset_power_profile_state(struct pp_hwmgr *hwmgr);
 #endif /* _HARDWARE_MANAGER_H_ */
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 004a40e88bdef05a675812e049ab805f3dbe4de0..565fe0832f415da540f2685d916dde4b270f8fc2 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -105,36 +105,6 @@ struct phm_set_power_state_input {
 	const struct pp_hw_power_state *pnew_state;
 };
 
-struct phm_acp_arbiter {
-	uint32_t acpclk;
-};
-
-struct phm_uvd_arbiter {
-	uint32_t vclk;
-	uint32_t dclk;
-	uint32_t vclk_ceiling;
-	uint32_t dclk_ceiling;
-	uint32_t vclk_soft_min;
-	uint32_t dclk_soft_min;
-};
-
-struct phm_vce_arbiter {
-	uint32_t   evclk;
-	uint32_t   ecclk;
-};
-
-struct phm_gfx_arbiter {
-	uint32_t sclk;
-	uint32_t sclk_hard_min;
-	uint32_t mclk;
-	uint32_t sclk_over_drive;
-	uint32_t mclk_over_drive;
-	uint32_t sclk_threshold;
-	uint32_t num_cus;
-	uint32_t gfxclk;
-	uint32_t fclk;
-};
-
 struct phm_clock_array {
 	uint32_t count;
 	uint32_t values[1];
@@ -722,6 +692,7 @@ enum PP_TABLE_VERSION {
 struct pp_hwmgr {
 	uint32_t chip_family;
 	uint32_t chip_id;
+	uint32_t smu_version;
 
 	uint32_t pp_table_version;
 	void *device;
@@ -737,10 +708,6 @@ struct pp_hwmgr {
 	enum amd_dpm_forced_level dpm_level;
 	enum amd_dpm_forced_level saved_dpm_level;
 	enum amd_dpm_forced_level request_dpm_level;
-	struct phm_gfx_arbiter gfx_arbiter;
-	struct phm_acp_arbiter acp_arbiter;
-	struct phm_uvd_arbiter uvd_arbiter;
-	struct phm_vce_arbiter vce_arbiter;
 	uint32_t usec_timeout;
 	void *pptable;
 	struct phm_platform_descriptor platform_descriptor;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
index 2b3497135bbd67879d3c755e8d41748b63b8a6df..f15f4df9d0a9920888575dd324bbbb89d9a3ce83 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
@@ -75,7 +75,12 @@
 #define PPSMC_MSG_GetMinGfxclkFrequency         0x2C
 #define PPSMC_MSG_GetMaxGfxclkFrequency         0x2D
 #define PPSMC_MSG_SoftReset                     0x2E
-#define PPSMC_Message_Count                     0x2F
+#define PPSMC_MSG_SetSoftMaxGfxClk              0x30
+#define PPSMC_MSG_SetHardMinGfxClk              0x31
+#define PPSMC_MSG_SetSoftMaxSocclkByFreq        0x32
+#define PPSMC_MSG_SetSoftMaxFclkByFreq          0x33
+#define PPSMC_MSG_SetSoftMaxVcn                 0x34
+#define PPSMC_Message_Count                     0x35
 
 
 typedef uint16_t PPSMC_Result;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu72.h b/drivers/gpu/drm/amd/powerplay/inc/smu72.h
index 08cd70c75d8b8e55c8d19b51b680c703441685bf..9ad1cefff79fc17786cbacfd22af8b00365cc8b5 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu72.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu72.h
@@ -1,4 +1,26 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
 #ifndef SMU72_H
 #define SMU72_H
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu72_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu72_discrete.h
index b2edbc0c3c4dfea4b66d3c8d5b7b5a38b24c165a..2aefbb85f62003de2e9020f5d8496642dcdebac3 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu72_discrete.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu72_discrete.h
@@ -1,4 +1,26 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
 #ifndef SMU72_DISCRETE_H
 #define SMU72_DISCRETE_H
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile
index 30d3089d7dbafdc45ce9b666edb4a65916ab029d..98e701e4f55383eeee93d1a7ebd60c8d15e7b80c 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile
@@ -1,4 +1,24 @@
-# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
 #
 # Makefile for the 'smu manager' sub-component of powerplay.
 # It provides the smu management services for the driver.
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
index c36f00ef46f3955b904cfbf0f73ab98455f67f43..0b4a55660de47f1d5e79f4d19596717864bf6e29 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
@@ -2218,10 +2218,7 @@ static int ci_update_sclk_threshold(struct pp_hwmgr *hwmgr)
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_SclkThrottleLowNotification)
-		&& (hwmgr->gfx_arbiter.sclk_threshold !=
-				data->low_sclk_interrupt_threshold)) {
-		data->low_sclk_interrupt_threshold =
-				hwmgr->gfx_arbiter.sclk_threshold;
+		&& (data->low_sclk_interrupt_threshold != 0)) {
 		low_sclk_interrupt_threshold =
 				data->low_sclk_interrupt_threshold;
 
@@ -2319,6 +2316,7 @@ static int ci_load_smc_ucode(struct pp_hwmgr *hwmgr)
 	cgs_get_firmware_info(hwmgr->device, CGS_UCODE_ID_SMU, &info);
 
 	hwmgr->is_kicker = info.is_kicker;
+	hwmgr->smu_version = info.version;
 	byte_count = info.image_size;
 	src = (uint8_t *)info.kptr;
 	start_addr = info.ucode_start_address;
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
index 78ab0556e48f7c0fd2cb316d1ea606e7b802a1ce..4d3aff381bca69c0fc6950b5d621cc13de258ab2 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
@@ -709,6 +709,19 @@ static int cz_start_smu(struct pp_hwmgr *hwmgr)
 {
 	int ret = 0;
 	uint32_t fw_to_check = 0;
+	struct cgs_firmware_info info = {0};
+	uint32_t index = SMN_MP1_SRAM_START_ADDR +
+			 SMU8_FIRMWARE_HEADER_LOCATION +
+			 offsetof(struct SMU8_Firmware_Header, Version);
+
+
+	if (hwmgr == NULL || hwmgr->device == NULL)
+		return -EINVAL;
+
+	cgs_write_register(hwmgr->device, mmMP0PUB_IND_INDEX, index);
+	hwmgr->smu_version = cgs_read_register(hwmgr->device, mmMP0PUB_IND_DATA);
+	info.version = hwmgr->smu_version >> 8;
+	cgs_get_firmware_info(hwmgr->device, CGS_UCODE_ID_SMU, &info);
 
 	fw_to_check = UCODE_ID_RLC_G_MASK |
 			UCODE_ID_SDMA0_MASK |
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index f572beff197f0630b413bd948676c4519d6c1da5..085d81c8b33259a1af1b64aa171eb417b3d932a7 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -2385,10 +2385,7 @@ static int fiji_update_sclk_threshold(struct pp_hwmgr *hwmgr)
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_SclkThrottleLowNotification)
-		&& (hwmgr->gfx_arbiter.sclk_threshold !=
-				data->low_sclk_interrupt_threshold)) {
-		data->low_sclk_interrupt_threshold =
-				hwmgr->gfx_arbiter.sclk_threshold;
+		&& (data->low_sclk_interrupt_threshold != 0)) {
 		low_sclk_interrupt_threshold =
 				data->low_sclk_interrupt_threshold;
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
index d62078681cae9d72d0414b48894f7532dfd16182..125312691f7539689e467fdcdc580bf9d45fa1a7 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
@@ -204,7 +204,7 @@ static int iceland_smu_upload_firmware_image(struct pp_hwmgr *hwmgr)
 		pr_err("[ powerplay ] SMC address is beyond the SMC RAM area\n");
 		return -EINVAL;
 	}
-
+	hwmgr->smu_version = info.version;
 	/* wait for smc boot up */
 	PHM_WAIT_INDIRECT_FIELD_UNEQUAL(hwmgr, SMC_IND,
 					 RCU_UC_EVENTS, boot_seq_done, 0);
@@ -2202,10 +2202,7 @@ static int iceland_update_sclk_threshold(struct pp_hwmgr *hwmgr)
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_SclkThrottleLowNotification)
-		&& (hwmgr->gfx_arbiter.sclk_threshold !=
-				data->low_sclk_interrupt_threshold)) {
-		data->low_sclk_interrupt_threshold =
-				hwmgr->gfx_arbiter.sclk_threshold;
+		&& (data->low_sclk_interrupt_threshold != 0)) {
 		low_sclk_interrupt_threshold =
 				data->low_sclk_interrupt_threshold;
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index bd6be7793ca79ffb187ac2d55e225ba13df2c75d..cdb47657b56774308ebb7cc88848b543da92e10b 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -2369,10 +2369,7 @@ static int polaris10_update_sclk_threshold(struct pp_hwmgr *hwmgr)
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_SclkThrottleLowNotification)
-		&& (hwmgr->gfx_arbiter.sclk_threshold !=
-				data->low_sclk_interrupt_threshold)) {
-		data->low_sclk_interrupt_threshold =
-				hwmgr->gfx_arbiter.sclk_threshold;
+		&& (data->low_sclk_interrupt_threshold != 0)) {
 		low_sclk_interrupt_threshold =
 				data->low_sclk_interrupt_threshold;
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c
index b98ade676d128b78b0f6236170281244d58199c2..2d662b44af54b6fe1bcfba18c84576e9107abcbc 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c
@@ -305,6 +305,14 @@ static int rv_smu_fini(struct pp_hwmgr *hwmgr)
 
 static int rv_start_smu(struct pp_hwmgr *hwmgr)
 {
+	struct cgs_firmware_info info = {0};
+
+	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetSmuVersion);
+	rv_read_arg_from_smc(hwmgr, &hwmgr->smu_version);
+	info.version = hwmgr->smu_version >> 8;
+
+	cgs_get_firmware_info(hwmgr->device, CGS_UCODE_ID_SMU, &info);
+
 	if (rv_verify_smc_interface(hwmgr))
 		return -EINVAL;
 	if (rv_smc_enable_sdma(hwmgr))
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h
index 58888400f1b8df4ab9a384ecd782d44f9f675420..caebdbebdcd8a36d560582b991fd56e3e2a9e924 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h
@@ -40,7 +40,7 @@ struct smu_table_entry {
 	uint32_t table_addr_high;
 	uint32_t table_addr_low;
 	uint8_t *table;
-	uint32_t handle;
+	unsigned long handle;
 };
 
 struct smu_table_array {
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
index 7f5359a97ef2d8f4a4182b1e708a2eff071f4be0..311ff371861899c5434eced750aabf8330c814a2 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
@@ -535,7 +535,7 @@ int smu7_upload_smu_firmware_image(struct pp_hwmgr *hwmgr)
 			smu7_convert_fw_type_to_cgs(UCODE_ID_SMU_SK), &info);
 
 	hwmgr->is_kicker = info.is_kicker;
-
+	hwmgr->smu_version = info.version;
 	result = smu7_upload_smc_firmware_data(hwmgr, info.image_size, (uint32_t *)info.kptr, SMU7_SMC_SIZE);
 
 	return result;
@@ -648,6 +648,12 @@ int smu7_init(struct pp_hwmgr *hwmgr)
 
 int smu7_smu_fini(struct pp_hwmgr *hwmgr)
 {
+	struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend);
+
+	smu_free_memory(hwmgr->device, (void *) smu_data->header_buffer.handle);
+	if (!cgs_is_virtualization_enabled(hwmgr->device))
+		smu_free_memory(hwmgr->device, (void *) smu_data->smu_buffer.handle);
+
 	kfree(hwmgr->smu_backend);
 	hwmgr->smu_backend = NULL;
 	cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
index 81b8790c0d2210740a196799bb64108718af6d97..79e5c05571bcb829b03207f836bba5c01be2c38e 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
@@ -2654,10 +2654,7 @@ static int tonga_update_sclk_threshold(struct pp_hwmgr *hwmgr)
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_SclkThrottleLowNotification)
-		&& (hwmgr->gfx_arbiter.sclk_threshold !=
-				data->low_sclk_interrupt_threshold)) {
-		data->low_sclk_interrupt_threshold =
-				hwmgr->gfx_arbiter.sclk_threshold;
+		&& (data->low_sclk_interrupt_threshold != 0)) {
 		low_sclk_interrupt_threshold =
 				data->low_sclk_interrupt_threshold;
 
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
deleted file mode 100644
index eebe323c7159cc7754952a90dbd09b6f455730a1..0000000000000000000000000000000000000000
--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _GPU_SCHED_TRACE_H_
-
-#include <linux/stringify.h>
-#include <linux/types.h>
-#include <linux/tracepoint.h>
-
-#include <drm/drmP.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM gpu_sched
-#define TRACE_INCLUDE_FILE gpu_sched_trace
-
-TRACE_EVENT(amd_sched_job,
-	    TP_PROTO(struct amd_sched_job *sched_job, struct amd_sched_entity *entity),
-	    TP_ARGS(sched_job, entity),
-	    TP_STRUCT__entry(
-			     __field(struct amd_sched_entity *, entity)
-			     __field(struct dma_fence *, fence)
-			     __field(const char *, name)
-			     __field(uint64_t, id)
-			     __field(u32, job_count)
-			     __field(int, hw_job_count)
-			     ),
-
-	    TP_fast_assign(
-			   __entry->entity = entity;
-			   __entry->id = sched_job->id;
-			   __entry->fence = &sched_job->s_fence->finished;
-			   __entry->name = sched_job->sched->name;
-			   __entry->job_count = spsc_queue_count(&entity->job_queue);
-			   __entry->hw_job_count = atomic_read(
-				   &sched_job->sched->hw_rq_count);
-			   ),
-	    TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
-		      __entry->entity, __entry->id,
-		      __entry->fence, __entry->name,
-		      __entry->job_count, __entry->hw_job_count)
-);
-
-TRACE_EVENT(amd_sched_process_job,
-	    TP_PROTO(struct amd_sched_fence *fence),
-	    TP_ARGS(fence),
-	    TP_STRUCT__entry(
-		    __field(struct dma_fence *, fence)
-		    ),
-
-	    TP_fast_assign(
-		    __entry->fence = &fence->finished;
-		    ),
-	    TP_printk("fence=%p signaled", __entry->fence)
-);
-
-#endif
-
-/* This part must be outside protection */
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
deleted file mode 100644
index b590fcc2786a5da7b9ad2345b5d12b5f0055e762..0000000000000000000000000000000000000000
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef _GPU_SCHEDULER_H_
-#define _GPU_SCHEDULER_H_
-
-#include <linux/kfifo.h>
-#include <linux/dma-fence.h>
-#include "spsc_queue.h"
-
-struct amd_gpu_scheduler;
-struct amd_sched_rq;
-
-enum amd_sched_priority {
-	AMD_SCHED_PRIORITY_MIN,
-	AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN,
-	AMD_SCHED_PRIORITY_NORMAL,
-	AMD_SCHED_PRIORITY_HIGH_SW,
-	AMD_SCHED_PRIORITY_HIGH_HW,
-	AMD_SCHED_PRIORITY_KERNEL,
-	AMD_SCHED_PRIORITY_MAX,
-	AMD_SCHED_PRIORITY_INVALID = -1,
-	AMD_SCHED_PRIORITY_UNSET = -2
-};
-
-
-/**
- * A scheduler entity is a wrapper around a job queue or a group
- * of other entities. Entities take turns emitting jobs from their
- * job queues to corresponding hardware ring based on scheduling
- * policy.
-*/
-struct amd_sched_entity {
-	struct list_head		list;
-	struct amd_sched_rq		*rq;
-	spinlock_t			rq_lock;
-	struct amd_gpu_scheduler	*sched;
-
-	spinlock_t			queue_lock;
-	struct spsc_queue	job_queue;
-
-	atomic_t			fence_seq;
-	uint64_t                        fence_context;
-
-	struct dma_fence		*dependency;
-	struct dma_fence_cb		cb;
-	atomic_t	*guilty; /* points to ctx's guilty */
-};
-
-/**
- * Run queue is a set of entities scheduling command submissions for
- * one specific ring. It implements the scheduling policy that selects
- * the next entity to emit commands from.
-*/
-struct amd_sched_rq {
-	spinlock_t		lock;
-	struct list_head	entities;
-	struct amd_sched_entity	*current_entity;
-};
-
-struct amd_sched_fence {
-	struct dma_fence                scheduled;
-	struct dma_fence                finished;
-	struct dma_fence_cb             cb;
-	struct dma_fence                *parent;
-	struct amd_gpu_scheduler	*sched;
-	spinlock_t			lock;
-	void                            *owner;
-};
-
-struct amd_sched_job {
-	struct spsc_node queue_node;
-	struct amd_gpu_scheduler        *sched;
-	struct amd_sched_fence          *s_fence;
-	struct dma_fence_cb		finish_cb;
-	struct work_struct		finish_work;
-	struct list_head		node;
-	struct delayed_work		work_tdr;
-	uint64_t			id;
-	atomic_t karma;
-	enum amd_sched_priority s_priority;
-};
-
-extern const struct dma_fence_ops amd_sched_fence_ops_scheduled;
-extern const struct dma_fence_ops amd_sched_fence_ops_finished;
-static inline struct amd_sched_fence *to_amd_sched_fence(struct dma_fence *f)
-{
-	if (f->ops == &amd_sched_fence_ops_scheduled)
-		return container_of(f, struct amd_sched_fence, scheduled);
-
-	if (f->ops == &amd_sched_fence_ops_finished)
-		return container_of(f, struct amd_sched_fence, finished);
-
-	return NULL;
-}
-
-static inline bool amd_sched_invalidate_job(struct amd_sched_job *s_job, int threshold)
-{
-	return (s_job && atomic_inc_return(&s_job->karma) > threshold);
-}
-
-/**
- * Define the backend operations called by the scheduler,
- * these functions should be implemented in driver side
-*/
-struct amd_sched_backend_ops {
-	struct dma_fence *(*dependency)(struct amd_sched_job *sched_job,
-					struct amd_sched_entity *s_entity);
-	struct dma_fence *(*run_job)(struct amd_sched_job *sched_job);
-	void (*timedout_job)(struct amd_sched_job *sched_job);
-	void (*free_job)(struct amd_sched_job *sched_job);
-};
-
-/**
- * One scheduler is implemented for each hardware ring
-*/
-struct amd_gpu_scheduler {
-	const struct amd_sched_backend_ops	*ops;
-	uint32_t			hw_submission_limit;
-	long				timeout;
-	const char			*name;
-	struct amd_sched_rq		sched_rq[AMD_SCHED_PRIORITY_MAX];
-	wait_queue_head_t		wake_up_worker;
-	wait_queue_head_t		job_scheduled;
-	atomic_t			hw_rq_count;
-	atomic64_t			job_id_count;
-	struct task_struct		*thread;
-	struct list_head	ring_mirror_list;
-	spinlock_t			job_list_lock;
-	int hang_limit;
-};
-
-int amd_sched_init(struct amd_gpu_scheduler *sched,
-		   const struct amd_sched_backend_ops *ops,
-		   uint32_t hw_submission, unsigned hang_limit, long timeout, const char *name);
-void amd_sched_fini(struct amd_gpu_scheduler *sched);
-
-int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
-			  struct amd_sched_entity *entity,
-			  struct amd_sched_rq *rq,
-			  uint32_t jobs, atomic_t* guilty);
-void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
-			   struct amd_sched_entity *entity);
-void amd_sched_entity_push_job(struct amd_sched_job *sched_job,
-			       struct amd_sched_entity *entity);
-void amd_sched_entity_set_rq(struct amd_sched_entity *entity,
-			     struct amd_sched_rq *rq);
-
-int amd_sched_fence_slab_init(void);
-void amd_sched_fence_slab_fini(void);
-
-struct amd_sched_fence *amd_sched_fence_create(
-	struct amd_sched_entity *s_entity, void *owner);
-void amd_sched_fence_scheduled(struct amd_sched_fence *fence);
-void amd_sched_fence_finished(struct amd_sched_fence *fence);
-int amd_sched_job_init(struct amd_sched_job *job,
-		       struct amd_gpu_scheduler *sched,
-		       struct amd_sched_entity *entity,
-		       void *owner);
-void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *job);
-void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
-bool amd_sched_dependency_optimized(struct dma_fence* fence,
-				    struct amd_sched_entity *entity);
-void amd_sched_job_kickout(struct amd_sched_job *s_job);
-
-#endif
diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c
index e080e31a8513a0b74c5b9150be2fa0f293df7d49..3d82712d80028cffdfab2bc93e03960c683ea2c4 100644
--- a/drivers/gpu/drm/arm/malidp_drv.c
+++ b/drivers/gpu/drm/arm/malidp_drv.c
@@ -13,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/clk.h>
 #include <linux/component.h>
-#include <linux/console.h>
 #include <linux/of_device.h>
 #include <linux/of_graph.h>
 #include <linux/of_reserved_mem.h>
@@ -24,6 +23,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
@@ -183,13 +183,6 @@ static int malidp_set_and_wait_config_valid(struct drm_device *drm)
 	return (ret > 0) ? 0 : -ETIMEDOUT;
 }
 
-static void malidp_output_poll_changed(struct drm_device *drm)
-{
-	struct malidp_drm *malidp = drm->dev_private;
-
-	drm_fbdev_cma_hotplug_event(malidp->fbdev);
-}
-
 static void malidp_atomic_commit_hw_done(struct drm_atomic_state *state)
 {
 	struct drm_pending_vblank_event *event;
@@ -252,7 +245,7 @@ static const struct drm_mode_config_helper_funcs malidp_mode_config_helpers = {
 
 static const struct drm_mode_config_funcs malidp_mode_config_funcs = {
 	.fb_create = drm_gem_fb_create,
-	.output_poll_changed = malidp_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
@@ -317,19 +310,12 @@ static int malidp_irq_init(struct platform_device *pdev)
 	return 0;
 }
 
-static void malidp_lastclose(struct drm_device *drm)
-{
-	struct malidp_drm *malidp = drm->dev_private;
-
-	drm_fbdev_cma_restore_mode(malidp->fbdev);
-}
-
 DEFINE_DRM_GEM_CMA_FOPS(fops);
 
 static struct drm_driver malidp_driver = {
 	.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC |
 			   DRIVER_PRIME,
-	.lastclose = malidp_lastclose,
+	.lastclose = drm_fb_helper_lastclose,
 	.gem_free_object_unlocked = drm_gem_cma_free_object,
 	.gem_vm_ops = &drm_gem_cma_vm_ops,
 	.dumb_create = drm_gem_cma_dumb_create,
@@ -623,14 +609,9 @@ static int malidp_bind(struct device *dev)
 
 	drm_mode_config_reset(drm);
 
-	malidp->fbdev = drm_fbdev_cma_init(drm, 32,
-					   drm->mode_config.num_connector);
-
-	if (IS_ERR(malidp->fbdev)) {
-		ret = PTR_ERR(malidp->fbdev);
-		malidp->fbdev = NULL;
+	ret = drm_fb_cma_fbdev_init(drm, 32, 0);
+	if (ret)
 		goto fbdev_fail;
-	}
 
 	drm_kms_helper_poll_init(drm);
 
@@ -641,10 +622,7 @@ static int malidp_bind(struct device *dev)
 	return 0;
 
 register_fail:
-	if (malidp->fbdev) {
-		drm_fbdev_cma_fini(malidp->fbdev);
-		malidp->fbdev = NULL;
-	}
+	drm_fb_cma_fbdev_fini(drm);
 	drm_kms_helper_poll_fini(drm);
 fbdev_fail:
 	pm_runtime_get_sync(dev);
@@ -681,10 +659,7 @@ static void malidp_unbind(struct device *dev)
 	struct malidp_drm *malidp = drm->dev_private;
 
 	drm_dev_unregister(drm);
-	if (malidp->fbdev) {
-		drm_fbdev_cma_fini(malidp->fbdev);
-		malidp->fbdev = NULL;
-	}
+	drm_fb_cma_fbdev_fini(drm);
 	drm_kms_helper_poll_fini(drm);
 	pm_runtime_get_sync(dev);
 	malidp_se_irq_fini(drm);
diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h
index 70ed6aeccf052192c50c3b9f48f3c135d112e786..e0d12c9fc6b8093c4a0aaa8f7bacf0798dea2e11 100644
--- a/drivers/gpu/drm/arm/malidp_drv.h
+++ b/drivers/gpu/drm/arm/malidp_drv.h
@@ -20,7 +20,6 @@
 
 struct malidp_drm {
 	struct malidp_hw_device *dev;
-	struct drm_fbdev_cma *fbdev;
 	struct drm_crtc crtc;
 	wait_queue_head_t wq;
 	atomic_t config_valid;
diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c
index 2e065facdce74cae05cdc7b86fdcf9643d886ba0..e2adfbef7d6b631b74acd031d8bf53ea5213ad2c 100644
--- a/drivers/gpu/drm/armada/armada_crtc.c
+++ b/drivers/gpu/drm/armada/armada_crtc.c
@@ -13,6 +13,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_plane_helper.h>
+#include <drm/drm_atomic_helper.h>
 #include "armada_crtc.h"
 #include "armada_drm.h"
 #include "armada_fb.h"
@@ -20,13 +21,6 @@
 #include "armada_hw.h"
 #include "armada_trace.h"
 
-struct armada_frame_work {
-	struct armada_plane_work work;
-	struct drm_pending_vblank_event *event;
-	struct armada_regs regs[4];
-	struct drm_framebuffer *old_fb;
-};
-
 enum csc_mode {
 	CSC_AUTO = 0,
 	CSC_YUV_CCIR601 = 1,
@@ -168,16 +162,23 @@ static void armada_drm_crtc_update(struct armada_crtc *dcrtc)
 void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb,
 	int x, int y)
 {
+	const struct drm_format_info *format = fb->format;
+	unsigned int num_planes = format->num_planes;
 	u32 addr = drm_fb_obj(fb)->dev_addr;
-	int num_planes = fb->format->num_planes;
 	int i;
 
 	if (num_planes > 3)
 		num_planes = 3;
 
-	for (i = 0; i < num_planes; i++)
+	addrs[0] = addr + fb->offsets[0] + y * fb->pitches[0] +
+		   x * format->cpp[0];
+
+	y /= format->vsub;
+	x /= format->hsub;
+
+	for (i = 1; i < num_planes; i++)
 		addrs[i] = addr + fb->offsets[i] + y * fb->pitches[i] +
-			     x * fb->format->cpp[i];
+			     x * format->cpp[i];
 	for (; i < 3; i++)
 		addrs[i] = 0;
 }
@@ -209,6 +210,38 @@ static unsigned armada_drm_crtc_calc_fb(struct drm_framebuffer *fb,
 	return i;
 }
 
+static void armada_drm_plane_work_call(struct armada_crtc *dcrtc,
+	struct armada_plane_work *work,
+	void (*fn)(struct armada_crtc *, struct armada_plane_work *))
+{
+	struct armada_plane *dplane = drm_to_armada_plane(work->plane);
+	struct drm_pending_vblank_event *event;
+	struct drm_framebuffer *fb;
+
+	if (fn)
+		fn(dcrtc, work);
+	drm_crtc_vblank_put(&dcrtc->crtc);
+
+	event = work->event;
+	fb = work->old_fb;
+	if (event || fb) {
+		struct drm_device *dev = dcrtc->crtc.dev;
+		unsigned long flags;
+
+		spin_lock_irqsave(&dev->event_lock, flags);
+		if (event)
+			drm_crtc_send_vblank_event(&dcrtc->crtc, event);
+		if (fb)
+			__armada_drm_queue_unref_work(dev, fb);
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	}
+
+	if (work->need_kfree)
+		kfree(work);
+
+	wake_up(&dplane->frame_wait);
+}
+
 static void armada_drm_plane_work_run(struct armada_crtc *dcrtc,
 	struct drm_plane *plane)
 {
@@ -216,24 +249,19 @@ static void armada_drm_plane_work_run(struct armada_crtc *dcrtc,
 	struct armada_plane_work *work = xchg(&dplane->work, NULL);
 
 	/* Handle any pending frame work. */
-	if (work) {
-		work->fn(dcrtc, dplane, work);
-		drm_crtc_vblank_put(&dcrtc->crtc);
-	}
-
-	wake_up(&dplane->frame_wait);
+	if (work)
+		armada_drm_plane_work_call(dcrtc, work, work->fn);
 }
 
 int armada_drm_plane_work_queue(struct armada_crtc *dcrtc,
-	struct armada_plane *plane, struct armada_plane_work *work)
+	struct armada_plane_work *work)
 {
+	struct armada_plane *plane = drm_to_armada_plane(work->plane);
 	int ret;
 
 	ret = drm_crtc_vblank_get(&dcrtc->crtc);
-	if (ret) {
-		DRM_ERROR("failed to acquire vblank counter\n");
+	if (ret)
 		return ret;
-	}
 
 	ret = cmpxchg(&plane->work, NULL, work) ? -EBUSY : 0;
 	if (ret)
@@ -247,51 +275,60 @@ int armada_drm_plane_work_wait(struct armada_plane *plane, long timeout)
 	return wait_event_timeout(plane->frame_wait, !plane->work, timeout);
 }
 
-struct armada_plane_work *armada_drm_plane_work_cancel(
-	struct armada_crtc *dcrtc, struct armada_plane *plane)
+void armada_drm_plane_work_cancel(struct armada_crtc *dcrtc,
+	struct armada_plane *dplane)
 {
-	struct armada_plane_work *work = xchg(&plane->work, NULL);
+	struct armada_plane_work *work = xchg(&dplane->work, NULL);
 
 	if (work)
-		drm_crtc_vblank_put(&dcrtc->crtc);
-
-	return work;
+		armada_drm_plane_work_call(dcrtc, work, work->cancel);
 }
 
-static int armada_drm_crtc_queue_frame_work(struct armada_crtc *dcrtc,
-	struct armada_frame_work *work)
+static void armada_drm_crtc_complete_frame_work(struct armada_crtc *dcrtc,
+	struct armada_plane_work *work)
 {
-	struct armada_plane *plane = drm_to_armada_plane(dcrtc->crtc.primary);
+	unsigned long flags;
 
-	return armada_drm_plane_work_queue(dcrtc, plane, &work->work);
+	spin_lock_irqsave(&dcrtc->irq_lock, flags);
+	armada_drm_crtc_update_regs(dcrtc, work->regs);
+	spin_unlock_irqrestore(&dcrtc->irq_lock, flags);
 }
 
-static void armada_drm_crtc_complete_frame_work(struct armada_crtc *dcrtc,
-	struct armada_plane *plane, struct armada_plane_work *work)
+static void armada_drm_crtc_complete_disable_work(struct armada_crtc *dcrtc,
+	struct armada_plane_work *work)
 {
-	struct armada_frame_work *fwork = container_of(work, struct armada_frame_work, work);
-	struct drm_device *dev = dcrtc->crtc.dev;
 	unsigned long flags;
 
+	if (dcrtc->plane == work->plane)
+		dcrtc->plane = NULL;
+
 	spin_lock_irqsave(&dcrtc->irq_lock, flags);
-	armada_drm_crtc_update_regs(dcrtc, fwork->regs);
+	armada_drm_crtc_update_regs(dcrtc, work->regs);
 	spin_unlock_irqrestore(&dcrtc->irq_lock, flags);
+}
 
-	if (fwork->event) {
-		spin_lock_irqsave(&dev->event_lock, flags);
-		drm_crtc_send_vblank_event(&dcrtc->crtc, fwork->event);
-		spin_unlock_irqrestore(&dev->event_lock, flags);
-	}
+static struct armada_plane_work *
+armada_drm_crtc_alloc_plane_work(struct drm_plane *plane)
+{
+	struct armada_plane_work *work;
+	int i = 0;
+
+	work = kzalloc(sizeof(*work), GFP_KERNEL);
+	if (!work)
+		return NULL;
 
-	/* Finally, queue the process-half of the cleanup. */
-	__armada_drm_queue_unref_work(dcrtc->crtc.dev, fwork->old_fb);
-	kfree(fwork);
+	work->plane = plane;
+	work->fn = armada_drm_crtc_complete_frame_work;
+	work->need_kfree = true;
+	armada_reg_queue_end(work->regs, i);
+
+	return work;
 }
 
 static void armada_drm_crtc_finish_fb(struct armada_crtc *dcrtc,
 	struct drm_framebuffer *fb, bool force)
 {
-	struct armada_frame_work *work;
+	struct armada_plane_work *work;
 
 	if (!fb)
 		return;
@@ -302,15 +339,11 @@ static void armada_drm_crtc_finish_fb(struct armada_crtc *dcrtc,
 		return;
 	}
 
-	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	work = armada_drm_crtc_alloc_plane_work(dcrtc->crtc.primary);
 	if (work) {
-		int i = 0;
-		work->work.fn = armada_drm_crtc_complete_frame_work;
-		work->event = NULL;
 		work->old_fb = fb;
-		armada_reg_queue_end(work->regs, i);
 
-		if (armada_drm_crtc_queue_frame_work(dcrtc, work) == 0)
+		if (armada_drm_plane_work_queue(dcrtc, work) == 0)
 			return;
 
 		kfree(work);
@@ -373,8 +406,11 @@ static void armada_drm_crtc_prepare(struct drm_crtc *crtc)
 	 * the new mode parameters.
 	 */
 	plane = dcrtc->plane;
-	if (plane)
+	if (plane) {
 		drm_plane_force_disable(plane);
+		WARN_ON(!armada_drm_plane_work_wait(drm_to_armada_plane(plane),
+						    HZ));
+	}
 }
 
 /* The mode_config.mutex will be held for this call */
@@ -440,11 +476,11 @@ static void armada_drm_crtc_irq(struct armada_crtc *dcrtc, u32 stat)
 	if (stat & VSYNC_IRQ)
 		drm_crtc_handle_vblank(&dcrtc->crtc);
 
-	spin_lock(&dcrtc->irq_lock);
 	ovl_plane = dcrtc->plane;
 	if (ovl_plane)
 		armada_drm_plane_work_run(dcrtc, ovl_plane);
 
+	spin_lock(&dcrtc->irq_lock);
 	if (stat & GRA_FRAME_IRQ && dcrtc->interlaced) {
 		int i = stat & GRA_FRAME_IRQ0 ? 0 : 1;
 		uint32_t val;
@@ -536,18 +572,14 @@ static uint32_t armada_drm_crtc_calculate_csc(struct armada_crtc *dcrtc)
 	return val;
 }
 
-static void armada_drm_primary_set(struct drm_crtc *crtc,
-	struct drm_plane *plane, int x, int y)
+static void armada_drm_gra_plane_regs(struct armada_regs *regs,
+	struct drm_framebuffer *fb, struct armada_plane_state *state,
+	int x, int y, bool interlaced)
 {
-	struct armada_plane_state *state = &drm_to_armada_plane(plane)->state;
-	struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
-	struct armada_regs regs[8];
-	bool interlaced = dcrtc->interlaced;
-	unsigned i;
+	unsigned int i;
 	u32 ctrl0;
 
-	i = armada_drm_crtc_calc_fb(plane->fb, x, y, regs, interlaced);
-
+	i = armada_drm_crtc_calc_fb(fb, x, y, regs, interlaced);
 	armada_reg_queue_set(regs, i, state->dst_yx, LCD_SPU_GRA_OVSA_HPXL_VLN);
 	armada_reg_queue_set(regs, i, state->src_hw, LCD_SPU_GRA_HPXL_VLN);
 	armada_reg_queue_set(regs, i, state->dst_hw, LCD_SPU_GZM_HPXL_VLN);
@@ -559,9 +591,21 @@ static void armada_drm_primary_set(struct drm_crtc *crtc,
 	armada_reg_queue_mod(regs, i, ctrl0, CFG_GRAFORMAT |
 			     CFG_GRA_MOD(CFG_SWAPRB | CFG_SWAPUV |
 					 CFG_SWAPYU | CFG_YUV2RGB) |
-			     CFG_PALETTE_ENA | CFG_GRA_FTOGGLE,
+			     CFG_PALETTE_ENA | CFG_GRA_FTOGGLE |
+			     CFG_GRA_HSMOOTH | CFG_GRA_ENA,
 			     LCD_SPU_DMA_CTRL0);
 	armada_reg_queue_end(regs, i);
+}
+
+static void armada_drm_primary_set(struct drm_crtc *crtc,
+	struct drm_plane *plane, int x, int y)
+{
+	struct armada_plane_state *state = &drm_to_armada_plane(plane)->state;
+	struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
+	struct armada_regs regs[8];
+	bool interlaced = dcrtc->interlaced;
+
+	armada_drm_gra_plane_regs(regs, plane->fb, state, x, y, interlaced);
 	armada_drm_crtc_update_regs(dcrtc, regs);
 }
 
@@ -581,7 +625,7 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc,
 
 	interlaced = !!(adj->flags & DRM_MODE_FLAG_INTERLACE);
 
-	val = CFG_GRA_ENA | CFG_GRA_HSMOOTH;
+	val = CFG_GRA_ENA;
 	val |= CFG_GRA_FMT(drm_fb_to_armada_fb(dcrtc->crtc.primary->fb)->fmt);
 	val |= CFG_GRA_MOD(drm_fb_to_armada_fb(dcrtc->crtc.primary->fb)->mod);
 
@@ -633,8 +677,6 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc,
 	/* Now compute the divider for real */
 	dcrtc->variant->compute_clock(dcrtc, adj, &sclk);
 
-	/* Ensure graphic fifo is enabled */
-	armada_reg_queue_mod(regs, i, 0, CFG_PDWN64x66, LCD_SPU_SRAM_PARA1);
 	armada_reg_queue_set(regs, i, sclk, LCD_CFG_SCLK_DIV);
 
 	if (interlaced ^ dcrtc->interlaced) {
@@ -647,6 +689,9 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc,
 
 	spin_lock_irqsave(&dcrtc->irq_lock, flags);
 
+	/* Ensure graphic fifo is enabled */
+	armada_reg_queue_mod(regs, i, 0, CFG_PDWN64x66, LCD_SPU_SRAM_PARA1);
+
 	/* Even interlaced/progressive frame */
 	dcrtc->v[1].spu_v_h_total = adj->crtc_vtotal << 16 |
 				    adj->crtc_htotal;
@@ -729,47 +774,13 @@ static int armada_drm_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
 	return 0;
 }
 
-void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc,
-	struct drm_plane *plane)
-{
-	u32 sram_para1, dma_ctrl0_mask;
-
-	/*
-	 * Drop our reference on any framebuffer attached to this plane.
-	 * We don't need to NULL this out as drm_plane_force_disable(),
-	 * and __setplane_internal() will do so for an overlay plane, and
-	 * __drm_helper_disable_unused_functions() will do so for the
-	 * primary plane.
-	 */
-	if (plane->fb)
-		drm_framebuffer_put(plane->fb);
-
-	/* Power down the Y/U/V FIFOs */
-	sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66;
-
-	/* Power down most RAMs and FIFOs if this is the primary plane */
-	if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
-		sram_para1 |= CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 |
-			      CFG_PDWN32x32 | CFG_PDWN64x66;
-		dma_ctrl0_mask = CFG_GRA_ENA;
-	} else {
-		dma_ctrl0_mask = CFG_DMA_ENA;
-	}
-
-	spin_lock_irq(&dcrtc->irq_lock);
-	armada_updatel(0, dma_ctrl0_mask, dcrtc->base + LCD_SPU_DMA_CTRL0);
-	spin_unlock_irq(&dcrtc->irq_lock);
-
-	armada_updatel(sram_para1, 0, dcrtc->base + LCD_SPU_SRAM_PARA1);
-}
-
 /* The mode_config.mutex will be held for this call */
 static void armada_drm_crtc_disable(struct drm_crtc *crtc)
 {
-	struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
-
 	armada_drm_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
-	armada_drm_crtc_plane_disable(dcrtc, crtc->primary);
+
+	/* Disable our primary plane when we disable the CRTC. */
+	crtc->primary->funcs->disable_plane(crtc->primary, NULL);
 }
 
 static const struct drm_crtc_helper_funcs armada_crtc_helper_funcs = {
@@ -879,9 +890,11 @@ static int armada_drm_crtc_cursor_update(struct armada_crtc *dcrtc, bool reload)
 		return 0;
 	}
 
+	spin_lock_irq(&dcrtc->irq_lock);
 	para1 = readl_relaxed(dcrtc->base + LCD_SPU_SRAM_PARA1);
 	armada_updatel(CFG_CSB_256x32, CFG_CSB_256x32 | CFG_PDWN256x32,
 		       dcrtc->base + LCD_SPU_SRAM_PARA1);
+	spin_unlock_irq(&dcrtc->irq_lock);
 
 	/*
 	 * Initialize the transparency if the SRAM was powered down.
@@ -1021,7 +1034,7 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc,
 	struct drm_modeset_acquire_ctx *ctx)
 {
 	struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
-	struct armada_frame_work *work;
+	struct armada_plane_work *work;
 	unsigned i;
 	int ret;
 
@@ -1029,11 +1042,10 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc,
 	if (fb->format != crtc->primary->fb->format)
 		return -EINVAL;
 
-	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	work = armada_drm_crtc_alloc_plane_work(dcrtc->crtc.primary);
 	if (!work)
 		return -ENOMEM;
 
-	work->work.fn = armada_drm_crtc_complete_frame_work;
 	work->event = event;
 	work->old_fb = dcrtc->crtc.primary->fb;
 
@@ -1047,7 +1059,7 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc,
 	 */
 	drm_framebuffer_get(fb);
 
-	ret = armada_drm_crtc_queue_frame_work(dcrtc, work);
+	ret = armada_drm_plane_work_queue(dcrtc, work);
 	if (ret) {
 		/* Undo our reference above */
 		drm_framebuffer_put(fb);
@@ -1127,14 +1139,195 @@ static const struct drm_crtc_funcs armada_crtc_funcs = {
 	.disable_vblank	= armada_drm_crtc_disable_vblank,
 };
 
+static void armada_drm_primary_update_state(struct drm_plane_state *state,
+	struct armada_regs *regs)
+{
+	struct armada_plane *dplane = drm_to_armada_plane(state->plane);
+	struct armada_crtc *dcrtc = drm_to_armada_crtc(state->crtc);
+	struct armada_framebuffer *dfb = drm_fb_to_armada_fb(state->fb);
+	bool was_disabled;
+	unsigned int idx = 0;
+	u32 val;
+
+	val = CFG_GRA_FMT(dfb->fmt) | CFG_GRA_MOD(dfb->mod);
+	if (dfb->fmt > CFG_420)
+		val |= CFG_PALETTE_ENA;
+	if (state->visible)
+		val |= CFG_GRA_ENA;
+	if (drm_rect_width(&state->src) >> 16 != drm_rect_width(&state->dst))
+		val |= CFG_GRA_HSMOOTH;
+
+	was_disabled = !(dplane->state.ctrl0 & CFG_GRA_ENA);
+	if (was_disabled)
+		armada_reg_queue_mod(regs, idx,
+				     0, CFG_PDWN64x66, LCD_SPU_SRAM_PARA1);
+
+	dplane->state.ctrl0 = val;
+	dplane->state.src_hw = (drm_rect_height(&state->src) & 0xffff0000) |
+				drm_rect_width(&state->src) >> 16;
+	dplane->state.dst_hw = drm_rect_height(&state->dst) << 16 |
+			       drm_rect_width(&state->dst);
+	dplane->state.dst_yx = state->dst.y1 << 16 | state->dst.x1;
+
+	armada_drm_gra_plane_regs(regs + idx, &dfb->fb, &dplane->state,
+				  state->src.x1 >> 16, state->src.y1 >> 16,
+				  dcrtc->interlaced);
+
+	dplane->state.vsync_update = !was_disabled;
+	dplane->state.changed = true;
+}
+
+static int armada_drm_primary_update(struct drm_plane *plane,
+	struct drm_crtc *crtc, struct drm_framebuffer *fb,
+	int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h,
+	uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h,
+	struct drm_modeset_acquire_ctx *ctx)
+{
+	struct armada_plane *dplane = drm_to_armada_plane(plane);
+	struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
+	struct armada_plane_work *work;
+	struct drm_plane_state state = {
+		.plane = plane,
+		.crtc = crtc,
+		.fb = fb,
+		.src_x = src_x,
+		.src_y = src_y,
+		.src_w = src_w,
+		.src_h = src_h,
+		.crtc_x = crtc_x,
+		.crtc_y = crtc_y,
+		.crtc_w = crtc_w,
+		.crtc_h = crtc_h,
+		.rotation = DRM_MODE_ROTATE_0,
+	};
+	const struct drm_rect clip = {
+		.x2 = crtc->mode.hdisplay,
+		.y2 = crtc->mode.vdisplay,
+	};
+	int ret;
+
+	ret = drm_atomic_helper_check_plane_state(&state, crtc->state, &clip, 0,
+						  INT_MAX, true, false);
+	if (ret)
+		return ret;
+
+	work = &dplane->works[dplane->next_work];
+	work->fn = armada_drm_crtc_complete_frame_work;
+
+	if (plane->fb != fb) {
+		/*
+		 * Take a reference on the new framebuffer - we want to
+		 * hold on to it while the hardware is displaying it.
+		 */
+		drm_framebuffer_reference(fb);
+
+		work->old_fb = plane->fb;
+	} else {
+		work->old_fb = NULL;
+	}
+
+	armada_drm_primary_update_state(&state, work->regs);
+
+	if (!dplane->state.changed)
+		return 0;
+
+	/* Wait for pending work to complete */
+	if (armada_drm_plane_work_wait(dplane, HZ / 10) == 0)
+		armada_drm_plane_work_cancel(dcrtc, dplane);
+
+	if (!dplane->state.vsync_update) {
+		work->fn(dcrtc, work);
+		if (work->old_fb)
+			drm_framebuffer_unreference(work->old_fb);
+		return 0;
+	}
+
+	/* Queue it for update on the next interrupt if we are enabled */
+	ret = armada_drm_plane_work_queue(dcrtc, work);
+	if (ret) {
+		work->fn(dcrtc, work);
+		if (work->old_fb)
+			drm_framebuffer_unreference(work->old_fb);
+	}
+
+	dplane->next_work = !dplane->next_work;
+
+	return 0;
+}
+
+int armada_drm_plane_disable(struct drm_plane *plane,
+			     struct drm_modeset_acquire_ctx *ctx)
+{
+	struct armada_plane *dplane = drm_to_armada_plane(plane);
+	struct armada_crtc *dcrtc;
+	struct armada_plane_work *work;
+	unsigned int idx = 0;
+	u32 sram_para1, enable_mask;
+
+	if (!plane->crtc)
+		return 0;
+
+	/*
+	 * Arrange to power down most RAMs and FIFOs if this is the primary
+	 * plane, otherwise just the YUV FIFOs for the overlay plane.
+	 */
+	if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
+		sram_para1 = CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 |
+			     CFG_PDWN32x32 | CFG_PDWN64x66;
+		enable_mask = CFG_GRA_ENA;
+	} else {
+		sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66;
+		enable_mask = CFG_DMA_ENA;
+	}
+
+	dplane->state.ctrl0 &= ~enable_mask;
+
+	dcrtc = drm_to_armada_crtc(plane->crtc);
+
+	/*
+	 * Try to disable the plane and drop our ref on the framebuffer
+	 * at the next frame update. If we fail for any reason, disable
+	 * the plane immediately.
+	 */
+	work = &dplane->works[dplane->next_work];
+	work->fn = armada_drm_crtc_complete_disable_work;
+	work->cancel = armada_drm_crtc_complete_disable_work;
+	work->old_fb = plane->fb;
+
+	armada_reg_queue_mod(work->regs, idx,
+			     0, enable_mask, LCD_SPU_DMA_CTRL0);
+	armada_reg_queue_mod(work->regs, idx,
+			     sram_para1, 0, LCD_SPU_SRAM_PARA1);
+	armada_reg_queue_end(work->regs, idx);
+
+	/* Wait for any preceding work to complete, but don't wedge */
+	if (WARN_ON(!armada_drm_plane_work_wait(dplane, HZ)))
+		armada_drm_plane_work_cancel(dcrtc, dplane);
+
+	if (armada_drm_plane_work_queue(dcrtc, work)) {
+		work->fn(dcrtc, work);
+		if (work->old_fb)
+			drm_framebuffer_unreference(work->old_fb);
+	}
+
+	dplane->next_work = !dplane->next_work;
+
+	return 0;
+}
+
 static const struct drm_plane_funcs armada_primary_plane_funcs = {
-	.update_plane	= drm_primary_helper_update,
-	.disable_plane	= drm_primary_helper_disable,
+	.update_plane	= armada_drm_primary_update,
+	.disable_plane	= armada_drm_plane_disable,
 	.destroy	= drm_primary_helper_destroy,
 };
 
 int armada_drm_plane_init(struct armada_plane *plane)
 {
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(plane->works); i++)
+		plane->works[i].plane = &plane->base;
+
 	init_waitqueue_head(&plane->frame_wait);
 
 	return 0;
@@ -1225,17 +1418,13 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev,
 
 	ret = devm_request_irq(dev, irq, armada_drm_irq, 0, "armada_drm_crtc",
 			       dcrtc);
-	if (ret < 0) {
-		kfree(dcrtc);
-		return ret;
-	}
+	if (ret < 0)
+		goto err_crtc;
 
 	if (dcrtc->variant->init) {
 		ret = dcrtc->variant->init(dcrtc, dev);
-		if (ret) {
-			kfree(dcrtc);
-			return ret;
-		}
+		if (ret)
+			goto err_crtc;
 	}
 
 	/* Ensure AXI pipeline is enabled */
@@ -1246,13 +1435,15 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev,
 	dcrtc->crtc.port = port;
 
 	primary = kzalloc(sizeof(*primary), GFP_KERNEL);
-	if (!primary)
-		return -ENOMEM;
+	if (!primary) {
+		ret = -ENOMEM;
+		goto err_crtc;
+	}
 
 	ret = armada_drm_plane_init(primary);
 	if (ret) {
 		kfree(primary);
-		return ret;
+		goto err_crtc;
 	}
 
 	ret = drm_universal_plane_init(drm, &primary->base, 0,
@@ -1263,7 +1454,7 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev,
 				       DRM_PLANE_TYPE_PRIMARY, NULL);
 	if (ret) {
 		kfree(primary);
-		return ret;
+		goto err_crtc;
 	}
 
 	ret = drm_crtc_init_with_planes(drm, &dcrtc->crtc, &primary->base, NULL,
@@ -1282,6 +1473,9 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev,
 
 err_crtc_init:
 	primary->base.funcs->destroy(&primary->base);
+err_crtc:
+	kfree(dcrtc);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/armada/armada_crtc.h b/drivers/gpu/drm/armada/armada_crtc.h
index bab11f48357591325d5bab2f80de34981f4d5482..445829b8877af4b328c2fb8d1402a69700612c7b 100644
--- a/drivers/gpu/drm/armada/armada_crtc.h
+++ b/drivers/gpu/drm/armada/armada_crtc.h
@@ -36,21 +36,31 @@ struct armada_plane;
 struct armada_variant;
 
 struct armada_plane_work {
-	void			(*fn)(struct armada_crtc *,
-				      struct armada_plane *,
-				      struct armada_plane_work *);
+	void (*fn)(struct armada_crtc *, struct armada_plane_work *);
+	void (*cancel)(struct armada_crtc *, struct armada_plane_work *);
+	bool need_kfree;
+	struct drm_plane *plane;
+	struct drm_framebuffer *old_fb;
+	struct drm_pending_vblank_event *event;
+	struct armada_regs regs[14];
 };
 
 struct armada_plane_state {
+	u16 src_x;
+	u16 src_y;
 	u32 src_hw;
 	u32 dst_hw;
 	u32 dst_yx;
 	u32 ctrl0;
+	bool changed;
+	bool vsync_update;
 };
 
 struct armada_plane {
 	struct drm_plane	base;
 	wait_queue_head_t	frame_wait;
+	bool			next_work;
+	struct armada_plane_work works[2];
 	struct armada_plane_work *work;
 	struct armada_plane_state state;
 };
@@ -58,10 +68,10 @@ struct armada_plane {
 
 int armada_drm_plane_init(struct armada_plane *plane);
 int armada_drm_plane_work_queue(struct armada_crtc *dcrtc,
-	struct armada_plane *plane, struct armada_plane_work *work);
+	struct armada_plane_work *work);
 int armada_drm_plane_work_wait(struct armada_plane *plane, long timeout);
-struct armada_plane_work *armada_drm_plane_work_cancel(
-	struct armada_crtc *dcrtc, struct armada_plane *plane);
+void armada_drm_plane_work_cancel(struct armada_crtc *dcrtc,
+	struct armada_plane *plane);
 void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb,
 	int x, int y);
 
@@ -104,8 +114,8 @@ struct armada_crtc {
 
 void armada_drm_crtc_update_regs(struct armada_crtc *, struct armada_regs *);
 
-void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc,
-	struct drm_plane *plane);
+int armada_drm_plane_disable(struct drm_plane *plane,
+			     struct drm_modeset_acquire_ctx *ctx);
 
 extern struct platform_driver armada_lcd_platform_driver;
 
diff --git a/drivers/gpu/drm/armada/armada_drm.h b/drivers/gpu/drm/armada/armada_drm.h
index b064879ecdbd1073a379dbb438854094cdd30f80..cc4c557c9f66421737c77bd4dd92e977ab4fc75a 100644
--- a/drivers/gpu/drm/armada/armada_drm.h
+++ b/drivers/gpu/drm/armada/armada_drm.h
@@ -84,7 +84,6 @@ void armada_drm_queue_unref_work(struct drm_device *,
 extern const struct drm_mode_config_funcs armada_drm_mode_config_funcs;
 
 int armada_fbdev_init(struct drm_device *);
-void armada_fbdev_lastclose(struct drm_device *);
 void armada_fbdev_fini(struct drm_device *);
 
 int armada_overlay_plane_create(struct drm_device *, unsigned long);
diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c
index e857b88a9799427e701ffb8f119a5572e3e3ab72..4b11b6b52f1debe27a974e85d67e1826ba6eeb2b 100644
--- a/drivers/gpu/drm/armada/armada_drv.c
+++ b/drivers/gpu/drm/armada/armada_drv.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/of_graph.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_of.h>
 #include "armada_crtc.h"
 #include "armada_drm.h"
@@ -54,15 +55,10 @@ static struct drm_ioctl_desc armada_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(ARMADA_GEM_PWRITE, armada_gem_pwrite_ioctl, 0),
 };
 
-static void armada_drm_lastclose(struct drm_device *dev)
-{
-	armada_fbdev_lastclose(dev);
-}
-
 DEFINE_DRM_GEM_FOPS(armada_drm_fops);
 
 static struct drm_driver armada_drm_driver = {
-	.lastclose		= armada_drm_lastclose,
+	.lastclose		= drm_fb_helper_lastclose,
 	.gem_free_object_unlocked = armada_gem_free_object,
 	.prime_handle_to_fd	= drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle	= drm_gem_prime_fd_to_handle,
diff --git a/drivers/gpu/drm/armada/armada_fb.c b/drivers/gpu/drm/armada/armada_fb.c
index a38d5a0892a97d0e2c7debd50621fa10d6f9c704..ac92bce07ecd92aedffe35ea481df054e24175b8 100644
--- a/drivers/gpu/drm/armada/armada_fb.c
+++ b/drivers/gpu/drm/armada/armada_fb.c
@@ -154,16 +154,7 @@ static struct drm_framebuffer *armada_fb_create(struct drm_device *dev,
 	return ERR_PTR(ret);
 }
 
-static void armada_output_poll_changed(struct drm_device *dev)
-{
-	struct armada_private *priv = dev->dev_private;
-	struct drm_fb_helper *fbh = priv->fbdev;
-
-	if (fbh)
-		drm_fb_helper_hotplug_event(fbh);
-}
-
 const struct drm_mode_config_funcs armada_drm_mode_config_funcs = {
 	.fb_create		= armada_fb_create,
-	.output_poll_changed	= armada_output_poll_changed,
+	.output_poll_changed	= drm_fb_helper_output_poll_changed,
 };
diff --git a/drivers/gpu/drm/armada/armada_fbdev.c b/drivers/gpu/drm/armada/armada_fbdev.c
index a2ce83f8480009e5bdc29988da10fdff614bf6dd..2a59db0994b2e704d92edb5b0ca1b03d1888737b 100644
--- a/drivers/gpu/drm/armada/armada_fbdev.c
+++ b/drivers/gpu/drm/armada/armada_fbdev.c
@@ -159,14 +159,6 @@ int armada_fbdev_init(struct drm_device *dev)
 	return ret;
 }
 
-void armada_fbdev_lastclose(struct drm_device *dev)
-{
-	struct armada_private *priv = dev->dev_private;
-
-	if (priv->fbdev)
-		drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev);
-}
-
 void armada_fbdev_fini(struct drm_device *dev)
 {
 	struct armada_private *priv = dev->dev_private;
diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c
index b411b608821a555320a16df12fe56a524a3c4dae..77b55adaa2acd8e6d2c98e553946eb4df5f42890 100644
--- a/drivers/gpu/drm/armada/armada_overlay.c
+++ b/drivers/gpu/drm/armada/armada_overlay.c
@@ -7,7 +7,7 @@
  * published by the Free Software Foundation.
  */
 #include <drm/drmP.h>
-#include <drm/drm_plane_helper.h>
+#include <drm/drm_atomic_helper.h>
 #include "armada_crtc.h"
 #include "armada_drm.h"
 #include "armada_fb.h"
@@ -32,11 +32,6 @@ struct armada_ovl_plane_properties {
 
 struct armada_ovl_plane {
 	struct armada_plane base;
-	struct drm_framebuffer *old_fb;
-	struct {
-		struct armada_plane_work work;
-		struct armada_regs regs[13];
-	} vbl;
 	struct armada_ovl_plane_properties prop;
 };
 #define drm_to_armada_ovl_plane(p) \
@@ -67,218 +62,204 @@ armada_ovl_update_attr(struct armada_ovl_plane_properties *prop,
 	spin_unlock_irq(&dcrtc->irq_lock);
 }
 
-static void armada_ovl_retire_fb(struct armada_ovl_plane *dplane,
-	struct drm_framebuffer *fb)
-{
-	struct drm_framebuffer *old_fb;
-
-	old_fb = xchg(&dplane->old_fb, fb);
-
-	if (old_fb)
-		armada_drm_queue_unref_work(dplane->base.base.dev, old_fb);
-}
-
 /* === Plane support === */
 static void armada_ovl_plane_work(struct armada_crtc *dcrtc,
-	struct armada_plane *plane, struct armada_plane_work *work)
+	struct armada_plane_work *work)
 {
-	struct armada_ovl_plane *dplane = container_of(plane, struct armada_ovl_plane, base);
+	unsigned long flags;
 
-	trace_armada_ovl_plane_work(&dcrtc->crtc, &plane->base);
+	trace_armada_ovl_plane_work(&dcrtc->crtc, work->plane);
 
-	armada_drm_crtc_update_regs(dcrtc, dplane->vbl.regs);
-	armada_ovl_retire_fb(dplane, NULL);
+	spin_lock_irqsave(&dcrtc->irq_lock, flags);
+	armada_drm_crtc_update_regs(dcrtc, work->regs);
+	spin_unlock_irqrestore(&dcrtc->irq_lock, flags);
 }
 
-static int
-armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
-	struct drm_framebuffer *fb,
-	int crtc_x, int crtc_y, unsigned crtc_w, unsigned crtc_h,
-	uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h,
-	struct drm_modeset_acquire_ctx *ctx)
+static void armada_ovl_plane_update_state(struct drm_plane_state *state,
+	struct armada_regs *regs)
 {
-	struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane);
-	struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
-	struct drm_rect src = {
-		.x1 = src_x,
-		.y1 = src_y,
-		.x2 = src_x + src_w,
-		.y2 = src_y + src_h,
-	};
-	struct drm_rect dest = {
-		.x1 = crtc_x,
-		.y1 = crtc_y,
-		.x2 = crtc_x + crtc_w,
-		.y2 = crtc_y + crtc_h,
-	};
-	const struct drm_rect clip = {
-		.x2 = crtc->mode.hdisplay,
-		.y2 = crtc->mode.vdisplay,
-	};
-	uint32_t val, ctrl0;
-	unsigned idx = 0;
-	bool visible;
-	int ret;
-
-	trace_armada_ovl_plane_update(plane, crtc, fb,
-				 crtc_x, crtc_y, crtc_w, crtc_h,
-				 src_x, src_y, src_w, src_h);
-
-	ret = drm_plane_helper_check_update(plane, crtc, fb, &src, &dest, &clip,
-					    DRM_MODE_ROTATE_0,
-					    0, INT_MAX, true, false, &visible);
-	if (ret)
-		return ret;
-
-	ctrl0 = CFG_DMA_FMT(drm_fb_to_armada_fb(fb)->fmt) |
-		CFG_DMA_MOD(drm_fb_to_armada_fb(fb)->mod) |
-		CFG_CBSH_ENA | CFG_DMA_HSMOOTH | CFG_DMA_ENA;
-
-	/* Does the position/size result in nothing to display? */
-	if (!visible)
-		ctrl0 &= ~CFG_DMA_ENA;
-
-	if (!dcrtc->plane) {
-		dcrtc->plane = plane;
-		armada_ovl_update_attr(&dplane->prop, dcrtc);
-	}
-
-	/* FIXME: overlay on an interlaced display */
-	/* Just updating the position/size? */
-	if (plane->fb == fb && dplane->base.state.ctrl0 == ctrl0) {
-		val = (drm_rect_height(&src) & 0xffff0000) |
-		      drm_rect_width(&src) >> 16;
-		dplane->base.state.src_hw = val;
-		writel_relaxed(val, dcrtc->base + LCD_SPU_DMA_HPXL_VLN);
-
-		val = drm_rect_height(&dest) << 16 | drm_rect_width(&dest);
-		dplane->base.state.dst_hw = val;
-		writel_relaxed(val, dcrtc->base + LCD_SPU_DZM_HPXL_VLN);
-
-		val = dest.y1 << 16 | dest.x1;
-		dplane->base.state.dst_yx = val;
-		writel_relaxed(val, dcrtc->base + LCD_SPU_DMA_OVSA_HPXL_VLN);
-
-		return 0;
-	} else if (~dplane->base.state.ctrl0 & ctrl0 & CFG_DMA_ENA) {
+	struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(state->plane);
+	struct armada_framebuffer *dfb = drm_fb_to_armada_fb(state->fb);
+	const struct drm_format_info *format;
+	unsigned int idx = 0;
+	bool fb_changed;
+	u32 val, ctrl0;
+	u16 src_x, src_y;
+
+	ctrl0 = CFG_DMA_FMT(dfb->fmt) | CFG_DMA_MOD(dfb->mod) | CFG_CBSH_ENA;
+	if (state->visible)
+		ctrl0 |= CFG_DMA_ENA;
+	if (drm_rect_width(&state->src) >> 16 != drm_rect_width(&state->dst))
+		ctrl0 |= CFG_DMA_HSMOOTH;
+
+	/*
+	 * Shifting a YUV packed format image by one pixel causes the U/V
+	 * planes to swap.  Compensate for it by also toggling the UV swap.
+	 */
+	format = dfb->fb.format;
+	if (format->num_planes == 1 && state->src.x1 >> 16 & (format->hsub - 1))
+		ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV);
+
+	if (~dplane->base.state.ctrl0 & ctrl0 & CFG_DMA_ENA) {
 		/* Power up the Y/U/V FIFOs on ENA 0->1 transitions */
-		armada_updatel(0, CFG_PDWN16x66 | CFG_PDWN32x66,
-			       dcrtc->base + LCD_SPU_SRAM_PARA1);
+		armada_reg_queue_mod(regs, idx,
+				     0, CFG_PDWN16x66 | CFG_PDWN32x66,
+				     LCD_SPU_SRAM_PARA1);
 	}
 
-	if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0)
-		armada_drm_plane_work_cancel(dcrtc, &dplane->base);
-
-	if (plane->fb != fb) {
-		u32 addrs[3], pixel_format;
-		int num_planes, hsub;
-
-		/*
-		 * Take a reference on the new framebuffer - we want to
-		 * hold on to it while the hardware is displaying it.
-		 */
-		drm_framebuffer_get(fb);
-
-		if (plane->fb)
-			armada_ovl_retire_fb(dplane, plane->fb);
+	fb_changed = dplane->base.base.fb != &dfb->fb ||
+		     dplane->base.state.src_x != state->src.x1 >> 16 ||
+	             dplane->base.state.src_y != state->src.y1 >> 16;
 
-		src_y = src.y1 >> 16;
-		src_x = src.x1 >> 16;
+	dplane->base.state.vsync_update = fb_changed;
 
-		armada_drm_plane_calc_addrs(addrs, fb, src_x, src_y);
+	/* FIXME: overlay on an interlaced display */
+	if (fb_changed) {
+		u32 addrs[3];
 
-		pixel_format = fb->format->format;
-		hsub = drm_format_horz_chroma_subsampling(pixel_format);
-		num_planes = fb->format->num_planes;
+		dplane->base.state.src_y = src_y = state->src.y1 >> 16;
+		dplane->base.state.src_x = src_x = state->src.x1 >> 16;
 
-		/*
-		 * Annoyingly, shifting a YUYV-format image by one pixel
-		 * causes the U/V planes to toggle.  Toggle the UV swap.
-		 * (Unfortunately, this causes momentary colour flickering.)
-		 */
-		if (src_x & (hsub - 1) && num_planes == 1)
-			ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV);
+		armada_drm_plane_calc_addrs(addrs, &dfb->fb, src_x, src_y);
 
-		armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0],
+		armada_reg_queue_set(regs, idx, addrs[0],
 				     LCD_SPU_DMA_START_ADDR_Y0);
-		armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1],
+		armada_reg_queue_set(regs, idx, addrs[1],
 				     LCD_SPU_DMA_START_ADDR_U0);
-		armada_reg_queue_set(dplane->vbl.regs, idx, addrs[2],
+		armada_reg_queue_set(regs, idx, addrs[2],
 				     LCD_SPU_DMA_START_ADDR_V0);
-		armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0],
+		armada_reg_queue_set(regs, idx, addrs[0],
 				     LCD_SPU_DMA_START_ADDR_Y1);
-		armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1],
+		armada_reg_queue_set(regs, idx, addrs[1],
 				     LCD_SPU_DMA_START_ADDR_U1);
-		armada_reg_queue_set(dplane->vbl.regs, idx, addrs[2],
+		armada_reg_queue_set(regs, idx, addrs[2],
 				     LCD_SPU_DMA_START_ADDR_V1);
 
-		val = fb->pitches[0] << 16 | fb->pitches[0];
-		armada_reg_queue_set(dplane->vbl.regs, idx, val,
+		val = dfb->fb.pitches[0] << 16 | dfb->fb.pitches[0];
+		armada_reg_queue_set(regs, idx, val,
 				     LCD_SPU_DMA_PITCH_YC);
-		val = fb->pitches[1] << 16 | fb->pitches[2];
-		armada_reg_queue_set(dplane->vbl.regs, idx, val,
+		val = dfb->fb.pitches[1] << 16 | dfb->fb.pitches[2];
+		armada_reg_queue_set(regs, idx, val,
 				     LCD_SPU_DMA_PITCH_UV);
 	}
 
-	val = (drm_rect_height(&src) & 0xffff0000) | drm_rect_width(&src) >> 16;
+	val = (drm_rect_height(&state->src) & 0xffff0000) |
+	       drm_rect_width(&state->src) >> 16;
 	if (dplane->base.state.src_hw != val) {
 		dplane->base.state.src_hw = val;
-		armada_reg_queue_set(dplane->vbl.regs, idx, val,
+		armada_reg_queue_set(regs, idx, val,
 				     LCD_SPU_DMA_HPXL_VLN);
 	}
 
-	val = drm_rect_height(&dest) << 16 | drm_rect_width(&dest);
+	val = drm_rect_height(&state->dst) << 16 | drm_rect_width(&state->dst);
 	if (dplane->base.state.dst_hw != val) {
 		dplane->base.state.dst_hw = val;
-		armada_reg_queue_set(dplane->vbl.regs, idx, val,
+		armada_reg_queue_set(regs, idx, val,
 				     LCD_SPU_DZM_HPXL_VLN);
 	}
 
-	val = dest.y1 << 16 | dest.x1;
+	val = state->dst.y1 << 16 | state->dst.x1;
 	if (dplane->base.state.dst_yx != val) {
 		dplane->base.state.dst_yx = val;
-		armada_reg_queue_set(dplane->vbl.regs, idx, val,
+		armada_reg_queue_set(regs, idx, val,
 				     LCD_SPU_DMA_OVSA_HPXL_VLN);
 	}
 
 	if (dplane->base.state.ctrl0 != ctrl0) {
 		dplane->base.state.ctrl0 = ctrl0;
-		armada_reg_queue_mod(dplane->vbl.regs, idx, ctrl0,
+		armada_reg_queue_mod(regs, idx, ctrl0,
 			CFG_CBSH_ENA | CFG_DMAFORMAT | CFG_DMA_FTOGGLE |
 			CFG_DMA_HSMOOTH | CFG_DMA_TSTMODE |
 			CFG_DMA_MOD(CFG_SWAPRB | CFG_SWAPUV | CFG_SWAPYU |
 			CFG_YUV2RGB) | CFG_DMA_ENA,
 			LCD_SPU_DMA_CTRL0);
+		dplane->base.state.vsync_update = true;
 	}
-	if (idx) {
-		armada_reg_queue_end(dplane->vbl.regs, idx);
-		armada_drm_plane_work_queue(dcrtc, &dplane->base,
-					    &dplane->vbl.work);
-	}
-	return 0;
+
+	dplane->base.state.changed = idx != 0;
+
+	armada_reg_queue_end(regs, idx);
 }
 
-static int armada_ovl_plane_disable(struct drm_plane *plane,
-				    struct drm_modeset_acquire_ctx *ctx)
+static int
+armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
+	struct drm_framebuffer *fb,
+	int crtc_x, int crtc_y, unsigned crtc_w, unsigned crtc_h,
+	uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h,
+	struct drm_modeset_acquire_ctx *ctx)
 {
 	struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane);
-	struct drm_framebuffer *fb;
-	struct armada_crtc *dcrtc;
+	struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
+	struct armada_plane_work *work;
+	struct drm_plane_state state = {
+		.plane = plane,
+		.crtc = crtc,
+		.fb = fb,
+		.src_x = src_x,
+		.src_y = src_y,
+		.src_w = src_w,
+		.src_h = src_h,
+		.crtc_x = crtc_x,
+		.crtc_y = crtc_y,
+		.crtc_w = crtc_w,
+		.crtc_h = crtc_h,
+		.rotation = DRM_MODE_ROTATE_0,
+	};
+	const struct drm_rect clip = {
+		.x2 = crtc->mode.hdisplay,
+		.y2 = crtc->mode.vdisplay,
+	};
+	int ret;
 
-	if (!dplane->base.base.crtc)
+	trace_armada_ovl_plane_update(plane, crtc, fb,
+				 crtc_x, crtc_y, crtc_w, crtc_h,
+				 src_x, src_y, src_w, src_h);
+
+	ret = drm_atomic_helper_check_plane_state(&state, crtc->state, &clip, 0,
+						  INT_MAX, true, false);
+	if (ret)
+		return ret;
+
+	work = &dplane->base.works[dplane->base.next_work];
+
+	if (plane->fb != fb) {
+		/*
+		 * Take a reference on the new framebuffer - we want to
+		 * hold on to it while the hardware is displaying it.
+		 */
+		drm_framebuffer_reference(fb);
+
+		work->old_fb = plane->fb;
+	} else {
+		work->old_fb = NULL;
+	}
+
+	armada_ovl_plane_update_state(&state, work->regs);
+
+	if (!dplane->base.state.changed)
 		return 0;
 
-	dcrtc = drm_to_armada_crtc(dplane->base.base.crtc);
+	/* Wait for pending work to complete */
+	if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0)
+		armada_drm_plane_work_cancel(dcrtc, &dplane->base);
+
+	/* Just updating the position/size? */
+	if (!dplane->base.state.vsync_update) {
+		armada_ovl_plane_work(dcrtc, work);
+		return 0;
+	}
 
-	armada_drm_plane_work_cancel(dcrtc, &dplane->base);
-	armada_drm_crtc_plane_disable(dcrtc, plane);
+	if (!dcrtc->plane) {
+		dcrtc->plane = plane;
+		armada_ovl_update_attr(&dplane->prop, dcrtc);
+	}
 
-	dcrtc->plane = NULL;
-	dplane->base.state.ctrl0 = 0;
+	/* Queue it for update on the next interrupt if we are enabled */
+	ret = armada_drm_plane_work_queue(dcrtc, work);
+	if (ret)
+		DRM_ERROR("failed to queue plane work: %d\n", ret);
 
-	fb = xchg(&dplane->old_fb, NULL);
-	if (fb)
-		drm_framebuffer_put(fb);
+	dplane->base.next_work = !dplane->base.next_work;
 
 	return 0;
 }
@@ -362,7 +343,7 @@ static int armada_ovl_plane_set_property(struct drm_plane *plane,
 
 static const struct drm_plane_funcs armada_ovl_plane_funcs = {
 	.update_plane	= armada_ovl_plane_update,
-	.disable_plane	= armada_ovl_plane_disable,
+	.disable_plane	= armada_drm_plane_disable,
 	.destroy	= armada_ovl_plane_destroy,
 	.set_property	= armada_ovl_plane_set_property,
 };
@@ -454,7 +435,8 @@ int armada_overlay_plane_create(struct drm_device *dev, unsigned long crtcs)
 		return ret;
 	}
 
-	dplane->vbl.work.fn = armada_ovl_plane_work;
+	dplane->base.works[0].fn = armada_ovl_plane_work;
+	dplane->base.works[1].fn = armada_ovl_plane_work;
 
 	ret = drm_universal_plane_init(dev, &dplane->base.base, crtcs,
 				       &armada_ovl_plane_funcs,
diff --git a/drivers/gpu/drm/armada/armada_trace.h b/drivers/gpu/drm/armada/armada_trace.h
index 8dbfea7a00fe328fe953dae40720e5be97c935c7..f03a56bda59615860a41a7ec0fa356a0adb8f437 100644
--- a/drivers/gpu/drm/armada/armada_trace.h
+++ b/drivers/gpu/drm/armada/armada_trace.h
@@ -34,14 +34,34 @@ TRACE_EVENT(armada_ovl_plane_update,
 		__field(struct drm_plane *, plane)
 		__field(struct drm_crtc *, crtc)
 		__field(struct drm_framebuffer *, fb)
+		__field(int, crtc_x)
+		__field(int, crtc_y)
+		__field(unsigned int, crtc_w)
+		__field(unsigned int, crtc_h)
+		__field(u32, src_x)
+		__field(u32, src_y)
+		__field(u32, src_w)
+		__field(u32, src_h)
 	),
 	TP_fast_assign(
 		__entry->plane = plane;
 		__entry->crtc = crtc;
 		__entry->fb = fb;
+		__entry->crtc_x = crtc_x;
+		__entry->crtc_y = crtc_y;
+		__entry->crtc_w = crtc_w;
+		__entry->crtc_h = crtc_h;
+		__entry->src_x = src_x;
+		__entry->src_y = src_y;
+		__entry->src_w = src_w;
+		__entry->src_h = src_h;
 	),
-	TP_printk("plane %p crtc %p fb %p",
-		__entry->plane, __entry->crtc, __entry->fb)
+	TP_printk("plane %p crtc %p fb %p crtc @ (%d,%d, %ux%u) src @ (%u,%u, %ux%u)",
+		__entry->plane, __entry->crtc, __entry->fb,
+		__entry->crtc_x, __entry->crtc_y,
+		__entry->crtc_w, __entry->crtc_h,
+		__entry->src_x >> 16, __entry->src_y >> 16,
+		__entry->src_w >> 16, __entry->src_h >> 16)
 );
 
 TRACE_EVENT(armada_ovl_plane_work,
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index 28da7c2b7ed9dc060cacc0e67a4df45c4f1e0233..7b784d91e258e9c424fa9cad901f0de03f435080 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -216,9 +216,10 @@ static struct ttm_tt *ast_ttm_tt_create(struct ttm_bo_device *bdev,
 	return tt;
 }
 
-static int ast_ttm_tt_populate(struct ttm_tt *ttm)
+static int ast_ttm_tt_populate(struct ttm_tt *ttm,
+			struct ttm_operation_ctx *ctx)
 {
-	return ttm_pool_populate(ttm);
+	return ttm_pool_populate(ttm, ctx);
 }
 
 static void ast_ttm_tt_unpopulate(struct ttm_tt *ttm)
@@ -237,7 +238,6 @@ struct ttm_bo_driver ast_bo_driver = {
 	.verify_access = ast_bo_verify_access,
 	.io_mem_reserve = &ast_ttm_io_mem_reserve,
 	.io_mem_free = &ast_ttm_io_mem_free,
-	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int ast_mm_init(struct ast_private *ast)
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
index c6e8061ffcfc181415f67a697646c77d3fbb1417..c1ea5c36b0061a9220a0c168eab6a8d588054f25 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
@@ -461,13 +461,6 @@ static struct drm_framebuffer *atmel_hlcdc_fb_create(struct drm_device *dev,
 	return drm_gem_fb_create(dev, file_priv, mode_cmd);
 }
 
-static void atmel_hlcdc_fb_output_poll_changed(struct drm_device *dev)
-{
-	struct atmel_hlcdc_dc *dc = dev->dev_private;
-
-	drm_fbdev_cma_hotplug_event(dc->fbdev);
-}
-
 struct atmel_hlcdc_dc_commit {
 	struct work_struct work;
 	struct drm_device *dev;
@@ -563,7 +556,7 @@ static int atmel_hlcdc_dc_atomic_commit(struct drm_device *dev,
 
 static const struct drm_mode_config_funcs mode_config_funcs = {
 	.fb_create = atmel_hlcdc_fb_create,
-	.output_poll_changed = atmel_hlcdc_fb_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = atmel_hlcdc_dc_atomic_commit,
 };
@@ -665,10 +658,7 @@ static int atmel_hlcdc_dc_load(struct drm_device *dev)
 
 	platform_set_drvdata(pdev, dev);
 
-	dc->fbdev = drm_fbdev_cma_init(dev, 24,
-			dev->mode_config.num_connector);
-	if (IS_ERR(dc->fbdev))
-		dc->fbdev = NULL;
+	drm_fb_cma_fbdev_init(dev, 24, 0);
 
 	drm_kms_helper_poll_init(dev);
 
@@ -688,8 +678,7 @@ static void atmel_hlcdc_dc_unload(struct drm_device *dev)
 {
 	struct atmel_hlcdc_dc *dc = dev->dev_private;
 
-	if (dc->fbdev)
-		drm_fbdev_cma_fini(dc->fbdev);
+	drm_fb_cma_fbdev_fini(dev);
 	flush_workqueue(dc->wq);
 	drm_kms_helper_poll_fini(dev);
 	drm_mode_config_cleanup(dev);
@@ -705,13 +694,6 @@ static void atmel_hlcdc_dc_unload(struct drm_device *dev)
 	destroy_workqueue(dc->wq);
 }
 
-static void atmel_hlcdc_dc_lastclose(struct drm_device *dev)
-{
-	struct atmel_hlcdc_dc *dc = dev->dev_private;
-
-	drm_fbdev_cma_restore_mode(dc->fbdev);
-}
-
 static int atmel_hlcdc_dc_irq_postinstall(struct drm_device *dev)
 {
 	struct atmel_hlcdc_dc *dc = dev->dev_private;
@@ -744,7 +726,7 @@ static struct drm_driver atmel_hlcdc_dc_driver = {
 	.driver_features = DRIVER_HAVE_IRQ | DRIVER_GEM |
 			   DRIVER_MODESET | DRIVER_PRIME |
 			   DRIVER_ATOMIC,
-	.lastclose = atmel_hlcdc_dc_lastclose,
+	.lastclose = drm_fb_helper_lastclose,
 	.irq_handler = atmel_hlcdc_dc_irq_handler,
 	.irq_preinstall = atmel_hlcdc_dc_irq_uninstall,
 	.irq_postinstall = atmel_hlcdc_dc_irq_postinstall,
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h
index 6833ee253cfacafbbbf218340a3e1f8983ca805a..ab32d5b268d2d4e1ff18e1befe24e8e42f874ad0 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h
@@ -32,6 +32,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
@@ -374,7 +375,6 @@ struct atmel_hlcdc_dc {
 	const struct atmel_hlcdc_dc_desc *desc;
 	struct dma_pool *dscrpool;
 	struct atmel_hlcdc *hlcdc;
-	struct drm_fbdev_cma *fbdev;
 	struct drm_crtc *crtc;
 	struct atmel_hlcdc_layer *layers[ATMEL_HLCDC_MAX_LAYERS];
 	struct workqueue_struct *wq;
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index 8250b5e612d25f22b3a826aaf9c9c3c7a9a5f459..704e879711e4f74a7b275a76019c283a1140df42 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -205,7 +205,6 @@ struct ttm_bo_driver bochs_bo_driver = {
 	.verify_access = bochs_bo_verify_access,
 	.io_mem_reserve = &bochs_ttm_io_mem_reserve,
 	.io_mem_free = &bochs_ttm_io_mem_free,
-	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int bochs_mm_init(struct bochs_device *bochs)
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
index 5dd3f1cd074a1d36f17022af23c7a23638eaa41e..a8905049b9da022802a44cb836c22fbb86f90560 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
@@ -946,7 +946,9 @@ static int analogix_dp_get_modes(struct drm_connector *connector)
 			return 0;
 		}
 
+		pm_runtime_get_sync(dp->dev);
 		edid = drm_get_edid(connector, &dp->aux.ddc);
+		pm_runtime_put(dp->dev);
 		if (edid) {
 			drm_mode_connector_update_edid_property(&dp->connector,
 								edid);
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index 2a5b54d3a03aedb1d7b60a635f3b8cea32939338..a8e31ea07382552869b5b6c2196b4fe2260d6abe 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -216,9 +216,10 @@ static struct ttm_tt *cirrus_ttm_tt_create(struct ttm_bo_device *bdev,
 	return tt;
 }
 
-static int cirrus_ttm_tt_populate(struct ttm_tt *ttm)
+static int cirrus_ttm_tt_populate(struct ttm_tt *ttm,
+		struct ttm_operation_ctx *ctx)
 {
-	return ttm_pool_populate(ttm);
+	return ttm_pool_populate(ttm, ctx);
 }
 
 static void cirrus_ttm_tt_unpopulate(struct ttm_tt *ttm)
@@ -237,7 +238,6 @@ struct ttm_bo_driver cirrus_bo_driver = {
 	.verify_access = cirrus_bo_verify_access,
 	.io_mem_reserve = &cirrus_ttm_io_mem_reserve,
 	.io_mem_free = &cirrus_ttm_io_mem_free,
-	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int cirrus_mm_init(struct cirrus_device *cirrus)
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 37445d50816a5c2654b5b145c3c397044a429db1..b76d49218cf1de132be35d1ccb9226f8ba8a7704 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -50,7 +50,8 @@ EXPORT_SYMBOL(__drm_crtc_commit_free);
  * @state: atomic state
  *
  * Free all the memory allocated by drm_atomic_state_init.
- * This is useful for drivers that subclass the atomic state.
+ * This should only be used by drivers which are still subclassing
+ * &drm_atomic_state and haven't switched to &drm_private_state yet.
  */
 void drm_atomic_state_default_release(struct drm_atomic_state *state)
 {
@@ -67,7 +68,8 @@ EXPORT_SYMBOL(drm_atomic_state_default_release);
  * @state: atomic state
  *
  * Default implementation for filling in a new atomic state.
- * This is useful for drivers that subclass the atomic state.
+ * This should only be used by drivers which are still subclassing
+ * &drm_atomic_state and haven't switched to &drm_private_state yet.
  */
 int
 drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state)
@@ -132,7 +134,8 @@ EXPORT_SYMBOL(drm_atomic_state_alloc);
  * @state: atomic state
  *
  * Default implementation for clearing atomic state.
- * This is useful for drivers that subclass the atomic state.
+ * This should only be used by drivers which are still subclassing
+ * &drm_atomic_state and haven't switched to &drm_private_state yet.
  */
 void drm_atomic_state_default_clear(struct drm_atomic_state *state)
 {
@@ -946,6 +949,42 @@ static void drm_atomic_plane_print_state(struct drm_printer *p,
 		plane->funcs->atomic_print_state(p, state);
 }
 
+/**
+ * DOC: handling driver private state
+ *
+ * Very often the DRM objects exposed to userspace in the atomic modeset api
+ * (&drm_connector, &drm_crtc and &drm_plane) do not map neatly to the
+ * underlying hardware. Especially for any kind of shared resources (e.g. shared
+ * clocks, scaler units, bandwidth and fifo limits shared among a group of
+ * planes or CRTCs, and so on) it makes sense to model these as independent
+ * objects. Drivers then need to do similar state tracking and commit ordering for
+ * such private (since not exposed to userpace) objects as the atomic core and
+ * helpers already provide for connectors, planes and CRTCs.
+ *
+ * To make this easier on drivers the atomic core provides some support to track
+ * driver private state objects using struct &drm_private_obj, with the
+ * associated state struct &drm_private_state.
+ *
+ * Similar to userspace-exposed objects, private state structures can be
+ * acquired by calling drm_atomic_get_private_obj_state(). Since this function
+ * does not take care of locking, drivers should wrap it for each type of
+ * private state object they have with the required call to drm_modeset_lock()
+ * for the corresponding &drm_modeset_lock.
+ *
+ * All private state structures contained in a &drm_atomic_state update can be
+ * iterated using for_each_oldnew_private_obj_in_state(),
+ * for_each_new_private_obj_in_state() and for_each_old_private_obj_in_state().
+ * Drivers are recommended to wrap these for each type of driver private state
+ * object they have, filtering on &drm_private_obj.funcs using for_each_if(), at
+ * least if they want to iterate over all objects of a given type.
+ *
+ * An earlier way to handle driver private state was by subclassing struct
+ * &drm_atomic_state. But since that encourages non-standard ways to implement
+ * the check/commit split atomic requires (by using e.g. "check and rollback or
+ * commit instead" of "duplicate state, check, then either commit or release
+ * duplicated state) it is deprecated in favour of using &drm_private_state.
+ */
+
 /**
  * drm_atomic_private_obj_init - initialize private object
  * @obj: private object
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index 2e5e089dd9123fa324e1a7d343d8bb47a200739b..4c62dff14893646f7e818d1fb87947da52a3ccef 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -214,9 +214,11 @@ EXPORT_SYMBOL(drm_rotation_simplify);
  * This function initializes generic mutable zpos property and enables support
  * for it in drm core. Drivers can then attach this property to planes to enable
  * support for configurable planes arrangement during blending operation.
- * Once mutable zpos property has been enabled, the DRM core will automatically
- * calculate &drm_plane_state.normalized_zpos values. Usually min should be set
- * to 0 and max to maximal number of planes for given crtc - 1.
+ * Drivers that attach a mutable zpos property to any plane should call the
+ * drm_atomic_normalize_zpos() helper during their implementation of
+ * &drm_mode_config_funcs.atomic_check(), which will update the normalized zpos
+ * values and store them in &drm_plane_state.normalized_zpos. Usually min
+ * should be set to 0 and max to maximal number of planes for given crtc - 1.
  *
  * If zpos of some planes cannot be changed (like fixed background or
  * cursor/topmost planes), driver should adjust min/max values and assign those
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 624edeb5c50d38ecedbed490c8ce05fca11fed99..e6a21e69059cdc312a6b079f75b9716345f3f4db 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -153,6 +153,25 @@ static void drm_connector_free(struct kref *kref)
 	connector->funcs->destroy(connector);
 }
 
+void drm_connector_free_work_fn(struct work_struct *work)
+{
+	struct drm_connector *connector, *n;
+	struct drm_device *dev =
+		container_of(work, struct drm_device, mode_config.connector_free_work);
+	struct drm_mode_config *config = &dev->mode_config;
+	unsigned long flags;
+	struct llist_node *freed;
+
+	spin_lock_irqsave(&config->connector_list_lock, flags);
+	freed = llist_del_all(&config->connector_free_list);
+	spin_unlock_irqrestore(&config->connector_list_lock, flags);
+
+	llist_for_each_entry_safe(connector, n, freed, free_node) {
+		drm_mode_object_unregister(dev, &connector->base);
+		connector->funcs->destroy(connector);
+	}
+}
+
 /**
  * drm_connector_init - Init a preallocated connector
  * @dev: DRM device
@@ -532,6 +551,25 @@ void drm_connector_list_iter_begin(struct drm_device *dev,
 }
 EXPORT_SYMBOL(drm_connector_list_iter_begin);
 
+/*
+ * Extra-safe connector put function that works in any context. Should only be
+ * used from the connector_iter functions, where we never really expect to
+ * actually release the connector when dropping our final reference.
+ */
+static void
+__drm_connector_put_safe(struct drm_connector *conn)
+{
+	struct drm_mode_config *config = &conn->dev->mode_config;
+
+	lockdep_assert_held(&config->connector_list_lock);
+
+	if (!refcount_dec_and_test(&conn->base.refcount.refcount))
+		return;
+
+	llist_add(&conn->free_node, &config->connector_free_list);
+	schedule_work(&config->connector_free_work);
+}
+
 /**
  * drm_connector_list_iter_next - return next connector
  * @iter: connectr_list iterator
@@ -561,10 +599,10 @@ drm_connector_list_iter_next(struct drm_connector_list_iter *iter)
 
 		/* loop until it's not a zombie connector */
 	} while (!kref_get_unless_zero(&iter->conn->base.refcount));
-	spin_unlock_irqrestore(&config->connector_list_lock, flags);
 
 	if (old_conn)
-		drm_connector_put(old_conn);
+		__drm_connector_put_safe(old_conn);
+	spin_unlock_irqrestore(&config->connector_list_lock, flags);
 
 	return iter->conn;
 }
@@ -581,9 +619,15 @@ EXPORT_SYMBOL(drm_connector_list_iter_next);
  */
 void drm_connector_list_iter_end(struct drm_connector_list_iter *iter)
 {
+	struct drm_mode_config *config = &iter->dev->mode_config;
+	unsigned long flags;
+
 	iter->dev = NULL;
-	if (iter->conn)
-		drm_connector_put(iter->conn);
+	if (iter->conn) {
+		spin_lock_irqsave(&config->connector_list_lock, flags);
+		__drm_connector_put_safe(iter->conn);
+		spin_unlock_irqrestore(&config->connector_list_lock, flags);
+	}
 	lock_release(&connector_list_iter_dep_map, 0, _RET_IP_);
 }
 EXPORT_SYMBOL(drm_connector_list_iter_end);
@@ -1229,6 +1273,19 @@ int drm_mode_connector_update_edid_property(struct drm_connector *connector,
 	if (edid)
 		size = EDID_LENGTH * (1 + edid->extensions);
 
+	/* Set the display info, using edid if available, otherwise
+	 * reseting the values to defaults. This duplicates the work
+	 * done in drm_add_edid_modes, but that function is not
+	 * consistently called before this one in all drivers and the
+	 * computation is cheap enough that it seems better to
+	 * duplicate it rather than attempt to ensure some arbitrary
+	 * ordering of calls.
+	 */
+	if (edid)
+		drm_add_display_info(connector, edid);
+	else
+		drm_reset_display_info(connector);
+
 	drm_object_property_set_value(&connector->base,
 				      dev->mode_config.non_desktop_property,
 				      connector->display_info.non_desktop);
diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
index 9ebb8841778cc99095a2235ab8b1d89f654816f2..af00f42ba269b0da3111ac5be480688815875da8 100644
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@@ -142,6 +142,7 @@ int drm_mode_connector_set_obj_prop(struct drm_mode_object *obj,
 				    uint64_t value);
 int drm_connector_create_standard_properties(struct drm_device *dev);
 const char *drm_get_connector_force_name(enum drm_connector_force force);
+void drm_connector_free_work_fn(struct work_struct *work);
 
 /* IOCTL */
 int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 524eace3d460fe66841884c7e434f8d5b0406d85..ddd5379145758014f1a5a9915abb1528edc1cf17 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -1731,7 +1731,7 @@ EXPORT_SYMBOL(drm_edid_duplicate);
  *
  * Returns true if @vendor is in @edid, false otherwise
  */
-static bool edid_vendor(struct edid *edid, const char *vendor)
+static bool edid_vendor(const struct edid *edid, const char *vendor)
 {
 	char edid_vendor[3];
 
@@ -1749,7 +1749,7 @@ static bool edid_vendor(struct edid *edid, const char *vendor)
  *
  * This tells subsequent routines what fixes they need to apply.
  */
-static u32 edid_get_quirks(struct edid *edid)
+static u32 edid_get_quirks(const struct edid *edid)
 {
 	const struct edid_quirk *quirk;
 	int i;
@@ -2813,7 +2813,7 @@ add_detailed_modes(struct drm_connector *connector, struct edid *edid,
 /*
  * Search EDID for CEA extension block.
  */
-static u8 *drm_find_edid_extension(struct edid *edid, int ext_id)
+static u8 *drm_find_edid_extension(const struct edid *edid, int ext_id)
 {
 	u8 *edid_ext = NULL;
 	int i;
@@ -2835,12 +2835,12 @@ static u8 *drm_find_edid_extension(struct edid *edid, int ext_id)
 	return edid_ext;
 }
 
-static u8 *drm_find_cea_extension(struct edid *edid)
+static u8 *drm_find_cea_extension(const struct edid *edid)
 {
 	return drm_find_edid_extension(edid, CEA_EXT);
 }
 
-static u8 *drm_find_displayid_extension(struct edid *edid)
+static u8 *drm_find_displayid_extension(const struct edid *edid)
 {
 	return drm_find_edid_extension(edid, DISPLAYID_EXT);
 }
@@ -4378,7 +4378,7 @@ drm_parse_hdmi_vsdb_video(struct drm_connector *connector, const u8 *db)
 }
 
 static void drm_parse_cea_ext(struct drm_connector *connector,
-			      struct edid *edid)
+			      const struct edid *edid)
 {
 	struct drm_display_info *info = &connector->display_info;
 	const u8 *edid_ext;
@@ -4412,11 +4412,34 @@ static void drm_parse_cea_ext(struct drm_connector *connector,
 	}
 }
 
-static void drm_add_display_info(struct drm_connector *connector,
-				 struct edid *edid, u32 quirks)
+/* A connector has no EDID information, so we've got no EDID to compute quirks from. Reset
+ * all of the values which would have been set from EDID
+ */
+void
+drm_reset_display_info(struct drm_connector *connector)
 {
 	struct drm_display_info *info = &connector->display_info;
 
+	info->width_mm = 0;
+	info->height_mm = 0;
+
+	info->bpc = 0;
+	info->color_formats = 0;
+	info->cea_rev = 0;
+	info->max_tmds_clock = 0;
+	info->dvi_dual = false;
+	info->has_hdmi_infoframe = false;
+
+	info->non_desktop = 0;
+}
+EXPORT_SYMBOL_GPL(drm_reset_display_info);
+
+u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid)
+{
+	struct drm_display_info *info = &connector->display_info;
+
+	u32 quirks = edid_get_quirks(edid);
+
 	info->width_mm = edid->width_cm * 10;
 	info->height_mm = edid->height_cm * 10;
 
@@ -4430,11 +4453,13 @@ static void drm_add_display_info(struct drm_connector *connector,
 
 	info->non_desktop = !!(quirks & EDID_QUIRK_NON_DESKTOP);
 
+	DRM_DEBUG_KMS("non_desktop set to %d\n", info->non_desktop);
+
 	if (edid->revision < 3)
-		return;
+		return quirks;
 
 	if (!(edid->input & DRM_EDID_INPUT_DIGITAL))
-		return;
+		return quirks;
 
 	drm_parse_cea_ext(connector, edid);
 
@@ -4454,7 +4479,7 @@ static void drm_add_display_info(struct drm_connector *connector,
 
 	/* Only defined for 1.4 with digital displays */
 	if (edid->revision < 4)
-		return;
+		return quirks;
 
 	switch (edid->input & DRM_EDID_DIGITAL_DEPTH_MASK) {
 	case DRM_EDID_DIGITAL_DEPTH_6:
@@ -4489,7 +4514,9 @@ static void drm_add_display_info(struct drm_connector *connector,
 		info->color_formats |= DRM_COLOR_FORMAT_YCRCB444;
 	if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB422)
 		info->color_formats |= DRM_COLOR_FORMAT_YCRCB422;
+	return quirks;
 }
+EXPORT_SYMBOL_GPL(drm_add_display_info);
 
 static int validate_displayid(u8 *displayid, int length, int idx)
 {
@@ -4645,8 +4672,6 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
 		return 0;
 	}
 
-	quirks = edid_get_quirks(edid);
-
 	drm_edid_to_eld(connector, edid);
 
 	/*
@@ -4654,7 +4679,7 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
 	 * To avoid multiple parsing of same block, lets parse that map
 	 * from sink info, before parsing CEA modes.
 	 */
-	drm_add_display_info(connector, edid, quirks);
+	quirks = drm_add_display_info(connector, edid);
 
 	/*
 	 * EDID spec says modes should be preferred in this order:
@@ -4846,6 +4871,11 @@ EXPORT_SYMBOL(drm_hdmi_avi_infoframe_from_display_mode);
  * @mode: DRM display mode
  * @rgb_quant_range: RGB quantization range (Q)
  * @rgb_quant_range_selectable: Sink support selectable RGB quantization range (QS)
+ * @is_hdmi2_sink: HDMI 2.0 sink, which has different default recommendations
+ *
+ * Note that @is_hdmi2_sink can be derived by looking at the
+ * &drm_scdc.supported flag stored in &drm_hdmi_info.scdc,
+ * &drm_display_info.hdmi, which can be found in &drm_connector.display_info.
  */
 void
 drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame,
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index 35b56dfba92949dca9c27ffb002ce9317c1b1f5a..186d00adfb5f86675bfd27a8dd9d812e1475d9fe 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -23,6 +23,7 @@
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_print.h>
 #include <linux/module.h>
 
 #define DEFAULT_FBDEFIO_DELAY_MS 50
@@ -42,7 +43,7 @@ struct drm_fbdev_cma {
  * callback function to create a cma backed framebuffer.
  *
  * An fbdev framebuffer backed by cma is also available by calling
- * drm_fbdev_cma_init(). drm_fbdev_cma_fini() tears it down.
+ * drm_fb_cma_fbdev_init(). drm_fb_cma_fbdev_fini() tears it down.
  * If the &drm_framebuffer_funcs.dirty callback is set, fb_deferred_io will be
  * set up automatically. &drm_framebuffer_funcs.dirty is called by
  * drm_fb_helper_deferred_io() in process context (&struct delayed_work).
@@ -68,7 +69,7 @@ struct drm_fbdev_cma {
  *
  * Initialize::
  *
- *     fbdev = drm_fbdev_cma_init_with_funcs(dev, 16,
+ *     fbdev = drm_fb_cma_fbdev_init_with_funcs(dev, 16,
  *                                           dev->mode_config.num_crtc,
  *                                           dev->mode_config.num_connector,
  *                                           &driver_fb_funcs);
@@ -256,7 +257,7 @@ drm_fbdev_cma_create(struct drm_fb_helper *helper,
 	fbi->screen_size = size;
 	fbi->fix.smem_len = size;
 
-	if (fbdev_cma->fb_funcs->dirty) {
+	if (fb->funcs->dirty) {
 		ret = drm_fbdev_cma_defio_init(fbi, obj);
 		if (ret)
 			goto err_cma_destroy;
@@ -277,6 +278,118 @@ static const struct drm_fb_helper_funcs drm_fb_cma_helper_funcs = {
 	.fb_probe = drm_fbdev_cma_create,
 };
 
+/**
+ * drm_fb_cma_fbdev_init_with_funcs() - Allocate and initialize fbdev emulation
+ * @dev: DRM device
+ * @preferred_bpp: Preferred bits per pixel for the device.
+ *                 @dev->mode_config.preferred_depth is used if this is zero.
+ * @max_conn_count: Maximum number of connectors.
+ *                  @dev->mode_config.num_connector is used if this is zero.
+ * @funcs: Framebuffer functions, in particular a custom dirty() callback.
+ *         Can be NULL.
+ *
+ * Returns:
+ * Zero on success or negative error code on failure.
+ */
+int drm_fb_cma_fbdev_init_with_funcs(struct drm_device *dev,
+	unsigned int preferred_bpp, unsigned int max_conn_count,
+	const struct drm_framebuffer_funcs *funcs)
+{
+	struct drm_fbdev_cma *fbdev_cma;
+	struct drm_fb_helper *fb_helper;
+	int ret;
+
+	if (!preferred_bpp)
+		preferred_bpp = dev->mode_config.preferred_depth;
+	if (!preferred_bpp)
+		preferred_bpp = 32;
+
+	if (!max_conn_count)
+		max_conn_count = dev->mode_config.num_connector;
+
+	fbdev_cma = kzalloc(sizeof(*fbdev_cma), GFP_KERNEL);
+	if (!fbdev_cma)
+		return -ENOMEM;
+
+	fbdev_cma->fb_funcs = funcs;
+	fb_helper = &fbdev_cma->fb_helper;
+
+	drm_fb_helper_prepare(dev, fb_helper, &drm_fb_cma_helper_funcs);
+
+	ret = drm_fb_helper_init(dev, fb_helper, max_conn_count);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dev->dev, "Failed to initialize fbdev helper.\n");
+		goto err_free;
+	}
+
+	ret = drm_fb_helper_single_add_all_connectors(fb_helper);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dev->dev, "Failed to add connectors.\n");
+		goto err_drm_fb_helper_fini;
+	}
+
+	ret = drm_fb_helper_initial_config(fb_helper, preferred_bpp);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dev->dev, "Failed to set fbdev configuration.\n");
+		goto err_drm_fb_helper_fini;
+	}
+
+	return 0;
+
+err_drm_fb_helper_fini:
+	drm_fb_helper_fini(fb_helper);
+err_free:
+	kfree(fbdev_cma);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(drm_fb_cma_fbdev_init_with_funcs);
+
+/**
+ * drm_fb_cma_fbdev_init() - Allocate and initialize fbdev emulation
+ * @dev: DRM device
+ * @preferred_bpp: Preferred bits per pixel for the device.
+ *                 @dev->mode_config.preferred_depth is used if this is zero.
+ * @max_conn_count: Maximum number of connectors.
+ *                  @dev->mode_config.num_connector is used if this is zero.
+ *
+ * Returns:
+ * Zero on success or negative error code on failure.
+ */
+int drm_fb_cma_fbdev_init(struct drm_device *dev, unsigned int preferred_bpp,
+			  unsigned int max_conn_count)
+{
+	return drm_fb_cma_fbdev_init_with_funcs(dev, preferred_bpp,
+						max_conn_count, NULL);
+}
+EXPORT_SYMBOL_GPL(drm_fb_cma_fbdev_init);
+
+/**
+ * drm_fb_cma_fbdev_fini() - Teardown fbdev emulation
+ * @dev: DRM device
+ */
+void drm_fb_cma_fbdev_fini(struct drm_device *dev)
+{
+	struct drm_fb_helper *fb_helper = dev->fb_helper;
+
+	if (!fb_helper)
+		return;
+
+	/* Unregister if it hasn't been done already */
+	if (fb_helper->fbdev && fb_helper->fbdev->dev)
+		drm_fb_helper_unregister_fbi(fb_helper);
+
+	if (fb_helper->fbdev)
+		drm_fbdev_cma_defio_fini(fb_helper->fbdev);
+
+	if (fb_helper->fb)
+		drm_framebuffer_remove(fb_helper->fb);
+
+	drm_fb_helper_fini(fb_helper);
+	kfree(to_fbdev_cma(fb_helper));
+}
+EXPORT_SYMBOL_GPL(drm_fb_cma_fbdev_fini);
+
 /**
  * drm_fbdev_cma_init_with_funcs() - Allocate and initializes a drm_fbdev_cma struct
  * @dev: DRM device
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 04a3a5ce370a059d8fb36f700a7f93a48182e2c6..035784ddd13363eec627425cf7e6a2600b9b7469 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -66,19 +66,23 @@ static DEFINE_MUTEX(kernel_fb_helper_lock);
  * helper functions used by many drivers to implement the kernel mode setting
  * interfaces.
  *
- * Initialization is done as a four-step process with drm_fb_helper_prepare(),
- * drm_fb_helper_init(), drm_fb_helper_single_add_all_connectors() and
- * drm_fb_helper_initial_config(). Drivers with fancier requirements than the
- * default behaviour can override the third step with their own code.
- * Teardown is done with drm_fb_helper_fini() after the fbdev device is
- * unregisters using drm_fb_helper_unregister_fbi().
- *
- * At runtime drivers should restore the fbdev console by calling
- * drm_fb_helper_restore_fbdev_mode_unlocked() from their &drm_driver.lastclose
- * callback.  They should also notify the fb helper code from updates to the
- * output configuration by calling drm_fb_helper_hotplug_event(). For easier
- * integration with the output polling code in drm_crtc_helper.c the modeset
- * code provides a &drm_mode_config_funcs.output_poll_changed callback.
+ * Setup fbdev emulation by calling drm_fb_helper_fbdev_setup() and tear it
+ * down by calling drm_fb_helper_fbdev_teardown().
+ *
+ * Drivers that need to handle connector hotplugging (e.g. dp mst) can't use
+ * the setup helper and will need to do the whole four-step setup process with
+ * drm_fb_helper_prepare(), drm_fb_helper_init(),
+ * drm_fb_helper_single_add_all_connectors(), enable hotplugging and
+ * drm_fb_helper_initial_config() to avoid a possible race window.
+ *
+ * At runtime drivers should restore the fbdev console by using
+ * drm_fb_helper_lastclose() as their &drm_driver.lastclose callback.
+ * They should also notify the fb helper code from updates to the output
+ * configuration by using drm_fb_helper_output_poll_changed() as their
+ * &drm_mode_config_funcs.output_poll_changed callback.
+ *
+ * For suspend/resume consider using drm_mode_config_helper_suspend() and
+ * drm_mode_config_helper_resume() which takes care of fbdev as well.
  *
  * All other functions exported by the fb helper library can be used to
  * implement the fbdev driver interface by the driver.
@@ -103,7 +107,8 @@ static DEFINE_MUTEX(kernel_fb_helper_lock);
  * always run in process context since the fb_*() function could be running in
  * atomic context. If drm_fb_helper_deferred_io() is used as the deferred_io
  * callback it will also schedule dirty_work with the damage collected from the
- * mmap page writes.
+ * mmap page writes. Drivers can use drm_fb_helper_defio_init() to setup
+ * deferred I/O (coupled with drm_fb_helper_fbdev_teardown()).
  */
 
 #define drm_fb_helper_for_each_connector(fbh, i__) \
@@ -1024,6 +1029,49 @@ void drm_fb_helper_deferred_io(struct fb_info *info,
 }
 EXPORT_SYMBOL(drm_fb_helper_deferred_io);
 
+/**
+ * drm_fb_helper_defio_init - fbdev deferred I/O initialization
+ * @fb_helper: driver-allocated fbdev helper
+ *
+ * This function allocates &fb_deferred_io, sets callback to
+ * drm_fb_helper_deferred_io(), delay to 50ms and calls fb_deferred_io_init().
+ * It should be called from the &drm_fb_helper_funcs->fb_probe callback.
+ * drm_fb_helper_fbdev_teardown() cleans up deferred I/O.
+ *
+ * NOTE: A copy of &fb_ops is made and assigned to &info->fbops. This is done
+ * because fb_deferred_io_cleanup() clears &fbops->fb_mmap and would thereby
+ * affect other instances of that &fb_ops.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper)
+{
+	struct fb_info *info = fb_helper->fbdev;
+	struct fb_deferred_io *fbdefio;
+	struct fb_ops *fbops;
+
+	fbdefio = kzalloc(sizeof(*fbdefio), GFP_KERNEL);
+	fbops = kzalloc(sizeof(*fbops), GFP_KERNEL);
+	if (!fbdefio || !fbops) {
+		kfree(fbdefio);
+		kfree(fbops);
+		return -ENOMEM;
+	}
+
+	info->fbdefio = fbdefio;
+	fbdefio->delay = msecs_to_jiffies(50);
+	fbdefio->deferred_io = drm_fb_helper_deferred_io;
+
+	*fbops = *info->fbops;
+	info->fbops = fbops;
+
+	fb_deferred_io_init(info);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_fb_helper_defio_init);
+
 /**
  * drm_fb_helper_sys_read - wrapper around fb_sys_read
  * @info: fb_info struct pointer
@@ -1848,6 +1896,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 	if (ret < 0)
 		return ret;
 
+	strcpy(fb_helper->fb->comm, "[fbcon]");
 	return 0;
 }
 
@@ -2729,6 +2778,120 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
 }
 EXPORT_SYMBOL(drm_fb_helper_hotplug_event);
 
+/**
+ * drm_fb_helper_fbdev_setup() - Setup fbdev emulation
+ * @dev: DRM device
+ * @fb_helper: fbdev helper structure to set up
+ * @funcs: fbdev helper functions
+ * @preferred_bpp: Preferred bits per pixel for the device.
+ *                 @dev->mode_config.preferred_depth is used if this is zero.
+ * @max_conn_count: Maximum number of connectors.
+ *                  @dev->mode_config.num_connector is used if this is zero.
+ *
+ * This function sets up fbdev emulation and registers fbdev for access by
+ * userspace. If all connectors are disconnected, setup is deferred to the next
+ * time drm_fb_helper_hotplug_event() is called.
+ * The caller must to provide a &drm_fb_helper_funcs->fb_probe callback
+ * function.
+ *
+ * See also: drm_fb_helper_initial_config()
+ *
+ * Returns:
+ * Zero on success or negative error code on failure.
+ */
+int drm_fb_helper_fbdev_setup(struct drm_device *dev,
+			      struct drm_fb_helper *fb_helper,
+			      const struct drm_fb_helper_funcs *funcs,
+			      unsigned int preferred_bpp,
+			      unsigned int max_conn_count)
+{
+	int ret;
+
+	if (!preferred_bpp)
+		preferred_bpp = dev->mode_config.preferred_depth;
+	if (!preferred_bpp)
+		preferred_bpp = 32;
+
+	if (!max_conn_count)
+		max_conn_count = dev->mode_config.num_connector;
+	if (!max_conn_count) {
+		DRM_DEV_ERROR(dev->dev, "No connectors\n");
+		return -EINVAL;
+	}
+
+	drm_fb_helper_prepare(dev, fb_helper, funcs);
+
+	ret = drm_fb_helper_init(dev, fb_helper, max_conn_count);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dev->dev, "Failed to initialize fbdev helper\n");
+		return ret;
+	}
+
+	ret = drm_fb_helper_single_add_all_connectors(fb_helper);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dev->dev, "Failed to add connectors\n");
+		goto err_drm_fb_helper_fini;
+	}
+
+	if (!drm_drv_uses_atomic_modeset(dev))
+		drm_helper_disable_unused_functions(dev);
+
+	ret = drm_fb_helper_initial_config(fb_helper, preferred_bpp);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dev->dev, "Failed to set fbdev configuration\n");
+		goto err_drm_fb_helper_fini;
+	}
+
+	return 0;
+
+err_drm_fb_helper_fini:
+	drm_fb_helper_fini(fb_helper);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_fb_helper_fbdev_setup);
+
+/**
+ * drm_fb_helper_fbdev_teardown - Tear down fbdev emulation
+ * @dev: DRM device
+ *
+ * This function unregisters fbdev if not already done and cleans up the
+ * associated resources including the &drm_framebuffer.
+ * The driver is responsible for freeing the &drm_fb_helper structure which is
+ * stored in &drm_device->fb_helper. Do note that this pointer has been cleared
+ * when this function returns.
+ *
+ * In order to support device removal/unplug while file handles are still open,
+ * drm_fb_helper_unregister_fbi() should be called on device removal and
+ * drm_fb_helper_fbdev_teardown() in the &drm_driver->release callback when
+ * file handles are closed.
+ */
+void drm_fb_helper_fbdev_teardown(struct drm_device *dev)
+{
+	struct drm_fb_helper *fb_helper = dev->fb_helper;
+	struct fb_ops *fbops = NULL;
+
+	if (!fb_helper)
+		return;
+
+	/* Unregister if it hasn't been done already */
+	if (fb_helper->fbdev && fb_helper->fbdev->dev)
+		drm_fb_helper_unregister_fbi(fb_helper);
+
+	if (fb_helper->fbdev && fb_helper->fbdev->fbdefio) {
+		fb_deferred_io_cleanup(fb_helper->fbdev);
+		kfree(fb_helper->fbdev->fbdefio);
+		fbops = fb_helper->fbdev->fbops;
+	}
+
+	drm_fb_helper_fini(fb_helper);
+	kfree(fbops);
+
+	if (fb_helper->fb)
+		drm_framebuffer_remove(fb_helper->fb);
+}
+EXPORT_SYMBOL(drm_fb_helper_fbdev_teardown);
+
 /**
  * drm_fb_helper_lastclose - DRM driver lastclose helper for fbdev emulation
  * @dev: DRM device
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index d63d4c2ac4c8126e8d38cdcc32638893cb1fcfb2..5a13ff29f4f04af99c61fa07261a35a44ac0dd8e 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -664,6 +664,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
 	INIT_LIST_HEAD(&fb->filp_head);
 
 	fb->funcs = funcs;
+	strcpy(fb->comm, current->comm);
 
 	ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB,
 				    false, drm_framebuffer_free);
@@ -978,6 +979,7 @@ void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
 	struct drm_format_name_buf format_name;
 	unsigned int i;
 
+	drm_printf_indent(p, indent, "allocated by = %s\n", fb->comm);
 	drm_printf_indent(p, indent, "refcount=%u\n",
 			  drm_framebuffer_read_refcount(fb));
 	drm_printf_indent(p, indent, "format=%s\n",
diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
index aa8cb9bfa49900387dd448522a34f1cef2c053b5..4d682a6e8bcbc6d780c6545dd282f9b69d103069 100644
--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c
+++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
@@ -272,7 +272,8 @@ EXPORT_SYMBOL_GPL(drm_gem_fb_prepare_fb);
  * @sizes: fbdev size description
  * @pitch_align: Optional pitch alignment
  * @obj: GEM object backing the framebuffer
- * @funcs: vtable to be used for the new framebuffer object
+ * @funcs: Optional vtable to be used for the new framebuffer object when the
+ *         dirty callback is needed.
  *
  * This function creates a framebuffer from a &drm_fb_helper_surface_size
  * description for use in the &drm_fb_helper_funcs.fb_probe callback.
@@ -300,6 +301,9 @@ drm_gem_fbdev_fb_create(struct drm_device *dev,
 	if (obj->size < mode_cmd.pitches[0] * mode_cmd.height)
 		return ERR_PTR(-EINVAL);
 
+	if (!funcs)
+		funcs = &drm_gem_fb_funcs;
+
 	return drm_gem_fb_alloc(dev, &mode_cmd, &obj, 1, funcs);
 }
 EXPORT_SYMBOL(drm_gem_fbdev_fb_create);
diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index d1eb56a1eff4078d0d29b33ac2f17b06c4ae0d76..1402c0e71b03d18866139056b12f0d5fd84b6afb 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
 
 	mutex_lock(&dev->mode_config.idr_mutex);
 
-	/* Insert the new lessee into the tree */
-	id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
-	if (id < 0) {
-		error = id;
-		goto out_lessee;
-	}
-
-	lessee->lessee_id = id;
-	lessee->lessor = drm_master_get(lessor);
-	list_add_tail(&lessee->lessee_list, &lessor->lessees);
-
 	idr_for_each_entry(leases, entry, object) {
 		error = 0;
 		if (!idr_find(&dev->mode_config.crtc_idr, object))
@@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
 		}
 	}
 
+	/* Insert the new lessee into the tree */
+	id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
+	if (id < 0) {
+		error = id;
+		goto out_lessee;
+	}
+
+	lessee->lessee_id = id;
+	lessee->lessor = drm_master_get(lessor);
+	list_add_tail(&lessee->lessee_list, &lessor->lessees);
+
 	/* Move the leases over */
 	lessee->leases = *leases;
 	DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor);
@@ -254,10 +254,10 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
 	return lessee;
 
 out_lessee:
-	drm_master_put(&lessee);
-
 	mutex_unlock(&dev->mode_config.idr_mutex);
 
+	drm_master_put(&lessee);
+
 	return ERR_PTR(error);
 }
 
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index eb86bc3f753b36dd85e2244ba97a77a8e525768b..186c4e90cc1cd25e06a19a89da1bf88a7d9e0cf7 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -575,21 +575,23 @@ EXPORT_SYMBOL(drm_mm_remove_node);
  */
 void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 {
+	struct drm_mm *mm = old->mm;
+
 	DRM_MM_BUG_ON(!old->allocated);
 
 	*new = *old;
 
 	list_replace(&old->node_list, &new->node_list);
-	rb_replace_node(&old->rb, &new->rb, &old->mm->interval_tree.rb_root);
+	rb_replace_node_cached(&old->rb, &new->rb, &mm->interval_tree);
 
 	if (drm_mm_hole_follows(old)) {
 		list_replace(&old->hole_stack, &new->hole_stack);
 		rb_replace_node(&old->rb_hole_size,
 				&new->rb_hole_size,
-				&old->mm->holes_size);
+				&mm->holes_size);
 		rb_replace_node(&old->rb_hole_addr,
 				&new->rb_hole_addr,
-				&old->mm->holes_addr);
+				&mm->holes_addr);
 	}
 
 	old->allocated = false;
diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
index cda8bfab6d3b49e3e31516ae5895b878ed4e7581..e5c653357024dc5ea522706df1a03be1f12d094d 100644
--- a/drivers/gpu/drm/drm_mode_config.c
+++ b/drivers/gpu/drm/drm_mode_config.c
@@ -382,6 +382,9 @@ void drm_mode_config_init(struct drm_device *dev)
 	ida_init(&dev->mode_config.connector_ida);
 	spin_lock_init(&dev->mode_config.connector_list_lock);
 
+	init_llist_head(&dev->mode_config.connector_free_list);
+	INIT_WORK(&dev->mode_config.connector_free_work, drm_connector_free_work_fn);
+
 	drm_mode_create_standard_properties(dev);
 
 	/* Just to be sure */
@@ -431,6 +434,8 @@ void drm_mode_config_cleanup(struct drm_device *dev)
 		drm_connector_put(connector);
 	}
 	drm_connector_list_iter_end(&conn_iter);
+	/* connector_iter drops references in a work item. */
+	flush_work(&dev->mode_config.connector_free_work);
 	if (WARN_ON(!list_empty(&dev->mode_config.connector_list))) {
 		drm_connector_list_iter_begin(dev, &conn_iter);
 		drm_for_each_connector_iter(connector, &conn_iter)
@@ -467,6 +472,9 @@ void drm_mode_config_cleanup(struct drm_device *dev)
 	 */
 	WARN_ON(!list_empty(&dev->mode_config.fb_list));
 	list_for_each_entry_safe(fb, fbt, &dev->mode_config.fb_list, head) {
+		struct drm_printer p = drm_debug_printer("[leaked fb]");
+		drm_printf(&p, "framebuffer[%u]:\n", fb->base.id);
+		drm_framebuffer_print_info(&p, 1, fb);
 		drm_framebuffer_free(&fb->base.refcount);
 	}
 
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 901a4e9a87a3c9b08d8750e3a807e47f52d54295..1f2af707ce03e9670fec85a779404f70fcf29585 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/dmi.h>
+#include <linux/module.h>
 #include <drm/drm_connector.h>
 
 #ifdef CONFIG_DMI
@@ -172,3 +173,5 @@ int drm_get_panel_orientation_quirk(int width, int height)
 EXPORT_SYMBOL(drm_get_panel_orientation_quirk);
 
 #endif
+
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 37a93cdffb4ad0e7986a634df4d70ccc3fef286e..2c90519576a3e8b63a4c8361f18672db853ebcec 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format)
 }
 
 /*
- * setplane_internal - setplane handler for internal callers
+ * __setplane_internal - setplane handler for internal callers
  *
- * Note that we assume an extra reference has already been taken on fb.  If the
- * update fails, this reference will be dropped before return; if it succeeds,
- * the previous framebuffer (if any) will be unreferenced instead.
+ * This function will take a reference on the new fb for the plane
+ * on success.
  *
  * src_{x,y,w,h} are provided in 16.16 fixed point format
  */
@@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane,
 	if (!ret) {
 		plane->crtc = crtc;
 		plane->fb = fb;
-		fb = NULL;
+		drm_framebuffer_get(plane->fb);
 	} else {
 		plane->old_fb = NULL;
 	}
 
 out:
-	if (fb)
-		drm_framebuffer_put(fb);
 	if (plane->old_fb)
 		drm_framebuffer_put(plane->old_fb);
 	plane->old_fb = NULL;
@@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
 	struct drm_plane *plane;
 	struct drm_crtc *crtc = NULL;
 	struct drm_framebuffer *fb = NULL;
+	int ret;
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EINVAL;
@@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
 		}
 	}
 
-	/*
-	 * setplane_internal will take care of deref'ing either the old or new
-	 * framebuffer depending on success.
-	 */
-	return setplane_internal(plane, crtc, fb,
-				 plane_req->crtc_x, plane_req->crtc_y,
-				 plane_req->crtc_w, plane_req->crtc_h,
-				 plane_req->src_x, plane_req->src_y,
-				 plane_req->src_w, plane_req->src_h);
+	ret = setplane_internal(plane, crtc, fb,
+				plane_req->crtc_x, plane_req->crtc_y,
+				plane_req->crtc_w, plane_req->crtc_h,
+				plane_req->src_x, plane_req->src_y,
+				plane_req->src_w, plane_req->src_h);
+
+	if (fb)
+		drm_framebuffer_put(fb);
+
+	return ret;
 }
 
 static int drm_mode_cursor_universal(struct drm_crtc *crtc,
@@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
 		src_h = fb->height << 16;
 	}
 
-	/*
-	 * setplane_internal will take care of deref'ing either the old or new
-	 * framebuffer depending on success.
-	 */
 	ret = __setplane_internal(crtc->cursor, crtc, fb,
-				crtc_x, crtc_y, crtc_w, crtc_h,
-				0, 0, src_w, src_h, ctx);
+				  crtc_x, crtc_y, crtc_w, crtc_h,
+				  0, 0, src_w, src_h, ctx);
+
+	if (fb)
+		drm_framebuffer_put(fb);
 
 	/* Update successful; save new cursor position, if necessary */
 	if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) {
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 9b733c510cbfc3030600ff33de96ee54b965b6e3..0b7b0d1ad2d595a2efacde1cdd4472b401a73278 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -29,9 +29,9 @@
 /**
  * DOC: Overview
  *
- * DRM synchronisation objects (syncobj) are a persistent objects,
- * that contain an optional fence. The fence can be updated with a new
- * fence, or be NULL.
+ * DRM synchronisation objects (syncobj, see struct &drm_syncobj) are
+ * persistent objects that contain an optional fence. The fence can be updated
+ * with a new fence, or be NULL.
  *
  * syncobj's can be waited upon, where it will wait for the underlying
  * fence.
@@ -61,7 +61,8 @@
  * @file_private: drm file private pointer
  * @handle: sync object handle to lookup.
  *
- * Returns a reference to the syncobj pointed to by handle or NULL.
+ * Returns a reference to the syncobj pointed to by handle or NULL. The
+ * reference must be released by calling drm_syncobj_put().
  */
 struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
 				     u32 handle)
@@ -229,6 +230,19 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
 	return 0;
 }
 
+/**
+ * drm_syncobj_find_fence - lookup and reference the fence in a sync object
+ * @file_private: drm file private pointer
+ * @handle: sync object handle to lookup.
+ * @fence: out parameter for the fence
+ *
+ * This is just a convenience function that combines drm_syncobj_find() and
+ * drm_syncobj_fence_get().
+ *
+ * Returns 0 on success or a negative error value on failure. On success @fence
+ * contains a reference to the fence, which must be released by calling
+ * dma_fence_put().
+ */
 int drm_syncobj_find_fence(struct drm_file *file_private,
 			   u32 handle,
 			   struct dma_fence **fence)
@@ -269,6 +283,12 @@ EXPORT_SYMBOL(drm_syncobj_free);
  * @out_syncobj: returned syncobj
  * @flags: DRM_SYNCOBJ_* flags
  * @fence: if non-NULL, the syncobj will represent this fence
+ *
+ * This is the first function to create a sync object. After creating, drivers
+ * probably want to make it available to userspace, either through
+ * drm_syncobj_get_handle() or drm_syncobj_get_fd().
+ *
+ * Returns 0 on success or a negative error value on failure.
  */
 int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
 		       struct dma_fence *fence)
@@ -302,6 +322,14 @@ EXPORT_SYMBOL(drm_syncobj_create);
 
 /**
  * drm_syncobj_get_handle - get a handle from a syncobj
+ * @file_private: drm file private pointer
+ * @syncobj: Sync object to export
+ * @handle: out parameter with the new handle
+ *
+ * Exports a sync object created with drm_syncobj_create() as a handle on
+ * @file_private to userspace.
+ *
+ * Returns 0 on success or a negative error value on failure.
  */
 int drm_syncobj_get_handle(struct drm_file *file_private,
 			   struct drm_syncobj *syncobj, u32 *handle)
@@ -371,40 +399,35 @@ static const struct file_operations drm_syncobj_file_fops = {
 	.release = drm_syncobj_file_release,
 };
 
-static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj)
-{
-	struct file *file = anon_inode_getfile("syncobj_file",
-					       &drm_syncobj_file_fops,
-					       syncobj, 0);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-
-	drm_syncobj_get(syncobj);
-	if (cmpxchg(&syncobj->file, NULL, file)) {
-		/* lost the race */
-		fput(file);
-	}
-
-	return 0;
-}
-
+/**
+ * drm_syncobj_get_fd - get a file descriptor from a syncobj
+ * @syncobj: Sync object to export
+ * @p_fd: out parameter with the new file descriptor
+ *
+ * Exports a sync object created with drm_syncobj_create() as a file descriptor.
+ *
+ * Returns 0 on success or a negative error value on failure.
+ */
 int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd)
 {
-	int ret;
+	struct file *file;
 	int fd;
 
 	fd = get_unused_fd_flags(O_CLOEXEC);
 	if (fd < 0)
 		return fd;
 
-	if (!syncobj->file) {
-		ret = drm_syncobj_alloc_file(syncobj);
-		if (ret) {
-			put_unused_fd(fd);
-			return ret;
-		}
+	file = anon_inode_getfile("syncobj_file",
+				  &drm_syncobj_file_fops,
+				  syncobj, 0);
+	if (IS_ERR(file)) {
+		put_unused_fd(fd);
+		return PTR_ERR(file);
 	}
-	fd_install(fd, syncobj->file);
+
+	drm_syncobj_get(syncobj);
+	fd_install(fd, file);
+
 	*p_fd = fd;
 	return 0;
 }
@@ -424,31 +447,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private,
 	return ret;
 }
 
-static struct drm_syncobj *drm_syncobj_fdget(int fd)
-{
-	struct file *file = fget(fd);
-
-	if (!file)
-		return NULL;
-	if (file->f_op != &drm_syncobj_file_fops)
-		goto err;
-
-	return file->private_data;
-err:
-	fput(file);
-	return NULL;
-};
-
 static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
 				    int fd, u32 *handle)
 {
-	struct drm_syncobj *syncobj = drm_syncobj_fdget(fd);
+	struct drm_syncobj *syncobj;
+	struct file *file;
 	int ret;
 
-	if (!syncobj)
+	file = fget(fd);
+	if (!file)
 		return -EINVAL;
 
+	if (file->f_op != &drm_syncobj_file_fops) {
+		fput(file);
+		return -EINVAL;
+	}
+
 	/* take a reference to put in the idr */
+	syncobj = file->private_data;
 	drm_syncobj_get(syncobj);
 
 	idr_preload(GFP_KERNEL);
@@ -457,12 +473,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
 	spin_unlock(&file_private->syncobj_table_lock);
 	idr_preload_end();
 
-	if (ret < 0) {
-		fput(syncobj->file);
-		return ret;
-	}
-	*handle = ret;
-	return 0;
+	if (ret > 0) {
+		*handle = ret;
+		ret = 0;
+	} else
+		drm_syncobj_put(syncobj);
+
+	fput(file);
+	return ret;
 }
 
 static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private,
diff --git a/drivers/gpu/drm/etnaviv/Kconfig b/drivers/gpu/drm/etnaviv/Kconfig
index a29b8f59eb15d984b7ebdd40d97055d70f845cc5..3f58b4077767d4fd5317fc016323cf91bf1ef827 100644
--- a/drivers/gpu/drm/etnaviv/Kconfig
+++ b/drivers/gpu/drm/etnaviv/Kconfig
@@ -6,6 +6,7 @@ config DRM_ETNAVIV
 	depends on MMU
 	select SHMEM
 	select SYNC_FILE
+	select THERMAL if DRM_ETNAVIV_THERMAL
 	select TMPFS
 	select WANT_DEV_COREDUMP
 	select CMA if HAVE_DMA_CONTIGUOUS
@@ -13,6 +14,14 @@ config DRM_ETNAVIV
 	help
 	  DRM driver for Vivante GPUs.
 
+config DRM_ETNAVIV_THERMAL
+	bool "enable ETNAVIV thermal throttling"
+	depends on DRM_ETNAVIV
+	default y
+	help
+	  Compile in support for thermal throttling.
+	  Say Y unless you want to risk burning your SoC.
+
 config DRM_ETNAVIV_REGISTER_LOGGING
 	bool "enable ETNAVIV register logging"
 	depends on DRM_ETNAVIV
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
index 9e7098e3207f5d3b7914ec22dc9dc44ebd020d7a..99ad2f073c6e44bef9789cefc42ef6301155e4e1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@@ -100,6 +100,8 @@ static void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu,
 {
 	u32 flush = 0;
 
+	lockdep_assert_held(&gpu->lock);
+
 	/*
 	 * This assumes that if we're switching to 2D, we're switching
 	 * away from 3D, and vice versa.  Hence, if we're switching to
@@ -164,7 +166,9 @@ static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
 
 u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
 {
-	struct etnaviv_cmdbuf *buffer = gpu->buffer;
+	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
+
+	lockdep_assert_held(&gpu->lock);
 
 	/* initialize buffer */
 	buffer->user_size = 0;
@@ -178,7 +182,9 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
 
 u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr)
 {
-	struct etnaviv_cmdbuf *buffer = gpu->buffer;
+	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
+
+	lockdep_assert_held(&gpu->lock);
 
 	buffer->user_size = 0;
 
@@ -211,10 +217,12 @@ u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe
 
 void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
 {
-	struct etnaviv_cmdbuf *buffer = gpu->buffer;
+	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
 	unsigned int waitlink_offset = buffer->user_size - 16;
 	u32 link_target, flush = 0;
 
+	lockdep_assert_held(&gpu->lock);
+
 	if (gpu->exec_state == ETNA_PIPE_2D)
 		flush = VIVS_GL_FLUSH_CACHE_PE2D;
 	else if (gpu->exec_state == ETNA_PIPE_3D)
@@ -253,10 +261,12 @@ void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
 /* Append a 'sync point' to the ring buffer. */
 void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
 {
-	struct etnaviv_cmdbuf *buffer = gpu->buffer;
+	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
 	unsigned int waitlink_offset = buffer->user_size - 16;
 	u32 dwords, target;
 
+	lockdep_assert_held(&gpu->lock);
+
 	/*
 	 * We need at most 3 dwords in the return target:
 	 * 1 event + 1 end + 1 wait + 1 link.
@@ -287,13 +297,16 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
 }
 
 /* Append a command buffer to the ring buffer. */
-void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
-	struct etnaviv_cmdbuf *cmdbuf)
+void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
+	unsigned int event, struct etnaviv_cmdbuf *cmdbuf)
 {
-	struct etnaviv_cmdbuf *buffer = gpu->buffer;
+	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
 	unsigned int waitlink_offset = buffer->user_size - 16;
 	u32 return_target, return_dwords;
 	u32 link_target, link_dwords;
+	bool switch_context = gpu->exec_state != exec_state;
+
+	lockdep_assert_held(&gpu->lock);
 
 	if (drm_debug & DRM_UT_DRIVER)
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
@@ -306,7 +319,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 	 * need to append a mmu flush load state, followed by a new
 	 * link to this buffer - a total of four additional words.
 	 */
-	if (gpu->mmu->need_flush || gpu->switch_context) {
+	if (gpu->mmu->need_flush || switch_context) {
 		u32 target, extra_dwords;
 
 		/* link command */
@@ -321,7 +334,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 		}
 
 		/* pipe switch commands */
-		if (gpu->switch_context)
+		if (switch_context)
 			extra_dwords += 4;
 
 		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
@@ -349,10 +362,9 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 			gpu->mmu->need_flush = false;
 		}
 
-		if (gpu->switch_context) {
-			etnaviv_cmd_select_pipe(gpu, buffer, cmdbuf->exec_state);
-			gpu->exec_state = cmdbuf->exec_state;
-			gpu->switch_context = false;
+		if (switch_context) {
+			etnaviv_cmd_select_pipe(gpu, buffer, exec_state);
+			gpu->exec_state = exec_state;
 		}
 
 		/* And the link to the submitted buffer */
@@ -421,4 +433,6 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 
 	if (drm_debug & DRM_UT_DRIVER)
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
+
+	gpu->lastctx = cmdbuf->ctx;
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
index 6e3bbcf24160eb297d7269e79828a0bfdac7bdc5..68e6d3772ad84d636166e2992a29368723946d05 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
@@ -78,6 +78,7 @@ static const struct {
 	ST(0x17c0, 8),
 	ST(0x17e0, 8),
 	ST(0x2400, 14 * 16),
+	ST(0x3824, 1),
 	ST(0x10800, 32 * 16),
 	ST(0x14600, 16),
 	ST(0x14800, 8 * 8),
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
index 66ac79558bbd4e2d8afb6f83f9d9949d7e93c2c5..3746827f45ebd76426968ac2066c06ae428fd192 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
@@ -86,26 +86,11 @@ void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc)
 	kfree(suballoc);
 }
 
-struct etnaviv_cmdbuf *
-etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
-		   size_t nr_bos, size_t nr_pmrs)
+int etnaviv_cmdbuf_init(struct etnaviv_cmdbuf_suballoc *suballoc,
+			struct etnaviv_cmdbuf *cmdbuf, u32 size)
 {
-	struct etnaviv_cmdbuf *cmdbuf;
-	struct etnaviv_perfmon_request *pmrs;
-	size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]),
-				 sizeof(*cmdbuf));
 	int granule_offs, order, ret;
 
-	cmdbuf = kzalloc(sz, GFP_KERNEL);
-	if (!cmdbuf)
-		return NULL;
-
-	sz = sizeof(*pmrs) * nr_pmrs;
-	pmrs = kzalloc(sz, GFP_KERNEL);
-	if (!pmrs)
-		goto out_free_cmdbuf;
-
-	cmdbuf->pmrs = pmrs;
 	cmdbuf->suballoc = suballoc;
 	cmdbuf->size = size;
 
@@ -123,7 +108,7 @@ etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
 		if (!ret) {
 			dev_err(suballoc->gpu->dev,
 				"Timeout waiting for cmdbuf space\n");
-			return NULL;
+			return -ETIMEDOUT;
 		}
 		goto retry;
 	}
@@ -131,11 +116,7 @@ etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
 	cmdbuf->suballoc_offset = granule_offs * SUBALLOC_GRANULE;
 	cmdbuf->vaddr = suballoc->vaddr + cmdbuf->suballoc_offset;
 
-	return cmdbuf;
-
-out_free_cmdbuf:
-	kfree(cmdbuf);
-	return NULL;
+	return 0;
 }
 
 void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
@@ -151,8 +132,6 @@ void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
 	suballoc->free_space = 1;
 	mutex_unlock(&suballoc->lock);
 	wake_up_all(&suballoc->free_event);
-	kfree(cmdbuf->pmrs);
-	kfree(cmdbuf);
 }
 
 u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
index b6348b9f2a9ddb9959dd51dc310e86f1be4d53e8..ddc3f7ea169cde73ec240582f031bcd2b1f3f13a 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
@@ -33,27 +33,15 @@ struct etnaviv_cmdbuf {
 	void *vaddr;
 	u32 size;
 	u32 user_size;
-	/* fence after which this buffer is to be disposed */
-	struct dma_fence *fence;
-	/* target exec state */
-	u32 exec_state;
-	/* per GPU in-flight list */
-	struct list_head node;
-	/* perfmon requests */
-	unsigned int nr_pmrs;
-	struct etnaviv_perfmon_request *pmrs;
-	/* BOs attached to this command buffer */
-	unsigned int nr_bos;
-	struct etnaviv_vram_mapping *bo_map[0];
 };
 
 struct etnaviv_cmdbuf_suballoc *
 etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu);
 void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc);
 
-struct etnaviv_cmdbuf *
-etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
-		   size_t nr_bos, size_t nr_pmrs);
+
+int etnaviv_cmdbuf_init(struct etnaviv_cmdbuf_suballoc *suballoc,
+		struct etnaviv_cmdbuf *cmdbuf, u32 size);
 void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);
 
 u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 491eddf9b15022177bc3106181aef4cccaf89fa5..6faf4042db2340e6069ee2be3c0caffba4a4d0a9 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -172,7 +172,7 @@ static int etnaviv_mmu_show(struct etnaviv_gpu *gpu, struct seq_file *m)
 
 static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu, struct seq_file *m)
 {
-	struct etnaviv_cmdbuf *buf = gpu->buffer;
+	struct etnaviv_cmdbuf *buf = &gpu->buffer;
 	u32 size = buf->size;
 	u32 *ptr = buf->vaddr;
 	u32 i;
@@ -459,9 +459,6 @@ static int etnaviv_ioctl_pm_query_dom(struct drm_device *dev, void *data,
 	struct drm_etnaviv_pm_domain *args = data;
 	struct etnaviv_gpu *gpu;
 
-	/* reject as long as the feature isn't stable */
-	return -EINVAL;
-
 	if (args->pipe >= ETNA_MAX_PIPES)
 		return -EINVAL;
 
@@ -479,9 +476,6 @@ static int etnaviv_ioctl_pm_query_sig(struct drm_device *dev, void *data,
 	struct drm_etnaviv_pm_signal *args = data;
 	struct etnaviv_gpu *gpu;
 
-	/* reject as long as the feature isn't stable */
-	return -EINVAL;
-
 	if (args->pipe >= ETNA_MAX_PIPES)
 		return -EINVAL;
 
@@ -556,7 +550,7 @@ static struct drm_driver etnaviv_drm_driver = {
 	.desc               = "etnaviv DRM",
 	.date               = "20151214",
 	.major              = 1,
-	.minor              = 1,
+	.minor              = 2,
 };
 
 /*
@@ -580,12 +574,6 @@ static int etnaviv_bind(struct device *dev)
 	}
 	drm->dev_private = priv;
 
-	priv->wq = alloc_ordered_workqueue("etnaviv", 0);
-	if (!priv->wq) {
-		ret = -ENOMEM;
-		goto out_wq;
-	}
-
 	mutex_init(&priv->gem_lock);
 	INIT_LIST_HEAD(&priv->gem_list);
 	priv->num_gpus = 0;
@@ -607,9 +595,6 @@ static int etnaviv_bind(struct device *dev)
 out_register:
 	component_unbind_all(dev, drm);
 out_bind:
-	flush_workqueue(priv->wq);
-	destroy_workqueue(priv->wq);
-out_wq:
 	kfree(priv);
 out_unref:
 	drm_dev_unref(drm);
@@ -624,9 +609,6 @@ static void etnaviv_unbind(struct device *dev)
 
 	drm_dev_unregister(drm);
 
-	flush_workqueue(priv->wq);
-	destroy_workqueue(priv->wq);
-
 	component_unbind_all(dev, drm);
 
 	drm->dev_private = NULL;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index d249acb6da0825e6e92427b0d3fe1e5f1f975a19..a54f0b758a5c9ca2ded2916e48490a509b2747a4 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -56,18 +56,8 @@ struct etnaviv_drm_private {
 	/* list of GEM objects: */
 	struct mutex gem_lock;
 	struct list_head gem_list;
-
-	struct workqueue_struct *wq;
 };
 
-static inline void etnaviv_queue_work(struct drm_device *dev,
-	struct work_struct *w)
-{
-	struct etnaviv_drm_private *priv = dev->dev_private;
-
-	queue_work(priv->wq, w);
-}
-
 int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		struct drm_file *file);
 
@@ -97,8 +87,8 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu);
 u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr);
 void etnaviv_buffer_end(struct etnaviv_gpu *gpu);
 void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event);
-void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
-	struct etnaviv_cmdbuf *cmdbuf);
+void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
+	unsigned int event, struct etnaviv_cmdbuf *cmdbuf);
 void etnaviv_validate_init(void);
 bool etnaviv_cmd_validate_one(struct etnaviv_gpu *gpu,
 	u32 *stream, unsigned int size,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 2d955d7d7b6d83ecdfae58e5bf26084f28a98999..6d0909c589d10995873c6a567dc275caf7556901 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -120,7 +120,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 	struct core_dump_iterator iter;
 	struct etnaviv_vram_mapping *vram;
 	struct etnaviv_gem_object *obj;
-	struct etnaviv_cmdbuf *cmd;
+	struct etnaviv_gem_submit *submit;
 	unsigned int n_obj, n_bomap_pages;
 	size_t file_size, mmu_size;
 	__le64 *bomap, *bomap_start;
@@ -132,11 +132,11 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 	n_bomap_pages = 0;
 	file_size = ARRAY_SIZE(etnaviv_dump_registers) *
 			sizeof(struct etnaviv_dump_registers) +
-		    mmu_size + gpu->buffer->size;
+		    mmu_size + gpu->buffer.size;
 
 	/* Add in the active command buffers */
-	list_for_each_entry(cmd, &gpu->active_cmd_list, node) {
-		file_size += cmd->size;
+	list_for_each_entry(submit, &gpu->active_submit_list, node) {
+		file_size += submit->cmdbuf.size;
 		n_obj++;
 	}
 
@@ -176,13 +176,14 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 
 	etnaviv_core_dump_registers(&iter, gpu);
 	etnaviv_core_dump_mmu(&iter, gpu, mmu_size);
-	etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer->vaddr,
-			      gpu->buffer->size,
-			      etnaviv_cmdbuf_get_va(gpu->buffer));
-
-	list_for_each_entry(cmd, &gpu->active_cmd_list, node)
-		etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD, cmd->vaddr,
-				      cmd->size, etnaviv_cmdbuf_get_va(cmd));
+	etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer.vaddr,
+			      gpu->buffer.size,
+			      etnaviv_cmdbuf_get_va(&gpu->buffer));
+
+	list_for_each_entry(submit, &gpu->active_submit_list, node)
+		etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
+				      submit->cmdbuf.vaddr, submit->cmdbuf.size,
+				      etnaviv_cmdbuf_get_va(&submit->cmdbuf));
 
 	/* Reserve space for the bomap */
 	if (n_bomap_pages) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index daee3f1196df831b131ad1c229c25fba2d8364b5..fcc969fa0e6927bd189ca66fb8f3964c40cd5609 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -24,6 +24,9 @@
 #include "etnaviv_gpu.h"
 #include "etnaviv_mmu.h"
 
+static struct lock_class_key etnaviv_shm_lock_class;
+static struct lock_class_key etnaviv_userptr_lock_class;
+
 static void etnaviv_gem_scatter_map(struct etnaviv_gem_object *etnaviv_obj)
 {
 	struct drm_device *dev = etnaviv_obj->base.dev;
@@ -583,7 +586,7 @@ void etnaviv_gem_free_object(struct drm_gem_object *obj)
 	kfree(etnaviv_obj);
 }
 
-int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj)
+void etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj)
 {
 	struct etnaviv_drm_private *priv = dev->dev_private;
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
@@ -591,8 +594,6 @@ int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj)
 	mutex_lock(&priv->gem_lock);
 	list_add_tail(&etnaviv_obj->gem_node, &priv->gem_list);
 	mutex_unlock(&priv->gem_lock);
-
-	return 0;
 }
 
 static int etnaviv_gem_new_impl(struct drm_device *dev, u32 size, u32 flags,
@@ -640,8 +641,9 @@ static int etnaviv_gem_new_impl(struct drm_device *dev, u32 size, u32 flags,
 	return 0;
 }
 
-static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev,
-		u32 size, u32 flags)
+/* convenience method to construct a GEM buffer object, and userspace handle */
+int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file,
+	u32 size, u32 flags, u32 *handle)
 {
 	struct drm_gem_object *obj = NULL;
 	int ret;
@@ -653,6 +655,8 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev,
 	if (ret)
 		goto fail;
 
+	lockdep_set_class(&to_etnaviv_bo(obj)->lock, &etnaviv_shm_lock_class);
+
 	ret = drm_gem_object_init(dev, obj, size);
 	if (ret == 0) {
 		struct address_space *mapping;
@@ -660,7 +664,7 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev,
 		/*
 		 * Our buffers are kept pinned, so allocating them
 		 * from the MOVABLE zone is a really bad idea, and
-		 * conflicts with CMA.  See coments above new_inode()
+		 * conflicts with CMA. See comments above new_inode()
 		 * why this is required _and_ expected if you're
 		 * going to pin these pages.
 		 */
@@ -672,33 +676,12 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev,
 	if (ret)
 		goto fail;
 
-	return obj;
-
-fail:
-	drm_gem_object_put_unlocked(obj);
-	return ERR_PTR(ret);
-}
-
-/* convenience method to construct a GEM buffer object, and userspace handle */
-int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file,
-		u32 size, u32 flags, u32 *handle)
-{
-	struct drm_gem_object *obj;
-	int ret;
-
-	obj = __etnaviv_gem_new(dev, size, flags);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
-	ret = etnaviv_gem_obj_add(dev, obj);
-	if (ret < 0) {
-		drm_gem_object_put_unlocked(obj);
-		return ret;
-	}
+	etnaviv_gem_obj_add(dev, obj);
 
 	ret = drm_gem_handle_create(file, obj, handle);
 
 	/* drop reference from allocate - handle holds it now */
+fail:
 	drm_gem_object_put_unlocked(obj);
 
 	return ret;
@@ -722,139 +705,41 @@ int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags,
 	return 0;
 }
 
-struct get_pages_work {
-	struct work_struct work;
-	struct mm_struct *mm;
-	struct task_struct *task;
-	struct etnaviv_gem_object *etnaviv_obj;
-};
-
-static struct page **etnaviv_gem_userptr_do_get_pages(
-	struct etnaviv_gem_object *etnaviv_obj, struct mm_struct *mm, struct task_struct *task)
-{
-	int ret = 0, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
-	struct page **pvec;
-	uintptr_t ptr;
-	unsigned int flags = 0;
-
-	pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
-	if (!pvec)
-		return ERR_PTR(-ENOMEM);
-
-	if (!etnaviv_obj->userptr.ro)
-		flags |= FOLL_WRITE;
-
-	pinned = 0;
-	ptr = etnaviv_obj->userptr.ptr;
-
-	down_read(&mm->mmap_sem);
-	while (pinned < npages) {
-		ret = get_user_pages_remote(task, mm, ptr, npages - pinned,
-					    flags, pvec + pinned, NULL, NULL);
-		if (ret < 0)
-			break;
-
-		ptr += ret * PAGE_SIZE;
-		pinned += ret;
-	}
-	up_read(&mm->mmap_sem);
-
-	if (ret < 0) {
-		release_pages(pvec, pinned);
-		kvfree(pvec);
-		return ERR_PTR(ret);
-	}
-
-	return pvec;
-}
-
-static void __etnaviv_gem_userptr_get_pages(struct work_struct *_work)
-{
-	struct get_pages_work *work = container_of(_work, typeof(*work), work);
-	struct etnaviv_gem_object *etnaviv_obj = work->etnaviv_obj;
-	struct page **pvec;
-
-	pvec = etnaviv_gem_userptr_do_get_pages(etnaviv_obj, work->mm, work->task);
-
-	mutex_lock(&etnaviv_obj->lock);
-	if (IS_ERR(pvec)) {
-		etnaviv_obj->userptr.work = ERR_CAST(pvec);
-	} else {
-		etnaviv_obj->userptr.work = NULL;
-		etnaviv_obj->pages = pvec;
-	}
-
-	mutex_unlock(&etnaviv_obj->lock);
-	drm_gem_object_put_unlocked(&etnaviv_obj->base);
-
-	mmput(work->mm);
-	put_task_struct(work->task);
-	kfree(work);
-}
-
 static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj)
 {
 	struct page **pvec = NULL;
-	struct get_pages_work *work;
-	struct mm_struct *mm;
-	int ret, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
-
-	if (etnaviv_obj->userptr.work) {
-		if (IS_ERR(etnaviv_obj->userptr.work)) {
-			ret = PTR_ERR(etnaviv_obj->userptr.work);
-			etnaviv_obj->userptr.work = NULL;
-		} else {
-			ret = -EAGAIN;
-		}
-		return ret;
-	}
+	struct etnaviv_gem_userptr *userptr = &etnaviv_obj->userptr;
+	int ret, pinned = 0, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
 
-	mm = get_task_mm(etnaviv_obj->userptr.task);
-	pinned = 0;
-	if (mm == current->mm) {
-		pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
-		if (!pvec) {
-			mmput(mm);
-			return -ENOMEM;
-		}
-
-		pinned = __get_user_pages_fast(etnaviv_obj->userptr.ptr, npages,
-					       !etnaviv_obj->userptr.ro, pvec);
-		if (pinned < 0) {
-			kvfree(pvec);
-			mmput(mm);
-			return pinned;
-		}
-
-		if (pinned == npages) {
-			etnaviv_obj->pages = pvec;
-			mmput(mm);
-			return 0;
-		}
-	}
+	might_lock_read(&current->mm->mmap_sem);
 
-	release_pages(pvec, pinned);
-	kvfree(pvec);
+	if (userptr->mm != current->mm)
+		return -EPERM;
 
-	work = kmalloc(sizeof(*work), GFP_KERNEL);
-	if (!work) {
-		mmput(mm);
+	pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
+	if (!pvec)
 		return -ENOMEM;
-	}
 
-	get_task_struct(current);
-	drm_gem_object_get(&etnaviv_obj->base);
+	do {
+		unsigned num_pages = npages - pinned;
+		uint64_t ptr = userptr->ptr + pinned * PAGE_SIZE;
+		struct page **pages = pvec + pinned;
 
-	work->mm = mm;
-	work->task = current;
-	work->etnaviv_obj = etnaviv_obj;
+		ret = get_user_pages_fast(ptr, num_pages,
+					  !userptr->ro ? FOLL_WRITE : 0, pages);
+		if (ret < 0) {
+			release_pages(pvec, pinned);
+			kvfree(pvec);
+			return ret;
+		}
+
+		pinned += ret;
 
-	etnaviv_obj->userptr.work = &work->work;
-	INIT_WORK(&work->work, __etnaviv_gem_userptr_get_pages);
+	} while (pinned < npages);
 
-	etnaviv_queue_work(etnaviv_obj->base.dev, &work->work);
+	etnaviv_obj->pages = pvec;
 
-	return -EAGAIN;
+	return 0;
 }
 
 static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
@@ -870,7 +755,6 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
 		release_pages(etnaviv_obj->pages, npages);
 		kvfree(etnaviv_obj->pages);
 	}
-	put_task_struct(etnaviv_obj->userptr.task);
 }
 
 static int etnaviv_gem_userptr_mmap_obj(struct etnaviv_gem_object *etnaviv_obj,
@@ -897,17 +781,16 @@ int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
 	if (ret)
 		return ret;
 
+	lockdep_set_class(&etnaviv_obj->lock, &etnaviv_userptr_lock_class);
+
 	etnaviv_obj->userptr.ptr = ptr;
-	etnaviv_obj->userptr.task = current;
+	etnaviv_obj->userptr.mm = current->mm;
 	etnaviv_obj->userptr.ro = !(flags & ETNA_USERPTR_WRITE);
-	get_task_struct(current);
 
-	ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base);
-	if (ret)
-		goto unreference;
+	etnaviv_gem_obj_add(dev, &etnaviv_obj->base);
 
 	ret = drm_gem_handle_create(file, &etnaviv_obj->base, handle);
-unreference:
+
 	/* drop reference from allocate - handle holds it now */
 	drm_gem_object_put_unlocked(&etnaviv_obj->base);
 	return ret;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index e437fba1209d925cca7bf7f33b5651c3eeeda21a..be72a9833f2b4723c28da411896bb9eaa4ffe334 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -18,6 +18,7 @@
 #define __ETNAVIV_GEM_H__
 
 #include <linux/reservation.h>
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 
 struct dma_fence;
@@ -26,8 +27,7 @@ struct etnaviv_gem_object;
 
 struct etnaviv_gem_userptr {
 	uintptr_t ptr;
-	struct task_struct *task;
-	struct work_struct *work;
+	struct mm_struct *mm;
 	bool ro;
 };
 
@@ -98,26 +98,32 @@ struct etnaviv_gem_submit_bo {
 
 /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc,
  * associated with the cmdstream submission for synchronization (and
- * make it easier to unwind when things go wrong, etc).  This only
- * lasts for the duration of the submit-ioctl.
+ * make it easier to unwind when things go wrong, etc).
  */
 struct etnaviv_gem_submit {
-	struct drm_device *dev;
+	struct kref refcount;
 	struct etnaviv_gpu *gpu;
-	struct ww_acquire_ctx ticket;
-	struct dma_fence *fence;
+	struct dma_fence *out_fence, *in_fence;
+	struct list_head node; /* GPU active submit list */
+	struct etnaviv_cmdbuf cmdbuf;
+	bool runtime_resumed;
+	u32 exec_state;
 	u32 flags;
+	unsigned int nr_pmrs;
+	struct etnaviv_perfmon_request *pmrs;
 	unsigned int nr_bos;
 	struct etnaviv_gem_submit_bo bos[0];
 	/* No new members here, the previous one is variable-length! */
 };
 
+void etnaviv_submit_put(struct etnaviv_gem_submit * submit);
+
 int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
 	struct timespec *timeout);
 int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags,
 	struct reservation_object *robj, const struct etnaviv_gem_ops *ops,
 	struct etnaviv_gem_object **res);
-int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj);
+void etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj);
 struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *obj);
 void etnaviv_gem_put_pages(struct etnaviv_gem_object *obj);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index ae884723e9b1b7a59c3f38a4518d6a804be6b074..5704305d41e69f57466ded0628f37be669304fbd 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -19,6 +19,7 @@
 #include "etnaviv_drv.h"
 #include "etnaviv_gem.h"
 
+static struct lock_class_key etnaviv_prime_lock_class;
 
 struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj)
 {
@@ -125,6 +126,8 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
 	if (ret < 0)
 		return ERR_PTR(ret);
 
+	lockdep_set_class(&etnaviv_obj->lock, &etnaviv_prime_lock_class);
+
 	npages = size / PAGE_SIZE;
 
 	etnaviv_obj->sgt = sgt;
@@ -139,9 +142,7 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
 	if (ret)
 		goto fail;
 
-	ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base);
-	if (ret)
-		goto fail;
+	etnaviv_gem_obj_add(dev, &etnaviv_obj->base);
 
 	return &etnaviv_obj->base;
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index ff911541a190a4819e0809ea60159c0e08d3f6e0..1f8202bca061542a2f0d994d6f2554540809ac90 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -33,22 +33,25 @@
 #define BO_PINNED   0x2000
 
 static struct etnaviv_gem_submit *submit_create(struct drm_device *dev,
-		struct etnaviv_gpu *gpu, size_t nr)
+		struct etnaviv_gpu *gpu, size_t nr_bos, size_t nr_pmrs)
 {
 	struct etnaviv_gem_submit *submit;
-	size_t sz = size_vstruct(nr, sizeof(submit->bos[0]), sizeof(*submit));
+	size_t sz = size_vstruct(nr_bos, sizeof(submit->bos[0]), sizeof(*submit));
 
-	submit = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
-	if (submit) {
-		submit->dev = dev;
-		submit->gpu = gpu;
+	submit = kzalloc(sz, GFP_KERNEL);
+	if (!submit)
+		return NULL;
 
-		/* initially, until copy_from_user() and bo lookup succeeds: */
-		submit->nr_bos = 0;
-		submit->fence = NULL;
-
-		ww_acquire_init(&submit->ticket, &reservation_ww_class);
+	submit->pmrs = kcalloc(nr_pmrs, sizeof(struct etnaviv_perfmon_request),
+			       GFP_KERNEL);
+	if (!submit->pmrs) {
+		kfree(submit);
+		return NULL;
 	}
+	submit->nr_pmrs = nr_pmrs;
+
+	submit->gpu = gpu;
+	kref_init(&submit->refcount);
 
 	return submit;
 }
@@ -111,7 +114,8 @@ static void submit_unlock_object(struct etnaviv_gem_submit *submit, int i)
 	}
 }
 
-static int submit_lock_objects(struct etnaviv_gem_submit *submit)
+static int submit_lock_objects(struct etnaviv_gem_submit *submit,
+		struct ww_acquire_ctx *ticket)
 {
 	int contended, slow_locked = -1, i, ret = 0;
 
@@ -126,7 +130,7 @@ static int submit_lock_objects(struct etnaviv_gem_submit *submit)
 
 		if (!(submit->bos[i].flags & BO_LOCKED)) {
 			ret = ww_mutex_lock_interruptible(&etnaviv_obj->resv->lock,
-					&submit->ticket);
+							  ticket);
 			if (ret == -EALREADY)
 				DRM_ERROR("BO at index %u already on submit list\n",
 					  i);
@@ -136,7 +140,7 @@ static int submit_lock_objects(struct etnaviv_gem_submit *submit)
 		}
 	}
 
-	ww_acquire_done(&submit->ticket);
+	ww_acquire_done(ticket);
 
 	return 0;
 
@@ -154,7 +158,7 @@ static int submit_lock_objects(struct etnaviv_gem_submit *submit)
 
 		/* we lost out in a seqno race, lock and retry.. */
 		ret = ww_mutex_lock_slow_interruptible(&etnaviv_obj->resv->lock,
-				&submit->ticket);
+						       ticket);
 		if (!ret) {
 			submit->bos[contended].flags |= BO_LOCKED;
 			slow_locked = contended;
@@ -181,19 +185,33 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit)
 			break;
 	}
 
+	if (submit->flags & ETNA_SUBMIT_FENCE_FD_IN) {
+		/*
+		 * Wait if the fence is from a foreign context, or if the fence
+		 * array contains any fence from a foreign context.
+		 */
+		if (!dma_fence_match_context(submit->in_fence, context))
+			ret = dma_fence_wait(submit->in_fence, true);
+	}
+
 	return ret;
 }
 
-static void submit_unpin_objects(struct etnaviv_gem_submit *submit)
+static void submit_attach_object_fences(struct etnaviv_gem_submit *submit)
 {
 	int i;
 
 	for (i = 0; i < submit->nr_bos; i++) {
-		if (submit->bos[i].flags & BO_PINNED)
-			etnaviv_gem_mapping_unreference(submit->bos[i].mapping);
+		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+
+		if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE)
+			reservation_object_add_excl_fence(etnaviv_obj->resv,
+							  submit->out_fence);
+		else
+			reservation_object_add_shared_fence(etnaviv_obj->resv,
+							    submit->out_fence);
 
-		submit->bos[i].mapping = NULL;
-		submit->bos[i].flags &= ~BO_PINNED;
+		submit_unlock_object(submit, i);
 	}
 }
 
@@ -211,6 +229,7 @@ static int submit_pin_objects(struct etnaviv_gem_submit *submit)
 			ret = PTR_ERR(mapping);
 			break;
 		}
+		atomic_inc(&etnaviv_obj->gpu_active);
 
 		submit->bos[i].flags |= BO_PINNED;
 		submit->bos[i].mapping = mapping;
@@ -285,13 +304,11 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream,
 }
 
 static int submit_perfmon_validate(struct etnaviv_gem_submit *submit,
-		struct etnaviv_cmdbuf *cmdbuf,
-		const struct drm_etnaviv_gem_submit_pmr *pmrs,
-		u32 nr_pms)
+		u32 exec_state, const struct drm_etnaviv_gem_submit_pmr *pmrs)
 {
 	u32 i;
 
-	for (i = 0; i < nr_pms; i++) {
+	for (i = 0; i < submit->nr_pmrs; i++) {
 		const struct drm_etnaviv_gem_submit_pmr *r = pmrs + i;
 		struct etnaviv_gem_submit_bo *bo;
 		int ret;
@@ -316,39 +333,65 @@ static int submit_perfmon_validate(struct etnaviv_gem_submit *submit,
 			return -EINVAL;
 		}
 
-		if (etnaviv_pm_req_validate(r, cmdbuf->exec_state)) {
+		if (etnaviv_pm_req_validate(r, exec_state)) {
 			DRM_ERROR("perfmon request: domain or signal not valid");
 			return -EINVAL;
 		}
 
-		cmdbuf->pmrs[i].flags = r->flags;
-		cmdbuf->pmrs[i].domain = r->domain;
-		cmdbuf->pmrs[i].signal = r->signal;
-		cmdbuf->pmrs[i].sequence = r->sequence;
-		cmdbuf->pmrs[i].offset = r->read_offset;
-		cmdbuf->pmrs[i].bo_vma = etnaviv_gem_vmap(&bo->obj->base);
+		submit->pmrs[i].flags = r->flags;
+		submit->pmrs[i].domain = r->domain;
+		submit->pmrs[i].signal = r->signal;
+		submit->pmrs[i].sequence = r->sequence;
+		submit->pmrs[i].offset = r->read_offset;
+		submit->pmrs[i].bo_vma = etnaviv_gem_vmap(&bo->obj->base);
 	}
 
 	return 0;
 }
 
-static void submit_cleanup(struct etnaviv_gem_submit *submit)
+static void submit_cleanup(struct kref *kref)
 {
+	struct etnaviv_gem_submit *submit =
+			container_of(kref, struct etnaviv_gem_submit, refcount);
 	unsigned i;
 
+	if (submit->runtime_resumed)
+		pm_runtime_put_autosuspend(submit->gpu->dev);
+
+	if (submit->cmdbuf.suballoc)
+		etnaviv_cmdbuf_free(&submit->cmdbuf);
+
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
 
+		/* unpin all objects */
+		if (submit->bos[i].flags & BO_PINNED) {
+			etnaviv_gem_mapping_unreference(submit->bos[i].mapping);
+			atomic_dec(&etnaviv_obj->gpu_active);
+			submit->bos[i].mapping = NULL;
+			submit->bos[i].flags &= ~BO_PINNED;
+		}
+
+		/* if the GPU submit failed, objects might still be locked */
 		submit_unlock_object(submit, i);
 		drm_gem_object_put_unlocked(&etnaviv_obj->base);
 	}
 
-	ww_acquire_fini(&submit->ticket);
-	if (submit->fence)
-		dma_fence_put(submit->fence);
+	wake_up_all(&submit->gpu->fence_event);
+
+	if (submit->in_fence)
+		dma_fence_put(submit->in_fence);
+	if (submit->out_fence)
+		dma_fence_put(submit->out_fence);
+	kfree(submit->pmrs);
 	kfree(submit);
 }
 
+void etnaviv_submit_put(struct etnaviv_gem_submit *submit)
+{
+	kref_put(&submit->refcount, submit_cleanup);
+}
+
 int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		struct drm_file *file)
 {
@@ -358,10 +401,9 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct drm_etnaviv_gem_submit_pmr *pmrs;
 	struct drm_etnaviv_gem_submit_bo *bos;
 	struct etnaviv_gem_submit *submit;
-	struct etnaviv_cmdbuf *cmdbuf;
 	struct etnaviv_gpu *gpu;
-	struct dma_fence *in_fence = NULL;
 	struct sync_file *sync_file = NULL;
+	struct ww_acquire_ctx ticket;
 	int out_fence_fd = -1;
 	void *stream;
 	int ret;
@@ -399,17 +441,11 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	relocs = kvmalloc_array(args->nr_relocs, sizeof(*relocs), GFP_KERNEL);
 	pmrs = kvmalloc_array(args->nr_pmrs, sizeof(*pmrs), GFP_KERNEL);
 	stream = kvmalloc_array(1, args->stream_size, GFP_KERNEL);
-	cmdbuf = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc,
-				    ALIGN(args->stream_size, 8) + 8,
-				    args->nr_bos, args->nr_pmrs);
-	if (!bos || !relocs || !pmrs || !stream || !cmdbuf) {
+	if (!bos || !relocs || !pmrs || !stream) {
 		ret = -ENOMEM;
 		goto err_submit_cmds;
 	}
 
-	cmdbuf->exec_state = args->exec_state;
-	cmdbuf->ctx = file->driver_priv;
-
 	ret = copy_from_user(bos, u64_to_user_ptr(args->bos),
 			     args->nr_bos * sizeof(*bos));
 	if (ret) {
@@ -430,7 +466,6 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		ret = -EFAULT;
 		goto err_submit_cmds;
 	}
-	cmdbuf->nr_pmrs = args->nr_pmrs;
 
 	ret = copy_from_user(stream, u64_to_user_ptr(args->stream),
 			     args->stream_size);
@@ -447,19 +482,28 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		}
 	}
 
-	submit = submit_create(dev, gpu, args->nr_bos);
+	ww_acquire_init(&ticket, &reservation_ww_class);
+
+	submit = submit_create(dev, gpu, args->nr_bos, args->nr_pmrs);
 	if (!submit) {
 		ret = -ENOMEM;
-		goto err_submit_cmds;
+		goto err_submit_ww_acquire;
 	}
 
+	ret = etnaviv_cmdbuf_init(gpu->cmdbuf_suballoc, &submit->cmdbuf,
+				  ALIGN(args->stream_size, 8) + 8);
+	if (ret)
+		goto err_submit_objects;
+
+	submit->cmdbuf.ctx = file->driver_priv;
+	submit->exec_state = args->exec_state;
 	submit->flags = args->flags;
 
 	ret = submit_lookup_objects(submit, file, bos, args->nr_bos);
 	if (ret)
 		goto err_submit_objects;
 
-	ret = submit_lock_objects(submit);
+	ret = submit_lock_objects(submit, &ticket);
 	if (ret)
 		goto err_submit_objects;
 
@@ -470,21 +514,11 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	}
 
 	if (args->flags & ETNA_SUBMIT_FENCE_FD_IN) {
-		in_fence = sync_file_get_fence(args->fence_fd);
-		if (!in_fence) {
+		submit->in_fence = sync_file_get_fence(args->fence_fd);
+		if (!submit->in_fence) {
 			ret = -EINVAL;
 			goto err_submit_objects;
 		}
-
-		/*
-		 * Wait if the fence is from a foreign context, or if the fence
-		 * array contains any fence from a foreign context.
-		 */
-		if (!dma_fence_match_context(in_fence, gpu->fence_context)) {
-			ret = dma_fence_wait(in_fence, true);
-			if (ret)
-				goto err_submit_objects;
-		}
 	}
 
 	ret = submit_fence_sync(submit);
@@ -493,25 +527,25 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 
 	ret = submit_pin_objects(submit);
 	if (ret)
-		goto out;
+		goto err_submit_objects;
 
 	ret = submit_reloc(submit, stream, args->stream_size / 4,
 			   relocs, args->nr_relocs);
 	if (ret)
-		goto out;
+		goto err_submit_objects;
 
-	ret = submit_perfmon_validate(submit, cmdbuf, pmrs, args->nr_pmrs);
+	ret = submit_perfmon_validate(submit, args->exec_state, pmrs);
 	if (ret)
-		goto out;
+		goto err_submit_objects;
 
-	memcpy(cmdbuf->vaddr, stream, args->stream_size);
-	cmdbuf->user_size = ALIGN(args->stream_size, 8);
+	memcpy(submit->cmdbuf.vaddr, stream, args->stream_size);
+	submit->cmdbuf.user_size = ALIGN(args->stream_size, 8);
 
-	ret = etnaviv_gpu_submit(gpu, submit, cmdbuf);
+	ret = etnaviv_gpu_submit(gpu, submit);
 	if (ret)
-		goto out;
+		goto err_submit_objects;
 
-	cmdbuf = NULL;
+	submit_attach_object_fences(submit);
 
 	if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) {
 		/*
@@ -520,39 +554,26 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		 * fence to the sync file here, eliminating the ENOMEM
 		 * possibility at this stage.
 		 */
-		sync_file = sync_file_create(submit->fence);
+		sync_file = sync_file_create(submit->out_fence);
 		if (!sync_file) {
 			ret = -ENOMEM;
-			goto out;
+			goto err_submit_objects;
 		}
 		fd_install(out_fence_fd, sync_file->file);
 	}
 
 	args->fence_fd = out_fence_fd;
-	args->fence = submit->fence->seqno;
-
-out:
-	submit_unpin_objects(submit);
-
-	/*
-	 * If we're returning -EAGAIN, it may be due to the userptr code
-	 * wanting to run its workqueue outside of any locks. Flush our
-	 * workqueue to ensure that it is run in a timely manner.
-	 */
-	if (ret == -EAGAIN)
-		flush_workqueue(priv->wq);
+	args->fence = submit->out_fence->seqno;
 
 err_submit_objects:
-	if (in_fence)
-		dma_fence_put(in_fence);
-	submit_cleanup(submit);
+	etnaviv_submit_put(submit);
+
+err_submit_ww_acquire:
+	ww_acquire_fini(&ticket);
 
 err_submit_cmds:
 	if (ret && (out_fence_fd >= 0))
 		put_unused_fd(out_fence_fd);
-	/* if we still own the cmdbuf */
-	if (cmdbuf)
-		etnaviv_cmdbuf_free(cmdbuf);
 	if (stream)
 		kvfree(stream);
 	if (bos)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index e19cbe05da2a31ca9456d2476e388167aedee219..21d0d22f11688fbaf7606faa8e7011e6ba82a932 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -644,7 +644,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 	prefetch = etnaviv_buffer_init(gpu);
 
 	gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U);
-	etnaviv_gpu_start_fe(gpu, etnaviv_cmdbuf_get_va(gpu->buffer),
+	etnaviv_gpu_start_fe(gpu, etnaviv_cmdbuf_get_va(&gpu->buffer),
 			     prefetch);
 }
 
@@ -717,15 +717,15 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	}
 
 	/* Create buffer: */
-	gpu->buffer = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, PAGE_SIZE, 0, 0);
-	if (!gpu->buffer) {
-		ret = -ENOMEM;
+	ret = etnaviv_cmdbuf_init(gpu->cmdbuf_suballoc, &gpu->buffer,
+				  PAGE_SIZE);
+	if (ret) {
 		dev_err(gpu->dev, "could not create command buffer\n");
 		goto destroy_iommu;
 	}
 
 	if (gpu->mmu->version == ETNAVIV_IOMMU_V1 &&
-	    etnaviv_cmdbuf_get_va(gpu->buffer) > 0x80000000) {
+	    etnaviv_cmdbuf_get_va(&gpu->buffer) > 0x80000000) {
 		ret = -EINVAL;
 		dev_err(gpu->dev,
 			"command buffer outside valid memory window\n");
@@ -751,8 +751,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	return 0;
 
 free_buffer:
-	etnaviv_cmdbuf_free(gpu->buffer);
-	gpu->buffer = NULL;
+	etnaviv_cmdbuf_free(&gpu->buffer);
 destroy_iommu:
 	etnaviv_iommu_destroy(gpu->mmu);
 	gpu->mmu = NULL;
@@ -958,7 +957,7 @@ static void recover_worker(struct work_struct *work)
 	pm_runtime_put_autosuspend(gpu->dev);
 
 	/* Retire the buffer objects in a work */
-	etnaviv_queue_work(gpu->drm, &gpu->retire_work);
+	queue_work(gpu->wq, &gpu->retire_work);
 }
 
 static void hangcheck_timer_reset(struct etnaviv_gpu *gpu)
@@ -994,7 +993,7 @@ static void hangcheck_handler(struct timer_list *t)
 		dev_err(gpu->dev, "     completed fence: %u\n", fence);
 		dev_err(gpu->dev, "     active fence: %u\n",
 			gpu->active_fence);
-		etnaviv_queue_work(gpu->drm, &gpu->recover_work);
+		queue_work(gpu->wq, &gpu->recover_work);
 	}
 
 	/* if still more pending work, reset the hangcheck timer: */
@@ -1201,42 +1200,23 @@ static void retire_worker(struct work_struct *work)
 	struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
 					       retire_work);
 	u32 fence = gpu->completed_fence;
-	struct etnaviv_cmdbuf *cmdbuf, *tmp;
-	unsigned int i;
+	struct etnaviv_gem_submit *submit, *tmp;
+	LIST_HEAD(retire_list);
 
 	mutex_lock(&gpu->lock);
-	list_for_each_entry_safe(cmdbuf, tmp, &gpu->active_cmd_list, node) {
-		if (!dma_fence_is_signaled(cmdbuf->fence))
+	list_for_each_entry_safe(submit, tmp, &gpu->active_submit_list, node) {
+		if (!dma_fence_is_signaled(submit->out_fence))
 			break;
 
-		list_del(&cmdbuf->node);
-		dma_fence_put(cmdbuf->fence);
-
-		for (i = 0; i < cmdbuf->nr_bos; i++) {
-			struct etnaviv_vram_mapping *mapping = cmdbuf->bo_map[i];
-			struct etnaviv_gem_object *etnaviv_obj = mapping->object;
-
-			atomic_dec(&etnaviv_obj->gpu_active);
-			/* drop the refcount taken in etnaviv_gpu_submit */
-			etnaviv_gem_mapping_unreference(mapping);
-		}
-
-		etnaviv_cmdbuf_free(cmdbuf);
-		/*
-		 * We need to balance the runtime PM count caused by
-		 * each submission.  Upon submission, we increment
-		 * the runtime PM counter, and allocate one event.
-		 * So here, we put the runtime PM count for each
-		 * completed event.
-		 */
-		pm_runtime_put_autosuspend(gpu->dev);
+		list_move(&submit->node, &retire_list);
 	}
 
 	gpu->retired_fence = fence;
 
 	mutex_unlock(&gpu->lock);
 
-	wake_up_all(&gpu->fence_event);
+	list_for_each_entry_safe(submit, tmp, &retire_list, node)
+		etnaviv_submit_put(submit);
 }
 
 int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
@@ -1295,41 +1275,25 @@ int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
 	ret = wait_event_interruptible_timeout(gpu->fence_event,
 					       !is_active(etnaviv_obj),
 					       remaining);
-	if (ret > 0) {
-		struct etnaviv_drm_private *priv = gpu->drm->dev_private;
-
-		/* Synchronise with the retire worker */
-		flush_workqueue(priv->wq);
+	if (ret > 0)
 		return 0;
-	} else if (ret == -ERESTARTSYS) {
+	else if (ret == -ERESTARTSYS)
 		return -ERESTARTSYS;
-	} else {
+	else
 		return -ETIMEDOUT;
-	}
-}
-
-int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu)
-{
-	return pm_runtime_get_sync(gpu->dev);
-}
-
-void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu)
-{
-	pm_runtime_mark_last_busy(gpu->dev);
-	pm_runtime_put_autosuspend(gpu->dev);
 }
 
 static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu,
 	struct etnaviv_event *event, unsigned int flags)
 {
-	const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
+	const struct etnaviv_gem_submit *submit = event->submit;
 	unsigned int i;
 
-	for (i = 0; i < cmdbuf->nr_pmrs; i++) {
-		const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
+	for (i = 0; i < submit->nr_pmrs; i++) {
+		const struct etnaviv_perfmon_request *pmr = submit->pmrs + i;
 
 		if (pmr->flags == flags)
-			etnaviv_perfmon_process(gpu, pmr);
+			etnaviv_perfmon_process(gpu, pmr, submit->exec_state);
 	}
 }
 
@@ -1354,14 +1318,14 @@ static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu,
 static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
 	struct etnaviv_event *event)
 {
-	const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
+	const struct etnaviv_gem_submit *submit = event->submit;
 	unsigned int i;
 	u32 val;
 
 	sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST);
 
-	for (i = 0; i < cmdbuf->nr_pmrs; i++) {
-		const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
+	for (i = 0; i < submit->nr_pmrs; i++) {
+		const struct etnaviv_perfmon_request *pmr = submit->pmrs + i;
 
 		*pmr->bo_vma = pmr->sequence;
 	}
@@ -1380,24 +1344,15 @@ static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
 
 /* add bo's to gpu's ring, and kick gpu: */
 int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
-	struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf)
+	struct etnaviv_gem_submit *submit)
 {
-	struct dma_fence *fence;
 	unsigned int i, nr_events = 1, event[3];
 	int ret;
 
-	ret = etnaviv_gpu_pm_get_sync(gpu);
+	ret = pm_runtime_get_sync(gpu->dev);
 	if (ret < 0)
 		return ret;
-
-	/*
-	 * TODO
-	 *
-	 * - flush
-	 * - data endian
-	 * - prefetch
-	 *
-	 */
+	submit->runtime_resumed = true;
 
 	/*
 	 * if there are performance monitor requests we need to have
@@ -1406,19 +1361,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	 * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests
 	 *   and update the sequence number for userspace.
 	 */
-	if (cmdbuf->nr_pmrs)
+	if (submit->nr_pmrs)
 		nr_events = 3;
 
 	ret = event_alloc(gpu, nr_events, event);
 	if (ret) {
 		DRM_ERROR("no free events\n");
-		goto out_pm_put;
+		return ret;
 	}
 
 	mutex_lock(&gpu->lock);
 
-	fence = etnaviv_gpu_fence_alloc(gpu);
-	if (!fence) {
+	submit->out_fence = etnaviv_gpu_fence_alloc(gpu);
+	if (!submit->out_fence) {
 		for (i = 0; i < nr_events; i++)
 			event_free(gpu, event[i]);
 
@@ -1426,80 +1381,51 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 		goto out_unlock;
 	}
 
-	gpu->event[event[0]].fence = fence;
-	submit->fence = dma_fence_get(fence);
-	gpu->active_fence = submit->fence->seqno;
+	gpu->active_fence = submit->out_fence->seqno;
 
-	if (gpu->lastctx != cmdbuf->ctx) {
-		gpu->mmu->need_flush = true;
-		gpu->switch_context = true;
-		gpu->lastctx = cmdbuf->ctx;
-	}
-
-	if (cmdbuf->nr_pmrs) {
+	if (submit->nr_pmrs) {
 		gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
-		gpu->event[event[1]].cmdbuf = cmdbuf;
+		kref_get(&submit->refcount);
+		gpu->event[event[1]].submit = submit;
 		etnaviv_sync_point_queue(gpu, event[1]);
 	}
 
-	etnaviv_buffer_queue(gpu, event[0], cmdbuf);
+	kref_get(&submit->refcount);
+	gpu->event[event[0]].fence = submit->out_fence;
+	etnaviv_buffer_queue(gpu, submit->exec_state, event[0],
+			     &submit->cmdbuf);
 
-	if (cmdbuf->nr_pmrs) {
+	if (submit->nr_pmrs) {
 		gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post;
-		gpu->event[event[2]].cmdbuf = cmdbuf;
+		kref_get(&submit->refcount);
+		gpu->event[event[2]].submit = submit;
 		etnaviv_sync_point_queue(gpu, event[2]);
 	}
 
-	cmdbuf->fence = fence;
-	list_add_tail(&cmdbuf->node, &gpu->active_cmd_list);
-
-	/* We're committed to adding this command buffer, hold a PM reference */
-	pm_runtime_get_noresume(gpu->dev);
-
-	for (i = 0; i < submit->nr_bos; i++) {
-		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+	list_add_tail(&submit->node, &gpu->active_submit_list);
 
-		/* Each cmdbuf takes a refcount on the mapping */
-		etnaviv_gem_mapping_reference(submit->bos[i].mapping);
-		cmdbuf->bo_map[i] = submit->bos[i].mapping;
-		atomic_inc(&etnaviv_obj->gpu_active);
-
-		if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE)
-			reservation_object_add_excl_fence(etnaviv_obj->resv,
-							  fence);
-		else
-			reservation_object_add_shared_fence(etnaviv_obj->resv,
-							    fence);
-	}
-	cmdbuf->nr_bos = submit->nr_bos;
 	hangcheck_timer_reset(gpu);
 	ret = 0;
 
 out_unlock:
 	mutex_unlock(&gpu->lock);
 
-out_pm_put:
-	etnaviv_gpu_pm_put(gpu);
-
 	return ret;
 }
 
-static void etnaviv_process_sync_point(struct etnaviv_gpu *gpu,
-	struct etnaviv_event *event)
-{
-	u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
-
-	event->sync_point(gpu, event);
-	etnaviv_gpu_start_fe(gpu, addr + 2, 2);
-}
-
 static void sync_point_worker(struct work_struct *work)
 {
 	struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
 					       sync_point_work);
+	struct etnaviv_event *event = &gpu->event[gpu->sync_point_event];
+	u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
 
-	etnaviv_process_sync_point(gpu, &gpu->event[gpu->sync_point_event]);
+	event->sync_point(gpu, event);
+	etnaviv_submit_put(event->submit);
 	event_free(gpu, gpu->sync_point_event);
+
+	/* restart FE last to avoid GPU and IRQ racing against this worker */
+	etnaviv_gpu_start_fe(gpu, addr + 2, 2);
 }
 
 /*
@@ -1550,7 +1476,7 @@ static irqreturn_t irq_handler(int irq, void *data)
 
 			if (gpu->event[event].sync_point) {
 				gpu->sync_point_event = event;
-				etnaviv_queue_work(gpu->drm, &gpu->sync_point_work);
+				queue_work(gpu->wq, &gpu->sync_point_work);
 			}
 
 			fence = gpu->event[event].fence;
@@ -1576,7 +1502,7 @@ static irqreturn_t irq_handler(int irq, void *data)
 		}
 
 		/* Retire the buffer objects in a work */
-		etnaviv_queue_work(gpu->drm, &gpu->retire_work);
+		queue_work(gpu->wq, &gpu->retire_work);
 
 		ret = IRQ_HANDLED;
 	}
@@ -1653,9 +1579,11 @@ int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms)
 
 static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
 {
-	if (gpu->buffer) {
+	if (gpu->buffer.suballoc) {
 		/* Replace the last WAIT with END */
+		mutex_lock(&gpu->lock);
 		etnaviv_buffer_end(gpu);
+		mutex_unlock(&gpu->lock);
 
 		/*
 		 * We know that only the FE is busy here, this should
@@ -1680,7 +1608,7 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
 	etnaviv_gpu_update_clock(gpu);
 	etnaviv_gpu_hw_init(gpu);
 
-	gpu->switch_context = true;
+	gpu->lastctx = NULL;
 	gpu->exec_state = -1;
 
 	mutex_unlock(&gpu->lock);
@@ -1738,20 +1666,29 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
 	struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
 	int ret;
 
-	if (IS_ENABLED(CONFIG_THERMAL)) {
+	if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL)) {
 		gpu->cooling = thermal_of_cooling_device_register(dev->of_node,
 				(char *)dev_name(dev), gpu, &cooling_ops);
 		if (IS_ERR(gpu->cooling))
 			return PTR_ERR(gpu->cooling);
 	}
 
+	gpu->wq = alloc_ordered_workqueue(dev_name(dev), 0);
+	if (!gpu->wq) {
+		if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL))
+			thermal_cooling_device_unregister(gpu->cooling);
+		return -ENOMEM;
+	}
+
 #ifdef CONFIG_PM
 	ret = pm_runtime_get_sync(gpu->dev);
 #else
 	ret = etnaviv_gpu_clk_enable(gpu);
 #endif
 	if (ret < 0) {
-		thermal_cooling_device_unregister(gpu->cooling);
+		destroy_workqueue(gpu->wq);
+		if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL))
+			thermal_cooling_device_unregister(gpu->cooling);
 		return ret;
 	}
 
@@ -1759,7 +1696,7 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
 	gpu->fence_context = dma_fence_context_alloc(1);
 	spin_lock_init(&gpu->fence_spinlock);
 
-	INIT_LIST_HEAD(&gpu->active_cmd_list);
+	INIT_LIST_HEAD(&gpu->active_submit_list);
 	INIT_WORK(&gpu->retire_work, retire_worker);
 	INIT_WORK(&gpu->sync_point_work, sync_point_worker);
 	INIT_WORK(&gpu->recover_work, recover_worker);
@@ -1784,6 +1721,9 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 
 	hangcheck_disable(gpu);
 
+	flush_workqueue(gpu->wq);
+	destroy_workqueue(gpu->wq);
+
 #ifdef CONFIG_PM
 	pm_runtime_get_sync(gpu->dev);
 	pm_runtime_put_sync_suspend(gpu->dev);
@@ -1791,10 +1731,8 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 	etnaviv_gpu_hw_suspend(gpu);
 #endif
 
-	if (gpu->buffer) {
-		etnaviv_cmdbuf_free(gpu->buffer);
-		gpu->buffer = NULL;
-	}
+	if (gpu->buffer.suballoc)
+		etnaviv_cmdbuf_free(&gpu->buffer);
 
 	if (gpu->cmdbuf_suballoc) {
 		etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
@@ -1808,7 +1746,8 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 
 	gpu->drm = NULL;
 
-	thermal_cooling_device_unregister(gpu->cooling);
+	if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL))
+		thermal_cooling_device_unregister(gpu->cooling);
 	gpu->cooling = NULL;
 }
 
@@ -1931,7 +1870,7 @@ static int etnaviv_gpu_rpm_resume(struct device *dev)
 		return ret;
 
 	/* Re-initialise the basic hardware state */
-	if (gpu->drm && gpu->buffer) {
+	if (gpu->drm && gpu->buffer.suballoc) {
 		ret = etnaviv_gpu_hw_resume(gpu);
 		if (ret) {
 			etnaviv_gpu_clk_disable(gpu);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 4f10f147297ab25cb233e0032e3042b862f98442..7623905210dc3ab8e182d939874f612181c0018a 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -20,6 +20,7 @@
 #include <linux/clk.h>
 #include <linux/regulator/consumer.h>
 
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 
 struct etnaviv_gem_submit;
@@ -89,7 +90,7 @@ struct etnaviv_chip_identity {
 
 struct etnaviv_event {
 	struct dma_fence *fence;
-	struct etnaviv_cmdbuf *cmdbuf;
+	struct etnaviv_gem_submit *submit;
 
 	void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event);
 };
@@ -106,10 +107,10 @@ struct etnaviv_gpu {
 	struct mutex lock;
 	struct etnaviv_chip_identity identity;
 	struct etnaviv_file_private *lastctx;
-	bool switch_context;
+	struct workqueue_struct *wq;
 
 	/* 'ring'-buffer: */
-	struct etnaviv_cmdbuf *buffer;
+	struct etnaviv_cmdbuf buffer;
 	int exec_state;
 
 	/* bus base address of memory  */
@@ -122,7 +123,7 @@ struct etnaviv_gpu {
 	spinlock_t event_spinlock;
 
 	/* list of currently in-flight command buffers */
-	struct list_head active_cmd_list;
+	struct list_head active_submit_list;
 
 	u32 idle_mask;
 
@@ -202,7 +203,7 @@ int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
 int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
 	struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout);
 int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
-	struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf);
+	struct etnaviv_gem_submit *submit);
 int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu);
 void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
index 14e24ac6573febea9d7c891f9f45dd7048e1a4e0..7a8c9473174863d74cca9ce3856e7b058f557043 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
@@ -70,9 +70,8 @@ static int __etnaviv_iommu_init(struct etnaviv_iommuv1_domain *etnaviv_domain)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < PT_ENTRIES; i++)
-		etnaviv_domain->pgtable_cpu[i] =
-				etnaviv_domain->base.bad_page_dma;
+	memset32(etnaviv_domain->pgtable_cpu, etnaviv_domain->base.bad_page_dma,
+		 PT_ENTRIES);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
index fc60fc8ddbf0198ae0a82b4c4c123bba6fb477fe..1e956e266aa38428efb0fb680656a12c184e1653 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
@@ -229,7 +229,7 @@ void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu)
 	prefetch = etnaviv_buffer_config_mmuv2(gpu,
 				(u32)etnaviv_domain->mtlb_dma,
 				(u32)etnaviv_domain->base.bad_page_dma);
-	etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(gpu->buffer),
+	etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(&gpu->buffer),
 			     prefetch);
 	etnaviv_gpu_wait_idle(gpu, 100);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
index 35074b9447780f5e639f56f3fdf80dc6e29481ec..d113fe06e6b57e7aab168d320d77ad0c3ceae506 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@@ -263,18 +263,16 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
 		if (iova < 0x80000000 - sg_dma_len(sgt->sgl)) {
 			mapping->iova = iova;
 			list_add_tail(&mapping->mmu_node, &mmu->mappings);
-			mutex_unlock(&mmu->lock);
-			return 0;
+			ret = 0;
+			goto unlock;
 		}
 	}
 
 	node = &mapping->vram_node;
 
 	ret = etnaviv_iommu_find_iova(mmu, node, etnaviv_obj->base.size);
-	if (ret < 0) {
-		mutex_unlock(&mmu->lock);
-		return ret;
-	}
+	if (ret < 0)
+		goto unlock;
 
 	mmu->last_iova = node->start + etnaviv_obj->base.size;
 	mapping->iova = node->start;
@@ -283,12 +281,12 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
 
 	if (ret < 0) {
 		drm_mm_remove_node(node);
-		mutex_unlock(&mmu->lock);
-		return ret;
+		goto unlock;
 	}
 
 	list_add_tail(&mapping->mmu_node, &mmu->mappings);
 	mmu->need_flush = true;
+unlock:
 	mutex_unlock(&mmu->lock);
 
 	return ret;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
index 768f5aafdd186f8fb97b5b0d1f957128f98e8e36..26dddfc41aacb5ee43708a7893988d14959350b8 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
@@ -479,9 +479,9 @@ int etnaviv_pm_req_validate(const struct drm_etnaviv_gem_submit_pmr *r,
 }
 
 void etnaviv_perfmon_process(struct etnaviv_gpu *gpu,
-	const struct etnaviv_perfmon_request *pmr)
+	const struct etnaviv_perfmon_request *pmr, u32 exec_state)
 {
-	const struct etnaviv_pm_domain_meta *meta = &doms_meta[gpu->exec_state];
+	const struct etnaviv_pm_domain_meta *meta = &doms_meta[exec_state];
 	const struct etnaviv_pm_domain *dom;
 	const struct etnaviv_pm_signal *sig;
 	u32 *bo = pmr->bo_vma;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h
index 35dce194cb0042eff1349ba06ae3237515a638bb..c1653c64ab6b1e954fca5d70154dc348c05a50be 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h
@@ -44,6 +44,6 @@ int etnaviv_pm_req_validate(const struct drm_etnaviv_gem_submit_pmr *r,
 	u32 exec_state);
 
 void etnaviv_perfmon_process(struct etnaviv_gpu *gpu,
-	const struct etnaviv_perfmon_request *pmr);
+	const struct etnaviv_perfmon_request *pmr, u32 exec_state);
 
 #endif /* __ETNAVIV_PERFMON_H__ */
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index 5a7c9d8abd6b70f6893b303c34718b887af13182..735ce47688f9269c8ef2b1c4a220ced80440ca54 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -95,26 +95,21 @@ config DRM_EXYNOS_G2D
 	help
 	  Choose this option if you want to use Exynos G2D for DRM.
 
-config DRM_EXYNOS_IPP
-	bool "Image Post Processor"
-	help
-	  Choose this option if you want to use IPP feature for DRM.
-
 config DRM_EXYNOS_FIMC
 	bool "FIMC"
-	depends on DRM_EXYNOS_IPP && MFD_SYSCON
+	depends on BROKEN && MFD_SYSCON
 	help
 	  Choose this option if you want to use Exynos FIMC for DRM.
 
 config DRM_EXYNOS_ROTATOR
 	bool "Rotator"
-	depends on DRM_EXYNOS_IPP
+	depends on BROKEN
 	help
 	  Choose this option if you want to use Exynos Rotator for DRM.
 
 config DRM_EXYNOS_GSC
 	bool "GScaler"
-	depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n
+	depends on BROKEN && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n
 	help
 	  Choose this option if you want to use Exynos GSC for DRM.
 
diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile
index bdf4212dde7b9bd97a354d93ac50f539051f29b8..a51c5459bb135bb7b63f3e9b4492833336de949b 100644
--- a/drivers/gpu/drm/exynos/Makefile
+++ b/drivers/gpu/drm/exynos/Makefile
@@ -18,7 +18,6 @@ exynosdrm-$(CONFIG_DRM_EXYNOS_MIXER)	+= exynos_mixer.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI)	+= exynos_hdmi.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI)	+= exynos_drm_vidi.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_G2D)	+= exynos_drm_g2d.o
-exynosdrm-$(CONFIG_DRM_EXYNOS_IPP)	+= exynos_drm_ipp.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC)	+= exynos_drm_fimc.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR)	+= exynos_drm_rotator.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_GSC)	+= exynos_drm_gsc.o
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 6be5b53c3b279f42ee6dbcc445173fa539721ce5..1c330f2a7a5d11b09b3dbdf4a86cc4839788aa9a 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -21,13 +21,12 @@
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
 
-#include <video/exynos5433_decon.h>
-
 #include "exynos_drm_drv.h"
 #include "exynos_drm_crtc.h"
 #include "exynos_drm_fb.h"
 #include "exynos_drm_plane.h"
 #include "exynos_drm_iommu.h"
+#include "regs-decon5433.h"
 
 #define DSD_CFG_MUX 0x1004
 #define DSD_CFG_MUX_TE_UNMASK_GLOBAL BIT(13)
@@ -744,11 +743,6 @@ static int exynos5433_decon_probe(struct platform_device *pdev)
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(dev, "cannot find IO resource\n");
-		return -ENXIO;
-	}
-
 	ctx->addr = devm_ioremap_resource(dev, res);
 	if (IS_ERR(ctx->addr)) {
 		dev_err(dev, "ioremap failed\n");
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index 615efcf7782a130dce3c06975a611323b12dcf5e..3931d5e33fe07aa77ec5a39c30875f9ac0b8edd6 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -25,13 +25,13 @@
 
 #include <video/of_display_timing.h>
 #include <video/of_videomode.h>
-#include <video/exynos7_decon.h>
 
 #include "exynos_drm_crtc.h"
 #include "exynos_drm_plane.h"
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
 #include "exynos_drm_iommu.h"
+#include "regs-decon7.h"
 
 /*
  * DECON stands for Display and Enhancement controller.
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 82b72425a42f7977c993134a2142434d8689227f..a518e9c6d6ccf5284895a599ae73d1d775b9a285 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -16,6 +16,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 
 #include <linux/component.h>
 
@@ -28,7 +29,6 @@
 #include "exynos_drm_plane.h"
 #include "exynos_drm_vidi.h"
 #include "exynos_drm_g2d.h"
-#include "exynos_drm_ipp.h"
 #include "exynos_drm_iommu.h"
 
 #define DRIVER_NAME	"exynos"
@@ -37,8 +37,6 @@
 #define DRIVER_MAJOR	1
 #define DRIVER_MINOR	0
 
-static struct device *exynos_drm_get_dma_device(void);
-
 int exynos_atomic_check(struct drm_device *dev,
 			struct drm_atomic_state *state)
 {
@@ -89,11 +87,6 @@ static void exynos_drm_postclose(struct drm_device *dev, struct drm_file *file)
 	file->driver_priv = NULL;
 }
 
-static void exynos_drm_lastclose(struct drm_device *dev)
-{
-	exynos_drm_fbdev_restore_mode(dev);
-}
-
 static const struct vm_operations_struct exynos_drm_gem_vm_ops = {
 	.fault = exynos_drm_gem_fault,
 	.open = drm_gem_vm_open,
@@ -115,14 +108,6 @@ static const struct drm_ioctl_desc exynos_ioctls[] = {
 			DRM_AUTH | DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(EXYNOS_G2D_EXEC, exynos_g2d_exec_ioctl,
 			DRM_AUTH | DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_PROPERTY, exynos_drm_ipp_get_property,
-			DRM_AUTH | DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(EXYNOS_IPP_SET_PROPERTY, exynos_drm_ipp_set_property,
-			DRM_AUTH | DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(EXYNOS_IPP_QUEUE_BUF, exynos_drm_ipp_queue_buf,
-			DRM_AUTH | DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(EXYNOS_IPP_CMD_CTRL, exynos_drm_ipp_cmd_ctrl,
-			DRM_AUTH | DRM_RENDER_ALLOW),
 };
 
 static const struct file_operations exynos_drm_driver_fops = {
@@ -140,7 +125,7 @@ static struct drm_driver exynos_drm_driver = {
 	.driver_features	= DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME
 				  | DRIVER_ATOMIC | DRIVER_RENDER,
 	.open			= exynos_drm_open,
-	.lastclose		= exynos_drm_lastclose,
+	.lastclose		= drm_fb_helper_lastclose,
 	.postclose		= exynos_drm_postclose,
 	.gem_free_object_unlocked = exynos_drm_gem_free_object,
 	.gem_vm_ops		= &exynos_drm_gem_vm_ops,
@@ -148,7 +133,7 @@ static struct drm_driver exynos_drm_driver = {
 	.prime_handle_to_fd	= drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle	= drm_gem_prime_fd_to_handle,
 	.gem_prime_export	= drm_gem_prime_export,
-	.gem_prime_import	= drm_gem_prime_import,
+	.gem_prime_import	= exynos_drm_gem_prime_import,
 	.gem_prime_get_sg_table	= exynos_drm_gem_prime_get_sg_table,
 	.gem_prime_import_sg_table	= exynos_drm_gem_prime_import_sg_table,
 	.gem_prime_vmap		= exynos_drm_gem_prime_vmap,
@@ -262,9 +247,6 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = {
 		DRV_PTR(rotator_driver, CONFIG_DRM_EXYNOS_ROTATOR),
 	}, {
 		DRV_PTR(gsc_driver, CONFIG_DRM_EXYNOS_GSC),
-	}, {
-		DRV_PTR(ipp_driver, CONFIG_DRM_EXYNOS_IPP),
-		DRM_VIRTUAL_DEVICE
 	}, {
 		&exynos_drm_platform_driver,
 		DRM_VIRTUAL_DEVICE
@@ -301,6 +283,27 @@ static struct component_match *exynos_drm_match_add(struct device *dev)
 	return match ?: ERR_PTR(-ENODEV);
 }
 
+static struct device *exynos_drm_get_dma_device(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(exynos_drm_drivers); ++i) {
+		struct exynos_drm_driver_info *info = &exynos_drm_drivers[i];
+		struct device *dev;
+
+		if (!info->driver || !(info->flags & DRM_DMA_DEVICE))
+			continue;
+
+		while ((dev = bus_find_device(&platform_bus_type, NULL,
+					    &info->driver->driver,
+					    (void *)platform_bus_type.match))) {
+			put_device(dev);
+			return dev;
+		}
+	}
+	return NULL;
+}
+
 static int exynos_drm_bind(struct device *dev)
 {
 	struct exynos_drm_private *private;
@@ -469,27 +472,6 @@ static struct platform_driver exynos_drm_platform_driver = {
 	},
 };
 
-static struct device *exynos_drm_get_dma_device(void)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(exynos_drm_drivers); ++i) {
-		struct exynos_drm_driver_info *info = &exynos_drm_drivers[i];
-		struct device *dev;
-
-		if (!info->driver || !(info->flags & DRM_DMA_DEVICE))
-			continue;
-
-		while ((dev = bus_find_device(&platform_bus_type, NULL,
-					    &info->driver->driver,
-					    (void *)platform_bus_type.match))) {
-			put_device(dev);
-			return dev;
-		}
-	}
-	return NULL;
-}
-
 static void exynos_drm_unregister_devices(void)
 {
 	int i;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index c6847fa708fa1d227fd09283a5d54b1f83aea19e..df2262f70d91681ae88cf1ff37da4d8cbaa23cdd 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -188,17 +188,11 @@ struct exynos_drm_g2d_private {
 
 struct drm_exynos_file_private {
 	struct exynos_drm_g2d_private	*g2d_priv;
-	struct device			*ipp_dev;
 };
 
 /*
  * Exynos drm private structure.
  *
- * @da_start: start address to device address space.
- *	with iommu, device address space starts from this address
- *	otherwise default one.
- * @da_space_size: size of device address space.
- *	if 0 then default value is used for it.
  * @pending: the crtcs that have pending updates to finish
  * @lock: protect access to @pending
  * @wait: wait an atomic commit to finish
@@ -296,6 +290,5 @@ extern struct platform_driver g2d_driver;
 extern struct platform_driver fimc_driver;
 extern struct platform_driver rotator_driver;
 extern struct platform_driver gsc_driver;
-extern struct platform_driver ipp_driver;
 extern struct platform_driver mic_driver;
 #endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index 8208df56a88f7f9fcc6b82ff6b1fc1f569671e9e..0faaf829f5bf52a9d730d47f5de035a16a77a165 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -205,7 +205,7 @@ static struct drm_mode_config_helper_funcs exynos_drm_mode_config_helpers = {
 
 static const struct drm_mode_config_funcs exynos_drm_mode_config_funcs = {
 	.fb_create = exynos_user_fb_create,
-	.output_poll_changed = exynos_drm_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = exynos_atomic_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index dfb66ecf417b5c8ba9bd26b8966a52a5f14b4aaa..132dd52d0ac7f1d2dda11e1607a44aa9495f8cc4 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -270,24 +270,6 @@ void exynos_drm_fbdev_fini(struct drm_device *dev)
 	private->fb_helper = NULL;
 }
 
-void exynos_drm_fbdev_restore_mode(struct drm_device *dev)
-{
-	struct exynos_drm_private *private = dev->dev_private;
-
-	if (!private || !private->fb_helper)
-		return;
-
-	drm_fb_helper_restore_fbdev_mode_unlocked(private->fb_helper);
-}
-
-void exynos_drm_output_poll_changed(struct drm_device *dev)
-{
-	struct exynos_drm_private *private = dev->dev_private;
-	struct drm_fb_helper *fb_helper = private->fb_helper;
-
-	drm_fb_helper_hotplug_event(fb_helper);
-}
-
 void exynos_drm_fbdev_suspend(struct drm_device *dev)
 {
 	struct exynos_drm_private *private = dev->dev_private;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
index 645d1bb7f665faed0dfa30bf3ab36c090a2c1c1f..b33847223a85506cd94022874ca6da7f82bf9364 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
@@ -19,8 +19,6 @@
 
 int exynos_drm_fbdev_init(struct drm_device *dev);
 void exynos_drm_fbdev_fini(struct drm_device *dev);
-void exynos_drm_fbdev_restore_mode(struct drm_device *dev);
-void exynos_drm_output_poll_changed(struct drm_device *dev);
 void exynos_drm_fbdev_suspend(struct drm_device *drm);
 void exynos_drm_fbdev_resume(struct drm_device *drm);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 077de014d61017d77853c41a7e5a2d4084e092f1..11cc01b47bc0ad2990a7d419b320668d11b64696 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -247,6 +247,15 @@ struct exynos_drm_gem *exynos_drm_gem_create(struct drm_device *dev,
 	if (IS_ERR(exynos_gem))
 		return exynos_gem;
 
+	if (!is_drm_iommu_supported(dev) && (flags & EXYNOS_BO_NONCONTIG)) {
+		/*
+		 * when no IOMMU is available, all allocated buffers are
+		 * contiguous anyway, so drop EXYNOS_BO_NONCONTIG flag
+		 */
+		flags &= ~EXYNOS_BO_NONCONTIG;
+		DRM_WARN("Non-contiguous allocation is not supported without IOMMU, falling back to contiguous buffer\n");
+	}
+
 	/* set memory type and cache attribute from user side. */
 	exynos_gem->flags = flags;
 
@@ -506,6 +515,12 @@ int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 }
 
 /* low-level interface prime helpers */
+struct drm_gem_object *exynos_drm_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf)
+{
+	return drm_gem_prime_import_dev(dev, dma_buf, to_dma_dev(dev));
+}
+
 struct sg_table *exynos_drm_gem_prime_get_sg_table(struct drm_gem_object *obj)
 {
 	struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index e86d1a9518c31bc4de5cbbf196303d8a6b833ce7..5a4c7de80f657a5219aedcf9edc266a8655e9d54 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -117,6 +117,8 @@ int exynos_drm_gem_fault(struct vm_fault *vmf);
 int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 
 /* low-level interface prime helpers */
+struct drm_gem_object *exynos_drm_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf);
 struct sg_table *exynos_drm_gem_prime_get_sg_table(struct drm_gem_object *obj);
 struct drm_gem_object *
 exynos_drm_gem_prime_import_sg_table(struct drm_device *dev,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
deleted file mode 100644
index 3edda18cc2d2d61b7010b57817aeecababe1d4c5..0000000000000000000000000000000000000000
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ /dev/null
@@ -1,1806 +0,0 @@
-/*
- * Copyright (C) 2012 Samsung Electronics Co.Ltd
- * Authors:
- *	Eunchul Kim <chulspro.kim@samsung.com>
- *	Jinyoung Jeon <jy0.jeon@samsung.com>
- *	Sangmin Lee <lsmin.lee@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- */
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/types.h>
-#include <linux/clk.h>
-#include <linux/pm_runtime.h>
-
-#include <drm/drmP.h>
-#include <drm/exynos_drm.h>
-#include "exynos_drm_drv.h"
-#include "exynos_drm_gem.h"
-#include "exynos_drm_ipp.h"
-#include "exynos_drm_iommu.h"
-
-/*
- * IPP stands for Image Post Processing and
- * supports image scaler/rotator and input/output DMA operations.
- * using FIMC, GSC, Rotator, so on.
- * IPP is integration device driver of same attribute h/w
- */
-
-/*
- * TODO
- * 1. expand command control id.
- * 2. integrate	property and config.
- * 3. removed send_event id check routine.
- * 4. compare send_event id if needed.
- * 5. free subdrv_remove notifier callback list if needed.
- * 6. need to check subdrv_open about multi-open.
- * 7. need to power_on implement power and sysmmu ctrl.
- */
-
-#define get_ipp_context(dev)	platform_get_drvdata(to_platform_device(dev))
-#define ipp_is_m2m_cmd(c)	(c == IPP_CMD_M2M)
-
-/*
- * A structure of event.
- *
- * @base: base of event.
- * @event: ipp event.
- */
-struct drm_exynos_ipp_send_event {
-	struct drm_pending_event	base;
-	struct drm_exynos_ipp_event	event;
-};
-
-/*
- * A structure of memory node.
- *
- * @list: list head to memory queue information.
- * @ops_id: id of operations.
- * @prop_id: id of property.
- * @buf_id: id of buffer.
- * @buf_info: gem objects and dma address, size.
- * @filp: a pointer to drm_file.
- */
-struct drm_exynos_ipp_mem_node {
-	struct list_head	list;
-	enum drm_exynos_ops_id	ops_id;
-	u32	prop_id;
-	u32	buf_id;
-	struct drm_exynos_ipp_buf_info	buf_info;
-};
-
-/*
- * A structure of ipp context.
- *
- * @subdrv: prepare initialization using subdrv.
- * @ipp_lock: lock for synchronization of access to ipp_idr.
- * @prop_lock: lock for synchronization of access to prop_idr.
- * @ipp_idr: ipp driver idr.
- * @prop_idr: property idr.
- * @event_workq: event work queue.
- * @cmd_workq: command work queue.
- */
-struct ipp_context {
-	struct exynos_drm_subdrv	subdrv;
-	struct mutex	ipp_lock;
-	struct mutex	prop_lock;
-	struct idr	ipp_idr;
-	struct idr	prop_idr;
-	struct workqueue_struct	*event_workq;
-	struct workqueue_struct	*cmd_workq;
-};
-
-static LIST_HEAD(exynos_drm_ippdrv_list);
-static DEFINE_MUTEX(exynos_drm_ippdrv_lock);
-static BLOCKING_NOTIFIER_HEAD(exynos_drm_ippnb_list);
-
-int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv)
-{
-	mutex_lock(&exynos_drm_ippdrv_lock);
-	list_add_tail(&ippdrv->drv_list, &exynos_drm_ippdrv_list);
-	mutex_unlock(&exynos_drm_ippdrv_lock);
-
-	return 0;
-}
-
-int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv)
-{
-	mutex_lock(&exynos_drm_ippdrv_lock);
-	list_del(&ippdrv->drv_list);
-	mutex_unlock(&exynos_drm_ippdrv_lock);
-
-	return 0;
-}
-
-static int ipp_create_id(struct idr *id_idr, struct mutex *lock, void *obj)
-{
-	int ret;
-
-	mutex_lock(lock);
-	ret = idr_alloc(id_idr, obj, 1, 0, GFP_KERNEL);
-	mutex_unlock(lock);
-
-	return ret;
-}
-
-static void ipp_remove_id(struct idr *id_idr, struct mutex *lock, u32 id)
-{
-	mutex_lock(lock);
-	idr_remove(id_idr, id);
-	mutex_unlock(lock);
-}
-
-static void *ipp_find_obj(struct idr *id_idr, struct mutex *lock, u32 id)
-{
-	void *obj;
-
-	mutex_lock(lock);
-	obj = idr_find(id_idr, id);
-	mutex_unlock(lock);
-
-	return obj;
-}
-
-static int ipp_check_driver(struct exynos_drm_ippdrv *ippdrv,
-			    struct drm_exynos_ipp_property *property)
-{
-	if (ippdrv->dedicated || (!ipp_is_m2m_cmd(property->cmd) &&
-				  !pm_runtime_suspended(ippdrv->dev)))
-		return -EBUSY;
-
-	if (ippdrv->check_property &&
-	    ippdrv->check_property(ippdrv->dev, property))
-		return -EINVAL;
-
-	return 0;
-}
-
-static struct exynos_drm_ippdrv *ipp_find_driver(struct ipp_context *ctx,
-		struct drm_exynos_ipp_property *property)
-{
-	struct exynos_drm_ippdrv *ippdrv;
-	u32 ipp_id = property->ipp_id;
-	int ret;
-
-	if (ipp_id) {
-		ippdrv = ipp_find_obj(&ctx->ipp_idr, &ctx->ipp_lock, ipp_id);
-		if (!ippdrv) {
-			DRM_DEBUG("ipp%d driver not found\n", ipp_id);
-			return ERR_PTR(-ENODEV);
-		}
-
-		ret = ipp_check_driver(ippdrv, property);
-		if (ret < 0) {
-			DRM_DEBUG("ipp%d driver check error %d\n", ipp_id, ret);
-			return ERR_PTR(ret);
-		}
-
-		return ippdrv;
-	} else {
-		list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
-			ret = ipp_check_driver(ippdrv, property);
-			if (ret == 0)
-				return ippdrv;
-		}
-
-		DRM_DEBUG("cannot find driver suitable for given property.\n");
-	}
-
-	return ERR_PTR(-ENODEV);
-}
-
-static struct exynos_drm_ippdrv *ipp_find_drv_by_handle(u32 prop_id)
-{
-	struct exynos_drm_ippdrv *ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node;
-	int count = 0;
-
-	DRM_DEBUG_KMS("prop_id[%d]\n", prop_id);
-
-	/*
-	 * This case is search ipp driver by prop_id handle.
-	 * sometimes, ipp subsystem find driver by prop_id.
-	 * e.g PAUSE state, queue buf, command control.
-	 */
-	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
-		DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n", count++, ippdrv);
-
-		mutex_lock(&ippdrv->cmd_lock);
-		list_for_each_entry(c_node, &ippdrv->cmd_list, list) {
-			if (c_node->property.prop_id == prop_id) {
-				mutex_unlock(&ippdrv->cmd_lock);
-				return ippdrv;
-			}
-		}
-		mutex_unlock(&ippdrv->cmd_lock);
-	}
-
-	return ERR_PTR(-ENODEV);
-}
-
-int exynos_drm_ipp_get_property(struct drm_device *drm_dev, void *data,
-		struct drm_file *file)
-{
-	struct drm_exynos_file_private *file_priv = file->driver_priv;
-	struct device *dev = file_priv->ipp_dev;
-	struct ipp_context *ctx = get_ipp_context(dev);
-	struct drm_exynos_ipp_prop_list *prop_list = data;
-	struct exynos_drm_ippdrv *ippdrv;
-	int count = 0;
-
-	if (!ctx) {
-		DRM_ERROR("invalid context.\n");
-		return -EINVAL;
-	}
-
-	if (!prop_list) {
-		DRM_ERROR("invalid property parameter.\n");
-		return -EINVAL;
-	}
-
-	DRM_DEBUG_KMS("ipp_id[%d]\n", prop_list->ipp_id);
-
-	if (!prop_list->ipp_id) {
-		list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list)
-			count++;
-
-		/*
-		 * Supports ippdrv list count for user application.
-		 * First step user application getting ippdrv count.
-		 * and second step getting ippdrv capability using ipp_id.
-		 */
-		prop_list->count = count;
-	} else {
-		/*
-		 * Getting ippdrv capability by ipp_id.
-		 * some device not supported wb, output interface.
-		 * so, user application detect correct ipp driver
-		 * using this ioctl.
-		 */
-		ippdrv = ipp_find_obj(&ctx->ipp_idr, &ctx->ipp_lock,
-						prop_list->ipp_id);
-		if (!ippdrv) {
-			DRM_ERROR("not found ipp%d driver.\n",
-					prop_list->ipp_id);
-			return -ENODEV;
-		}
-
-		*prop_list = ippdrv->prop_list;
-	}
-
-	return 0;
-}
-
-static void ipp_print_property(struct drm_exynos_ipp_property *property,
-		int idx)
-{
-	struct drm_exynos_ipp_config *config = &property->config[idx];
-	struct drm_exynos_pos *pos = &config->pos;
-	struct drm_exynos_sz *sz = &config->sz;
-
-	DRM_DEBUG_KMS("prop_id[%d]ops[%s]fmt[0x%x]\n",
-		property->prop_id, idx ? "dst" : "src", config->fmt);
-
-	DRM_DEBUG_KMS("pos[%d %d %d %d]sz[%d %d]f[%d]r[%d]\n",
-		pos->x, pos->y, pos->w, pos->h,
-		sz->hsize, sz->vsize, config->flip, config->degree);
-}
-
-static struct drm_exynos_ipp_cmd_work *ipp_create_cmd_work(void)
-{
-	struct drm_exynos_ipp_cmd_work *cmd_work;
-
-	cmd_work = kzalloc(sizeof(*cmd_work), GFP_KERNEL);
-	if (!cmd_work)
-		return ERR_PTR(-ENOMEM);
-
-	INIT_WORK((struct work_struct *)cmd_work, ipp_sched_cmd);
-
-	return cmd_work;
-}
-
-static struct drm_exynos_ipp_event_work *ipp_create_event_work(void)
-{
-	struct drm_exynos_ipp_event_work *event_work;
-
-	event_work = kzalloc(sizeof(*event_work), GFP_KERNEL);
-	if (!event_work)
-		return ERR_PTR(-ENOMEM);
-
-	INIT_WORK(&event_work->work, ipp_sched_event);
-
-	return event_work;
-}
-
-int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data,
-		struct drm_file *file)
-{
-	struct drm_exynos_file_private *file_priv = file->driver_priv;
-	struct device *dev = file_priv->ipp_dev;
-	struct ipp_context *ctx = get_ipp_context(dev);
-	struct drm_exynos_ipp_property *property = data;
-	struct exynos_drm_ippdrv *ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node;
-	u32 prop_id;
-	int ret, i;
-
-	if (!ctx) {
-		DRM_ERROR("invalid context.\n");
-		return -EINVAL;
-	}
-
-	if (!property) {
-		DRM_ERROR("invalid property parameter.\n");
-		return -EINVAL;
-	}
-
-	prop_id = property->prop_id;
-
-	/*
-	 * This is log print for user application property.
-	 * user application set various property.
-	 */
-	for_each_ipp_ops(i)
-		ipp_print_property(property, i);
-
-	/*
-	 * In case prop_id is not zero try to set existing property.
-	 */
-	if (prop_id) {
-		c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock, prop_id);
-
-		if (!c_node || c_node->filp != file) {
-			DRM_DEBUG_KMS("prop_id[%d] not found\n", prop_id);
-			return -EINVAL;
-		}
-
-		if (c_node->state != IPP_STATE_STOP) {
-			DRM_DEBUG_KMS("prop_id[%d] not stopped\n", prop_id);
-			return -EINVAL;
-		}
-
-		c_node->property = *property;
-
-		return 0;
-	}
-
-	/* find ipp driver using ipp id */
-	ippdrv = ipp_find_driver(ctx, property);
-	if (IS_ERR(ippdrv)) {
-		DRM_ERROR("failed to get ipp driver.\n");
-		return -EINVAL;
-	}
-
-	/* allocate command node */
-	c_node = kzalloc(sizeof(*c_node), GFP_KERNEL);
-	if (!c_node)
-		return -ENOMEM;
-
-	ret = ipp_create_id(&ctx->prop_idr, &ctx->prop_lock, c_node);
-	if (ret < 0) {
-		DRM_ERROR("failed to create id.\n");
-		goto err_clear;
-	}
-	property->prop_id = ret;
-
-	DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%pK]\n",
-		property->prop_id, property->cmd, ippdrv);
-
-	/* stored property information and ippdrv in private data */
-	c_node->property = *property;
-	c_node->state = IPP_STATE_IDLE;
-	c_node->filp = file;
-
-	c_node->start_work = ipp_create_cmd_work();
-	if (IS_ERR(c_node->start_work)) {
-		DRM_ERROR("failed to create start work.\n");
-		ret = PTR_ERR(c_node->start_work);
-		goto err_remove_id;
-	}
-
-	c_node->stop_work = ipp_create_cmd_work();
-	if (IS_ERR(c_node->stop_work)) {
-		DRM_ERROR("failed to create stop work.\n");
-		ret = PTR_ERR(c_node->stop_work);
-		goto err_free_start;
-	}
-
-	c_node->event_work = ipp_create_event_work();
-	if (IS_ERR(c_node->event_work)) {
-		DRM_ERROR("failed to create event work.\n");
-		ret = PTR_ERR(c_node->event_work);
-		goto err_free_stop;
-	}
-
-	mutex_init(&c_node->lock);
-	mutex_init(&c_node->mem_lock);
-	mutex_init(&c_node->event_lock);
-
-	init_completion(&c_node->start_complete);
-	init_completion(&c_node->stop_complete);
-
-	for_each_ipp_ops(i)
-		INIT_LIST_HEAD(&c_node->mem_list[i]);
-
-	INIT_LIST_HEAD(&c_node->event_list);
-	mutex_lock(&ippdrv->cmd_lock);
-	list_add_tail(&c_node->list, &ippdrv->cmd_list);
-	mutex_unlock(&ippdrv->cmd_lock);
-
-	/* make dedicated state without m2m */
-	if (!ipp_is_m2m_cmd(property->cmd))
-		ippdrv->dedicated = true;
-
-	return 0;
-
-err_free_stop:
-	kfree(c_node->stop_work);
-err_free_start:
-	kfree(c_node->start_work);
-err_remove_id:
-	ipp_remove_id(&ctx->prop_idr, &ctx->prop_lock, property->prop_id);
-err_clear:
-	kfree(c_node);
-	return ret;
-}
-
-static int ipp_validate_mem_node(struct drm_device *drm_dev,
-				 struct drm_exynos_ipp_mem_node *m_node,
-				 struct drm_exynos_ipp_cmd_node *c_node)
-{
-	struct drm_exynos_ipp_config *ipp_cfg;
-	unsigned int num_plane;
-	unsigned long size, buf_size = 0, plane_size, img_size = 0;
-	unsigned int bpp, width, height;
-	int i;
-
-	ipp_cfg = &c_node->property.config[m_node->ops_id];
-	num_plane = drm_format_num_planes(ipp_cfg->fmt);
-
-	/**
-	 * This is a rather simplified validation of a memory node.
-	 * It basically verifies provided gem object handles
-	 * and the buffer sizes with respect to current configuration.
-	 * This is not the best that can be done
-	 * but it seems more than enough
-	 */
-	for (i = 0; i < num_plane; ++i) {
-		width = ipp_cfg->sz.hsize;
-		height = ipp_cfg->sz.vsize;
-		bpp = drm_format_plane_cpp(ipp_cfg->fmt, i);
-
-		/*
-		 * The result of drm_format_plane_cpp() for chroma planes must
-		 * be used with drm_format_xxxx_chroma_subsampling() for
-		 * correct result.
-		 */
-		if (i > 0) {
-			width /= drm_format_horz_chroma_subsampling(
-								ipp_cfg->fmt);
-			height /= drm_format_vert_chroma_subsampling(
-								ipp_cfg->fmt);
-		}
-		plane_size = width * height * bpp;
-		img_size += plane_size;
-
-		if (m_node->buf_info.handles[i]) {
-			size = exynos_drm_gem_get_size(drm_dev,
-					m_node->buf_info.handles[i],
-					c_node->filp);
-			if (plane_size > size) {
-				DRM_ERROR(
-					"buffer %d is smaller than required\n",
-					i);
-				return -EINVAL;
-			}
-
-			buf_size += size;
-		}
-	}
-
-	if (buf_size < img_size) {
-		DRM_ERROR("size of buffers(%lu) is smaller than image(%lu)\n",
-			buf_size, img_size);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int ipp_put_mem_node(struct drm_device *drm_dev,
-		struct drm_exynos_ipp_cmd_node *c_node,
-		struct drm_exynos_ipp_mem_node *m_node)
-{
-	int i;
-
-	DRM_DEBUG_KMS("node[%pK]\n", m_node);
-
-	if (!m_node) {
-		DRM_ERROR("invalid dequeue node.\n");
-		return -EFAULT;
-	}
-
-	DRM_DEBUG_KMS("ops_id[%d]\n", m_node->ops_id);
-
-	/* put gem buffer */
-	for_each_ipp_planar(i) {
-		unsigned long handle = m_node->buf_info.handles[i];
-		if (handle)
-			exynos_drm_gem_put_dma_addr(drm_dev, handle,
-							c_node->filp);
-	}
-
-	list_del(&m_node->list);
-	kfree(m_node);
-
-	return 0;
-}
-
-static struct drm_exynos_ipp_mem_node
-		*ipp_get_mem_node(struct drm_device *drm_dev,
-		struct drm_exynos_ipp_cmd_node *c_node,
-		struct drm_exynos_ipp_queue_buf *qbuf)
-{
-	struct drm_exynos_ipp_mem_node *m_node;
-	struct drm_exynos_ipp_buf_info *buf_info;
-	int i;
-
-	m_node = kzalloc(sizeof(*m_node), GFP_KERNEL);
-	if (!m_node)
-		return ERR_PTR(-ENOMEM);
-
-	buf_info = &m_node->buf_info;
-
-	/* operations, buffer id */
-	m_node->ops_id = qbuf->ops_id;
-	m_node->prop_id = qbuf->prop_id;
-	m_node->buf_id = qbuf->buf_id;
-	INIT_LIST_HEAD(&m_node->list);
-
-	DRM_DEBUG_KMS("m_node[%pK]ops_id[%d]\n", m_node, qbuf->ops_id);
-	DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]\n", qbuf->prop_id, m_node->buf_id);
-
-	for_each_ipp_planar(i) {
-		DRM_DEBUG_KMS("i[%d]handle[0x%x]\n", i, qbuf->handle[i]);
-
-		/* get dma address by handle */
-		if (qbuf->handle[i]) {
-			dma_addr_t *addr;
-
-			addr = exynos_drm_gem_get_dma_addr(drm_dev,
-					qbuf->handle[i], c_node->filp);
-			if (IS_ERR(addr)) {
-				DRM_ERROR("failed to get addr.\n");
-				ipp_put_mem_node(drm_dev, c_node, m_node);
-				return ERR_PTR(-EFAULT);
-			}
-
-			buf_info->handles[i] = qbuf->handle[i];
-			buf_info->base[i] = *addr;
-			DRM_DEBUG_KMS("i[%d]base[%pad]hd[0x%lx]\n", i,
-				      &buf_info->base[i], buf_info->handles[i]);
-		}
-	}
-
-	mutex_lock(&c_node->mem_lock);
-	if (ipp_validate_mem_node(drm_dev, m_node, c_node)) {
-		ipp_put_mem_node(drm_dev, c_node, m_node);
-		mutex_unlock(&c_node->mem_lock);
-		return ERR_PTR(-EFAULT);
-	}
-	list_add_tail(&m_node->list, &c_node->mem_list[qbuf->ops_id]);
-	mutex_unlock(&c_node->mem_lock);
-
-	return m_node;
-}
-
-static void ipp_clean_mem_nodes(struct drm_device *drm_dev,
-			       struct drm_exynos_ipp_cmd_node *c_node, int ops)
-{
-	struct drm_exynos_ipp_mem_node *m_node, *tm_node;
-	struct list_head *head = &c_node->mem_list[ops];
-
-	mutex_lock(&c_node->mem_lock);
-
-	list_for_each_entry_safe(m_node, tm_node, head, list) {
-		int ret;
-
-		ret = ipp_put_mem_node(drm_dev, c_node, m_node);
-		if (ret)
-			DRM_ERROR("failed to put m_node.\n");
-	}
-
-	mutex_unlock(&c_node->mem_lock);
-}
-
-static int ipp_get_event(struct drm_device *drm_dev,
-		struct drm_exynos_ipp_cmd_node *c_node,
-		struct drm_exynos_ipp_queue_buf *qbuf)
-{
-	struct drm_exynos_ipp_send_event *e;
-	int ret;
-
-	DRM_DEBUG_KMS("ops_id[%d]buf_id[%d]\n", qbuf->ops_id, qbuf->buf_id);
-
-	e = kzalloc(sizeof(*e), GFP_KERNEL);
-	if (!e)
-		return -ENOMEM;
-
-	/* make event */
-	e->event.base.type = DRM_EXYNOS_IPP_EVENT;
-	e->event.base.length = sizeof(e->event);
-	e->event.user_data = qbuf->user_data;
-	e->event.prop_id = qbuf->prop_id;
-	e->event.buf_id[EXYNOS_DRM_OPS_DST] = qbuf->buf_id;
-
-	ret = drm_event_reserve_init(drm_dev, c_node->filp, &e->base, &e->event.base);
-	if (ret) {
-		kfree(e);
-		return ret;
-	}
-
-	mutex_lock(&c_node->event_lock);
-	list_add_tail(&e->base.link, &c_node->event_list);
-	mutex_unlock(&c_node->event_lock);
-
-	return 0;
-}
-
-static void ipp_put_event(struct drm_exynos_ipp_cmd_node *c_node,
-		struct drm_exynos_ipp_queue_buf *qbuf)
-{
-	struct drm_exynos_ipp_send_event *e, *te;
-	int count = 0;
-
-	mutex_lock(&c_node->event_lock);
-	list_for_each_entry_safe(e, te, &c_node->event_list, base.link) {
-		DRM_DEBUG_KMS("count[%d]e[%pK]\n", count++, e);
-
-		/*
-		 * qbuf == NULL condition means all event deletion.
-		 * stop operations want to delete all event list.
-		 * another case delete only same buf id.
-		 */
-		if (!qbuf) {
-			/* delete list */
-			list_del(&e->base.link);
-			kfree(e);
-		}
-
-		/* compare buffer id */
-		if (qbuf && (qbuf->buf_id ==
-		    e->event.buf_id[EXYNOS_DRM_OPS_DST])) {
-			/* delete list */
-			list_del(&e->base.link);
-			kfree(e);
-			goto out_unlock;
-		}
-	}
-
-out_unlock:
-	mutex_unlock(&c_node->event_lock);
-	return;
-}
-
-static void ipp_clean_cmd_node(struct ipp_context *ctx,
-				struct drm_exynos_ipp_cmd_node *c_node)
-{
-	int i;
-
-	/* cancel works */
-	cancel_work_sync(&c_node->start_work->work);
-	cancel_work_sync(&c_node->stop_work->work);
-	cancel_work_sync(&c_node->event_work->work);
-
-	/* put event */
-	ipp_put_event(c_node, NULL);
-
-	for_each_ipp_ops(i)
-		ipp_clean_mem_nodes(ctx->subdrv.drm_dev, c_node, i);
-
-	/* delete list */
-	list_del(&c_node->list);
-
-	ipp_remove_id(&ctx->prop_idr, &ctx->prop_lock,
-			c_node->property.prop_id);
-
-	/* destroy mutex */
-	mutex_destroy(&c_node->lock);
-	mutex_destroy(&c_node->mem_lock);
-	mutex_destroy(&c_node->event_lock);
-
-	/* free command node */
-	kfree(c_node->start_work);
-	kfree(c_node->stop_work);
-	kfree(c_node->event_work);
-	kfree(c_node);
-}
-
-static bool ipp_check_mem_list(struct drm_exynos_ipp_cmd_node *c_node)
-{
-	switch (c_node->property.cmd) {
-	case IPP_CMD_WB:
-		return !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_DST]);
-	case IPP_CMD_OUTPUT:
-		return !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_SRC]);
-	case IPP_CMD_M2M:
-	default:
-		return !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_SRC]) &&
-		       !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_DST]);
-	}
-}
-
-static struct drm_exynos_ipp_mem_node
-		*ipp_find_mem_node(struct drm_exynos_ipp_cmd_node *c_node,
-		struct drm_exynos_ipp_queue_buf *qbuf)
-{
-	struct drm_exynos_ipp_mem_node *m_node;
-	struct list_head *head;
-	int count = 0;
-
-	DRM_DEBUG_KMS("buf_id[%d]\n", qbuf->buf_id);
-
-	/* source/destination memory list */
-	head = &c_node->mem_list[qbuf->ops_id];
-
-	/* find memory node from memory list */
-	list_for_each_entry(m_node, head, list) {
-		DRM_DEBUG_KMS("count[%d]m_node[%pK]\n", count++, m_node);
-
-		/* compare buffer id */
-		if (m_node->buf_id == qbuf->buf_id)
-			return m_node;
-	}
-
-	return NULL;
-}
-
-static int ipp_set_mem_node(struct exynos_drm_ippdrv *ippdrv,
-		struct drm_exynos_ipp_cmd_node *c_node,
-		struct drm_exynos_ipp_mem_node *m_node)
-{
-	struct exynos_drm_ipp_ops *ops = NULL;
-	int ret = 0;
-
-	DRM_DEBUG_KMS("node[%pK]\n", m_node);
-
-	if (!m_node) {
-		DRM_ERROR("invalid queue node.\n");
-		return -EFAULT;
-	}
-
-	DRM_DEBUG_KMS("ops_id[%d]\n", m_node->ops_id);
-
-	/* get operations callback */
-	ops = ippdrv->ops[m_node->ops_id];
-	if (!ops) {
-		DRM_ERROR("not support ops.\n");
-		return -EFAULT;
-	}
-
-	/* set address and enable irq */
-	if (ops->set_addr) {
-		ret = ops->set_addr(ippdrv->dev, &m_node->buf_info,
-			m_node->buf_id, IPP_BUF_ENQUEUE);
-		if (ret) {
-			DRM_ERROR("failed to set addr.\n");
-			return ret;
-		}
-	}
-
-	return ret;
-}
-
-static void ipp_handle_cmd_work(struct device *dev,
-		struct exynos_drm_ippdrv *ippdrv,
-		struct drm_exynos_ipp_cmd_work *cmd_work,
-		struct drm_exynos_ipp_cmd_node *c_node)
-{
-	struct ipp_context *ctx = get_ipp_context(dev);
-
-	cmd_work->ippdrv = ippdrv;
-	cmd_work->c_node = c_node;
-	queue_work(ctx->cmd_workq, &cmd_work->work);
-}
-
-static int ipp_queue_buf_with_run(struct device *dev,
-		struct drm_exynos_ipp_cmd_node *c_node,
-		struct drm_exynos_ipp_mem_node *m_node,
-		struct drm_exynos_ipp_queue_buf *qbuf)
-{
-	struct exynos_drm_ippdrv *ippdrv;
-	struct drm_exynos_ipp_property *property;
-	struct exynos_drm_ipp_ops *ops;
-	int ret;
-
-	ippdrv = ipp_find_drv_by_handle(qbuf->prop_id);
-	if (IS_ERR(ippdrv)) {
-		DRM_ERROR("failed to get ipp driver.\n");
-		return -EFAULT;
-	}
-
-	ops = ippdrv->ops[qbuf->ops_id];
-	if (!ops) {
-		DRM_ERROR("failed to get ops.\n");
-		return -EFAULT;
-	}
-
-	property = &c_node->property;
-
-	if (c_node->state != IPP_STATE_START) {
-		DRM_DEBUG_KMS("bypass for invalid state.\n");
-		return 0;
-	}
-
-	mutex_lock(&c_node->mem_lock);
-	if (!ipp_check_mem_list(c_node)) {
-		mutex_unlock(&c_node->mem_lock);
-		DRM_DEBUG_KMS("empty memory.\n");
-		return 0;
-	}
-
-	/*
-	 * If set destination buffer and enabled clock,
-	 * then m2m operations need start operations at queue_buf
-	 */
-	if (ipp_is_m2m_cmd(property->cmd)) {
-		struct drm_exynos_ipp_cmd_work *cmd_work = c_node->start_work;
-
-		cmd_work->ctrl = IPP_CTRL_PLAY;
-		ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
-	} else {
-		ret = ipp_set_mem_node(ippdrv, c_node, m_node);
-		if (ret) {
-			mutex_unlock(&c_node->mem_lock);
-			DRM_ERROR("failed to set m node.\n");
-			return ret;
-		}
-	}
-	mutex_unlock(&c_node->mem_lock);
-
-	return 0;
-}
-
-static void ipp_clean_queue_buf(struct drm_device *drm_dev,
-		struct drm_exynos_ipp_cmd_node *c_node,
-		struct drm_exynos_ipp_queue_buf *qbuf)
-{
-	struct drm_exynos_ipp_mem_node *m_node, *tm_node;
-
-	/* delete list */
-	mutex_lock(&c_node->mem_lock);
-	list_for_each_entry_safe(m_node, tm_node,
-		&c_node->mem_list[qbuf->ops_id], list) {
-		if (m_node->buf_id == qbuf->buf_id &&
-		    m_node->ops_id == qbuf->ops_id)
-			ipp_put_mem_node(drm_dev, c_node, m_node);
-	}
-	mutex_unlock(&c_node->mem_lock);
-}
-
-int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, void *data,
-		struct drm_file *file)
-{
-	struct drm_exynos_file_private *file_priv = file->driver_priv;
-	struct device *dev = file_priv->ipp_dev;
-	struct ipp_context *ctx = get_ipp_context(dev);
-	struct drm_exynos_ipp_queue_buf *qbuf = data;
-	struct drm_exynos_ipp_cmd_node *c_node;
-	struct drm_exynos_ipp_mem_node *m_node;
-	int ret;
-
-	if (!qbuf) {
-		DRM_ERROR("invalid buf parameter.\n");
-		return -EINVAL;
-	}
-
-	if (qbuf->ops_id >= EXYNOS_DRM_OPS_MAX) {
-		DRM_ERROR("invalid ops parameter.\n");
-		return -EINVAL;
-	}
-
-	DRM_DEBUG_KMS("prop_id[%d]ops_id[%s]buf_id[%d]buf_type[%d]\n",
-		qbuf->prop_id, qbuf->ops_id ? "dst" : "src",
-		qbuf->buf_id, qbuf->buf_type);
-
-	/* find command node */
-	c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock,
-		qbuf->prop_id);
-	if (!c_node || c_node->filp != file) {
-		DRM_ERROR("failed to get command node.\n");
-		return -ENODEV;
-	}
-
-	/* buffer control */
-	switch (qbuf->buf_type) {
-	case IPP_BUF_ENQUEUE:
-		/* get memory node */
-		m_node = ipp_get_mem_node(drm_dev, c_node, qbuf);
-		if (IS_ERR(m_node)) {
-			DRM_ERROR("failed to get m_node.\n");
-			return PTR_ERR(m_node);
-		}
-
-		/*
-		 * first step get event for destination buffer.
-		 * and second step when M2M case run with destination buffer
-		 * if needed.
-		 */
-		if (qbuf->ops_id == EXYNOS_DRM_OPS_DST) {
-			/* get event for destination buffer */
-			ret = ipp_get_event(drm_dev, c_node, qbuf);
-			if (ret) {
-				DRM_ERROR("failed to get event.\n");
-				goto err_clean_node;
-			}
-
-			/*
-			 * M2M case run play control for streaming feature.
-			 * other case set address and waiting.
-			 */
-			ret = ipp_queue_buf_with_run(dev, c_node, m_node, qbuf);
-			if (ret) {
-				DRM_ERROR("failed to run command.\n");
-				goto err_clean_node;
-			}
-		}
-		break;
-	case IPP_BUF_DEQUEUE:
-		mutex_lock(&c_node->lock);
-
-		/* put event for destination buffer */
-		if (qbuf->ops_id == EXYNOS_DRM_OPS_DST)
-			ipp_put_event(c_node, qbuf);
-
-		ipp_clean_queue_buf(drm_dev, c_node, qbuf);
-
-		mutex_unlock(&c_node->lock);
-		break;
-	default:
-		DRM_ERROR("invalid buffer control.\n");
-		return -EINVAL;
-	}
-
-	return 0;
-
-err_clean_node:
-	DRM_ERROR("clean memory nodes.\n");
-
-	ipp_clean_queue_buf(drm_dev, c_node, qbuf);
-	return ret;
-}
-
-static bool exynos_drm_ipp_check_valid(struct device *dev,
-		enum drm_exynos_ipp_ctrl ctrl, enum drm_exynos_ipp_state state)
-{
-	if (ctrl != IPP_CTRL_PLAY) {
-		if (pm_runtime_suspended(dev)) {
-			DRM_ERROR("pm:runtime_suspended.\n");
-			goto err_status;
-		}
-	}
-
-	switch (ctrl) {
-	case IPP_CTRL_PLAY:
-		if (state != IPP_STATE_IDLE)
-			goto err_status;
-		break;
-	case IPP_CTRL_STOP:
-		if (state == IPP_STATE_STOP)
-			goto err_status;
-		break;
-	case IPP_CTRL_PAUSE:
-		if (state != IPP_STATE_START)
-			goto err_status;
-		break;
-	case IPP_CTRL_RESUME:
-		if (state != IPP_STATE_STOP)
-			goto err_status;
-		break;
-	default:
-		DRM_ERROR("invalid state.\n");
-		goto err_status;
-	}
-
-	return true;
-
-err_status:
-	DRM_ERROR("invalid status:ctrl[%d]state[%d]\n", ctrl, state);
-	return false;
-}
-
-int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev, void *data,
-		struct drm_file *file)
-{
-	struct drm_exynos_file_private *file_priv = file->driver_priv;
-	struct exynos_drm_ippdrv *ippdrv = NULL;
-	struct device *dev = file_priv->ipp_dev;
-	struct ipp_context *ctx = get_ipp_context(dev);
-	struct drm_exynos_ipp_cmd_ctrl *cmd_ctrl = data;
-	struct drm_exynos_ipp_cmd_work *cmd_work;
-	struct drm_exynos_ipp_cmd_node *c_node;
-
-	if (!ctx) {
-		DRM_ERROR("invalid context.\n");
-		return -EINVAL;
-	}
-
-	if (!cmd_ctrl) {
-		DRM_ERROR("invalid control parameter.\n");
-		return -EINVAL;
-	}
-
-	DRM_DEBUG_KMS("ctrl[%d]prop_id[%d]\n",
-		cmd_ctrl->ctrl, cmd_ctrl->prop_id);
-
-	ippdrv = ipp_find_drv_by_handle(cmd_ctrl->prop_id);
-	if (IS_ERR(ippdrv)) {
-		DRM_ERROR("failed to get ipp driver.\n");
-		return PTR_ERR(ippdrv);
-	}
-
-	c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock,
-		cmd_ctrl->prop_id);
-	if (!c_node || c_node->filp != file) {
-		DRM_ERROR("invalid command node list.\n");
-		return -ENODEV;
-	}
-
-	if (!exynos_drm_ipp_check_valid(ippdrv->dev, cmd_ctrl->ctrl,
-	    c_node->state)) {
-		DRM_ERROR("invalid state.\n");
-		return -EINVAL;
-	}
-
-	switch (cmd_ctrl->ctrl) {
-	case IPP_CTRL_PLAY:
-		if (pm_runtime_suspended(ippdrv->dev))
-			pm_runtime_get_sync(ippdrv->dev);
-
-		c_node->state = IPP_STATE_START;
-
-		cmd_work = c_node->start_work;
-		cmd_work->ctrl = cmd_ctrl->ctrl;
-		ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
-		break;
-	case IPP_CTRL_STOP:
-		cmd_work = c_node->stop_work;
-		cmd_work->ctrl = cmd_ctrl->ctrl;
-		ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
-
-		if (!wait_for_completion_timeout(&c_node->stop_complete,
-		    msecs_to_jiffies(300))) {
-			DRM_ERROR("timeout stop:prop_id[%d]\n",
-				c_node->property.prop_id);
-		}
-
-		c_node->state = IPP_STATE_STOP;
-		ippdrv->dedicated = false;
-		mutex_lock(&ippdrv->cmd_lock);
-		ipp_clean_cmd_node(ctx, c_node);
-
-		if (list_empty(&ippdrv->cmd_list))
-			pm_runtime_put_sync(ippdrv->dev);
-		mutex_unlock(&ippdrv->cmd_lock);
-		break;
-	case IPP_CTRL_PAUSE:
-		cmd_work = c_node->stop_work;
-		cmd_work->ctrl = cmd_ctrl->ctrl;
-		ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
-
-		if (!wait_for_completion_timeout(&c_node->stop_complete,
-		    msecs_to_jiffies(200))) {
-			DRM_ERROR("timeout stop:prop_id[%d]\n",
-				c_node->property.prop_id);
-		}
-
-		c_node->state = IPP_STATE_STOP;
-		break;
-	case IPP_CTRL_RESUME:
-		c_node->state = IPP_STATE_START;
-		cmd_work = c_node->start_work;
-		cmd_work->ctrl = cmd_ctrl->ctrl;
-		ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
-		break;
-	default:
-		DRM_ERROR("could not support this state currently.\n");
-		return -EINVAL;
-	}
-
-	DRM_DEBUG_KMS("done ctrl[%d]prop_id[%d]\n",
-		cmd_ctrl->ctrl, cmd_ctrl->prop_id);
-
-	return 0;
-}
-
-int exynos_drm_ippnb_register(struct notifier_block *nb)
-{
-	return blocking_notifier_chain_register(
-		&exynos_drm_ippnb_list, nb);
-}
-
-int exynos_drm_ippnb_unregister(struct notifier_block *nb)
-{
-	return blocking_notifier_chain_unregister(
-		&exynos_drm_ippnb_list, nb);
-}
-
-int exynos_drm_ippnb_send_event(unsigned long val, void *v)
-{
-	return blocking_notifier_call_chain(
-		&exynos_drm_ippnb_list, val, v);
-}
-
-static int ipp_set_property(struct exynos_drm_ippdrv *ippdrv,
-		struct drm_exynos_ipp_property *property)
-{
-	struct exynos_drm_ipp_ops *ops = NULL;
-	bool swap = false;
-	int ret, i;
-
-	if (!property) {
-		DRM_ERROR("invalid property parameter.\n");
-		return -EINVAL;
-	}
-
-	DRM_DEBUG_KMS("prop_id[%d]\n", property->prop_id);
-
-	/* reset h/w block */
-	if (ippdrv->reset &&
-	    ippdrv->reset(ippdrv->dev)) {
-		return -EINVAL;
-	}
-
-	/* set source,destination operations */
-	for_each_ipp_ops(i) {
-		struct drm_exynos_ipp_config *config =
-			&property->config[i];
-
-		ops = ippdrv->ops[i];
-		if (!ops || !config) {
-			DRM_ERROR("not support ops and config.\n");
-			return -EINVAL;
-		}
-
-		/* set format */
-		if (ops->set_fmt) {
-			ret = ops->set_fmt(ippdrv->dev, config->fmt);
-			if (ret)
-				return ret;
-		}
-
-		/* set transform for rotation, flip */
-		if (ops->set_transf) {
-			ret = ops->set_transf(ippdrv->dev, config->degree,
-				config->flip, &swap);
-			if (ret)
-				return ret;
-		}
-
-		/* set size */
-		if (ops->set_size) {
-			ret = ops->set_size(ippdrv->dev, swap, &config->pos,
-				&config->sz);
-			if (ret)
-				return ret;
-		}
-	}
-
-	return 0;
-}
-
-static int ipp_start_property(struct exynos_drm_ippdrv *ippdrv,
-		struct drm_exynos_ipp_cmd_node *c_node)
-{
-	struct drm_exynos_ipp_mem_node *m_node;
-	struct drm_exynos_ipp_property *property = &c_node->property;
-	struct list_head *head;
-	int ret, i;
-
-	DRM_DEBUG_KMS("prop_id[%d]\n", property->prop_id);
-
-	/* store command info in ippdrv */
-	ippdrv->c_node = c_node;
-
-	mutex_lock(&c_node->mem_lock);
-	if (!ipp_check_mem_list(c_node)) {
-		DRM_DEBUG_KMS("empty memory.\n");
-		ret = -ENOMEM;
-		goto err_unlock;
-	}
-
-	/* set current property in ippdrv */
-	ret = ipp_set_property(ippdrv, property);
-	if (ret) {
-		DRM_ERROR("failed to set property.\n");
-		ippdrv->c_node = NULL;
-		goto err_unlock;
-	}
-
-	/* check command */
-	switch (property->cmd) {
-	case IPP_CMD_M2M:
-		for_each_ipp_ops(i) {
-			/* source/destination memory list */
-			head = &c_node->mem_list[i];
-
-			m_node = list_first_entry(head,
-				struct drm_exynos_ipp_mem_node, list);
-
-			DRM_DEBUG_KMS("m_node[%pK]\n", m_node);
-
-			ret = ipp_set_mem_node(ippdrv, c_node, m_node);
-			if (ret) {
-				DRM_ERROR("failed to set m node.\n");
-				goto err_unlock;
-			}
-		}
-		break;
-	case IPP_CMD_WB:
-		/* destination memory list */
-		head = &c_node->mem_list[EXYNOS_DRM_OPS_DST];
-
-		list_for_each_entry(m_node, head, list) {
-			ret = ipp_set_mem_node(ippdrv, c_node, m_node);
-			if (ret) {
-				DRM_ERROR("failed to set m node.\n");
-				goto err_unlock;
-			}
-		}
-		break;
-	case IPP_CMD_OUTPUT:
-		/* source memory list */
-		head = &c_node->mem_list[EXYNOS_DRM_OPS_SRC];
-
-		list_for_each_entry(m_node, head, list) {
-			ret = ipp_set_mem_node(ippdrv, c_node, m_node);
-			if (ret) {
-				DRM_ERROR("failed to set m node.\n");
-				goto err_unlock;
-			}
-		}
-		break;
-	default:
-		DRM_ERROR("invalid operations.\n");
-		ret = -EINVAL;
-		goto err_unlock;
-	}
-	mutex_unlock(&c_node->mem_lock);
-
-	DRM_DEBUG_KMS("cmd[%d]\n", property->cmd);
-
-	/* start operations */
-	if (ippdrv->start) {
-		ret = ippdrv->start(ippdrv->dev, property->cmd);
-		if (ret) {
-			DRM_ERROR("failed to start ops.\n");
-			ippdrv->c_node = NULL;
-			return ret;
-		}
-	}
-
-	return 0;
-
-err_unlock:
-	mutex_unlock(&c_node->mem_lock);
-	ippdrv->c_node = NULL;
-	return ret;
-}
-
-static int ipp_stop_property(struct drm_device *drm_dev,
-		struct exynos_drm_ippdrv *ippdrv,
-		struct drm_exynos_ipp_cmd_node *c_node)
-{
-	struct drm_exynos_ipp_property *property = &c_node->property;
-	int i;
-
-	DRM_DEBUG_KMS("prop_id[%d]\n", property->prop_id);
-
-	/* stop operations */
-	if (ippdrv->stop)
-		ippdrv->stop(ippdrv->dev, property->cmd);
-
-	/* check command */
-	switch (property->cmd) {
-	case IPP_CMD_M2M:
-		for_each_ipp_ops(i)
-			ipp_clean_mem_nodes(drm_dev, c_node, i);
-		break;
-	case IPP_CMD_WB:
-		ipp_clean_mem_nodes(drm_dev, c_node, EXYNOS_DRM_OPS_DST);
-		break;
-	case IPP_CMD_OUTPUT:
-		ipp_clean_mem_nodes(drm_dev, c_node, EXYNOS_DRM_OPS_SRC);
-		break;
-	default:
-		DRM_ERROR("invalid operations.\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-void ipp_sched_cmd(struct work_struct *work)
-{
-	struct drm_exynos_ipp_cmd_work *cmd_work =
-		container_of(work, struct drm_exynos_ipp_cmd_work, work);
-	struct exynos_drm_ippdrv *ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node;
-	struct drm_exynos_ipp_property *property;
-	int ret;
-
-	ippdrv = cmd_work->ippdrv;
-	if (!ippdrv) {
-		DRM_ERROR("invalid ippdrv list.\n");
-		return;
-	}
-
-	c_node = cmd_work->c_node;
-	if (!c_node) {
-		DRM_ERROR("invalid command node list.\n");
-		return;
-	}
-
-	mutex_lock(&c_node->lock);
-
-	property = &c_node->property;
-
-	switch (cmd_work->ctrl) {
-	case IPP_CTRL_PLAY:
-	case IPP_CTRL_RESUME:
-		ret = ipp_start_property(ippdrv, c_node);
-		if (ret) {
-			DRM_ERROR("failed to start property:prop_id[%d]\n",
-				c_node->property.prop_id);
-			goto err_unlock;
-		}
-
-		/*
-		 * M2M case supports wait_completion of transfer.
-		 * because M2M case supports single unit operation
-		 * with multiple queue.
-		 * M2M need to wait completion of data transfer.
-		 */
-		if (ipp_is_m2m_cmd(property->cmd)) {
-			if (!wait_for_completion_timeout
-			    (&c_node->start_complete, msecs_to_jiffies(200))) {
-				DRM_ERROR("timeout event:prop_id[%d]\n",
-					c_node->property.prop_id);
-				goto err_unlock;
-			}
-		}
-		break;
-	case IPP_CTRL_STOP:
-	case IPP_CTRL_PAUSE:
-		ret = ipp_stop_property(ippdrv->drm_dev, ippdrv,
-			c_node);
-		if (ret) {
-			DRM_ERROR("failed to stop property.\n");
-			goto err_unlock;
-		}
-
-		complete(&c_node->stop_complete);
-		break;
-	default:
-		DRM_ERROR("unknown control type\n");
-		break;
-	}
-
-	DRM_DEBUG_KMS("ctrl[%d] done.\n", cmd_work->ctrl);
-
-err_unlock:
-	mutex_unlock(&c_node->lock);
-}
-
-static int ipp_send_event(struct exynos_drm_ippdrv *ippdrv,
-		struct drm_exynos_ipp_cmd_node *c_node, int *buf_id)
-{
-	struct drm_device *drm_dev = ippdrv->drm_dev;
-	struct drm_exynos_ipp_property *property = &c_node->property;
-	struct drm_exynos_ipp_mem_node *m_node;
-	struct drm_exynos_ipp_queue_buf qbuf;
-	struct drm_exynos_ipp_send_event *e;
-	struct list_head *head;
-	struct timeval now;
-	u32 tbuf_id[EXYNOS_DRM_OPS_MAX] = {0, };
-	int ret, i;
-
-	for_each_ipp_ops(i)
-		DRM_DEBUG_KMS("%s buf_id[%d]\n", i ? "dst" : "src", buf_id[i]);
-
-	if (!drm_dev) {
-		DRM_ERROR("failed to get drm_dev.\n");
-		return -EINVAL;
-	}
-
-	if (!property) {
-		DRM_ERROR("failed to get property.\n");
-		return -EINVAL;
-	}
-
-	mutex_lock(&c_node->event_lock);
-	if (list_empty(&c_node->event_list)) {
-		DRM_DEBUG_KMS("event list is empty.\n");
-		ret = 0;
-		goto err_event_unlock;
-	}
-
-	mutex_lock(&c_node->mem_lock);
-	if (!ipp_check_mem_list(c_node)) {
-		DRM_DEBUG_KMS("empty memory.\n");
-		ret = 0;
-		goto err_mem_unlock;
-	}
-
-	/* check command */
-	switch (property->cmd) {
-	case IPP_CMD_M2M:
-		for_each_ipp_ops(i) {
-			/* source/destination memory list */
-			head = &c_node->mem_list[i];
-
-			m_node = list_first_entry(head,
-				struct drm_exynos_ipp_mem_node, list);
-
-			tbuf_id[i] = m_node->buf_id;
-			DRM_DEBUG_KMS("%s buf_id[%d]\n",
-				i ? "dst" : "src", tbuf_id[i]);
-
-			ret = ipp_put_mem_node(drm_dev, c_node, m_node);
-			if (ret)
-				DRM_ERROR("failed to put m_node.\n");
-		}
-		break;
-	case IPP_CMD_WB:
-		/* clear buf for finding */
-		memset(&qbuf, 0x0, sizeof(qbuf));
-		qbuf.ops_id = EXYNOS_DRM_OPS_DST;
-		qbuf.buf_id = buf_id[EXYNOS_DRM_OPS_DST];
-
-		/* get memory node entry */
-		m_node = ipp_find_mem_node(c_node, &qbuf);
-		if (!m_node) {
-			DRM_ERROR("empty memory node.\n");
-			ret = -ENOMEM;
-			goto err_mem_unlock;
-		}
-
-		tbuf_id[EXYNOS_DRM_OPS_DST] = m_node->buf_id;
-
-		ret = ipp_put_mem_node(drm_dev, c_node, m_node);
-		if (ret)
-			DRM_ERROR("failed to put m_node.\n");
-		break;
-	case IPP_CMD_OUTPUT:
-		/* source memory list */
-		head = &c_node->mem_list[EXYNOS_DRM_OPS_SRC];
-
-		m_node = list_first_entry(head,
-			struct drm_exynos_ipp_mem_node, list);
-
-		tbuf_id[EXYNOS_DRM_OPS_SRC] = m_node->buf_id;
-
-		ret = ipp_put_mem_node(drm_dev, c_node, m_node);
-		if (ret)
-			DRM_ERROR("failed to put m_node.\n");
-		break;
-	default:
-		DRM_ERROR("invalid operations.\n");
-		ret = -EINVAL;
-		goto err_mem_unlock;
-	}
-	mutex_unlock(&c_node->mem_lock);
-
-	if (tbuf_id[EXYNOS_DRM_OPS_DST] != buf_id[EXYNOS_DRM_OPS_DST])
-		DRM_ERROR("failed to match buf_id[%d %d]prop_id[%d]\n",
-			tbuf_id[1], buf_id[1], property->prop_id);
-
-	/*
-	 * command node have event list of destination buffer
-	 * If destination buffer enqueue to mem list,
-	 * then we make event and link to event list tail.
-	 * so, we get first event for first enqueued buffer.
-	 */
-	e = list_first_entry(&c_node->event_list,
-		struct drm_exynos_ipp_send_event, base.link);
-
-	do_gettimeofday(&now);
-	DRM_DEBUG_KMS("tv_sec[%ld]tv_usec[%ld]\n", now.tv_sec, now.tv_usec);
-	e->event.tv_sec = now.tv_sec;
-	e->event.tv_usec = now.tv_usec;
-	e->event.prop_id = property->prop_id;
-
-	/* set buffer id about source destination */
-	for_each_ipp_ops(i)
-		e->event.buf_id[i] = tbuf_id[i];
-
-	drm_send_event(drm_dev, &e->base);
-	mutex_unlock(&c_node->event_lock);
-
-	DRM_DEBUG_KMS("done cmd[%d]prop_id[%d]buf_id[%d]\n",
-		property->cmd, property->prop_id, tbuf_id[EXYNOS_DRM_OPS_DST]);
-
-	return 0;
-
-err_mem_unlock:
-	mutex_unlock(&c_node->mem_lock);
-err_event_unlock:
-	mutex_unlock(&c_node->event_lock);
-	return ret;
-}
-
-void ipp_sched_event(struct work_struct *work)
-{
-	struct drm_exynos_ipp_event_work *event_work =
-		container_of(work, struct drm_exynos_ipp_event_work, work);
-	struct exynos_drm_ippdrv *ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node;
-	int ret;
-
-	if (!event_work) {
-		DRM_ERROR("failed to get event_work.\n");
-		return;
-	}
-
-	DRM_DEBUG_KMS("buf_id[%d]\n", event_work->buf_id[EXYNOS_DRM_OPS_DST]);
-
-	ippdrv = event_work->ippdrv;
-	if (!ippdrv) {
-		DRM_ERROR("failed to get ipp driver.\n");
-		return;
-	}
-
-	c_node = ippdrv->c_node;
-	if (!c_node) {
-		DRM_ERROR("failed to get command node.\n");
-		return;
-	}
-
-	/*
-	 * IPP supports command thread, event thread synchronization.
-	 * If IPP close immediately from user land, then IPP make
-	 * synchronization with command thread, so make complete event.
-	 * or going out operations.
-	 */
-	if (c_node->state != IPP_STATE_START) {
-		DRM_DEBUG_KMS("bypass state[%d]prop_id[%d]\n",
-			c_node->state, c_node->property.prop_id);
-		goto err_completion;
-	}
-
-	ret = ipp_send_event(ippdrv, c_node, event_work->buf_id);
-	if (ret) {
-		DRM_ERROR("failed to send event.\n");
-		goto err_completion;
-	}
-
-err_completion:
-	if (ipp_is_m2m_cmd(c_node->property.cmd))
-		complete(&c_node->start_complete);
-}
-
-static int ipp_subdrv_probe(struct drm_device *drm_dev, struct device *dev)
-{
-	struct ipp_context *ctx = get_ipp_context(dev);
-	struct exynos_drm_ippdrv *ippdrv;
-	int ret, count = 0;
-
-	/* get ipp driver entry */
-	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
-		ippdrv->drm_dev = drm_dev;
-
-		ret = ipp_create_id(&ctx->ipp_idr, &ctx->ipp_lock, ippdrv);
-		if (ret < 0) {
-			DRM_ERROR("failed to create id.\n");
-			goto err;
-		}
-		ippdrv->prop_list.ipp_id = ret;
-
-		DRM_DEBUG_KMS("count[%d]ippdrv[%pK]ipp_id[%d]\n",
-			count++, ippdrv, ret);
-
-		/* store parent device for node */
-		ippdrv->parent_dev = dev;
-
-		/* store event work queue and handler */
-		ippdrv->event_workq = ctx->event_workq;
-		ippdrv->sched_event = ipp_sched_event;
-		INIT_LIST_HEAD(&ippdrv->cmd_list);
-		mutex_init(&ippdrv->cmd_lock);
-
-		ret = drm_iommu_attach_device(drm_dev, ippdrv->dev);
-		if (ret) {
-			DRM_ERROR("failed to activate iommu\n");
-			goto err;
-		}
-	}
-
-	return 0;
-
-err:
-	/* get ipp driver entry */
-	list_for_each_entry_continue_reverse(ippdrv, &exynos_drm_ippdrv_list,
-						drv_list) {
-		drm_iommu_detach_device(drm_dev, ippdrv->dev);
-
-		ipp_remove_id(&ctx->ipp_idr, &ctx->ipp_lock,
-				ippdrv->prop_list.ipp_id);
-	}
-
-	return ret;
-}
-
-static void ipp_subdrv_remove(struct drm_device *drm_dev, struct device *dev)
-{
-	struct exynos_drm_ippdrv *ippdrv, *t;
-	struct ipp_context *ctx = get_ipp_context(dev);
-
-	/* get ipp driver entry */
-	list_for_each_entry_safe(ippdrv, t, &exynos_drm_ippdrv_list, drv_list) {
-		drm_iommu_detach_device(drm_dev, ippdrv->dev);
-
-		ipp_remove_id(&ctx->ipp_idr, &ctx->ipp_lock,
-				ippdrv->prop_list.ipp_id);
-
-		ippdrv->drm_dev = NULL;
-		exynos_drm_ippdrv_unregister(ippdrv);
-	}
-}
-
-static int ipp_subdrv_open(struct drm_device *drm_dev, struct device *dev,
-		struct drm_file *file)
-{
-	struct drm_exynos_file_private *file_priv = file->driver_priv;
-
-	file_priv->ipp_dev = dev;
-
-	DRM_DEBUG_KMS("done priv[%pK]\n", dev);
-
-	return 0;
-}
-
-static void ipp_subdrv_close(struct drm_device *drm_dev, struct device *dev,
-		struct drm_file *file)
-{
-	struct exynos_drm_ippdrv *ippdrv = NULL;
-	struct ipp_context *ctx = get_ipp_context(dev);
-	struct drm_exynos_ipp_cmd_node *c_node, *tc_node;
-	int count = 0;
-
-	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
-		mutex_lock(&ippdrv->cmd_lock);
-		list_for_each_entry_safe(c_node, tc_node,
-			&ippdrv->cmd_list, list) {
-			DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n",
-				count++, ippdrv);
-
-			if (c_node->filp == file) {
-				/*
-				 * userland goto unnormal state. process killed.
-				 * and close the file.
-				 * so, IPP didn't called stop cmd ctrl.
-				 * so, we are make stop operation in this state.
-				 */
-				if (c_node->state == IPP_STATE_START) {
-					ipp_stop_property(drm_dev, ippdrv,
-						c_node);
-					c_node->state = IPP_STATE_STOP;
-				}
-
-				ippdrv->dedicated = false;
-				ipp_clean_cmd_node(ctx, c_node);
-				if (list_empty(&ippdrv->cmd_list))
-					pm_runtime_put_sync(ippdrv->dev);
-			}
-		}
-		mutex_unlock(&ippdrv->cmd_lock);
-	}
-
-	return;
-}
-
-static int ipp_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct ipp_context *ctx;
-	struct exynos_drm_subdrv *subdrv;
-	int ret;
-
-	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
-	if (!ctx)
-		return -ENOMEM;
-
-	mutex_init(&ctx->ipp_lock);
-	mutex_init(&ctx->prop_lock);
-
-	idr_init(&ctx->ipp_idr);
-	idr_init(&ctx->prop_idr);
-
-	/*
-	 * create single thread for ipp event
-	 * IPP supports event thread for IPP drivers.
-	 * IPP driver send event_work to this thread.
-	 * and IPP event thread send event to user process.
-	 */
-	ctx->event_workq = create_singlethread_workqueue("ipp_event");
-	if (!ctx->event_workq) {
-		dev_err(dev, "failed to create event workqueue\n");
-		return -EINVAL;
-	}
-
-	/*
-	 * create single thread for ipp command
-	 * IPP supports command thread for user process.
-	 * user process make command node using set property ioctl.
-	 * and make start_work and send this work to command thread.
-	 * and then this command thread start property.
-	 */
-	ctx->cmd_workq = create_singlethread_workqueue("ipp_cmd");
-	if (!ctx->cmd_workq) {
-		dev_err(dev, "failed to create cmd workqueue\n");
-		ret = -EINVAL;
-		goto err_event_workq;
-	}
-
-	/* set sub driver informations */
-	subdrv = &ctx->subdrv;
-	subdrv->dev = dev;
-	subdrv->probe = ipp_subdrv_probe;
-	subdrv->remove = ipp_subdrv_remove;
-	subdrv->open = ipp_subdrv_open;
-	subdrv->close = ipp_subdrv_close;
-
-	platform_set_drvdata(pdev, ctx);
-
-	ret = exynos_drm_subdrv_register(subdrv);
-	if (ret < 0) {
-		DRM_ERROR("failed to register drm ipp device.\n");
-		goto err_cmd_workq;
-	}
-
-	dev_info(dev, "drm ipp registered successfully.\n");
-
-	return 0;
-
-err_cmd_workq:
-	destroy_workqueue(ctx->cmd_workq);
-err_event_workq:
-	destroy_workqueue(ctx->event_workq);
-	return ret;
-}
-
-static int ipp_remove(struct platform_device *pdev)
-{
-	struct ipp_context *ctx = platform_get_drvdata(pdev);
-
-	/* unregister sub driver */
-	exynos_drm_subdrv_unregister(&ctx->subdrv);
-
-	/* remove,destroy ipp idr */
-	idr_destroy(&ctx->ipp_idr);
-	idr_destroy(&ctx->prop_idr);
-
-	mutex_destroy(&ctx->ipp_lock);
-	mutex_destroy(&ctx->prop_lock);
-
-	/* destroy command, event work queue */
-	destroy_workqueue(ctx->cmd_workq);
-	destroy_workqueue(ctx->event_workq);
-
-	return 0;
-}
-
-struct platform_driver ipp_driver = {
-	.probe		= ipp_probe,
-	.remove		= ipp_remove,
-	.driver		= {
-		.name	= "exynos-drm-ipp",
-		.owner	= THIS_MODULE,
-	},
-};
-
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h
deleted file mode 100644
index 2a61547a39d0b56b86bfabe1a0bc5fcb4b930b21..0000000000000000000000000000000000000000
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.h
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *
- * Authors:
- *	Eunchul Kim <chulspro.kim@samsung.com>
- *	Jinyoung Jeon <jy0.jeon@samsung.com>
- *	Sangmin Lee <lsmin.lee@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_IPP_H_
-#define _EXYNOS_DRM_IPP_H_
-
-#define for_each_ipp_ops(pos)	\
-	for (pos = 0; pos < EXYNOS_DRM_OPS_MAX; pos++)
-#define for_each_ipp_planar(pos)	\
-	for (pos = 0; pos < EXYNOS_DRM_PLANAR_MAX; pos++)
-
-#define IPP_GET_LCD_WIDTH	_IOR('F', 302, int)
-#define IPP_GET_LCD_HEIGHT	_IOR('F', 303, int)
-#define IPP_SET_WRITEBACK	_IOW('F', 304, u32)
-
-/* definition of state */
-enum drm_exynos_ipp_state {
-	IPP_STATE_IDLE,
-	IPP_STATE_START,
-	IPP_STATE_STOP,
-};
-
-/*
- * A structure of command work information.
- * @work: work structure.
- * @ippdrv: current work ippdrv.
- * @c_node: command node information.
- * @ctrl: command control.
- */
-struct drm_exynos_ipp_cmd_work {
-	struct work_struct	work;
-	struct exynos_drm_ippdrv	*ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node;
-	enum drm_exynos_ipp_ctrl	ctrl;
-};
-
-/*
- * A structure of command node.
- *
- * @list: list head to command queue information.
- * @event_list: list head of event.
- * @mem_list: list head to source,destination memory queue information.
- * @lock: lock for synchronization of access to ioctl.
- * @mem_lock: lock for synchronization of access to memory nodes.
- * @event_lock: lock for synchronization of access to scheduled event.
- * @start_complete: completion of start of command.
- * @stop_complete: completion of stop of command.
- * @property: property information.
- * @start_work: start command work structure.
- * @stop_work: stop command work structure.
- * @event_work: event work structure.
- * @state: state of command node.
- * @filp: associated file pointer.
- */
-struct drm_exynos_ipp_cmd_node {
-	struct list_head	list;
-	struct list_head	event_list;
-	struct list_head	mem_list[EXYNOS_DRM_OPS_MAX];
-	struct mutex	lock;
-	struct mutex	mem_lock;
-	struct mutex	event_lock;
-	struct completion	start_complete;
-	struct completion	stop_complete;
-	struct drm_exynos_ipp_property	property;
-	struct drm_exynos_ipp_cmd_work *start_work;
-	struct drm_exynos_ipp_cmd_work *stop_work;
-	struct drm_exynos_ipp_event_work *event_work;
-	enum drm_exynos_ipp_state	state;
-	struct drm_file	*filp;
-};
-
-/*
- * A structure of buffer information.
- *
- * @handles: Y, Cb, Cr each gem object handle.
- * @base: Y, Cb, Cr each planar address.
- */
-struct drm_exynos_ipp_buf_info {
-	unsigned long	handles[EXYNOS_DRM_PLANAR_MAX];
-	dma_addr_t	base[EXYNOS_DRM_PLANAR_MAX];
-};
-
-/*
- * A structure of wb setting information.
- *
- * @enable: enable flag for wb.
- * @refresh: HZ of the refresh rate.
- */
-struct drm_exynos_ipp_set_wb {
-	__u32	enable;
-	__u32	refresh;
-};
-
-/*
- * A structure of event work information.
- *
- * @work: work structure.
- * @ippdrv: current work ippdrv.
- * @buf_id: id of src, dst buffer.
- */
-struct drm_exynos_ipp_event_work {
-	struct work_struct	work;
-	struct exynos_drm_ippdrv *ippdrv;
-	u32	buf_id[EXYNOS_DRM_OPS_MAX];
-};
-
-/*
- * A structure of source,destination operations.
- *
- * @set_fmt: set format of image.
- * @set_transf: set transform(rotations, flip).
- * @set_size: set size of region.
- * @set_addr: set address for dma.
- */
-struct exynos_drm_ipp_ops {
-	int (*set_fmt)(struct device *dev, u32 fmt);
-	int (*set_transf)(struct device *dev,
-		enum drm_exynos_degree degree,
-		enum drm_exynos_flip flip, bool *swap);
-	int (*set_size)(struct device *dev, int swap,
-		struct drm_exynos_pos *pos, struct drm_exynos_sz *sz);
-	int (*set_addr)(struct device *dev,
-		 struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
-		enum drm_exynos_ipp_buf_type buf_type);
-};
-
-/*
- * A structure of ipp driver.
- *
- * @drv_list: list head for registed sub driver information.
- * @parent_dev: parent device information.
- * @dev: platform device.
- * @drm_dev: drm device.
- * @dedicated: dedicated ipp device.
- * @ops: source, destination operations.
- * @event_workq: event work queue.
- * @c_node: current command information.
- * @cmd_list: list head for command information.
- * @cmd_lock: lock for synchronization of access to cmd_list.
- * @prop_list: property informations of current ipp driver.
- * @check_property: check property about format, size, buffer.
- * @reset: reset ipp block.
- * @start: ipp each device start.
- * @stop: ipp each device stop.
- * @sched_event: work schedule handler.
- */
-struct exynos_drm_ippdrv {
-	struct list_head	drv_list;
-	struct device	*parent_dev;
-	struct device	*dev;
-	struct drm_device	*drm_dev;
-	bool	dedicated;
-	struct exynos_drm_ipp_ops	*ops[EXYNOS_DRM_OPS_MAX];
-	struct workqueue_struct	*event_workq;
-	struct drm_exynos_ipp_cmd_node *c_node;
-	struct list_head	cmd_list;
-	struct mutex	cmd_lock;
-	struct drm_exynos_ipp_prop_list prop_list;
-
-	int (*check_property)(struct device *dev,
-		struct drm_exynos_ipp_property *property);
-	int (*reset)(struct device *dev);
-	int (*start)(struct device *dev, enum drm_exynos_ipp_cmd cmd);
-	void (*stop)(struct device *dev, enum drm_exynos_ipp_cmd cmd);
-	void (*sched_event)(struct work_struct *work);
-};
-
-#ifdef CONFIG_DRM_EXYNOS_IPP
-extern int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv);
-extern int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv);
-extern int exynos_drm_ipp_get_property(struct drm_device *drm_dev, void *data,
-					 struct drm_file *file);
-extern int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data,
-					 struct drm_file *file);
-extern int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, void *data,
-					 struct drm_file *file);
-extern int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev, void *data,
-					 struct drm_file *file);
-extern int exynos_drm_ippnb_register(struct notifier_block *nb);
-extern int exynos_drm_ippnb_unregister(struct notifier_block *nb);
-extern int exynos_drm_ippnb_send_event(unsigned long val, void *v);
-extern void ipp_sched_cmd(struct work_struct *work);
-extern void ipp_sched_event(struct work_struct *work);
-
-#else
-static inline int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv)
-{
-	return -ENODEV;
-}
-
-static inline int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv)
-{
-	return -ENODEV;
-}
-
-static inline int exynos_drm_ipp_get_property(struct drm_device *drm_dev,
-						void *data,
-						struct drm_file *file_priv)
-{
-	return -ENOTTY;
-}
-
-static inline int exynos_drm_ipp_set_property(struct drm_device *drm_dev,
-						void *data,
-						struct drm_file *file_priv)
-{
-	return -ENOTTY;
-}
-
-static inline int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev,
-						void *data,
-						struct drm_file *file)
-{
-	return -ENOTTY;
-}
-
-static inline int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev,
-						void *data,
-						struct drm_file *file)
-{
-	return -ENOTTY;
-}
-
-static inline int exynos_drm_ippnb_register(struct notifier_block *nb)
-{
-	return -ENODEV;
-}
-
-static inline int exynos_drm_ippnb_unregister(struct notifier_block *nb)
-{
-	return -ENODEV;
-}
-
-static inline int exynos_drm_ippnb_send_event(unsigned long val, void *v)
-{
-	return -ENOTTY;
-}
-#endif
-
-#endif /* _EXYNOS_DRM_IPP_H_ */
-
diff --git a/include/video/exynos5433_decon.h b/drivers/gpu/drm/exynos/regs-decon5433.h
similarity index 98%
rename from include/video/exynos5433_decon.h
rename to drivers/gpu/drm/exynos/regs-decon5433.h
index 78957c9626f5880f302e1cf08fc5a44b2e40dcf3..19ad9e47945eb6f3fa3902fb8b80084fdbf7672f 100644
--- a/include/video/exynos5433_decon.h
+++ b/drivers/gpu/drm/exynos/regs-decon5433.h
@@ -6,8 +6,8 @@
  * published by the Free Software Foundationr
  */
 
-#ifndef EXYNOS_REGS_DECON_H
-#define EXYNOS_REGS_DECON_H
+#ifndef EXYNOS_REGS_DECON5433_H
+#define EXYNOS_REGS_DECON5433_H
 
 /* Exynos543X DECON */
 #define DECON_VIDCON0			0x0000
@@ -206,4 +206,4 @@
 #define CRCCTRL_CRCEN			(0x1 << 0)
 #define CRCCTRL_MASK			(0x7)
 
-#endif /* EXYNOS_REGS_DECON_H */
+#endif /* EXYNOS_REGS_DECON5433_H */
diff --git a/include/video/exynos7_decon.h b/drivers/gpu/drm/exynos/regs-decon7.h
similarity index 99%
rename from include/video/exynos7_decon.h
rename to drivers/gpu/drm/exynos/regs-decon7.h
index a62b11b613f65225577b1d88206d5f256d22a827..5df7765d2397d79f478c9824fdee01f366c5b133 100644
--- a/include/video/exynos7_decon.h
+++ b/drivers/gpu/drm/exynos/regs-decon7.h
@@ -1,5 +1,4 @@
-/* include/video/exynos7_decon.h
- *
+/*
  * Copyright (c) 2014 Samsung Electronics Co., Ltd.
  * Author: Ajay Kumar <ajaykumar.rs@samsung.com>
  *
@@ -9,6 +8,9 @@
  * option) any later version.
  */
 
+#ifndef EXYNOS_REGS_DECON7_H
+#define EXYNOS_REGS_DECON7_H
+
 /* VIDCON0 */
 #define VIDCON0					0x00
 
@@ -347,3 +349,5 @@
 
 #define DECON_UPDATE_SLAVE_SYNC			(1 << 4)
 #define DECON_UPDATE_STANDALONE_F		(1 << 0)
+
+#endif /* EXYNOS_REGS_DECON7_H */
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 2570c7f647a637f3ea5eabf4030e77888116a33b..cb0a2ae916e094b4e1e52d123a320d5212c776aa 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -576,13 +576,6 @@ static void psb_fbdev_fini(struct drm_device *dev)
 	dev_priv->fbdev = NULL;
 }
 
-static void psbfb_output_poll_changed(struct drm_device *dev)
-{
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_fbdev *fbdev = (struct psb_fbdev *)dev_priv->fbdev;
-	drm_fb_helper_hotplug_event(&fbdev->psb_fb_helper);
-}
-
 /**
  *	psb_user_framebuffer_create_handle - add hamdle to a framebuffer
  *	@fb: framebuffer
@@ -623,7 +616,7 @@ static void psb_user_framebuffer_destroy(struct drm_framebuffer *fb)
 
 static const struct drm_mode_config_funcs psb_mode_funcs = {
 	.fb_create = psb_user_framebuffer_create,
-	.output_poll_changed = psbfb_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 };
 
 static void psb_setup_outputs(struct drm_device *dev)
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index 8f5cc1f471cd2bd373cfa6d4fdb2c9bcbda0cc01..38d09d4b3ed5fd39fe22a230806c12386e1cedf9 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -107,19 +107,6 @@ MODULE_DEVICE_TABLE(pci, pciidlist);
 static const struct drm_ioctl_desc psb_ioctls[] = {
 };
 
-static void psb_driver_lastclose(struct drm_device *dev)
-{
-	int ret;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_fbdev *fbdev = dev_priv->fbdev;
-
-	ret = drm_fb_helper_restore_fbdev_mode_unlocked(&fbdev->psb_fb_helper);
-	if (ret)
-		DRM_DEBUG("failed to restore crtc mode\n");
-
-	return;
-}
-
 static int psb_do_init(struct drm_device *dev)
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
@@ -479,7 +466,7 @@ static struct drm_driver driver = {
 			   DRIVER_MODESET | DRIVER_GEM,
 	.load = psb_driver_load,
 	.unload = psb_driver_unload,
-	.lastclose = psb_driver_lastclose,
+	.lastclose = drm_fb_helper_lastclose,
 
 	.num_ioctls = ARRAY_SIZE(psb_ioctls),
 	.irq_preinstall = psb_irq_preinstall,
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
index ab4ee5953615acebe0f3c3c92c68fd45487e6afc..8516e005643f91faae004d8c423e401087fa023e 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
@@ -223,9 +223,10 @@ static struct ttm_tt *hibmc_ttm_tt_create(struct ttm_bo_device *bdev,
 	return tt;
 }
 
-static int hibmc_ttm_tt_populate(struct ttm_tt *ttm)
+static int hibmc_ttm_tt_populate(struct ttm_tt *ttm,
+		struct ttm_operation_ctx *ctx)
 {
-	return ttm_pool_populate(ttm);
+	return ttm_pool_populate(ttm, ctx);
 }
 
 static void hibmc_ttm_tt_unpopulate(struct ttm_tt *ttm)
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index edec15d195387476a9f04241feb0e4f43d345f48..c8454ac43fae0fbdece898d38abc9c8e7059582b 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -2819,12 +2819,12 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 }
 
 static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt,
-		unsigned int opcode, int rings)
+		unsigned int opcode, unsigned long rings)
 {
 	struct cmd_info *info = NULL;
 	unsigned int ring;
 
-	for_each_set_bit(ring, (unsigned long *)&rings, I915_NUM_ENGINES) {
+	for_each_set_bit(ring, &rings, I915_NUM_ENGINES) {
 		info = find_cmd_entry(gvt, opcode, ring);
 		if (info)
 			break;
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index c4f752eeadcc8e79453014e8c1829a66a456038c..a529d2bd393cb9fd736dbf59f243a69876d41cc9 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1410,12 +1410,15 @@ static int ppgtt_handle_guest_write_page_table_bytes(
 			return ret;
 	} else {
 		if (!test_bit(index, spt->post_shadow_bitmap)) {
+			int type = spt->shadow_page.type;
+
 			ppgtt_get_shadow_entry(spt, &se, index);
 			ret = ppgtt_handle_guest_entry_removal(gpt, &se, index);
 			if (ret)
 				return ret;
+			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
+			ppgtt_set_shadow_entry(spt, &se, index);
 		}
-
 		ppgtt_set_post_shadow(spt, index);
 	}
 
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 17d2f3a1c562bcb34e132c855965d1b2ace89726..1d053bbefc02c694af593008ae19312a7826c994 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -38,7 +38,6 @@
 struct imx_drm_device {
 	struct drm_device			*drm;
 	unsigned int				pipes;
-	struct drm_fbdev_cma			*fbhelper;
 	struct drm_atomic_state			*state;
 };
 
@@ -47,13 +46,6 @@ static int legacyfb_depth = 16;
 module_param(legacyfb_depth, int, 0444);
 #endif
 
-static void imx_drm_driver_lastclose(struct drm_device *drm)
-{
-	struct imx_drm_device *imxdrm = drm->dev_private;
-
-	drm_fbdev_cma_restore_mode(imxdrm->fbhelper);
-}
-
 DEFINE_DRM_GEM_CMA_FOPS(imx_drm_driver_fops);
 
 void imx_drm_connector_destroy(struct drm_connector *connector)
@@ -69,13 +61,6 @@ void imx_drm_encoder_destroy(struct drm_encoder *encoder)
 }
 EXPORT_SYMBOL_GPL(imx_drm_encoder_destroy);
 
-static void imx_drm_output_poll_changed(struct drm_device *drm)
-{
-	struct imx_drm_device *imxdrm = drm->dev_private;
-
-	drm_fbdev_cma_hotplug_event(imxdrm->fbhelper);
-}
-
 static int imx_drm_atomic_check(struct drm_device *dev,
 				struct drm_atomic_state *state)
 {
@@ -107,7 +92,7 @@ static int imx_drm_atomic_check(struct drm_device *dev,
 
 static const struct drm_mode_config_funcs imx_drm_mode_config_funcs = {
 	.fb_create = drm_gem_fb_create,
-	.output_poll_changed = imx_drm_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = imx_drm_atomic_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
@@ -186,7 +171,7 @@ static const struct drm_ioctl_desc imx_drm_ioctls[] = {
 static struct drm_driver imx_drm_driver = {
 	.driver_features	= DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME |
 				  DRIVER_ATOMIC,
-	.lastclose		= imx_drm_driver_lastclose,
+	.lastclose		= drm_fb_helper_lastclose,
 	.gem_free_object_unlocked = drm_gem_cma_free_object,
 	.gem_vm_ops		= &drm_gem_cma_vm_ops,
 	.dumb_create		= drm_gem_cma_dumb_create,
@@ -272,6 +257,7 @@ static int imx_drm_bind(struct device *dev)
 	drm->mode_config.max_height = 4096;
 	drm->mode_config.funcs = &imx_drm_mode_config_funcs;
 	drm->mode_config.helper_private = &imx_drm_mode_config_helpers;
+	drm->mode_config.allow_fb_modifiers = true;
 
 	drm_mode_config_init(drm);
 
@@ -298,12 +284,9 @@ static int imx_drm_bind(struct device *dev)
 		dev_warn(dev, "Invalid legacyfb_depth.  Defaulting to 16bpp\n");
 		legacyfb_depth = 16;
 	}
-	imxdrm->fbhelper = drm_fbdev_cma_init(drm, legacyfb_depth, MAX_CRTC);
-	if (IS_ERR(imxdrm->fbhelper)) {
-		ret = PTR_ERR(imxdrm->fbhelper);
-		imxdrm->fbhelper = NULL;
+	ret = drm_fb_cma_fbdev_init(drm, legacyfb_depth, MAX_CRTC);
+	if (ret)
 		goto err_unbind;
-	}
 #endif
 
 	drm_kms_helper_poll_init(drm);
@@ -317,8 +300,7 @@ static int imx_drm_bind(struct device *dev)
 err_fbhelper:
 	drm_kms_helper_poll_fini(drm);
 #if IS_ENABLED(CONFIG_DRM_FBDEV_EMULATION)
-	if (imxdrm->fbhelper)
-		drm_fbdev_cma_fini(imxdrm->fbhelper);
+	drm_fb_cma_fbdev_fini(drm);
 err_unbind:
 #endif
 	component_unbind_all(drm->dev, drm);
@@ -333,14 +315,12 @@ static int imx_drm_bind(struct device *dev)
 static void imx_drm_unbind(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
-	struct imx_drm_device *imxdrm = drm->dev_private;
 
 	drm_dev_unregister(drm);
 
 	drm_kms_helper_poll_fini(drm);
 
-	if (imxdrm->fbhelper)
-		drm_fbdev_cma_fini(imxdrm->fbhelper);
+	drm_fb_cma_fbdev_fini(drm);
 
 	drm_mode_config_cleanup(drm);
 
diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h
index f0b7556c0857d40c41a0d8b8d6e853d5c94be25d..15c2bec47a04cc0aaa37c5954a7dc69233c0eb38 100644
--- a/drivers/gpu/drm/imx/imx-drm.h
+++ b/drivers/gpu/drm/imx/imx-drm.h
@@ -8,7 +8,6 @@ struct drm_connector;
 struct drm_device;
 struct drm_display_mode;
 struct drm_encoder;
-struct drm_fbdev_cma;
 struct drm_framebuffer;
 struct drm_plane;
 struct imx_drm_crtc;
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 5a67daedcf4d7e99ae6eac85996d26a70ad2b0d4..57ed56d8623fcb67133f1fe79f390ad45c38c257 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -77,6 +77,18 @@ static const uint32_t ipu_plane_formats[] = {
 	DRM_FORMAT_BGRX8888_A8,
 };
 
+static const uint64_t ipu_format_modifiers[] = {
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_INVALID
+};
+
+static const uint64_t pre_format_modifiers[] = {
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_VIVANTE_TILED,
+	DRM_FORMAT_MOD_VIVANTE_SUPER_TILED,
+	DRM_FORMAT_MOD_INVALID
+};
+
 int ipu_plane_irq(struct ipu_plane *ipu_plane)
 {
 	return ipu_idmac_channel_irq(ipu_plane->ipu, ipu_plane->ipu_ch,
@@ -303,6 +315,22 @@ void ipu_plane_destroy_state(struct drm_plane *plane,
 	kfree(ipu_state);
 }
 
+static bool ipu_plane_format_mod_supported(struct drm_plane *plane,
+					   uint32_t format, uint64_t modifier)
+{
+	struct ipu_soc *ipu = to_ipu_plane(plane)->ipu;
+
+	/* linear is supported for all planes and formats */
+	if (modifier == DRM_FORMAT_MOD_LINEAR)
+		return true;
+
+	/* without a PRG there are no supported modifiers */
+	if (!ipu_prg_present(ipu))
+		return false;
+
+	return ipu_prg_format_supported(ipu, format, modifier);
+}
+
 static const struct drm_plane_funcs ipu_plane_funcs = {
 	.update_plane	= drm_atomic_helper_update_plane,
 	.disable_plane	= drm_atomic_helper_disable_plane,
@@ -310,6 +338,7 @@ static const struct drm_plane_funcs ipu_plane_funcs = {
 	.reset		= ipu_plane_state_reset,
 	.atomic_duplicate_state	= ipu_plane_duplicate_state,
 	.atomic_destroy_state	= ipu_plane_destroy_state,
+	.format_mod_supported = ipu_plane_format_mod_supported,
 };
 
 static int ipu_plane_atomic_check(struct drm_plane *plane,
@@ -550,8 +579,8 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
 		ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id,
 					  drm_rect_width(&state->src) >> 16,
 					  drm_rect_height(&state->src) >> 16,
-					  fb->pitches[0],
-					  fb->format->format, &eba);
+					  fb->pitches[0], fb->format->format,
+					  fb->modifier, &eba);
 	}
 
 	if (old_state->fb && !drm_atomic_crtc_needs_modeset(crtc_state)) {
@@ -700,18 +729,71 @@ static const struct drm_plane_helper_funcs ipu_plane_helper_funcs = {
 int ipu_planes_assign_pre(struct drm_device *dev,
 			  struct drm_atomic_state *state)
 {
+	struct drm_crtc_state *old_crtc_state, *crtc_state;
 	struct drm_plane_state *plane_state;
+	struct ipu_plane_state *ipu_state;
+	struct ipu_plane *ipu_plane;
 	struct drm_plane *plane;
+	struct drm_crtc *crtc;
 	int available_pres = ipu_prg_max_active_channels();
-	int i;
+	int ret, i;
 
+	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, crtc_state, i) {
+		ret = drm_atomic_add_affected_planes(state, crtc);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * We are going over the planes in 2 passes: first we assign PREs to
+	 * planes with a tiling modifier, which need the PREs to resolve into
+	 * linear. Any failure to assign a PRE there is fatal. In the second
+	 * pass we try to assign PREs to linear FBs, to improve memory access
+	 * patterns for them. Failure at this point is non-fatal, as we can
+	 * scan out linear FBs without a PRE.
+	 */
 	for_each_new_plane_in_state(state, plane, plane_state, i) {
-		struct ipu_plane_state *ipu_state =
-				to_ipu_plane_state(plane_state);
-		struct ipu_plane *ipu_plane = to_ipu_plane(plane);
+		ipu_state = to_ipu_plane_state(plane_state);
+		ipu_plane = to_ipu_plane(plane);
+
+		if (!plane_state->fb) {
+			ipu_state->use_pre = false;
+			continue;
+		}
+
+		if (!(plane_state->fb->flags & DRM_MODE_FB_MODIFIERS) ||
+		    plane_state->fb->modifier == DRM_FORMAT_MOD_LINEAR)
+			continue;
+
+		if (!ipu_prg_present(ipu_plane->ipu) || !available_pres)
+			return -EINVAL;
+
+		if (!ipu_prg_format_supported(ipu_plane->ipu,
+					      plane_state->fb->format->format,
+					      plane_state->fb->modifier))
+			return -EINVAL;
+
+		ipu_state->use_pre = true;
+		available_pres--;
+	}
+
+	for_each_new_plane_in_state(state, plane, plane_state, i) {
+		ipu_state = to_ipu_plane_state(plane_state);
+		ipu_plane = to_ipu_plane(plane);
+
+		if (!plane_state->fb) {
+			ipu_state->use_pre = false;
+			continue;
+		}
+
+		if ((plane_state->fb->flags & DRM_MODE_FB_MODIFIERS) &&
+		    plane_state->fb->modifier != DRM_FORMAT_MOD_LINEAR)
+			continue;
+
+		/* make sure that modifier is initialized */
+		plane_state->fb->modifier = DRM_FORMAT_MOD_LINEAR;
 
 		if (ipu_prg_present(ipu_plane->ipu) && available_pres &&
-		    plane_state->fb &&
 		    ipu_prg_format_supported(ipu_plane->ipu,
 					     plane_state->fb->format->format,
 					     plane_state->fb->modifier)) {
@@ -731,6 +813,7 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
 				 enum drm_plane_type type)
 {
 	struct ipu_plane *ipu_plane;
+	const uint64_t *modifiers = ipu_format_modifiers;
 	int ret;
 
 	DRM_DEBUG_KMS("channel %d, dp flow %d, possible_crtcs=0x%x\n",
@@ -746,10 +829,13 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
 	ipu_plane->dma = dma;
 	ipu_plane->dp_flow = dp;
 
+	if (ipu_prg_present(ipu))
+		modifiers = pre_format_modifiers;
+
 	ret = drm_universal_plane_init(dev, &ipu_plane->base, possible_crtcs,
 				       &ipu_plane_funcs, ipu_plane_formats,
 				       ARRAY_SIZE(ipu_plane_formats),
-				       NULL, type, NULL);
+				       modifiers, type, NULL);
 	if (ret) {
 		DRM_ERROR("failed to initialize plane\n");
 		kfree(ipu_plane);
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 3b804fdaf7a05f3b19b60ce2bb647d70dedd0d34..f9ad0e960263fd6e112c9f42f88df611774fd5c5 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -151,6 +151,14 @@ static struct regmap_config meson_regmap_config = {
 	.max_register   = 0x1000,
 };
 
+static void meson_vpu_init(struct meson_drm *priv)
+{
+	writel_relaxed(0x210000, priv->io_base + _REG(VPU_RDARB_MODE_L1C1));
+	writel_relaxed(0x10000, priv->io_base + _REG(VPU_RDARB_MODE_L1C2));
+	writel_relaxed(0x900000, priv->io_base + _REG(VPU_RDARB_MODE_L2C1));
+	writel_relaxed(0x20000, priv->io_base + _REG(VPU_WRARB_MODE_L2C1));
+}
+
 static int meson_drv_bind_master(struct device *dev, bool has_components)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -222,6 +230,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
 
 	/* Hardware Initialization */
 
+	meson_vpu_init(priv);
 	meson_venc_init(priv);
 	meson_vpp_init(priv);
 	meson_viu_init(priv);
diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c
index cef414466f9fed4256cd22cdca30e5fa8cc3f737..17de3afd98f6a6a3cae133944024c0ad2c41aa6c 100644
--- a/drivers/gpu/drm/meson/meson_dw_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c
@@ -23,6 +23,7 @@
 #include <linux/of_graph.h>
 #include <linux/reset.h>
 #include <linux/clk.h>
+#include <linux/regulator/consumer.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_edid.h>
@@ -137,6 +138,7 @@ struct meson_dw_hdmi {
 	struct reset_control *hdmitx_phy;
 	struct clk *hdmi_pclk;
 	struct clk *venci_clk;
+	struct regulator *hdmi_supply;
 	u32 irq_stat;
 };
 #define encoder_to_meson_dw_hdmi(x) \
@@ -751,6 +753,17 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
 	dw_plat_data = &meson_dw_hdmi->dw_plat_data;
 	encoder = &meson_dw_hdmi->encoder;
 
+	meson_dw_hdmi->hdmi_supply = devm_regulator_get_optional(dev, "hdmi");
+	if (IS_ERR(meson_dw_hdmi->hdmi_supply)) {
+		if (PTR_ERR(meson_dw_hdmi->hdmi_supply) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		meson_dw_hdmi->hdmi_supply = NULL;
+	} else {
+		ret = regulator_enable(meson_dw_hdmi->hdmi_supply);
+		if (ret)
+			return ret;
+	}
+
 	meson_dw_hdmi->hdmitx_apb = devm_reset_control_get_exclusive(dev,
 						"hdmitx_apb");
 	if (IS_ERR(meson_dw_hdmi->hdmitx_apb)) {
diff --git a/drivers/gpu/drm/meson/meson_registers.h b/drivers/gpu/drm/meson/meson_registers.h
index 284738196af9cb315f17b1756ae996288ffb38b0..bca87143e54880864a528da2fd5043ce9ca18e46 100644
--- a/drivers/gpu/drm/meson/meson_registers.h
+++ b/drivers/gpu/drm/meson/meson_registers.h
@@ -1363,6 +1363,10 @@
 #define VPU_PROT3_STAT_1 0x277a
 #define VPU_PROT3_STAT_2 0x277b
 #define VPU_PROT3_REQ_ONOFF 0x277c
+#define VPU_RDARB_MODE_L1C1 0x2790
+#define VPU_RDARB_MODE_L1C2 0x2799
+#define VPU_RDARB_MODE_L2C1 0x279d
+#define VPU_WRARB_MODE_L2C1 0x27a2
 
 /* osd super scale */
 #define OSDSR_HV_SIZEIN 0x3130
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index f03da63abc7b7ee0c7758cc63ea2fea6ab29495f..c97009bb77dd887c5c688db8cede559479a8a86e 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -216,9 +216,10 @@ static struct ttm_tt *mgag200_ttm_tt_create(struct ttm_bo_device *bdev,
 	return tt;
 }
 
-static int mgag200_ttm_tt_populate(struct ttm_tt *ttm)
+static int mgag200_ttm_tt_populate(struct ttm_tt *ttm,
+			struct ttm_operation_ctx *ctx)
 {
-	return ttm_pool_populate(ttm);
+	return ttm_pool_populate(ttm, ctx);
 }
 
 static void mgag200_ttm_tt_unpopulate(struct ttm_tt *ttm)
@@ -237,7 +238,6 @@ struct ttm_bo_driver mgag200_bo_driver = {
 	.verify_access = mgag200_bo_verify_access,
 	.io_mem_reserve = &mgag200_ttm_io_mem_reserve,
 	.io_mem_free = &mgag200_ttm_io_mem_free,
-	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int mgag200_mm_init(struct mga_device *mdev)
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index a1f4eeeb73e2f4467aaa9ab3f89711a7896b982c..7e09d44e4a153b65e557aeb23fc1dadec7eb7296 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -17,6 +17,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/of_address.h>
 #include <linux/soc/qcom/mdt_loader.h>
+#include <linux/pm_opp.h>
+#include <linux/nvmem-consumer.h>
 #include "msm_gem.h"
 #include "msm_mmu.h"
 #include "a5xx_gpu.h"
@@ -595,6 +597,12 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 	/* Turn on performance counters */
 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
 
+	/* Select CP0 to always count cycles */
+	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
+
+	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
+	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
+
 	/* Increase VFD cache access so LRZ and other data gets evicted less */
 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
 
@@ -1165,6 +1173,14 @@ static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
 	return a5xx_gpu->cur_ring;
 }
 
+static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
+{
+	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
+		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
+
+	return 0;
+}
+
 static const struct adreno_gpu_funcs funcs = {
 	.base = {
 		.get_param = adreno_get_param,
@@ -1180,10 +1196,30 @@ static const struct adreno_gpu_funcs funcs = {
 #ifdef CONFIG_DEBUG_FS
 		.show = a5xx_show,
 #endif
+		.gpu_busy = a5xx_gpu_busy,
 	},
 	.get_timestamp = a5xx_get_timestamp,
 };
 
+static void check_speed_bin(struct device *dev)
+{
+	struct nvmem_cell *cell;
+	u32 bin, val;
+
+	cell = nvmem_cell_get(dev, "speed_bin");
+
+	/* If a nvmem cell isn't defined, nothing to do */
+	if (IS_ERR(cell))
+		return;
+
+	bin = *((u32 *) nvmem_cell_read(cell, NULL));
+	nvmem_cell_put(cell);
+
+	val = (1 << bin);
+
+	dev_pm_opp_set_supported_hw(dev, &val, 1);
+}
+
 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
 {
 	struct msm_drm_private *priv = dev->dev_private;
@@ -1210,6 +1246,8 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
 
 	a5xx_gpu->lm_leakage = 0x4E001A;
 
+	check_speed_bin(&pdev->dev);
+
 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
 	if (ret) {
 		a5xx_destroy(&(a5xx_gpu->base.base));
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c
index e5700bbf09dd2fe9ae6fa2d1fcd62ee7d02aa84e..4e4d965fd9ab5924aca6feb607e367c255b43c37 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_power.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c
@@ -103,10 +103,16 @@ static inline uint32_t _get_mvolts(struct msm_gpu *gpu, uint32_t freq)
 	struct msm_drm_private *priv = dev->dev_private;
 	struct platform_device *pdev = priv->gpu_pdev;
 	struct dev_pm_opp *opp;
+	u32 ret = 0;
 
 	opp = dev_pm_opp_find_freq_exact(&pdev->dev, freq, true);
 
-	return (!IS_ERR(opp)) ? dev_pm_opp_get_voltage(opp) / 1000 : 0;
+	if (!IS_ERR(opp)) {
+		ret = dev_pm_opp_get_voltage(opp) / 1000;
+		dev_pm_opp_put(opp);
+	}
+
+	return ret;
 }
 
 /* Setup thermal limit management */
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 05022ea2a0077fb1062426fea94e5bd0f5cc3805..62bdb7316da1c175d7f011a63efc5c370f432ed7 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -17,7 +17,6 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/pm_opp.h>
 #include "adreno_gpu.h"
 
 #define ANY_ID 0xff
@@ -90,14 +89,19 @@ static const struct adreno_info gpulist[] = {
 	},
 };
 
-MODULE_FIRMWARE("a300_pm4.fw");
-MODULE_FIRMWARE("a300_pfp.fw");
-MODULE_FIRMWARE("a330_pm4.fw");
-MODULE_FIRMWARE("a330_pfp.fw");
-MODULE_FIRMWARE("a420_pm4.fw");
-MODULE_FIRMWARE("a420_pfp.fw");
-MODULE_FIRMWARE("a530_fm4.fw");
-MODULE_FIRMWARE("a530_pfp.fw");
+MODULE_FIRMWARE("qcom/a300_pm4.fw");
+MODULE_FIRMWARE("qcom/a300_pfp.fw");
+MODULE_FIRMWARE("qcom/a330_pm4.fw");
+MODULE_FIRMWARE("qcom/a330_pfp.fw");
+MODULE_FIRMWARE("qcom/a420_pm4.fw");
+MODULE_FIRMWARE("qcom/a420_pfp.fw");
+MODULE_FIRMWARE("qcom/a530_pm4.fw");
+MODULE_FIRMWARE("qcom/a530_pfp.fw");
+MODULE_FIRMWARE("qcom/a530v3_gpmu.fw2");
+MODULE_FIRMWARE("qcom/a530_zap.mdt");
+MODULE_FIRMWARE("qcom/a530_zap.b00");
+MODULE_FIRMWARE("qcom/a530_zap.b01");
+MODULE_FIRMWARE("qcom/a530_zap.b02");
 
 static inline bool _rev_match(uint8_t entry, uint8_t id)
 {
@@ -125,11 +129,14 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev)
 {
 	struct msm_drm_private *priv = dev->dev_private;
 	struct platform_device *pdev = priv->gpu_pdev;
-	struct msm_gpu *gpu = platform_get_drvdata(priv->gpu_pdev);
+	struct msm_gpu *gpu = NULL;
 	int ret;
 
+	if (pdev)
+		gpu = platform_get_drvdata(pdev);
+
 	if (!gpu) {
-		dev_err(dev->dev, "no adreno device\n");
+		dev_err_once(dev->dev, "no GPU device was found\n");
 		return NULL;
 	}
 
@@ -153,101 +160,45 @@ static void set_gpu_pdev(struct drm_device *dev,
 	priv->gpu_pdev = pdev;
 }
 
-static int find_chipid(struct device *dev, u32 *chipid)
+static int find_chipid(struct device *dev, struct adreno_rev *rev)
 {
 	struct device_node *node = dev->of_node;
 	const char *compat;
 	int ret;
+	u32 chipid;
 
 	/* first search the compat strings for qcom,adreno-XYZ.W: */
 	ret = of_property_read_string_index(node, "compatible", 0, &compat);
 	if (ret == 0) {
-		unsigned rev, patch;
+		unsigned int r, patch;
 
-		if (sscanf(compat, "qcom,adreno-%u.%u", &rev, &patch) == 2) {
-			*chipid = 0;
-			*chipid |= (rev / 100) << 24;  /* core */
-			rev %= 100;
-			*chipid |= (rev / 10) << 16;   /* major */
-			rev %= 10;
-			*chipid |= rev << 8;           /* minor */
-			*chipid |= patch;
+		if (sscanf(compat, "qcom,adreno-%u.%u", &r, &patch) == 2) {
+			rev->core = r / 100;
+			r %= 100;
+			rev->major = r / 10;
+			r %= 10;
+			rev->minor = r;
+			rev->patchid = patch;
 
 			return 0;
 		}
 	}
 
 	/* and if that fails, fall back to legacy "qcom,chipid" property: */
-	ret = of_property_read_u32(node, "qcom,chipid", chipid);
-	if (ret)
+	ret = of_property_read_u32(node, "qcom,chipid", &chipid);
+	if (ret) {
+		dev_err(dev, "could not parse qcom,chipid: %d\n", ret);
 		return ret;
-
-	dev_warn(dev, "Using legacy qcom,chipid binding!\n");
-	dev_warn(dev, "Use compatible qcom,adreno-%u%u%u.%u instead.\n",
-			(*chipid >> 24) & 0xff, (*chipid >> 16) & 0xff,
-			(*chipid >> 8) & 0xff, *chipid & 0xff);
-
-	return 0;
-}
-
-/* Get legacy powerlevels from qcom,gpu-pwrlevels and populate the opp table */
-static int adreno_get_legacy_pwrlevels(struct device *dev)
-{
-	struct device_node *child, *node;
-	int ret;
-
-	node = of_find_compatible_node(dev->of_node, NULL,
-		"qcom,gpu-pwrlevels");
-	if (!node) {
-		dev_err(dev, "Could not find the GPU powerlevels\n");
-		return -ENXIO;
 	}
 
-	for_each_child_of_node(node, child) {
-		unsigned int val;
-
-		ret = of_property_read_u32(child, "qcom,gpu-freq", &val);
-		if (ret)
-			continue;
+	rev->core = (chipid >> 24) & 0xff;
+	rev->major = (chipid >> 16) & 0xff;
+	rev->minor = (chipid >> 8) & 0xff;
+	rev->patchid = (chipid & 0xff);
 
-		/*
-		 * Skip the intentionally bogus clock value found at the bottom
-		 * of most legacy frequency tables
-		 */
-		if (val != 27000000)
-			dev_pm_opp_add(dev, val, 0);
-	}
-
-	return 0;
-}
-
-static int adreno_get_pwrlevels(struct device *dev,
-		struct adreno_platform_config *config)
-{
-	unsigned long freq = ULONG_MAX;
-	struct dev_pm_opp *opp;
-	int ret;
-
-	/* You down with OPP? */
-	if (!of_find_property(dev->of_node, "operating-points-v2", NULL))
-		ret = adreno_get_legacy_pwrlevels(dev);
-	else
-		ret = dev_pm_opp_of_add_table(dev);
-
-	if (ret)
-		return ret;
-
-	/* Find the fastest defined rate */
-	opp = dev_pm_opp_find_freq_floor(dev, &freq);
-	if (!IS_ERR(opp))
-		config->fast_rate = dev_pm_opp_get_freq(opp);
-
-	if (!config->fast_rate) {
-		DRM_DEV_INFO(dev,
-			"Could not find clock rate. Using default\n");
-		/* Pick a suitably safe clock speed for any target */
-		config->fast_rate = 200000000;
-	}
+	dev_warn(dev, "Using legacy qcom,chipid binding!\n");
+	dev_warn(dev, "Use compatible qcom,adreno-%u%u%u.%u instead.\n",
+		rev->core, rev->major, rev->minor, rev->patchid);
 
 	return 0;
 }
@@ -258,22 +209,9 @@ static int adreno_bind(struct device *dev, struct device *master, void *data)
 	const struct adreno_info *info;
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct msm_gpu *gpu;
-	u32 val;
 	int ret;
 
-	ret = find_chipid(dev, &val);
-	if (ret) {
-		dev_err(dev, "could not find chipid: %d\n", ret);
-		return ret;
-	}
-
-	config.rev = ADRENO_REV((val >> 24) & 0xff,
-			(val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);
-
-	/* find clock rates: */
-	config.fast_rate = 0;
-
-	ret = adreno_get_pwrlevels(dev, &config);
+	ret = find_chipid(dev, &config.rev);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index e2ffecce59a3be5d5bf06ae40cdb991860a85067..de63ff26a062214d077df7a38fdfe5c7d77f22b1 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -17,11 +17,11 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/pm_opp.h>
 #include "adreno_gpu.h"
 #include "msm_gem.h"
 #include "msm_mmu.h"
 
-
 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -461,10 +461,80 @@ void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords)
 {
 	if (spin_until(ring_freewords(ring) >= ndwords))
 		DRM_DEV_ERROR(ring->gpu->dev->dev,
-			"timeout waiting for space in ringubffer %d\n",
+			"timeout waiting for space in ringbuffer %d\n",
 			ring->id);
 }
 
+/* Get legacy powerlevels from qcom,gpu-pwrlevels and populate the opp table */
+static int adreno_get_legacy_pwrlevels(struct device *dev)
+{
+	struct device_node *child, *node;
+	int ret;
+
+	node = of_find_compatible_node(dev->of_node, NULL,
+		"qcom,gpu-pwrlevels");
+	if (!node) {
+		dev_err(dev, "Could not find the GPU powerlevels\n");
+		return -ENXIO;
+	}
+
+	for_each_child_of_node(node, child) {
+		unsigned int val;
+
+		ret = of_property_read_u32(child, "qcom,gpu-freq", &val);
+		if (ret)
+			continue;
+
+		/*
+		 * Skip the intentionally bogus clock value found at the bottom
+		 * of most legacy frequency tables
+		 */
+		if (val != 27000000)
+			dev_pm_opp_add(dev, val, 0);
+	}
+
+	return 0;
+}
+
+static int adreno_get_pwrlevels(struct device *dev,
+		struct msm_gpu *gpu)
+{
+	unsigned long freq = ULONG_MAX;
+	struct dev_pm_opp *opp;
+	int ret;
+
+	gpu->fast_rate = 0;
+
+	/* You down with OPP? */
+	if (!of_find_property(dev->of_node, "operating-points-v2", NULL))
+		ret = adreno_get_legacy_pwrlevels(dev);
+	else {
+		ret = dev_pm_opp_of_add_table(dev);
+		if (ret)
+			dev_err(dev, "Unable to set the OPP table\n");
+	}
+
+	if (!ret) {
+		/* Find the fastest defined rate */
+		opp = dev_pm_opp_find_freq_floor(dev, &freq);
+		if (!IS_ERR(opp)) {
+			gpu->fast_rate = freq;
+			dev_pm_opp_put(opp);
+		}
+	}
+
+	if (!gpu->fast_rate) {
+		dev_warn(dev,
+			"Could not find a clock rate. Using a reasonable default\n");
+		/* Pick a suitably safe clock speed for any target */
+		gpu->fast_rate = 200000000;
+	}
+
+	DBG("fast_rate=%u, slow_rate=27000000", gpu->fast_rate);
+
+	return 0;
+}
+
 int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		struct adreno_gpu *adreno_gpu,
 		const struct adreno_gpu_funcs *funcs, int nr_rings)
@@ -479,15 +549,6 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	adreno_gpu->revn = adreno_gpu->info->revn;
 	adreno_gpu->rev = config->rev;
 
-	gpu->fast_rate = config->fast_rate;
-	gpu->bus_freq  = config->bus_freq;
-#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
-	gpu->bus_scale_table = config->bus_scale_table;
-#endif
-
-	DBG("fast_rate=%u, slow_rate=27000000, bus_freq=%u",
-			gpu->fast_rate, gpu->bus_freq);
-
 	adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";
 	adreno_gpu_config.irqname = "kgsl_3d0_irq";
 
@@ -496,6 +557,8 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 
 	adreno_gpu_config.nr_rings = nr_rings;
 
+	adreno_get_pwrlevels(&pdev->dev, gpu);
+
 	pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD);
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 28e3de6e5f94ac394515ae7b42e31095b67b5893..8d3d0a9249082ab451cc166c73803b1d89724f33 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -129,10 +129,6 @@ struct adreno_gpu {
 /* platform config data (ie. from DT, or pdata) */
 struct adreno_platform_config {
 	struct adreno_rev rev;
-	uint32_t fast_rate, bus_freq;
-#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
-	struct msm_bus_scale_pdata *bus_scale_table;
-#endif
 };
 
 #define ADRENO_IDLE_TIMEOUT msecs_to_jiffies(1000)
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
index 940de51ac5cdaaab128acc40a2b18a8834c6026a..a1b3e31e959e46fb0f5fc6068832cbeff1f9148a 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
@@ -234,10 +234,6 @@ static inline struct clk *mpd4_lvds_pll_init(struct drm_device *dev)
 #endif
 
 #ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
-static inline int match_dev_name(struct device *dev, void *data)
-{
-	return !strcmp(dev_name(dev), data);
-}
 /* bus scaling data is associated with extra pointless platform devices,
  * "dtv", etc.. this is a bit of a hack, but we need a way for encoders
  * to find their pdata to make the bus-scaling stuff work.
@@ -245,8 +241,7 @@ static inline int match_dev_name(struct device *dev, void *data)
 static inline void *mdp4_find_pdata(const char *devname)
 {
 	struct device *dev;
-	dev = bus_find_device(&platform_bus_type, NULL,
-			(void *)devname, match_dev_name);
+	dev = bus_find_device_by_name(&platform_bus_type, NULL, devname);
 	return dev ? dev->platform_data : NULL;
 }
 #endif
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
index ee41423baeb7c9db07cb5152e655fc6060d4826c..29678876fc094f3f01cd59fd10a6af8bcd667c8a 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
@@ -966,8 +966,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane,
 	uint32_t src_x, src_y;
 	uint32_t src_w, src_h;
 	uint32_t src_img_w, src_img_h;
-	uint32_t src_x_r;
-	int crtc_x_r;
 	int ret;
 
 	nplanes = fb->format->num_planes;
@@ -1012,9 +1010,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane,
 		crtc_w /= 2;
 		src_w /= 2;
 		src_img_w /= 2;
-
-		crtc_x_r = crtc_x + crtc_w;
-		src_x_r = src_x + src_w;
 	}
 
 	ret = calc_scalex_steps(plane, pix_format, src_w, crtc_w, step.x);
@@ -1054,9 +1049,9 @@ static int mdp5_plane_mode_set(struct drm_plane *plane,
 	if (right_hwpipe)
 		mdp5_hwpipe_mode_set(mdp5_kms, right_hwpipe, fb, &step, &pe,
 				     config, hdecm, vdecm, hflip, vflip,
-				     crtc_x_r, crtc_y, crtc_w, crtc_h,
+				     crtc_x + crtc_w, crtc_y, crtc_w, crtc_h,
 				     src_img_w, src_img_h,
-				     src_x_r, src_y, src_w, src_h);
+				     src_x + src_w, src_y, src_w, src_h);
 
 	plane->fb = fb;
 
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 0a3ea3034e39a65f6afefc99c2f0294aa906d94b..d90ef1d78a1b3f4bbe9acb305c85d5ac1958a47a 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -37,16 +37,9 @@
 #define MSM_VERSION_MINOR	3
 #define MSM_VERSION_PATCHLEVEL	0
 
-static void msm_fb_output_poll_changed(struct drm_device *dev)
-{
-	struct msm_drm_private *priv = dev->dev_private;
-	if (priv->fbdev)
-		drm_fb_helper_hotplug_event(priv->fbdev);
-}
-
 static const struct drm_mode_config_funcs mode_config_funcs = {
 	.fb_create = msm_framebuffer_create,
-	.output_poll_changed = msm_fb_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = msm_atomic_commit,
 	.atomic_state_alloc = msm_atomic_state_alloc,
@@ -551,13 +544,6 @@ static void msm_postclose(struct drm_device *dev, struct drm_file *file)
 	context_close(ctx);
 }
 
-static void msm_lastclose(struct drm_device *dev)
-{
-	struct msm_drm_private *priv = dev->dev_private;
-	if (priv->fbdev)
-		drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev);
-}
-
 static irqreturn_t msm_irq(int irq, void *arg)
 {
 	struct drm_device *dev = arg;
@@ -866,7 +852,7 @@ static struct drm_driver msm_driver = {
 				DRIVER_MODESET,
 	.open               = msm_open,
 	.postclose           = msm_postclose,
-	.lastclose          = msm_lastclose,
+	.lastclose          = drm_fb_helper_lastclose,
 	.irq_handler        = msm_irq,
 	.irq_preinstall     = msm_irq_preinstall,
 	.irq_postinstall    = msm_irq_postinstall,
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index c646843d8822777afac6f9100e846c612a1c069e..0a653dd2e6182323b3f2a6942707db28bdf1d0e0 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -303,7 +303,8 @@ int msm_perf_debugfs_init(struct drm_minor *minor);
 void msm_perf_debugfs_cleanup(struct msm_drm_private *priv);
 #else
 static inline int msm_debugfs_late_init(struct drm_device *dev) { return 0; }
-static inline void msm_rd_dump_submit(struct msm_gem_submit *submit) {}
+static inline void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit,
+		const char *fmt, ...) {}
 static inline void msm_rd_debugfs_cleanup(struct msm_drm_private *priv) {}
 static inline void msm_perf_debugfs_cleanup(struct msm_drm_private *priv) {}
 #endif
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 81fe6d6740cec7582d11b4f92e267509dffb2c29..07376de9ff4cca630fb03cf1035e5be31b2ffbe8 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -93,14 +93,17 @@ static struct page **get_pages(struct drm_gem_object *obj)
 			return p;
 		}
 
+		msm_obj->pages = p;
+
 		msm_obj->sgt = drm_prime_pages_to_sg(p, npages);
 		if (IS_ERR(msm_obj->sgt)) {
+			void *ptr = ERR_CAST(msm_obj->sgt);
+
 			dev_err(dev->dev, "failed to allocate sgt\n");
-			return ERR_CAST(msm_obj->sgt);
+			msm_obj->sgt = NULL;
+			return ptr;
 		}
 
-		msm_obj->pages = p;
-
 		/* For non-cached buffers, ensure the new pages are clean
 		 * because display controller, GPU, etc. are not coherent:
 		 */
@@ -135,7 +138,10 @@ static void put_pages(struct drm_gem_object *obj)
 		if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
 			dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl,
 					msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
-		sg_free_table(msm_obj->sgt);
+
+		if (msm_obj->sgt)
+			sg_free_table(msm_obj->sgt);
+
 		kfree(msm_obj->sgt);
 
 		if (use_pages(obj))
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 2322014034398110d879d6c97f7d2e5fe25c68fe..bd376f9e18a7791105f923a6ebe34cb2359e4a22 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -21,42 +21,90 @@
 #include "msm_fence.h"
 
 #include <linux/string_helpers.h>
+#include <linux/pm_opp.h>
+#include <linux/devfreq.h>
 
 
 /*
  * Power Management:
  */
 
-#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
-#include <mach/board.h>
-static void bs_init(struct msm_gpu *gpu)
+static int msm_devfreq_target(struct device *dev, unsigned long *freq,
+		u32 flags)
 {
-	if (gpu->bus_scale_table) {
-		gpu->bsc = msm_bus_scale_register_client(gpu->bus_scale_table);
-		DBG("bus scale client: %08x", gpu->bsc);
-	}
+	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
+	struct dev_pm_opp *opp;
+
+	opp = devfreq_recommended_opp(dev, freq, flags);
+
+	if (IS_ERR(opp))
+		return PTR_ERR(opp);
+
+	clk_set_rate(gpu->core_clk, *freq);
+	dev_pm_opp_put(opp);
+
+	return 0;
 }
 
-static void bs_fini(struct msm_gpu *gpu)
+static int msm_devfreq_get_dev_status(struct device *dev,
+		struct devfreq_dev_status *status)
 {
-	if (gpu->bsc) {
-		msm_bus_scale_unregister_client(gpu->bsc);
-		gpu->bsc = 0;
-	}
+	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
+	u64 cycles;
+	u32 freq = ((u32) status->current_frequency) / 1000000;
+	ktime_t time;
+
+	status->current_frequency = (unsigned long) clk_get_rate(gpu->core_clk);
+	gpu->funcs->gpu_busy(gpu, &cycles);
+
+	status->busy_time = ((u32) (cycles - gpu->devfreq.busy_cycles)) / freq;
+
+	gpu->devfreq.busy_cycles = cycles;
+
+	time = ktime_get();
+	status->total_time = ktime_us_delta(time, gpu->devfreq.time);
+	gpu->devfreq.time = time;
+
+	return 0;
+}
+
+static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
+
+	*freq = (unsigned long) clk_get_rate(gpu->core_clk);
+
+	return 0;
 }
 
-static void bs_set(struct msm_gpu *gpu, int idx)
+static struct devfreq_dev_profile msm_devfreq_profile = {
+	.polling_ms = 10,
+	.target = msm_devfreq_target,
+	.get_dev_status = msm_devfreq_get_dev_status,
+	.get_cur_freq = msm_devfreq_get_cur_freq,
+};
+
+static void msm_devfreq_init(struct msm_gpu *gpu)
 {
-	if (gpu->bsc) {
-		DBG("set bus scaling: %d", idx);
-		msm_bus_scale_client_update_request(gpu->bsc, idx);
+	/* We need target support to do devfreq */
+	if (!gpu->funcs->gpu_busy)
+		return;
+
+	msm_devfreq_profile.initial_freq = gpu->fast_rate;
+
+	/*
+	 * Don't set the freq_table or max_state and let devfreq build the table
+	 * from OPP
+	 */
+
+	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
+			&msm_devfreq_profile, "simple_ondemand", NULL);
+
+	if (IS_ERR(gpu->devfreq.devfreq)) {
+		dev_err(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
+		gpu->devfreq.devfreq = NULL;
 	}
 }
-#else
-static void bs_init(struct msm_gpu *gpu) {}
-static void bs_fini(struct msm_gpu *gpu) {}
-static void bs_set(struct msm_gpu *gpu, int idx) {}
-#endif
 
 static int enable_pwrrail(struct msm_gpu *gpu)
 {
@@ -143,8 +191,6 @@ static int enable_axi(struct msm_gpu *gpu)
 {
 	if (gpu->ebi1_clk)
 		clk_prepare_enable(gpu->ebi1_clk);
-	if (gpu->bus_freq)
-		bs_set(gpu, gpu->bus_freq);
 	return 0;
 }
 
@@ -152,8 +198,6 @@ static int disable_axi(struct msm_gpu *gpu)
 {
 	if (gpu->ebi1_clk)
 		clk_disable_unprepare(gpu->ebi1_clk);
-	if (gpu->bus_freq)
-		bs_set(gpu, 0);
 	return 0;
 }
 
@@ -175,6 +219,13 @@ int msm_gpu_pm_resume(struct msm_gpu *gpu)
 	if (ret)
 		return ret;
 
+	if (gpu->devfreq.devfreq) {
+		gpu->devfreq.busy_cycles = 0;
+		gpu->devfreq.time = ktime_get();
+
+		devfreq_resume_device(gpu->devfreq.devfreq);
+	}
+
 	gpu->needs_hw_init = true;
 
 	return 0;
@@ -186,6 +237,9 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu)
 
 	DBG("%s", gpu->name);
 
+	if (gpu->devfreq.devfreq)
+		devfreq_suspend_device(gpu->devfreq.devfreq);
+
 	ret = disable_axi(gpu);
 	if (ret)
 		return ret;
@@ -294,6 +348,8 @@ static void recover_worker(struct work_struct *work)
 
 			msm_rd_dump_submit(priv->hangrd, submit,
 				"offending task: %s (%s)", task->comm, cmd);
+
+			kfree(cmd);
 		} else {
 			msm_rd_dump_submit(priv->hangrd, submit, NULL);
 		}
@@ -306,7 +362,7 @@ static void recover_worker(struct work_struct *work)
 	 * needs to happen after msm_rd_dump_submit() to ensure that the
 	 * bo's referenced by the offending submit are still around.
 	 */
-	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
+	for (i = 0; i < gpu->nr_rings; i++) {
 		struct msm_ringbuffer *ring = gpu->rb[i];
 
 		uint32_t fence = ring->memptrs->fence;
@@ -753,7 +809,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	gpu->pdev = pdev;
 	platform_set_drvdata(pdev, gpu);
 
-	bs_init(gpu);
+	msm_devfreq_init(gpu);
 
 	gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
 		config->va_start, config->va_end);
@@ -824,8 +880,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
 
 	WARN_ON(!list_empty(&gpu->active_list));
 
-	bs_fini(gpu);
-
 	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
 		msm_ringbuffer_destroy(gpu->rb[i]);
 		gpu->rb[i] = NULL;
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index e113d64574d3c3cd096100d8d0221952f782f147..fccfccd303af7605b43c38aad85a20a666929c6a 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -66,6 +66,7 @@ struct msm_gpu_funcs {
 	/* show GPU status in debugfs: */
 	void (*show)(struct msm_gpu *gpu, struct seq_file *m);
 #endif
+	int (*gpu_busy)(struct msm_gpu *gpu, uint64_t *value);
 };
 
 struct msm_gpu {
@@ -108,12 +109,7 @@ struct msm_gpu {
 	struct clk **grp_clks;
 	int nr_clocks;
 	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
-	uint32_t fast_rate, bus_freq;
-
-#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
-	struct msm_bus_scale_pdata *bus_scale_table;
-	uint32_t bsc;
-#endif
+	uint32_t fast_rate;
 
 	/* Hang and Inactivity Detection:
 	 */
@@ -125,6 +121,12 @@ struct msm_gpu {
 	struct work_struct recover_work;
 
 	struct drm_gem_object *memptrs_bo;
+
+	struct {
+		struct devfreq *devfreq;
+		u64 busy_cycles;
+		ktime_t time;
+	} devfreq;
 };
 
 /* It turns out that all targets use the same ringbuffer size */
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 949bf6b3feab3aa0d25ac1ee51a0d6bb20a4ef61..41e7f2927443e0dc16818b3a5bfbcee79458c046 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -224,7 +224,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
 		/* Determine if we can get a cache-coherent map, forcing
 		 * uncached mapping if we can't.
 		 */
-		if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED)
+		if (!nouveau_drm_use_coherent_gpu_mapping(drm))
 			nvbo->force_coherent = true;
 	}
 
@@ -262,7 +262,8 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
 		if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
 		    (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram)
 			continue;
-		if ((flags & TTM_PL_FLAG_TT  ) && !vmm->page[i].host)
+		if ((flags & TTM_PL_FLAG_TT) &&
+		    (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
 			continue;
 
 		/* Select this page size if it's the first that supports
@@ -1218,7 +1219,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 	if (ret)
 		return ret;
 
-	ret = ttm_tt_bind(bo->ttm, &tmp_reg);
+	ret = ttm_tt_bind(bo->ttm, &tmp_reg, &ctx);
 	if (ret)
 		goto out;
 
@@ -1226,7 +1227,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 	if (ret)
 		goto out;
 
-	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, new_reg);
+	ret = ttm_bo_move_ttm(bo, &ctx, new_reg);
 out:
 	ttm_bo_mem_put(bo, &tmp_reg);
 	return ret;
@@ -1255,7 +1256,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
 	if (ret)
 		return ret;
 
-	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, &tmp_reg);
+	ret = ttm_bo_move_ttm(bo, &ctx, &tmp_reg);
 	if (ret)
 		goto out;
 
@@ -1380,8 +1381,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
 	/* Fallback to software copy. */
 	ret = ttm_bo_wait(bo, ctx->interruptible, ctx->no_wait_gpu);
 	if (ret == 0)
-		ret = ttm_bo_move_memcpy(bo, ctx->interruptible,
-					 ctx->no_wait_gpu, new_reg);
+		ret = ttm_bo_move_memcpy(bo, ctx, new_reg);
 
 out:
 	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
@@ -1548,7 +1548,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 }
 
 static int
-nouveau_ttm_tt_populate(struct ttm_tt *ttm)
+nouveau_ttm_tt_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
 {
 	struct ttm_dma_tt *ttm_dma = (void *)ttm;
 	struct nouveau_drm *drm;
@@ -1573,17 +1573,17 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
 
 #if IS_ENABLED(CONFIG_AGP)
 	if (drm->agp.bridge) {
-		return ttm_agp_tt_populate(ttm);
+		return ttm_agp_tt_populate(ttm, ctx);
 	}
 #endif
 
 #if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86)
 	if (swiotlb_nr_tbl()) {
-		return ttm_dma_populate((void *)ttm, dev);
+		return ttm_dma_populate((void *)ttm, dev, ctx);
 	}
 #endif
 
-	r = ttm_pool_populate(ttm);
+	r = ttm_pool_populate(ttm, ctx);
 	if (r) {
 		return r;
 	}
@@ -1673,5 +1673,4 @@ struct ttm_bo_driver nouveau_bo_driver = {
 	.fault_reserve_notify = &nouveau_ttm_fault_reserve_notify,
 	.io_mem_reserve = &nouveau_ttm_io_mem_reserve,
 	.io_mem_free = &nouveau_ttm_io_mem_free,
-	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 2e7785f49e6d54c1c0941c69de2ff0a869cfd3f0..009713404cc4f8a67f9fce431462770bc21b6866 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -29,6 +29,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 
 #include <nvif/class.h>
 
@@ -292,7 +293,7 @@ nouveau_user_framebuffer_create(struct drm_device *dev,
 
 static const struct drm_mode_config_funcs nouveau_mode_config_funcs = {
 	.fb_create = nouveau_user_framebuffer_create,
-	.output_poll_changed = nouveau_fbcon_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 };
 
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 8d4a5be3b913016c410c8226ad5b05b0bf75299b..56fe261b62683a8690ac8f3439426ca1e4c269ef 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -152,9 +152,9 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence,
 	work->cli = cli;
 	mutex_lock(&cli->lock);
 	list_add_tail(&work->head, &cli->worker);
-	mutex_unlock(&cli->lock);
 	if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence))
 		nouveau_cli_work_fence(fence, &work->cb);
+	mutex_unlock(&cli->lock);
 }
 
 static void
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 3331e82ae9e7130b18f4a6f307cc284519f873d3..96f6bd8aee5d3a248d76c683b6146ebb8ef673c7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -157,8 +157,8 @@ struct nouveau_drm {
 		struct nvif_object copy;
 		int mtrr;
 		int type_vram;
-		int type_host;
-		int type_ncoh;
+		int type_host[2];
+		int type_ncoh[2];
 	} ttm;
 
 	/* GEM interface support */
@@ -217,6 +217,13 @@ nouveau_drm(struct drm_device *dev)
 	return dev->dev_private;
 }
 
+static inline bool
+nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm)
+{
+	struct nvif_mmu *mmu = &drm->client.mmu;
+	return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED);
+}
+
 int nouveau_pmops_suspend(struct device *);
 int nouveau_pmops_resume(struct device *);
 bool nouveau_pmops_runtime(void);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index c533d8e04afc0f1fc4708d85e069323291c428c3..ee5d1dc2eaf5ac06433e91f5c5b3ffec3c3a05ff 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -413,14 +413,6 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 	return ret;
 }
 
-void
-nouveau_fbcon_output_poll_changed(struct drm_device *dev)
-{
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	if (drm->fbcon)
-		drm_fb_helper_hotplug_event(&drm->fbcon->helper);
-}
-
 static int
 nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
 {
@@ -429,7 +421,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
 	drm_fb_helper_unregister_fbi(&fbcon->helper);
 	drm_fb_helper_fini(&fbcon->helper);
 
-	if (nouveau_fb->nvbo) {
+	if (nouveau_fb && nouveau_fb->nvbo) {
 		nouveau_vma_del(&nouveau_fb->vma);
 		nouveau_bo_unmap(nouveau_fb->nvbo);
 		nouveau_bo_unpin(nouveau_fb->nvbo);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
index e2bca729721ed5ae1d908f18575eeaab8002567b..a6f192ea3fa6758934f2a54054f10708d2c73f14 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
@@ -68,8 +68,6 @@ void nouveau_fbcon_set_suspend(struct drm_device *dev, int state);
 void nouveau_fbcon_accel_save_disable(struct drm_device *dev);
 void nouveau_fbcon_accel_restore(struct drm_device *dev);
 
-void nouveau_fbcon_output_poll_changed(struct drm_device *dev);
-
 extern int nouveau_nofbaccel;
 
 #endif /* __NV50_FBCON_H__ */
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 589a9621db763f98454485081a3f80e2324e717c..c002f896850739b343624247e7d52d94e34bf99d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -103,10 +103,10 @@ nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt)
 	u8 type;
 	int ret;
 
-	if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED)
-		type = drm->ttm.type_ncoh;
+	if (!nouveau_drm_use_coherent_gpu_mapping(drm))
+		type = drm->ttm.type_ncoh[!!mem->kind];
 	else
-		type = drm->ttm.type_host;
+		type = drm->ttm.type_host[0];
 
 	if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND))
 		mem->comp = mem->kind = 0;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 08b974b3048279813e2d67ad0d5b0055e68998c2..dff51a0ee0281e8f5924ffc0135d8b4baf8542f9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -235,27 +235,46 @@ nouveau_ttm_global_release(struct nouveau_drm *drm)
 	drm->ttm.mem_global_ref.release = NULL;
 }
 
-int
-nouveau_ttm_init(struct nouveau_drm *drm)
+static int
+nouveau_ttm_init_host(struct nouveau_drm *drm, u8 kind)
 {
-	struct nvkm_device *device = nvxx_device(&drm->client.device);
-	struct nvkm_pci *pci = device->pci;
 	struct nvif_mmu *mmu = &drm->client.mmu;
-	struct drm_device *dev = drm->dev;
-	int typei, ret;
+	int typei;
 
 	typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE |
-						   NVIF_MEM_COHERENT);
+					    kind | NVIF_MEM_COHERENT);
 	if (typei < 0)
 		return -ENOSYS;
 
-	drm->ttm.type_host = typei;
+	drm->ttm.type_host[!!kind] = typei;
 
-	typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE);
+	typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | kind);
 	if (typei < 0)
 		return -ENOSYS;
 
-	drm->ttm.type_ncoh = typei;
+	drm->ttm.type_ncoh[!!kind] = typei;
+	return 0;
+}
+
+int
+nouveau_ttm_init(struct nouveau_drm *drm)
+{
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
+	struct nvkm_pci *pci = device->pci;
+	struct nvif_mmu *mmu = &drm->client.mmu;
+	struct drm_device *dev = drm->dev;
+	int typei, ret;
+
+	ret = nouveau_ttm_init_host(drm, 0);
+	if (ret)
+		return ret;
+
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
+	    drm->client.device.info.chipset != 0x50) {
+		ret = nouveau_ttm_init_host(drm, NVIF_MEM_KIND);
+		if (ret)
+			return ret;
+	}
 
 	if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC &&
 	    drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c
index 52e52a360fb120300a3decc4b2ebe5f33f16f6fc..3da5a4305aa4847a32c0abf398a50e7a0dc90b7e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -4,6 +4,7 @@
 
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 
 #include "nouveau_drv.h"
 #include "nouveau_acpi.h"
@@ -61,7 +62,7 @@ static void
 nouveau_switcheroo_reprobe(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
-	nouveau_fbcon_output_poll_changed(dev);
+	drm_fb_helper_output_poll_changed(dev);
 }
 
 static bool
diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c
index 9e2628dd8e4d6734c2d7c5d073012bbb95b4fa4c..f5371d96b003c23cac9e1f34cf3deca3b54b06a6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c
@@ -67,8 +67,8 @@ nouveau_vma_del(struct nouveau_vma **pvma)
 			nvif_vmm_put(&vma->vmm->vmm, &tmp);
 		}
 		list_del(&vma->head);
-		*pvma = NULL;
 		kfree(*pvma);
+		*pvma = NULL;
 	}
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 65336948e807877746f39c8f6c778668c1a2e149..b22c37bde13ffb97aa9d5a1ca557c93ef4294d1f 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -4311,7 +4311,7 @@ nv50_disp_atomic_state_alloc(struct drm_device *dev)
 static const struct drm_mode_config_funcs
 nv50_disp_func = {
 	.fb_create = nouveau_user_framebuffer_create,
-	.output_poll_changed = nouveau_fbcon_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = nv50_disp_atomic_check,
 	.atomic_commit = nv50_disp_atomic_commit,
 	.atomic_state_alloc = nv50_disp_atomic_state_alloc,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index e146436156985a534fa14e0829db4560d6eb1459..00eeaaffeae565a04044fc55e52990eb71d1063b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2369,7 +2369,7 @@ nv13b_chipset = {
 	.imem = gk20a_instmem_new,
 	.ltc = gp100_ltc_new,
 	.mc = gp10b_mc_new,
-	.mmu = gf100_mmu_new,
+	.mmu = gp10b_mmu_new,
 	.secboot = gp10b_secboot_new,
 	.pmu = gm20b_pmu_new,
 	.timer = gk20a_timer_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
index a2978a37b4f3c72ef4e6f03b1fb7f2c0c7500478..700fc754f28a4c32a001c243822b5e97733ba0a7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
@@ -174,6 +174,7 @@ gf119_sor = {
 		.links = gf119_sor_dp_links,
 		.power = g94_sor_dp_power,
 		.pattern = gf119_sor_dp_pattern,
+		.drive = gf119_sor_dp_drive,
 		.vcpi = gf119_sor_dp_vcpi,
 		.audio = gf119_sor_dp_audio,
 		.audio_sym = gf119_sor_dp_audio_sym,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
index 972370ed36f090d0c0323253b79735edd355db07..7c7efa4ea0d0edb391a27db2c6e99179799070f1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
@@ -36,6 +36,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 			if (data) {
 				*ver = nvbios_rd08(bios, data + 0x00);
 				switch (*ver) {
+				case 0x20:
 				case 0x21:
 				case 0x30:
 				case 0x40:
@@ -63,6 +64,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx,
 	if (data && idx < *cnt) {
 		u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len);
 		switch (*ver * !!outp) {
+		case 0x20:
 		case 0x21:
 		case 0x30:
 			*hdr = nvbios_rd08(bios, data + 0x04);
@@ -96,12 +98,16 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx,
 		info->type = nvbios_rd16(bios, data + 0x00);
 		info->mask = nvbios_rd16(bios, data + 0x02);
 		switch (*ver) {
+		case 0x20:
+			info->mask |= 0x00c0; /* match any link */
+			/* fall-through */
 		case 0x21:
 		case 0x30:
 			info->flags     = nvbios_rd08(bios, data + 0x05);
 			info->script[0] = nvbios_rd16(bios, data + 0x06);
 			info->script[1] = nvbios_rd16(bios, data + 0x08);
-			info->lnkcmp    = nvbios_rd16(bios, data + 0x0a);
+			if (*len >= 0x0c)
+				info->lnkcmp    = nvbios_rd16(bios, data + 0x0a);
 			if (*len >= 0x0f) {
 				info->script[2] = nvbios_rd16(bios, data + 0x0c);
 				info->script[3] = nvbios_rd16(bios, data + 0x0e);
@@ -170,6 +176,7 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
 	memset(info, 0x00, sizeof(*info));
 	if (data) {
 		switch (*ver) {
+		case 0x20:
 		case 0x21:
 			info->dc    = nvbios_rd08(bios, data + 0x02);
 			info->pe    = nvbios_rd08(bios, data + 0x03);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
index 1ba7289684aa2116b6fcc4d05869f0d2b8322a39..db48a1daca0c7a3d786332ce25435839fcc10760 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
@@ -249,7 +249,7 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
 			iobj->base.memory.ptrs = &nv50_instobj_fast;
 		else
 			iobj->base.memory.ptrs = &nv50_instobj_slow;
-		refcount_inc(&iobj->maps);
+		refcount_set(&iobj->maps, 1);
 	}
 
 	mutex_unlock(&imem->subdev.mutex);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
index b1b1f3626b96298fcdb76f1819fd7b97801d5b37..deb96de54b0030244ec88014bce526119c3fae91 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
@@ -136,6 +136,13 @@ nvkm_pci_init(struct nvkm_subdev *subdev)
 		return ret;
 
 	pci->irq = pdev->irq;
+
+	/* Ensure MSI interrupts are armed, for the case where there are
+	 * already interrupts pending (for whatever reason) at load time.
+	 */
+	if (pci->msi)
+		pci->func->msi_rearm(pci);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/omapdrm/displays/connector-analog-tv.c b/drivers/gpu/drm/omapdrm/displays/connector-analog-tv.c
index 542a76503fbd0eb15a689008fd798d5df4b9e7f9..95ea6abae9142e99dc299ea1cff124a7d35c6d5b 100644
--- a/drivers/gpu/drm/omapdrm/displays/connector-analog-tv.c
+++ b/drivers/gpu/drm/omapdrm/displays/connector-analog-tv.c
@@ -1,7 +1,7 @@
 /*
  * Analog TV Connector driver
  *
- * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/displays/connector-dvi.c b/drivers/gpu/drm/omapdrm/displays/connector-dvi.c
index 05fa24a518c87783b16e3faed17edb7167569760..10b4b97d359571a28fb0cb254822323fc46b784c 100644
--- a/drivers/gpu/drm/omapdrm/displays/connector-dvi.c
+++ b/drivers/gpu/drm/omapdrm/displays/connector-dvi.c
@@ -1,7 +1,7 @@
 /*
  * Generic DVI Connector driver
  *
- * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/displays/connector-hdmi.c b/drivers/gpu/drm/omapdrm/displays/connector-hdmi.c
index 4600d3841c2512a0406bd797d71bea298ec3c740..2867476419dc3ec9de9c18ce7083fb96baf7ead8 100644
--- a/drivers/gpu/drm/omapdrm/displays/connector-hdmi.c
+++ b/drivers/gpu/drm/omapdrm/displays/connector-hdmi.c
@@ -1,7 +1,7 @@
 /*
  * HDMI Connector driver
  *
- * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/displays/encoder-opa362.c b/drivers/gpu/drm/omapdrm/displays/encoder-opa362.c
index b1f6aa09f699c52d7a76fc3b2c790a451b5c81b8..d523c67a3ae36ead9fc51de55e376712267b98c3 100644
--- a/drivers/gpu/drm/omapdrm/displays/encoder-opa362.c
+++ b/drivers/gpu/drm/omapdrm/displays/encoder-opa362.c
@@ -6,7 +6,7 @@
  *
  * based on encoder-tfp410
  *
- * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c b/drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c
index 947295f9e30f0fc2d8a778ba1f3105a87d35a4cf..e01ab3db6d86b3f0bd4a77f656b6c1020371b5e3 100644
--- a/drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c
+++ b/drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c
@@ -1,7 +1,7 @@
 /*
  * TFP410 DPI-to-DVI encoder driver
  *
- * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -173,7 +173,8 @@ static int tfp410_probe_of(struct platform_device *pdev)
 	if (gpio_is_valid(gpio) || gpio == -ENOENT) {
 		ddata->pd_gpio = gpio;
 	} else {
-		dev_err(&pdev->dev, "failed to parse PD gpio\n");
+		if (gpio != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "failed to parse PD gpio\n");
 		return gpio;
 	}
 
diff --git a/drivers/gpu/drm/omapdrm/displays/encoder-tpd12s015.c b/drivers/gpu/drm/omapdrm/displays/encoder-tpd12s015.c
index e3d98d78fc401167cb5afeefc190a1c871e27e79..1fd493e5fa3dd0d2d683790803eb91908af834c3 100644
--- a/drivers/gpu/drm/omapdrm/displays/encoder-tpd12s015.c
+++ b/drivers/gpu/drm/omapdrm/displays/encoder-tpd12s015.c
@@ -1,7 +1,7 @@
 /*
  * TPD12S015 HDMI ESD protection & level shifter chip driver
  *
- * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-dpi.c b/drivers/gpu/drm/omapdrm/displays/panel-dpi.c
index e065f7e10cca50579f55845d5b1bac99e85e9f77..efff6dbbb86fe70494beaef60f19f15b8af8e3b0 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-dpi.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-dpi.c
@@ -1,7 +1,7 @@
 /*
  * Generic MIPI DPI Panel Driver
  *
- * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
index 92c556ac22c7c6818ceae5a290a7dc287818a1db..15399a1a666b8703fc1799a99160921d9cd21bff 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
@@ -1,7 +1,7 @@
 /*
  * Generic DSI Command Mode panel driver
  *
- * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -22,9 +22,10 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
+#include <linux/regulator/consumer.h>
 
 #include <video/mipi_display.h>
+#include <video/of_display_timing.h>
 
 #include "../dss/omapdss.h"
 
@@ -49,6 +50,7 @@ struct panel_drv_data {
 	struct mutex lock;
 
 	struct backlight_device *bldev;
+	struct backlight_device *extbldev;
 
 	unsigned long	hw_guard_end;	/* next value of jiffies when we can
 					 * issue the next sleep in/out command
@@ -56,11 +58,17 @@ struct panel_drv_data {
 	unsigned long	hw_guard_wait;	/* max guard time in jiffies */
 
 	/* panel HW configuration from DT or platform data */
-	int reset_gpio;
-	int ext_te_gpio;
+	struct gpio_desc *reset_gpio;
+	struct gpio_desc *ext_te_gpio;
+
+	struct regulator *vpnl;
+	struct regulator *vddi;
 
 	bool use_dsi_backlight;
 
+	int width_mm;
+	int height_mm;
+
 	struct omap_dsi_pin_config pin_config;
 
 	/* runtime variables */
@@ -92,6 +100,30 @@ static int dsicm_panel_reset(struct panel_drv_data *ddata);
 
 static void dsicm_ulps_work(struct work_struct *work);
 
+static void dsicm_bl_power(struct panel_drv_data *ddata, bool enable)
+{
+	struct backlight_device *backlight;
+
+	if (ddata->bldev)
+		backlight = ddata->bldev;
+	else if (ddata->extbldev)
+		backlight = ddata->extbldev;
+	else
+		return;
+
+	if (enable) {
+		backlight->props.fb_blank = FB_BLANK_UNBLANK;
+		backlight->props.state = ~(BL_CORE_FBBLANK | BL_CORE_SUSPENDED);
+		backlight->props.power = FB_BLANK_UNBLANK;
+	} else {
+		backlight->props.fb_blank = FB_BLANK_NORMAL;
+		backlight->props.power = FB_BLANK_POWERDOWN;
+		backlight->props.state |= BL_CORE_FBBLANK | BL_CORE_SUSPENDED;
+	}
+
+	backlight_update_status(backlight);
+}
+
 static void hw_guard_start(struct panel_drv_data *ddata, int guard_msec)
 {
 	ddata->hw_guard_wait = msecs_to_jiffies(guard_msec);
@@ -255,8 +287,8 @@ static int dsicm_enter_ulps(struct panel_drv_data *ddata)
 	if (r)
 		goto err;
 
-	if (gpio_is_valid(ddata->ext_te_gpio))
-		disable_irq(gpio_to_irq(ddata->ext_te_gpio));
+	if (ddata->ext_te_gpio)
+		disable_irq(gpiod_to_irq(ddata->ext_te_gpio));
 
 	in->ops.dsi->disable(in, false, true);
 
@@ -297,8 +329,8 @@ static int dsicm_exit_ulps(struct panel_drv_data *ddata)
 		goto err2;
 	}
 
-	if (gpio_is_valid(ddata->ext_te_gpio))
-		enable_irq(gpio_to_irq(ddata->ext_te_gpio));
+	if (ddata->ext_te_gpio)
+		enable_irq(gpiod_to_irq(ddata->ext_te_gpio));
 
 	dsicm_queue_ulps_work(ddata);
 
@@ -311,8 +343,8 @@ static int dsicm_exit_ulps(struct panel_drv_data *ddata)
 
 	r = dsicm_panel_reset(ddata);
 	if (!r) {
-		if (gpio_is_valid(ddata->ext_te_gpio))
-			enable_irq(gpio_to_irq(ddata->ext_te_gpio));
+		if (ddata->ext_te_gpio)
+			enable_irq(gpiod_to_irq(ddata->ext_te_gpio));
 		ddata->ulps_enabled = false;
 	}
 err1:
@@ -335,7 +367,7 @@ static int dsicm_bl_update_status(struct backlight_device *dev)
 {
 	struct panel_drv_data *ddata = dev_get_drvdata(&dev->dev);
 	struct omap_dss_device *in = ddata->in;
-	int r;
+	int r = 0;
 	int level;
 
 	if (dev->props.fb_blank == FB_BLANK_UNBLANK &&
@@ -356,8 +388,6 @@ static int dsicm_bl_update_status(struct backlight_device *dev)
 			r = dsicm_dcs_write_1(ddata, DCS_BRIGHTNESS, level);
 
 		in->ops.dsi->bus_unlock(in);
-	} else {
-		r = 0;
 	}
 
 	mutex_unlock(&ddata->lock);
@@ -560,16 +590,13 @@ static const struct attribute_group dsicm_attr_group = {
 
 static void dsicm_hw_reset(struct panel_drv_data *ddata)
 {
-	if (!gpio_is_valid(ddata->reset_gpio))
-		return;
-
-	gpio_set_value(ddata->reset_gpio, 1);
+	gpiod_set_value(ddata->reset_gpio, 1);
 	udelay(10);
 	/* reset the panel */
-	gpio_set_value(ddata->reset_gpio, 0);
+	gpiod_set_value(ddata->reset_gpio, 0);
 	/* assert reset */
 	udelay(10);
-	gpio_set_value(ddata->reset_gpio, 1);
+	gpiod_set_value(ddata->reset_gpio, 1);
 	/* wait after releasing reset */
 	usleep_range(5000, 10000);
 }
@@ -589,25 +616,43 @@ static int dsicm_power_on(struct panel_drv_data *ddata)
 		.lp_clk_max = 10000000,
 	};
 
+	if (ddata->vpnl) {
+		r = regulator_enable(ddata->vpnl);
+		if (r) {
+			dev_err(&ddata->pdev->dev,
+				"failed to enable VPNL: %d\n", r);
+			return r;
+		}
+	}
+
+	if (ddata->vddi) {
+		r = regulator_enable(ddata->vddi);
+		if (r) {
+			dev_err(&ddata->pdev->dev,
+				"failed to enable VDDI: %d\n", r);
+			goto err_vpnl;
+		}
+	}
+
 	if (ddata->pin_config.num_pins > 0) {
 		r = in->ops.dsi->configure_pins(in, &ddata->pin_config);
 		if (r) {
 			dev_err(&ddata->pdev->dev,
 				"failed to configure DSI pins\n");
-			goto err0;
+			goto err_vddi;
 		}
 	}
 
 	r = in->ops.dsi->set_config(in, &dsi_config);
 	if (r) {
 		dev_err(&ddata->pdev->dev, "failed to configure DSI\n");
-		goto err0;
+		goto err_vddi;
 	}
 
 	r = in->ops.dsi->enable(in);
 	if (r) {
 		dev_err(&ddata->pdev->dev, "failed to enable DSI\n");
-		goto err0;
+		goto err_vddi;
 	}
 
 	dsicm_hw_reset(ddata);
@@ -665,7 +710,13 @@ static int dsicm_power_on(struct panel_drv_data *ddata)
 	dsicm_hw_reset(ddata);
 
 	in->ops.dsi->disable(in, true, false);
-err0:
+err_vddi:
+	if (ddata->vddi)
+		regulator_disable(ddata->vddi);
+err_vpnl:
+	if (ddata->vpnl)
+		regulator_disable(ddata->vpnl);
+
 	return r;
 }
 
@@ -688,6 +739,11 @@ static void dsicm_power_off(struct panel_drv_data *ddata)
 
 	in->ops.dsi->disable(in, true, false);
 
+	if (ddata->vddi)
+		regulator_disable(ddata->vddi);
+	if (ddata->vpnl)
+		regulator_disable(ddata->vpnl);
+
 	ddata->enabled = 0;
 }
 
@@ -782,6 +838,8 @@ static int dsicm_enable(struct omap_dss_device *dssdev)
 
 	mutex_unlock(&ddata->lock);
 
+	dsicm_bl_power(ddata, true);
+
 	return 0;
 err:
 	dev_dbg(&ddata->pdev->dev, "enable failed\n");
@@ -797,6 +855,8 @@ static void dsicm_disable(struct omap_dss_device *dssdev)
 
 	dev_dbg(&ddata->pdev->dev, "disable\n");
 
+	dsicm_bl_power(ddata, false);
+
 	mutex_lock(&ddata->lock);
 
 	dsicm_cancel_ulps_work(ddata);
@@ -890,7 +950,7 @@ static int dsicm_update(struct omap_dss_device *dssdev,
 	if (r)
 		goto err;
 
-	if (ddata->te_enabled && gpio_is_valid(ddata->ext_te_gpio)) {
+	if (ddata->te_enabled && ddata->ext_te_gpio) {
 		schedule_delayed_work(&ddata->te_timeout_work,
 				msecs_to_jiffies(250));
 		atomic_set(&ddata->do_update, 1);
@@ -937,7 +997,7 @@ static int _dsicm_enable_te(struct panel_drv_data *ddata, bool enable)
 	else
 		r = dsicm_dcs_write_0(ddata, MIPI_DCS_SET_TEAR_OFF);
 
-	if (!gpio_is_valid(ddata->ext_te_gpio))
+	if (!ddata->ext_te_gpio)
 		in->ops.dsi->enable_te(in, enable);
 
 	/* possible panel bug */
@@ -1099,6 +1159,45 @@ static void dsicm_ulps_work(struct work_struct *work)
 	mutex_unlock(&ddata->lock);
 }
 
+static void dsicm_get_timings(struct omap_dss_device *dssdev,
+			      struct videomode *vm)
+{
+	struct panel_drv_data *ddata = to_panel_data(dssdev);
+
+	*vm = ddata->vm;
+}
+
+static int dsicm_check_timings(struct omap_dss_device *dssdev,
+			       struct videomode *vm)
+{
+	struct panel_drv_data *ddata = to_panel_data(dssdev);
+	int ret = 0;
+
+	if (vm->hactive != ddata->vm.hactive)
+		ret = -EINVAL;
+
+	if (vm->vactive != ddata->vm.vactive)
+		ret = -EINVAL;
+
+	if (ret) {
+		dev_warn(dssdev->dev, "wrong resolution: %d x %d",
+			 vm->hactive, vm->vactive);
+		dev_warn(dssdev->dev, "panel resolution: %d x %d",
+			 ddata->vm.hactive, ddata->vm.vactive);
+	}
+
+	return ret;
+}
+
+static void dsicm_get_size(struct omap_dss_device *dssdev,
+			  unsigned int *width, unsigned int *height)
+{
+	struct panel_drv_data *ddata = to_panel_data(dssdev);
+
+	*width = ddata->width_mm;
+	*height = ddata->height_mm;
+}
+
 static struct omap_dss_driver dsicm_ops = {
 	.connect	= dsicm_connect,
 	.disconnect	= dsicm_disconnect,
@@ -1109,6 +1208,10 @@ static struct omap_dss_driver dsicm_ops = {
 	.update		= dsicm_update,
 	.sync		= dsicm_sync,
 
+	.get_timings	= dsicm_get_timings,
+	.check_timings	= dsicm_check_timings,
+	.get_size	= dsicm_get_size,
+
 	.enable_te	= dsicm_enable_te,
 	.get_te		= dsicm_get_te,
 
@@ -1118,41 +1221,87 @@ static struct omap_dss_driver dsicm_ops = {
 static int dsicm_probe_of(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
+	struct device_node *backlight;
 	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
 	struct omap_dss_device *in;
-	int gpio;
+	struct display_timing timing;
+	int err;
+
+	ddata->reset_gpio = devm_gpiod_get(&pdev->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(ddata->reset_gpio)) {
+		err = PTR_ERR(ddata->reset_gpio);
+		dev_err(&pdev->dev, "reset gpio request failed: %d", err);
+		return err;
+	}
 
-	gpio = of_get_named_gpio(node, "reset-gpios", 0);
-	if (!gpio_is_valid(gpio)) {
-		dev_err(&pdev->dev, "failed to parse reset gpio\n");
-		return gpio;
+	ddata->ext_te_gpio = devm_gpiod_get_optional(&pdev->dev, "te",
+						     GPIOD_IN);
+	if (IS_ERR(ddata->ext_te_gpio)) {
+		err = PTR_ERR(ddata->ext_te_gpio);
+		dev_err(&pdev->dev, "TE gpio request failed: %d", err);
+		return err;
 	}
-	ddata->reset_gpio = gpio;
 
-	gpio = of_get_named_gpio(node, "te-gpios", 0);
-	if (gpio_is_valid(gpio) || gpio == -ENOENT) {
-		ddata->ext_te_gpio = gpio;
+	err = of_get_display_timing(node, "panel-timing", &timing);
+	if (!err) {
+		videomode_from_timing(&timing, &ddata->vm);
+		if (!ddata->vm.pixelclock)
+			ddata->vm.pixelclock =
+				ddata->vm.hactive * ddata->vm.vactive * 60;
 	} else {
-		dev_err(&pdev->dev, "failed to parse TE gpio\n");
-		return gpio;
+		dev_warn(&pdev->dev,
+			 "failed to get video timing, using defaults\n");
 	}
 
+	ddata->width_mm = 0;
+	of_property_read_u32(node, "width-mm", &ddata->width_mm);
+
+	ddata->height_mm = 0;
+	of_property_read_u32(node, "height-mm", &ddata->height_mm);
+
 	in = omapdss_of_find_source_for_first_ep(node);
 	if (IS_ERR(in)) {
 		dev_err(&pdev->dev, "failed to find video source\n");
 		return PTR_ERR(in);
 	}
 
+	ddata->vpnl = devm_regulator_get_optional(&pdev->dev, "vpnl");
+	if (IS_ERR(ddata->vpnl)) {
+		err = PTR_ERR(ddata->vpnl);
+		if (err == -EPROBE_DEFER)
+			return err;
+		ddata->vpnl = NULL;
+	}
+
+	ddata->vddi = devm_regulator_get_optional(&pdev->dev, "vddi");
+	if (IS_ERR(ddata->vddi)) {
+		err = PTR_ERR(ddata->vddi);
+		if (err == -EPROBE_DEFER)
+			return err;
+		ddata->vddi = NULL;
+	}
+
 	ddata->in = in;
 
-	/* TODO: ulps, backlight */
+	backlight = of_parse_phandle(node, "backlight", 0);
+	if (backlight) {
+		ddata->extbldev = of_find_backlight_by_node(backlight);
+		of_node_put(backlight);
+
+		if (!ddata->extbldev)
+			return -EPROBE_DEFER;
+	} else {
+		/* assume native backlight support */
+		ddata->use_dsi_backlight = true;
+	}
+
+	/* TODO: ulps */
 
 	return 0;
 }
 
 static int dsicm_probe(struct platform_device *pdev)
 {
-	struct backlight_properties props;
 	struct panel_drv_data *ddata;
 	struct backlight_device *bldev = NULL;
 	struct device *dev = &pdev->dev;
@@ -1171,14 +1320,14 @@ static int dsicm_probe(struct platform_device *pdev)
 	if (!pdev->dev.of_node)
 		return -ENODEV;
 
-	r = dsicm_probe_of(pdev);
-	if (r)
-		return r;
-
 	ddata->vm.hactive = 864;
 	ddata->vm.vactive = 480;
 	ddata->vm.pixelclock = 864 * 480 * 60;
 
+	r = dsicm_probe_of(pdev);
+	if (r)
+		return r;
+
 	dssdev = &ddata->dssdev;
 	dssdev->dev = dev;
 	dssdev->driver = &dsicm_ops;
@@ -1200,31 +1349,15 @@ static int dsicm_probe(struct platform_device *pdev)
 
 	atomic_set(&ddata->do_update, 0);
 
-	if (gpio_is_valid(ddata->reset_gpio)) {
-		r = devm_gpio_request_one(dev, ddata->reset_gpio,
-				GPIOF_OUT_INIT_LOW, "taal rst");
-		if (r) {
-			dev_err(dev, "failed to request reset gpio\n");
-			return r;
-		}
-	}
-
-	if (gpio_is_valid(ddata->ext_te_gpio)) {
-		r = devm_gpio_request_one(dev, ddata->ext_te_gpio,
-				GPIOF_IN, "taal irq");
-		if (r) {
-			dev_err(dev, "GPIO request failed\n");
-			return r;
-		}
-
-		r = devm_request_irq(dev, gpio_to_irq(ddata->ext_te_gpio),
+	if (ddata->ext_te_gpio) {
+		r = devm_request_irq(dev, gpiod_to_irq(ddata->ext_te_gpio),
 				dsicm_te_isr,
 				IRQF_TRIGGER_RISING,
 				"taal vsync", ddata);
 
 		if (r) {
 			dev_err(dev, "IRQ request failed\n");
-			return r;
+			goto err_reg;
 		}
 
 		INIT_DEFERRABLE_WORK(&ddata->te_timeout_work,
@@ -1234,48 +1367,43 @@ static int dsicm_probe(struct platform_device *pdev)
 	}
 
 	ddata->workqueue = create_singlethread_workqueue("dsicm_wq");
-	if (ddata->workqueue == NULL) {
-		dev_err(dev, "can't create workqueue\n");
-		return -ENOMEM;
+	if (!ddata->workqueue) {
+		r = -ENOMEM;
+		goto err_reg;
 	}
 	INIT_DELAYED_WORK(&ddata->ulps_work, dsicm_ulps_work);
 
 	dsicm_hw_reset(ddata);
 
 	if (ddata->use_dsi_backlight) {
-		memset(&props, 0, sizeof(props));
+		struct backlight_properties props = { 0 };
 		props.max_brightness = 255;
-
 		props.type = BACKLIGHT_RAW;
-		bldev = backlight_device_register(dev_name(dev),
-				dev, ddata, &dsicm_bl_ops, &props);
+
+		bldev = devm_backlight_device_register(dev, dev_name(dev),
+			dev, ddata, &dsicm_bl_ops, &props);
 		if (IS_ERR(bldev)) {
 			r = PTR_ERR(bldev);
 			goto err_bl;
 		}
 
 		ddata->bldev = bldev;
-
-		bldev->props.fb_blank = FB_BLANK_UNBLANK;
-		bldev->props.power = FB_BLANK_UNBLANK;
-		bldev->props.brightness = 255;
-
-		dsicm_bl_update_status(bldev);
 	}
 
 	r = sysfs_create_group(&dev->kobj, &dsicm_attr_group);
 	if (r) {
 		dev_err(dev, "failed to create sysfs files\n");
-		goto err_sysfs_create;
+		goto err_bl;
 	}
 
 	return 0;
 
-err_sysfs_create:
-	backlight_device_unregister(bldev);
 err_bl:
 	destroy_workqueue(ddata->workqueue);
 err_reg:
+	if (ddata->extbldev)
+		put_device(&ddata->extbldev->dev);
+
 	return r;
 }
 
@@ -1283,7 +1411,6 @@ static int __exit dsicm_remove(struct platform_device *pdev)
 {
 	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
 	struct omap_dss_device *dssdev = &ddata->dssdev;
-	struct backlight_device *bldev;
 
 	dev_dbg(&pdev->dev, "remove\n");
 
@@ -1294,12 +1421,8 @@ static int __exit dsicm_remove(struct platform_device *pdev)
 
 	sysfs_remove_group(&pdev->dev.kobj, &dsicm_attr_group);
 
-	bldev = ddata->bldev;
-	if (bldev != NULL) {
-		bldev->props.power = FB_BLANK_POWERDOWN;
-		dsicm_bl_update_status(bldev);
-		backlight_device_unregister(bldev);
-	}
+	if (ddata->extbldev)
+		put_device(&ddata->extbldev->dev);
 
 	omap_dss_put_device(ddata->in);
 
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-lgphilips-lb035q02.c b/drivers/gpu/drm/omapdrm/displays/panel-lgphilips-lb035q02.c
index 74d13969b9ca7c0c5b187c27b77a6780ffb7bcd8..57af22ce87c5f108b12219062d912d90b3362837 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-lgphilips-lb035q02.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-lgphilips-lb035q02.c
@@ -1,7 +1,7 @@
 /*
  * LG.Philips LB035Q02 LCD Panel driver
  *
- * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  * Based on a driver by: Steve Sakoman <steve@sakoman.com>
  *
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-nec-nl8048hl11.c b/drivers/gpu/drm/omapdrm/displays/panel-nec-nl8048hl11.c
index df8132d3b9c694da20924ead82e0e5232d3a87b9..bf53676263ada2ee29d40f7b97071c343a9b31a0 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-nec-nl8048hl11.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-nec-nl8048hl11.c
@@ -1,7 +1,7 @@
 /*
  * NEC NL8048HL11 Panel driver
  *
- * Copyright (C) 2010 Texas Instruments Inc.
+ * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Erik Gilling <konkers@android.com>
  * Converted to new DSS device model: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-sharp-ls037v7dw01.c b/drivers/gpu/drm/omapdrm/displays/panel-sharp-ls037v7dw01.c
index 98d170aecababf1c5166b8ea50f9948b37c9eee1..34555801fa4c1148d8338fa29051591f1789bca3 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-sharp-ls037v7dw01.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-sharp-ls037v7dw01.c
@@ -1,7 +1,7 @@
 /*
  * LCD panel driver for Sharp LS037V7DW01
  *
- * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-tpo-td028ttec1.c b/drivers/gpu/drm/omapdrm/displays/panel-tpo-td028ttec1.c
index 0a38a0e8c925c3901da5573f8c20d3f049398c34..2721a86ac5e733f28eaa0babefe3bc5e514cae3f 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-tpo-td028ttec1.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-tpo-td028ttec1.c
@@ -452,15 +452,27 @@ static int td028ttec1_panel_remove(struct spi_device *spi)
 }
 
 static const struct of_device_id td028ttec1_of_match[] = {
+	{ .compatible = "omapdss,tpo,td028ttec1", },
+	/* keep to not break older DTB */
 	{ .compatible = "omapdss,toppoly,td028ttec1", },
 	{},
 };
 
 MODULE_DEVICE_TABLE(of, td028ttec1_of_match);
 
+static const struct spi_device_id td028ttec1_ids[] = {
+	{ "toppoly,td028ttec1", 0 },
+	{ "tpo,td028ttec1", 0},
+	{ /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, td028ttec1_ids);
+
+
 static struct spi_driver td028ttec1_spi_driver = {
 	.probe		= td028ttec1_panel_probe,
 	.remove		= td028ttec1_panel_remove,
+	.id_table	= td028ttec1_ids,
 
 	.driver         = {
 		.name   = "panel-tpo-td028ttec1",
@@ -471,7 +483,6 @@ static struct spi_driver td028ttec1_spi_driver = {
 
 module_spi_driver(td028ttec1_spi_driver);
 
-MODULE_ALIAS("spi:toppoly,td028ttec1");
 MODULE_AUTHOR("H. Nikolaus Schaller <hns@goldelico.com>");
 MODULE_DESCRIPTION("Toppoly TD028TTEC1 panel driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/omapdrm/dss/base.c b/drivers/gpu/drm/omapdrm/dss/base.c
index 13e91faaf7a6549a1025f4c40a0a176a2a3173e4..67cc87a4f1f6f9ba16d15b7f7ff9e50f7b044245 100644
--- a/drivers/gpu/drm/omapdrm/dss/base.c
+++ b/drivers/gpu/drm/omapdrm/dss/base.c
@@ -1,3 +1,18 @@
+/*
+ * OMAP Display Subsystem Base
+ *
+ * Copyright (C) 2015-2017 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
diff --git a/drivers/gpu/drm/omapdrm/dss/core.c b/drivers/gpu/drm/omapdrm/dss/core.c
index 197ddbc1512bb2a3f0380ffb2fe4c8321b0c5050..acef7ece5783367f8fe5ae40592e5787645e6447 100644
--- a/drivers/gpu/drm/omapdrm/dss/core.c
+++ b/drivers/gpu/drm/omapdrm/dss/core.c
@@ -1,6 +1,4 @@
 /*
- * linux/drivers/video/omap2/dss/core.c
- *
  * Copyright (C) 2009 Nokia Corporation
  * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
  *
@@ -30,38 +28,21 @@
 #include "dss.h"
 
 /* INIT */
-static int (*dss_output_drv_reg_funcs[])(void) __initdata = {
-	dss_init_platform_driver,
-	dispc_init_platform_driver,
+static struct platform_driver * const omap_dss_drivers[] = {
+	&omap_dsshw_driver,
+	&omap_dispchw_driver,
 #ifdef CONFIG_OMAP2_DSS_DSI
-	dsi_init_platform_driver,
+	&omap_dsihw_driver,
 #endif
 #ifdef CONFIG_OMAP2_DSS_VENC
-	venc_init_platform_driver,
+	&omap_venchw_driver,
 #endif
 #ifdef CONFIG_OMAP4_DSS_HDMI
-	hdmi4_init_platform_driver,
+	&omapdss_hdmi4hw_driver,
 #endif
 #ifdef CONFIG_OMAP5_DSS_HDMI
-	hdmi5_init_platform_driver,
-#endif
-};
-
-static void (*dss_output_drv_unreg_funcs[])(void) = {
-#ifdef CONFIG_OMAP5_DSS_HDMI
-	hdmi5_uninit_platform_driver,
-#endif
-#ifdef CONFIG_OMAP4_DSS_HDMI
-	hdmi4_uninit_platform_driver,
+	&omapdss_hdmi5hw_driver,
 #endif
-#ifdef CONFIG_OMAP2_DSS_VENC
-	venc_uninit_platform_driver,
-#endif
-#ifdef CONFIG_OMAP2_DSS_DSI
-	dsi_uninit_platform_driver,
-#endif
-	dispc_uninit_platform_driver,
-	dss_uninit_platform_driver,
 };
 
 static struct platform_device *omap_drm_device;
@@ -69,13 +50,11 @@ static struct platform_device *omap_drm_device;
 static int __init omap_dss_init(void)
 {
 	int r;
-	int i;
 
-	for (i = 0; i < ARRAY_SIZE(dss_output_drv_reg_funcs); ++i) {
-		r = dss_output_drv_reg_funcs[i]();
-		if (r)
-			goto err_reg;
-	}
+	r = platform_register_drivers(omap_dss_drivers,
+				      ARRAY_SIZE(omap_dss_drivers));
+	if (r)
+		goto err_reg;
 
 	omap_drm_device = platform_device_register_simple("omapdrm", 0, NULL, 0);
 	if (IS_ERR(omap_drm_device)) {
@@ -86,22 +65,18 @@ static int __init omap_dss_init(void)
 	return 0;
 
 err_reg:
-	for (i = ARRAY_SIZE(dss_output_drv_reg_funcs) - i;
-			i < ARRAY_SIZE(dss_output_drv_reg_funcs);
-			++i)
-		dss_output_drv_unreg_funcs[i]();
+	platform_unregister_drivers(omap_dss_drivers,
+				    ARRAY_SIZE(omap_dss_drivers));
 
 	return r;
 }
 
 static void __exit omap_dss_exit(void)
 {
-	int i;
-
 	platform_device_unregister(omap_drm_device);
 
-	for (i = 0; i < ARRAY_SIZE(dss_output_drv_unreg_funcs); ++i)
-		dss_output_drv_unreg_funcs[i]();
+	platform_unregister_drivers(omap_dss_drivers,
+				    ARRAY_SIZE(omap_dss_drivers));
 }
 
 module_init(omap_dss_init);
diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c
index 0f4fdb221498e3144aa44a760a01fba0d9db5be1..4e8f68efd169ee1d24649a38d662c2619cdce392 100644
--- a/drivers/gpu/drm/omapdrm/dss/dispc.c
+++ b/drivers/gpu/drm/omapdrm/dss/dispc.c
@@ -1,6 +1,4 @@
 /*
- * linux/drivers/video/omap2/dss/dispc.c
- *
  * Copyright (C) 2009 Nokia Corporation
  * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
  *
@@ -4325,6 +4323,17 @@ static void dispc_free_irq(void *dev_id)
 	dispc.user_data = NULL;
 }
 
+static u32 dispc_get_memory_bandwidth_limit(void)
+{
+	u32 limit = 0;
+
+	/* Optional maximum memory bandwidth */
+	of_property_read_u32(dispc.pdev->dev.of_node, "max-memory-bandwidth",
+			     &limit);
+
+	return limit;
+}
+
 /*
  * Workaround for errata i734 in DSS dispc
  *  - LCD1 Gamma Correction Is Not Working When GFX Pipe Is Disabled
@@ -4497,6 +4506,8 @@ static const struct dispc_ops dispc_ops = {
 	.get_num_ovls = dispc_get_num_ovls,
 	.get_num_mgrs = dispc_get_num_mgrs,
 
+	.get_memory_bandwidth_limit = dispc_get_memory_bandwidth_limit,
+
 	.mgr_enable = dispc_mgr_enable,
 	.mgr_is_enabled = dispc_mgr_is_enabled,
 	.mgr_get_vsync_irq = dispc_mgr_get_vsync_irq,
@@ -4685,7 +4696,7 @@ static const struct dev_pm_ops dispc_pm_ops = {
 	.runtime_resume = dispc_runtime_resume,
 };
 
-static struct platform_driver omap_dispchw_driver = {
+struct platform_driver omap_dispchw_driver = {
 	.probe		= dispc_probe,
 	.remove         = dispc_remove,
 	.driver         = {
@@ -4695,13 +4706,3 @@ static struct platform_driver omap_dispchw_driver = {
 		.suppress_bind_attrs = true,
 	},
 };
-
-int __init dispc_init_platform_driver(void)
-{
-	return platform_driver_register(&omap_dispchw_driver);
-}
-
-void dispc_uninit_platform_driver(void)
-{
-	platform_driver_unregister(&omap_dispchw_driver);
-}
diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.h b/drivers/gpu/drm/omapdrm/dss/dispc.h
index 003adce532f4e99fce957ecd768ff432e1bf2317..e901dd1e4365e197e1fe359fab9da066c1a00751 100644
--- a/drivers/gpu/drm/omapdrm/dss/dispc.h
+++ b/drivers/gpu/drm/omapdrm/dss/dispc.h
@@ -1,10 +1,7 @@
 /*
- * linux/drivers/video/omap2/dss/dispc.h
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Archit Taneja <archit@ti.com>
  *
- *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published by
  * the Free Software Foundation.
diff --git a/drivers/gpu/drm/omapdrm/dss/dispc_coefs.c b/drivers/gpu/drm/omapdrm/dss/dispc_coefs.c
index 34fad2376f8ddf1d713d1adaac09e9d787cea50f..44804c8c8777cce07088bf9301c218bb26d80d3f 100644
--- a/drivers/gpu/drm/omapdrm/dss/dispc_coefs.c
+++ b/drivers/gpu/drm/omapdrm/dss/dispc_coefs.c
@@ -1,7 +1,5 @@
 /*
- * linux/drivers/video/omap2/dss/dispc_coefs.c
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Chandrabhanu Mahapatra <cmahapatra@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/dss/display.c b/drivers/gpu/drm/omapdrm/dss/display.c
index 42279933790efb861afae5b80149b652189d2188..0c9480ba85c09f028b23ecb69a28ba53a29a30f1 100644
--- a/drivers/gpu/drm/omapdrm/dss/display.c
+++ b/drivers/gpu/drm/omapdrm/dss/display.c
@@ -1,6 +1,4 @@
 /*
- * linux/drivers/video/omap2/dss/display.c
- *
  * Copyright (C) 2009 Nokia Corporation
  * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
  *
@@ -175,17 +173,3 @@ struct omap_dss_device *omap_dss_get_next_device(struct omap_dss_device *from)
 	return dssdev;
 }
 EXPORT_SYMBOL(omap_dss_get_next_device);
-
-struct omap_dss_device *omap_dss_find_device(void *data,
-		int (*match)(struct omap_dss_device *dssdev, void *data))
-{
-	struct omap_dss_device *dssdev = NULL;
-
-	while ((dssdev = omap_dss_get_next_device(dssdev)) != NULL) {
-		if (match(dssdev, data))
-			return dssdev;
-	}
-
-	return NULL;
-}
-EXPORT_SYMBOL(omap_dss_find_device);
diff --git a/drivers/gpu/drm/omapdrm/dss/dpi.c b/drivers/gpu/drm/omapdrm/dss/dpi.c
index ca1e3b489540fe8171b7eaa1d6839963a2533e08..ea44137ed08c4fccc4ee1de7b804831894976d10 100644
--- a/drivers/gpu/drm/omapdrm/dss/dpi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dpi.c
@@ -1,6 +1,4 @@
 /*
- * linux/drivers/video/omap2/dss/dpi.c
- *
  * Copyright (C) 2009 Nokia Corporation
  * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
  *
@@ -52,8 +50,6 @@ struct dpi_data {
 	int data_lines;
 
 	struct omap_dss_device output;
-
-	bool port_initialized;
 };
 
 static struct dpi_data *dpi_get_data_from_dssdev(struct omap_dss_device *dssdev)
@@ -786,8 +782,6 @@ int dpi_init_port(struct platform_device *pdev, struct device_node *port,
 
 	dpi_init_output_port(dpi, port);
 
-	dpi->port_initialized = true;
-
 	return 0;
 
 err_datalines:
@@ -800,7 +794,7 @@ void dpi_uninit_port(struct device_node *port)
 {
 	struct dpi_data *dpi = port->data;
 
-	if (!dpi->port_initialized)
+	if (!dpi)
 		return;
 
 	dpi_uninit_output_port(port);
diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
index c2cf6d98e577bdf7f367042cde89ae6fd196f133..80f1f3679a3c1a3278be74924e6644979a68a01d 100644
--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
@@ -1,6 +1,4 @@
 /*
- * linux/drivers/video/omap2/dss/dsi.c
- *
  * Copyright (C) 2009 Nokia Corporation
  * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
  *
@@ -5660,7 +5658,7 @@ static const struct dev_pm_ops dsi_pm_ops = {
 	.runtime_resume = dsi_runtime_resume,
 };
 
-static struct platform_driver omap_dsihw_driver = {
+struct platform_driver omap_dsihw_driver = {
 	.probe		= dsi_probe,
 	.remove		= dsi_remove,
 	.driver         = {
@@ -5670,13 +5668,3 @@ static struct platform_driver omap_dsihw_driver = {
 		.suppress_bind_attrs = true,
 	},
 };
-
-int __init dsi_init_platform_driver(void)
-{
-	return platform_driver_register(&omap_dsihw_driver);
-}
-
-void dsi_uninit_platform_driver(void)
-{
-	platform_driver_unregister(&omap_dsihw_driver);
-}
diff --git a/drivers/gpu/drm/omapdrm/dss/dss-of.c b/drivers/gpu/drm/omapdrm/dss/dss-of.c
index c6b86f348a5c2a589113b6ef401f1a20ea2dbdbf..967d9e1b34e5b6464d7db304d956d75636b34fb5 100644
--- a/drivers/gpu/drm/omapdrm/dss/dss-of.c
+++ b/drivers/gpu/drm/omapdrm/dss/dss-of.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c
index d1755f12236ba1d43f0b2266c58182993730908a..04300b2da1b16e2af35b80217de103793c54c49d 100644
--- a/drivers/gpu/drm/omapdrm/dss/dss.c
+++ b/drivers/gpu/drm/omapdrm/dss/dss.c
@@ -1,6 +1,4 @@
 /*
- * linux/drivers/video/omap2/dss/dss.c
- *
  * Copyright (C) 2009 Nokia Corporation
  * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
  *
@@ -23,6 +21,7 @@
 #define DSS_SUBSYS_NAME "DSS"
 
 #include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/io.h>
@@ -367,7 +366,8 @@ const char *dss_get_clk_source_name(enum dss_clk_source clk_src)
 	return dss_generic_clk_source_names[clk_src];
 }
 
-void dss_dump_clocks(struct seq_file *s)
+#if defined(CONFIG_OMAP2_DSS_DEBUGFS)
+static void dss_dump_clocks(struct seq_file *s)
 {
 	const char *fclk_name;
 	unsigned long fclk_rate;
@@ -386,6 +386,7 @@ void dss_dump_clocks(struct seq_file *s)
 
 	dss_runtime_put();
 }
+#endif
 
 static void dss_dump_regs(struct seq_file *s)
 {
@@ -1441,6 +1442,12 @@ static int dss_probe(struct platform_device *pdev)
 
 	dss.pdev = pdev;
 
+	r = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+	if (r) {
+		dev_err(&pdev->dev, "Failed to set the DMA mask\n");
+		return r;
+	}
+
 	/*
 	 * The various OMAP3-based SoCs can't be told apart using the compatible
 	 * string, use SoC device matching.
@@ -1527,7 +1534,7 @@ static const struct dev_pm_ops dss_pm_ops = {
 	.runtime_resume = dss_runtime_resume,
 };
 
-static struct platform_driver omap_dsshw_driver = {
+struct platform_driver omap_dsshw_driver = {
 	.probe		= dss_probe,
 	.remove		= dss_remove,
 	.shutdown	= dss_shutdown,
@@ -1538,13 +1545,3 @@ static struct platform_driver omap_dsshw_driver = {
 		.suppress_bind_attrs = true,
 	},
 };
-
-int __init dss_init_platform_driver(void)
-{
-	return platform_driver_register(&omap_dsshw_driver);
-}
-
-void dss_uninit_platform_driver(void)
-{
-	platform_driver_unregister(&omap_dsshw_driver);
-}
diff --git a/drivers/gpu/drm/omapdrm/dss/dss.h b/drivers/gpu/drm/omapdrm/dss/dss.h
index ed465572491ea5b6d7814753d033db458878d222..6374e57ed9daa332bdb7b55d781813a076985580 100644
--- a/drivers/gpu/drm/omapdrm/dss/dss.h
+++ b/drivers/gpu/drm/omapdrm/dss/dss.h
@@ -1,6 +1,4 @@
 /*
- * linux/drivers/video/omap2/dss/dss.h
- *
  * Copyright (C) 2009 Nokia Corporation
  * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
  *
@@ -264,9 +262,6 @@ static inline int dss_debugfs_create_file(const char *name,
 }
 #endif /* CONFIG_OMAP2_DSS_DEBUGFS */
 
-int dss_init_platform_driver(void) __init;
-void dss_uninit_platform_driver(void);
-
 int dss_runtime_get(void);
 void dss_runtime_put(void);
 
@@ -277,7 +272,6 @@ int dss_dpi_select_source(int port, enum omap_channel channel);
 void dss_select_hdmi_venc_clk_source(enum dss_hdmi_venc_clk_source_select);
 enum dss_hdmi_venc_clk_source_select dss_get_hdmi_venc_clk_source(void);
 const char *dss_get_clk_source_name(enum dss_clk_source clk_src);
-void dss_dump_clocks(struct seq_file *s);
 
 /* DSS VIDEO PLL */
 struct dss_pll *dss_video_pll_init(struct platform_device *pdev, int id,
@@ -329,9 +323,6 @@ static inline void sdi_uninit_port(struct device_node *port)
 struct dentry;
 struct file_operations;
 
-int dsi_init_platform_driver(void) __init;
-void dsi_uninit_platform_driver(void);
-
 void dsi_dump_clocks(struct seq_file *s);
 
 void dsi_irq_handler(void);
@@ -355,8 +346,6 @@ static inline void dpi_uninit_port(struct device_node *port)
 #endif
 
 /* DISPC */
-int dispc_init_platform_driver(void) __init;
-void dispc_uninit_platform_driver(void);
 void dispc_dump_clocks(struct seq_file *s);
 
 int dispc_runtime_get(void);
@@ -400,18 +389,6 @@ void dispc_wb_set_channel_in(enum dss_writeback_channel channel);
 int dispc_wb_setup(const struct omap_dss_writeback_info *wi,
 		bool mem_to_mem, const struct videomode *vm);
 
-/* VENC */
-int venc_init_platform_driver(void) __init;
-void venc_uninit_platform_driver(void);
-
-/* HDMI */
-int hdmi4_init_platform_driver(void) __init;
-void hdmi4_uninit_platform_driver(void);
-
-int hdmi5_init_platform_driver(void) __init;
-void hdmi5_uninit_platform_driver(void);
-
-
 #ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS
 static inline void dss_collect_irq_stats(u32 irqstatus, unsigned *irq_arr)
 {
@@ -455,4 +432,19 @@ int dss_pll_write_config_type_b(struct dss_pll *pll,
 		const struct dss_pll_clock_info *cinfo);
 int dss_pll_wait_reset_done(struct dss_pll *pll);
 
+extern struct platform_driver omap_dsshw_driver;
+extern struct platform_driver omap_dispchw_driver;
+#ifdef CONFIG_OMAP2_DSS_DSI
+extern struct platform_driver omap_dsihw_driver;
+#endif
+#ifdef CONFIG_OMAP2_DSS_VENC
+extern struct platform_driver omap_venchw_driver;
+#endif
+#ifdef CONFIG_OMAP4_DSS_HDMI
+extern struct platform_driver omapdss_hdmi4hw_driver;
+#endif
+#ifdef CONFIG_OMAP5_DSS_HDMI
+extern struct platform_driver omapdss_hdmi5hw_driver;
+#endif
+
 #endif
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
index a598dfdeb5850ee26f791416ecc4193cf11cf6e7..bf914f2ac99e7ee8be454e40a95c7316963bd67e 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
@@ -1,5 +1,6 @@
 /*
  * HDMI interface DSS driver for TI's OMAP4 family of SoCs.
+ *
  * Copyright (C) 2010-2011 Texas Instruments Incorporated - http://www.ti.com/
  * Authors: Yong Zhi
  *	Mythri pk <mythripk@ti.com>
@@ -844,7 +845,7 @@ static const struct of_device_id hdmi_of_match[] = {
 	{},
 };
 
-static struct platform_driver omapdss_hdmihw_driver = {
+struct platform_driver omapdss_hdmi4hw_driver = {
 	.probe		= hdmi4_probe,
 	.remove		= hdmi4_remove,
 	.driver         = {
@@ -854,13 +855,3 @@ static struct platform_driver omapdss_hdmihw_driver = {
 		.suppress_bind_attrs = true,
 	},
 };
-
-int __init hdmi4_init_platform_driver(void)
-{
-	return platform_driver_register(&omapdss_hdmihw_driver);
-}
-
-void hdmi4_uninit_platform_driver(void)
-{
-	platform_driver_unregister(&omapdss_hdmihw_driver);
-}
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c
index e626eddf24d5e2231c2434a76d45ddd29067c6e4..23db74ae18263a92866467e36bf727bfc5c925b5 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c
@@ -78,6 +78,8 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core)
 
 			/* then read the message */
 			msg.len = cnt & 0xf;
+			if (msg.len > CEC_MAX_MSG_SIZE - 2)
+				msg.len = CEC_MAX_MSG_SIZE - 2;
 			msg.msg[0] = hdmi_read_reg(core->base,
 						   HDMI_CEC_RX_CMD_HEADER);
 			msg.msg[1] = hdmi_read_reg(core->base,
@@ -104,26 +106,6 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core)
 	}
 }
 
-static void hdmi_cec_transmit_fifo_empty(struct hdmi_core_data *core, u32 stat1)
-{
-	if (stat1 & 2) {
-		u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3);
-
-		cec_transmit_done(core->adap,
-				  CEC_TX_STATUS_NACK |
-				  CEC_TX_STATUS_MAX_RETRIES,
-				  0, (dbg3 >> 4) & 7, 0, 0);
-	} else if (stat1 & 1) {
-		cec_transmit_done(core->adap,
-				  CEC_TX_STATUS_ARB_LOST |
-				  CEC_TX_STATUS_MAX_RETRIES,
-				  0, 0, 0, 0);
-	} else if (stat1 == 0) {
-		cec_transmit_done(core->adap, CEC_TX_STATUS_OK,
-				  0, 0, 0, 0);
-	}
-}
-
 void hdmi4_cec_irq(struct hdmi_core_data *core)
 {
 	u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0);
@@ -132,27 +114,21 @@ void hdmi4_cec_irq(struct hdmi_core_data *core)
 	hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_0, stat0);
 	hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_1, stat1);
 
-	if (stat0 & 0x40)
+	if (stat0 & 0x20) {
+		cec_transmit_done(core->adap, CEC_TX_STATUS_OK,
+				  0, 0, 0, 0);
 		REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7);
-	else if (stat0 & 0x24)
-		hdmi_cec_transmit_fifo_empty(core, stat1);
-	if (stat1 & 2) {
+	} else if (stat1 & 0x02) {
 		u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3);
 
 		cec_transmit_done(core->adap,
 				  CEC_TX_STATUS_NACK |
 				  CEC_TX_STATUS_MAX_RETRIES,
 				  0, (dbg3 >> 4) & 7, 0, 0);
-	} else if (stat1 & 1) {
-		cec_transmit_done(core->adap,
-				  CEC_TX_STATUS_ARB_LOST |
-				  CEC_TX_STATUS_MAX_RETRIES,
-				  0, 0, 0, 0);
+		REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7);
 	}
 	if (stat0 & 0x02)
 		hdmi_cec_received_msg(core);
-	if (stat1 & 0x3)
-		REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7);
 }
 
 static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap)
@@ -231,18 +207,14 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable)
 	/*
 	 * Enable CEC interrupts:
 	 * Transmit Buffer Full/Empty Change event
-	 * Transmitter FIFO Empty event
 	 * Receiver FIFO Not Empty event
 	 */
-	hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x26);
+	hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x22);
 	/*
 	 * Enable CEC interrupts:
-	 * RX FIFO Overrun Error event
-	 * Short Pulse Detected event
 	 * Frame Retransmit Count Exceeded event
-	 * Start Bit Irregularity event
 	 */
-	hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x0f);
+	hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x02);
 
 	/* cec calibration enable (self clearing) */
 	hdmi_write_reg(core->base, HDMI_CEC_SETUP, 0x03);
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c
index b06f9956e73321352eb048a6b54d54957db42f37..35ed2add6189c68e405dc5cf0715ad329c5ece9a 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c
@@ -1,7 +1,6 @@
 /*
- * ti_hdmi_4xxx_ip.c
- *
  * HDMI TI81xx, TI38xx, TI OMAP4 etc IP driver Library
+ *
  * Copyright (C) 2010-2011 Texas Instruments Incorporated - http://www.ti.com/
  * Authors: Yong Zhi
  *	Mythri pk <mythripk@ti.com>
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
index b3221ca5bcd8413852ea1f0df3570adcb6d14337..689cda41858b61a6a663cde779452d1cbcd7980f 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
@@ -1,7 +1,7 @@
 /*
  * HDMI driver for OMAP5
  *
- * Copyright (C) 2014 Texas Instruments Incorporated
+ * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/
  *
  * Authors:
  *	Yong Zhi
@@ -841,7 +841,7 @@ static const struct of_device_id hdmi_of_match[] = {
 	{},
 };
 
-static struct platform_driver omapdss_hdmihw_driver = {
+struct platform_driver omapdss_hdmi5hw_driver = {
 	.probe		= hdmi5_probe,
 	.remove		= hdmi5_remove,
 	.driver         = {
@@ -851,13 +851,3 @@ static struct platform_driver omapdss_hdmihw_driver = {
 		.suppress_bind_attrs = true,
 	},
 };
-
-int __init hdmi5_init_platform_driver(void)
-{
-	return platform_driver_register(&omapdss_hdmihw_driver);
-}
-
-void hdmi5_uninit_platform_driver(void)
-{
-	platform_driver_unregister(&omapdss_hdmihw_driver);
-}
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5_core.c b/drivers/gpu/drm/omapdrm/dss/hdmi5_core.c
index ab179ec133c0b275fecdb816270884d8551e4694..09759f8ea7bcb6ba9df6030a39cca3f342438bc8 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi5_core.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi5_core.c
@@ -1,8 +1,7 @@
 /*
  * OMAP5 HDMI CORE IP driver library
  *
- * Copyright (C) 2014 Texas Instruments Incorporated
- *
+ * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/
  * Authors:
  *	Yong Zhi
  *	Mythri pk
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi_phy.c b/drivers/gpu/drm/omapdrm/dss/hdmi_phy.c
index a156292b1820b9a8cce0560dbcd4a13643aeb373..5c14ed851609aa2a4c9960657afaf2499eab3bf0 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi_phy.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi_phy.c
@@ -1,7 +1,7 @@
 /*
  * HDMI PHY
  *
- * Copyright (C) 2013 Texas Instruments Incorporated
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published by
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi_pll.c b/drivers/gpu/drm/omapdrm/dss/hdmi_pll.c
index 55bee81f4dd52a25ef4d9df81087411fa024e7f8..08885d7de1e87abb1987b9f077520d3fe9b7d145 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi_pll.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi_pll.c
@@ -1,7 +1,7 @@
 /*
  * HDMI PLL
  *
- * Copyright (C) 2013 Texas Instruments Incorporated
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published by
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi_wp.c b/drivers/gpu/drm/omapdrm/dss/hdmi_wp.c
index 88034fbe0e9f05402804af57382feee52ed373ad..806e5fdcfe52d0b6385a185c716f80ae77db20c6 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi_wp.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi_wp.c
@@ -1,7 +1,7 @@
 /*
  * HDMI wrapper
  *
- * Copyright (C) 2013 Texas Instruments Incorporated
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published by
diff --git a/drivers/gpu/drm/omapdrm/dss/omapdss-boot-init.c b/drivers/gpu/drm/omapdrm/dss/omapdss-boot-init.c
index bf626acae2712b3a7eff53ec148a210f910f5c6d..3bfb95d230e0e4456b744ac5b961b629152c5527 100644
--- a/drivers/gpu/drm/omapdrm/dss/omapdss-boot-init.c
+++ b/drivers/gpu/drm/omapdrm/dss/omapdss-boot-init.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 Texas Instruments
+ * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/dss/omapdss.h b/drivers/gpu/drm/omapdrm/dss/omapdss.h
index 990422b357849ec495f7c9fc9a3a6efbf4d4a0d0..f8f83e826a56d1481977730342cf51a5dd2a1e56 100644
--- a/drivers/gpu/drm/omapdrm/dss/omapdss.h
+++ b/drivers/gpu/drm/omapdrm/dss/omapdss.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 Texas Instruments
+ * Copyright (C) 2016 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -563,6 +563,8 @@ struct omap_dss_driver {
 			    struct videomode *vm);
 	void (*get_timings)(struct omap_dss_device *dssdev,
 			    struct videomode *vm);
+	void (*get_size)(struct omap_dss_device *dssdev,
+			 unsigned int *width, unsigned int *height);
 
 	int (*set_wss)(struct omap_dss_device *dssdev, u32 wss);
 	u32 (*get_wss)(struct omap_dss_device *dssdev);
@@ -585,9 +587,6 @@ struct omap_dss_driver {
 
 bool omapdss_is_initialized(void);
 
-int omap_dss_register_driver(struct omap_dss_driver *);
-void omap_dss_unregister_driver(struct omap_dss_driver *);
-
 int omapdss_register_display(struct omap_dss_device *dssdev);
 void omapdss_unregister_display(struct omap_dss_device *dssdev);
 
@@ -595,9 +594,6 @@ struct omap_dss_device *omap_dss_get_device(struct omap_dss_device *dssdev);
 void omap_dss_put_device(struct omap_dss_device *dssdev);
 #define for_each_dss_dev(d) while ((d = omap_dss_get_next_device(d)) != NULL)
 struct omap_dss_device *omap_dss_get_next_device(struct omap_dss_device *from);
-struct omap_dss_device *omap_dss_find_device(void *data,
-		int (*match)(struct omap_dss_device *dssdev, void *data));
-
 
 int omap_dss_get_num_overlay_managers(void);
 
@@ -695,6 +691,8 @@ struct dispc_ops {
 	int (*get_num_ovls)(void);
 	int (*get_num_mgrs)(void);
 
+	u32 (*get_memory_bandwidth_limit)(void);
+
 	void (*mgr_enable)(enum omap_channel channel, bool enable);
 	bool (*mgr_is_enabled)(enum omap_channel channel);
 	u32 (*mgr_get_vsync_irq)(enum omap_channel channel);
diff --git a/drivers/gpu/drm/omapdrm/dss/output.c b/drivers/gpu/drm/omapdrm/dss/output.c
index 3c572b699ed31c025a8691afc992610b467fda59..b9afd80ae38545f8a1622e9ead773ddb906d9ace 100644
--- a/drivers/gpu/drm/omapdrm/dss/output.c
+++ b/drivers/gpu/drm/omapdrm/dss/output.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012 Texas Instruments Ltd
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Archit Taneja <archit@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/dss/pll.c b/drivers/gpu/drm/omapdrm/dss/pll.c
index 9d9d9d42009baf4ee634733ee31a464bbe16e6bd..058714b1eb56278778caac3e5dbad9752ed6b941 100644
--- a/drivers/gpu/drm/omapdrm/dss/pll.c
+++ b/drivers/gpu/drm/omapdrm/dss/pll.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 Texas Instruments Incorporated
+ * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published by
diff --git a/drivers/gpu/drm/omapdrm/dss/sdi.c b/drivers/gpu/drm/omapdrm/dss/sdi.c
index d18ad58c5a1993b9d71858bac7bd721bfb5eaad9..d8ab31f3a8134d517e8b2f31778079c113639292 100644
--- a/drivers/gpu/drm/omapdrm/dss/sdi.c
+++ b/drivers/gpu/drm/omapdrm/dss/sdi.c
@@ -1,6 +1,4 @@
 /*
- * linux/drivers/video/omap2/dss/sdi.c
- *
  * Copyright (C) 2009 Nokia Corporation
  * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
  *
diff --git a/drivers/gpu/drm/omapdrm/dss/venc.c b/drivers/gpu/drm/omapdrm/dss/venc.c
index d58da6f32693e6a6aae39d93d307988f08771cd2..6de9d734ddb98f235e16f18564596cb868823713 100644
--- a/drivers/gpu/drm/omapdrm/dss/venc.c
+++ b/drivers/gpu/drm/omapdrm/dss/venc.c
@@ -1,6 +1,4 @@
 /*
- * linux/drivers/video/omap2/dss/venc.c
- *
  * Copyright (C) 2009 Nokia Corporation
  * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
  *
@@ -857,10 +855,10 @@ static int venc_probe_of(struct platform_device *pdev)
 	of_node_put(ep);
 
 	return 0;
+
 err:
 	of_node_put(ep);
-
-	return 0;
+	return r;
 }
 
 /* VENC HW IP initialisation */
@@ -986,7 +984,7 @@ static const struct of_device_id venc_of_match[] = {
 	{},
 };
 
-static struct platform_driver omap_venchw_driver = {
+struct platform_driver omap_venchw_driver = {
 	.probe		= venc_probe,
 	.remove		= venc_remove,
 	.driver         = {
@@ -996,13 +994,3 @@ static struct platform_driver omap_venchw_driver = {
 		.suppress_bind_attrs = true,
 	},
 };
-
-int __init venc_init_platform_driver(void)
-{
-	return platform_driver_register(&omap_venchw_driver);
-}
-
-void venc_uninit_platform_driver(void)
-{
-	platform_driver_unregister(&omap_venchw_driver);
-}
diff --git a/drivers/gpu/drm/omapdrm/dss/video-pll.c b/drivers/gpu/drm/omapdrm/dss/video-pll.c
index 38a239cc5e044ece0abe4fcf5bcdad380e414b54..bbedac797927bef77a7a87f035edd343148a138f 100644
--- a/drivers/gpu/drm/omapdrm/dss/video-pll.c
+++ b/drivers/gpu/drm/omapdrm/dss/video-pll.c
@@ -1,13 +1,15 @@
 /*
-* Copyright (C) 2014 Texas Instruments Ltd
-*
-* This program is free software; you can redistribute it and/or modify it
-* under the terms of the GNU General Public License version 2 as published by
-* the Free Software Foundation.
-*
-* You should have received a copy of the GNU General Public License along with
-* this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
+ * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
 
 #include <linux/clk.h>
 #include <linux/delay.h>
diff --git a/drivers/gpu/drm/omapdrm/omap_connector.c b/drivers/gpu/drm/omapdrm/omap_connector.c
index aa5ba9ae2191c15518ee1aa326165793653912e1..a0d7b1d905e85500cc011d76fefb19453384a7fe 100644
--- a/drivers/gpu/drm/omapdrm/omap_connector.c
+++ b/drivers/gpu/drm/omapdrm/omap_connector.c
@@ -1,7 +1,5 @@
 /*
- * drivers/gpu/drm/omapdrm/omap_connector.c
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -149,6 +147,12 @@ static int omap_connector_get_modes(struct drm_connector *connector)
 		drm_mode_set_name(mode);
 		drm_mode_probed_add(connector, mode);
 
+		if (dssdrv->get_size) {
+			dssdrv->get_size(dssdev,
+					 &connector->display_info.width_mm,
+					 &connector->display_info.height_mm);
+		}
+
 		n = 1;
 	}
 
diff --git a/drivers/gpu/drm/omapdrm/omap_connector.h b/drivers/gpu/drm/omapdrm/omap_connector.h
new file mode 100644
index 0000000000000000000000000000000000000000..98bbc779b30265304363166c66da784de0277ff1
--- /dev/null
+++ b/drivers/gpu/drm/omapdrm/omap_connector.h
@@ -0,0 +1,37 @@
+/*
+ * omap_connector.h -- OMAP DRM Connector
+ *
+ * Copyright (C) 2011 Texas Instruments
+ * Author: Rob Clark <rob@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __OMAPDRM_CONNECTOR_H__
+#define __OMAPDRM_CONNECTOR_H__
+
+#include <linux/types.h>
+
+struct drm_connector;
+struct drm_device;
+struct drm_encoder;
+struct omap_dss_device;
+
+struct drm_connector *omap_connector_init(struct drm_device *dev,
+		int connector_type, struct omap_dss_device *dssdev,
+		struct drm_encoder *encoder);
+struct drm_encoder *omap_connector_attached_encoder(
+		struct drm_connector *connector);
+bool omap_connector_get_hdmi_mode(struct drm_connector *connector);
+
+#endif /* __OMAPDRM_CONNECTOR_H__ */
diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
index cc85c16cbc2abdf58e044c13840d16bd212f11b1..1b8154e58d1823e6e949a1448ac604cbe2ccf8aa 100644
--- a/drivers/gpu/drm/omapdrm/omap_crtc.c
+++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
@@ -1,7 +1,5 @@
 /*
- * drivers/gpu/drm/omapdrm/omap_crtc.c
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -23,6 +21,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_mode.h>
 #include <drm/drm_plane_helper.h>
+#include <linux/math64.h>
 
 #include "omap_drv.h"
 
@@ -400,6 +399,41 @@ static void omap_crtc_atomic_disable(struct drm_crtc *crtc,
 	drm_crtc_vblank_off(crtc);
 }
 
+static enum drm_mode_status omap_crtc_mode_valid(struct drm_crtc *crtc,
+					const struct drm_display_mode *mode)
+{
+	struct omap_drm_private *priv = crtc->dev->dev_private;
+
+	/* Check for bandwidth limit */
+	if (priv->max_bandwidth) {
+		/*
+		 * Estimation for the bandwidth need of a given mode with one
+		 * full screen plane:
+		 * bandwidth = resolution * 32bpp * (pclk / (vtotal * htotal))
+		 *					^^ Refresh rate ^^
+		 *
+		 * The interlaced mode is taken into account by using the
+		 * pixelclock in the calculation.
+		 *
+		 * The equation is rearranged for 64bit arithmetic.
+		 */
+		uint64_t bandwidth = mode->clock * 1000;
+		unsigned int bpp = 4;
+
+		bandwidth = bandwidth * mode->hdisplay * mode->vdisplay * bpp;
+		bandwidth = div_u64(bandwidth, mode->htotal * mode->vtotal);
+
+		/*
+		 * Reject modes which would need more bandwidth if used with one
+		 * full resolution plane (most common use case).
+		 */
+		if (priv->max_bandwidth < bandwidth)
+			return MODE_BAD;
+	}
+
+	return MODE_OK;
+}
+
 static void omap_crtc_mode_set_nofb(struct drm_crtc *crtc)
 {
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
@@ -621,6 +655,7 @@ static const struct drm_crtc_helper_funcs omap_crtc_helper_funcs = {
 	.atomic_flush = omap_crtc_atomic_flush,
 	.atomic_enable = omap_crtc_atomic_enable,
 	.atomic_disable = omap_crtc_atomic_disable,
+	.mode_valid = omap_crtc_mode_valid,
 };
 
 /* -----------------------------------------------------------------------------
diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.h b/drivers/gpu/drm/omapdrm/omap_crtc.h
new file mode 100644
index 0000000000000000000000000000000000000000..ad7b007c6174a718c0fbb86766924fb3fb5c2e6c
--- /dev/null
+++ b/drivers/gpu/drm/omapdrm/omap_crtc.h
@@ -0,0 +1,43 @@
+/*
+ * omap_crtc.h -- OMAP DRM CRTC
+ *
+ * Copyright (C) 2011 Texas Instruments
+ * Author: Rob Clark <rob@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __OMAPDRM_CRTC_H__
+#define __OMAPDRM_CRTC_H__
+
+#include <linux/types.h>
+
+enum omap_channel;
+
+struct drm_crtc;
+struct drm_device;
+struct drm_plane;
+struct omap_dss_device;
+struct videomode;
+
+struct videomode *omap_crtc_timings(struct drm_crtc *crtc);
+enum omap_channel omap_crtc_channel(struct drm_crtc *crtc);
+void omap_crtc_pre_init(void);
+void omap_crtc_pre_uninit(void);
+struct drm_crtc *omap_crtc_init(struct drm_device *dev,
+		struct drm_plane *plane, struct omap_dss_device *dssdev);
+int omap_crtc_wait_pending(struct drm_crtc *crtc);
+void omap_crtc_error_irq(struct drm_crtc *crtc, uint32_t irqstatus);
+void omap_crtc_vblank_irq(struct drm_crtc *crtc);
+
+#endif /* __OMAPDRM_CRTC_H__ */
diff --git a/drivers/gpu/drm/omapdrm/omap_debugfs.c b/drivers/gpu/drm/omapdrm/omap_debugfs.c
index 19b716745623ac9f9539307149e634b1efaed454..b42e286616b00b894b99e133c06d56b4b24db9be 100644
--- a/drivers/gpu/drm/omapdrm/omap_debugfs.c
+++ b/drivers/gpu/drm/omapdrm/omap_debugfs.c
@@ -1,7 +1,5 @@
 /*
- * drivers/gpu/drm/omapdrm/omap_debugfs.c
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob.clark@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_priv.h b/drivers/gpu/drm/omapdrm/omap_dmm_priv.h
index 9f32a83ca5078a587fccda89947f2dd9a05be83d..600064d5c25bfc14d5406eae0a39091bed0c3998 100644
--- a/drivers/gpu/drm/omapdrm/omap_dmm_priv.h
+++ b/drivers/gpu/drm/omapdrm/omap_dmm_priv.h
@@ -1,5 +1,4 @@
 /*
- *
  * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob@ti.com>
  *         Andy Gross <andy.gross@ti.com>
@@ -13,6 +12,7 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  */
+
 #ifndef OMAP_DMM_PRIV_H
 #define OMAP_DMM_PRIV_H
 
@@ -59,12 +59,12 @@
 #define DMM_IRQSTAT_ERR_UPD_DATA	(1<<6)
 #define DMM_IRQSTAT_ERR_LUT_MISS	(1<<7)
 
-#define DMM_IRQSTAT_ERR_MASK	(DMM_IRQ_STAT_ERR_INV_DSC | \
-				DMM_IRQ_STAT_ERR_INV_DATA | \
-				DMM_IRQ_STAT_ERR_UPD_AREA | \
-				DMM_IRQ_STAT_ERR_UPD_CTRL | \
-				DMM_IRQ_STAT_ERR_UPD_DATA | \
-				DMM_IRQ_STAT_ERR_LUT_MISS)
+#define DMM_IRQSTAT_ERR_MASK	(DMM_IRQSTAT_ERR_INV_DSC | \
+				DMM_IRQSTAT_ERR_INV_DATA | \
+				DMM_IRQSTAT_ERR_UPD_AREA | \
+				DMM_IRQSTAT_ERR_UPD_CTRL | \
+				DMM_IRQSTAT_ERR_UPD_DATA | \
+				DMM_IRQSTAT_ERR_LUT_MISS)
 
 #define DMM_PATSTATUS_READY		(1<<0)
 #define DMM_PATSTATUS_VALID		(1<<1)
diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
index c60a85e82c6d8a529aa40062fdde21354d73b505..4be0c94673f5917282944a3b8e93fde6edc7f62f 100644
--- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
+++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
@@ -1,11 +1,10 @@
 /*
  * DMM IOMMU driver support functions for TI OMAP processors.
  *
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob@ti.com>
  *         Andy Gross <andy.gross@ti.com>
  *
- * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
- *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation version 2.
@@ -121,14 +120,22 @@ static int wait_status(struct refill_engine *engine, uint32_t wait_mask)
 	while (true) {
 		r = dmm_read(dmm, reg[PAT_STATUS][engine->id]);
 		err = r & DMM_PATSTATUS_ERR;
-		if (err)
+		if (err) {
+			dev_err(dmm->dev,
+				"%s: error (engine%d). PAT_STATUS: 0x%08x\n",
+				__func__, engine->id, r);
 			return -EFAULT;
+		}
 
 		if ((r & wait_mask) == wait_mask)
 			break;
 
-		if (--i == 0)
+		if (--i == 0) {
+			dev_err(dmm->dev,
+				"%s: timeout (engine%d). PAT_STATUS: 0x%08x\n",
+				__func__, engine->id, r);
 			return -ETIMEDOUT;
+		}
 
 		udelay(1);
 	}
@@ -158,6 +165,11 @@ static irqreturn_t omap_dmm_irq_handler(int irq, void *arg)
 	dmm_write(dmm, status, DMM_PAT_IRQSTATUS);
 
 	for (i = 0; i < dmm->num_engines; i++) {
+		if (status & DMM_IRQSTAT_ERR_MASK)
+			dev_err(dmm->dev,
+				"irq error(engine%d): IRQSTAT 0x%02x\n",
+				i, status & 0xff);
+
 		if (status & DMM_IRQSTAT_LST) {
 			if (dmm->engines[i].async)
 				release_engine(&dmm->engines[i]);
@@ -298,7 +310,12 @@ static int dmm_txn_commit(struct dmm_txn *txn, bool wait)
 				msecs_to_jiffies(100))) {
 			dev_err(dmm->dev, "timed out waiting for done\n");
 			ret = -ETIMEDOUT;
+			goto cleanup;
 		}
+
+		/* Check the engine status before continue */
+		ret = wait_status(engine, DMM_PATSTATUS_READY |
+				  DMM_PATSTATUS_VALID | DMM_PATSTATUS_DONE);
 	}
 
 cleanup:
diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.h b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.h
index e83c78372db8ea422448d14075cc8c8ac4be1c93..cc78ba4fe6ab19ad92602179e6c6323c4858ae2d 100644
--- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.h
+++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.h
@@ -1,5 +1,4 @@
 /*
- *
  * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob@ti.com>
  *         Andy Gross <andy.gross@ti.com>
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index cdf5b0601ebabee12cb876c2d632bc59b18ad66d..dd68b2556f5bb3c1f9fd029499509f9f04cce2f5 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c
@@ -1,7 +1,5 @@
 /*
- * drivers/gpu/drm/omapdrm/omap_drv.c
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -46,14 +44,6 @@
  *                 devices
  */
 
-static void omap_fb_output_poll_changed(struct drm_device *dev)
-{
-	struct omap_drm_private *priv = dev->dev_private;
-	DBG("dev=%p", dev);
-	if (priv->fbdev)
-		drm_fb_helper_hotplug_event(priv->fbdev);
-}
-
 static void omap_atomic_wait_for_completion(struct drm_device *dev,
 					    struct drm_atomic_state *old_state)
 {
@@ -132,7 +122,7 @@ static const struct drm_mode_config_helper_funcs omap_mode_config_helper_funcs =
 
 static const struct drm_mode_config_funcs omap_mode_config_funcs = {
 	.fb_create = omap_framebuffer_create,
-	.output_poll_changed = omap_fb_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
@@ -467,28 +457,6 @@ static int dev_open(struct drm_device *dev, struct drm_file *file)
 	return 0;
 }
 
-/**
- * lastclose - clean up after all DRM clients have exited
- * @dev: DRM device
- *
- * Take care of cleaning up after all DRM clients have exited.  In the
- * mode setting case, we want to restore the kernel's initial mode (just
- * in case the last client left us in a bad state).
- */
-static void dev_lastclose(struct drm_device *dev)
-{
-	struct omap_drm_private *priv = dev->dev_private;
-	int ret;
-
-	DBG("lastclose: dev=%p", dev);
-
-	if (priv->fbdev) {
-		ret = drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev);
-		if (ret)
-			DBG("failed to restore crtc mode");
-	}
-}
-
 static const struct vm_operations_struct omap_gem_vm_ops = {
 	.fault = omap_gem_fault,
 	.open = drm_gem_vm_open,
@@ -511,7 +479,7 @@ static struct drm_driver omap_drm_driver = {
 	.driver_features = DRIVER_MODESET | DRIVER_GEM  | DRIVER_PRIME |
 		DRIVER_ATOMIC | DRIVER_RENDER,
 	.open = dev_open,
-	.lastclose = dev_lastclose,
+	.lastclose = drm_fb_helper_lastclose,
 #ifdef CONFIG_DEBUG_FS
 	.debugfs_init = omap_debugfs_init,
 #endif
@@ -593,6 +561,11 @@ static int pdev_probe(struct platform_device *pdev)
 	ddev->dev_private = priv;
 	platform_set_drvdata(pdev, ddev);
 
+	/* Get memory bandwidth limits */
+	if (priv->dispc_ops->get_memory_bandwidth_limit)
+		priv->max_bandwidth =
+				priv->dispc_ops->get_memory_bandwidth_limit();
+
 	omap_gem_init(ddev);
 
 	ret = omap_modeset_init(ddev);
@@ -740,7 +713,7 @@ static int omap_drm_resume(struct device *dev)
 
 	drm_kms_helper_poll_enable(drm_dev);
 
-	return omap_gem_resume(dev);
+	return omap_gem_resume(drm_dev);
 }
 #endif
 
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h
index 4bd1e9070b3184428058331bb31a4ec1f96924c6..0ac97fe09f9b9049ed7ff3e5fda1b8892fab1631 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.h
+++ b/drivers/gpu/drm/omapdrm/omap_drv.h
@@ -1,7 +1,5 @@
 /*
- * drivers/gpu/drm/omapdrm/omap_drv.h
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -17,8 +15,8 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifndef __OMAP_DRV_H__
-#define __OMAP_DRV_H__
+#ifndef __OMAPDRM_DRV_H__
+#define __OMAPDRM_DRV_H__
 
 #include <linux/module.h>
 #include <linux/types.h>
@@ -31,6 +29,15 @@
 
 #include "dss/omapdss.h"
 
+#include "omap_connector.h"
+#include "omap_crtc.h"
+#include "omap_encoder.h"
+#include "omap_fb.h"
+#include "omap_fbdev.h"
+#include "omap_gem.h"
+#include "omap_irq.h"
+#include "omap_plane.h"
+
 #define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
 #define VERB(fmt, ...) if (0) DRM_DEBUG(fmt, ##__VA_ARGS__) /* verbose debug */
 
@@ -38,14 +45,6 @@
 
 struct omap_drm_usergart;
 
-/* For KMS code that needs to wait for a certain # of IRQs:
- */
-struct omap_irq_wait;
-struct omap_irq_wait * omap_irq_wait_init(struct drm_device *dev,
-		uint32_t irqmask, int count);
-int omap_irq_wait(struct drm_device *dev, struct omap_irq_wait *wait,
-		unsigned long timeout);
-
 struct omap_drm_private {
 	uint32_t omaprev;
 
@@ -83,117 +82,12 @@ struct omap_drm_private {
 	spinlock_t wait_lock;		/* protects the wait_list */
 	struct list_head wait_list;	/* list of omap_irq_wait */
 	uint32_t irq_mask;		/* enabled irqs in addition to wait_list */
+
+	/* memory bandwidth limit if it is needed on the platform */
+	unsigned int max_bandwidth;
 };
 
 
-#ifdef CONFIG_DEBUG_FS
 int omap_debugfs_init(struct drm_minor *minor);
-void omap_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m);
-void omap_gem_describe(struct drm_gem_object *obj, struct seq_file *m);
-void omap_gem_describe_objects(struct list_head *list, struct seq_file *m);
-#endif
-
-#ifdef CONFIG_PM
-int omap_gem_resume(struct device *dev);
-#endif
-
-int omap_irq_enable_vblank(struct drm_crtc *crtc);
-void omap_irq_disable_vblank(struct drm_crtc *crtc);
-void omap_drm_irq_uninstall(struct drm_device *dev);
-int omap_drm_irq_install(struct drm_device *dev);
-
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-struct drm_fb_helper *omap_fbdev_init(struct drm_device *dev);
-void omap_fbdev_free(struct drm_device *dev);
-#else
-static inline struct drm_fb_helper *omap_fbdev_init(struct drm_device *dev)
-{
-	return NULL;
-}
-static inline void omap_fbdev_free(struct drm_device *dev)
-{
-}
-#endif
-
-struct videomode *omap_crtc_timings(struct drm_crtc *crtc);
-enum omap_channel omap_crtc_channel(struct drm_crtc *crtc);
-void omap_crtc_pre_init(void);
-void omap_crtc_pre_uninit(void);
-struct drm_crtc *omap_crtc_init(struct drm_device *dev,
-		struct drm_plane *plane, struct omap_dss_device *dssdev);
-int omap_crtc_wait_pending(struct drm_crtc *crtc);
-void omap_crtc_error_irq(struct drm_crtc *crtc, uint32_t irqstatus);
-void omap_crtc_vblank_irq(struct drm_crtc *crtc);
-
-struct drm_plane *omap_plane_init(struct drm_device *dev,
-		int idx, enum drm_plane_type type,
-		u32 possible_crtcs);
-void omap_plane_install_properties(struct drm_plane *plane,
-		struct drm_mode_object *obj);
-
-struct drm_encoder *omap_encoder_init(struct drm_device *dev,
-		struct omap_dss_device *dssdev);
-
-struct drm_connector *omap_connector_init(struct drm_device *dev,
-		int connector_type, struct omap_dss_device *dssdev,
-		struct drm_encoder *encoder);
-struct drm_encoder *omap_connector_attached_encoder(
-		struct drm_connector *connector);
-bool omap_connector_get_hdmi_mode(struct drm_connector *connector);
-
-struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev,
-		struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd);
-struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev,
-		const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos);
-int omap_framebuffer_pin(struct drm_framebuffer *fb);
-void omap_framebuffer_unpin(struct drm_framebuffer *fb);
-void omap_framebuffer_update_scanout(struct drm_framebuffer *fb,
-		struct drm_plane_state *state, struct omap_overlay_info *info);
-struct drm_connector *omap_framebuffer_get_next_connector(
-		struct drm_framebuffer *fb, struct drm_connector *from);
-bool omap_framebuffer_supports_rotation(struct drm_framebuffer *fb);
-
-void omap_gem_init(struct drm_device *dev);
-void omap_gem_deinit(struct drm_device *dev);
-
-struct drm_gem_object *omap_gem_new(struct drm_device *dev,
-		union omap_gem_size gsize, uint32_t flags);
-struct drm_gem_object *omap_gem_new_dmabuf(struct drm_device *dev, size_t size,
-		struct sg_table *sgt);
-int omap_gem_new_handle(struct drm_device *dev, struct drm_file *file,
-		union omap_gem_size gsize, uint32_t flags, uint32_t *handle);
-void omap_gem_free_object(struct drm_gem_object *obj);
-void *omap_gem_vaddr(struct drm_gem_object *obj);
-int omap_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
-		uint32_t handle, uint64_t *offset);
-int omap_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
-		struct drm_mode_create_dumb *args);
-int omap_gem_mmap(struct file *filp, struct vm_area_struct *vma);
-int omap_gem_mmap_obj(struct drm_gem_object *obj,
-		struct vm_area_struct *vma);
-int omap_gem_fault(struct vm_fault *vmf);
-int omap_gem_roll(struct drm_gem_object *obj, uint32_t roll);
-void omap_gem_cpu_sync_page(struct drm_gem_object *obj, int pgoff);
-void omap_gem_dma_sync_buffer(struct drm_gem_object *obj,
-		enum dma_data_direction dir);
-int omap_gem_pin(struct drm_gem_object *obj, dma_addr_t *dma_addr);
-void omap_gem_unpin(struct drm_gem_object *obj);
-int omap_gem_get_pages(struct drm_gem_object *obj, struct page ***pages,
-		bool remap);
-int omap_gem_put_pages(struct drm_gem_object *obj);
-uint32_t omap_gem_flags(struct drm_gem_object *obj);
-int omap_gem_rotated_dma_addr(struct drm_gem_object *obj, uint32_t orient,
-		int x, int y, dma_addr_t *dma_addr);
-uint64_t omap_gem_mmap_offset(struct drm_gem_object *obj);
-size_t omap_gem_mmap_size(struct drm_gem_object *obj);
-int omap_gem_tiled_stride(struct drm_gem_object *obj, uint32_t orient);
-
-struct dma_buf *omap_gem_prime_export(struct drm_device *dev,
-		struct drm_gem_object *obj, int flags);
-struct drm_gem_object *omap_gem_prime_import(struct drm_device *dev,
-		struct dma_buf *buffer);
-
-/* map crtc to vblank mask */
-struct omap_dss_device *omap_encoder_get_dssdev(struct drm_encoder *encoder);
-
-#endif /* __OMAP_DRV_H__ */
+
+#endif /* __OMAPDRM_DRV_H__ */
diff --git a/drivers/gpu/drm/omapdrm/omap_encoder.c b/drivers/gpu/drm/omapdrm/omap_encoder.c
index 624f5b50b75518dd3f094efbc2c943e9e7dc3658..fcdf4b0a8eecab83fdd8bd1a230e15747dae79d8 100644
--- a/drivers/gpu/drm/omapdrm/omap_encoder.c
+++ b/drivers/gpu/drm/omapdrm/omap_encoder.c
@@ -1,7 +1,5 @@
 /*
- * drivers/gpu/drm/omapdrm/omap_encoder.c
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/omap_encoder.h b/drivers/gpu/drm/omapdrm/omap_encoder.h
new file mode 100644
index 0000000000000000000000000000000000000000..d2f308bec494f38b77ba6ccedc648f34bfc98ee3
--- /dev/null
+++ b/drivers/gpu/drm/omapdrm/omap_encoder.h
@@ -0,0 +1,33 @@
+/*
+ * omap_encoder.h -- OMAP DRM Encoder
+ *
+ * Copyright (C) 2011 Texas Instruments
+ * Author: Rob Clark <rob@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __OMAPDRM_ENCODER_H__
+#define __OMAPDRM_ENCODER_H__
+
+struct drm_device;
+struct drm_encoder;
+struct omap_dss_device;
+
+struct drm_encoder *omap_encoder_init(struct drm_device *dev,
+		struct omap_dss_device *dssdev);
+
+/* map crtc to vblank mask */
+struct omap_dss_device *omap_encoder_get_dssdev(struct drm_encoder *encoder);
+
+#endif /* __OMAPDRM_ENCODER_H__ */
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c
index b1a762b70cbf8d1a9410966d8496276075a0db97..b2539a90e1a4d3276850c38ae362e7aa5fa5953f 100644
--- a/drivers/gpu/drm/omapdrm/omap_fb.c
+++ b/drivers/gpu/drm/omapdrm/omap_fb.c
@@ -1,7 +1,5 @@
 /*
- * drivers/gpu/drm/omapdrm/omap_fb.c
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.h b/drivers/gpu/drm/omapdrm/omap_fb.h
new file mode 100644
index 0000000000000000000000000000000000000000..94ad5f9e440490fef9a09333d92ca6ed8f8d85ee
--- /dev/null
+++ b/drivers/gpu/drm/omapdrm/omap_fb.h
@@ -0,0 +1,46 @@
+/*
+ * omap_fb.h -- OMAP DRM Framebuffer
+ *
+ * Copyright (C) 2011 Texas Instruments
+ * Author: Rob Clark <rob@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __OMAPDRM_FB_H__
+#define __OMAPDRM_FB_H__
+
+struct drm_connector;
+struct drm_device;
+struct drm_file;
+struct drm_framebuffer;
+struct drm_gem_object;
+struct drm_mode_fb_cmd2;
+struct drm_plane_state;
+struct omap_overlay_info;
+struct seq_file;
+
+struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev,
+		struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd);
+struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev,
+		const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos);
+int omap_framebuffer_pin(struct drm_framebuffer *fb);
+void omap_framebuffer_unpin(struct drm_framebuffer *fb);
+void omap_framebuffer_update_scanout(struct drm_framebuffer *fb,
+		struct drm_plane_state *state, struct omap_overlay_info *info);
+struct drm_connector *omap_framebuffer_get_next_connector(
+		struct drm_framebuffer *fb, struct drm_connector *from);
+bool omap_framebuffer_supports_rotation(struct drm_framebuffer *fb);
+void omap_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m);
+
+#endif /* __OMAPDRM_FB_H__ */
diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c
index 9273118040b740e01f08b127c40aa5cfb7edc1d9..fb309d19ca1b923f5e3619ee174ef2099c5bd102 100644
--- a/drivers/gpu/drm/omapdrm/omap_fbdev.c
+++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c
@@ -1,7 +1,5 @@
 /*
- * drivers/gpu/drm/omapdrm/omap_fbdev.c
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -84,9 +82,6 @@ static struct fb_ops omap_fb_ops = {
 	.owner = THIS_MODULE,
 	DRM_FB_HELPER_DEFAULT_OPS,
 
-	/* Note: to properly handle manual update displays, we wrap the
-	 * basic fbdev ops which write to the framebuffer
-	 */
 	.fb_read = drm_fb_helper_sys_read,
 	.fb_write = drm_fb_helper_sys_write,
 	.fb_fillrect = drm_fb_helper_sys_fillrect,
diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.h b/drivers/gpu/drm/omapdrm/omap_fbdev.h
new file mode 100644
index 0000000000000000000000000000000000000000..1f5ba0996a1aa7645b0bdbca006566451c52974f
--- /dev/null
+++ b/drivers/gpu/drm/omapdrm/omap_fbdev.h
@@ -0,0 +1,39 @@
+/*
+ * omap_fbdev.h -- OMAP DRM FBDEV Compatibility
+ *
+ * Copyright (C) 2011 Texas Instruments
+ * Author: Rob Clark <rob@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __OMAPDRM_FBDEV_H__
+#define __OMAPDRM_FBDEV_H__
+
+struct drm_device;
+struct drm_fb_helper;
+
+#ifdef CONFIG_DRM_FBDEV_EMULATION
+struct drm_fb_helper *omap_fbdev_init(struct drm_device *dev);
+void omap_fbdev_free(struct drm_device *dev);
+#else
+static inline struct drm_fb_helper *omap_fbdev_init(struct drm_device *dev)
+{
+	return NULL;
+}
+static inline void omap_fbdev_free(struct drm_device *dev)
+{
+}
+#endif
+
+#endif /* __OMAPDRM_FBDEV_H__ */
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index 5c5c86ddd6f4777a10f5b326d2df34138ba33cf8..443469d4fa463e7782d0db1a9f380c46bef358ed 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -1,7 +1,5 @@
 /*
- * drivers/gpu/drm/omapdrm/omap_gem.c
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob.clark@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -996,10 +994,9 @@ void *omap_gem_vaddr(struct drm_gem_object *obj)
 
 #ifdef CONFIG_PM
 /* re-pin objects in DMM in resume path: */
-int omap_gem_resume(struct device *dev)
+int omap_gem_resume(struct drm_device *dev)
 {
-	struct drm_device *drm_dev = dev_get_drvdata(dev);
-	struct omap_drm_private *priv = drm_dev->dev_private;
+	struct omap_drm_private *priv = dev->dev_private;
 	struct omap_gem_object *omap_obj;
 	int ret = 0;
 
@@ -1012,7 +1009,7 @@ int omap_gem_resume(struct device *dev)
 					omap_obj->pages, npages,
 					omap_obj->roll, true);
 			if (ret) {
-				dev_err(dev, "could not repin: %d\n", ret);
+				dev_err(dev->dev, "could not repin: %d\n", ret);
 				return ret;
 			}
 		}
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.h b/drivers/gpu/drm/omapdrm/omap_gem.h
new file mode 100644
index 0000000000000000000000000000000000000000..35fa690b3d9063a04da720f691ad2a4fa79cb13c
--- /dev/null
+++ b/drivers/gpu/drm/omapdrm/omap_gem.h
@@ -0,0 +1,99 @@
+/*
+ * omap_gem.h -- OMAP DRM GEM Object Management
+ *
+ * Copyright (C) 2011 Texas Instruments
+ * Author: Rob Clark <rob@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __OMAPDRM_GEM_H__
+#define __OMAPDRM_GEM_H__
+
+#include <linux/types.h>
+
+enum dma_data_direction;
+
+struct dma_buf;
+struct drm_device;
+struct drm_file;
+struct drm_gem_object;
+struct drm_mode_create_dumb;
+struct file;
+struct list_head;
+struct page;
+struct seq_file;
+struct vm_area_struct;
+struct vm_fault;
+
+union omap_gem_size;
+
+/* Initialization and Cleanup */
+void omap_gem_init(struct drm_device *dev);
+void omap_gem_deinit(struct drm_device *dev);
+
+#ifdef CONFIG_PM
+int omap_gem_resume(struct drm_device *dev);
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+void omap_gem_describe(struct drm_gem_object *obj, struct seq_file *m);
+void omap_gem_describe_objects(struct list_head *list, struct seq_file *m);
+#endif
+
+/* GEM Object Creation and Deletion */
+struct drm_gem_object *omap_gem_new(struct drm_device *dev,
+		union omap_gem_size gsize, uint32_t flags);
+struct drm_gem_object *omap_gem_new_dmabuf(struct drm_device *dev, size_t size,
+		struct sg_table *sgt);
+int omap_gem_new_handle(struct drm_device *dev, struct drm_file *file,
+		union omap_gem_size gsize, uint32_t flags, uint32_t *handle);
+void omap_gem_free_object(struct drm_gem_object *obj);
+void *omap_gem_vaddr(struct drm_gem_object *obj);
+
+/* Dumb Buffers Interface */
+int omap_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
+		uint32_t handle, uint64_t *offset);
+int omap_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
+		struct drm_mode_create_dumb *args);
+
+/* mmap() Interface */
+int omap_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+int omap_gem_mmap_obj(struct drm_gem_object *obj,
+		struct vm_area_struct *vma);
+uint64_t omap_gem_mmap_offset(struct drm_gem_object *obj);
+size_t omap_gem_mmap_size(struct drm_gem_object *obj);
+
+/* PRIME Interface */
+struct dma_buf *omap_gem_prime_export(struct drm_device *dev,
+		struct drm_gem_object *obj, int flags);
+struct drm_gem_object *omap_gem_prime_import(struct drm_device *dev,
+		struct dma_buf *buffer);
+
+int omap_gem_fault(struct vm_fault *vmf);
+int omap_gem_roll(struct drm_gem_object *obj, uint32_t roll);
+void omap_gem_cpu_sync_page(struct drm_gem_object *obj, int pgoff);
+void omap_gem_dma_sync_buffer(struct drm_gem_object *obj,
+		enum dma_data_direction dir);
+int omap_gem_pin(struct drm_gem_object *obj, dma_addr_t *dma_addr);
+void omap_gem_unpin(struct drm_gem_object *obj);
+int omap_gem_get_pages(struct drm_gem_object *obj, struct page ***pages,
+		bool remap);
+int omap_gem_put_pages(struct drm_gem_object *obj);
+
+uint32_t omap_gem_flags(struct drm_gem_object *obj);
+int omap_gem_rotated_dma_addr(struct drm_gem_object *obj, uint32_t orient,
+		int x, int y, dma_addr_t *dma_addr);
+int omap_gem_tiled_stride(struct drm_gem_object *obj, uint32_t orient);
+
+#endif /* __OMAPDRM_GEM_H__ */
diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
index afdbad5c866abdac8791a9174448144e69a14f27..8e41d649e248dd14ba6a5340c9e24db177088a6d 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
@@ -1,7 +1,5 @@
 /*
- * drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob.clark@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/omap_irq.c b/drivers/gpu/drm/omapdrm/omap_irq.c
index 013b0bba712ffa914d5d25cc4eebcf6075dccea4..53ba424823b2abc02c082a49752db8b17a530fb7 100644
--- a/drivers/gpu/drm/omapdrm/omap_irq.c
+++ b/drivers/gpu/drm/omapdrm/omap_irq.c
@@ -1,7 +1,5 @@
 /*
- * drivers/gpu/drm/omapdrm/omap_irq.c
- *
- * Copyright (C) 2012 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob.clark@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/omap_irq.h b/drivers/gpu/drm/omapdrm/omap_irq.h
new file mode 100644
index 0000000000000000000000000000000000000000..606c09932bc0f0a8d056331468ac15a3ec7fa9af
--- /dev/null
+++ b/drivers/gpu/drm/omapdrm/omap_irq.h
@@ -0,0 +1,39 @@
+/*
+ * omap_irq.h -- OMAP DRM IRQ Handling
+ *
+ * Copyright (C) 2011 Texas Instruments
+ * Author: Rob Clark <rob@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __OMAPDRM_IRQ_H__
+#define __OMAPDRM_IRQ_H__
+
+#include <linux/types.h>
+
+struct drm_crtc;
+struct drm_device;
+struct omap_irq_wait;
+
+int omap_irq_enable_vblank(struct drm_crtc *crtc);
+void omap_irq_disable_vblank(struct drm_crtc *crtc);
+void omap_drm_irq_uninstall(struct drm_device *dev);
+int omap_drm_irq_install(struct drm_device *dev);
+
+struct omap_irq_wait *omap_irq_wait_init(struct drm_device *dev,
+		uint32_t irqmask, int count);
+int omap_irq_wait(struct drm_device *dev, struct omap_irq_wait *wait,
+		unsigned long timeout);
+
+#endif /* __OMAPDRM_IRQ_H__ */
diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c
index 15e5d5d325c617b8cf00dc84dad5bf299897f24c..7d789d1551a1ce8a9f7af7018e912c43fca6f7da 100644
--- a/drivers/gpu/drm/omapdrm/omap_plane.c
+++ b/drivers/gpu/drm/omapdrm/omap_plane.c
@@ -1,7 +1,5 @@
 /*
- * drivers/gpu/drm/omapdrm/omap_plane.c
- *
- * Copyright (C) 2011 Texas Instruments
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Rob Clark <rob.clark@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/gpu/drm/omapdrm/omap_plane.h b/drivers/gpu/drm/omapdrm/omap_plane.h
new file mode 100644
index 0000000000000000000000000000000000000000..dc5e82ad061d32c5eef2bfc55a748d6db67b71bd
--- /dev/null
+++ b/drivers/gpu/drm/omapdrm/omap_plane.h
@@ -0,0 +1,37 @@
+/*
+ * omap_plane.h -- OMAP DRM Plane
+ *
+ * Copyright (C) 2011 Texas Instruments
+ * Author: Rob Clark <rob@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __OMAPDRM_PLANE_H__
+#define __OMAPDRM_PLANE_H__
+
+#include <linux/types.h>
+
+enum drm_plane_type;
+
+struct drm_device;
+struct drm_mode_object;
+struct drm_plane;
+
+struct drm_plane *omap_plane_init(struct drm_device *dev,
+		int idx, enum drm_plane_type type,
+		u32 possible_crtcs);
+void omap_plane_install_properties(struct drm_plane *plane,
+		struct drm_mode_object *obj);
+
+#endif /* __OMAPDRM_PLANE_H__ */
diff --git a/drivers/gpu/drm/omapdrm/tcm-sita.c b/drivers/gpu/drm/omapdrm/tcm-sita.c
index c10fdfc0930f7fae6a06c28f70724316de329896..661362d072f7b8dc7e383db9f296664f3b92d709 100644
--- a/drivers/gpu/drm/omapdrm/tcm-sita.c
+++ b/drivers/gpu/drm/omapdrm/tcm-sita.c
@@ -1,13 +1,11 @@
 /*
- * tcm-sita.c
- *
  * SImple Tiler Allocator (SiTA): 2D and 1D allocation(reservation) algorithm
  *
  * Authors: Ravi Ramachandra <r.ramachandra@ti.com>,
  *          Lajos Molnar <molnar@ti.com>
  *          Andy Gross <andy.gross@ti.com>
  *
- * Copyright (C) 2012 Texas Instruments, Inc.
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
  *
  * This package is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/drivers/gpu/drm/omapdrm/tcm-sita.h b/drivers/gpu/drm/omapdrm/tcm-sita.h
index 0444f868671c9f0d51e5611df1408a64437153f3..460e63dbf825ee91f86b58722a978cda8d7044ed 100644
--- a/drivers/gpu/drm/omapdrm/tcm-sita.h
+++ b/drivers/gpu/drm/omapdrm/tcm-sita.h
@@ -1,11 +1,9 @@
 /*
- * tcm_sita.h
- *
  * SImple Tiler Allocator (SiTA) private structures.
  *
+ * Copyright (C) 2009-2011 Texas Instruments Incorporated - http://www.ti.com/
  * Author: Ravi Ramachandra <r.ramachandra@ti.com>
  *
- * Copyright (C) 2009-2011 Texas Instruments, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/drivers/gpu/drm/omapdrm/tcm.h b/drivers/gpu/drm/omapdrm/tcm.h
index ef7df7d6fc84c7faa2b92b94e93be1853a8c4c55..d8a369a4f2699f286692fddd06ed4a3b697a8e60 100644
--- a/drivers/gpu/drm/omapdrm/tcm.h
+++ b/drivers/gpu/drm/omapdrm/tcm.h
@@ -1,6 +1,4 @@
 /*
- * tcm.h
- *
  * TILER container manager specification and support functions for TI
  * TILER driver.
  *
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index 726f3fb3312d46421c35e8a7b6238fe8ed38d5c7..6ba4031f391907580d3abc9fbd1f7739f56a24b6 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -28,6 +28,14 @@ config DRM_PANEL_SIMPLE
 	  that it can be automatically turned off when the panel goes into a
 	  low power state.
 
+config DRM_PANEL_ILITEK_IL9322
+	tristate "Ilitek ILI9322 320x240 QVGA panels"
+	depends on OF && SPI
+	select REGMAP
+	help
+	  Say Y here if you want to enable support for Ilitek IL9322
+	  QVGA (320x240) RGB, YUV and ITU-T BT.656 panels.
+
 config DRM_PANEL_INNOLUX_P079ZCA
 	tristate "Innolux P079ZCA panel"
 	depends on OF
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index 2c4e1a93e05fbd7045cdbb700dfe65e990400c4e..6d251ebc568cbf3793741af106befa73013522ce 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_DRM_PANEL_LVDS) += panel-lvds.o
 obj-$(CONFIG_DRM_PANEL_SIMPLE) += panel-simple.o
+obj-$(CONFIG_DRM_PANEL_ILITEK_IL9322) += panel-ilitek-ili9322.o
 obj-$(CONFIG_DRM_PANEL_INNOLUX_P079ZCA) += panel-innolux-p079zca.o
 obj-$(CONFIG_DRM_PANEL_JDI_LT070ME05000) += panel-jdi-lt070me05000.o
 obj-$(CONFIG_DRM_PANEL_LG_LG4573) += panel-lg-lg4573.o
diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
new file mode 100644
index 0000000000000000000000000000000000000000..b4ec0ecff8079e6e21de6b3c2089943064e5d22f
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
@@ -0,0 +1,962 @@
+/*
+ * Ilitek ILI9322 TFT LCD drm_panel driver.
+ *
+ * This panel can be configured to support:
+ * - 8-bit serial RGB interface
+ * - 24-bit parallel RGB interface
+ * - 8-bit ITU-R BT.601 interface
+ * - 8-bit ITU-R BT.656 interface
+ * - Up to 320RGBx240 dots resolution TFT LCD displays
+ * - Scaling, brightness and contrast
+ *
+ * The scaling means that the display accepts a 640x480 or 720x480
+ * input and rescales it to fit to the 320x240 display. So what we
+ * present to the system is something else than what comes out on the
+ * actual display.
+ *
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ * Derived from drivers/drm/gpu/panel/panel-samsung-ld9040.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_panel.h>
+
+#include <linux/of_device.h>
+#include <linux/bitops.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+
+#include <video/mipi_display.h>
+#include <video/of_videomode.h>
+#include <video/videomode.h>
+
+#define ILI9322_CHIP_ID			0x00
+#define ILI9322_CHIP_ID_MAGIC		0x96
+
+/*
+ * Voltage on the communication interface, from 0.7 (0x00)
+ * to 1.32 (0x1f) times the VREG1OUT voltage in 2% increments.
+ * 1.00 (0x0f) is the default.
+ */
+#define ILI9322_VCOM_AMP		0x01
+
+/*
+ * High voltage on the communication signals, from 0.37 (0x00) to
+ * 1.0 (0x3f) times the VREGOUT1 voltage in 1% increments.
+ * 0.83 (0x2e) is the default.
+ */
+#define ILI9322_VCOM_HIGH		0x02
+
+/*
+ * VREG1 voltage regulator from 3.6V (0x00) to 6.0V (0x18) in 0.1V
+ * increments. 5.4V (0x12) is the default. This is the reference
+ * voltage for the VCOM levels and the greyscale level.
+ */
+#define ILI9322_VREG1_VOLTAGE		0x03
+
+/* Describes the incoming signal */
+#define ILI9322_ENTRY			0x06
+/* 0 = right-to-left, 1 = left-to-right (default), horizontal flip */
+#define ILI9322_ENTRY_HDIR		BIT(0)
+/* 0 = down-to-up, 1 = up-to-down (default), vertical flip  */
+#define ILI9322_ENTRY_VDIR		BIT(1)
+/* NTSC, PAL or autodetect */
+#define ILI9322_ENTRY_NTSC		(0 << 2)
+#define ILI9322_ENTRY_PAL		(1 << 2)
+#define ILI9322_ENTRY_AUTODETECT	(3 << 2)
+/* Input format */
+#define ILI9322_ENTRY_SERIAL_RGB_THROUGH (0 << 4)
+#define ILI9322_ENTRY_SERIAL_RGB_ALIGNED (1 << 4)
+#define ILI9322_ENTRY_SERIAL_RGB_DUMMY_320X240 (2 << 4)
+#define ILI9322_ENTRY_SERIAL_RGB_DUMMY_360X240 (3 << 4)
+#define ILI9322_ENTRY_DISABLE_1		(4 << 4)
+#define ILI9322_ENTRY_PARALLEL_RGB_THROUGH (5 << 4)
+#define ILI9322_ENTRY_PARALLEL_RGB_ALIGNED (6 << 4)
+#define ILI9322_ENTRY_YUV_640Y_320CBCR_25_54_MHZ (7 << 4)
+#define ILI9322_ENTRY_YUV_720Y_360CBCR_27_MHZ (8 << 4)
+#define ILI9322_ENTRY_DISABLE_2		(9 << 4)
+#define ILI9322_ENTRY_ITU_R_BT_656_720X360 (10 << 4)
+#define ILI9322_ENTRY_ITU_R_BT_656_640X320 (11 << 4)
+
+/* Power control */
+#define ILI9322_POW_CTRL		0x07
+#define ILI9322_POW_CTRL_STB		BIT(0) /* 0 = standby, 1 = normal */
+#define ILI9322_POW_CTRL_VGL		BIT(1) /* 0 = off, 1 = on  */
+#define ILI9322_POW_CTRL_VGH		BIT(2) /* 0 = off, 1 = on  */
+#define ILI9322_POW_CTRL_DDVDH		BIT(3) /* 0 = off, 1 = on  */
+#define ILI9322_POW_CTRL_VCOM		BIT(4) /* 0 = off, 1 = on  */
+#define ILI9322_POW_CTRL_VCL		BIT(5) /* 0 = off, 1 = on  */
+#define ILI9322_POW_CTRL_AUTO		BIT(6) /* 0 = interactive, 1 = auto */
+#define ILI9322_POW_CTRL_STANDBY	(ILI9322_POW_CTRL_VGL | \
+					 ILI9322_POW_CTRL_VGH | \
+					 ILI9322_POW_CTRL_DDVDH | \
+					 ILI9322_POW_CTRL_VCL | \
+					 ILI9322_POW_CTRL_AUTO | \
+					 BIT(7))
+#define ILI9322_POW_CTRL_DEFAULT	(ILI9322_POW_CTRL_STANDBY | \
+					 ILI9322_POW_CTRL_STB)
+
+/* Vertical back porch bits 0..5 */
+#define ILI9322_VBP			0x08
+
+/* Horizontal back porch, 8 bits */
+#define ILI9322_HBP			0x09
+
+/*
+ * Polarity settings:
+ * 1 = positive polarity
+ * 0 = negative polarity
+ */
+#define ILI9322_POL			0x0a
+#define ILI9322_POL_DCLK		BIT(0) /* 1 default */
+#define ILI9322_POL_HSYNC		BIT(1) /* 0 default */
+#define ILI9322_POL_VSYNC		BIT(2) /* 0 default */
+#define ILI9322_POL_DE			BIT(3) /* 1 default */
+/*
+ * 0 means YCBCR are ordered Cb0,Y0,Cr0,Y1,Cb2,Y2,Cr2,Y3 (default)
+ *   in RGB mode this means RGB comes in RGBRGB
+ * 1 means YCBCR are ordered Cr0,Y0,Cb0,Y1,Cr2,Y2,Cb2,Y3
+ *   in RGB mode this means RGB comes in BGRBGR
+ */
+#define ILI9322_POL_YCBCR_MODE		BIT(4)
+/* Formula A for YCbCR->RGB = 0, Formula B = 1 */
+#define ILI9322_POL_FORMULA		BIT(5)
+/* Reverse polarity: 0 = 0..255, 1 = 255..0 */
+#define ILI9322_POL_REV			BIT(6)
+
+#define ILI9322_IF_CTRL			0x0b
+#define ILI9322_IF_CTRL_HSYNC_VSYNC	0x00
+#define ILI9322_IF_CTRL_HSYNC_VSYNC_DE	BIT(2)
+#define ILI9322_IF_CTRL_DE_ONLY		BIT(3)
+#define ILI9322_IF_CTRL_SYNC_DISABLED	(BIT(2) | BIT(3))
+#define ILI9322_IF_CTRL_LINE_INVERSION	BIT(0) /* Not set means frame inv */
+
+#define ILI9322_GLOBAL_RESET		0x04
+#define ILI9322_GLOBAL_RESET_ASSERT	0x00 /* bit 0 = 0 -> reset */
+
+/*
+ * 4+4 bits of negative and positive gamma correction
+ * Upper nybble, bits 4-7 are negative gamma
+ * Lower nybble, bits 0-3 are positive gamma
+ */
+#define ILI9322_GAMMA_1			0x10
+#define ILI9322_GAMMA_2			0x11
+#define ILI9322_GAMMA_3			0x12
+#define ILI9322_GAMMA_4			0x13
+#define ILI9322_GAMMA_5			0x14
+#define ILI9322_GAMMA_6			0x15
+#define ILI9322_GAMMA_7			0x16
+#define ILI9322_GAMMA_8			0x17
+
+/**
+ * enum ili9322_input - the format of the incoming signal to the panel
+ *
+ * The panel can be connected to various input streams and four of them can
+ * be selected by electronic straps on the display. However it is possible
+ * to select another mode or override the electronic default with this
+ * setting.
+ */
+enum ili9322_input {
+	ILI9322_INPUT_SRGB_THROUGH = 0x0,
+	ILI9322_INPUT_SRGB_ALIGNED = 0x1,
+	ILI9322_INPUT_SRGB_DUMMY_320X240 = 0x2,
+	ILI9322_INPUT_SRGB_DUMMY_360X240 = 0x3,
+	ILI9322_INPUT_DISABLED_1 = 0x4,
+	ILI9322_INPUT_PRGB_THROUGH = 0x5,
+	ILI9322_INPUT_PRGB_ALIGNED = 0x6,
+	ILI9322_INPUT_YUV_640X320_YCBCR = 0x7,
+	ILI9322_INPUT_YUV_720X360_YCBCR = 0x8,
+	ILI9322_INPUT_DISABLED_2 = 0x9,
+	ILI9322_INPUT_ITU_R_BT656_720X360_YCBCR = 0xa,
+	ILI9322_INPUT_ITU_R_BT656_640X320_YCBCR = 0xb,
+	ILI9322_INPUT_UNKNOWN = 0xc,
+};
+
+const char *ili9322_inputs[] = {
+	"8 bit serial RGB through",
+	"8 bit serial RGB aligned",
+	"8 bit serial RGB dummy 320x240",
+	"8 bit serial RGB dummy 360x240",
+	"disabled 1",
+	"24 bit parallel RGB through",
+	"24 bit parallel RGB aligned",
+	"24 bit YUV 640Y 320CbCr",
+	"24 bit YUV 720Y 360CbCr",
+	"disabled 2",
+	"8 bit ITU-R BT.656 720Y 360CbCr",
+	"8 bit ITU-R BT.656 640Y 320CbCr",
+};
+
+/**
+ * struct ili9322_config - the system specific ILI9322 configuration
+ * @width_mm: physical panel width [mm]
+ * @height_mm: physical panel height [mm]
+ * @flip_horizontal: flip the image horizontally (right-to-left scan)
+ * (only in RGB and YUV modes)
+ * @flip_vertical: flip the image vertically (down-to-up scan)
+ * (only in RGB and YUV modes)
+ * @input: the input/entry type used in this system, if this is set to
+ * ILI9322_INPUT_UNKNOWN the driver will try to figure it out by probing
+ * the hardware
+ * @vreg1out_mv: the output in microvolts for the VREGOUT1 regulator used
+ * to drive the physical display. Valid ranges are 3600 thru 6000 in 100
+ * microvolt increments. If not specified, hardware defaults will be
+ * used (4.5V).
+ * @vcom_high_percent: the percentage of VREGOUT1 used for the peak
+ * voltage on the communications link. Valid ranges are 37 thru 100
+ * percent. If not specified, hardware defaults will be used (91%).
+ * @vcom_amplitude_percent: the percentage of VREGOUT1 used for the
+ * peak-to-peak amplitude of the communcation signals to the physical
+ * display. Valid ranges are 70 thru 132 percent in increments if two
+ * percent. Odd percentages will be truncated. If not specified, hardware
+ * defaults will be used (114%).
+ * @dclk_active_high: data/pixel clock active high, data will be clocked
+ * in on the rising edge of the DCLK (this is usually the case).
+ * @syncmode: The synchronization mode, what sync signals are emitted.
+ * See the enum for details.
+ * @de_active_high: DE (data entry) is active high
+ * @hsync_active_high: HSYNC is active high
+ * @vsync_active_high: VSYNC is active high
+ * @gamma_corr_pos: a set of 8 nybbles describing positive
+ * gamma correction for voltages V1 thru V8. Valid range 0..15
+ * @gamma_corr_neg: a set of 8 nybbles describing negative
+ * gamma correction for voltages V1 thru V8. Valid range 0..15
+ *
+ * These adjust what grayscale voltage will be output for input data V1 = 0,
+ * V2 = 16, V3 = 48, V4 = 96, V5 = 160, V6 = 208, V7 = 240 and V8 = 255.
+ * The curve is shaped like this:
+ *
+ *  ^
+ *  |                                                        V8
+ *  |                                                   V7
+ *  |                                          V6
+ *  |                               V5
+ *  |                    V4
+ *  |            V3
+ *  |     V2
+ *  | V1
+ *  +----------------------------------------------------------->
+ *    0   16     48      96         160        208      240  255
+ *
+ * The negative and postive gamma values adjust the V1 thru V8 up/down
+ * according to the datasheet specifications. This is a property of the
+ * physical display connected to the display controller and may vary.
+ * If defined, both arrays must be supplied in full. If the properties
+ * are not supplied, hardware defaults will be used.
+ */
+struct ili9322_config {
+	u32 width_mm;
+	u32 height_mm;
+	bool flip_horizontal;
+	bool flip_vertical;
+	enum ili9322_input input;
+	u32 vreg1out_mv;
+	u32 vcom_high_percent;
+	u32 vcom_amplitude_percent;
+	bool dclk_active_high;
+	bool de_active_high;
+	bool hsync_active_high;
+	bool vsync_active_high;
+	u8 syncmode;
+	u8 gamma_corr_pos[8];
+	u8 gamma_corr_neg[8];
+};
+
+struct ili9322 {
+	struct device *dev;
+	const struct ili9322_config *conf;
+	struct drm_panel panel;
+	struct regmap *regmap;
+	struct regulator_bulk_data supplies[3];
+	struct gpio_desc *reset_gpio;
+	enum ili9322_input input;
+	struct videomode vm;
+	u8 gamma[8];
+	u8 vreg1out;
+	u8 vcom_high;
+	u8 vcom_amplitude;
+};
+
+static inline struct ili9322 *panel_to_ili9322(struct drm_panel *panel)
+{
+	return container_of(panel, struct ili9322, panel);
+}
+
+static int ili9322_regmap_spi_write(void *context, const void *data,
+				    size_t count)
+{
+	struct device *dev = context;
+	struct spi_device *spi = to_spi_device(dev);
+	u8 buf[2];
+
+	/* Clear bit 7 to write */
+	memcpy(buf, data, 2);
+	buf[0] &= ~0x80;
+
+	dev_dbg(dev, "WRITE: %02x %02x\n", buf[0], buf[1]);
+	return spi_write_then_read(spi, buf, 2, NULL, 0);
+}
+
+static int ili9322_regmap_spi_read(void *context, const void *reg,
+				   size_t reg_size, void *val, size_t val_size)
+{
+	struct device *dev = context;
+	struct spi_device *spi = to_spi_device(dev);
+	u8 buf[1];
+
+	/* Set bit 7 to 1 to read */
+	memcpy(buf, reg, 1);
+	dev_dbg(dev, "READ: %02x reg size = %zu, val size = %zu\n",
+		buf[0], reg_size, val_size);
+	buf[0] |= 0x80;
+
+	return spi_write_then_read(spi, buf, 1, val, 1);
+}
+
+static struct regmap_bus ili9322_regmap_bus = {
+	.write = ili9322_regmap_spi_write,
+	.read = ili9322_regmap_spi_read,
+	.reg_format_endian_default = REGMAP_ENDIAN_BIG,
+	.val_format_endian_default = REGMAP_ENDIAN_BIG,
+};
+
+static bool ili9322_volatile_reg(struct device *dev, unsigned int reg)
+{
+	return false;
+}
+
+static bool ili9322_writeable_reg(struct device *dev, unsigned int reg)
+{
+	/* Just register 0 is read-only */
+	if (reg == 0x00)
+		return false;
+	return true;
+}
+
+const struct regmap_config ili9322_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = 0x44,
+	.cache_type = REGCACHE_RBTREE,
+	.volatile_reg = ili9322_volatile_reg,
+	.writeable_reg = ili9322_writeable_reg,
+};
+
+static int ili9322_init(struct drm_panel *panel, struct ili9322 *ili)
+{
+	struct drm_connector *connector = panel->connector;
+	u8 reg;
+	int ret;
+	int i;
+
+	/* Reset display */
+	ret = regmap_write(ili->regmap, ILI9322_GLOBAL_RESET,
+			   ILI9322_GLOBAL_RESET_ASSERT);
+	if (ret) {
+		dev_err(ili->dev, "can't issue GRESET (%d)\n", ret);
+		return ret;
+	}
+
+	/* Set up the main voltage regulator */
+	if (ili->vreg1out != U8_MAX) {
+		ret = regmap_write(ili->regmap, ILI9322_VREG1_VOLTAGE,
+				   ili->vreg1out);
+		if (ret) {
+			dev_err(ili->dev, "can't set up VREG1OUT (%d)\n", ret);
+			return ret;
+		}
+	}
+
+	if (ili->vcom_amplitude != U8_MAX) {
+		ret = regmap_write(ili->regmap, ILI9322_VCOM_AMP,
+				   ili->vcom_amplitude);
+		if (ret) {
+			dev_err(ili->dev,
+				"can't set up VCOM amplitude (%d)\n", ret);
+			return ret;
+		}
+	};
+
+	if (ili->vcom_high != U8_MAX) {
+		ret = regmap_write(ili->regmap, ILI9322_VCOM_HIGH,
+				   ili->vcom_high);
+		if (ret) {
+			dev_err(ili->dev, "can't set up VCOM high (%d)\n", ret);
+			return ret;
+		}
+	};
+
+	/* Set up gamma correction */
+	for (i = 0; i < ARRAY_SIZE(ili->gamma); i++) {
+		ret = regmap_write(ili->regmap, ILI9322_GAMMA_1 + i,
+				   ili->gamma[i]);
+		if (ret) {
+			dev_err(ili->dev,
+				"can't write gamma V%d to 0x%02x (%d)\n",
+				i + 1, ILI9322_GAMMA_1 + i, ret);
+			return ret;
+		}
+	}
+
+	/*
+	 * Polarity and inverted color order for RGB input.
+	 * None of this applies in the BT.656 mode.
+	 */
+	if (ili->conf->dclk_active_high) {
+		reg = ILI9322_POL_DCLK;
+		connector->display_info.bus_flags |=
+			DRM_BUS_FLAG_PIXDATA_POSEDGE;
+	} else {
+		reg = 0;
+		connector->display_info.bus_flags |=
+			DRM_BUS_FLAG_PIXDATA_NEGEDGE;
+	}
+	if (ili->conf->de_active_high) {
+		reg |= ILI9322_POL_DE;
+		connector->display_info.bus_flags |=
+			DRM_BUS_FLAG_DE_HIGH;
+	} else {
+		connector->display_info.bus_flags |=
+			DRM_BUS_FLAG_DE_LOW;
+	}
+	if (ili->conf->hsync_active_high)
+		reg |= ILI9322_POL_HSYNC;
+	if (ili->conf->vsync_active_high)
+		reg |= ILI9322_POL_VSYNC;
+	ret = regmap_write(ili->regmap, ILI9322_POL, reg);
+	if (ret) {
+		dev_err(ili->dev, "can't write POL register (%d)\n", ret);
+		return ret;
+	}
+
+	/*
+	 * Set up interface control.
+	 * This is not used in the BT.656 mode (no H/Vsync or DE signals).
+	 */
+	reg = ili->conf->syncmode;
+	reg |= ILI9322_IF_CTRL_LINE_INVERSION;
+	ret = regmap_write(ili->regmap, ILI9322_IF_CTRL, reg);
+	if (ret) {
+		dev_err(ili->dev, "can't write IF CTRL register (%d)\n", ret);
+		return ret;
+	}
+
+	/* Set up the input mode */
+	reg = (ili->input << 4);
+	/* These are inverted, setting to 1 is the default, clearing flips */
+	if (!ili->conf->flip_horizontal)
+		reg |= ILI9322_ENTRY_HDIR;
+	if (!ili->conf->flip_vertical)
+		reg |= ILI9322_ENTRY_VDIR;
+	reg |= ILI9322_ENTRY_AUTODETECT;
+	ret = regmap_write(ili->regmap, ILI9322_ENTRY, reg);
+	if (ret) {
+		dev_err(ili->dev, "can't write ENTRY reg (%d)\n", ret);
+		return ret;
+	}
+	dev_info(ili->dev, "display is in %s mode, syncmode %02x\n",
+		 ili9322_inputs[ili->input],
+		 ili->conf->syncmode);
+
+	dev_info(ili->dev, "initialized display\n");
+
+	return 0;
+}
+
+/*
+ * This power-on sequence if from the datasheet, page 57.
+ */
+static int ili9322_power_on(struct ili9322 *ili)
+{
+	int ret;
+
+	/* Assert RESET */
+	gpiod_set_value(ili->reset_gpio, 1);
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(ili->supplies), ili->supplies);
+	if (ret < 0) {
+		dev_err(ili->dev, "unable to enable regulators\n");
+		return ret;
+	}
+	msleep(20);
+
+	/* De-assert RESET */
+	gpiod_set_value(ili->reset_gpio, 0);
+
+	msleep(10);
+
+	return 0;
+}
+
+static int ili9322_power_off(struct ili9322 *ili)
+{
+	return regulator_bulk_disable(ARRAY_SIZE(ili->supplies), ili->supplies);
+}
+
+static int ili9322_disable(struct drm_panel *panel)
+{
+	struct ili9322 *ili = panel_to_ili9322(panel);
+	int ret;
+
+	ret = regmap_write(ili->regmap, ILI9322_POW_CTRL,
+			   ILI9322_POW_CTRL_STANDBY);
+	if (ret) {
+		dev_err(ili->dev, "unable to go to standby mode\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int ili9322_unprepare(struct drm_panel *panel)
+{
+	struct ili9322 *ili = panel_to_ili9322(panel);
+
+	return ili9322_power_off(ili);
+}
+
+static int ili9322_prepare(struct drm_panel *panel)
+{
+	struct ili9322 *ili = panel_to_ili9322(panel);
+	int ret;
+
+	ret = ili9322_power_on(ili);
+	if (ret < 0)
+		return ret;
+
+	ret = ili9322_init(panel, ili);
+	if (ret < 0)
+		ili9322_unprepare(panel);
+
+	return ret;
+}
+
+static int ili9322_enable(struct drm_panel *panel)
+{
+	struct ili9322 *ili = panel_to_ili9322(panel);
+	int ret;
+
+	ret = regmap_write(ili->regmap, ILI9322_POW_CTRL,
+			   ILI9322_POW_CTRL_DEFAULT);
+	if (ret) {
+		dev_err(ili->dev, "unable to enable panel\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+/* Serial RGB modes */
+static const struct drm_display_mode srgb_320x240_mode = {
+	.clock = 2453500,
+	.hdisplay = 320,
+	.hsync_start = 320 + 359,
+	.hsync_end = 320 + 359 + 1,
+	.htotal = 320 + 359 + 1 + 241,
+	.vdisplay = 240,
+	.vsync_start = 240 + 4,
+	.vsync_end = 240 + 4 + 1,
+	.vtotal = 262,
+	.vrefresh = 60,
+	.flags = 0,
+};
+
+static const struct drm_display_mode srgb_360x240_mode = {
+	.clock = 2700000,
+	.hdisplay = 360,
+	.hsync_start = 360 + 35,
+	.hsync_end = 360 + 35 + 1,
+	.htotal = 360 + 35 + 1 + 241,
+	.vdisplay = 240,
+	.vsync_start = 240 + 21,
+	.vsync_end = 240 + 21 + 1,
+	.vtotal = 262,
+	.vrefresh = 60,
+	.flags = 0,
+};
+
+/* This is the only mode listed for parallel RGB in the datasheet */
+static const struct drm_display_mode prgb_320x240_mode = {
+	.clock = 6400000,
+	.hdisplay = 320,
+	.hsync_start = 320 + 38,
+	.hsync_end = 320 + 38 + 1,
+	.htotal = 320 + 38 + 1 + 50,
+	.vdisplay = 240,
+	.vsync_start = 240 + 4,
+	.vsync_end = 240 + 4 + 1,
+	.vtotal = 262,
+	.vrefresh = 60,
+	.flags = 0,
+};
+
+/* YUV modes */
+static const struct drm_display_mode yuv_640x320_mode = {
+	.clock = 2454000,
+	.hdisplay = 640,
+	.hsync_start = 640 + 252,
+	.hsync_end = 640 + 252 + 1,
+	.htotal = 640 + 252 + 1 + 28,
+	.vdisplay = 320,
+	.vsync_start = 320 + 4,
+	.vsync_end = 320 + 4 + 1,
+	.vtotal = 320 + 4 + 1 + 18,
+	.vrefresh = 60,
+	.flags = 0,
+};
+
+static const struct drm_display_mode yuv_720x360_mode = {
+	.clock = 2700000,
+	.hdisplay = 720,
+	.hsync_start = 720 + 252,
+	.hsync_end = 720 + 252 + 1,
+	.htotal = 720 + 252 + 1 + 24,
+	.vdisplay = 360,
+	.vsync_start = 360 + 4,
+	.vsync_end = 360 + 4 + 1,
+	.vtotal = 360 + 4 + 1 + 18,
+	.vrefresh = 60,
+	.flags = 0,
+};
+
+/* BT.656 VGA mode, 640x480 */
+static const struct drm_display_mode itu_r_bt_656_640_mode = {
+	.clock = 2454000,
+	.hdisplay = 640,
+	.hsync_start = 640 + 3,
+	.hsync_end = 640 + 3 + 1,
+	.htotal = 640 + 3 + 1 + 272,
+	.vdisplay = 480,
+	.vsync_start = 480 + 4,
+	.vsync_end = 480 + 4 + 1,
+	.vtotal = 500,
+	.vrefresh = 60,
+	.flags = 0,
+};
+
+/* BT.656 D1 mode 720x480 */
+static const struct drm_display_mode itu_r_bt_656_720_mode = {
+	.clock = 2700000,
+	.hdisplay = 720,
+	.hsync_start = 720 + 3,
+	.hsync_end = 720 + 3 + 1,
+	.htotal = 720 + 3 + 1 + 272,
+	.vdisplay = 480,
+	.vsync_start = 480 + 4,
+	.vsync_end = 480 + 4 + 1,
+	.vtotal = 500,
+	.vrefresh = 60,
+	.flags = 0,
+};
+
+static int ili9322_get_modes(struct drm_panel *panel)
+{
+	struct drm_connector *connector = panel->connector;
+	struct ili9322 *ili = panel_to_ili9322(panel);
+	struct drm_display_mode *mode;
+
+	strncpy(connector->display_info.name, "ILI9322 TFT LCD driver\0",
+		DRM_DISPLAY_INFO_LEN);
+	connector->display_info.width_mm = ili->conf->width_mm;
+	connector->display_info.height_mm = ili->conf->height_mm;
+
+	switch (ili->input) {
+	case ILI9322_INPUT_SRGB_DUMMY_320X240:
+		mode = drm_mode_duplicate(panel->drm, &srgb_320x240_mode);
+		break;
+	case ILI9322_INPUT_SRGB_DUMMY_360X240:
+		mode = drm_mode_duplicate(panel->drm, &srgb_360x240_mode);
+		break;
+	case ILI9322_INPUT_PRGB_THROUGH:
+	case ILI9322_INPUT_PRGB_ALIGNED:
+		mode = drm_mode_duplicate(panel->drm, &prgb_320x240_mode);
+		break;
+	case ILI9322_INPUT_YUV_640X320_YCBCR:
+		mode = drm_mode_duplicate(panel->drm, &yuv_640x320_mode);
+		break;
+	case ILI9322_INPUT_YUV_720X360_YCBCR:
+		mode = drm_mode_duplicate(panel->drm, &yuv_720x360_mode);
+		break;
+	case ILI9322_INPUT_ITU_R_BT656_720X360_YCBCR:
+		mode = drm_mode_duplicate(panel->drm, &itu_r_bt_656_720_mode);
+		break;
+	case ILI9322_INPUT_ITU_R_BT656_640X320_YCBCR:
+		mode = drm_mode_duplicate(panel->drm, &itu_r_bt_656_640_mode);
+		break;
+	default:
+		mode = NULL;
+		break;
+	}
+	if (!mode) {
+		DRM_ERROR("bad mode or failed to add mode\n");
+		return -EINVAL;
+	}
+	drm_mode_set_name(mode);
+	/*
+	 * This is the preferred mode because most people are going
+	 * to want to use the display with VGA type graphics.
+	 */
+	mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+
+	/* Set up the polarity */
+	if (ili->conf->hsync_active_high)
+		mode->flags |= DRM_MODE_FLAG_PHSYNC;
+	else
+		mode->flags |= DRM_MODE_FLAG_NHSYNC;
+	if (ili->conf->vsync_active_high)
+		mode->flags |= DRM_MODE_FLAG_PVSYNC;
+	else
+		mode->flags |= DRM_MODE_FLAG_NVSYNC;
+
+	mode->width_mm = ili->conf->width_mm;
+	mode->height_mm = ili->conf->height_mm;
+	drm_mode_probed_add(connector, mode);
+
+	return 1; /* Number of modes */
+}
+
+static const struct drm_panel_funcs ili9322_drm_funcs = {
+	.disable = ili9322_disable,
+	.unprepare = ili9322_unprepare,
+	.prepare = ili9322_prepare,
+	.enable = ili9322_enable,
+	.get_modes = ili9322_get_modes,
+};
+
+static int ili9322_probe(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct ili9322 *ili;
+	const struct regmap_config *regmap_config;
+	u8 gamma;
+	u32 val;
+	int ret;
+	int i;
+
+	ili = devm_kzalloc(dev, sizeof(struct ili9322), GFP_KERNEL);
+	if (!ili)
+		return -ENOMEM;
+
+	spi_set_drvdata(spi, ili);
+
+	ili->dev = dev;
+
+	/*
+	 * Every new incarnation of this display must have a unique
+	 * data entry for the system in this driver.
+	 */
+	ili->conf = of_device_get_match_data(dev);
+	if (!ili->conf) {
+		dev_err(dev, "missing device configuration\n");
+		return -ENODEV;
+	}
+
+	val = ili->conf->vreg1out_mv;
+	if (!val) {
+		/* Default HW value, do not touch (should be 4.5V) */
+		ili->vreg1out = U8_MAX;
+	} else {
+		if (val < 3600) {
+			dev_err(dev, "too low VREG1OUT\n");
+			return -EINVAL;
+		}
+		if (val > 6000) {
+			dev_err(dev, "too high VREG1OUT\n");
+			return -EINVAL;
+		}
+		if ((val % 100) != 0) {
+			dev_err(dev, "VREG1OUT is no even 100 microvolt\n");
+			return -EINVAL;
+		}
+		val -= 3600;
+		val /= 100;
+		dev_dbg(dev, "VREG1OUT = 0x%02x\n", val);
+		ili->vreg1out = val;
+	}
+
+	val = ili->conf->vcom_high_percent;
+	if (!val) {
+		/* Default HW value, do not touch (should be 91%) */
+		ili->vcom_high = U8_MAX;
+	} else {
+		if (val < 37) {
+			dev_err(dev, "too low VCOM high\n");
+			return -EINVAL;
+		}
+		if (val > 100) {
+			dev_err(dev, "too high VCOM high\n");
+			return -EINVAL;
+		}
+		val -= 37;
+		dev_dbg(dev, "VCOM high = 0x%02x\n", val);
+		ili->vcom_high = val;
+	}
+
+	val = ili->conf->vcom_amplitude_percent;
+	if (!val) {
+		/* Default HW value, do not touch (should be 114%) */
+		ili->vcom_high = U8_MAX;
+	} else {
+		if (val < 70) {
+			dev_err(dev, "too low VCOM amplitude\n");
+			return -EINVAL;
+		}
+		if (val > 132) {
+			dev_err(dev, "too high VCOM amplitude\n");
+			return -EINVAL;
+		}
+		val -= 70;
+		val >>= 1; /* Increments of 2% */
+		dev_dbg(dev, "VCOM amplitude = 0x%02x\n", val);
+		ili->vcom_amplitude = val;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(ili->gamma); i++) {
+		val = ili->conf->gamma_corr_neg[i];
+		if (val > 15) {
+			dev_err(dev, "negative gamma %u > 15, capping\n", val);
+			val = 15;
+		}
+		gamma = val << 4;
+		val = ili->conf->gamma_corr_pos[i];
+		if (val > 15) {
+			dev_err(dev, "positive gamma %u > 15, capping\n", val);
+			val = 15;
+		}
+		gamma |= val;
+		ili->gamma[i] = gamma;
+		dev_dbg(dev, "gamma V%d: 0x%02x\n", i + 1, gamma);
+	}
+
+	ili->supplies[0].supply = "vcc"; /* 2.7-3.6 V */
+	ili->supplies[1].supply = "iovcc"; /* 1.65-3.6V */
+	ili->supplies[2].supply = "vci"; /* 2.7-3.6V */
+	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ili->supplies),
+				      ili->supplies);
+	if (ret < 0)
+		return ret;
+	ret = regulator_set_voltage(ili->supplies[0].consumer,
+				    2700000, 3600000);
+	if (ret)
+		return ret;
+	ret = regulator_set_voltage(ili->supplies[1].consumer,
+				    1650000, 3600000);
+	if (ret)
+		return ret;
+	ret = regulator_set_voltage(ili->supplies[2].consumer,
+				    2700000, 3600000);
+	if (ret)
+		return ret;
+
+	ili->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(ili->reset_gpio)) {
+		dev_err(dev, "failed to get RESET GPIO\n");
+		return PTR_ERR(ili->reset_gpio);
+	}
+
+	spi->bits_per_word = 8;
+	ret = spi_setup(spi);
+	if (ret < 0) {
+		dev_err(dev, "spi setup failed.\n");
+		return ret;
+	}
+	regmap_config = &ili9322_regmap_config;
+	ili->regmap = devm_regmap_init(dev, &ili9322_regmap_bus, dev,
+				       regmap_config);
+	if (IS_ERR(ili->regmap)) {
+		dev_err(dev, "failed to allocate register map\n");
+		return PTR_ERR(ili->regmap);
+	}
+
+	ret = regmap_read(ili->regmap, ILI9322_CHIP_ID, &val);
+	if (ret) {
+		dev_err(dev, "can't get chip ID (%d)\n", ret);
+		return ret;
+	}
+	if (val != ILI9322_CHIP_ID_MAGIC) {
+		dev_err(dev, "chip ID 0x%0x2, expected 0x%02x\n", val,
+			ILI9322_CHIP_ID_MAGIC);
+		return -ENODEV;
+	}
+
+	/* Probe the system to find the display setting */
+	if (ili->conf->input == ILI9322_INPUT_UNKNOWN) {
+		ret = regmap_read(ili->regmap, ILI9322_ENTRY, &val);
+		if (ret) {
+			dev_err(dev, "can't get entry setting (%d)\n", ret);
+			return ret;
+		}
+		/* Input enum corresponds to HW setting */
+		ili->input = (val >> 4) & 0x0f;
+		if (ili->input >= ILI9322_INPUT_UNKNOWN)
+			ili->input = ILI9322_INPUT_UNKNOWN;
+	} else {
+		ili->input = ili->conf->input;
+	}
+
+	drm_panel_init(&ili->panel);
+	ili->panel.dev = dev;
+	ili->panel.funcs = &ili9322_drm_funcs;
+
+	return drm_panel_add(&ili->panel);
+}
+
+static int ili9322_remove(struct spi_device *spi)
+{
+	struct ili9322 *ili = spi_get_drvdata(spi);
+
+	ili9322_power_off(ili);
+	drm_panel_remove(&ili->panel);
+
+	return 0;
+}
+
+/*
+ * The D-Link DIR-685 panel is marked LM918A01-1A SY-B4-091116-E0199
+ */
+static const struct ili9322_config ili9322_dir_685 = {
+	.width_mm = 65,
+	.height_mm = 50,
+	.input = ILI9322_INPUT_ITU_R_BT656_640X320_YCBCR,
+	.vreg1out_mv = 4600,
+	.vcom_high_percent = 91,
+	.vcom_amplitude_percent = 114,
+	.syncmode = ILI9322_IF_CTRL_SYNC_DISABLED,
+	.dclk_active_high = true,
+	.gamma_corr_neg = { 0xa, 0x5, 0x7, 0x7, 0x7, 0x5, 0x1, 0x6 },
+	.gamma_corr_pos = { 0x7, 0x7, 0x3, 0x2, 0x3, 0x5, 0x7, 0x2 },
+};
+
+static const struct of_device_id ili9322_of_match[] = {
+	{
+		.compatible = "dlink,dir-685-panel",
+		.data = &ili9322_dir_685,
+	},
+	{
+		.compatible = "ilitek,ili9322",
+		.data = NULL,
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ili9322_of_match);
+
+static struct spi_driver ili9322_driver = {
+	.probe = ili9322_probe,
+	.remove = ili9322_remove,
+	.driver = {
+		.name = "panel-ilitek-ili9322",
+		.of_match_table = ili9322_of_match,
+	},
+};
+module_spi_driver(ili9322_driver);
+
+MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+MODULE_DESCRIPTION("ILI9322 LCD panel driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/panel/panel-lvds.c b/drivers/gpu/drm/panel/panel-lvds.c
index e2d57c01200b291b207e976011e70b71e7be9a72..b5e3994f0aa8c69929d3f88cb7a3be5acc272a93 100644
--- a/drivers/gpu/drm/panel/panel-lvds.c
+++ b/drivers/gpu/drm/panel/panel-lvds.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
 #include <linux/slab.h>
 
 #include <drm/drmP.h>
@@ -39,6 +40,7 @@ struct panel_lvds {
 	bool data_mirror;
 
 	struct backlight_device *backlight;
+	struct regulator *supply;
 
 	struct gpio_desc *enable_gpio;
 	struct gpio_desc *reset_gpio;
@@ -69,6 +71,9 @@ static int panel_lvds_unprepare(struct drm_panel *panel)
 	if (lvds->enable_gpio)
 		gpiod_set_value_cansleep(lvds->enable_gpio, 0);
 
+	if (lvds->supply)
+		regulator_disable(lvds->supply);
+
 	return 0;
 }
 
@@ -76,6 +81,17 @@ static int panel_lvds_prepare(struct drm_panel *panel)
 {
 	struct panel_lvds *lvds = to_panel_lvds(panel);
 
+	if (lvds->supply) {
+		int err;
+
+		err = regulator_enable(lvds->supply);
+		if (err < 0) {
+			dev_err(lvds->dev, "failed to enable supply: %d\n",
+				err);
+			return err;
+		}
+	}
+
 	if (lvds->enable_gpio)
 		gpiod_set_value_cansleep(lvds->enable_gpio, 1);
 
@@ -196,6 +212,20 @@ static int panel_lvds_probe(struct platform_device *pdev)
 	if (ret < 0)
 		return ret;
 
+	lvds->supply = devm_regulator_get_optional(lvds->dev, "power");
+	if (IS_ERR(lvds->supply)) {
+		ret = PTR_ERR(lvds->supply);
+
+		if (ret != -ENODEV) {
+			if (ret != -EPROBE_DEFER)
+				dev_err(lvds->dev, "failed to request regulator: %d\n",
+					ret);
+			return ret;
+		}
+
+		lvds->supply = NULL;
+	}
+
 	/* Get GPIOs and backlight controller. */
 	lvds->enable_gpio = devm_gpiod_get_optional(lvds->dev, "enable",
 						     GPIOD_OUT_LOW);
diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h
index 440f53ebee8cdf4099c864cdbc81d9569dda539b..07fa2cdb364aff68b0fae1444c98a9a5fd439751 100644
--- a/drivers/gpu/drm/pl111/pl111_drm.h
+++ b/drivers/gpu/drm/pl111/pl111_drm.h
@@ -53,7 +53,6 @@ struct pl111_drm_dev_private {
 	struct drm_panel *panel;
 	struct drm_bridge *bridge;
 	struct drm_simple_display_pipe pipe;
-	struct drm_fbdev_cma *fbdev;
 
 	void *regs;
 	u32 ienb;
diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c
index 201d57d5cb54d0ea8b85d3725719a741758e9179..acb738c69873991117e0d65e951778d7ce4a1672 100644
--- a/drivers/gpu/drm/pl111/pl111_drv.c
+++ b/drivers/gpu/drm/pl111/pl111_drv.c
@@ -64,6 +64,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_of.h>
 #include <drm/drm_bridge.h>
@@ -137,8 +138,7 @@ static int pl111_modeset_init(struct drm_device *dev)
 
 	drm_mode_config_reset(dev);
 
-	priv->fbdev = drm_fbdev_cma_init(dev, 32,
-					 dev->mode_config.num_connector);
+	drm_fb_cma_fbdev_init(dev, 32, 0);
 
 	drm_kms_helper_poll_init(dev);
 
@@ -155,17 +155,10 @@ static int pl111_modeset_init(struct drm_device *dev)
 
 DEFINE_DRM_GEM_CMA_FOPS(drm_fops);
 
-static void pl111_lastclose(struct drm_device *dev)
-{
-	struct pl111_drm_dev_private *priv = dev->dev_private;
-
-	drm_fbdev_cma_restore_mode(priv->fbdev);
-}
-
 static struct drm_driver pl111_drm_driver = {
 	.driver_features =
 		DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_ATOMIC,
-	.lastclose = pl111_lastclose,
+	.lastclose = drm_fb_helper_lastclose,
 	.ioctls = NULL,
 	.fops = &drm_fops,
 	.name = "pl111",
@@ -281,8 +274,7 @@ static int pl111_amba_remove(struct amba_device *amba_dev)
 	struct pl111_drm_dev_private *priv = drm->dev_private;
 
 	drm_dev_unregister(drm);
-	if (priv->fbdev)
-		drm_fbdev_cma_fini(priv->fbdev);
+	drm_fb_cma_fbdev_fini(drm);
 	if (priv->panel)
 		drm_panel_bridge_remove(priv->bridge);
 	drm_mode_config_cleanup(drm);
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 4756b3c9bf2cabadd7811accf1057e61c6426825..9a9214ae0fb50de379c68c433da3bf8337c3093a 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -289,6 +289,7 @@ static void qxl_crtc_destroy(struct drm_crtc *crtc)
 {
 	struct qxl_crtc *qxl_crtc = to_qxl_crtc(crtc);
 
+	qxl_bo_unref(&qxl_crtc->cursor_bo);
 	drm_crtc_cleanup(crtc);
 	kfree(qxl_crtc);
 }
@@ -495,6 +496,53 @@ static int qxl_primary_atomic_check(struct drm_plane *plane,
 	return 0;
 }
 
+static int qxl_primary_apply_cursor(struct drm_plane *plane)
+{
+	struct drm_device *dev = plane->dev;
+	struct qxl_device *qdev = dev->dev_private;
+	struct drm_framebuffer *fb = plane->state->fb;
+	struct qxl_crtc *qcrtc = to_qxl_crtc(plane->state->crtc);
+	struct qxl_cursor_cmd *cmd;
+	struct qxl_release *release;
+	int ret = 0;
+
+	if (!qcrtc->cursor_bo)
+		return 0;
+
+	ret = qxl_alloc_release_reserved(qdev, sizeof(*cmd),
+					 QXL_RELEASE_CURSOR_CMD,
+					 &release, NULL);
+	if (ret)
+		return ret;
+
+	ret = qxl_release_list_add(release, qcrtc->cursor_bo);
+	if (ret)
+		goto out_free_release;
+
+	ret = qxl_release_reserve_list(release, false);
+	if (ret)
+		goto out_free_release;
+
+	cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release);
+	cmd->type = QXL_CURSOR_SET;
+	cmd->u.set.position.x = plane->state->crtc_x + fb->hot_x;
+	cmd->u.set.position.y = plane->state->crtc_y + fb->hot_y;
+
+	cmd->u.set.shape = qxl_bo_physical_address(qdev, qcrtc->cursor_bo, 0);
+
+	cmd->u.set.visible = 1;
+	qxl_release_unmap(qdev, release, &cmd->release_info);
+
+	qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
+	qxl_release_fence_buffer_objects(release);
+
+	return ret;
+
+out_free_release:
+	qxl_release_free(qdev, release);
+	return ret;
+}
+
 static void qxl_primary_atomic_update(struct drm_plane *plane,
 				      struct drm_plane_state *old_state)
 {
@@ -510,6 +558,7 @@ static void qxl_primary_atomic_update(struct drm_plane *plane,
 	    .x2 = qfb->base.width,
 	    .y2 = qfb->base.height
 	};
+	int ret;
 	bool same_shadow = false;
 
 	if (old_state->fb) {
@@ -531,6 +580,11 @@ static void qxl_primary_atomic_update(struct drm_plane *plane,
 		if (!same_shadow)
 			qxl_io_destroy_primary(qdev);
 		bo_old->is_primary = false;
+
+		ret = qxl_primary_apply_cursor(plane);
+		if (ret)
+			DRM_ERROR(
+			"could not set cursor after creating primary");
 	}
 
 	if (!bo->is_primary) {
@@ -571,11 +625,12 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 	struct drm_device *dev = plane->dev;
 	struct qxl_device *qdev = dev->dev_private;
 	struct drm_framebuffer *fb = plane->state->fb;
+	struct qxl_crtc *qcrtc = to_qxl_crtc(plane->state->crtc);
 	struct qxl_release *release;
 	struct qxl_cursor_cmd *cmd;
 	struct qxl_cursor *cursor;
 	struct drm_gem_object *obj;
-	struct qxl_bo *cursor_bo, *user_bo = NULL;
+	struct qxl_bo *cursor_bo = NULL, *user_bo = NULL;
 	int ret;
 	void *user_ptr;
 	int size = 64*64*4;
@@ -628,6 +683,10 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 		cmd->u.set.shape = qxl_bo_physical_address(qdev,
 							   cursor_bo, 0);
 		cmd->type = QXL_CURSOR_SET;
+
+		qxl_bo_unref(&qcrtc->cursor_bo);
+		qcrtc->cursor_bo = cursor_bo;
+		cursor_bo = NULL;
 	} else {
 
 		ret = qxl_release_reserve_list(release, true);
@@ -645,6 +704,8 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 	qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
 	qxl_release_fence_buffer_objects(release);
 
+	qxl_bo_unref(&cursor_bo);
+
 	return;
 
 out_backoff:
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 08752c0ffb35b21f1af70d84b9e03783b6aa2f61..00a1a66b052a5c31fe925bebb4800155013e3897 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -111,6 +111,8 @@ struct qxl_bo_list {
 struct qxl_crtc {
 	struct drm_crtc base;
 	int index;
+
+	struct qxl_bo *cursor_bo;
 };
 
 struct qxl_output {
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index d866f329e7d8501daa4cddcf9a0ebf878bc5c706..59cd74c3f3afe76b52dc73c91315ace9c3be18b3 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -291,14 +291,15 @@ static struct ttm_backend_func qxl_backend_func = {
 	.destroy = &qxl_ttm_backend_destroy,
 };
 
-static int qxl_ttm_tt_populate(struct ttm_tt *ttm)
+static int qxl_ttm_tt_populate(struct ttm_tt *ttm,
+			struct ttm_operation_ctx *ctx)
 {
 	int r;
 
 	if (ttm->state != tt_unpopulated)
 		return 0;
 
-	r = ttm_pool_populate(ttm);
+	r = ttm_pool_populate(ttm, ctx);
 	if (r)
 		return r;
 
@@ -357,8 +358,7 @@ static int qxl_bo_move(struct ttm_buffer_object *bo, bool evict,
 		qxl_move_null(bo, new_mem);
 		return 0;
 	}
-	return ttm_bo_move_memcpy(bo, ctx->interruptible, ctx->no_wait_gpu,
-				  new_mem);
+	return ttm_bo_move_memcpy(bo, ctx, new_mem);
 }
 
 static void qxl_bo_move_notify(struct ttm_buffer_object *bo,
@@ -389,7 +389,6 @@ static struct ttm_bo_driver qxl_bo_driver = {
 	.verify_access = &qxl_verify_access,
 	.io_mem_reserve = &qxl_ttm_io_mem_reserve,
 	.io_mem_free = &qxl_ttm_io_mem_free,
-	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 	.move_notify = &qxl_bo_move_notify,
 };
 
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index a6511918f632586372a90e430c8ac5128a44b03d..d3045a371a557261776ff32a88aca99b8836c538 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -1627,8 +1627,6 @@ static const u32 godavari_golden_registers[] =
 
 static void cik_init_golden_registers(struct radeon_device *rdev)
 {
-	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
-	mutex_lock(&rdev->grbm_idx_mutex);
 	switch (rdev->family) {
 	case CHIP_BONAIRE:
 		radeon_program_register_sequence(rdev,
@@ -1703,7 +1701,6 @@ static void cik_init_golden_registers(struct radeon_device *rdev)
 	default:
 		break;
 	}
-	mutex_unlock(&rdev->grbm_idx_mutex);
 }
 
 /**
@@ -3120,7 +3117,6 @@ static void cik_setup_rb(struct radeon_device *rdev,
 	u32 disabled_rbs = 0;
 	u32 enabled_rbs = 0;
 
-	mutex_lock(&rdev->grbm_idx_mutex);
 	for (i = 0; i < se_num; i++) {
 		for (j = 0; j < sh_per_se; j++) {
 			cik_select_se_sh(rdev, i, j);
@@ -3132,7 +3128,6 @@ static void cik_setup_rb(struct radeon_device *rdev,
 		}
 	}
 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
-	mutex_unlock(&rdev->grbm_idx_mutex);
 
 	mask = 1;
 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
@@ -3143,7 +3138,6 @@ static void cik_setup_rb(struct radeon_device *rdev,
 
 	rdev->config.cik.backend_enable_mask = enabled_rbs;
 
-	mutex_lock(&rdev->grbm_idx_mutex);
 	for (i = 0; i < se_num; i++) {
 		cik_select_se_sh(rdev, i, 0xffffffff);
 		data = 0;
@@ -3171,7 +3165,6 @@ static void cik_setup_rb(struct radeon_device *rdev,
 		WREG32(PA_SC_RASTER_CONFIG, data);
 	}
 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
-	mutex_unlock(&rdev->grbm_idx_mutex);
 }
 
 /**
@@ -3391,12 +3384,6 @@ static void cik_gpu_init(struct radeon_device *rdev)
 	/* set HW defaults for 3D engine */
 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
 
-	mutex_lock(&rdev->grbm_idx_mutex);
-	/*
-	 * making sure that the following register writes will be broadcasted
-	 * to all the shaders
-	 */
-	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 	WREG32(SX_DEBUG_1, 0x20);
 
 	WREG32(TA_CNTL_AUX, 0x00010000);
@@ -3452,7 +3439,6 @@ static void cik_gpu_init(struct radeon_device *rdev)
 
 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
-	mutex_unlock(&rdev->grbm_idx_mutex);
 
 	udelay(50);
 }
@@ -4432,11 +4418,12 @@ static int cik_mec_init(struct radeon_device *rdev)
 	/*
 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
-	 * Nonetheless, we assign only 1 pipe because all other pipes will
-	 * be handled by KFD
 	 */
-	rdev->mec.num_mec = 1;
-	rdev->mec.num_pipe = 1;
+	if (rdev->family == CHIP_KAVERI)
+		rdev->mec.num_mec = 2;
+	else
+		rdev->mec.num_mec = 1;
+	rdev->mec.num_pipe = 4;
 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
 
 	if (rdev->mec.hpd_eop_obj == NULL) {
@@ -4579,8 +4566,11 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
 	/* init the pipes */
 	mutex_lock(&rdev->srbm_mutex);
 
-	for (i = 0; i < rdev->mec.num_pipe; ++i) {
-		cik_srbm_select(rdev, 0, i, 0, 0);
+	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
+		int me = (i < 4) ? 1 : 2;
+		int pipe = (i < 4) ? i : (i - 4);
+
+		cik_srbm_select(rdev, me, pipe, 0, 0);
 
 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
 		/* write the EOP addr */
@@ -4597,6 +4587,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
 		WREG32(CP_HPD_EOP_CONTROL, tmp);
 
 	}
+	cik_srbm_select(rdev, 0, 0, 0, 0);
 	mutex_unlock(&rdev->srbm_mutex);
 
 	/* init the queues.  Just two for now. */
@@ -5830,7 +5821,6 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
 	u32 i, j, k;
 	u32 mask;
 
-	mutex_lock(&rdev->grbm_idx_mutex);
 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
 			cik_select_se_sh(rdev, i, j);
@@ -5842,7 +5832,6 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
 		}
 	}
 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
-	mutex_unlock(&rdev->grbm_idx_mutex);
 
 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
 	for (k = 0; k < rdev->usec_timeout; k++) {
@@ -5977,12 +5966,10 @@ static int cik_rlc_resume(struct radeon_device *rdev)
 	WREG32(RLC_LB_CNTR_INIT, 0);
 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
 
-	mutex_lock(&rdev->grbm_idx_mutex);
 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
 	WREG32(RLC_LB_PARAMS, 0x00600408);
 	WREG32(RLC_LB_CNTL, 0x80000004);
-	mutex_unlock(&rdev->grbm_idx_mutex);
 
 	WREG32(RLC_MC_CNTL, 0);
 	WREG32(RLC_UCODE_CNTL, 0);
@@ -6049,13 +6036,11 @@ static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
 
 		tmp = cik_halt_rlc(rdev);
 
-		mutex_lock(&rdev->grbm_idx_mutex);
 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
-		mutex_unlock(&rdev->grbm_idx_mutex);
 
 		cik_update_rlc(rdev, tmp);
 
@@ -6098,13 +6083,11 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
 
 		tmp = cik_halt_rlc(rdev);
 
-		mutex_lock(&rdev->grbm_idx_mutex);
 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
 		WREG32(RLC_SERDES_WR_CTRL, data);
-		mutex_unlock(&rdev->grbm_idx_mutex);
 
 		cik_update_rlc(rdev, tmp);
 
@@ -6148,13 +6131,11 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
 
 		tmp = cik_halt_rlc(rdev);
 
-		mutex_lock(&rdev->grbm_idx_mutex);
 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
 		WREG32(RLC_SERDES_WR_CTRL, data);
-		mutex_unlock(&rdev->grbm_idx_mutex);
 
 		cik_update_rlc(rdev, tmp);
 	}
@@ -6583,12 +6564,10 @@ static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
 	u32 mask = 0, tmp, tmp1;
 	int i;
 
-	mutex_lock(&rdev->grbm_idx_mutex);
 	cik_select_se_sh(rdev, se, sh);
 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
-	mutex_unlock(&rdev->grbm_idx_mutex);
 
 	tmp &= 0xffff0000;
 
@@ -7074,7 +7053,8 @@ static int cik_irq_init(struct radeon_device *rdev)
 int cik_irq_set(struct radeon_device *rdev)
 {
 	u32 cp_int_cntl;
-	u32 cp_m1p0;
+	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
+	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
 	u32 grbm_int_cntl = 0;
@@ -7107,6 +7087,13 @@ int cik_irq_set(struct radeon_device *rdev)
 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
 
 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
 
 	/* enable CP interrupts on all rings */
 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
@@ -7121,6 +7108,33 @@ int cik_irq_set(struct radeon_device *rdev)
 			case 0:
 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
 				break;
+			case 1:
+				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
+				break;
+			case 2:
+				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
+				break;
+			case 3:
+				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
+				break;
+			default:
+				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
+				break;
+			}
+		} else if (ring->me == 2) {
+			switch (ring->pipe) {
+			case 0:
+				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
+				break;
+			case 1:
+				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
+				break;
+			case 2:
+				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
+				break;
+			case 3:
+				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
+				break;
 			default:
 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
 				break;
@@ -7137,6 +7151,33 @@ int cik_irq_set(struct radeon_device *rdev)
 			case 0:
 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
 				break;
+			case 1:
+				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
+				break;
+			case 2:
+				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
+				break;
+			case 3:
+				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
+				break;
+			default:
+				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
+				break;
+			}
+		} else if (ring->me == 2) {
+			switch (ring->pipe) {
+			case 0:
+				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
+				break;
+			case 1:
+				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
+				break;
+			case 2:
+				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
+				break;
+			case 3:
+				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
+				break;
 			default:
 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
 				break;
@@ -7217,6 +7258,13 @@ int cik_irq_set(struct radeon_device *rdev)
 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
 
 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
+	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
+	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
+	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
+	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
+	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
+	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
+	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
 
 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
 
diff --git a/drivers/gpu/drm/radeon/cik_reg.h b/drivers/gpu/drm/radeon/cik_reg.h
index 4e883fdc59d8a5edf35cf4bf9a254173a7a92391..318377df09ef43f6cdf0bccae48e2521cddc761e 100644
--- a/drivers/gpu/drm/radeon/cik_reg.h
+++ b/drivers/gpu/drm/radeon/cik_reg.h
@@ -147,8 +147,6 @@
 
 #define CIK_LB_DESKTOP_HEIGHT                     0x6b0c
 
-#define KFD_CIK_SDMA_QUEUE_OFFSET		0x200
-
 #define SQ_IND_INDEX					0x8DE0
 #define SQ_CMD						0x8DEC
 #define SQ_IND_DATA					0x8DE4
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 24fe66c89dfb057b4ec7a0b8b02de0d77e70d919..5712d63dca2070c43fc3b85f9b6f214e9db3e1c9 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3513,6 +3513,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
 		tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.evergreen.max_backends,
 						EVERGREEN_MAX_BACKENDS, disabled_rb_mask);
 	}
+	rdev->config.evergreen.backend_map = tmp;
 	WREG32(GB_BACKEND_MAP, tmp);
 
 	WREG32(CGTS_SYS_TCC_DISABLE, 0);
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 9eccd0c81d8815fc30a16da28b2e6360b3a69e8f..381b0255ff027657117df61ec84da4a8f1118b8f 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1148,6 +1148,7 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 						rdev->config.cayman.max_shader_engines,
 						CAYMAN_MAX_BACKENDS, disabled_rb_mask);
 	}
+	rdev->config.cayman.backend_map = tmp;
 	WREG32(GB_BACKEND_MAP, tmp);
 
 	cgts_tcc_disable = 0xffff0000;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index a8e546569858309a8d3955f090cebda5133dfdfe..d34887873deaf7b72aba80acf2a8e7b0037087c2 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -731,10 +731,6 @@ struct radeon_doorbell {
 
 int radeon_doorbell_get(struct radeon_device *rdev, u32 *page);
 void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell);
-void radeon_doorbell_get_kfd_info(struct radeon_device *rdev,
-				  phys_addr_t *aperture_base,
-				  size_t *aperture_size,
-				  size_t *start_offset);
 
 /*
  * IRQS.
@@ -2442,8 +2438,6 @@ struct radeon_device {
 	struct radeon_atcs		atcs;
 	/* srbm instance registers */
 	struct mutex			srbm_mutex;
-	/* GRBM index mutex. Protects concurrents access to GRBM index */
-	struct mutex			grbm_idx_mutex;
 	/* clock, powergating flags */
 	u32 cg_flags;
 	u32 pg_flags;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index ffc10cadcf34ccb79a7d09ae253b4a2dffda7cfe..8d3e3d2e0090938c2ec895d0836871622fad0bd7 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -392,37 +392,6 @@ void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell)
 		__clear_bit(doorbell, rdev->doorbell.used);
 }
 
-/**
- * radeon_doorbell_get_kfd_info - Report doorbell configuration required to
- *                                setup KFD
- *
- * @rdev: radeon_device pointer
- * @aperture_base: output returning doorbell aperture base physical address
- * @aperture_size: output returning doorbell aperture size in bytes
- * @start_offset: output returning # of doorbell bytes reserved for radeon.
- *
- * Radeon and the KFD share the doorbell aperture. Radeon sets it up,
- * takes doorbells required for its own rings and reports the setup to KFD.
- * Radeon reserved doorbells are at the start of the doorbell aperture.
- */
-void radeon_doorbell_get_kfd_info(struct radeon_device *rdev,
-				  phys_addr_t *aperture_base,
-				  size_t *aperture_size,
-				  size_t *start_offset)
-{
-	/* The first num_doorbells are used by radeon.
-	 * KFD takes whatever's left in the aperture. */
-	if (rdev->doorbell.size > rdev->doorbell.num_doorbells * sizeof(u32)) {
-		*aperture_base = rdev->doorbell.base;
-		*aperture_size = rdev->doorbell.size;
-		*start_offset = rdev->doorbell.num_doorbells * sizeof(u32);
-	} else {
-		*aperture_base = 0;
-		*aperture_size = 0;
-		*start_offset = 0;
-	}
-}
-
 /*
  * radeon_wb_*()
  * Writeback is the the method by which the the GPU updates special pages
@@ -1341,7 +1310,6 @@ int radeon_device_init(struct radeon_device *rdev,
 	mutex_init(&rdev->pm.mutex);
 	mutex_init(&rdev->gpu_clock_mutex);
 	mutex_init(&rdev->srbm_mutex);
-	mutex_init(&rdev->grbm_idx_mutex);
 	init_rwsem(&rdev->pm.mclk_lock);
 	init_rwsem(&rdev->exclusive_lock);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c
index 183b4b4821383ecb5ed218a974cff7c4b7fef156..238e6eb842ea1caa321bd2ad101322cfaa9f68a7 100644
--- a/drivers/gpu/drm/radeon/radeon_dp_mst.c
+++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c
@@ -328,7 +328,7 @@ static void radeon_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
 	drm_kms_helper_hotplug_event(dev);
 }
 
-const struct drm_dp_mst_topology_cbs mst_cbs = {
+static const struct drm_dp_mst_topology_cbs mst_cbs = {
 	.add_connector = radeon_dp_add_mst_connector,
 	.register_connector = radeon_dp_register_mst_connector,
 	.destroy_connector = radeon_dp_destroy_mst_connector,
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 98e30d71d9e0f8970b17d058e08e491d1dcb2c72..a0a839bc39bf99011fd9660266e257b38d3c85f6 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -339,7 +339,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
 		goto out_cleanup;
 	}
 
-	r = ttm_tt_bind(bo->ttm, &tmp_mem);
+	r = ttm_tt_bind(bo->ttm, &tmp_mem, &ctx);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
@@ -347,7 +347,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
-	r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem);
+	r = ttm_bo_move_ttm(bo, &ctx, new_mem);
 out_cleanup:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return r;
@@ -380,7 +380,7 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
 	if (unlikely(r)) {
 		return r;
 	}
-	r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem);
+	r = ttm_bo_move_ttm(bo, &ctx, &tmp_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
@@ -445,8 +445,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, ctx->interruptible,
-				       ctx->no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
 		if (r) {
 			return r;
 		}
@@ -722,7 +721,8 @@ static struct radeon_ttm_tt *radeon_ttm_tt_to_gtt(struct ttm_tt *ttm)
 	return (struct radeon_ttm_tt *)ttm;
 }
 
-static int radeon_ttm_tt_populate(struct ttm_tt *ttm)
+static int radeon_ttm_tt_populate(struct ttm_tt *ttm,
+			struct ttm_operation_ctx *ctx)
 {
 	struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(ttm);
 	struct radeon_device *rdev;
@@ -751,17 +751,17 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm)
 	rdev = radeon_get_rdev(ttm->bdev);
 #if IS_ENABLED(CONFIG_AGP)
 	if (rdev->flags & RADEON_IS_AGP) {
-		return ttm_agp_tt_populate(ttm);
+		return ttm_agp_tt_populate(ttm, ctx);
 	}
 #endif
 
 #ifdef CONFIG_SWIOTLB
 	if (swiotlb_nr_tbl()) {
-		return ttm_dma_populate(&gtt->ttm, rdev->dev);
+		return ttm_dma_populate(&gtt->ttm, rdev->dev, ctx);
 	}
 #endif
 
-	return ttm_populate_and_map_pages(rdev->dev, &gtt->ttm);
+	return ttm_populate_and_map_pages(rdev->dev, &gtt->ttm, ctx);
 }
 
 static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm)
@@ -845,7 +845,6 @@ static struct ttm_bo_driver radeon_bo_driver = {
 	.fault_reserve_notify = &radeon_bo_fault_reserve_notify,
 	.io_mem_reserve = &radeon_ttm_io_mem_reserve,
 	.io_mem_free = &radeon_ttm_io_mem_free,
-	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int radeon_ttm_init(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index ee3e74266a1332a03f18f4d15ed1b0dfdf6244a0..97a0a639dad90b082d5c8856f042a00932fcabd0 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2984,6 +2984,11 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 		    (rdev->pdev->device == 0x6667)) {
 			max_sclk = 75000;
 		}
+		if ((rdev->pdev->revision == 0xC3) ||
+		    (rdev->pdev->device == 0x6665)) {
+			max_sclk = 60000;
+			max_mclk = 80000;
+		}
 	} else if (rdev->family == CHIP_OLAND) {
 		if ((rdev->pdev->revision == 0xC7) ||
 		    (rdev->pdev->revision == 0x80) ||
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 76d63de5921d1c0931eabae91ed0fd1b6ff9fcc4..d85431400a0dbdfeaf8312a490439ba8ac8b1e3f 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -207,13 +207,6 @@ static void rockchip_drm_unbind(struct device *dev)
 	drm_dev_unref(drm_dev);
 }
 
-static void rockchip_drm_lastclose(struct drm_device *dev)
-{
-	struct rockchip_drm_private *priv = dev->dev_private;
-
-	drm_fb_helper_restore_fbdev_mode_unlocked(&priv->fbdev_helper);
-}
-
 static const struct file_operations rockchip_drm_driver_fops = {
 	.owner = THIS_MODULE,
 	.open = drm_open,
@@ -228,7 +221,7 @@ static const struct file_operations rockchip_drm_driver_fops = {
 static struct drm_driver rockchip_drm_driver = {
 	.driver_features	= DRIVER_MODESET | DRIVER_GEM |
 				  DRIVER_PRIME | DRIVER_ATOMIC,
-	.lastclose		= rockchip_drm_lastclose,
+	.lastclose		= drm_fb_helper_lastclose,
 	.gem_vm_ops		= &drm_gem_cma_vm_ops,
 	.gem_free_object_unlocked = rockchip_gem_free_object,
 	.dumb_create		= rockchip_gem_dumb_create,
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index cd2ace0c3caa3582777b571637044232c3540c7c..e266539e04e5cc12a986f4250afa3b03d4127513 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -167,20 +167,13 @@ rockchip_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 	return ERR_PTR(ret);
 }
 
-static void rockchip_drm_output_poll_changed(struct drm_device *dev)
-{
-	struct rockchip_drm_private *private = dev->dev_private;
-
-	drm_fb_helper_hotplug_event(&private->fbdev_helper);
-}
-
 static const struct drm_mode_config_helper_funcs rockchip_mode_config_helpers = {
 	.atomic_commit_tail = drm_atomic_helper_commit_tail_rpm,
 };
 
 static const struct drm_mode_config_funcs rockchip_drm_mode_config_funcs = {
 	.fb_create = rockchip_user_fb_create,
-	.output_poll_changed = rockchip_drm_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
diff --git a/drivers/gpu/drm/scheduler/Makefile b/drivers/gpu/drm/scheduler/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..bd0377c0d2eeaff0df66ce76ede9aa32d0225455
--- /dev/null
+++ b/drivers/gpu/drm/scheduler/Makefile
@@ -0,0 +1,26 @@
+#
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
+ccflags-y := -Iinclude/drm
+gpu-sched-y := gpu_scheduler.o sched_fence.o
+
+obj-$(CONFIG_DRM_SCHED) += gpu-sched.o
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
similarity index 65%
rename from drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
rename to drivers/gpu/drm/scheduler/gpu_scheduler.c
index dcb987e6d94ab6c7136cc8df80640e6f32c92e05..2c18996d59c58e6247a24936da9dc155f76d78cf 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -19,37 +19,36 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
- *
  */
+
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <uapi/linux/sched/types.h>
 #include <drm/drmP.h>
-#include "gpu_scheduler.h"
-
-#include "spsc_queue.h"
+#include <drm/gpu_scheduler.h>
+#include <drm/spsc_queue.h>
 
 #define CREATE_TRACE_POINTS
-#include "gpu_sched_trace.h"
+#include <drm/gpu_scheduler_trace.h>
 
-#define to_amd_sched_job(sched_job)		\
-		container_of((sched_job), struct amd_sched_job, queue_node)
+#define to_drm_sched_job(sched_job)		\
+		container_of((sched_job), struct drm_sched_job, queue_node)
 
-static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
-static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
-static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
+static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
+static void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
+static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
 
 /* Initialize a given run queue struct */
-static void amd_sched_rq_init(struct amd_sched_rq *rq)
+static void drm_sched_rq_init(struct drm_sched_rq *rq)
 {
 	spin_lock_init(&rq->lock);
 	INIT_LIST_HEAD(&rq->entities);
 	rq->current_entity = NULL;
 }
 
-static void amd_sched_rq_add_entity(struct amd_sched_rq *rq,
-				    struct amd_sched_entity *entity)
+static void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
+				    struct drm_sched_entity *entity)
 {
 	if (!list_empty(&entity->list))
 		return;
@@ -58,8 +57,8 @@ static void amd_sched_rq_add_entity(struct amd_sched_rq *rq,
 	spin_unlock(&rq->lock);
 }
 
-static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
-				       struct amd_sched_entity *entity)
+static void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
+				       struct drm_sched_entity *entity)
 {
 	if (list_empty(&entity->list))
 		return;
@@ -77,17 +76,17 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
  *
  * Try to find a ready entity, returns NULL if none found.
  */
-static struct amd_sched_entity *
-amd_sched_rq_select_entity(struct amd_sched_rq *rq)
+static struct drm_sched_entity *
+drm_sched_rq_select_entity(struct drm_sched_rq *rq)
 {
-	struct amd_sched_entity *entity;
+	struct drm_sched_entity *entity;
 
 	spin_lock(&rq->lock);
 
 	entity = rq->current_entity;
 	if (entity) {
 		list_for_each_entry_continue(entity, &rq->entities, list) {
-			if (amd_sched_entity_is_ready(entity)) {
+			if (drm_sched_entity_is_ready(entity)) {
 				rq->current_entity = entity;
 				spin_unlock(&rq->lock);
 				return entity;
@@ -97,7 +96,7 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq)
 
 	list_for_each_entry(entity, &rq->entities, list) {
 
-		if (amd_sched_entity_is_ready(entity)) {
+		if (drm_sched_entity_is_ready(entity)) {
 			rq->current_entity = entity;
 			spin_unlock(&rq->lock);
 			return entity;
@@ -116,22 +115,22 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq)
  * Init a context entity used by scheduler when submit to HW ring.
  *
  * @sched	The pointer to the scheduler
- * @entity	The pointer to a valid amd_sched_entity
+ * @entity	The pointer to a valid drm_sched_entity
  * @rq		The run queue this entity belongs
  * @kernel	If this is an entity for the kernel
  * @jobs	The max number of jobs in the job queue
  *
  * return 0 if succeed. negative error code on failure
 */
-int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
-			  struct amd_sched_entity *entity,
-			  struct amd_sched_rq *rq,
+int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
+			  struct drm_sched_entity *entity,
+			  struct drm_sched_rq *rq,
 			  uint32_t jobs, atomic_t *guilty)
 {
 	if (!(sched && entity && rq))
 		return -EINVAL;
 
-	memset(entity, 0, sizeof(struct amd_sched_entity));
+	memset(entity, 0, sizeof(struct drm_sched_entity));
 	INIT_LIST_HEAD(&entity->list);
 	entity->rq = rq;
 	entity->sched = sched;
@@ -146,6 +145,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
 
 	return 0;
 }
+EXPORT_SYMBOL(drm_sched_entity_init);
 
 /**
  * Query if entity is initialized
@@ -155,8 +155,8 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
  *
  * return true if entity is initialized, false otherwise
 */
-static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,
-					    struct amd_sched_entity *entity)
+static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched,
+					    struct drm_sched_entity *entity)
 {
 	return entity->sched == sched &&
 		entity->rq != NULL;
@@ -169,7 +169,7 @@ static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,
  *
  * Return true if entity don't has any unscheduled jobs.
  */
-static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
+static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
 {
 	rmb();
 	if (spsc_queue_peek(&entity->job_queue) == NULL)
@@ -185,7 +185,7 @@ static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
  *
  * Return true if entity could provide a job.
  */
-static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)
+static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
 {
 	if (spsc_queue_peek(&entity->job_queue) == NULL)
 		return false;
@@ -204,12 +204,12 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)
  *
  * Cleanup and free the allocated resources.
  */
-void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
-			   struct amd_sched_entity *entity)
+void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
+			   struct drm_sched_entity *entity)
 {
 	int r;
 
-	if (!amd_sched_entity_is_initialized(sched, entity))
+	if (!drm_sched_entity_is_initialized(sched, entity))
 		return;
 	/**
 	 * The client will not queue more IBs during this fini, consume existing
@@ -219,10 +219,10 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
 		r = -ERESTARTSYS;
 	else
 		r = wait_event_killable(sched->job_scheduled,
-					amd_sched_entity_is_idle(entity));
-	amd_sched_entity_set_rq(entity, NULL);
+					drm_sched_entity_is_idle(entity));
+	drm_sched_entity_set_rq(entity, NULL);
 	if (r) {
-		struct amd_sched_job *job;
+		struct drm_sched_job *job;
 
 		/* Park the kernel for a moment to make sure it isn't processing
 		 * our enity.
@@ -236,37 +236,38 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
 			entity->dependency = NULL;
 		}
 
-		while ((job = to_amd_sched_job(spsc_queue_pop(&entity->job_queue)))) {
-			struct amd_sched_fence *s_fence = job->s_fence;
-			amd_sched_fence_scheduled(s_fence);
+		while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
+			struct drm_sched_fence *s_fence = job->s_fence;
+			drm_sched_fence_scheduled(s_fence);
 			dma_fence_set_error(&s_fence->finished, -ESRCH);
-			amd_sched_fence_finished(s_fence);
+			drm_sched_fence_finished(s_fence);
 			WARN_ON(s_fence->parent);
 			dma_fence_put(&s_fence->finished);
 			sched->ops->free_job(job);
 		}
 	}
 }
+EXPORT_SYMBOL(drm_sched_entity_fini);
 
-static void amd_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
+static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
 {
-	struct amd_sched_entity *entity =
-		container_of(cb, struct amd_sched_entity, cb);
+	struct drm_sched_entity *entity =
+		container_of(cb, struct drm_sched_entity, cb);
 	entity->dependency = NULL;
 	dma_fence_put(f);
-	amd_sched_wakeup(entity->sched);
+	drm_sched_wakeup(entity->sched);
 }
 
-static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb *cb)
+static void drm_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb *cb)
 {
-	struct amd_sched_entity *entity =
-		container_of(cb, struct amd_sched_entity, cb);
+	struct drm_sched_entity *entity =
+		container_of(cb, struct drm_sched_entity, cb);
 	entity->dependency = NULL;
 	dma_fence_put(f);
 }
 
-void amd_sched_entity_set_rq(struct amd_sched_entity *entity,
-			     struct amd_sched_rq *rq)
+void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
+			     struct drm_sched_rq *rq)
 {
 	if (entity->rq == rq)
 		return;
@@ -274,37 +275,39 @@ void amd_sched_entity_set_rq(struct amd_sched_entity *entity,
 	spin_lock(&entity->rq_lock);
 
 	if (entity->rq)
-		amd_sched_rq_remove_entity(entity->rq, entity);
+		drm_sched_rq_remove_entity(entity->rq, entity);
 
 	entity->rq = rq;
 	if (rq)
-		amd_sched_rq_add_entity(rq, entity);
+		drm_sched_rq_add_entity(rq, entity);
 
 	spin_unlock(&entity->rq_lock);
 }
+EXPORT_SYMBOL(drm_sched_entity_set_rq);
 
-bool amd_sched_dependency_optimized(struct dma_fence* fence,
-				    struct amd_sched_entity *entity)
+bool drm_sched_dependency_optimized(struct dma_fence* fence,
+				    struct drm_sched_entity *entity)
 {
-	struct amd_gpu_scheduler *sched = entity->sched;
-	struct amd_sched_fence *s_fence;
+	struct drm_gpu_scheduler *sched = entity->sched;
+	struct drm_sched_fence *s_fence;
 
 	if (!fence || dma_fence_is_signaled(fence))
 		return false;
 	if (fence->context == entity->fence_context)
 		return true;
-	s_fence = to_amd_sched_fence(fence);
+	s_fence = to_drm_sched_fence(fence);
 	if (s_fence && s_fence->sched == sched)
 		return true;
 
 	return false;
 }
+EXPORT_SYMBOL(drm_sched_dependency_optimized);
 
-static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
+static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
 {
-	struct amd_gpu_scheduler *sched = entity->sched;
+	struct drm_gpu_scheduler *sched = entity->sched;
 	struct dma_fence * fence = entity->dependency;
-	struct amd_sched_fence *s_fence;
+	struct drm_sched_fence *s_fence;
 
 	if (fence->context == entity->fence_context) {
 		/* We can ignore fences from ourself */
@@ -312,7 +315,7 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 		return false;
 	}
 
-	s_fence = to_amd_sched_fence(fence);
+	s_fence = to_drm_sched_fence(fence);
 	if (s_fence && s_fence->sched == sched) {
 
 		/*
@@ -323,7 +326,7 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 		dma_fence_put(entity->dependency);
 		entity->dependency = fence;
 		if (!dma_fence_add_callback(fence, &entity->cb,
-					    amd_sched_entity_clear_dep))
+					    drm_sched_entity_clear_dep))
 			return true;
 
 		/* Ignore it when it is already scheduled */
@@ -332,25 +335,25 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 	}
 
 	if (!dma_fence_add_callback(entity->dependency, &entity->cb,
-				    amd_sched_entity_wakeup))
+				    drm_sched_entity_wakeup))
 		return true;
 
 	dma_fence_put(entity->dependency);
 	return false;
 }
 
-static struct amd_sched_job *
-amd_sched_entity_pop_job(struct amd_sched_entity *entity)
+static struct drm_sched_job *
+drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 {
-	struct amd_gpu_scheduler *sched = entity->sched;
-	struct amd_sched_job *sched_job = to_amd_sched_job(
+	struct drm_gpu_scheduler *sched = entity->sched;
+	struct drm_sched_job *sched_job = to_drm_sched_job(
 						spsc_queue_peek(&entity->job_queue));
 
 	if (!sched_job)
 		return NULL;
 
 	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
-		if (amd_sched_entity_add_dependency_cb(entity))
+		if (drm_sched_entity_add_dependency_cb(entity))
 			return NULL;
 
 	/* skip jobs from entity that marked guilty */
@@ -368,13 +371,13 @@ amd_sched_entity_pop_job(struct amd_sched_entity *entity)
  *
  * Returns 0 for success, negative error code otherwise.
  */
-void amd_sched_entity_push_job(struct amd_sched_job *sched_job,
-			       struct amd_sched_entity *entity)
+void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
+			       struct drm_sched_entity *entity)
 {
-	struct amd_gpu_scheduler *sched = sched_job->sched;
+	struct drm_gpu_scheduler *sched = sched_job->sched;
 	bool first = false;
 
-	trace_amd_sched_job(sched_job, entity);
+	trace_drm_sched_job(sched_job, entity);
 
 	spin_lock(&entity->queue_lock);
 	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
@@ -385,25 +388,26 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job,
 	if (first) {
 		/* Add the entity to the run queue */
 		spin_lock(&entity->rq_lock);
-		amd_sched_rq_add_entity(entity->rq, entity);
+		drm_sched_rq_add_entity(entity->rq, entity);
 		spin_unlock(&entity->rq_lock);
-		amd_sched_wakeup(sched);
+		drm_sched_wakeup(sched);
 	}
 }
+EXPORT_SYMBOL(drm_sched_entity_push_job);
 
 /* job_finish is called after hw fence signaled
  */
-static void amd_sched_job_finish(struct work_struct *work)
+static void drm_sched_job_finish(struct work_struct *work)
 {
-	struct amd_sched_job *s_job = container_of(work, struct amd_sched_job,
+	struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
 						   finish_work);
-	struct amd_gpu_scheduler *sched = s_job->sched;
+	struct drm_gpu_scheduler *sched = s_job->sched;
 
 	/* remove job from ring_mirror_list */
 	spin_lock(&sched->job_list_lock);
 	list_del_init(&s_job->node);
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
-		struct amd_sched_job *next;
+		struct drm_sched_job *next;
 
 		spin_unlock(&sched->job_list_lock);
 		cancel_delayed_work_sync(&s_job->work_tdr);
@@ -411,7 +415,7 @@ static void amd_sched_job_finish(struct work_struct *work)
 
 		/* queue TDR for next job */
 		next = list_first_entry_or_null(&sched->ring_mirror_list,
-						struct amd_sched_job, node);
+						struct drm_sched_job, node);
 
 		if (next)
 			schedule_delayed_work(&next->work_tdr, sched->timeout);
@@ -421,42 +425,42 @@ static void amd_sched_job_finish(struct work_struct *work)
 	sched->ops->free_job(s_job);
 }
 
-static void amd_sched_job_finish_cb(struct dma_fence *f,
+static void drm_sched_job_finish_cb(struct dma_fence *f,
 				    struct dma_fence_cb *cb)
 {
-	struct amd_sched_job *job = container_of(cb, struct amd_sched_job,
+	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
 						 finish_cb);
 	schedule_work(&job->finish_work);
 }
 
-static void amd_sched_job_begin(struct amd_sched_job *s_job)
+static void drm_sched_job_begin(struct drm_sched_job *s_job)
 {
-	struct amd_gpu_scheduler *sched = s_job->sched;
+	struct drm_gpu_scheduler *sched = s_job->sched;
 
 	dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
-			       amd_sched_job_finish_cb);
+			       drm_sched_job_finish_cb);
 
 	spin_lock(&sched->job_list_lock);
 	list_add_tail(&s_job->node, &sched->ring_mirror_list);
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
 	    list_first_entry_or_null(&sched->ring_mirror_list,
-				     struct amd_sched_job, node) == s_job)
+				     struct drm_sched_job, node) == s_job)
 		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
 	spin_unlock(&sched->job_list_lock);
 }
 
-static void amd_sched_job_timedout(struct work_struct *work)
+static void drm_sched_job_timedout(struct work_struct *work)
 {
-	struct amd_sched_job *job = container_of(work, struct amd_sched_job,
+	struct drm_sched_job *job = container_of(work, struct drm_sched_job,
 						 work_tdr.work);
 
 	job->sched->ops->timedout_job(job);
 }
 
-void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *bad)
+void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 {
-	struct amd_sched_job *s_job;
-	struct amd_sched_entity *entity, *tmp;
+	struct drm_sched_job *s_job;
+	struct drm_sched_entity *entity, *tmp;
 	int i;;
 
 	spin_lock(&sched->job_list_lock);
@@ -471,14 +475,14 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_jo
 	}
 	spin_unlock(&sched->job_list_lock);
 
-	if (bad && bad->s_priority != AMD_SCHED_PRIORITY_KERNEL) {
+	if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
 		atomic_inc(&bad->karma);
 		/* don't increase @bad's karma if it's from KERNEL RQ,
 		 * becuase sometimes GPU hang would cause kernel jobs (like VM updating jobs)
 		 * corrupt but keep in mind that kernel jobs always considered good.
 		 */
-		for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_KERNEL; i++ ) {
-			struct amd_sched_rq *rq = &sched->sched_rq[i];
+		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; i++ ) {
+			struct drm_sched_rq *rq = &sched->sched_rq[i];
 
 			spin_lock(&rq->lock);
 			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
@@ -495,30 +499,22 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_jo
 		}
 	}
 }
+EXPORT_SYMBOL(drm_sched_hw_job_reset);
 
-void amd_sched_job_kickout(struct amd_sched_job *s_job)
-{
-	struct amd_gpu_scheduler *sched = s_job->sched;
-
-	spin_lock(&sched->job_list_lock);
-	list_del_init(&s_job->node);
-	spin_unlock(&sched->job_list_lock);
-}
-
-void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
+void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
 {
-	struct amd_sched_job *s_job, *tmp;
+	struct drm_sched_job *s_job, *tmp;
 	bool found_guilty = false;
 	int r;
 
 	spin_lock(&sched->job_list_lock);
 	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
-					 struct amd_sched_job, node);
+					 struct drm_sched_job, node);
 	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
 		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
 
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
-		struct amd_sched_fence *s_fence = s_job->s_fence;
+		struct drm_sched_fence *s_fence = s_job->s_fence;
 		struct dma_fence *fence;
 		uint64_t guilty_context;
 
@@ -536,45 +532,47 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
 		if (fence) {
 			s_fence->parent = dma_fence_get(fence);
 			r = dma_fence_add_callback(fence, &s_fence->cb,
-						   amd_sched_process_job);
+						   drm_sched_process_job);
 			if (r == -ENOENT)
-				amd_sched_process_job(fence, &s_fence->cb);
+				drm_sched_process_job(fence, &s_fence->cb);
 			else if (r)
 				DRM_ERROR("fence add callback failed (%d)\n",
 					  r);
 			dma_fence_put(fence);
 		} else {
-			amd_sched_process_job(NULL, &s_fence->cb);
+			drm_sched_process_job(NULL, &s_fence->cb);
 		}
 		spin_lock(&sched->job_list_lock);
 	}
 	spin_unlock(&sched->job_list_lock);
 }
+EXPORT_SYMBOL(drm_sched_job_recovery);
 
 /* init a sched_job with basic field */
-int amd_sched_job_init(struct amd_sched_job *job,
-		       struct amd_gpu_scheduler *sched,
-		       struct amd_sched_entity *entity,
+int drm_sched_job_init(struct drm_sched_job *job,
+		       struct drm_gpu_scheduler *sched,
+		       struct drm_sched_entity *entity,
 		       void *owner)
 {
 	job->sched = sched;
 	job->s_priority = entity->rq - sched->sched_rq;
-	job->s_fence = amd_sched_fence_create(entity, owner);
+	job->s_fence = drm_sched_fence_create(entity, owner);
 	if (!job->s_fence)
 		return -ENOMEM;
 	job->id = atomic64_inc_return(&sched->job_id_count);
 
-	INIT_WORK(&job->finish_work, amd_sched_job_finish);
+	INIT_WORK(&job->finish_work, drm_sched_job_finish);
 	INIT_LIST_HEAD(&job->node);
-	INIT_DELAYED_WORK(&job->work_tdr, amd_sched_job_timedout);
+	INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
 
 	return 0;
 }
+EXPORT_SYMBOL(drm_sched_job_init);
 
 /**
  * Return ture if we can push more jobs to the hw.
  */
-static bool amd_sched_ready(struct amd_gpu_scheduler *sched)
+static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
 {
 	return atomic_read(&sched->hw_rq_count) <
 		sched->hw_submission_limit;
@@ -583,27 +581,27 @@ static bool amd_sched_ready(struct amd_gpu_scheduler *sched)
 /**
  * Wake up the scheduler when it is ready
  */
-static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
+static void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
 {
-	if (amd_sched_ready(sched))
+	if (drm_sched_ready(sched))
 		wake_up_interruptible(&sched->wake_up_worker);
 }
 
 /**
  * Select next entity to process
 */
-static struct amd_sched_entity *
-amd_sched_select_entity(struct amd_gpu_scheduler *sched)
+static struct drm_sched_entity *
+drm_sched_select_entity(struct drm_gpu_scheduler *sched)
 {
-	struct amd_sched_entity *entity;
+	struct drm_sched_entity *entity;
 	int i;
 
-	if (!amd_sched_ready(sched))
+	if (!drm_sched_ready(sched))
 		return NULL;
 
 	/* Kernel run queue has higher priority than normal run queue*/
-	for (i = AMD_SCHED_PRIORITY_MAX - 1; i >= AMD_SCHED_PRIORITY_MIN; i--) {
-		entity = amd_sched_rq_select_entity(&sched->sched_rq[i]);
+	for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+		entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
 		if (entity)
 			break;
 	}
@@ -611,22 +609,22 @@ amd_sched_select_entity(struct amd_gpu_scheduler *sched)
 	return entity;
 }
 
-static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
+static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 {
-	struct amd_sched_fence *s_fence =
-		container_of(cb, struct amd_sched_fence, cb);
-	struct amd_gpu_scheduler *sched = s_fence->sched;
+	struct drm_sched_fence *s_fence =
+		container_of(cb, struct drm_sched_fence, cb);
+	struct drm_gpu_scheduler *sched = s_fence->sched;
 
 	dma_fence_get(&s_fence->finished);
 	atomic_dec(&sched->hw_rq_count);
-	amd_sched_fence_finished(s_fence);
+	drm_sched_fence_finished(s_fence);
 
-	trace_amd_sched_process_job(s_fence);
+	trace_drm_sched_process_job(s_fence);
 	dma_fence_put(&s_fence->finished);
 	wake_up_interruptible(&sched->wake_up_worker);
 }
 
-static bool amd_sched_blocked(struct amd_gpu_scheduler *sched)
+static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
 {
 	if (kthread_should_park()) {
 		kthread_parkme();
@@ -636,52 +634,52 @@ static bool amd_sched_blocked(struct amd_gpu_scheduler *sched)
 	return false;
 }
 
-static int amd_sched_main(void *param)
+static int drm_sched_main(void *param)
 {
 	struct sched_param sparam = {.sched_priority = 1};
-	struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
+	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
 	int r;
 
 	sched_setscheduler(current, SCHED_FIFO, &sparam);
 
 	while (!kthread_should_stop()) {
-		struct amd_sched_entity *entity = NULL;
-		struct amd_sched_fence *s_fence;
-		struct amd_sched_job *sched_job;
+		struct drm_sched_entity *entity = NULL;
+		struct drm_sched_fence *s_fence;
+		struct drm_sched_job *sched_job;
 		struct dma_fence *fence;
 
 		wait_event_interruptible(sched->wake_up_worker,
-					 (!amd_sched_blocked(sched) &&
-					  (entity = amd_sched_select_entity(sched))) ||
+					 (!drm_sched_blocked(sched) &&
+					  (entity = drm_sched_select_entity(sched))) ||
 					 kthread_should_stop());
 
 		if (!entity)
 			continue;
 
-		sched_job = amd_sched_entity_pop_job(entity);
+		sched_job = drm_sched_entity_pop_job(entity);
 		if (!sched_job)
 			continue;
 
 		s_fence = sched_job->s_fence;
 
 		atomic_inc(&sched->hw_rq_count);
-		amd_sched_job_begin(sched_job);
+		drm_sched_job_begin(sched_job);
 
 		fence = sched->ops->run_job(sched_job);
-		amd_sched_fence_scheduled(s_fence);
+		drm_sched_fence_scheduled(s_fence);
 
 		if (fence) {
 			s_fence->parent = dma_fence_get(fence);
 			r = dma_fence_add_callback(fence, &s_fence->cb,
-						   amd_sched_process_job);
+						   drm_sched_process_job);
 			if (r == -ENOENT)
-				amd_sched_process_job(fence, &s_fence->cb);
+				drm_sched_process_job(fence, &s_fence->cb);
 			else if (r)
 				DRM_ERROR("fence add callback failed (%d)\n",
 					  r);
 			dma_fence_put(fence);
 		} else {
-			amd_sched_process_job(NULL, &s_fence->cb);
+			drm_sched_process_job(NULL, &s_fence->cb);
 		}
 
 		wake_up(&sched->job_scheduled);
@@ -699,8 +697,8 @@ static int amd_sched_main(void *param)
  *
  * Return 0 on success, otherwise error code.
 */
-int amd_sched_init(struct amd_gpu_scheduler *sched,
-		   const struct amd_sched_backend_ops *ops,
+int drm_sched_init(struct drm_gpu_scheduler *sched,
+		   const struct drm_sched_backend_ops *ops,
 		   unsigned hw_submission,
 		   unsigned hang_limit,
 		   long timeout,
@@ -712,8 +710,8 @@ int amd_sched_init(struct amd_gpu_scheduler *sched,
 	sched->name = name;
 	sched->timeout = timeout;
 	sched->hang_limit = hang_limit;
-	for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_MAX; i++)
-		amd_sched_rq_init(&sched->sched_rq[i]);
+	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
+		drm_sched_rq_init(&sched->sched_rq[i]);
 
 	init_waitqueue_head(&sched->wake_up_worker);
 	init_waitqueue_head(&sched->job_scheduled);
@@ -723,7 +721,7 @@ int amd_sched_init(struct amd_gpu_scheduler *sched,
 	atomic64_set(&sched->job_id_count, 0);
 
 	/* Each scheduler will run on a seperate kernel thread */
-	sched->thread = kthread_run(amd_sched_main, sched, sched->name);
+	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
 	if (IS_ERR(sched->thread)) {
 		DRM_ERROR("Failed to create scheduler for %s.\n", name);
 		return PTR_ERR(sched->thread);
@@ -731,14 +729,16 @@ int amd_sched_init(struct amd_gpu_scheduler *sched,
 
 	return 0;
 }
+EXPORT_SYMBOL(drm_sched_init);
 
 /**
  * Destroy a gpu scheduler
  *
  * @sched	The pointer to the scheduler
  */
-void amd_sched_fini(struct amd_gpu_scheduler *sched)
+void drm_sched_fini(struct drm_gpu_scheduler *sched)
 {
 	if (sched->thread)
 		kthread_stop(sched->thread);
 }
+EXPORT_SYMBOL(drm_sched_fini);
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
similarity index 58%
rename from drivers/gpu/drm/amd/scheduler/sched_fence.c
rename to drivers/gpu/drm/scheduler/sched_fence.c
index 33f54d0a5c4fce3662874e50804922b01f810750..69aab086b9133383ca5841d6274066436d1abdc2 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -19,20 +19,20 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
- *
  */
+
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <drm/drmP.h>
-#include "gpu_scheduler.h"
+#include <drm/gpu_scheduler.h>
 
 static struct kmem_cache *sched_fence_slab;
 
-int amd_sched_fence_slab_init(void)
+static int __init drm_sched_fence_slab_init(void)
 {
 	sched_fence_slab = kmem_cache_create(
-		"amd_sched_fence", sizeof(struct amd_sched_fence), 0,
+		"drm_sched_fence", sizeof(struct drm_sched_fence), 0,
 		SLAB_HWCACHE_ALIGN, NULL);
 	if (!sched_fence_slab)
 		return -ENOMEM;
@@ -40,36 +40,13 @@ int amd_sched_fence_slab_init(void)
 	return 0;
 }
 
-void amd_sched_fence_slab_fini(void)
+static void __exit drm_sched_fence_slab_fini(void)
 {
 	rcu_barrier();
 	kmem_cache_destroy(sched_fence_slab);
 }
 
-struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *entity,
-					       void *owner)
-{
-	struct amd_sched_fence *fence = NULL;
-	unsigned seq;
-
-	fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL);
-	if (fence == NULL)
-		return NULL;
-
-	fence->owner = owner;
-	fence->sched = entity->sched;
-	spin_lock_init(&fence->lock);
-
-	seq = atomic_inc_return(&entity->fence_seq);
-	dma_fence_init(&fence->scheduled, &amd_sched_fence_ops_scheduled,
-		       &fence->lock, entity->fence_context, seq);
-	dma_fence_init(&fence->finished, &amd_sched_fence_ops_finished,
-		       &fence->lock, entity->fence_context + 1, seq);
-
-	return fence;
-}
-
-void amd_sched_fence_scheduled(struct amd_sched_fence *fence)
+void drm_sched_fence_scheduled(struct drm_sched_fence *fence)
 {
 	int ret = dma_fence_signal(&fence->scheduled);
 
@@ -81,7 +58,7 @@ void amd_sched_fence_scheduled(struct amd_sched_fence *fence)
 				"was already signaled\n");
 }
 
-void amd_sched_fence_finished(struct amd_sched_fence *fence)
+void drm_sched_fence_finished(struct drm_sched_fence *fence)
 {
 	int ret = dma_fence_signal(&fence->finished);
 
@@ -93,18 +70,18 @@ void amd_sched_fence_finished(struct amd_sched_fence *fence)
 				"was already signaled\n");
 }
 
-static const char *amd_sched_fence_get_driver_name(struct dma_fence *fence)
+static const char *drm_sched_fence_get_driver_name(struct dma_fence *fence)
 {
-	return "amd_sched";
+	return "drm_sched";
 }
 
-static const char *amd_sched_fence_get_timeline_name(struct dma_fence *f)
+static const char *drm_sched_fence_get_timeline_name(struct dma_fence *f)
 {
-	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+	struct drm_sched_fence *fence = to_drm_sched_fence(f);
 	return (const char *)fence->sched->name;
 }
 
-static bool amd_sched_fence_enable_signaling(struct dma_fence *f)
+static bool drm_sched_fence_enable_signaling(struct dma_fence *f)
 {
 	return true;
 }
@@ -116,10 +93,10 @@ static bool amd_sched_fence_enable_signaling(struct dma_fence *f)
  *
  * Free up the fence memory after the RCU grace period.
  */
-static void amd_sched_fence_free(struct rcu_head *rcu)
+static void drm_sched_fence_free(struct rcu_head *rcu)
 {
 	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
-	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+	struct drm_sched_fence *fence = to_drm_sched_fence(f);
 
 	dma_fence_put(fence->parent);
 	kmem_cache_free(sched_fence_slab, fence);
@@ -133,11 +110,11 @@ static void amd_sched_fence_free(struct rcu_head *rcu)
  * This function is called when the reference count becomes zero.
  * It just RCU schedules freeing up the fence.
  */
-static void amd_sched_fence_release_scheduled(struct dma_fence *f)
+static void drm_sched_fence_release_scheduled(struct dma_fence *f)
 {
-	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+	struct drm_sched_fence *fence = to_drm_sched_fence(f);
 
-	call_rcu(&fence->finished.rcu, amd_sched_fence_free);
+	call_rcu(&fence->finished.rcu, drm_sched_fence_free);
 }
 
 /**
@@ -147,27 +124,68 @@ static void amd_sched_fence_release_scheduled(struct dma_fence *f)
  *
  * Drop the extra reference from the scheduled fence to the base fence.
  */
-static void amd_sched_fence_release_finished(struct dma_fence *f)
+static void drm_sched_fence_release_finished(struct dma_fence *f)
 {
-	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+	struct drm_sched_fence *fence = to_drm_sched_fence(f);
 
 	dma_fence_put(&fence->scheduled);
 }
 
-const struct dma_fence_ops amd_sched_fence_ops_scheduled = {
-	.get_driver_name = amd_sched_fence_get_driver_name,
-	.get_timeline_name = amd_sched_fence_get_timeline_name,
-	.enable_signaling = amd_sched_fence_enable_signaling,
+const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
+	.get_driver_name = drm_sched_fence_get_driver_name,
+	.get_timeline_name = drm_sched_fence_get_timeline_name,
+	.enable_signaling = drm_sched_fence_enable_signaling,
 	.signaled = NULL,
 	.wait = dma_fence_default_wait,
-	.release = amd_sched_fence_release_scheduled,
+	.release = drm_sched_fence_release_scheduled,
 };
 
-const struct dma_fence_ops amd_sched_fence_ops_finished = {
-	.get_driver_name = amd_sched_fence_get_driver_name,
-	.get_timeline_name = amd_sched_fence_get_timeline_name,
-	.enable_signaling = amd_sched_fence_enable_signaling,
+const struct dma_fence_ops drm_sched_fence_ops_finished = {
+	.get_driver_name = drm_sched_fence_get_driver_name,
+	.get_timeline_name = drm_sched_fence_get_timeline_name,
+	.enable_signaling = drm_sched_fence_enable_signaling,
 	.signaled = NULL,
 	.wait = dma_fence_default_wait,
-	.release = amd_sched_fence_release_finished,
+	.release = drm_sched_fence_release_finished,
 };
+
+struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
+{
+	if (f->ops == &drm_sched_fence_ops_scheduled)
+		return container_of(f, struct drm_sched_fence, scheduled);
+
+	if (f->ops == &drm_sched_fence_ops_finished)
+		return container_of(f, struct drm_sched_fence, finished);
+
+	return NULL;
+}
+EXPORT_SYMBOL(to_drm_sched_fence);
+
+struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
+					       void *owner)
+{
+	struct drm_sched_fence *fence = NULL;
+	unsigned seq;
+
+	fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL);
+	if (fence == NULL)
+		return NULL;
+
+	fence->owner = owner;
+	fence->sched = entity->sched;
+	spin_lock_init(&fence->lock);
+
+	seq = atomic_inc_return(&entity->fence_seq);
+	dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled,
+		       &fence->lock, entity->fence_context, seq);
+	dma_fence_init(&fence->finished, &drm_sched_fence_ops_finished,
+		       &fence->lock, entity->fence_context + 1, seq);
+
+	return fence;
+}
+
+module_init(drm_sched_fence_slab_init);
+module_exit(drm_sched_fence_slab_fini);
+
+MODULE_DESCRIPTION("DRM GPU scheduler");
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c
index 88d1dc6408afd380e9221d01cf6c8c077a790092..55b6967d27e10b9e3d5b768e884510a48433b3c4 100644
--- a/drivers/gpu/drm/sti/sti_drv.c
+++ b/drivers/gpu/drm/sti/sti_drv.c
@@ -17,6 +17,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_of.h>
 
@@ -138,16 +139,9 @@ static int sti_atomic_check(struct drm_device *dev,
 	return ret;
 }
 
-static void sti_output_poll_changed(struct drm_device *ddev)
-{
-	struct sti_private *private = ddev->dev_private;
-
-	drm_fbdev_cma_hotplug_event(private->fbdev);
-}
-
 static const struct drm_mode_config_funcs sti_mode_config_funcs = {
 	.fb_create = drm_gem_fb_create,
-	.output_poll_changed = sti_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = sti_atomic_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
@@ -230,11 +224,7 @@ static void sti_cleanup(struct drm_device *ddev)
 {
 	struct sti_private *private = ddev->dev_private;
 
-	if (private->fbdev) {
-		drm_fbdev_cma_fini(private->fbdev);
-		private->fbdev = NULL;
-	}
-
+	drm_fb_cma_fbdev_fini(ddev);
 	drm_kms_helper_poll_fini(ddev);
 	component_unbind_all(ddev->dev, ddev);
 	kfree(private);
@@ -244,8 +234,6 @@ static void sti_cleanup(struct drm_device *ddev)
 static int sti_bind(struct device *dev)
 {
 	struct drm_device *ddev;
-	struct sti_private *private;
-	struct drm_fbdev_cma *fbdev;
 	int ret;
 
 	ddev = drm_dev_alloc(&sti_driver, dev);
@@ -266,15 +254,10 @@ static int sti_bind(struct device *dev)
 
 	drm_mode_config_reset(ddev);
 
-	private = ddev->dev_private;
 	if (ddev->mode_config.num_connector) {
-		fbdev = drm_fbdev_cma_init(ddev, 32,
-					   ddev->mode_config.num_connector);
-		if (IS_ERR(fbdev)) {
+		ret = drm_fb_cma_fbdev_init(ddev, 32, 0);
+		if (ret)
 			DRM_DEBUG_DRIVER("Warning: fails to create fbdev\n");
-			fbdev = NULL;
-		}
-		private->fbdev = fbdev;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/sti/sti_drv.h b/drivers/gpu/drm/sti/sti_drv.h
index abc49b43566e74468e3120c13f4b23c5fd74c30b..4b41142a22e41405dbd306ae890462eeb699706b 100644
--- a/drivers/gpu/drm/sti/sti_drv.h
+++ b/drivers/gpu/drm/sti/sti_drv.h
@@ -24,7 +24,6 @@ struct sti_private {
 	struct sti_compositor *compo;
 	struct drm_property *plane_zorder_property;
 	struct drm_device *drm_dev;
-	struct drm_fbdev_cma *fbdev;
 };
 
 extern struct platform_driver sti_tvout_driver;
diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c
index 2d6e9ca0450b822ee548120b13d7bcff3e604dd0..8fe954c27fbacdfb5858a2cac1a11cb719e185c4 100644
--- a/drivers/gpu/drm/stm/drv.c
+++ b/drivers/gpu/drm/stm/drv.c
@@ -14,6 +14,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
@@ -23,35 +24,19 @@
 #define STM_MAX_FB_WIDTH	2048
 #define STM_MAX_FB_HEIGHT	2048 /* same as width to handle orientation */
 
-static void drv_output_poll_changed(struct drm_device *ddev)
-{
-	struct ltdc_device *ldev = ddev->dev_private;
-
-	drm_fbdev_cma_hotplug_event(ldev->fbdev);
-}
-
 static const struct drm_mode_config_funcs drv_mode_config_funcs = {
 	.fb_create = drm_gem_fb_create,
-	.output_poll_changed = drv_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
 
-static void drv_lastclose(struct drm_device *ddev)
-{
-	struct ltdc_device *ldev = ddev->dev_private;
-
-	DRM_DEBUG("%s\n", __func__);
-
-	drm_fbdev_cma_restore_mode(ldev->fbdev);
-}
-
 DEFINE_DRM_GEM_CMA_FOPS(drv_driver_fops);
 
 static struct drm_driver drv_driver = {
 	.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME |
 			   DRIVER_ATOMIC,
-	.lastclose = drv_lastclose,
+	.lastclose = drm_fb_helper_lastclose,
 	.name = "stm",
 	.desc = "STMicroelectronics SoC DRM",
 	.date = "20170330",
@@ -78,7 +63,6 @@ static struct drm_driver drv_driver = {
 static int drv_load(struct drm_device *ddev)
 {
 	struct platform_device *pdev = to_platform_device(ddev->dev);
-	struct drm_fbdev_cma *fbdev;
 	struct ltdc_device *ldev;
 	int ret;
 
@@ -111,14 +95,9 @@ static int drv_load(struct drm_device *ddev)
 	drm_kms_helper_poll_init(ddev);
 
 	if (ddev->mode_config.num_connector) {
-		ldev = ddev->dev_private;
-		fbdev = drm_fbdev_cma_init(ddev, 16,
-					   ddev->mode_config.num_connector);
-		if (IS_ERR(fbdev)) {
+		ret = drm_fb_cma_fbdev_init(ddev, 16, 0);
+		if (ret)
 			DRM_DEBUG("Warning: fails to create fbdev\n");
-			fbdev = NULL;
-		}
-		ldev->fbdev = fbdev;
 	}
 
 	platform_set_drvdata(pdev, ddev);
@@ -131,14 +110,9 @@ static int drv_load(struct drm_device *ddev)
 
 static void drv_unload(struct drm_device *ddev)
 {
-	struct ltdc_device *ldev = ddev->dev_private;
-
 	DRM_DEBUG("%s\n", __func__);
 
-	if (ldev->fbdev) {
-		drm_fbdev_cma_fini(ldev->fbdev);
-		ldev->fbdev = NULL;
-	}
+	drm_fb_cma_fbdev_fini(ddev);
 	drm_kms_helper_poll_fini(ddev);
 	ltdc_unload(ddev);
 	drm_mode_config_cleanup(ddev);
diff --git a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
index 82dcb20cdaa34df23a8eb3144c51d379e149ec29..fd02506274dab996a08e70e1e04db3cd5af0d18c 100644
--- a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
+++ b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
@@ -290,11 +290,6 @@ static int dw_mipi_dsi_stm_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		DRM_ERROR("Unable to get resource\n");
-		return -ENODEV;
-	}
-
 	dsi->base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(dsi->base)) {
 		DRM_ERROR("Unable to get dsi registers\n");
diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index 394613b0fd467ce4b23c4bbd71b4970380a1686a..6dc5d4ec4e17c8831a7d1c093a5eb57d1548df4a 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -901,12 +901,6 @@ int ltdc_load(struct drm_device *ddev)
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		DRM_ERROR("Unable to get resource\n");
-		ret = -ENODEV;
-		goto err;
-	}
-
 	ldev->regs = devm_ioremap_resource(dev, res);
 	if (IS_ERR(ldev->regs)) {
 		DRM_ERROR("Unable to get ltdc registers\n");
diff --git a/drivers/gpu/drm/stm/ltdc.h b/drivers/gpu/drm/stm/ltdc.h
index d5da74d24995be9eb7253f5c39fb8e9e8bb5ac85..edd1c0a446d1ecd9997b987b9b88047852afb92b 100644
--- a/drivers/gpu/drm/stm/ltdc.h
+++ b/drivers/gpu/drm/stm/ltdc.h
@@ -20,7 +20,6 @@ struct ltdc_caps {
 };
 
 struct ltdc_device {
-	struct drm_fbdev_cma *fbdev;
 	void __iomem *regs;
 	struct clk *pixel_clk;	/* lcd pixel clock */
 	struct mutex err_lock;	/* protecting error_status */
diff --git a/drivers/gpu/drm/sun4i/Makefile b/drivers/gpu/drm/sun4i/Makefile
index 82a6ac57fbe337dfaf897f4b5d8c754e0a48eec6..2b37a6abbb1d7d7e073c05684634a03e8e73c14e 100644
--- a/drivers/gpu/drm/sun4i/Makefile
+++ b/drivers/gpu/drm/sun4i/Makefile
@@ -15,6 +15,7 @@ sun8i-mixer-y			+= sun8i_mixer.o sun8i_ui_layer.o \
 
 sun4i-tcon-y			+= sun4i_crtc.o
 sun4i-tcon-y			+= sun4i_dotclock.o
+sun4i-tcon-y			+= sun4i_lvds.o
 sun4i-tcon-y			+= sun4i_tcon.o
 sun4i-tcon-y			+= sun4i_rgb.o
 
diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
index d401156490f36c890f49f23d1eab1b3f7691d108..023f39bda633de70825243b3a28335e2686091ed 100644
--- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c
+++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
@@ -17,8 +17,9 @@
 #include "sun4i_dotclock.h"
 
 struct sun4i_dclk {
-	struct clk_hw	hw;
-	struct regmap	*regmap;
+	struct clk_hw		hw;
+	struct regmap		*regmap;
+	struct sun4i_tcon	*tcon;
 };
 
 static inline struct sun4i_dclk *hw_to_dclk(struct clk_hw *hw)
@@ -73,11 +74,13 @@ static unsigned long sun4i_dclk_recalc_rate(struct clk_hw *hw,
 static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate,
 				  unsigned long *parent_rate)
 {
+	struct sun4i_dclk *dclk = hw_to_dclk(hw);
+	struct sun4i_tcon *tcon = dclk->tcon;
 	unsigned long best_parent = 0;
 	u8 best_div = 1;
 	int i;
 
-	for (i = 6; i <= 127; i++) {
+	for (i = tcon->dclk_min_div; i <= tcon->dclk_max_div; i++) {
 		unsigned long ideal = rate * i;
 		unsigned long rounded;
 
@@ -167,6 +170,7 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon)
 	dclk = devm_kzalloc(dev, sizeof(*dclk), GFP_KERNEL);
 	if (!dclk)
 		return -ENOMEM;
+	dclk->tcon = tcon;
 
 	init.name = clk_name;
 	init.ops = &sun4i_dclk_ops;
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 49215d91c853892ef25a4a66a8c7aaba8efc0ff1..4570da0227b4e49523b7f9c697fce54777dacd7e 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -26,20 +26,13 @@
 #include "sun4i_framebuffer.h"
 #include "sun4i_tcon.h"
 
-static void sun4i_drv_lastclose(struct drm_device *dev)
-{
-	struct sun4i_drv *drv = dev->dev_private;
-
-	drm_fbdev_cma_restore_mode(drv->fbdev);
-}
-
 DEFINE_DRM_GEM_CMA_FOPS(sun4i_drv_fops);
 
 static struct drm_driver sun4i_drv_driver = {
 	.driver_features	= DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_ATOMIC,
 
 	/* Generic Operations */
-	.lastclose		= sun4i_drv_lastclose,
+	.lastclose		= drm_fb_helper_lastclose,
 	.fops			= &sun4i_drv_fops,
 	.name			= "sun4i-drm",
 	.desc			= "Allwinner sun4i Display Engine",
@@ -126,10 +119,9 @@ static int sun4i_drv_bind(struct device *dev)
 	sun4i_remove_framebuffers();
 
 	/* Create our framebuffer */
-	drv->fbdev = sun4i_framebuffer_init(drm);
-	if (IS_ERR(drv->fbdev)) {
+	ret = sun4i_framebuffer_init(drm);
+	if (ret) {
 		dev_err(drm->dev, "Couldn't create our framebuffer\n");
-		ret = PTR_ERR(drv->fbdev);
 		goto cleanup_mode_config;
 	}
 
@@ -347,6 +339,7 @@ static const struct of_device_id sun4i_drv_of_table[] = {
 	{ .compatible = "allwinner,sun6i-a31s-display-engine" },
 	{ .compatible = "allwinner,sun7i-a20-display-engine" },
 	{ .compatible = "allwinner,sun8i-a33-display-engine" },
+	{ .compatible = "allwinner,sun8i-a83t-display-engine" },
 	{ .compatible = "allwinner,sun8i-v3s-display-engine" },
 	{ }
 };
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.h b/drivers/gpu/drm/sun4i/sun4i_drv.h
index a960c89270cc74e250eec8b443e831d13f6b8a29..2825f140da547369829e29ea7e350248455def95 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.h
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.h
@@ -20,8 +20,6 @@
 struct sun4i_drv {
 	struct list_head	engine_list;
 	struct list_head	tcon_list;
-
-	struct drm_fbdev_cma	*fbdev;
 };
 
 #endif /* _SUN4I_DRV_H_ */
diff --git a/drivers/gpu/drm/sun4i/sun4i_framebuffer.c b/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
index 2992f0a6b349dbbe62e97fb2accdd686a08899b8..38a36c0dfa2f8de2d0b5f3ed2730a2ce9204e59f 100644
--- a/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
+++ b/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
@@ -11,6 +11,7 @@
  */
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drmP.h>
@@ -18,21 +19,14 @@
 #include "sun4i_drv.h"
 #include "sun4i_framebuffer.h"
 
-static void sun4i_de_output_poll_changed(struct drm_device *drm)
-{
-	struct sun4i_drv *drv = drm->dev_private;
-
-	drm_fbdev_cma_hotplug_event(drv->fbdev);
-}
-
 static const struct drm_mode_config_funcs sun4i_de_mode_config_funcs = {
-	.output_poll_changed	= sun4i_de_output_poll_changed,
+	.output_poll_changed	= drm_fb_helper_output_poll_changed,
 	.atomic_check		= drm_atomic_helper_check,
 	.atomic_commit		= drm_atomic_helper_commit,
 	.fb_create		= drm_gem_fb_create,
 };
 
-struct drm_fbdev_cma *sun4i_framebuffer_init(struct drm_device *drm)
+int sun4i_framebuffer_init(struct drm_device *drm)
 {
 	drm_mode_config_reset(drm);
 
@@ -41,12 +35,10 @@ struct drm_fbdev_cma *sun4i_framebuffer_init(struct drm_device *drm)
 
 	drm->mode_config.funcs = &sun4i_de_mode_config_funcs;
 
-	return drm_fbdev_cma_init(drm, 32, drm->mode_config.num_connector);
+	return drm_fb_cma_fbdev_init(drm, 32, 0);
 }
 
 void sun4i_framebuffer_free(struct drm_device *drm)
 {
-	struct sun4i_drv *drv = drm->dev_private;
-
-	drm_fbdev_cma_fini(drv->fbdev);
+	drm_fb_cma_fbdev_fini(drm);
 }
diff --git a/drivers/gpu/drm/sun4i/sun4i_framebuffer.h b/drivers/gpu/drm/sun4i/sun4i_framebuffer.h
index 3afd65252ee0c477cfdf2c817194b9dc8727b8f9..7ef0aed8384ce8741b9aafeb7cbe490f75ef4e94 100644
--- a/drivers/gpu/drm/sun4i/sun4i_framebuffer.h
+++ b/drivers/gpu/drm/sun4i/sun4i_framebuffer.h
@@ -13,7 +13,7 @@
 #ifndef _SUN4I_FRAMEBUFFER_H_
 #define _SUN4I_FRAMEBUFFER_H_
 
-struct drm_fbdev_cma *sun4i_framebuffer_init(struct drm_device *drm);
+int sun4i_framebuffer_init(struct drm_device *drm);
 void sun4i_framebuffer_free(struct drm_device *drm);
 
 #endif /* _SUN4I_FRAMEBUFFER_H_ */
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
index dda904ec0534cd9d84d3967b94bf5fa4f444df9e..500b6fb3e0284d2fdfc71265a64f0d5b51fe4f99 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
@@ -175,11 +175,31 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder,
 	writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG);
 }
 
+static enum drm_mode_status sun4i_hdmi_mode_valid(struct drm_encoder *encoder,
+					const struct drm_display_mode *mode)
+{
+	struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder);
+	unsigned long rate = mode->clock * 1000;
+	unsigned long diff = rate / 200; /* +-0.5% allowed by HDMI spec */
+	long rounded_rate;
+
+	/* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */
+	if (rate > 165000000)
+		return MODE_CLOCK_HIGH;
+	rounded_rate = clk_round_rate(hdmi->tmds_clk, rate);
+	if (rounded_rate > 0 &&
+	    max_t(unsigned long, rounded_rate, rate) -
+	    min_t(unsigned long, rounded_rate, rate) < diff)
+		return MODE_OK;
+	return MODE_NOCLOCK;
+}
+
 static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = {
 	.atomic_check	= sun4i_hdmi_atomic_check,
 	.disable	= sun4i_hdmi_disable,
 	.enable		= sun4i_hdmi_enable,
 	.mode_set	= sun4i_hdmi_mode_set,
+	.mode_valid	= sun4i_hdmi_mode_valid,
 };
 
 static const struct drm_encoder_funcs sun4i_hdmi_funcs = {
diff --git a/drivers/gpu/drm/sun4i/sun4i_lvds.c b/drivers/gpu/drm/sun4i/sun4i_lvds.c
new file mode 100644
index 0000000000000000000000000000000000000000..be3f14d7746deee1b0183c113dc653e4c7e3629d
--- /dev/null
+++ b/drivers/gpu/drm/sun4i/sun4i_lvds.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ */
+
+#include <linux/clk.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_of.h>
+#include <drm/drm_panel.h>
+
+#include "sun4i_crtc.h"
+#include "sun4i_tcon.h"
+#include "sun4i_lvds.h"
+
+struct sun4i_lvds {
+	struct drm_connector	connector;
+	struct drm_encoder	encoder;
+
+	struct sun4i_tcon	*tcon;
+};
+
+static inline struct sun4i_lvds *
+drm_connector_to_sun4i_lvds(struct drm_connector *connector)
+{
+	return container_of(connector, struct sun4i_lvds,
+			    connector);
+}
+
+static inline struct sun4i_lvds *
+drm_encoder_to_sun4i_lvds(struct drm_encoder *encoder)
+{
+	return container_of(encoder, struct sun4i_lvds,
+			    encoder);
+}
+
+static int sun4i_lvds_get_modes(struct drm_connector *connector)
+{
+	struct sun4i_lvds *lvds =
+		drm_connector_to_sun4i_lvds(connector);
+	struct sun4i_tcon *tcon = lvds->tcon;
+
+	return drm_panel_get_modes(tcon->panel);
+}
+
+static struct drm_connector_helper_funcs sun4i_lvds_con_helper_funcs = {
+	.get_modes	= sun4i_lvds_get_modes,
+};
+
+static void
+sun4i_lvds_connector_destroy(struct drm_connector *connector)
+{
+	struct sun4i_lvds *lvds = drm_connector_to_sun4i_lvds(connector);
+	struct sun4i_tcon *tcon = lvds->tcon;
+
+	drm_panel_detach(tcon->panel);
+	drm_connector_cleanup(connector);
+}
+
+static const struct drm_connector_funcs sun4i_lvds_con_funcs = {
+	.fill_modes		= drm_helper_probe_single_connector_modes,
+	.destroy		= sun4i_lvds_connector_destroy,
+	.reset			= drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state	= drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state	= drm_atomic_helper_connector_destroy_state,
+};
+
+static void sun4i_lvds_encoder_enable(struct drm_encoder *encoder)
+{
+	struct sun4i_lvds *lvds = drm_encoder_to_sun4i_lvds(encoder);
+	struct sun4i_tcon *tcon = lvds->tcon;
+
+	DRM_DEBUG_DRIVER("Enabling LVDS output\n");
+
+	if (!IS_ERR(tcon->panel)) {
+		drm_panel_prepare(tcon->panel);
+		drm_panel_enable(tcon->panel);
+	}
+}
+
+static void sun4i_lvds_encoder_disable(struct drm_encoder *encoder)
+{
+	struct sun4i_lvds *lvds = drm_encoder_to_sun4i_lvds(encoder);
+	struct sun4i_tcon *tcon = lvds->tcon;
+
+	DRM_DEBUG_DRIVER("Disabling LVDS output\n");
+
+	if (!IS_ERR(tcon->panel)) {
+		drm_panel_disable(tcon->panel);
+		drm_panel_unprepare(tcon->panel);
+	}
+}
+
+static const struct drm_encoder_helper_funcs sun4i_lvds_enc_helper_funcs = {
+	.disable	= sun4i_lvds_encoder_disable,
+	.enable		= sun4i_lvds_encoder_enable,
+};
+
+static const struct drm_encoder_funcs sun4i_lvds_enc_funcs = {
+	.destroy	= drm_encoder_cleanup,
+};
+
+int sun4i_lvds_init(struct drm_device *drm, struct sun4i_tcon *tcon)
+{
+	struct drm_encoder *encoder;
+	struct drm_bridge *bridge;
+	struct sun4i_lvds *lvds;
+	int ret;
+
+	lvds = devm_kzalloc(drm->dev, sizeof(*lvds), GFP_KERNEL);
+	if (!lvds)
+		return -ENOMEM;
+	lvds->tcon = tcon;
+	encoder = &lvds->encoder;
+
+	ret = drm_of_find_panel_or_bridge(tcon->dev->of_node, 1, 0,
+					  &tcon->panel, &bridge);
+	if (ret) {
+		dev_info(drm->dev, "No panel or bridge found... LVDS output disabled\n");
+		return 0;
+	}
+
+	drm_encoder_helper_add(&lvds->encoder,
+			       &sun4i_lvds_enc_helper_funcs);
+	ret = drm_encoder_init(drm,
+			       &lvds->encoder,
+			       &sun4i_lvds_enc_funcs,
+			       DRM_MODE_ENCODER_LVDS,
+			       NULL);
+	if (ret) {
+		dev_err(drm->dev, "Couldn't initialise the lvds encoder\n");
+		goto err_out;
+	}
+
+	/* The LVDS encoder can only work with the TCON channel 0 */
+	lvds->encoder.possible_crtcs = BIT(drm_crtc_index(&tcon->crtc->crtc));
+
+	if (tcon->panel) {
+		drm_connector_helper_add(&lvds->connector,
+					 &sun4i_lvds_con_helper_funcs);
+		ret = drm_connector_init(drm, &lvds->connector,
+					 &sun4i_lvds_con_funcs,
+					 DRM_MODE_CONNECTOR_LVDS);
+		if (ret) {
+			dev_err(drm->dev, "Couldn't initialise the lvds connector\n");
+			goto err_cleanup_connector;
+		}
+
+		drm_mode_connector_attach_encoder(&lvds->connector,
+						  &lvds->encoder);
+
+		ret = drm_panel_attach(tcon->panel, &lvds->connector);
+		if (ret) {
+			dev_err(drm->dev, "Couldn't attach our panel\n");
+			goto err_cleanup_connector;
+		}
+	}
+
+	if (bridge) {
+		ret = drm_bridge_attach(encoder, bridge, NULL);
+		if (ret) {
+			dev_err(drm->dev, "Couldn't attach our bridge\n");
+			goto err_cleanup_connector;
+		}
+	}
+
+	return 0;
+
+err_cleanup_connector:
+	drm_encoder_cleanup(&lvds->encoder);
+err_out:
+	return ret;
+}
+EXPORT_SYMBOL(sun4i_lvds_init);
diff --git a/drivers/gpu/drm/sun4i/sun4i_lvds.h b/drivers/gpu/drm/sun4i/sun4i_lvds.h
new file mode 100644
index 0000000000000000000000000000000000000000..f3e90faa308210710fda7907d1a4caa40ee5dc74
--- /dev/null
+++ b/drivers/gpu/drm/sun4i/sun4i_lvds.h
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2017 Free Electrons
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ */
+
+#ifndef _SUN4I_LVDS_H_
+#define _SUN4I_LVDS_H_
+
+int sun4i_lvds_init(struct drm_device *drm, struct sun4i_tcon *tcon);
+
+#endif /* _SUN4I_LVDS_H_ */
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index a1ed462c24307b8b79eea334efdab6a5a17027a2..3c15cf24b50360918253ed5bcff4d4a73a1e6cd0 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -31,10 +31,52 @@
 #include "sun4i_crtc.h"
 #include "sun4i_dotclock.h"
 #include "sun4i_drv.h"
+#include "sun4i_lvds.h"
 #include "sun4i_rgb.h"
 #include "sun4i_tcon.h"
 #include "sunxi_engine.h"
 
+static struct drm_connector *sun4i_tcon_get_connector(const struct drm_encoder *encoder)
+{
+	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
+
+	drm_connector_list_iter_begin(encoder->dev, &iter);
+	drm_for_each_connector_iter(connector, &iter)
+		if (connector->encoder == encoder) {
+			drm_connector_list_iter_end(&iter);
+			return connector;
+		}
+	drm_connector_list_iter_end(&iter);
+
+	return NULL;
+}
+
+static int sun4i_tcon_get_pixel_depth(const struct drm_encoder *encoder)
+{
+	struct drm_connector *connector;
+	struct drm_display_info *info;
+
+	connector = sun4i_tcon_get_connector(encoder);
+	if (!connector)
+		return -EINVAL;
+
+	info = &connector->display_info;
+	if (info->num_bus_formats != 1)
+		return -EINVAL;
+
+	switch (info->bus_formats[0]) {
+	case MEDIA_BUS_FMT_RGB666_1X7X3_SPWG:
+		return 18;
+
+	case MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA:
+	case MEDIA_BUS_FMT_RGB888_1X7X4_SPWG:
+		return 24;
+	}
+
+	return -EINVAL;
+}
+
 static void sun4i_tcon_channel_set_status(struct sun4i_tcon *tcon, int channel,
 					  bool enabled)
 {
@@ -65,13 +107,63 @@ static void sun4i_tcon_channel_set_status(struct sun4i_tcon *tcon, int channel,
 		clk_disable_unprepare(clk);
 }
 
+static void sun4i_tcon_lvds_set_status(struct sun4i_tcon *tcon,
+				       const struct drm_encoder *encoder,
+				       bool enabled)
+{
+	if (enabled) {
+		u8 val;
+
+		regmap_update_bits(tcon->regs, SUN4I_TCON0_LVDS_IF_REG,
+				   SUN4I_TCON0_LVDS_IF_EN,
+				   SUN4I_TCON0_LVDS_IF_EN);
+
+		/*
+		 * As their name suggest, these values only apply to the A31
+		 * and later SoCs. We'll have to rework this when merging
+		 * support for the older SoCs.
+		 */
+		regmap_write(tcon->regs, SUN4I_TCON0_LVDS_ANA0_REG,
+			     SUN6I_TCON0_LVDS_ANA0_C(2) |
+			     SUN6I_TCON0_LVDS_ANA0_V(3) |
+			     SUN6I_TCON0_LVDS_ANA0_PD(2) |
+			     SUN6I_TCON0_LVDS_ANA0_EN_LDO);
+		udelay(2);
+
+		regmap_update_bits(tcon->regs, SUN4I_TCON0_LVDS_ANA0_REG,
+				   SUN6I_TCON0_LVDS_ANA0_EN_MB,
+				   SUN6I_TCON0_LVDS_ANA0_EN_MB);
+		udelay(2);
+
+		regmap_update_bits(tcon->regs, SUN4I_TCON0_LVDS_ANA0_REG,
+				   SUN6I_TCON0_LVDS_ANA0_EN_DRVC,
+				   SUN6I_TCON0_LVDS_ANA0_EN_DRVC);
+
+		if (sun4i_tcon_get_pixel_depth(encoder) == 18)
+			val = 7;
+		else
+			val = 0xf;
+
+		regmap_write_bits(tcon->regs, SUN4I_TCON0_LVDS_ANA0_REG,
+				  SUN6I_TCON0_LVDS_ANA0_EN_DRVD(0xf),
+				  SUN6I_TCON0_LVDS_ANA0_EN_DRVD(val));
+	} else {
+		regmap_update_bits(tcon->regs, SUN4I_TCON0_LVDS_IF_REG,
+				   SUN4I_TCON0_LVDS_IF_EN, 0);
+	}
+}
+
 void sun4i_tcon_set_status(struct sun4i_tcon *tcon,
 			   const struct drm_encoder *encoder,
 			   bool enabled)
 {
+	bool is_lvds = false;
 	int channel;
 
 	switch (encoder->encoder_type) {
+	case DRM_MODE_ENCODER_LVDS:
+		is_lvds = true;
+		/* Fallthrough */
 	case DRM_MODE_ENCODER_NONE:
 		channel = 0;
 		break;
@@ -84,10 +176,16 @@ void sun4i_tcon_set_status(struct sun4i_tcon *tcon,
 		return;
 	}
 
+	if (is_lvds && !enabled)
+		sun4i_tcon_lvds_set_status(tcon, encoder, false);
+
 	regmap_update_bits(tcon->regs, SUN4I_TCON_GCTL_REG,
 			   SUN4I_TCON_GCTL_TCON_ENABLE,
 			   enabled ? SUN4I_TCON_GCTL_TCON_ENABLE : 0);
 
+	if (is_lvds && enabled)
+		sun4i_tcon_lvds_set_status(tcon, encoder, true);
+
 	sun4i_tcon_channel_set_status(tcon, channel, enabled);
 }
 
@@ -170,6 +268,75 @@ static void sun4i_tcon0_mode_set_common(struct sun4i_tcon *tcon,
 		     SUN4I_TCON0_BASIC0_Y(mode->crtc_vdisplay));
 }
 
+static void sun4i_tcon0_mode_set_lvds(struct sun4i_tcon *tcon,
+				      const struct drm_encoder *encoder,
+				      const struct drm_display_mode *mode)
+{
+	unsigned int bp;
+	u8 clk_delay;
+	u32 reg, val = 0;
+
+	tcon->dclk_min_div = 7;
+	tcon->dclk_max_div = 7;
+	sun4i_tcon0_mode_set_common(tcon, mode);
+
+	/* Adjust clock delay */
+	clk_delay = sun4i_tcon_get_clk_delay(mode, 0);
+	regmap_update_bits(tcon->regs, SUN4I_TCON0_CTL_REG,
+			   SUN4I_TCON0_CTL_CLK_DELAY_MASK,
+			   SUN4I_TCON0_CTL_CLK_DELAY(clk_delay));
+
+	/*
+	 * This is called a backporch in the register documentation,
+	 * but it really is the back porch + hsync
+	 */
+	bp = mode->crtc_htotal - mode->crtc_hsync_start;
+	DRM_DEBUG_DRIVER("Setting horizontal total %d, backporch %d\n",
+			 mode->crtc_htotal, bp);
+
+	/* Set horizontal display timings */
+	regmap_write(tcon->regs, SUN4I_TCON0_BASIC1_REG,
+		     SUN4I_TCON0_BASIC1_H_TOTAL(mode->htotal) |
+		     SUN4I_TCON0_BASIC1_H_BACKPORCH(bp));
+
+	/*
+	 * This is called a backporch in the register documentation,
+	 * but it really is the back porch + hsync
+	 */
+	bp = mode->crtc_vtotal - mode->crtc_vsync_start;
+	DRM_DEBUG_DRIVER("Setting vertical total %d, backporch %d\n",
+			 mode->crtc_vtotal, bp);
+
+	/* Set vertical display timings */
+	regmap_write(tcon->regs, SUN4I_TCON0_BASIC2_REG,
+		     SUN4I_TCON0_BASIC2_V_TOTAL(mode->crtc_vtotal * 2) |
+		     SUN4I_TCON0_BASIC2_V_BACKPORCH(bp));
+
+	reg = SUN4I_TCON0_LVDS_IF_CLK_SEL_TCON0 |
+		SUN4I_TCON0_LVDS_IF_DATA_POL_NORMAL |
+		SUN4I_TCON0_LVDS_IF_CLK_POL_NORMAL;
+	if (sun4i_tcon_get_pixel_depth(encoder) == 24)
+		reg |= SUN4I_TCON0_LVDS_IF_BITWIDTH_24BITS;
+	else
+		reg |= SUN4I_TCON0_LVDS_IF_BITWIDTH_18BITS;
+
+	regmap_write(tcon->regs, SUN4I_TCON0_LVDS_IF_REG, reg);
+
+	/* Setup the polarity of the various signals */
+	if (!(mode->flags & DRM_MODE_FLAG_PHSYNC))
+		val |= SUN4I_TCON0_IO_POL_HSYNC_POSITIVE;
+
+	if (!(mode->flags & DRM_MODE_FLAG_PVSYNC))
+		val |= SUN4I_TCON0_IO_POL_VSYNC_POSITIVE;
+
+	regmap_write(tcon->regs, SUN4I_TCON0_IO_POL_REG, val);
+
+	/* Map output pins to channel 0 */
+	regmap_update_bits(tcon->regs, SUN4I_TCON_GCTL_REG,
+			   SUN4I_TCON_GCTL_IOMAP_MASK,
+			   SUN4I_TCON_GCTL_IOMAP_TCON0);
+}
+
 static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
 				     const struct drm_display_mode *mode)
 {
@@ -177,6 +344,8 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
 	u8 clk_delay;
 	u32 val = 0;
 
+	tcon->dclk_min_div = 6;
+	tcon->dclk_max_div = 127;
 	sun4i_tcon0_mode_set_common(tcon, mode);
 
 	/* Adjust clock delay */
@@ -334,6 +503,9 @@ void sun4i_tcon_mode_set(struct sun4i_tcon *tcon,
 			 const struct drm_display_mode *mode)
 {
 	switch (encoder->encoder_type) {
+	case DRM_MODE_ENCODER_LVDS:
+		sun4i_tcon0_mode_set_lvds(tcon, encoder, mode);
+		break;
 	case DRM_MODE_ENCODER_NONE:
 		sun4i_tcon0_mode_set_rgb(tcon, mode);
 		sun4i_tcon_set_mux(tcon, 0, encoder);
@@ -665,7 +837,9 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
 	struct drm_device *drm = data;
 	struct sun4i_drv *drv = drm->dev_private;
 	struct sunxi_engine *engine;
+	struct device_node *remote;
 	struct sun4i_tcon *tcon;
+	bool has_lvds_rst, has_lvds_alt, can_lvds;
 	int ret;
 
 	engine = sun4i_tcon_find_engine(drv, dev->of_node);
@@ -696,6 +870,54 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
 		return ret;
 	}
 
+	/*
+	 * This can only be made optional since we've had DT nodes
+	 * without the LVDS reset properties.
+	 *
+	 * If the property is missing, just disable LVDS, and print a
+	 * warning.
+	 */
+	tcon->lvds_rst = devm_reset_control_get_optional(dev, "lvds");
+	if (IS_ERR(tcon->lvds_rst)) {
+		dev_err(dev, "Couldn't get our reset line\n");
+		return PTR_ERR(tcon->lvds_rst);
+	} else if (tcon->lvds_rst) {
+		has_lvds_rst = true;
+		reset_control_reset(tcon->lvds_rst);
+	} else {
+		has_lvds_rst = false;
+	}
+
+	/*
+	 * This can only be made optional since we've had DT nodes
+	 * without the LVDS reset properties.
+	 *
+	 * If the property is missing, just disable LVDS, and print a
+	 * warning.
+	 */
+	if (tcon->quirks->has_lvds_alt) {
+		tcon->lvds_pll = devm_clk_get(dev, "lvds-alt");
+		if (IS_ERR(tcon->lvds_pll)) {
+			if (PTR_ERR(tcon->lvds_pll) == -ENOENT) {
+				has_lvds_alt = false;
+			} else {
+				dev_err(dev, "Couldn't get the LVDS PLL\n");
+				return PTR_ERR(tcon->lvds_pll);
+			}
+		} else {
+			has_lvds_alt = true;
+		}
+	}
+
+	if (!has_lvds_rst || (tcon->quirks->has_lvds_alt && !has_lvds_alt)) {
+		dev_warn(dev,
+			 "Missing LVDS properties, Please upgrade your DT\n");
+		dev_warn(dev, "LVDS output disabled\n");
+		can_lvds = false;
+	} else {
+		can_lvds = true;
+	}
+
 	ret = sun4i_tcon_init_clocks(dev, tcon);
 	if (ret) {
 		dev_err(dev, "Couldn't init our TCON clocks\n");
@@ -724,12 +946,26 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
 	if (IS_ERR(tcon->crtc)) {
 		dev_err(dev, "Couldn't create our CRTC\n");
 		ret = PTR_ERR(tcon->crtc);
-		goto err_free_clocks;
+		goto err_free_dotclock;
 	}
 
-	ret = sun4i_rgb_init(drm, tcon);
+	/*
+	 * If we have an LVDS panel connected to the TCON, we should
+	 * just probe the LVDS connector. Otherwise, just probe RGB as
+	 * we used to.
+	 */
+	remote = of_graph_get_remote_node(dev->of_node, 1, 0);
+	if (of_device_is_compatible(remote, "panel-lvds"))
+		if (can_lvds)
+			ret = sun4i_lvds_init(drm, tcon);
+		else
+			ret = -EINVAL;
+	else
+		ret = sun4i_rgb_init(drm, tcon);
+	of_node_put(remote);
+
 	if (ret < 0)
-		goto err_free_clocks;
+		goto err_free_dotclock;
 
 	if (tcon->quirks->needs_de_be_mux) {
 		/*
@@ -877,6 +1113,7 @@ static const struct sun4i_tcon_quirks sun5i_a13_quirks = {
 
 static const struct sun4i_tcon_quirks sun6i_a31_quirks = {
 	.has_channel_1		= true,
+	.has_lvds_alt		= true,
 	.needs_de_be_mux	= true,
 	.set_mux		= sun6i_tcon_set_mux,
 };
@@ -893,6 +1130,10 @@ static const struct sun4i_tcon_quirks sun7i_a20_quirks = {
 };
 
 static const struct sun4i_tcon_quirks sun8i_a33_quirks = {
+	.has_lvds_alt		= true,
+};
+
+static const struct sun4i_tcon_quirks sun8i_a83t_lcd_quirks = {
 	/* nothing is supported */
 };
 
@@ -908,6 +1149,7 @@ const struct of_device_id sun4i_tcon_of_table[] = {
 	{ .compatible = "allwinner,sun6i-a31s-tcon", .data = &sun6i_a31s_quirks },
 	{ .compatible = "allwinner,sun7i-a20-tcon", .data = &sun7i_a20_quirks },
 	{ .compatible = "allwinner,sun8i-a33-tcon", .data = &sun8i_a33_quirks },
+	{ .compatible = "allwinner,sun8i-a83t-tcon-lcd", .data = &sun8i_a83t_lcd_quirks },
 	{ .compatible = "allwinner,sun8i-v3s-tcon", .data = &sun8i_v3s_quirks },
 	{ }
 };
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.h b/drivers/gpu/drm/sun4i/sun4i_tcon.h
index 839266a3850590badb9cdfbb375c9d02ec03a24d..b761c7b823c560536b0f44a3c6eab6337d3e32d7 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.h
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.h
@@ -70,7 +70,21 @@
 #define SUN4I_TCON0_TTL2_REG			0x78
 #define SUN4I_TCON0_TTL3_REG			0x7c
 #define SUN4I_TCON0_TTL4_REG			0x80
+
 #define SUN4I_TCON0_LVDS_IF_REG			0x84
+#define SUN4I_TCON0_LVDS_IF_EN				BIT(31)
+#define SUN4I_TCON0_LVDS_IF_BITWIDTH_MASK		BIT(26)
+#define SUN4I_TCON0_LVDS_IF_BITWIDTH_18BITS		(1 << 26)
+#define SUN4I_TCON0_LVDS_IF_BITWIDTH_24BITS		(0 << 26)
+#define SUN4I_TCON0_LVDS_IF_CLK_SEL_MASK		BIT(20)
+#define SUN4I_TCON0_LVDS_IF_CLK_SEL_TCON0		(1 << 20)
+#define SUN4I_TCON0_LVDS_IF_CLK_POL_MASK		BIT(4)
+#define SUN4I_TCON0_LVDS_IF_CLK_POL_NORMAL		(1 << 4)
+#define SUN4I_TCON0_LVDS_IF_CLK_POL_INV			(0 << 4)
+#define SUN4I_TCON0_LVDS_IF_DATA_POL_MASK		GENMASK(3, 0)
+#define SUN4I_TCON0_LVDS_IF_DATA_POL_NORMAL		(0xf)
+#define SUN4I_TCON0_LVDS_IF_DATA_POL_INV		(0)
+
 #define SUN4I_TCON0_IO_POL_REG			0x88
 #define SUN4I_TCON0_IO_POL_DCLK_PHASE(phase)		((phase & 3) << 28)
 #define SUN4I_TCON0_IO_POL_HSYNC_POSITIVE		BIT(25)
@@ -131,6 +145,16 @@
 #define SUN4I_TCON_CEU_RANGE_G_REG		0x144
 #define SUN4I_TCON_CEU_RANGE_B_REG		0x148
 #define SUN4I_TCON_MUX_CTRL_REG			0x200
+
+#define SUN4I_TCON0_LVDS_ANA0_REG		0x220
+#define SUN6I_TCON0_LVDS_ANA0_EN_MB			BIT(31)
+#define SUN6I_TCON0_LVDS_ANA0_EN_LDO			BIT(30)
+#define SUN6I_TCON0_LVDS_ANA0_EN_DRVC			BIT(24)
+#define SUN6I_TCON0_LVDS_ANA0_EN_DRVD(x)		(((x) & 0xf) << 20)
+#define SUN6I_TCON0_LVDS_ANA0_C(x)			(((x) & 3) << 17)
+#define SUN6I_TCON0_LVDS_ANA0_V(x)			(((x) & 3) << 8)
+#define SUN6I_TCON0_LVDS_ANA0_PD(x)			(((x) & 3) << 4)
+
 #define SUN4I_TCON1_FILL_CTL_REG		0x300
 #define SUN4I_TCON1_FILL_BEG0_REG		0x304
 #define SUN4I_TCON1_FILL_END0_REG		0x308
@@ -149,6 +173,7 @@ struct sun4i_tcon;
 
 struct sun4i_tcon_quirks {
 	bool	has_channel_1;	/* a33 does not have channel 1 */
+	bool	has_lvds_alt;	/* Does the LVDS clock have a parent other than the TCON clock? */
 	bool	needs_de_be_mux; /* sun6i needs mux to select backend */
 
 	/* callback to handle tcon muxing options */
@@ -167,11 +192,17 @@ struct sun4i_tcon {
 	struct clk			*sclk0;
 	struct clk			*sclk1;
 
+	/* Possible mux for the LVDS clock */
+	struct clk			*lvds_pll;
+
 	/* Pixel clock */
 	struct clk			*dclk;
+	u8				dclk_max_div;
+	u8				dclk_min_div;
 
 	/* Reset control */
 	struct reset_control		*lcd_rst;
+	struct reset_control		*lvds_rst;
 
 	struct drm_panel		*panel;
 
diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.c b/drivers/gpu/drm/sun4i/sun8i_mixer.c
index 29ceeb016d728f26fa90d431dd064273bfd9195e..2cbb2de6d39c83c372cc4db7e346c1b10ca4e823 100644
--- a/drivers/gpu/drm/sun4i/sun8i_mixer.c
+++ b/drivers/gpu/drm/sun4i/sun8i_mixer.c
@@ -398,6 +398,15 @@ static int sun8i_mixer_bind(struct device *dev, struct device *master,
 		ret = PTR_ERR(mixer->mod_clk);
 		goto err_disable_bus_clk;
 	}
+
+	/*
+	 * It seems that we need to enforce that rate for whatever
+	 * reason for the mixer to be functional. Make sure it's the
+	 * case.
+	 */
+	if (mixer->cfg->mod_rate)
+		clk_set_rate(mixer->mod_clk, mixer->cfg->mod_rate);
+
 	clk_prepare_enable(mixer->mod_clk);
 
 	list_add_tail(&mixer->engine.list, &drv->engine_list);
@@ -469,14 +478,26 @@ static int sun8i_mixer_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct sun8i_mixer_cfg sun8i_a83t_mixer0_cfg = {
+	.ccsc		= 0,
+	.scaler_mask	= 0xf,
+	.ui_num		= 3,
+	.vi_num		= 1,
+};
+
 static const struct sun8i_mixer_cfg sun8i_v3s_mixer_cfg = {
 	.vi_num = 2,
 	.ui_num = 1,
 	.scaler_mask = 0x3,
 	.ccsc = 0,
+	.mod_rate = 150000000,
 };
 
 static const struct of_device_id sun8i_mixer_of_table[] = {
+	{
+		.compatible = "allwinner,sun8i-a83t-de2-mixer-0",
+		.data = &sun8i_a83t_mixer0_cfg,
+	},
 	{
 		.compatible = "allwinner,sun8i-v3s-de2-mixer",
 		.data = &sun8i_v3s_mixer_cfg,
diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.h b/drivers/gpu/drm/sun4i/sun8i_mixer.h
index bc58040a88f915dd231fcc780b0808cc5ec2908f..f34e70c42adf4707cee223d4bd8d9b1437f05a45 100644
--- a/drivers/gpu/drm/sun4i/sun8i_mixer.h
+++ b/drivers/gpu/drm/sun4i/sun8i_mixer.h
@@ -121,12 +121,15 @@ struct de2_fmt_info {
  *	Set value to 0 if this is first mixer or second mixer with VEP support.
  *	Set value to 1 if this is second mixer without VEP support. Other values
  *	are invalid.
+ * @mod_rate: module clock rate that needs to be set in order to have
+ *	a functional block.
  */
 struct sun8i_mixer_cfg {
 	int		vi_num;
 	int		ui_num;
 	int		scaler_mask;
 	int		ccsc;
+	unsigned long	mod_rate;
 };
 
 struct sun8i_mixer {
diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index 46d65d39214d94fc973ad2539ff3613132a34397..2e0d6213f6bcd3a79b8ba4e411f92dd9d606b424 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -5,6 +5,8 @@ tegra-drm-y := \
 	drm.o \
 	gem.o \
 	fb.o \
+	hub.o \
+	plane.o \
 	dc.o \
 	output.o \
 	rgb.o \
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index fc70351b9017a0a916aadb013fb58bd3a490e245..b8403ed48285288c277d224e253285caebed3adb 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -19,82 +19,79 @@
 #include "dc.h"
 #include "drm.h"
 #include "gem.h"
+#include "hub.h"
+#include "plane.h"
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_plane_helper.h>
 
-struct tegra_plane {
-	struct drm_plane base;
-	unsigned int index;
-};
-
-static inline struct tegra_plane *to_tegra_plane(struct drm_plane *plane)
+static void tegra_dc_stats_reset(struct tegra_dc_stats *stats)
 {
-	return container_of(plane, struct tegra_plane, base);
+	stats->frames = 0;
+	stats->vblank = 0;
+	stats->underflow = 0;
+	stats->overflow = 0;
 }
 
-struct tegra_dc_state {
-	struct drm_crtc_state base;
+/* Reads the active copy of a register. */
+static u32 tegra_dc_readl_active(struct tegra_dc *dc, unsigned long offset)
+{
+	u32 value;
 
-	struct clk *clk;
-	unsigned long pclk;
-	unsigned int div;
+	tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS);
+	value = tegra_dc_readl(dc, offset);
+	tegra_dc_writel(dc, 0, DC_CMD_STATE_ACCESS);
 
-	u32 planes;
-};
+	return value;
+}
 
-static inline struct tegra_dc_state *to_dc_state(struct drm_crtc_state *state)
+static inline unsigned int tegra_plane_offset(struct tegra_plane *plane,
+					      unsigned int offset)
 {
-	if (state)
-		return container_of(state, struct tegra_dc_state, base);
-
-	return NULL;
-}
+	if (offset >= 0x500 && offset <= 0x638) {
+		offset = 0x000 + (offset - 0x500);
+		return plane->offset + offset;
+	}
 
-struct tegra_plane_state {
-	struct drm_plane_state base;
+	if (offset >= 0x700 && offset <= 0x719) {
+		offset = 0x180 + (offset - 0x700);
+		return plane->offset + offset;
+	}
 
-	struct tegra_bo_tiling tiling;
-	u32 format;
-	u32 swap;
-};
+	if (offset >= 0x800 && offset <= 0x839) {
+		offset = 0x1c0 + (offset - 0x800);
+		return plane->offset + offset;
+	}
 
-static inline struct tegra_plane_state *
-to_tegra_plane_state(struct drm_plane_state *state)
-{
-	if (state)
-		return container_of(state, struct tegra_plane_state, base);
+	dev_WARN(plane->dc->dev, "invalid offset: %x\n", offset);
 
-	return NULL;
+	return plane->offset + offset;
 }
 
-static void tegra_dc_stats_reset(struct tegra_dc_stats *stats)
+static inline u32 tegra_plane_readl(struct tegra_plane *plane,
+				    unsigned int offset)
 {
-	stats->frames = 0;
-	stats->vblank = 0;
-	stats->underflow = 0;
-	stats->overflow = 0;
+	return tegra_dc_readl(plane->dc, tegra_plane_offset(plane, offset));
 }
 
-/*
- * Reads the active copy of a register. This takes the dc->lock spinlock to
- * prevent races with the VBLANK processing which also needs access to the
- * active copy of some registers.
- */
-static u32 tegra_dc_readl_active(struct tegra_dc *dc, unsigned long offset)
+static inline void tegra_plane_writel(struct tegra_plane *plane, u32 value,
+				      unsigned int offset)
 {
-	unsigned long flags;
-	u32 value;
+	tegra_dc_writel(plane->dc, value, tegra_plane_offset(plane, offset));
+}
 
-	spin_lock_irqsave(&dc->lock, flags);
+bool tegra_dc_has_output(struct tegra_dc *dc, struct device *dev)
+{
+	struct device_node *np = dc->dev->of_node;
+	struct of_phandle_iterator it;
+	int err;
 
-	tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS);
-	value = tegra_dc_readl(dc, offset);
-	tegra_dc_writel(dc, 0, DC_CMD_STATE_ACCESS);
+	of_for_each_phandle(&it, err, np, "nvidia,outputs", NULL, 0)
+		if (it.node == dev->of_node)
+			return true;
 
-	spin_unlock_irqrestore(&dc->lock, flags);
-	return value;
+	return false;
 }
 
 /*
@@ -115,81 +112,6 @@ void tegra_dc_commit(struct tegra_dc *dc)
 	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
 }
 
-static int tegra_dc_format(u32 fourcc, u32 *format, u32 *swap)
-{
-	/* assume no swapping of fetched data */
-	if (swap)
-		*swap = BYTE_SWAP_NOSWAP;
-
-	switch (fourcc) {
-	case DRM_FORMAT_XBGR8888:
-		*format = WIN_COLOR_DEPTH_R8G8B8A8;
-		break;
-
-	case DRM_FORMAT_XRGB8888:
-		*format = WIN_COLOR_DEPTH_B8G8R8A8;
-		break;
-
-	case DRM_FORMAT_RGB565:
-		*format = WIN_COLOR_DEPTH_B5G6R5;
-		break;
-
-	case DRM_FORMAT_UYVY:
-		*format = WIN_COLOR_DEPTH_YCbCr422;
-		break;
-
-	case DRM_FORMAT_YUYV:
-		if (swap)
-			*swap = BYTE_SWAP_SWAP2;
-
-		*format = WIN_COLOR_DEPTH_YCbCr422;
-		break;
-
-	case DRM_FORMAT_YUV420:
-		*format = WIN_COLOR_DEPTH_YCbCr420P;
-		break;
-
-	case DRM_FORMAT_YUV422:
-		*format = WIN_COLOR_DEPTH_YCbCr422P;
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static bool tegra_dc_format_is_yuv(unsigned int format, bool *planar)
-{
-	switch (format) {
-	case WIN_COLOR_DEPTH_YCbCr422:
-	case WIN_COLOR_DEPTH_YUV422:
-		if (planar)
-			*planar = false;
-
-		return true;
-
-	case WIN_COLOR_DEPTH_YCbCr420P:
-	case WIN_COLOR_DEPTH_YUV420P:
-	case WIN_COLOR_DEPTH_YCbCr422P:
-	case WIN_COLOR_DEPTH_YUV422P:
-	case WIN_COLOR_DEPTH_YCbCr422R:
-	case WIN_COLOR_DEPTH_YUV422R:
-	case WIN_COLOR_DEPTH_YCbCr422RA:
-	case WIN_COLOR_DEPTH_YUV422RA:
-		if (planar)
-			*planar = true;
-
-		return true;
-	}
-
-	if (planar)
-		*planar = false;
-
-	return false;
-}
-
 static inline u32 compute_dda_inc(unsigned int in, unsigned int out, bool v,
 				  unsigned int bpp)
 {
@@ -230,36 +152,104 @@ static inline u32 compute_initial_dda(unsigned int in)
 	return dfixed_frac(inf);
 }
 
-static void tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
+static void tegra_plane_setup_blending_legacy(struct tegra_plane *plane)
+{
+	u32 background[3] = {
+		BLEND_WEIGHT1(0) | BLEND_WEIGHT0(0) | BLEND_COLOR_KEY_NONE,
+		BLEND_WEIGHT1(0) | BLEND_WEIGHT0(0) | BLEND_COLOR_KEY_NONE,
+		BLEND_WEIGHT1(0) | BLEND_WEIGHT0(0) | BLEND_COLOR_KEY_NONE,
+	};
+	u32 foreground = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255) |
+			 BLEND_COLOR_KEY_NONE;
+	u32 blendnokey = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255);
+	struct tegra_plane_state *state;
+	unsigned int i;
+
+	state = to_tegra_plane_state(plane->base.state);
+
+	/* alpha contribution is 1 minus sum of overlapping windows */
+	for (i = 0; i < 3; i++) {
+		if (state->dependent[i])
+			background[i] |= BLEND_CONTROL_DEPENDENT;
+	}
+
+	/* enable alpha blending if pixel format has an alpha component */
+	if (!state->opaque)
+		foreground |= BLEND_CONTROL_ALPHA;
+
+	/*
+	 * Disable blending and assume Window A is the bottom-most window,
+	 * Window C is the top-most window and Window B is in the middle.
+	 */
+	tegra_plane_writel(plane, blendnokey, DC_WIN_BLEND_NOKEY);
+	tegra_plane_writel(plane, foreground, DC_WIN_BLEND_1WIN);
+
+	switch (plane->index) {
+	case 0:
+		tegra_plane_writel(plane, background[0], DC_WIN_BLEND_2WIN_X);
+		tegra_plane_writel(plane, background[1], DC_WIN_BLEND_2WIN_Y);
+		tegra_plane_writel(plane, background[2], DC_WIN_BLEND_3WIN_XY);
+		break;
+
+	case 1:
+		tegra_plane_writel(plane, foreground, DC_WIN_BLEND_2WIN_X);
+		tegra_plane_writel(plane, background[1], DC_WIN_BLEND_2WIN_Y);
+		tegra_plane_writel(plane, background[2], DC_WIN_BLEND_3WIN_XY);
+		break;
+
+	case 2:
+		tegra_plane_writel(plane, foreground, DC_WIN_BLEND_2WIN_X);
+		tegra_plane_writel(plane, foreground, DC_WIN_BLEND_2WIN_Y);
+		tegra_plane_writel(plane, foreground, DC_WIN_BLEND_3WIN_XY);
+		break;
+	}
+}
+
+static void tegra_plane_setup_blending(struct tegra_plane *plane,
+				       const struct tegra_dc_window *window)
+{
+	u32 value;
+
+	value = BLEND_FACTOR_DST_ALPHA_ZERO | BLEND_FACTOR_SRC_ALPHA_K2 |
+		BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_SRC |
+		BLEND_FACTOR_SRC_COLOR_K1_TIMES_SRC;
+	tegra_plane_writel(plane, value, DC_WIN_BLEND_MATCH_SELECT);
+
+	value = BLEND_FACTOR_DST_ALPHA_ZERO | BLEND_FACTOR_SRC_ALPHA_K2 |
+		BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_SRC |
+		BLEND_FACTOR_SRC_COLOR_K1_TIMES_SRC;
+	tegra_plane_writel(plane, value, DC_WIN_BLEND_NOMATCH_SELECT);
+
+	value = K2(255) | K1(255) | WINDOW_LAYER_DEPTH(255 - window->zpos);
+	tegra_plane_writel(plane, value, DC_WIN_BLEND_LAYER_CONTROL);
+}
+
+static void tegra_dc_setup_window(struct tegra_plane *plane,
 				  const struct tegra_dc_window *window)
 {
 	unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp;
-	unsigned long value, flags;
+	struct tegra_dc *dc = plane->dc;
 	bool yuv, planar;
+	u32 value;
 
 	/*
 	 * For YUV planar modes, the number of bytes per pixel takes into
 	 * account only the luma component and therefore is 1.
 	 */
-	yuv = tegra_dc_format_is_yuv(window->format, &planar);
+	yuv = tegra_plane_format_is_yuv(window->format, &planar);
 	if (!yuv)
 		bpp = window->bits_per_pixel / 8;
 	else
 		bpp = planar ? 1 : 2;
 
-	spin_lock_irqsave(&dc->lock, flags);
-
-	value = WINDOW_A_SELECT << index;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
-
-	tegra_dc_writel(dc, window->format, DC_WIN_COLOR_DEPTH);
-	tegra_dc_writel(dc, window->swap, DC_WIN_BYTE_SWAP);
+	tegra_plane_writel(plane, window->format, DC_WIN_COLOR_DEPTH);
+	tegra_plane_writel(plane, window->swap, DC_WIN_BYTE_SWAP);
 
 	value = V_POSITION(window->dst.y) | H_POSITION(window->dst.x);
-	tegra_dc_writel(dc, value, DC_WIN_POSITION);
+	tegra_plane_writel(plane, value, DC_WIN_POSITION);
 
 	value = V_SIZE(window->dst.h) | H_SIZE(window->dst.w);
-	tegra_dc_writel(dc, value, DC_WIN_SIZE);
+	tegra_plane_writel(plane, value, DC_WIN_SIZE);
 
 	h_offset = window->src.x * bpp;
 	v_offset = window->src.y;
@@ -267,7 +257,7 @@ static void tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
 	v_size = window->src.h;
 
 	value = V_PRESCALED_SIZE(v_size) | H_PRESCALED_SIZE(h_size);
-	tegra_dc_writel(dc, value, DC_WIN_PRESCALED_SIZE);
+	tegra_plane_writel(plane, value, DC_WIN_PRESCALED_SIZE);
 
 	/*
 	 * For DDA computations the number of bytes per pixel for YUV planar
@@ -280,33 +270,33 @@ static void tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
 	v_dda = compute_dda_inc(window->src.h, window->dst.h, true, bpp);
 
 	value = V_DDA_INC(v_dda) | H_DDA_INC(h_dda);
-	tegra_dc_writel(dc, value, DC_WIN_DDA_INC);
+	tegra_plane_writel(plane, value, DC_WIN_DDA_INC);
 
 	h_dda = compute_initial_dda(window->src.x);
 	v_dda = compute_initial_dda(window->src.y);
 
-	tegra_dc_writel(dc, h_dda, DC_WIN_H_INITIAL_DDA);
-	tegra_dc_writel(dc, v_dda, DC_WIN_V_INITIAL_DDA);
+	tegra_plane_writel(plane, h_dda, DC_WIN_H_INITIAL_DDA);
+	tegra_plane_writel(plane, v_dda, DC_WIN_V_INITIAL_DDA);
 
-	tegra_dc_writel(dc, 0, DC_WIN_UV_BUF_STRIDE);
-	tegra_dc_writel(dc, 0, DC_WIN_BUF_STRIDE);
+	tegra_plane_writel(plane, 0, DC_WIN_UV_BUF_STRIDE);
+	tegra_plane_writel(plane, 0, DC_WIN_BUF_STRIDE);
 
-	tegra_dc_writel(dc, window->base[0], DC_WINBUF_START_ADDR);
+	tegra_plane_writel(plane, window->base[0], DC_WINBUF_START_ADDR);
 
 	if (yuv && planar) {
-		tegra_dc_writel(dc, window->base[1], DC_WINBUF_START_ADDR_U);
-		tegra_dc_writel(dc, window->base[2], DC_WINBUF_START_ADDR_V);
+		tegra_plane_writel(plane, window->base[1], DC_WINBUF_START_ADDR_U);
+		tegra_plane_writel(plane, window->base[2], DC_WINBUF_START_ADDR_V);
 		value = window->stride[1] << 16 | window->stride[0];
-		tegra_dc_writel(dc, value, DC_WIN_LINE_STRIDE);
+		tegra_plane_writel(plane, value, DC_WIN_LINE_STRIDE);
 	} else {
-		tegra_dc_writel(dc, window->stride[0], DC_WIN_LINE_STRIDE);
+		tegra_plane_writel(plane, window->stride[0], DC_WIN_LINE_STRIDE);
 	}
 
 	if (window->bottom_up)
 		v_offset += window->src.h - 1;
 
-	tegra_dc_writel(dc, h_offset, DC_WINBUF_ADDR_H_OFFSET);
-	tegra_dc_writel(dc, v_offset, DC_WINBUF_ADDR_V_OFFSET);
+	tegra_plane_writel(plane, h_offset, DC_WINBUF_ADDR_H_OFFSET);
+	tegra_plane_writel(plane, v_offset, DC_WINBUF_ADDR_V_OFFSET);
 
 	if (dc->soc->supports_block_linear) {
 		unsigned long height = window->tiling.value;
@@ -326,7 +316,7 @@ static void tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
 			break;
 		}
 
-		tegra_dc_writel(dc, value, DC_WINBUF_SURFACE_KIND);
+		tegra_plane_writel(plane, value, DC_WINBUF_SURFACE_KIND);
 	} else {
 		switch (window->tiling.mode) {
 		case TEGRA_BO_TILING_MODE_PITCH:
@@ -347,21 +337,21 @@ static void tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
 			break;
 		}
 
-		tegra_dc_writel(dc, value, DC_WIN_BUFFER_ADDR_MODE);
+		tegra_plane_writel(plane, value, DC_WIN_BUFFER_ADDR_MODE);
 	}
 
 	value = WIN_ENABLE;
 
 	if (yuv) {
 		/* setup default colorspace conversion coefficients */
-		tegra_dc_writel(dc, 0x00f0, DC_WIN_CSC_YOF);
-		tegra_dc_writel(dc, 0x012a, DC_WIN_CSC_KYRGB);
-		tegra_dc_writel(dc, 0x0000, DC_WIN_CSC_KUR);
-		tegra_dc_writel(dc, 0x0198, DC_WIN_CSC_KVR);
-		tegra_dc_writel(dc, 0x039b, DC_WIN_CSC_KUG);
-		tegra_dc_writel(dc, 0x032f, DC_WIN_CSC_KVG);
-		tegra_dc_writel(dc, 0x0204, DC_WIN_CSC_KUB);
-		tegra_dc_writel(dc, 0x0000, DC_WIN_CSC_KVB);
+		tegra_plane_writel(plane, 0x00f0, DC_WIN_CSC_YOF);
+		tegra_plane_writel(plane, 0x012a, DC_WIN_CSC_KYRGB);
+		tegra_plane_writel(plane, 0x0000, DC_WIN_CSC_KUR);
+		tegra_plane_writel(plane, 0x0198, DC_WIN_CSC_KVR);
+		tegra_plane_writel(plane, 0x039b, DC_WIN_CSC_KUG);
+		tegra_plane_writel(plane, 0x032f, DC_WIN_CSC_KVG);
+		tegra_plane_writel(plane, 0x0204, DC_WIN_CSC_KUB);
+		tegra_plane_writel(plane, 0x0000, DC_WIN_CSC_KVB);
 
 		value |= CSC_ENABLE;
 	} else if (window->bits_per_pixel < 24) {
@@ -371,137 +361,74 @@ static void tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
 	if (window->bottom_up)
 		value |= V_DIRECTION;
 
-	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
-
-	/*
-	 * Disable blending and assume Window A is the bottom-most window,
-	 * Window C is the top-most window and Window B is in the middle.
-	 */
-	tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_NOKEY);
-	tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_1WIN);
-
-	switch (index) {
-	case 0:
-		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_X);
-		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_Y);
-		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_3WIN_XY);
-		break;
-
-	case 1:
-		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_X);
-		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_Y);
-		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_3WIN_XY);
-		break;
-
-	case 2:
-		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_X);
-		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_Y);
-		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_3WIN_XY);
-		break;
-	}
-
-	spin_unlock_irqrestore(&dc->lock, flags);
-}
-
-static void tegra_plane_destroy(struct drm_plane *plane)
-{
-	struct tegra_plane *p = to_tegra_plane(plane);
+	tegra_plane_writel(plane, value, DC_WIN_WIN_OPTIONS);
 
-	drm_plane_cleanup(plane);
-	kfree(p);
+	if (dc->soc->supports_blending)
+		tegra_plane_setup_blending(plane, window);
+	else
+		tegra_plane_setup_blending_legacy(plane);
 }
 
-static const u32 tegra_primary_plane_formats[] = {
+static const u32 tegra20_primary_formats[] = {
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_RGBA5551,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_ARGB8888,
+	/* non-native formats */
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGBX5551,
 	DRM_FORMAT_XBGR8888,
 	DRM_FORMAT_XRGB8888,
-	DRM_FORMAT_RGB565,
 };
 
-static void tegra_primary_plane_destroy(struct drm_plane *plane)
-{
-	tegra_plane_destroy(plane);
-}
-
-static void tegra_plane_reset(struct drm_plane *plane)
-{
-	struct tegra_plane_state *state;
-
-	if (plane->state)
-		__drm_atomic_helper_plane_destroy_state(plane->state);
-
-	kfree(plane->state);
-	plane->state = NULL;
-
-	state = kzalloc(sizeof(*state), GFP_KERNEL);
-	if (state) {
-		plane->state = &state->base;
-		plane->state->plane = plane;
-	}
-}
-
-static struct drm_plane_state *tegra_plane_atomic_duplicate_state(struct drm_plane *plane)
-{
-	struct tegra_plane_state *state = to_tegra_plane_state(plane->state);
-	struct tegra_plane_state *copy;
-
-	copy = kmalloc(sizeof(*copy), GFP_KERNEL);
-	if (!copy)
-		return NULL;
-
-	__drm_atomic_helper_plane_duplicate_state(plane, &copy->base);
-	copy->tiling = state->tiling;
-	copy->format = state->format;
-	copy->swap = state->swap;
-
-	return &copy->base;
-}
-
-static void tegra_plane_atomic_destroy_state(struct drm_plane *plane,
-					     struct drm_plane_state *state)
-{
-	__drm_atomic_helper_plane_destroy_state(state);
-	kfree(state);
-}
-
-static const struct drm_plane_funcs tegra_primary_plane_funcs = {
-	.update_plane = drm_atomic_helper_update_plane,
-	.disable_plane = drm_atomic_helper_disable_plane,
-	.destroy = tegra_primary_plane_destroy,
-	.reset = tegra_plane_reset,
-	.atomic_duplicate_state = tegra_plane_atomic_duplicate_state,
-	.atomic_destroy_state = tegra_plane_atomic_destroy_state,
+static const u32 tegra114_primary_formats[] = {
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_RGBA5551,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_ARGB8888,
+	/* new on Tegra114 */
+	DRM_FORMAT_ABGR4444,
+	DRM_FORMAT_ABGR1555,
+	DRM_FORMAT_BGRA5551,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGBX5551,
+	DRM_FORMAT_XBGR1555,
+	DRM_FORMAT_BGRX5551,
+	DRM_FORMAT_BGR565,
+	DRM_FORMAT_BGRA8888,
+	DRM_FORMAT_RGBA8888,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
 };
 
-static int tegra_plane_state_add(struct tegra_plane *plane,
-				 struct drm_plane_state *state)
-{
-	struct drm_crtc_state *crtc_state;
-	struct tegra_dc_state *tegra;
-	struct drm_rect clip;
-	int err;
-
-	/* Propagate errors from allocation or locking failures. */
-	crtc_state = drm_atomic_get_crtc_state(state->state, state->crtc);
-	if (IS_ERR(crtc_state))
-		return PTR_ERR(crtc_state);
-
-	clip.x1 = 0;
-	clip.y1 = 0;
-	clip.x2 = crtc_state->mode.hdisplay;
-	clip.y2 = crtc_state->mode.vdisplay;
-
-	/* Check plane state for visibility and calculate clipping bounds */
-	err = drm_atomic_helper_check_plane_state(state, crtc_state, &clip,
-						  0, INT_MAX, true, true);
-	if (err < 0)
-		return err;
-
-	tegra = to_dc_state(crtc_state);
-
-	tegra->planes |= WIN_A_ACT_REQ << plane->index;
-
-	return 0;
-}
+static const u32 tegra124_primary_formats[] = {
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_RGBA5551,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_ARGB8888,
+	/* new on Tegra114 */
+	DRM_FORMAT_ABGR4444,
+	DRM_FORMAT_ABGR1555,
+	DRM_FORMAT_BGRA5551,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGBX5551,
+	DRM_FORMAT_XBGR1555,
+	DRM_FORMAT_BGRX5551,
+	DRM_FORMAT_BGR565,
+	DRM_FORMAT_BGRA8888,
+	DRM_FORMAT_RGBA8888,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
+	/* new on Tegra124 */
+	DRM_FORMAT_RGBX8888,
+	DRM_FORMAT_BGRX8888,
+};
 
 static int tegra_plane_atomic_check(struct drm_plane *plane,
 				    struct drm_plane_state *state)
@@ -510,17 +437,40 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 	struct tegra_bo_tiling *tiling = &plane_state->tiling;
 	struct tegra_plane *tegra = to_tegra_plane(plane);
 	struct tegra_dc *dc = to_tegra_dc(state->crtc);
+	unsigned int format;
 	int err;
 
 	/* no need for further checks if the plane is being disabled */
 	if (!state->crtc)
 		return 0;
 
-	err = tegra_dc_format(state->fb->format->format, &plane_state->format,
-			      &plane_state->swap);
+	err = tegra_plane_format(state->fb->format->format, &format,
+				 &plane_state->swap);
 	if (err < 0)
 		return err;
 
+	/*
+	 * Tegra20 and Tegra30 are special cases here because they support
+	 * only variants of specific formats with an alpha component, but not
+	 * the corresponding opaque formats. However, the opaque formats can
+	 * be emulated by disabling alpha blending for the plane.
+	 */
+	if (!dc->soc->supports_blending) {
+		if (!tegra_plane_format_has_alpha(format)) {
+			err = tegra_plane_format_get_alpha(format, &format);
+			if (err < 0)
+				return err;
+
+			plane_state->opaque = true;
+		} else {
+			plane_state->opaque = false;
+		}
+
+		tegra_plane_check_dependent(tegra, plane_state);
+	}
+
+	plane_state->format = format;
+
 	err = tegra_fb_get_tiling(state->fb, tiling);
 	if (err < 0)
 		return err;
@@ -553,32 +503,22 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 static void tegra_plane_atomic_disable(struct drm_plane *plane,
 				       struct drm_plane_state *old_state)
 {
-	struct tegra_dc *dc = to_tegra_dc(old_state->crtc);
 	struct tegra_plane *p = to_tegra_plane(plane);
-	unsigned long flags;
 	u32 value;
 
 	/* rien ne va plus */
 	if (!old_state || !old_state->crtc)
 		return;
 
-	spin_lock_irqsave(&dc->lock, flags);
-
-	value = WINDOW_A_SELECT << p->index;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
-
-	value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
+	value = tegra_plane_readl(p, DC_WIN_WIN_OPTIONS);
 	value &= ~WIN_ENABLE;
-	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
-
-	spin_unlock_irqrestore(&dc->lock, flags);
+	tegra_plane_writel(p, value, DC_WIN_WIN_OPTIONS);
 }
 
 static void tegra_plane_atomic_update(struct drm_plane *plane,
 				      struct drm_plane_state *old_state)
 {
 	struct tegra_plane_state *state = to_tegra_plane_state(plane->state);
-	struct tegra_dc *dc = to_tegra_dc(plane->state->crtc);
 	struct drm_framebuffer *fb = plane->state->fb;
 	struct tegra_plane *p = to_tegra_plane(plane);
 	struct tegra_dc_window window;
@@ -604,6 +544,7 @@ static void tegra_plane_atomic_update(struct drm_plane *plane,
 	window.bottom_up = tegra_fb_is_bottom_up(fb);
 
 	/* copy from state */
+	window.zpos = plane->state->normalized_zpos;
 	window.tiling = state->tiling;
 	window.format = state->format;
 	window.swap = state->swap;
@@ -622,7 +563,7 @@ static void tegra_plane_atomic_update(struct drm_plane *plane,
 			window.stride[i] = fb->pitches[i];
 	}
 
-	tegra_dc_setup_window(dc, p->index, &window);
+	tegra_dc_setup_window(p, &window);
 }
 
 static const struct drm_plane_helper_funcs tegra_plane_helper_funcs = {
@@ -631,8 +572,7 @@ static const struct drm_plane_helper_funcs tegra_plane_helper_funcs = {
 	.atomic_update = tegra_plane_atomic_update,
 };
 
-static struct drm_plane *tegra_dc_primary_plane_create(struct drm_device *drm,
-						       struct tegra_dc *dc)
+static unsigned long tegra_plane_get_possible_crtcs(struct drm_device *drm)
 {
 	/*
 	 * Ideally this would use drm_crtc_mask(), but that would require the
@@ -646,7 +586,14 @@ static struct drm_plane *tegra_dc_primary_plane_create(struct drm_device *drm,
 	 * of CRTCs that have been registered, and should therefore always be
 	 * the same as drm_crtc_index() after registration.
 	 */
-	unsigned long possible_crtcs = 1 << drm->mode_config.num_crtc;
+	return 1 << drm->mode_config.num_crtc;
+}
+
+static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm,
+						    struct tegra_dc *dc)
+{
+	unsigned long possible_crtcs = tegra_plane_get_possible_crtcs(drm);
+	enum drm_plane_type type = DRM_PLANE_TYPE_PRIMARY;
 	struct tegra_plane *plane;
 	unsigned int num_formats;
 	const u32 *formats;
@@ -656,13 +603,17 @@ static struct drm_plane *tegra_dc_primary_plane_create(struct drm_device *drm,
 	if (!plane)
 		return ERR_PTR(-ENOMEM);
 
-	num_formats = ARRAY_SIZE(tegra_primary_plane_formats);
-	formats = tegra_primary_plane_formats;
+	/* Always use window A as primary window */
+	plane->offset = 0xa00;
+	plane->index = 0;
+	plane->dc = dc;
+
+	num_formats = dc->soc->num_primary_formats;
+	formats = dc->soc->primary_formats;
 
 	err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
-				       &tegra_primary_plane_funcs, formats,
-				       num_formats, NULL,
-				       DRM_PLANE_TYPE_PRIMARY, NULL);
+				       &tegra_plane_funcs, formats,
+				       num_formats, NULL, type, NULL);
 	if (err < 0) {
 		kfree(plane);
 		return ERR_PTR(err);
@@ -670,6 +621,9 @@ static struct drm_plane *tegra_dc_primary_plane_create(struct drm_device *drm,
 
 	drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs);
 
+	if (dc->soc->supports_blending)
+		drm_plane_create_zpos_property(&plane->base, 0, 0, 255);
+
 	return &plane->base;
 }
 
@@ -786,15 +740,6 @@ static void tegra_cursor_atomic_disable(struct drm_plane *plane,
 	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 }
 
-static const struct drm_plane_funcs tegra_cursor_plane_funcs = {
-	.update_plane = drm_atomic_helper_update_plane,
-	.disable_plane = drm_atomic_helper_disable_plane,
-	.destroy = tegra_plane_destroy,
-	.reset = tegra_plane_reset,
-	.atomic_duplicate_state = tegra_plane_atomic_duplicate_state,
-	.atomic_destroy_state = tegra_plane_atomic_destroy_state,
-};
-
 static const struct drm_plane_helper_funcs tegra_cursor_plane_helper_funcs = {
 	.atomic_check = tegra_cursor_atomic_check,
 	.atomic_update = tegra_cursor_atomic_update,
@@ -804,6 +749,7 @@ static const struct drm_plane_helper_funcs tegra_cursor_plane_helper_funcs = {
 static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm,
 						      struct tegra_dc *dc)
 {
+	unsigned long possible_crtcs = tegra_plane_get_possible_crtcs(drm);
 	struct tegra_plane *plane;
 	unsigned int num_formats;
 	const u32 *formats;
@@ -821,12 +767,13 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm,
 	 * need to special-casing the cursor plane.
 	 */
 	plane->index = 6;
+	plane->dc = dc;
 
 	num_formats = ARRAY_SIZE(tegra_cursor_plane_formats);
 	formats = tegra_cursor_plane_formats;
 
-	err = drm_universal_plane_init(drm, &plane->base, 1 << dc->pipe,
-				       &tegra_cursor_plane_funcs, formats,
+	err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
+				       &tegra_plane_funcs, formats,
 				       num_formats, NULL,
 				       DRM_PLANE_TYPE_CURSOR, NULL);
 	if (err < 0) {
@@ -839,24 +786,76 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm,
 	return &plane->base;
 }
 
-static void tegra_overlay_plane_destroy(struct drm_plane *plane)
-{
-	tegra_plane_destroy(plane);
-}
-
-static const struct drm_plane_funcs tegra_overlay_plane_funcs = {
-	.update_plane = drm_atomic_helper_update_plane,
-	.disable_plane = drm_atomic_helper_disable_plane,
-	.destroy = tegra_overlay_plane_destroy,
-	.reset = tegra_plane_reset,
-	.atomic_duplicate_state = tegra_plane_atomic_duplicate_state,
-	.atomic_destroy_state = tegra_plane_atomic_destroy_state,
+static const u32 tegra20_overlay_formats[] = {
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_RGBA5551,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_ARGB8888,
+	/* non-native formats */
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGBX5551,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_XRGB8888,
+	/* planar formats */
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_YUV420,
+	DRM_FORMAT_YUV422,
 };
 
-static const uint32_t tegra_overlay_plane_formats[] = {
-	DRM_FORMAT_XBGR8888,
+static const u32 tegra114_overlay_formats[] = {
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_RGBA5551,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_ARGB8888,
+	/* new on Tegra114 */
+	DRM_FORMAT_ABGR4444,
+	DRM_FORMAT_ABGR1555,
+	DRM_FORMAT_BGRA5551,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGBX5551,
+	DRM_FORMAT_XBGR1555,
+	DRM_FORMAT_BGRX5551,
+	DRM_FORMAT_BGR565,
+	DRM_FORMAT_BGRA8888,
+	DRM_FORMAT_RGBA8888,
 	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
+	/* planar formats */
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_YUV420,
+	DRM_FORMAT_YUV422,
+};
+
+static const u32 tegra124_overlay_formats[] = {
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_ARGB1555,
 	DRM_FORMAT_RGB565,
+	DRM_FORMAT_RGBA5551,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_ARGB8888,
+	/* new on Tegra114 */
+	DRM_FORMAT_ABGR4444,
+	DRM_FORMAT_ABGR1555,
+	DRM_FORMAT_BGRA5551,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGBX5551,
+	DRM_FORMAT_XBGR1555,
+	DRM_FORMAT_BGRX5551,
+	DRM_FORMAT_BGR565,
+	DRM_FORMAT_BGRA8888,
+	DRM_FORMAT_RGBA8888,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
+	/* new on Tegra124 */
+	DRM_FORMAT_RGBX8888,
+	DRM_FORMAT_BGRX8888,
+	/* planar formats */
 	DRM_FORMAT_UYVY,
 	DRM_FORMAT_YUYV,
 	DRM_FORMAT_YUV420,
@@ -867,6 +866,7 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
 						       struct tegra_dc *dc,
 						       unsigned int index)
 {
+	unsigned long possible_crtcs = tegra_plane_get_possible_crtcs(drm);
 	struct tegra_plane *plane;
 	unsigned int num_formats;
 	const u32 *formats;
@@ -876,13 +876,15 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
 	if (!plane)
 		return ERR_PTR(-ENOMEM);
 
+	plane->offset = 0xa00 + 0x200 * index;
 	plane->index = index;
+	plane->dc = dc;
 
-	num_formats = ARRAY_SIZE(tegra_overlay_plane_formats);
-	formats = tegra_overlay_plane_formats;
+	num_formats = dc->soc->num_overlay_formats;
+	formats = dc->soc->overlay_formats;
 
-	err = drm_universal_plane_init(drm, &plane->base, 1 << dc->pipe,
-				       &tegra_overlay_plane_funcs, formats,
+	err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
+				       &tegra_plane_funcs, formats,
 				       num_formats, NULL,
 				       DRM_PLANE_TYPE_OVERLAY, NULL);
 	if (err < 0) {
@@ -892,147 +894,494 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
 
 	drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs);
 
+	if (dc->soc->supports_blending)
+		drm_plane_create_zpos_property(&plane->base, 0, 0, 255);
+
 	return &plane->base;
 }
 
-static int tegra_dc_add_planes(struct drm_device *drm, struct tegra_dc *dc)
+static struct drm_plane *tegra_dc_add_shared_planes(struct drm_device *drm,
+						    struct tegra_dc *dc)
+{
+	struct drm_plane *plane, *primary = NULL;
+	unsigned int i, j;
+
+	for (i = 0; i < dc->soc->num_wgrps; i++) {
+		const struct tegra_windowgroup_soc *wgrp = &dc->soc->wgrps[i];
+
+		if (wgrp->dc == dc->pipe) {
+			for (j = 0; j < wgrp->num_windows; j++) {
+				unsigned int index = wgrp->windows[j];
+
+				plane = tegra_shared_plane_create(drm, dc,
+								  wgrp->index,
+								  index);
+				if (IS_ERR(plane))
+					return plane;
+
+				/*
+				 * Choose the first shared plane owned by this
+				 * head as the primary plane.
+				 */
+				if (!primary) {
+					plane->type = DRM_PLANE_TYPE_PRIMARY;
+					primary = plane;
+				}
+			}
+		}
+	}
+
+	return primary;
+}
+
+static struct drm_plane *tegra_dc_add_planes(struct drm_device *drm,
+					     struct tegra_dc *dc)
 {
-	struct drm_plane *plane;
+	struct drm_plane *planes[2], *primary;
 	unsigned int i;
+	int err;
+
+	primary = tegra_primary_plane_create(drm, dc);
+	if (IS_ERR(primary))
+		return primary;
 
 	for (i = 0; i < 2; i++) {
-		plane = tegra_dc_overlay_plane_create(drm, dc, 1 + i);
-		if (IS_ERR(plane))
-			return PTR_ERR(plane);
+		planes[i] = tegra_dc_overlay_plane_create(drm, dc, 1 + i);
+		if (IS_ERR(planes[i])) {
+			err = PTR_ERR(planes[i]);
+
+			while (i--)
+				tegra_plane_funcs.destroy(planes[i]);
+
+			tegra_plane_funcs.destroy(primary);
+			return ERR_PTR(err);
+		}
 	}
 
-	return 0;
+	return primary;
 }
 
-static u32 tegra_dc_get_vblank_counter(struct drm_crtc *crtc)
+static void tegra_dc_destroy(struct drm_crtc *crtc)
 {
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-
-	if (dc->syncpt)
-		return host1x_syncpt_read(dc->syncpt);
-
-	/* fallback to software emulated VBLANK counter */
-	return drm_crtc_vblank_count(&dc->base);
+	drm_crtc_cleanup(crtc);
 }
 
-static int tegra_dc_enable_vblank(struct drm_crtc *crtc)
+static void tegra_crtc_reset(struct drm_crtc *crtc)
 {
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	unsigned long value, flags;
+	struct tegra_dc_state *state;
 
-	spin_lock_irqsave(&dc->lock, flags);
+	if (crtc->state)
+		__drm_atomic_helper_crtc_destroy_state(crtc->state);
 
-	value = tegra_dc_readl(dc, DC_CMD_INT_MASK);
-	value |= VBLANK_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
+	kfree(crtc->state);
+	crtc->state = NULL;
 
-	spin_unlock_irqrestore(&dc->lock, flags);
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (state) {
+		crtc->state = &state->base;
+		crtc->state->crtc = crtc;
+	}
 
-	return 0;
+	drm_crtc_vblank_reset(crtc);
 }
 
-static void tegra_dc_disable_vblank(struct drm_crtc *crtc)
+static struct drm_crtc_state *
+tegra_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
 {
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	unsigned long value, flags;
+	struct tegra_dc_state *state = to_dc_state(crtc->state);
+	struct tegra_dc_state *copy;
 
-	spin_lock_irqsave(&dc->lock, flags);
+	copy = kmalloc(sizeof(*copy), GFP_KERNEL);
+	if (!copy)
+		return NULL;
 
-	value = tegra_dc_readl(dc, DC_CMD_INT_MASK);
-	value &= ~VBLANK_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &copy->base);
+	copy->clk = state->clk;
+	copy->pclk = state->pclk;
+	copy->div = state->div;
+	copy->planes = state->planes;
 
-	spin_unlock_irqrestore(&dc->lock, flags);
+	return &copy->base;
 }
 
-static void tegra_dc_finish_page_flip(struct tegra_dc *dc)
+static void tegra_crtc_atomic_destroy_state(struct drm_crtc *crtc,
+					    struct drm_crtc_state *state)
 {
-	struct drm_device *drm = dc->base.dev;
-	struct drm_crtc *crtc = &dc->base;
-	unsigned long flags, base;
-	struct tegra_bo *bo;
+	__drm_atomic_helper_crtc_destroy_state(state);
+	kfree(state);
+}
 
-	spin_lock_irqsave(&drm->event_lock, flags);
+#define DEBUGFS_REG32(_name) { .name = #_name, .offset = _name }
+
+static const struct debugfs_reg32 tegra_dc_regs[] = {
+	DEBUGFS_REG32(DC_CMD_GENERAL_INCR_SYNCPT),
+	DEBUGFS_REG32(DC_CMD_GENERAL_INCR_SYNCPT_CNTRL),
+	DEBUGFS_REG32(DC_CMD_GENERAL_INCR_SYNCPT_ERROR),
+	DEBUGFS_REG32(DC_CMD_WIN_A_INCR_SYNCPT),
+	DEBUGFS_REG32(DC_CMD_WIN_A_INCR_SYNCPT_CNTRL),
+	DEBUGFS_REG32(DC_CMD_WIN_A_INCR_SYNCPT_ERROR),
+	DEBUGFS_REG32(DC_CMD_WIN_B_INCR_SYNCPT),
+	DEBUGFS_REG32(DC_CMD_WIN_B_INCR_SYNCPT_CNTRL),
+	DEBUGFS_REG32(DC_CMD_WIN_B_INCR_SYNCPT_ERROR),
+	DEBUGFS_REG32(DC_CMD_WIN_C_INCR_SYNCPT),
+	DEBUGFS_REG32(DC_CMD_WIN_C_INCR_SYNCPT_CNTRL),
+	DEBUGFS_REG32(DC_CMD_WIN_C_INCR_SYNCPT_ERROR),
+	DEBUGFS_REG32(DC_CMD_CONT_SYNCPT_VSYNC),
+	DEBUGFS_REG32(DC_CMD_DISPLAY_COMMAND_OPTION0),
+	DEBUGFS_REG32(DC_CMD_DISPLAY_COMMAND),
+	DEBUGFS_REG32(DC_CMD_SIGNAL_RAISE),
+	DEBUGFS_REG32(DC_CMD_DISPLAY_POWER_CONTROL),
+	DEBUGFS_REG32(DC_CMD_INT_STATUS),
+	DEBUGFS_REG32(DC_CMD_INT_MASK),
+	DEBUGFS_REG32(DC_CMD_INT_ENABLE),
+	DEBUGFS_REG32(DC_CMD_INT_TYPE),
+	DEBUGFS_REG32(DC_CMD_INT_POLARITY),
+	DEBUGFS_REG32(DC_CMD_SIGNAL_RAISE1),
+	DEBUGFS_REG32(DC_CMD_SIGNAL_RAISE2),
+	DEBUGFS_REG32(DC_CMD_SIGNAL_RAISE3),
+	DEBUGFS_REG32(DC_CMD_STATE_ACCESS),
+	DEBUGFS_REG32(DC_CMD_STATE_CONTROL),
+	DEBUGFS_REG32(DC_CMD_DISPLAY_WINDOW_HEADER),
+	DEBUGFS_REG32(DC_CMD_REG_ACT_CONTROL),
+	DEBUGFS_REG32(DC_COM_CRC_CONTROL),
+	DEBUGFS_REG32(DC_COM_CRC_CHECKSUM),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_ENABLE(0)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_ENABLE(1)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_ENABLE(2)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_ENABLE(3)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_POLARITY(0)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_POLARITY(1)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_POLARITY(2)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_POLARITY(3)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_DATA(0)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_DATA(1)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_DATA(2)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_DATA(3)),
+	DEBUGFS_REG32(DC_COM_PIN_INPUT_ENABLE(0)),
+	DEBUGFS_REG32(DC_COM_PIN_INPUT_ENABLE(1)),
+	DEBUGFS_REG32(DC_COM_PIN_INPUT_ENABLE(2)),
+	DEBUGFS_REG32(DC_COM_PIN_INPUT_ENABLE(3)),
+	DEBUGFS_REG32(DC_COM_PIN_INPUT_DATA(0)),
+	DEBUGFS_REG32(DC_COM_PIN_INPUT_DATA(1)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(0)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(1)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(2)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(3)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(4)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(5)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(6)),
+	DEBUGFS_REG32(DC_COM_PIN_MISC_CONTROL),
+	DEBUGFS_REG32(DC_COM_PIN_PM0_CONTROL),
+	DEBUGFS_REG32(DC_COM_PIN_PM0_DUTY_CYCLE),
+	DEBUGFS_REG32(DC_COM_PIN_PM1_CONTROL),
+	DEBUGFS_REG32(DC_COM_PIN_PM1_DUTY_CYCLE),
+	DEBUGFS_REG32(DC_COM_SPI_CONTROL),
+	DEBUGFS_REG32(DC_COM_SPI_START_BYTE),
+	DEBUGFS_REG32(DC_COM_HSPI_WRITE_DATA_AB),
+	DEBUGFS_REG32(DC_COM_HSPI_WRITE_DATA_CD),
+	DEBUGFS_REG32(DC_COM_HSPI_CS_DC),
+	DEBUGFS_REG32(DC_COM_SCRATCH_REGISTER_A),
+	DEBUGFS_REG32(DC_COM_SCRATCH_REGISTER_B),
+	DEBUGFS_REG32(DC_COM_GPIO_CTRL),
+	DEBUGFS_REG32(DC_COM_GPIO_DEBOUNCE_COUNTER),
+	DEBUGFS_REG32(DC_COM_CRC_CHECKSUM_LATCHED),
+	DEBUGFS_REG32(DC_DISP_DISP_SIGNAL_OPTIONS0),
+	DEBUGFS_REG32(DC_DISP_DISP_SIGNAL_OPTIONS1),
+	DEBUGFS_REG32(DC_DISP_DISP_WIN_OPTIONS),
+	DEBUGFS_REG32(DC_DISP_DISP_MEM_HIGH_PRIORITY),
+	DEBUGFS_REG32(DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER),
+	DEBUGFS_REG32(DC_DISP_DISP_TIMING_OPTIONS),
+	DEBUGFS_REG32(DC_DISP_REF_TO_SYNC),
+	DEBUGFS_REG32(DC_DISP_SYNC_WIDTH),
+	DEBUGFS_REG32(DC_DISP_BACK_PORCH),
+	DEBUGFS_REG32(DC_DISP_ACTIVE),
+	DEBUGFS_REG32(DC_DISP_FRONT_PORCH),
+	DEBUGFS_REG32(DC_DISP_H_PULSE0_CONTROL),
+	DEBUGFS_REG32(DC_DISP_H_PULSE0_POSITION_A),
+	DEBUGFS_REG32(DC_DISP_H_PULSE0_POSITION_B),
+	DEBUGFS_REG32(DC_DISP_H_PULSE0_POSITION_C),
+	DEBUGFS_REG32(DC_DISP_H_PULSE0_POSITION_D),
+	DEBUGFS_REG32(DC_DISP_H_PULSE1_CONTROL),
+	DEBUGFS_REG32(DC_DISP_H_PULSE1_POSITION_A),
+	DEBUGFS_REG32(DC_DISP_H_PULSE1_POSITION_B),
+	DEBUGFS_REG32(DC_DISP_H_PULSE1_POSITION_C),
+	DEBUGFS_REG32(DC_DISP_H_PULSE1_POSITION_D),
+	DEBUGFS_REG32(DC_DISP_H_PULSE2_CONTROL),
+	DEBUGFS_REG32(DC_DISP_H_PULSE2_POSITION_A),
+	DEBUGFS_REG32(DC_DISP_H_PULSE2_POSITION_B),
+	DEBUGFS_REG32(DC_DISP_H_PULSE2_POSITION_C),
+	DEBUGFS_REG32(DC_DISP_H_PULSE2_POSITION_D),
+	DEBUGFS_REG32(DC_DISP_V_PULSE0_CONTROL),
+	DEBUGFS_REG32(DC_DISP_V_PULSE0_POSITION_A),
+	DEBUGFS_REG32(DC_DISP_V_PULSE0_POSITION_B),
+	DEBUGFS_REG32(DC_DISP_V_PULSE0_POSITION_C),
+	DEBUGFS_REG32(DC_DISP_V_PULSE1_CONTROL),
+	DEBUGFS_REG32(DC_DISP_V_PULSE1_POSITION_A),
+	DEBUGFS_REG32(DC_DISP_V_PULSE1_POSITION_B),
+	DEBUGFS_REG32(DC_DISP_V_PULSE1_POSITION_C),
+	DEBUGFS_REG32(DC_DISP_V_PULSE2_CONTROL),
+	DEBUGFS_REG32(DC_DISP_V_PULSE2_POSITION_A),
+	DEBUGFS_REG32(DC_DISP_V_PULSE3_CONTROL),
+	DEBUGFS_REG32(DC_DISP_V_PULSE3_POSITION_A),
+	DEBUGFS_REG32(DC_DISP_M0_CONTROL),
+	DEBUGFS_REG32(DC_DISP_M1_CONTROL),
+	DEBUGFS_REG32(DC_DISP_DI_CONTROL),
+	DEBUGFS_REG32(DC_DISP_PP_CONTROL),
+	DEBUGFS_REG32(DC_DISP_PP_SELECT_A),
+	DEBUGFS_REG32(DC_DISP_PP_SELECT_B),
+	DEBUGFS_REG32(DC_DISP_PP_SELECT_C),
+	DEBUGFS_REG32(DC_DISP_PP_SELECT_D),
+	DEBUGFS_REG32(DC_DISP_DISP_CLOCK_CONTROL),
+	DEBUGFS_REG32(DC_DISP_DISP_INTERFACE_CONTROL),
+	DEBUGFS_REG32(DC_DISP_DISP_COLOR_CONTROL),
+	DEBUGFS_REG32(DC_DISP_SHIFT_CLOCK_OPTIONS),
+	DEBUGFS_REG32(DC_DISP_DATA_ENABLE_OPTIONS),
+	DEBUGFS_REG32(DC_DISP_SERIAL_INTERFACE_OPTIONS),
+	DEBUGFS_REG32(DC_DISP_LCD_SPI_OPTIONS),
+	DEBUGFS_REG32(DC_DISP_BORDER_COLOR),
+	DEBUGFS_REG32(DC_DISP_COLOR_KEY0_LOWER),
+	DEBUGFS_REG32(DC_DISP_COLOR_KEY0_UPPER),
+	DEBUGFS_REG32(DC_DISP_COLOR_KEY1_LOWER),
+	DEBUGFS_REG32(DC_DISP_COLOR_KEY1_UPPER),
+	DEBUGFS_REG32(DC_DISP_CURSOR_FOREGROUND),
+	DEBUGFS_REG32(DC_DISP_CURSOR_BACKGROUND),
+	DEBUGFS_REG32(DC_DISP_CURSOR_START_ADDR),
+	DEBUGFS_REG32(DC_DISP_CURSOR_START_ADDR_NS),
+	DEBUGFS_REG32(DC_DISP_CURSOR_POSITION),
+	DEBUGFS_REG32(DC_DISP_CURSOR_POSITION_NS),
+	DEBUGFS_REG32(DC_DISP_INIT_SEQ_CONTROL),
+	DEBUGFS_REG32(DC_DISP_SPI_INIT_SEQ_DATA_A),
+	DEBUGFS_REG32(DC_DISP_SPI_INIT_SEQ_DATA_B),
+	DEBUGFS_REG32(DC_DISP_SPI_INIT_SEQ_DATA_C),
+	DEBUGFS_REG32(DC_DISP_SPI_INIT_SEQ_DATA_D),
+	DEBUGFS_REG32(DC_DISP_DC_MCCIF_FIFOCTRL),
+	DEBUGFS_REG32(DC_DISP_MCCIF_DISPLAY0A_HYST),
+	DEBUGFS_REG32(DC_DISP_MCCIF_DISPLAY0B_HYST),
+	DEBUGFS_REG32(DC_DISP_MCCIF_DISPLAY1A_HYST),
+	DEBUGFS_REG32(DC_DISP_MCCIF_DISPLAY1B_HYST),
+	DEBUGFS_REG32(DC_DISP_DAC_CRT_CTRL),
+	DEBUGFS_REG32(DC_DISP_DISP_MISC_CONTROL),
+	DEBUGFS_REG32(DC_DISP_SD_CONTROL),
+	DEBUGFS_REG32(DC_DISP_SD_CSC_COEFF),
+	DEBUGFS_REG32(DC_DISP_SD_LUT(0)),
+	DEBUGFS_REG32(DC_DISP_SD_LUT(1)),
+	DEBUGFS_REG32(DC_DISP_SD_LUT(2)),
+	DEBUGFS_REG32(DC_DISP_SD_LUT(3)),
+	DEBUGFS_REG32(DC_DISP_SD_LUT(4)),
+	DEBUGFS_REG32(DC_DISP_SD_LUT(5)),
+	DEBUGFS_REG32(DC_DISP_SD_LUT(6)),
+	DEBUGFS_REG32(DC_DISP_SD_LUT(7)),
+	DEBUGFS_REG32(DC_DISP_SD_LUT(8)),
+	DEBUGFS_REG32(DC_DISP_SD_FLICKER_CONTROL),
+	DEBUGFS_REG32(DC_DISP_DC_PIXEL_COUNT),
+	DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(0)),
+	DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(1)),
+	DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(2)),
+	DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(3)),
+	DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(4)),
+	DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(5)),
+	DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(6)),
+	DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(7)),
+	DEBUGFS_REG32(DC_DISP_SD_BL_TF(0)),
+	DEBUGFS_REG32(DC_DISP_SD_BL_TF(1)),
+	DEBUGFS_REG32(DC_DISP_SD_BL_TF(2)),
+	DEBUGFS_REG32(DC_DISP_SD_BL_TF(3)),
+	DEBUGFS_REG32(DC_DISP_SD_BL_CONTROL),
+	DEBUGFS_REG32(DC_DISP_SD_HW_K_VALUES),
+	DEBUGFS_REG32(DC_DISP_SD_MAN_K_VALUES),
+	DEBUGFS_REG32(DC_DISP_CURSOR_START_ADDR_HI),
+	DEBUGFS_REG32(DC_DISP_BLEND_CURSOR_CONTROL),
+	DEBUGFS_REG32(DC_WIN_WIN_OPTIONS),
+	DEBUGFS_REG32(DC_WIN_BYTE_SWAP),
+	DEBUGFS_REG32(DC_WIN_BUFFER_CONTROL),
+	DEBUGFS_REG32(DC_WIN_COLOR_DEPTH),
+	DEBUGFS_REG32(DC_WIN_POSITION),
+	DEBUGFS_REG32(DC_WIN_SIZE),
+	DEBUGFS_REG32(DC_WIN_PRESCALED_SIZE),
+	DEBUGFS_REG32(DC_WIN_H_INITIAL_DDA),
+	DEBUGFS_REG32(DC_WIN_V_INITIAL_DDA),
+	DEBUGFS_REG32(DC_WIN_DDA_INC),
+	DEBUGFS_REG32(DC_WIN_LINE_STRIDE),
+	DEBUGFS_REG32(DC_WIN_BUF_STRIDE),
+	DEBUGFS_REG32(DC_WIN_UV_BUF_STRIDE),
+	DEBUGFS_REG32(DC_WIN_BUFFER_ADDR_MODE),
+	DEBUGFS_REG32(DC_WIN_DV_CONTROL),
+	DEBUGFS_REG32(DC_WIN_BLEND_NOKEY),
+	DEBUGFS_REG32(DC_WIN_BLEND_1WIN),
+	DEBUGFS_REG32(DC_WIN_BLEND_2WIN_X),
+	DEBUGFS_REG32(DC_WIN_BLEND_2WIN_Y),
+	DEBUGFS_REG32(DC_WIN_BLEND_3WIN_XY),
+	DEBUGFS_REG32(DC_WIN_HP_FETCH_CONTROL),
+	DEBUGFS_REG32(DC_WINBUF_START_ADDR),
+	DEBUGFS_REG32(DC_WINBUF_START_ADDR_NS),
+	DEBUGFS_REG32(DC_WINBUF_START_ADDR_U),
+	DEBUGFS_REG32(DC_WINBUF_START_ADDR_U_NS),
+	DEBUGFS_REG32(DC_WINBUF_START_ADDR_V),
+	DEBUGFS_REG32(DC_WINBUF_START_ADDR_V_NS),
+	DEBUGFS_REG32(DC_WINBUF_ADDR_H_OFFSET),
+	DEBUGFS_REG32(DC_WINBUF_ADDR_H_OFFSET_NS),
+	DEBUGFS_REG32(DC_WINBUF_ADDR_V_OFFSET),
+	DEBUGFS_REG32(DC_WINBUF_ADDR_V_OFFSET_NS),
+	DEBUGFS_REG32(DC_WINBUF_UFLOW_STATUS),
+	DEBUGFS_REG32(DC_WINBUF_AD_UFLOW_STATUS),
+	DEBUGFS_REG32(DC_WINBUF_BD_UFLOW_STATUS),
+	DEBUGFS_REG32(DC_WINBUF_CD_UFLOW_STATUS),
+};
 
-	if (!dc->event) {
-		spin_unlock_irqrestore(&drm->event_lock, flags);
-		return;
+static int tegra_dc_show_regs(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct tegra_dc *dc = node->info_ent->data;
+	unsigned int i;
+	int err = 0;
+
+	drm_modeset_lock(&dc->base.mutex, NULL);
+
+	if (!dc->base.state->active) {
+		err = -EBUSY;
+		goto unlock;
 	}
 
-	bo = tegra_fb_get_plane(crtc->primary->fb, 0);
+	for (i = 0; i < ARRAY_SIZE(tegra_dc_regs); i++) {
+		unsigned int offset = tegra_dc_regs[i].offset;
 
-	spin_lock(&dc->lock);
+		seq_printf(s, "%-40s %#05x %08x\n", tegra_dc_regs[i].name,
+			   offset, tegra_dc_readl(dc, offset));
+	}
 
-	/* check if new start address has been latched */
-	tegra_dc_writel(dc, WINDOW_A_SELECT, DC_CMD_DISPLAY_WINDOW_HEADER);
-	tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS);
-	base = tegra_dc_readl(dc, DC_WINBUF_START_ADDR);
-	tegra_dc_writel(dc, 0, DC_CMD_STATE_ACCESS);
+unlock:
+	drm_modeset_unlock(&dc->base.mutex);
+	return err;
+}
+
+static int tegra_dc_show_crc(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct tegra_dc *dc = node->info_ent->data;
+	int err = 0;
+	u32 value;
 
-	spin_unlock(&dc->lock);
+	drm_modeset_lock(&dc->base.mutex, NULL);
 
-	if (base == bo->paddr + crtc->primary->fb->offsets[0]) {
-		drm_crtc_send_vblank_event(crtc, dc->event);
-		drm_crtc_vblank_put(crtc);
-		dc->event = NULL;
+	if (!dc->base.state->active) {
+		err = -EBUSY;
+		goto unlock;
 	}
 
-	spin_unlock_irqrestore(&drm->event_lock, flags);
+	value = DC_COM_CRC_CONTROL_ACTIVE_DATA | DC_COM_CRC_CONTROL_ENABLE;
+	tegra_dc_writel(dc, value, DC_COM_CRC_CONTROL);
+	tegra_dc_commit(dc);
+
+	drm_crtc_wait_one_vblank(&dc->base);
+	drm_crtc_wait_one_vblank(&dc->base);
+
+	value = tegra_dc_readl(dc, DC_COM_CRC_CHECKSUM);
+	seq_printf(s, "%08x\n", value);
+
+	tegra_dc_writel(dc, 0, DC_COM_CRC_CONTROL);
+
+unlock:
+	drm_modeset_unlock(&dc->base.mutex);
+	return err;
 }
 
-static void tegra_dc_destroy(struct drm_crtc *crtc)
+static int tegra_dc_show_stats(struct seq_file *s, void *data)
 {
-	drm_crtc_cleanup(crtc);
+	struct drm_info_node *node = s->private;
+	struct tegra_dc *dc = node->info_ent->data;
+
+	seq_printf(s, "frames: %lu\n", dc->stats.frames);
+	seq_printf(s, "vblank: %lu\n", dc->stats.vblank);
+	seq_printf(s, "underflow: %lu\n", dc->stats.underflow);
+	seq_printf(s, "overflow: %lu\n", dc->stats.overflow);
+
+	return 0;
 }
 
-static void tegra_crtc_reset(struct drm_crtc *crtc)
+static struct drm_info_list debugfs_files[] = {
+	{ "regs", tegra_dc_show_regs, 0, NULL },
+	{ "crc", tegra_dc_show_crc, 0, NULL },
+	{ "stats", tegra_dc_show_stats, 0, NULL },
+};
+
+static int tegra_dc_late_register(struct drm_crtc *crtc)
 {
-	struct tegra_dc_state *state;
+	unsigned int i, count = ARRAY_SIZE(debugfs_files);
+	struct drm_minor *minor = crtc->dev->primary;
+	struct dentry *root;
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	int err;
 
-	if (crtc->state)
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
+#ifdef CONFIG_DEBUG_FS
+	root = crtc->debugfs_entry;
+#else
+	root = NULL;
+#endif
 
-	kfree(crtc->state);
-	crtc->state = NULL;
+	dc->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
+				    GFP_KERNEL);
+	if (!dc->debugfs_files)
+		return -ENOMEM;
 
-	state = kzalloc(sizeof(*state), GFP_KERNEL);
-	if (state) {
-		crtc->state = &state->base;
-		crtc->state->crtc = crtc;
-	}
+	for (i = 0; i < count; i++)
+		dc->debugfs_files[i].data = dc;
+
+	err = drm_debugfs_create_files(dc->debugfs_files, count, root, minor);
+	if (err < 0)
+		goto free;
+
+	return 0;
+
+free:
+	kfree(dc->debugfs_files);
+	dc->debugfs_files = NULL;
+
+	return err;
+}
+
+static void tegra_dc_early_unregister(struct drm_crtc *crtc)
+{
+	unsigned int count = ARRAY_SIZE(debugfs_files);
+	struct drm_minor *minor = crtc->dev->primary;
+	struct tegra_dc *dc = to_tegra_dc(crtc);
 
-	drm_crtc_vblank_reset(crtc);
+	drm_debugfs_remove_files(dc->debugfs_files, count, minor);
+	kfree(dc->debugfs_files);
+	dc->debugfs_files = NULL;
 }
 
-static struct drm_crtc_state *
-tegra_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
+static u32 tegra_dc_get_vblank_counter(struct drm_crtc *crtc)
 {
-	struct tegra_dc_state *state = to_dc_state(crtc->state);
-	struct tegra_dc_state *copy;
+	struct tegra_dc *dc = to_tegra_dc(crtc);
 
-	copy = kmalloc(sizeof(*copy), GFP_KERNEL);
-	if (!copy)
-		return NULL;
+	/* XXX vblank syncpoints don't work with nvdisplay yet */
+	if (dc->syncpt && !dc->soc->has_nvdisplay)
+		return host1x_syncpt_read(dc->syncpt);
 
-	__drm_atomic_helper_crtc_duplicate_state(crtc, &copy->base);
-	copy->clk = state->clk;
-	copy->pclk = state->pclk;
-	copy->div = state->div;
-	copy->planes = state->planes;
+	/* fallback to software emulated VBLANK counter */
+	return drm_crtc_vblank_count(&dc->base);
+}
 
-	return &copy->base;
+static int tegra_dc_enable_vblank(struct drm_crtc *crtc)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	u32 value;
+
+	value = tegra_dc_readl(dc, DC_CMD_INT_MASK);
+	value |= VBLANK_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
+
+	return 0;
 }
 
-static void tegra_crtc_atomic_destroy_state(struct drm_crtc *crtc,
-					    struct drm_crtc_state *state)
+static void tegra_dc_disable_vblank(struct drm_crtc *crtc)
 {
-	__drm_atomic_helper_crtc_destroy_state(state);
-	kfree(state);
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	u32 value;
+
+	value = tegra_dc_readl(dc, DC_CMD_INT_MASK);
+	value &= ~VBLANK_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
 }
 
 static const struct drm_crtc_funcs tegra_crtc_funcs = {
@@ -1042,6 +1391,8 @@ static const struct drm_crtc_funcs tegra_crtc_funcs = {
 	.reset = tegra_crtc_reset,
 	.atomic_duplicate_state = tegra_crtc_atomic_duplicate_state,
 	.atomic_destroy_state = tegra_crtc_atomic_destroy_state,
+	.late_register = tegra_dc_late_register,
+	.early_unregister = tegra_dc_early_unregister,
 	.get_vblank_counter = tegra_dc_get_vblank_counter,
 	.enable_vblank = tegra_dc_enable_vblank,
 	.disable_vblank = tegra_dc_disable_vblank,
@@ -1054,10 +1405,12 @@ static int tegra_dc_set_timings(struct tegra_dc *dc,
 	unsigned int v_ref_to_sync = 1;
 	unsigned long value;
 
-	tegra_dc_writel(dc, 0x0, DC_DISP_DISP_TIMING_OPTIONS);
+	if (!dc->soc->has_nvdisplay) {
+		tegra_dc_writel(dc, 0x0, DC_DISP_DISP_TIMING_OPTIONS);
 
-	value = (v_ref_to_sync << 16) | h_ref_to_sync;
-	tegra_dc_writel(dc, value, DC_DISP_REF_TO_SYNC);
+		value = (v_ref_to_sync << 16) | h_ref_to_sync;
+		tegra_dc_writel(dc, value, DC_DISP_REF_TO_SYNC);
+	}
 
 	value = ((mode->vsync_end - mode->vsync_start) << 16) |
 		((mode->hsync_end - mode->hsync_start) <<  0);
@@ -1136,8 +1489,10 @@ static void tegra_dc_commit_state(struct tegra_dc *dc,
 		      state->div);
 	DRM_DEBUG_KMS("pclk: %lu\n", state->pclk);
 
-	value = SHIFT_CLK_DIVIDER(state->div) | PIXEL_CLK_DIVIDER_PCD1;
-	tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
+	if (!dc->soc->has_nvdisplay) {
+		value = SHIFT_CLK_DIVIDER(state->div) | PIXEL_CLK_DIVIDER_PCD1;
+		tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
+	}
 
 	err = clk_set_rate(dc->clk, state->pclk);
 	if (err < 0)
@@ -1223,6 +1578,15 @@ static void tegra_crtc_atomic_disable(struct drm_crtc *crtc,
 	tegra_dc_stats_reset(&dc->stats);
 	drm_crtc_vblank_off(crtc);
 
+	spin_lock_irq(&crtc->dev->event_lock);
+
+	if (crtc->state->event) {
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
+		crtc->state->event = NULL;
+	}
+
+	spin_unlock_irq(&crtc->dev->event_lock);
+
 	pm_runtime_put_sync(dc->dev);
 }
 
@@ -1238,41 +1602,70 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
 
 	/* initialize display controller */
 	if (dc->syncpt) {
-		u32 syncpt = host1x_syncpt_id(dc->syncpt);
+		u32 syncpt = host1x_syncpt_id(dc->syncpt), enable;
+
+		if (dc->soc->has_nvdisplay)
+			enable = 1 << 31;
+		else
+			enable = 1 << 8;
 
 		value = SYNCPT_CNTRL_NO_STALL;
 		tegra_dc_writel(dc, value, DC_CMD_GENERAL_INCR_SYNCPT_CNTRL);
 
-		value = SYNCPT_VSYNC_ENABLE | syncpt;
+		value = enable | syncpt;
 		tegra_dc_writel(dc, value, DC_CMD_CONT_SYNCPT_VSYNC);
 	}
 
-	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_TYPE);
+	if (dc->soc->has_nvdisplay) {
+		value = DSC_TO_UF_INT | DSC_BBUF_UF_INT | DSC_RBUF_UF_INT |
+			DSC_OBUF_UF_INT;
+		tegra_dc_writel(dc, value, DC_CMD_INT_TYPE);
 
-	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY);
+		value = DSC_TO_UF_INT | DSC_BBUF_UF_INT | DSC_RBUF_UF_INT |
+			DSC_OBUF_UF_INT | SD3_BUCKET_WALK_DONE_INT |
+			HEAD_UF_INT | MSF_INT | REG_TMOUT_INT |
+			REGION_CRC_INT | V_PULSE2_INT | V_PULSE3_INT |
+			VBLANK_INT | FRAME_END_INT;
+		tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY);
 
-	/* initialize timer */
-	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(0x20) |
-		WINDOW_B_THRESHOLD(0x20) | WINDOW_C_THRESHOLD(0x20);
-	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY);
+		value = SD3_BUCKET_WALK_DONE_INT | HEAD_UF_INT | VBLANK_INT |
+			FRAME_END_INT;
+		tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE);
 
-	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(1) |
-		WINDOW_B_THRESHOLD(1) | WINDOW_C_THRESHOLD(1);
-	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER);
+		value = HEAD_UF_INT | REG_TMOUT_INT | FRAME_END_INT;
+		tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
 
-	value = VBLANK_INT | WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE);
-
-	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
+		tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS);
+	} else {
+		value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+			WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+		tegra_dc_writel(dc, value, DC_CMD_INT_TYPE);
+
+		value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+			WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+		tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY);
+
+		/* initialize timer */
+		value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(0x20) |
+			WINDOW_B_THRESHOLD(0x20) | WINDOW_C_THRESHOLD(0x20);
+		tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY);
+
+		value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(1) |
+			WINDOW_B_THRESHOLD(1) | WINDOW_C_THRESHOLD(1);
+		tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER);
+
+		value = VBLANK_INT | WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+			WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+		tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE);
+
+		value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+			WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+		tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
+	}
 
-	if (dc->soc->supports_border_color)
+	if (dc->soc->supports_background_color)
+		tegra_dc_writel(dc, 0, DC_DISP_BLEND_BACKGROUND_COLOR);
+	else
 		tegra_dc_writel(dc, 0, DC_DISP_BORDER_COLOR);
 
 	/* apply PLL and pixel clock changes */
@@ -1293,10 +1686,18 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
 	value |= DISP_CTRL_MODE_C_DISPLAY;
 	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_COMMAND);
 
-	value = tegra_dc_readl(dc, DC_CMD_DISPLAY_POWER_CONTROL);
-	value |= PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE |
-		 PW4_ENABLE | PM0_ENABLE | PM1_ENABLE;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_POWER_CONTROL);
+	if (!dc->soc->has_nvdisplay) {
+		value = tegra_dc_readl(dc, DC_CMD_DISPLAY_POWER_CONTROL);
+		value |= PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE |
+			 PW4_ENABLE | PM0_ENABLE | PM1_ENABLE;
+		tegra_dc_writel(dc, value, DC_CMD_DISPLAY_POWER_CONTROL);
+	}
+
+	/* enable underflow reporting and display red for missing pixels */
+	if (dc->soc->has_nvdisplay) {
+		value = UNDERFLOW_MODE_RED | UNDERFLOW_REPORT_ENABLE;
+		tegra_dc_writel(dc, value, DC_COM_RG_UNDERFLOW);
+	}
 
 	tegra_dc_commit(dc);
 
@@ -1306,20 +1707,43 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
 static int tegra_crtc_atomic_check(struct drm_crtc *crtc,
 				   struct drm_crtc_state *state)
 {
+	struct tegra_atomic_state *s = to_tegra_atomic_state(state->state);
+	struct tegra_dc_state *tegra = to_dc_state(state);
+
+	/*
+	 * The display hub display clock needs to be fed by the display clock
+	 * with the highest frequency to ensure proper functioning of all the
+	 * displays.
+	 *
+	 * Note that this isn't used before Tegra186, but it doesn't hurt and
+	 * conditionalizing it would make the code less clean.
+	 */
+	if (state->active) {
+		if (!s->clk_disp || tegra->pclk > s->rate) {
+			s->dc = to_tegra_dc(crtc);
+			s->clk_disp = s->dc->clk;
+			s->rate = tegra->pclk;
+		}
+	}
+
 	return 0;
 }
 
 static void tegra_crtc_atomic_begin(struct drm_crtc *crtc,
 				    struct drm_crtc_state *old_crtc_state)
 {
-	struct tegra_dc *dc = to_tegra_dc(crtc);
+	unsigned long flags;
 
 	if (crtc->state->event) {
-		crtc->state->event->pipe = drm_crtc_index(crtc);
+		spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+		if (drm_crtc_vblank_get(crtc) != 0)
+			drm_crtc_send_vblank_event(crtc, crtc->state->event);
+		else
+			drm_crtc_arm_vblank_event(crtc, crtc->state->event);
 
-		WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 
-		dc->event = crtc->state->event;
 		crtc->state->event = NULL;
 	}
 }
@@ -1329,9 +1753,15 @@ static void tegra_crtc_atomic_flush(struct drm_crtc *crtc,
 {
 	struct tegra_dc_state *state = to_dc_state(crtc->state);
 	struct tegra_dc *dc = to_tegra_dc(crtc);
+	u32 value;
 
-	tegra_dc_writel(dc, state->planes << 8, DC_CMD_STATE_CONTROL);
-	tegra_dc_writel(dc, state->planes, DC_CMD_STATE_CONTROL);
+	value = state->planes << 8 | GENERAL_UPDATE;
+	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+	value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+
+	value = state->planes | GENERAL_ACT_REQ;
+	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+	value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
 }
 
 static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = {
@@ -1362,7 +1792,6 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
 		dev_dbg(dc->dev, "%s(): vertical blank\n", __func__);
 		*/
 		drm_crtc_handle_vblank(&dc->base);
-		tegra_dc_finish_page_flip(dc);
 		dc->stats.vblank++;
 	}
 
@@ -1380,357 +1809,18 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
 		dc->stats.overflow++;
 	}
 
-	return IRQ_HANDLED;
-}
-
-static int tegra_dc_show_regs(struct seq_file *s, void *data)
-{
-	struct drm_info_node *node = s->private;
-	struct tegra_dc *dc = node->info_ent->data;
-	int err = 0;
-
-	drm_modeset_lock(&dc->base.mutex, NULL);
-
-	if (!dc->base.state->active) {
-		err = -EBUSY;
-		goto unlock;
-	}
-
-#define DUMP_REG(name)						\
-	seq_printf(s, "%-40s %#05x %08x\n", #name, name,	\
-		   tegra_dc_readl(dc, name))
-
-	DUMP_REG(DC_CMD_GENERAL_INCR_SYNCPT);
-	DUMP_REG(DC_CMD_GENERAL_INCR_SYNCPT_CNTRL);
-	DUMP_REG(DC_CMD_GENERAL_INCR_SYNCPT_ERROR);
-	DUMP_REG(DC_CMD_WIN_A_INCR_SYNCPT);
-	DUMP_REG(DC_CMD_WIN_A_INCR_SYNCPT_CNTRL);
-	DUMP_REG(DC_CMD_WIN_A_INCR_SYNCPT_ERROR);
-	DUMP_REG(DC_CMD_WIN_B_INCR_SYNCPT);
-	DUMP_REG(DC_CMD_WIN_B_INCR_SYNCPT_CNTRL);
-	DUMP_REG(DC_CMD_WIN_B_INCR_SYNCPT_ERROR);
-	DUMP_REG(DC_CMD_WIN_C_INCR_SYNCPT);
-	DUMP_REG(DC_CMD_WIN_C_INCR_SYNCPT_CNTRL);
-	DUMP_REG(DC_CMD_WIN_C_INCR_SYNCPT_ERROR);
-	DUMP_REG(DC_CMD_CONT_SYNCPT_VSYNC);
-	DUMP_REG(DC_CMD_DISPLAY_COMMAND_OPTION0);
-	DUMP_REG(DC_CMD_DISPLAY_COMMAND);
-	DUMP_REG(DC_CMD_SIGNAL_RAISE);
-	DUMP_REG(DC_CMD_DISPLAY_POWER_CONTROL);
-	DUMP_REG(DC_CMD_INT_STATUS);
-	DUMP_REG(DC_CMD_INT_MASK);
-	DUMP_REG(DC_CMD_INT_ENABLE);
-	DUMP_REG(DC_CMD_INT_TYPE);
-	DUMP_REG(DC_CMD_INT_POLARITY);
-	DUMP_REG(DC_CMD_SIGNAL_RAISE1);
-	DUMP_REG(DC_CMD_SIGNAL_RAISE2);
-	DUMP_REG(DC_CMD_SIGNAL_RAISE3);
-	DUMP_REG(DC_CMD_STATE_ACCESS);
-	DUMP_REG(DC_CMD_STATE_CONTROL);
-	DUMP_REG(DC_CMD_DISPLAY_WINDOW_HEADER);
-	DUMP_REG(DC_CMD_REG_ACT_CONTROL);
-	DUMP_REG(DC_COM_CRC_CONTROL);
-	DUMP_REG(DC_COM_CRC_CHECKSUM);
-	DUMP_REG(DC_COM_PIN_OUTPUT_ENABLE(0));
-	DUMP_REG(DC_COM_PIN_OUTPUT_ENABLE(1));
-	DUMP_REG(DC_COM_PIN_OUTPUT_ENABLE(2));
-	DUMP_REG(DC_COM_PIN_OUTPUT_ENABLE(3));
-	DUMP_REG(DC_COM_PIN_OUTPUT_POLARITY(0));
-	DUMP_REG(DC_COM_PIN_OUTPUT_POLARITY(1));
-	DUMP_REG(DC_COM_PIN_OUTPUT_POLARITY(2));
-	DUMP_REG(DC_COM_PIN_OUTPUT_POLARITY(3));
-	DUMP_REG(DC_COM_PIN_OUTPUT_DATA(0));
-	DUMP_REG(DC_COM_PIN_OUTPUT_DATA(1));
-	DUMP_REG(DC_COM_PIN_OUTPUT_DATA(2));
-	DUMP_REG(DC_COM_PIN_OUTPUT_DATA(3));
-	DUMP_REG(DC_COM_PIN_INPUT_ENABLE(0));
-	DUMP_REG(DC_COM_PIN_INPUT_ENABLE(1));
-	DUMP_REG(DC_COM_PIN_INPUT_ENABLE(2));
-	DUMP_REG(DC_COM_PIN_INPUT_ENABLE(3));
-	DUMP_REG(DC_COM_PIN_INPUT_DATA(0));
-	DUMP_REG(DC_COM_PIN_INPUT_DATA(1));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(0));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(1));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(2));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(3));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(4));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(5));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(6));
-	DUMP_REG(DC_COM_PIN_MISC_CONTROL);
-	DUMP_REG(DC_COM_PIN_PM0_CONTROL);
-	DUMP_REG(DC_COM_PIN_PM0_DUTY_CYCLE);
-	DUMP_REG(DC_COM_PIN_PM1_CONTROL);
-	DUMP_REG(DC_COM_PIN_PM1_DUTY_CYCLE);
-	DUMP_REG(DC_COM_SPI_CONTROL);
-	DUMP_REG(DC_COM_SPI_START_BYTE);
-	DUMP_REG(DC_COM_HSPI_WRITE_DATA_AB);
-	DUMP_REG(DC_COM_HSPI_WRITE_DATA_CD);
-	DUMP_REG(DC_COM_HSPI_CS_DC);
-	DUMP_REG(DC_COM_SCRATCH_REGISTER_A);
-	DUMP_REG(DC_COM_SCRATCH_REGISTER_B);
-	DUMP_REG(DC_COM_GPIO_CTRL);
-	DUMP_REG(DC_COM_GPIO_DEBOUNCE_COUNTER);
-	DUMP_REG(DC_COM_CRC_CHECKSUM_LATCHED);
-	DUMP_REG(DC_DISP_DISP_SIGNAL_OPTIONS0);
-	DUMP_REG(DC_DISP_DISP_SIGNAL_OPTIONS1);
-	DUMP_REG(DC_DISP_DISP_WIN_OPTIONS);
-	DUMP_REG(DC_DISP_DISP_MEM_HIGH_PRIORITY);
-	DUMP_REG(DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER);
-	DUMP_REG(DC_DISP_DISP_TIMING_OPTIONS);
-	DUMP_REG(DC_DISP_REF_TO_SYNC);
-	DUMP_REG(DC_DISP_SYNC_WIDTH);
-	DUMP_REG(DC_DISP_BACK_PORCH);
-	DUMP_REG(DC_DISP_ACTIVE);
-	DUMP_REG(DC_DISP_FRONT_PORCH);
-	DUMP_REG(DC_DISP_H_PULSE0_CONTROL);
-	DUMP_REG(DC_DISP_H_PULSE0_POSITION_A);
-	DUMP_REG(DC_DISP_H_PULSE0_POSITION_B);
-	DUMP_REG(DC_DISP_H_PULSE0_POSITION_C);
-	DUMP_REG(DC_DISP_H_PULSE0_POSITION_D);
-	DUMP_REG(DC_DISP_H_PULSE1_CONTROL);
-	DUMP_REG(DC_DISP_H_PULSE1_POSITION_A);
-	DUMP_REG(DC_DISP_H_PULSE1_POSITION_B);
-	DUMP_REG(DC_DISP_H_PULSE1_POSITION_C);
-	DUMP_REG(DC_DISP_H_PULSE1_POSITION_D);
-	DUMP_REG(DC_DISP_H_PULSE2_CONTROL);
-	DUMP_REG(DC_DISP_H_PULSE2_POSITION_A);
-	DUMP_REG(DC_DISP_H_PULSE2_POSITION_B);
-	DUMP_REG(DC_DISP_H_PULSE2_POSITION_C);
-	DUMP_REG(DC_DISP_H_PULSE2_POSITION_D);
-	DUMP_REG(DC_DISP_V_PULSE0_CONTROL);
-	DUMP_REG(DC_DISP_V_PULSE0_POSITION_A);
-	DUMP_REG(DC_DISP_V_PULSE0_POSITION_B);
-	DUMP_REG(DC_DISP_V_PULSE0_POSITION_C);
-	DUMP_REG(DC_DISP_V_PULSE1_CONTROL);
-	DUMP_REG(DC_DISP_V_PULSE1_POSITION_A);
-	DUMP_REG(DC_DISP_V_PULSE1_POSITION_B);
-	DUMP_REG(DC_DISP_V_PULSE1_POSITION_C);
-	DUMP_REG(DC_DISP_V_PULSE2_CONTROL);
-	DUMP_REG(DC_DISP_V_PULSE2_POSITION_A);
-	DUMP_REG(DC_DISP_V_PULSE3_CONTROL);
-	DUMP_REG(DC_DISP_V_PULSE3_POSITION_A);
-	DUMP_REG(DC_DISP_M0_CONTROL);
-	DUMP_REG(DC_DISP_M1_CONTROL);
-	DUMP_REG(DC_DISP_DI_CONTROL);
-	DUMP_REG(DC_DISP_PP_CONTROL);
-	DUMP_REG(DC_DISP_PP_SELECT_A);
-	DUMP_REG(DC_DISP_PP_SELECT_B);
-	DUMP_REG(DC_DISP_PP_SELECT_C);
-	DUMP_REG(DC_DISP_PP_SELECT_D);
-	DUMP_REG(DC_DISP_DISP_CLOCK_CONTROL);
-	DUMP_REG(DC_DISP_DISP_INTERFACE_CONTROL);
-	DUMP_REG(DC_DISP_DISP_COLOR_CONTROL);
-	DUMP_REG(DC_DISP_SHIFT_CLOCK_OPTIONS);
-	DUMP_REG(DC_DISP_DATA_ENABLE_OPTIONS);
-	DUMP_REG(DC_DISP_SERIAL_INTERFACE_OPTIONS);
-	DUMP_REG(DC_DISP_LCD_SPI_OPTIONS);
-	DUMP_REG(DC_DISP_BORDER_COLOR);
-	DUMP_REG(DC_DISP_COLOR_KEY0_LOWER);
-	DUMP_REG(DC_DISP_COLOR_KEY0_UPPER);
-	DUMP_REG(DC_DISP_COLOR_KEY1_LOWER);
-	DUMP_REG(DC_DISP_COLOR_KEY1_UPPER);
-	DUMP_REG(DC_DISP_CURSOR_FOREGROUND);
-	DUMP_REG(DC_DISP_CURSOR_BACKGROUND);
-	DUMP_REG(DC_DISP_CURSOR_START_ADDR);
-	DUMP_REG(DC_DISP_CURSOR_START_ADDR_NS);
-	DUMP_REG(DC_DISP_CURSOR_POSITION);
-	DUMP_REG(DC_DISP_CURSOR_POSITION_NS);
-	DUMP_REG(DC_DISP_INIT_SEQ_CONTROL);
-	DUMP_REG(DC_DISP_SPI_INIT_SEQ_DATA_A);
-	DUMP_REG(DC_DISP_SPI_INIT_SEQ_DATA_B);
-	DUMP_REG(DC_DISP_SPI_INIT_SEQ_DATA_C);
-	DUMP_REG(DC_DISP_SPI_INIT_SEQ_DATA_D);
-	DUMP_REG(DC_DISP_DC_MCCIF_FIFOCTRL);
-	DUMP_REG(DC_DISP_MCCIF_DISPLAY0A_HYST);
-	DUMP_REG(DC_DISP_MCCIF_DISPLAY0B_HYST);
-	DUMP_REG(DC_DISP_MCCIF_DISPLAY1A_HYST);
-	DUMP_REG(DC_DISP_MCCIF_DISPLAY1B_HYST);
-	DUMP_REG(DC_DISP_DAC_CRT_CTRL);
-	DUMP_REG(DC_DISP_DISP_MISC_CONTROL);
-	DUMP_REG(DC_DISP_SD_CONTROL);
-	DUMP_REG(DC_DISP_SD_CSC_COEFF);
-	DUMP_REG(DC_DISP_SD_LUT(0));
-	DUMP_REG(DC_DISP_SD_LUT(1));
-	DUMP_REG(DC_DISP_SD_LUT(2));
-	DUMP_REG(DC_DISP_SD_LUT(3));
-	DUMP_REG(DC_DISP_SD_LUT(4));
-	DUMP_REG(DC_DISP_SD_LUT(5));
-	DUMP_REG(DC_DISP_SD_LUT(6));
-	DUMP_REG(DC_DISP_SD_LUT(7));
-	DUMP_REG(DC_DISP_SD_LUT(8));
-	DUMP_REG(DC_DISP_SD_FLICKER_CONTROL);
-	DUMP_REG(DC_DISP_DC_PIXEL_COUNT);
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(0));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(1));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(2));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(3));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(4));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(5));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(6));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(7));
-	DUMP_REG(DC_DISP_SD_BL_TF(0));
-	DUMP_REG(DC_DISP_SD_BL_TF(1));
-	DUMP_REG(DC_DISP_SD_BL_TF(2));
-	DUMP_REG(DC_DISP_SD_BL_TF(3));
-	DUMP_REG(DC_DISP_SD_BL_CONTROL);
-	DUMP_REG(DC_DISP_SD_HW_K_VALUES);
-	DUMP_REG(DC_DISP_SD_MAN_K_VALUES);
-	DUMP_REG(DC_DISP_CURSOR_START_ADDR_HI);
-	DUMP_REG(DC_DISP_BLEND_CURSOR_CONTROL);
-	DUMP_REG(DC_WIN_WIN_OPTIONS);
-	DUMP_REG(DC_WIN_BYTE_SWAP);
-	DUMP_REG(DC_WIN_BUFFER_CONTROL);
-	DUMP_REG(DC_WIN_COLOR_DEPTH);
-	DUMP_REG(DC_WIN_POSITION);
-	DUMP_REG(DC_WIN_SIZE);
-	DUMP_REG(DC_WIN_PRESCALED_SIZE);
-	DUMP_REG(DC_WIN_H_INITIAL_DDA);
-	DUMP_REG(DC_WIN_V_INITIAL_DDA);
-	DUMP_REG(DC_WIN_DDA_INC);
-	DUMP_REG(DC_WIN_LINE_STRIDE);
-	DUMP_REG(DC_WIN_BUF_STRIDE);
-	DUMP_REG(DC_WIN_UV_BUF_STRIDE);
-	DUMP_REG(DC_WIN_BUFFER_ADDR_MODE);
-	DUMP_REG(DC_WIN_DV_CONTROL);
-	DUMP_REG(DC_WIN_BLEND_NOKEY);
-	DUMP_REG(DC_WIN_BLEND_1WIN);
-	DUMP_REG(DC_WIN_BLEND_2WIN_X);
-	DUMP_REG(DC_WIN_BLEND_2WIN_Y);
-	DUMP_REG(DC_WIN_BLEND_3WIN_XY);
-	DUMP_REG(DC_WIN_HP_FETCH_CONTROL);
-	DUMP_REG(DC_WINBUF_START_ADDR);
-	DUMP_REG(DC_WINBUF_START_ADDR_NS);
-	DUMP_REG(DC_WINBUF_START_ADDR_U);
-	DUMP_REG(DC_WINBUF_START_ADDR_U_NS);
-	DUMP_REG(DC_WINBUF_START_ADDR_V);
-	DUMP_REG(DC_WINBUF_START_ADDR_V_NS);
-	DUMP_REG(DC_WINBUF_ADDR_H_OFFSET);
-	DUMP_REG(DC_WINBUF_ADDR_H_OFFSET_NS);
-	DUMP_REG(DC_WINBUF_ADDR_V_OFFSET);
-	DUMP_REG(DC_WINBUF_ADDR_V_OFFSET_NS);
-	DUMP_REG(DC_WINBUF_UFLOW_STATUS);
-	DUMP_REG(DC_WINBUF_AD_UFLOW_STATUS);
-	DUMP_REG(DC_WINBUF_BD_UFLOW_STATUS);
-	DUMP_REG(DC_WINBUF_CD_UFLOW_STATUS);
-
-#undef DUMP_REG
-
-unlock:
-	drm_modeset_unlock(&dc->base.mutex);
-	return err;
-}
-
-static int tegra_dc_show_crc(struct seq_file *s, void *data)
-{
-	struct drm_info_node *node = s->private;
-	struct tegra_dc *dc = node->info_ent->data;
-	int err = 0;
-	u32 value;
-
-	drm_modeset_lock(&dc->base.mutex, NULL);
-
-	if (!dc->base.state->active) {
-		err = -EBUSY;
-		goto unlock;
-	}
-
-	value = DC_COM_CRC_CONTROL_ACTIVE_DATA | DC_COM_CRC_CONTROL_ENABLE;
-	tegra_dc_writel(dc, value, DC_COM_CRC_CONTROL);
-	tegra_dc_commit(dc);
-
-	drm_crtc_wait_one_vblank(&dc->base);
-	drm_crtc_wait_one_vblank(&dc->base);
-
-	value = tegra_dc_readl(dc, DC_COM_CRC_CHECKSUM);
-	seq_printf(s, "%08x\n", value);
-
-	tegra_dc_writel(dc, 0, DC_COM_CRC_CONTROL);
-
-unlock:
-	drm_modeset_unlock(&dc->base.mutex);
-	return err;
-}
-
-static int tegra_dc_show_stats(struct seq_file *s, void *data)
-{
-	struct drm_info_node *node = s->private;
-	struct tegra_dc *dc = node->info_ent->data;
-
-	seq_printf(s, "frames: %lu\n", dc->stats.frames);
-	seq_printf(s, "vblank: %lu\n", dc->stats.vblank);
-	seq_printf(s, "underflow: %lu\n", dc->stats.underflow);
-	seq_printf(s, "overflow: %lu\n", dc->stats.overflow);
-
-	return 0;
-}
-
-static struct drm_info_list debugfs_files[] = {
-	{ "regs", tegra_dc_show_regs, 0, NULL },
-	{ "crc", tegra_dc_show_crc, 0, NULL },
-	{ "stats", tegra_dc_show_stats, 0, NULL },
-};
-
-static int tegra_dc_debugfs_init(struct tegra_dc *dc, struct drm_minor *minor)
-{
-	unsigned int i;
-	char *name;
-	int err;
-
-	name = kasprintf(GFP_KERNEL, "dc.%d", dc->pipe);
-	dc->debugfs = debugfs_create_dir(name, minor->debugfs_root);
-	kfree(name);
-
-	if (!dc->debugfs)
-		return -ENOMEM;
-
-	dc->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
-				    GFP_KERNEL);
-	if (!dc->debugfs_files) {
-		err = -ENOMEM;
-		goto remove;
+	if (status & HEAD_UF_INT) {
+		dev_dbg_ratelimited(dc->dev, "%s(): head underflow\n", __func__);
+		dc->stats.underflow++;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
-		dc->debugfs_files[i].data = dc;
-
-	err = drm_debugfs_create_files(dc->debugfs_files,
-				       ARRAY_SIZE(debugfs_files),
-				       dc->debugfs, minor);
-	if (err < 0)
-		goto free;
-
-	dc->minor = minor;
-
-	return 0;
-
-free:
-	kfree(dc->debugfs_files);
-	dc->debugfs_files = NULL;
-remove:
-	debugfs_remove(dc->debugfs);
-	dc->debugfs = NULL;
-
-	return err;
-}
-
-static int tegra_dc_debugfs_exit(struct tegra_dc *dc)
-{
-	drm_debugfs_remove_files(dc->debugfs_files, ARRAY_SIZE(debugfs_files),
-				 dc->minor);
-	dc->minor = NULL;
-
-	kfree(dc->debugfs_files);
-	dc->debugfs_files = NULL;
-
-	debugfs_remove(dc->debugfs);
-	dc->debugfs = NULL;
-
-	return 0;
+	return IRQ_HANDLED;
 }
 
 static int tegra_dc_init(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct iommu_group *group = iommu_group_get(client->dev);
 	unsigned long flags = HOST1X_SYNCPT_CLIENT_MANAGED;
 	struct tegra_dc *dc = host1x_client_to_dc(client);
 	struct tegra_drm *tegra = drm->dev_private;
@@ -1742,18 +1832,27 @@ static int tegra_dc_init(struct host1x_client *client)
 	if (!dc->syncpt)
 		dev_warn(dc->dev, "failed to allocate syncpoint\n");
 
-	if (tegra->domain) {
-		err = iommu_attach_device(tegra->domain, dc->dev);
-		if (err < 0) {
-			dev_err(dc->dev, "failed to attach to domain: %d\n",
-				err);
-			return err;
+	if (group && tegra->domain) {
+		if (group != tegra->group) {
+			err = iommu_attach_group(tegra->domain, group);
+			if (err < 0) {
+				dev_err(dc->dev,
+					"failed to attach to domain: %d\n",
+					err);
+				return err;
+			}
+
+			tegra->group = group;
 		}
 
 		dc->domain = tegra->domain;
 	}
 
-	primary = tegra_dc_primary_plane_create(drm, dc);
+	if (dc->soc->wgrps)
+		primary = tegra_dc_add_shared_planes(drm, dc);
+	else
+		primary = tegra_dc_add_planes(drm, dc);
+
 	if (IS_ERR(primary)) {
 		err = PTR_ERR(primary);
 		goto cleanup;
@@ -1787,16 +1886,6 @@ static int tegra_dc_init(struct host1x_client *client)
 		goto cleanup;
 	}
 
-	err = tegra_dc_add_planes(drm, dc);
-	if (err < 0)
-		goto cleanup;
-
-	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_dc_debugfs_init(dc, drm->primary);
-		if (err < 0)
-			dev_err(dc->dev, "debugfs setup failed: %d\n", err);
-	}
-
 	err = devm_request_irq(dc->dev, dc->irq, tegra_dc_irq, 0,
 			       dev_name(dc->dev), dc);
 	if (err < 0) {
@@ -1808,14 +1897,14 @@ static int tegra_dc_init(struct host1x_client *client)
 	return 0;
 
 cleanup:
-	if (cursor)
+	if (!IS_ERR_OR_NULL(cursor))
 		drm_plane_cleanup(cursor);
 
-	if (primary)
+	if (!IS_ERR(primary))
 		drm_plane_cleanup(primary);
 
-	if (tegra->domain) {
-		iommu_detach_device(tegra->domain, dc->dev);
+	if (group && tegra->domain) {
+		iommu_detach_group(tegra->domain, group);
 		dc->domain = NULL;
 	}
 
@@ -1824,25 +1913,20 @@ static int tegra_dc_init(struct host1x_client *client)
 
 static int tegra_dc_exit(struct host1x_client *client)
 {
+	struct iommu_group *group = iommu_group_get(client->dev);
 	struct tegra_dc *dc = host1x_client_to_dc(client);
 	int err;
 
 	devm_free_irq(dc->dev, dc->irq, dc);
 
-	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_dc_debugfs_exit(dc);
-		if (err < 0)
-			dev_err(dc->dev, "debugfs cleanup failed: %d\n", err);
-	}
-
 	err = tegra_dc_rgb_exit(dc);
 	if (err) {
 		dev_err(dc->dev, "failed to shutdown RGB output: %d\n", err);
 		return err;
 	}
 
-	if (dc->domain) {
-		iommu_detach_device(dc->domain, dc->dev);
+	if (group && dc->domain) {
+		iommu_detach_group(dc->domain, group);
 		dc->domain = NULL;
 	}
 
@@ -1857,57 +1941,138 @@ static const struct host1x_client_ops dc_client_ops = {
 };
 
 static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
-	.supports_border_color = true,
+	.supports_background_color = false,
 	.supports_interlacing = false,
 	.supports_cursor = false,
 	.supports_block_linear = false,
+	.supports_blending = false,
 	.pitch_align = 8,
 	.has_powergate = false,
-	.broken_reset = true,
+	.coupled_pm = true,
+	.has_nvdisplay = false,
+	.num_primary_formats = ARRAY_SIZE(tegra20_primary_formats),
+	.primary_formats = tegra20_primary_formats,
+	.num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats),
+	.overlay_formats = tegra20_overlay_formats,
 };
 
 static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
-	.supports_border_color = true,
+	.supports_background_color = false,
 	.supports_interlacing = false,
 	.supports_cursor = false,
 	.supports_block_linear = false,
+	.supports_blending = false,
 	.pitch_align = 8,
 	.has_powergate = false,
-	.broken_reset = false,
+	.coupled_pm = false,
+	.has_nvdisplay = false,
+	.num_primary_formats = ARRAY_SIZE(tegra20_primary_formats),
+	.primary_formats = tegra20_primary_formats,
+	.num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats),
+	.overlay_formats = tegra20_overlay_formats,
 };
 
 static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
-	.supports_border_color = true,
+	.supports_background_color = false,
 	.supports_interlacing = false,
 	.supports_cursor = false,
 	.supports_block_linear = false,
+	.supports_blending = false,
 	.pitch_align = 64,
 	.has_powergate = true,
-	.broken_reset = false,
+	.coupled_pm = false,
+	.has_nvdisplay = false,
+	.num_primary_formats = ARRAY_SIZE(tegra114_primary_formats),
+	.primary_formats = tegra114_primary_formats,
+	.num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats),
+	.overlay_formats = tegra114_overlay_formats,
 };
 
 static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
-	.supports_border_color = false,
+	.supports_background_color = true,
 	.supports_interlacing = true,
 	.supports_cursor = true,
 	.supports_block_linear = true,
+	.supports_blending = true,
 	.pitch_align = 64,
 	.has_powergate = true,
-	.broken_reset = false,
+	.coupled_pm = false,
+	.has_nvdisplay = false,
+	.num_primary_formats = ARRAY_SIZE(tegra124_primary_formats),
+	.primary_formats = tegra114_primary_formats,
+	.num_overlay_formats = ARRAY_SIZE(tegra124_overlay_formats),
+	.overlay_formats = tegra114_overlay_formats,
 };
 
 static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
-	.supports_border_color = false,
+	.supports_background_color = true,
 	.supports_interlacing = true,
 	.supports_cursor = true,
 	.supports_block_linear = true,
+	.supports_blending = true,
 	.pitch_align = 64,
 	.has_powergate = true,
-	.broken_reset = false,
+	.coupled_pm = false,
+	.has_nvdisplay = false,
+	.num_primary_formats = ARRAY_SIZE(tegra114_primary_formats),
+	.primary_formats = tegra114_primary_formats,
+	.num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats),
+	.overlay_formats = tegra114_overlay_formats,
+};
+
+static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
+	{
+		.index = 0,
+		.dc = 0,
+		.windows = (const unsigned int[]) { 0 },
+		.num_windows = 1,
+	}, {
+		.index = 1,
+		.dc = 1,
+		.windows = (const unsigned int[]) { 1 },
+		.num_windows = 1,
+	}, {
+		.index = 2,
+		.dc = 1,
+		.windows = (const unsigned int[]) { 2 },
+		.num_windows = 1,
+	}, {
+		.index = 3,
+		.dc = 2,
+		.windows = (const unsigned int[]) { 3 },
+		.num_windows = 1,
+	}, {
+		.index = 4,
+		.dc = 2,
+		.windows = (const unsigned int[]) { 4 },
+		.num_windows = 1,
+	}, {
+		.index = 5,
+		.dc = 2,
+		.windows = (const unsigned int[]) { 5 },
+		.num_windows = 1,
+	},
+};
+
+static const struct tegra_dc_soc_info tegra186_dc_soc_info = {
+	.supports_background_color = true,
+	.supports_interlacing = true,
+	.supports_cursor = true,
+	.supports_block_linear = true,
+	.supports_blending = true,
+	.pitch_align = 64,
+	.has_powergate = false,
+	.coupled_pm = false,
+	.has_nvdisplay = true,
+	.wgrps = tegra186_dc_wgrps,
+	.num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps),
 };
 
 static const struct of_device_id tegra_dc_of_match[] = {
 	{
+		.compatible = "nvidia,tegra186-dc",
+		.data = &tegra186_dc_soc_info,
+	}, {
 		.compatible = "nvidia,tegra210-dc",
 		.data = &tegra210_dc_soc_info,
 	}, {
@@ -1965,6 +2130,43 @@ static int tegra_dc_parse_dt(struct tegra_dc *dc)
 	return 0;
 }
 
+static int tegra_dc_match_by_pipe(struct device *dev, void *data)
+{
+	struct tegra_dc *dc = dev_get_drvdata(dev);
+	unsigned int pipe = (unsigned long)data;
+
+	return dc->pipe == pipe;
+}
+
+static int tegra_dc_couple(struct tegra_dc *dc)
+{
+	/*
+	 * On Tegra20, DC1 requires DC0 to be taken out of reset in order to
+	 * be enabled, otherwise CPU hangs on writing to CMD_DISPLAY_COMMAND /
+	 * POWER_CONTROL registers during CRTC enabling.
+	 */
+	if (dc->soc->coupled_pm && dc->pipe == 1) {
+		u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE;
+		struct device_link *link;
+		struct device *partner;
+
+		partner = driver_find_device(dc->dev->driver, NULL, 0,
+					     tegra_dc_match_by_pipe);
+		if (!partner)
+			return -EPROBE_DEFER;
+
+		link = device_link_add(dc->dev, partner, flags);
+		if (!link) {
+			dev_err(dc->dev, "failed to link controllers\n");
+			return -EINVAL;
+		}
+
+		dev_dbg(dc->dev, "coupled to %s\n", dev_name(partner));
+	}
+
+	return 0;
+}
+
 static int tegra_dc_probe(struct platform_device *pdev)
 {
 	struct resource *regs;
@@ -1977,7 +2179,6 @@ static int tegra_dc_probe(struct platform_device *pdev)
 
 	dc->soc = of_device_get_match_data(&pdev->dev);
 
-	spin_lock_init(&dc->lock);
 	INIT_LIST_HEAD(&dc->list);
 	dc->dev = &pdev->dev;
 
@@ -1985,6 +2186,10 @@ static int tegra_dc_probe(struct platform_device *pdev)
 	if (err < 0)
 		return err;
 
+	err = tegra_dc_couple(dc);
+	if (err < 0)
+		return err;
+
 	dc->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dc->clk)) {
 		dev_err(&pdev->dev, "failed to get clock\n");
@@ -1998,21 +2203,19 @@ static int tegra_dc_probe(struct platform_device *pdev)
 	}
 
 	/* assert reset and disable clock */
-	if (!dc->soc->broken_reset) {
-		err = clk_prepare_enable(dc->clk);
-		if (err < 0)
-			return err;
+	err = clk_prepare_enable(dc->clk);
+	if (err < 0)
+		return err;
 
-		usleep_range(2000, 4000);
+	usleep_range(2000, 4000);
 
-		err = reset_control_assert(dc->rst);
-		if (err < 0)
-			return err;
+	err = reset_control_assert(dc->rst);
+	if (err < 0)
+		return err;
 
-		usleep_range(2000, 4000);
+	usleep_range(2000, 4000);
 
-		clk_disable_unprepare(dc->clk);
-	}
+	clk_disable_unprepare(dc->clk);
 
 	if (dc->soc->has_powergate) {
 		if (dc->pipe == 0)
@@ -2086,12 +2289,10 @@ static int tegra_dc_suspend(struct device *dev)
 	struct tegra_dc *dc = dev_get_drvdata(dev);
 	int err;
 
-	if (!dc->soc->broken_reset) {
-		err = reset_control_assert(dc->rst);
-		if (err < 0) {
-			dev_err(dev, "failed to assert reset: %d\n", err);
-			return err;
-		}
+	err = reset_control_assert(dc->rst);
+	if (err < 0) {
+		dev_err(dev, "failed to assert reset: %d\n", err);
+		return err;
 	}
 
 	if (dc->soc->has_powergate)
@@ -2121,13 +2322,10 @@ static int tegra_dc_resume(struct device *dev)
 			return err;
 		}
 
-		if (!dc->soc->broken_reset) {
-			err = reset_control_deassert(dc->rst);
-			if (err < 0) {
-				dev_err(dev,
-					"failed to deassert reset: %d\n", err);
-				return err;
-			}
+		err = reset_control_deassert(dc->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to deassert reset: %d\n", err);
+			return err;
 		}
 	}
 
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index cb100b6e32821efb5cf9d2242f4cb0021044b0ea..096a81ad6d8d1b4c8aa4893eb61a4daea4b4feda 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -18,6 +18,24 @@
 
 struct tegra_output;
 
+struct tegra_dc_state {
+	struct drm_crtc_state base;
+
+	struct clk *clk;
+	unsigned long pclk;
+	unsigned int div;
+
+	u32 planes;
+};
+
+static inline struct tegra_dc_state *to_dc_state(struct drm_crtc_state *state)
+{
+	if (state)
+		return container_of(state, struct tegra_dc_state, base);
+
+	return NULL;
+}
+
 struct tegra_dc_stats {
 	unsigned long frames;
 	unsigned long vblank;
@@ -25,21 +43,35 @@ struct tegra_dc_stats {
 	unsigned long overflow;
 };
 
+struct tegra_windowgroup_soc {
+	unsigned int index;
+	unsigned int dc;
+	const unsigned int *windows;
+	unsigned int num_windows;
+};
+
 struct tegra_dc_soc_info {
-	bool supports_border_color;
+	bool supports_background_color;
 	bool supports_interlacing;
 	bool supports_cursor;
 	bool supports_block_linear;
+	bool supports_blending;
 	unsigned int pitch_align;
 	bool has_powergate;
-	bool broken_reset;
+	bool coupled_pm;
+	bool has_nvdisplay;
+	const struct tegra_windowgroup_soc *wgrps;
+	unsigned int num_wgrps;
+	const u32 *primary_formats;
+	unsigned int num_primary_formats;
+	const u32 *overlay_formats;
+	unsigned int num_overlay_formats;
 };
 
 struct tegra_dc {
 	struct host1x_client client;
 	struct host1x_syncpt *syncpt;
 	struct device *dev;
-	spinlock_t lock;
 
 	struct drm_crtc base;
 	unsigned int powergate;
@@ -56,11 +88,6 @@ struct tegra_dc {
 	struct list_head list;
 
 	struct drm_info_list *debugfs_files;
-	struct drm_minor *minor;
-	struct dentry *debugfs;
-
-	/* page-flip handling */
-	struct drm_pending_vblank_event *event;
 
 	const struct tegra_dc_soc_info *soc;
 
@@ -110,6 +137,7 @@ struct tegra_dc_window {
 	unsigned int bits_per_pixel;
 	unsigned int stride[2];
 	unsigned long base[3];
+	unsigned int zpos;
 	bool bottom_up;
 
 	struct tegra_bo_tiling tiling;
@@ -118,6 +146,7 @@ struct tegra_dc_window {
 };
 
 /* from dc.c */
+bool tegra_dc_has_output(struct tegra_dc *dc, struct device *dev);
 void tegra_dc_commit(struct tegra_dc *dc);
 int tegra_dc_state_setup_clock(struct tegra_dc *dc,
 			       struct drm_crtc_state *crtc_state,
@@ -167,15 +196,26 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define DC_CMD_INT_ENABLE			0x039
 #define DC_CMD_INT_TYPE				0x03a
 #define DC_CMD_INT_POLARITY			0x03b
-#define CTXSW_INT     (1 << 0)
-#define FRAME_END_INT (1 << 1)
-#define VBLANK_INT    (1 << 2)
-#define WIN_A_UF_INT  (1 << 8)
-#define WIN_B_UF_INT  (1 << 9)
-#define WIN_C_UF_INT  (1 << 10)
-#define WIN_A_OF_INT  (1 << 14)
-#define WIN_B_OF_INT  (1 << 15)
-#define WIN_C_OF_INT  (1 << 16)
+#define CTXSW_INT                (1 << 0)
+#define FRAME_END_INT            (1 << 1)
+#define VBLANK_INT               (1 << 2)
+#define V_PULSE3_INT             (1 << 4)
+#define V_PULSE2_INT             (1 << 5)
+#define REGION_CRC_INT           (1 << 6)
+#define REG_TMOUT_INT            (1 << 7)
+#define WIN_A_UF_INT             (1 << 8)
+#define WIN_B_UF_INT             (1 << 9)
+#define WIN_C_UF_INT             (1 << 10)
+#define MSF_INT                  (1 << 12)
+#define WIN_A_OF_INT             (1 << 14)
+#define WIN_B_OF_INT             (1 << 15)
+#define WIN_C_OF_INT             (1 << 16)
+#define HEAD_UF_INT              (1 << 23)
+#define SD3_BUCKET_WALK_DONE_INT (1 << 24)
+#define DSC_OBUF_UF_INT          (1 << 26)
+#define DSC_RBUF_UF_INT          (1 << 27)
+#define DSC_BBUF_UF_INT          (1 << 28)
+#define DSC_TO_UF_INT            (1 << 29)
 
 #define DC_CMD_SIGNAL_RAISE1			0x03c
 #define DC_CMD_SIGNAL_RAISE2			0x03d
@@ -196,6 +236,8 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define WIN_B_UPDATE    (1 << 10)
 #define WIN_C_UPDATE    (1 << 11)
 #define CURSOR_UPDATE   (1 << 15)
+#define COMMON_ACTREQ   (1 << 16)
+#define COMMON_UPDATE   (1 << 17)
 #define NC_HOST_TRIG    (1 << 24)
 
 #define DC_CMD_DISPLAY_WINDOW_HEADER		0x042
@@ -238,6 +280,10 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define DC_COM_GPIO_DEBOUNCE_COUNTER		0x328
 #define DC_COM_CRC_CHECKSUM_LATCHED		0x329
 
+#define DC_COM_RG_UNDERFLOW			0x365
+#define  UNDERFLOW_MODE_RED      (1 << 8)
+#define  UNDERFLOW_REPORT_ENABLE (1 << 0)
+
 #define DC_DISP_DISP_SIGNAL_OPTIONS0		0x400
 #define H_PULSE0_ENABLE (1 <<  8)
 #define H_PULSE1_ENABLE (1 << 10)
@@ -249,10 +295,10 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define HDMI_ENABLE	(1 << 30)
 #define DSI_ENABLE	(1 << 29)
 #define SOR1_TIMING_CYA	(1 << 27)
-#define SOR1_ENABLE	(1 << 26)
-#define SOR_ENABLE	(1 << 25)
 #define CURSOR_ENABLE	(1 << 16)
 
+#define SOR_ENABLE(x)	(1 << (25 + (x)))
+
 #define DC_DISP_DISP_MEM_HIGH_PRIORITY		0x403
 #define CURSOR_THRESHOLD(x)   (((x) & 0x03) << 24)
 #define WINDOW_A_THRESHOLD(x) (((x) & 0x7f) << 16)
@@ -360,29 +406,33 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define DISP_ORDER_BLUE_RED        (1 << 9)
 
 #define DC_DISP_DISP_COLOR_CONTROL		0x430
-#define BASE_COLOR_SIZE666     (0 << 0)
-#define BASE_COLOR_SIZE111     (1 << 0)
-#define BASE_COLOR_SIZE222     (2 << 0)
-#define BASE_COLOR_SIZE333     (3 << 0)
-#define BASE_COLOR_SIZE444     (4 << 0)
-#define BASE_COLOR_SIZE555     (5 << 0)
-#define BASE_COLOR_SIZE565     (6 << 0)
-#define BASE_COLOR_SIZE332     (7 << 0)
-#define BASE_COLOR_SIZE888     (8 << 0)
+#define BASE_COLOR_SIZE666     ( 0 << 0)
+#define BASE_COLOR_SIZE111     ( 1 << 0)
+#define BASE_COLOR_SIZE222     ( 2 << 0)
+#define BASE_COLOR_SIZE333     ( 3 << 0)
+#define BASE_COLOR_SIZE444     ( 4 << 0)
+#define BASE_COLOR_SIZE555     ( 5 << 0)
+#define BASE_COLOR_SIZE565     ( 6 << 0)
+#define BASE_COLOR_SIZE332     ( 7 << 0)
+#define BASE_COLOR_SIZE888     ( 8 << 0)
+#define BASE_COLOR_SIZE101010  (10 << 0)
+#define BASE_COLOR_SIZE121212  (12 << 0)
 #define DITHER_CONTROL_MASK    (3 << 8)
 #define DITHER_CONTROL_DISABLE (0 << 8)
 #define DITHER_CONTROL_ORDERED (2 << 8)
 #define DITHER_CONTROL_ERRDIFF (3 << 8)
 #define BASE_COLOR_SIZE_MASK   (0xf << 0)
-#define BASE_COLOR_SIZE_666    (0 << 0)
-#define BASE_COLOR_SIZE_111    (1 << 0)
-#define BASE_COLOR_SIZE_222    (2 << 0)
-#define BASE_COLOR_SIZE_333    (3 << 0)
-#define BASE_COLOR_SIZE_444    (4 << 0)
-#define BASE_COLOR_SIZE_555    (5 << 0)
-#define BASE_COLOR_SIZE_565    (6 << 0)
-#define BASE_COLOR_SIZE_332    (7 << 0)
-#define BASE_COLOR_SIZE_888    (8 << 0)
+#define BASE_COLOR_SIZE_666    (  0 << 0)
+#define BASE_COLOR_SIZE_111    (  1 << 0)
+#define BASE_COLOR_SIZE_222    (  2 << 0)
+#define BASE_COLOR_SIZE_333    (  3 << 0)
+#define BASE_COLOR_SIZE_444    (  4 << 0)
+#define BASE_COLOR_SIZE_555    (  5 << 0)
+#define BASE_COLOR_SIZE_565    (  6 << 0)
+#define BASE_COLOR_SIZE_332    (  7 << 0)
+#define BASE_COLOR_SIZE_888    (  8 << 0)
+#define BASE_COLOR_SIZE_101010 ( 10 << 0)
+#define BASE_COLOR_SIZE_121212 ( 12 << 0)
 
 #define DC_DISP_SHIFT_CLOCK_OPTIONS		0x431
 #define  SC1_H_QUALIFIER_NONE	(1 << 16)
@@ -449,6 +499,12 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define DC_DISP_SD_HW_K_VALUES			0x4dd
 #define DC_DISP_SD_MAN_K_VALUES			0x4de
 
+#define DC_DISP_BLEND_BACKGROUND_COLOR		0x4e4
+#define  BACKGROUND_COLOR_ALPHA(x) (((x) & 0xff) << 24)
+#define  BACKGROUND_COLOR_BLUE(x)  (((x) & 0xff) << 16)
+#define  BACKGROUND_COLOR_GREEN(x) (((x) & 0xff) << 8)
+#define  BACKGROUND_COLOR_RED(x)   (((x) & 0xff) << 0)
+
 #define DC_DISP_INTERLACE_CONTROL		0x4e5
 #define  INTERLACE_STATUS (1 << 2)
 #define  INTERLACE_START  (1 << 1)
@@ -467,6 +523,35 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define CURSOR_SRC_BLEND_MASK			(3 << 8)
 #define CURSOR_ALPHA				0xff
 
+#define DC_WIN_CORE_ACT_CONTROL 0x50e
+#define  VCOUNTER (0 << 0)
+#define  HCOUNTER (1 << 0)
+
+#define DC_WIN_CORE_IHUB_WGRP_LATENCY_CTLA 0x543
+#define  LATENCY_CTL_MODE_ENABLE (1 << 2)
+
+#define DC_WIN_CORE_IHUB_WGRP_LATENCY_CTLB 0x544
+#define  WATERMARK_MASK 0x1fffffff
+
+#define DC_WIN_CORE_PRECOMP_WGRP_PIPE_METER 0x560
+#define  PIPE_METER_INT(x)  (((x) & 0xff) << 8)
+#define  PIPE_METER_FRAC(x) (((x) & 0xff) << 0)
+
+#define DC_WIN_CORE_IHUB_WGRP_POOL_CONFIG 0x561
+#define  MEMPOOL_ENTRIES(x) (((x) & 0xffff) << 0)
+
+#define DC_WIN_CORE_IHUB_WGRP_FETCH_METER 0x562
+#define  SLOTS(x) (((x) & 0xff) << 0)
+
+#define DC_WIN_CORE_IHUB_LINEBUF_CONFIG 0x563
+#define  MODE_TWO_LINES  (0 << 14)
+#define  MODE_FOUR_LINES (1 << 14)
+
+#define DC_WIN_CORE_IHUB_THREAD_GROUP 0x568
+#define  THREAD_NUM_MASK (0x1f << 1)
+#define  THREAD_NUM(x) (((x) & 0x1f) << 1)
+#define  THREAD_GROUP_ENABLE (1 << 0)
+
 #define DC_WIN_CSC_YOF				0x611
 #define DC_WIN_CSC_KYRGB			0x612
 #define DC_WIN_CSC_KUR				0x613
@@ -502,9 +587,9 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define WIN_COLOR_DEPTH_P4              2
 #define WIN_COLOR_DEPTH_P8              3
 #define WIN_COLOR_DEPTH_B4G4R4A4        4
-#define WIN_COLOR_DEPTH_B5G5R5A         5
+#define WIN_COLOR_DEPTH_B5G5R5A1        5
 #define WIN_COLOR_DEPTH_B5G6R5          6
-#define WIN_COLOR_DEPTH_AB5G5R5         7
+#define WIN_COLOR_DEPTH_A1B5G5R5        7
 #define WIN_COLOR_DEPTH_B8G8R8A8       12
 #define WIN_COLOR_DEPTH_R8G8B8A8       13
 #define WIN_COLOR_DEPTH_B6x2G6x2R6x2A8 14
@@ -519,18 +604,32 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define WIN_COLOR_DEPTH_YUV422R        23
 #define WIN_COLOR_DEPTH_YCbCr422RA     24
 #define WIN_COLOR_DEPTH_YUV422RA       25
+#define WIN_COLOR_DEPTH_R4G4B4A4       27
+#define WIN_COLOR_DEPTH_R5G5B5A        28
+#define WIN_COLOR_DEPTH_AR5G5B5        29
+#define WIN_COLOR_DEPTH_B5G5R5X1       30
+#define WIN_COLOR_DEPTH_X1B5G5R5       31
+#define WIN_COLOR_DEPTH_R5G5B5X1       32
+#define WIN_COLOR_DEPTH_X1R5G5B5       33
+#define WIN_COLOR_DEPTH_R5G6B5         34
+#define WIN_COLOR_DEPTH_A8R8G8B8       35
+#define WIN_COLOR_DEPTH_A8B8G8R8       36
+#define WIN_COLOR_DEPTH_B8G8R8X8       37
+#define WIN_COLOR_DEPTH_R8G8B8X8       38
+#define WIN_COLOR_DEPTH_X8B8G8R8       65
+#define WIN_COLOR_DEPTH_X8R8G8B8       66
 
 #define DC_WIN_POSITION				0x704
-#define H_POSITION(x) (((x) & 0x1fff) <<  0)
-#define V_POSITION(x) (((x) & 0x1fff) << 16)
+#define H_POSITION(x) (((x) & 0x1fff) <<  0) /* XXX 0x7fff on Tegra186 */
+#define V_POSITION(x) (((x) & 0x1fff) << 16) /* XXX 0x7fff on Tegra186 */
 
 #define DC_WIN_SIZE				0x705
-#define H_SIZE(x) (((x) & 0x1fff) <<  0)
-#define V_SIZE(x) (((x) & 0x1fff) << 16)
+#define H_SIZE(x) (((x) & 0x1fff) <<  0) /* XXX 0x7fff on Tegra186 */
+#define V_SIZE(x) (((x) & 0x1fff) << 16) /* XXX 0x7fff on Tegra186 */
 
 #define DC_WIN_PRESCALED_SIZE			0x706
 #define H_PRESCALED_SIZE(x) (((x) & 0x7fff) <<  0)
-#define V_PRESCALED_SIZE(x) (((x) & 0x1fff) << 16)
+#define V_PRESCALED_SIZE(x) (((x) & 0x1fff) << 16) /* XXX 0x7fff on Tegra186 */
 
 #define DC_WIN_H_INITIAL_DDA			0x707
 #define DC_WIN_V_INITIAL_DDA			0x708
@@ -546,11 +645,24 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define DC_WIN_BUFFER_ADDR_MODE_TILE		(1 <<  0)
 #define DC_WIN_BUFFER_ADDR_MODE_LINEAR_UV	(0 << 16)
 #define DC_WIN_BUFFER_ADDR_MODE_TILE_UV		(1 << 16)
+
 #define DC_WIN_DV_CONTROL			0x70e
 
 #define DC_WIN_BLEND_NOKEY			0x70f
+#define  BLEND_WEIGHT1(x) (((x) & 0xff) << 16)
+#define  BLEND_WEIGHT0(x) (((x) & 0xff) <<  8)
+
 #define DC_WIN_BLEND_1WIN			0x710
+#define  BLEND_CONTROL_FIX    (0 << 2)
+#define  BLEND_CONTROL_ALPHA  (1 << 2)
+#define  BLEND_COLOR_KEY_NONE (0 << 0)
+#define  BLEND_COLOR_KEY_0    (1 << 0)
+#define  BLEND_COLOR_KEY_1    (2 << 0)
+#define  BLEND_COLOR_KEY_BOTH (3 << 0)
+
 #define DC_WIN_BLEND_2WIN_X			0x711
+#define  BLEND_CONTROL_DEPENDENT (2 << 2)
+
 #define DC_WIN_BLEND_2WIN_Y			0x712
 #define DC_WIN_BLEND_3WIN_XY			0x713
 
@@ -575,8 +687,97 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define DC_WINBUF_SURFACE_KIND_BLOCK	(2 << 0)
 #define DC_WINBUF_SURFACE_KIND_BLOCK_HEIGHT(x) (((x) & 0x7) << 4)
 
+#define DC_WINBUF_START_ADDR_HI			0x80d
+
+#define DC_WINBUF_CDE_CONTROL			0x82f
+#define  ENABLE_SURFACE (1 << 0)
+
 #define DC_WINBUF_AD_UFLOW_STATUS		0xbca
 #define DC_WINBUF_BD_UFLOW_STATUS		0xdca
 #define DC_WINBUF_CD_UFLOW_STATUS		0xfca
 
+/* Tegra186 and later */
+#define DC_DISP_CORE_SOR_SET_CONTROL(x)		(0x403 + (x))
+#define PROTOCOL_MASK (0xf << 8)
+#define PROTOCOL_SINGLE_TMDS_A (0x1 << 8)
+
+#define DC_WIN_CORE_WINDOWGROUP_SET_CONTROL	0x702
+#define OWNER_MASK (0xf << 0)
+#define OWNER(x) (((x) & 0xf) << 0)
+
+#define DC_WIN_CROPPED_SIZE			0x706
+
+#define DC_WIN_PLANAR_STORAGE			0x709
+#define PITCH(x) (((x) >> 6) & 0x1fff)
+
+#define DC_WIN_SET_PARAMS			0x70d
+#define  CLAMP_BEFORE_BLEND (1 << 15)
+#define  DEGAMMA_NONE (0 << 13)
+#define  DEGAMMA_SRGB (1 << 13)
+#define  DEGAMMA_YUV8_10 (2 << 13)
+#define  DEGAMMA_YUV12 (3 << 13)
+#define  INPUT_RANGE_BYPASS (0 << 10)
+#define  INPUT_RANGE_LIMITED (1 << 10)
+#define  INPUT_RANGE_FULL (2 << 10)
+#define  COLOR_SPACE_RGB (0 << 8)
+#define  COLOR_SPACE_YUV_601 (1 << 8)
+#define  COLOR_SPACE_YUV_709 (2 << 8)
+#define  COLOR_SPACE_YUV_2020 (3 << 8)
+
+#define DC_WIN_WINDOWGROUP_SET_CONTROL_INPUT_SCALER	0x70e
+#define  HORIZONTAL_TAPS_2 (1 << 3)
+#define  HORIZONTAL_TAPS_5 (4 << 3)
+#define  VERTICAL_TAPS_2 (1 << 0)
+#define  VERTICAL_TAPS_5 (4 << 0)
+
+#define DC_WIN_WINDOWGROUP_SET_INPUT_SCALER_USAGE	0x711
+#define  INPUT_SCALER_USE422  (1 << 2)
+#define  INPUT_SCALER_VBYPASS (1 << 1)
+#define  INPUT_SCALER_HBYPASS (1 << 0)
+
+#define DC_WIN_BLEND_LAYER_CONTROL		0x716
+#define  COLOR_KEY_NONE (0 << 25)
+#define  COLOR_KEY_SRC (1 << 25)
+#define  COLOR_KEY_DST (2 << 25)
+#define  BLEND_BYPASS (1 << 24)
+#define  K2(x) (((x) & 0xff) << 16)
+#define  K1(x) (((x) & 0xff) << 8)
+#define  WINDOW_LAYER_DEPTH(x) (((x) & 0xff) << 0)
+
+#define DC_WIN_BLEND_MATCH_SELECT		0x717
+#define  BLEND_FACTOR_DST_ALPHA_ZERO			(0 << 12)
+#define  BLEND_FACTOR_DST_ALPHA_ONE			(1 << 12)
+#define  BLEND_FACTOR_DST_ALPHA_NEG_K1_TIMES_SRC	(2 << 12)
+#define  BLEND_FACTOR_DST_ALPHA_K2			(3 << 12)
+#define  BLEND_FACTOR_SRC_ALPHA_ZERO			(0 << 8)
+#define  BLEND_FACTOR_SRC_ALPHA_K1			(1 << 8)
+#define  BLEND_FACTOR_SRC_ALPHA_K2			(2 << 8)
+#define  BLEND_FACTOR_SRC_ALPHA_NEG_K1_TIMES_DST	(3 << 8)
+#define  BLEND_FACTOR_DST_COLOR_ZERO			(0 << 4)
+#define  BLEND_FACTOR_DST_COLOR_ONE			(1 << 4)
+#define  BLEND_FACTOR_DST_COLOR_K1			(2 << 4)
+#define  BLEND_FACTOR_DST_COLOR_K2			(3 << 4)
+#define  BLEND_FACTOR_DST_COLOR_K1_TIMES_DST		(4 << 4)
+#define  BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_DST	(5 << 4)
+#define  BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_SRC	(6 << 4)
+#define  BLEND_FACTOR_DST_COLOR_NEG_K1			(7 << 4)
+#define  BLEND_FACTOR_SRC_COLOR_ZERO			(0 << 0)
+#define  BLEND_FACTOR_SRC_COLOR_ONE			(1 << 0)
+#define  BLEND_FACTOR_SRC_COLOR_K1			(2 << 0)
+#define  BLEND_FACTOR_SRC_COLOR_K1_TIMES_DST		(3 << 0)
+#define  BLEND_FACTOR_SRC_COLOR_NEG_K1_TIMES_DST	(4 << 0)
+#define  BLEND_FACTOR_SRC_COLOR_K1_TIMES_SRC		(5 << 0)
+
+#define DC_WIN_BLEND_NOMATCH_SELECT		0x718
+
+#define DC_WIN_PRECOMP_WGRP_PARAMS		0x724
+#define  SWAP_UV (1 << 0)
+
+#define DC_WIN_WINDOW_SET_CONTROL		0x730
+#define  CONTROL_CSC_ENABLE (1 << 5)
+
+#define DC_WINBUF_CROPPED_POINT			0x806
+#define OFFSET_Y(x) (((x) & 0xffff) << 16)
+#define OFFSET_X(x) (((x) & 0xffff) << 0)
+
 #endif /* TEGRA_DC_H */
diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index e4da041ba89b197fdcb5db2e2ced98ec189ec1a1..d84e81ff36ad0d906b1002650093748d728ec95b 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -15,6 +15,7 @@
 #include <linux/pinctrl/pinconf-generic.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/pinctrl/pinmux.h>
+#include <linux/pm_runtime.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/regulator/consumer.h>
@@ -321,6 +322,9 @@ static int tegra_dpaux_pad_config(struct tegra_dpaux *dpaux, unsigned function)
 	case DPAUX_PADCTL_FUNC_I2C:
 		value = DPAUX_HYBRID_PADCTL_I2C_SDA_INPUT_RCV |
 			DPAUX_HYBRID_PADCTL_I2C_SCL_INPUT_RCV |
+			DPAUX_HYBRID_PADCTL_AUX_CMH(2) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVZ(4) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVI(0x18) |
 			DPAUX_HYBRID_PADCTL_MODE_I2C;
 		break;
 
@@ -467,52 +471,37 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 		return PTR_ERR(dpaux->clk);
 	}
 
-	err = clk_prepare_enable(dpaux->clk);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to enable module clock: %d\n",
-			err);
-		return err;
-	}
-
-	if (dpaux->rst)
-		reset_control_deassert(dpaux->rst);
-
 	dpaux->clk_parent = devm_clk_get(&pdev->dev, "parent");
 	if (IS_ERR(dpaux->clk_parent)) {
 		dev_err(&pdev->dev, "failed to get parent clock: %ld\n",
 			PTR_ERR(dpaux->clk_parent));
-		err = PTR_ERR(dpaux->clk_parent);
-		goto assert_reset;
-	}
-
-	err = clk_prepare_enable(dpaux->clk_parent);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to enable parent clock: %d\n",
-			err);
-		goto assert_reset;
+		return PTR_ERR(dpaux->clk_parent);
 	}
 
 	err = clk_set_rate(dpaux->clk_parent, 270000000);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to set clock to 270 MHz: %d\n",
 			err);
-		goto disable_parent_clk;
+		return err;
 	}
 
 	dpaux->vdd = devm_regulator_get(&pdev->dev, "vdd");
 	if (IS_ERR(dpaux->vdd)) {
 		dev_err(&pdev->dev, "failed to get VDD supply: %ld\n",
 			PTR_ERR(dpaux->vdd));
-		err = PTR_ERR(dpaux->vdd);
-		goto disable_parent_clk;
+		return PTR_ERR(dpaux->vdd);
 	}
 
+	platform_set_drvdata(pdev, dpaux);
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
 	err = devm_request_irq(dpaux->dev, dpaux->irq, tegra_dpaux_irq, 0,
 			       dev_name(dpaux->dev), dpaux);
 	if (err < 0) {
 		dev_err(dpaux->dev, "failed to request IRQ#%u: %d\n",
 			dpaux->irq, err);
-		goto disable_parent_clk;
+		return err;
 	}
 
 	disable_irq(dpaux->irq);
@@ -522,7 +511,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 
 	err = drm_dp_aux_register(&dpaux->aux);
 	if (err < 0)
-		goto disable_parent_clk;
+		return err;
 
 	/*
 	 * Assume that by default the DPAUX/I2C pads will be used for HDMI,
@@ -560,47 +549,97 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	list_add_tail(&dpaux->list, &dpaux_list);
 	mutex_unlock(&dpaux_lock);
 
-	platform_set_drvdata(pdev, dpaux);
-
 	return 0;
-
-disable_parent_clk:
-	clk_disable_unprepare(dpaux->clk_parent);
-assert_reset:
-	if (dpaux->rst)
-		reset_control_assert(dpaux->rst);
-
-	clk_disable_unprepare(dpaux->clk);
-
-	return err;
 }
 
 static int tegra_dpaux_remove(struct platform_device *pdev)
 {
 	struct tegra_dpaux *dpaux = platform_get_drvdata(pdev);
 
+	cancel_work_sync(&dpaux->work);
+
 	/* make sure pads are powered down when not in use */
 	tegra_dpaux_pad_power_down(dpaux);
 
+	pm_runtime_put(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
 	drm_dp_aux_unregister(&dpaux->aux);
 
 	mutex_lock(&dpaux_lock);
 	list_del(&dpaux->list);
 	mutex_unlock(&dpaux_lock);
 
-	cancel_work_sync(&dpaux->work);
+	return 0;
+}
 
-	clk_disable_unprepare(dpaux->clk_parent);
+#ifdef CONFIG_PM
+static int tegra_dpaux_suspend(struct device *dev)
+{
+	struct tegra_dpaux *dpaux = dev_get_drvdata(dev);
+	int err = 0;
+
+	if (dpaux->rst) {
+		err = reset_control_assert(dpaux->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to assert reset: %d\n", err);
+			return err;
+		}
+	}
 
-	if (dpaux->rst)
-		reset_control_assert(dpaux->rst);
+	usleep_range(1000, 2000);
 
+	clk_disable_unprepare(dpaux->clk_parent);
 	clk_disable_unprepare(dpaux->clk);
 
+	return err;
+}
+
+static int tegra_dpaux_resume(struct device *dev)
+{
+	struct tegra_dpaux *dpaux = dev_get_drvdata(dev);
+	int err;
+
+	err = clk_prepare_enable(dpaux->clk);
+	if (err < 0) {
+		dev_err(dev, "failed to enable clock: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(dpaux->clk_parent);
+	if (err < 0) {
+		dev_err(dev, "failed to enable parent clock: %d\n", err);
+		goto disable_clk;
+	}
+
+	usleep_range(1000, 2000);
+
+	if (dpaux->rst) {
+		err = reset_control_deassert(dpaux->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to deassert reset: %d\n", err);
+			goto disable_parent;
+		}
+
+		usleep_range(1000, 2000);
+	}
+
 	return 0;
+
+disable_parent:
+	clk_disable_unprepare(dpaux->clk_parent);
+disable_clk:
+	clk_disable_unprepare(dpaux->clk);
+	return err;
 }
+#endif
+
+static const struct dev_pm_ops tegra_dpaux_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_dpaux_suspend, tegra_dpaux_resume, NULL)
+};
 
 static const struct of_device_id tegra_dpaux_of_match[] = {
+	{ .compatible = "nvidia,tegra186-dpaux", },
 	{ .compatible = "nvidia,tegra210-dpaux", },
 	{ .compatible = "nvidia,tegra124-dpaux", },
 	{ },
@@ -611,6 +650,7 @@ struct platform_driver tegra_dpaux_driver = {
 	.driver = {
 		.name = "tegra-dpaux",
 		.of_match_table = tegra_dpaux_of_match,
+		.pm = &tegra_dpaux_pm_ops,
 	},
 	.probe = tegra_dpaux_probe,
 	.remove = tegra_dpaux_remove,
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 52552b9b89ef713b345fa31abac4d735c26f1d06..d50bddb2e4474e456b8105d6b90b47488ff21263 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -33,97 +33,91 @@ struct tegra_drm_file {
 	struct mutex lock;
 };
 
-static void tegra_atomic_schedule(struct tegra_drm *tegra,
-				  struct drm_atomic_state *state)
+static int tegra_atomic_check(struct drm_device *drm,
+			      struct drm_atomic_state *state)
 {
-	tegra->commit.state = state;
-	schedule_work(&tegra->commit.work);
-}
+	int err;
 
-static void tegra_atomic_complete(struct tegra_drm *tegra,
-				  struct drm_atomic_state *state)
-{
-	struct drm_device *drm = tegra->drm;
+	err = drm_atomic_helper_check_modeset(drm, state);
+	if (err < 0)
+		return err;
 
-	/*
-	 * Everything below can be run asynchronously without the need to grab
-	 * any modeset locks at all under one condition: It must be guaranteed
-	 * that the asynchronous work has either been cancelled (if the driver
-	 * supports it, which at least requires that the framebuffers get
-	 * cleaned up with drm_atomic_helper_cleanup_planes()) or completed
-	 * before the new state gets committed on the software side with
-	 * drm_atomic_helper_swap_state().
-	 *
-	 * This scheme allows new atomic state updates to be prepared and
-	 * checked in parallel to the asynchronous completion of the previous
-	 * update. Which is important since compositors need to figure out the
-	 * composition of the next frame right after having submitted the
-	 * current layout.
-	 */
+	err = drm_atomic_normalize_zpos(drm, state);
+	if (err < 0)
+		return err;
 
-	drm_atomic_helper_commit_modeset_disables(drm, state);
-	drm_atomic_helper_commit_modeset_enables(drm, state);
-	drm_atomic_helper_commit_planes(drm, state,
-					DRM_PLANE_COMMIT_ACTIVE_ONLY);
+	err = drm_atomic_helper_check_planes(drm, state);
+	if (err < 0)
+		return err;
 
-	drm_atomic_helper_wait_for_vblanks(drm, state);
+	if (state->legacy_cursor_update)
+		state->async_update = !drm_atomic_helper_async_check(drm, state);
 
-	drm_atomic_helper_cleanup_planes(drm, state);
-	drm_atomic_state_put(state);
+	return 0;
 }
 
-static void tegra_atomic_work(struct work_struct *work)
+static struct drm_atomic_state *
+tegra_atomic_state_alloc(struct drm_device *drm)
 {
-	struct tegra_drm *tegra = container_of(work, struct tegra_drm,
-					       commit.work);
+	struct tegra_atomic_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
+
+	if (!state || drm_atomic_state_init(drm, &state->base) < 0) {
+		kfree(state);
+		return NULL;
+	}
 
-	tegra_atomic_complete(tegra, tegra->commit.state);
+	return &state->base;
 }
 
-static int tegra_atomic_commit(struct drm_device *drm,
-			       struct drm_atomic_state *state, bool nonblock)
+static void tegra_atomic_state_clear(struct drm_atomic_state *state)
 {
-	struct tegra_drm *tegra = drm->dev_private;
-	int err;
-
-	err = drm_atomic_helper_prepare_planes(drm, state);
-	if (err)
-		return err;
+	struct tegra_atomic_state *tegra = to_tegra_atomic_state(state);
 
-	/* serialize outstanding nonblocking commits */
-	mutex_lock(&tegra->commit.lock);
-	flush_work(&tegra->commit.work);
-
-	/*
-	 * This is the point of no return - everything below never fails except
-	 * when the hw goes bonghits. Which means we can commit the new state on
-	 * the software side now.
-	 */
-
-	err = drm_atomic_helper_swap_state(state, true);
-	if (err) {
-		mutex_unlock(&tegra->commit.lock);
-		drm_atomic_helper_cleanup_planes(drm, state);
-		return err;
-	}
-
-	drm_atomic_state_get(state);
-	if (nonblock)
-		tegra_atomic_schedule(tegra, state);
-	else
-		tegra_atomic_complete(tegra, state);
+	drm_atomic_state_default_clear(state);
+	tegra->clk_disp = NULL;
+	tegra->dc = NULL;
+	tegra->rate = 0;
+}
 
-	mutex_unlock(&tegra->commit.lock);
-	return 0;
+static void tegra_atomic_state_free(struct drm_atomic_state *state)
+{
+	drm_atomic_state_default_release(state);
+	kfree(state);
 }
 
-static const struct drm_mode_config_funcs tegra_drm_mode_funcs = {
+static const struct drm_mode_config_funcs tegra_drm_mode_config_funcs = {
 	.fb_create = tegra_fb_create,
 #ifdef CONFIG_DRM_FBDEV_EMULATION
-	.output_poll_changed = tegra_fb_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 #endif
-	.atomic_check = drm_atomic_helper_check,
-	.atomic_commit = tegra_atomic_commit,
+	.atomic_check = tegra_atomic_check,
+	.atomic_commit = drm_atomic_helper_commit,
+	.atomic_state_alloc = tegra_atomic_state_alloc,
+	.atomic_state_clear = tegra_atomic_state_clear,
+	.atomic_state_free = tegra_atomic_state_free,
+};
+
+static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state)
+{
+	struct drm_device *drm = old_state->dev;
+	struct tegra_drm *tegra = drm->dev_private;
+
+	if (tegra->hub) {
+		drm_atomic_helper_commit_modeset_disables(drm, old_state);
+		tegra_display_hub_atomic_commit(drm, old_state);
+		drm_atomic_helper_commit_planes(drm, old_state, 0);
+		drm_atomic_helper_commit_modeset_enables(drm, old_state);
+		drm_atomic_helper_commit_hw_done(old_state);
+		drm_atomic_helper_wait_for_vblanks(drm, old_state);
+		drm_atomic_helper_cleanup_planes(drm, old_state);
+	} else {
+		drm_atomic_helper_commit_tail_rpm(old_state);
+	}
+}
+
+static const struct drm_mode_config_helper_funcs
+tegra_drm_mode_config_helpers = {
+	.atomic_commit_tail = tegra_atomic_commit_tail,
 };
 
 static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
@@ -172,9 +166,6 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 	mutex_init(&tegra->clients_lock);
 	INIT_LIST_HEAD(&tegra->clients);
 
-	mutex_init(&tegra->commit.lock);
-	INIT_WORK(&tegra->commit.work, tegra_atomic_work);
-
 	drm->dev_private = tegra;
 	tegra->drm = drm;
 
@@ -188,7 +179,8 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 
 	drm->mode_config.allow_fb_modifiers = true;
 
-	drm->mode_config.funcs = &tegra_drm_mode_funcs;
+	drm->mode_config.funcs = &tegra_drm_mode_config_funcs;
+	drm->mode_config.helper_private = &tegra_drm_mode_config_helpers;
 
 	err = tegra_drm_fb_prepare(drm);
 	if (err < 0)
@@ -200,6 +192,12 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 	if (err < 0)
 		goto fbdev;
 
+	if (tegra->hub) {
+		err = tegra_display_hub_prepare(tegra->hub);
+		if (err < 0)
+			goto device;
+	}
+
 	/*
 	 * We don't use the drm_irq_install() helpers provided by the DRM
 	 * core, so we need to set this manually in order to allow the
@@ -212,16 +210,19 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 
 	err = drm_vblank_init(drm, drm->mode_config.num_crtc);
 	if (err < 0)
-		goto device;
+		goto hub;
 
 	drm_mode_config_reset(drm);
 
 	err = tegra_drm_fb_init(drm);
 	if (err < 0)
-		goto device;
+		goto hub;
 
 	return 0;
 
+hub:
+	if (tegra->hub)
+		tegra_display_hub_cleanup(tegra->hub);
 device:
 	host1x_device_exit(device);
 fbdev:
@@ -286,15 +287,6 @@ static void tegra_drm_context_free(struct tegra_drm_context *context)
 	kfree(context);
 }
 
-static void tegra_drm_lastclose(struct drm_device *drm)
-{
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-	struct tegra_drm *tegra = drm->dev_private;
-
-	tegra_fbdev_restore_mode(tegra->fbdev);
-#endif
-}
-
 static struct host1x_bo *
 host1x_bo_lookup(struct drm_file *file, u32 handle)
 {
@@ -660,7 +652,8 @@ static int tegra_syncpt_wait(struct drm_device *drm, void *data,
 	if (!sp)
 		return -EINVAL;
 
-	return host1x_syncpt_wait(sp, args->thresh, args->timeout,
+	return host1x_syncpt_wait(sp, args->thresh,
+				  msecs_to_jiffies(args->timeout),
 				  &args->value);
 }
 
@@ -1100,7 +1093,7 @@ static struct drm_driver tegra_drm_driver = {
 	.unload = tegra_drm_unload,
 	.open = tegra_drm_open,
 	.postclose = tegra_drm_postclose,
-	.lastclose = tegra_drm_lastclose,
+	.lastclose = drm_fb_helper_lastclose,
 
 #if defined(CONFIG_DEBUG_FS)
 	.debugfs_init = tegra_debugfs_init,
@@ -1148,8 +1141,7 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra,
 	return 0;
 }
 
-void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size,
-			      dma_addr_t *dma)
+void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma)
 {
 	struct iova *alloc;
 	void *virt;
@@ -1317,6 +1309,10 @@ static const struct of_device_id host1x_drm_subdevs[] = {
 	{ .compatible = "nvidia,tegra210-sor", },
 	{ .compatible = "nvidia,tegra210-sor1", },
 	{ .compatible = "nvidia,tegra210-vic", },
+	{ .compatible = "nvidia,tegra186-display", },
+	{ .compatible = "nvidia,tegra186-dc", },
+	{ .compatible = "nvidia,tegra186-sor", },
+	{ .compatible = "nvidia,tegra186-sor1", },
 	{ .compatible = "nvidia,tegra186-vic", },
 	{ /* sentinel */ }
 };
@@ -1332,6 +1328,7 @@ static struct host1x_driver host1x_drm_driver = {
 };
 
 static struct platform_driver * const drivers[] = {
+	&tegra_display_hub_driver,
 	&tegra_dc_driver,
 	&tegra_hdmi_driver,
 	&tegra_dsi_driver,
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index ddae331ad8b6b9369c5f5f9ddfb30b6d295ec06a..73b661ce7086592af497b39bdaf94fe806b8860f 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -16,6 +16,7 @@
 #include <linux/of_gpio.h>
 
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_encoder.h>
@@ -23,6 +24,7 @@
 #include <drm/drm_fixed.h>
 
 #include "gem.h"
+#include "hub.h"
 #include "trace.h"
 
 struct reset_control;
@@ -40,10 +42,25 @@ struct tegra_fbdev {
 };
 #endif
 
+struct tegra_atomic_state {
+	struct drm_atomic_state base;
+
+	struct clk *clk_disp;
+	struct tegra_dc *dc;
+	unsigned long rate;
+};
+
+static inline struct tegra_atomic_state *
+to_tegra_atomic_state(struct drm_atomic_state *state)
+{
+	return container_of(state, struct tegra_atomic_state, base);
+}
+
 struct tegra_drm {
 	struct drm_device *drm;
 
 	struct iommu_domain *domain;
+	struct iommu_group *group;
 	struct mutex mm_lock;
 	struct drm_mm mm;
 
@@ -62,11 +79,7 @@ struct tegra_drm {
 
 	unsigned int pitch_align;
 
-	struct {
-		struct drm_atomic_state *state;
-		struct work_struct work;
-		struct mutex lock;
-	} commit;
+	struct tegra_display_hub *hub;
 
 	struct drm_atomic_state *state;
 };
@@ -152,6 +165,8 @@ int tegra_output_probe(struct tegra_output *output);
 void tegra_output_remove(struct tegra_output *output);
 int tegra_output_init(struct drm_device *drm, struct tegra_output *output);
 void tegra_output_exit(struct tegra_output *output);
+void tegra_output_find_possible_crtcs(struct tegra_output *output,
+				      struct drm_device *drm);
 
 int tegra_output_connector_get_modes(struct drm_connector *connector);
 enum drm_connector_status
@@ -188,11 +203,8 @@ int tegra_drm_fb_init(struct drm_device *drm);
 void tegra_drm_fb_exit(struct drm_device *drm);
 void tegra_drm_fb_suspend(struct drm_device *drm);
 void tegra_drm_fb_resume(struct drm_device *drm);
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev);
-void tegra_fb_output_poll_changed(struct drm_device *drm);
-#endif
 
+extern struct platform_driver tegra_display_hub_driver;
 extern struct platform_driver tegra_dc_driver;
 extern struct platform_driver tegra_hdmi_driver;
 extern struct platform_driver tegra_dsi_driver;
diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index 046649ec9441990869f9cb77a7cd248a2c97fd88..4d2ed966f9e3248a074ab25dcdfb6156e9c93200 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -65,8 +65,6 @@ struct tegra_dsi {
 	struct clk *clk;
 
 	struct drm_info_list *debugfs_files;
-	struct drm_minor *minor;
-	struct dentry *debugfs;
 
 	unsigned long flags;
 	enum mipi_dsi_pixel_format format;
@@ -122,12 +120,89 @@ static inline void tegra_dsi_writel(struct tegra_dsi *dsi, u32 value,
 	writel(value, dsi->regs + (offset << 2));
 }
 
+#define DEBUGFS_REG32(_name) { .name = #_name, .offset = _name }
+
+static const struct debugfs_reg32 tegra_dsi_regs[] = {
+	DEBUGFS_REG32(DSI_INCR_SYNCPT),
+	DEBUGFS_REG32(DSI_INCR_SYNCPT_CONTROL),
+	DEBUGFS_REG32(DSI_INCR_SYNCPT_ERROR),
+	DEBUGFS_REG32(DSI_CTXSW),
+	DEBUGFS_REG32(DSI_RD_DATA),
+	DEBUGFS_REG32(DSI_WR_DATA),
+	DEBUGFS_REG32(DSI_POWER_CONTROL),
+	DEBUGFS_REG32(DSI_INT_ENABLE),
+	DEBUGFS_REG32(DSI_INT_STATUS),
+	DEBUGFS_REG32(DSI_INT_MASK),
+	DEBUGFS_REG32(DSI_HOST_CONTROL),
+	DEBUGFS_REG32(DSI_CONTROL),
+	DEBUGFS_REG32(DSI_SOL_DELAY),
+	DEBUGFS_REG32(DSI_MAX_THRESHOLD),
+	DEBUGFS_REG32(DSI_TRIGGER),
+	DEBUGFS_REG32(DSI_TX_CRC),
+	DEBUGFS_REG32(DSI_STATUS),
+	DEBUGFS_REG32(DSI_INIT_SEQ_CONTROL),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_0),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_1),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_2),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_3),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_4),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_5),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_6),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_7),
+	DEBUGFS_REG32(DSI_PKT_SEQ_0_LO),
+	DEBUGFS_REG32(DSI_PKT_SEQ_0_HI),
+	DEBUGFS_REG32(DSI_PKT_SEQ_1_LO),
+	DEBUGFS_REG32(DSI_PKT_SEQ_1_HI),
+	DEBUGFS_REG32(DSI_PKT_SEQ_2_LO),
+	DEBUGFS_REG32(DSI_PKT_SEQ_2_HI),
+	DEBUGFS_REG32(DSI_PKT_SEQ_3_LO),
+	DEBUGFS_REG32(DSI_PKT_SEQ_3_HI),
+	DEBUGFS_REG32(DSI_PKT_SEQ_4_LO),
+	DEBUGFS_REG32(DSI_PKT_SEQ_4_HI),
+	DEBUGFS_REG32(DSI_PKT_SEQ_5_LO),
+	DEBUGFS_REG32(DSI_PKT_SEQ_5_HI),
+	DEBUGFS_REG32(DSI_DCS_CMDS),
+	DEBUGFS_REG32(DSI_PKT_LEN_0_1),
+	DEBUGFS_REG32(DSI_PKT_LEN_2_3),
+	DEBUGFS_REG32(DSI_PKT_LEN_4_5),
+	DEBUGFS_REG32(DSI_PKT_LEN_6_7),
+	DEBUGFS_REG32(DSI_PHY_TIMING_0),
+	DEBUGFS_REG32(DSI_PHY_TIMING_1),
+	DEBUGFS_REG32(DSI_PHY_TIMING_2),
+	DEBUGFS_REG32(DSI_BTA_TIMING),
+	DEBUGFS_REG32(DSI_TIMEOUT_0),
+	DEBUGFS_REG32(DSI_TIMEOUT_1),
+	DEBUGFS_REG32(DSI_TO_TALLY),
+	DEBUGFS_REG32(DSI_PAD_CONTROL_0),
+	DEBUGFS_REG32(DSI_PAD_CONTROL_CD),
+	DEBUGFS_REG32(DSI_PAD_CD_STATUS),
+	DEBUGFS_REG32(DSI_VIDEO_MODE_CONTROL),
+	DEBUGFS_REG32(DSI_PAD_CONTROL_1),
+	DEBUGFS_REG32(DSI_PAD_CONTROL_2),
+	DEBUGFS_REG32(DSI_PAD_CONTROL_3),
+	DEBUGFS_REG32(DSI_PAD_CONTROL_4),
+	DEBUGFS_REG32(DSI_GANGED_MODE_CONTROL),
+	DEBUGFS_REG32(DSI_GANGED_MODE_START),
+	DEBUGFS_REG32(DSI_GANGED_MODE_SIZE),
+	DEBUGFS_REG32(DSI_RAW_DATA_BYTE_COUNT),
+	DEBUGFS_REG32(DSI_ULTRA_LOW_POWER_CONTROL),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_8),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_9),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_10),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_11),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_12),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_13),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_14),
+	DEBUGFS_REG32(DSI_INIT_SEQ_DATA_15),
+};
+
 static int tegra_dsi_show_regs(struct seq_file *s, void *data)
 {
 	struct drm_info_node *node = s->private;
 	struct tegra_dsi *dsi = node->info_ent->data;
 	struct drm_crtc *crtc = dsi->output.encoder.crtc;
 	struct drm_device *drm = node->minor->dev;
+	unsigned int i;
 	int err = 0;
 
 	drm_modeset_lock_all(drm);
@@ -137,93 +212,12 @@ static int tegra_dsi_show_regs(struct seq_file *s, void *data)
 		goto unlock;
 	}
 
-#define DUMP_REG(name)						\
-	seq_printf(s, "%-32s %#05x %08x\n", #name, name,	\
-		   tegra_dsi_readl(dsi, name))
-
-	DUMP_REG(DSI_INCR_SYNCPT);
-	DUMP_REG(DSI_INCR_SYNCPT_CONTROL);
-	DUMP_REG(DSI_INCR_SYNCPT_ERROR);
-	DUMP_REG(DSI_CTXSW);
-	DUMP_REG(DSI_RD_DATA);
-	DUMP_REG(DSI_WR_DATA);
-	DUMP_REG(DSI_POWER_CONTROL);
-	DUMP_REG(DSI_INT_ENABLE);
-	DUMP_REG(DSI_INT_STATUS);
-	DUMP_REG(DSI_INT_MASK);
-	DUMP_REG(DSI_HOST_CONTROL);
-	DUMP_REG(DSI_CONTROL);
-	DUMP_REG(DSI_SOL_DELAY);
-	DUMP_REG(DSI_MAX_THRESHOLD);
-	DUMP_REG(DSI_TRIGGER);
-	DUMP_REG(DSI_TX_CRC);
-	DUMP_REG(DSI_STATUS);
-
-	DUMP_REG(DSI_INIT_SEQ_CONTROL);
-	DUMP_REG(DSI_INIT_SEQ_DATA_0);
-	DUMP_REG(DSI_INIT_SEQ_DATA_1);
-	DUMP_REG(DSI_INIT_SEQ_DATA_2);
-	DUMP_REG(DSI_INIT_SEQ_DATA_3);
-	DUMP_REG(DSI_INIT_SEQ_DATA_4);
-	DUMP_REG(DSI_INIT_SEQ_DATA_5);
-	DUMP_REG(DSI_INIT_SEQ_DATA_6);
-	DUMP_REG(DSI_INIT_SEQ_DATA_7);
-
-	DUMP_REG(DSI_PKT_SEQ_0_LO);
-	DUMP_REG(DSI_PKT_SEQ_0_HI);
-	DUMP_REG(DSI_PKT_SEQ_1_LO);
-	DUMP_REG(DSI_PKT_SEQ_1_HI);
-	DUMP_REG(DSI_PKT_SEQ_2_LO);
-	DUMP_REG(DSI_PKT_SEQ_2_HI);
-	DUMP_REG(DSI_PKT_SEQ_3_LO);
-	DUMP_REG(DSI_PKT_SEQ_3_HI);
-	DUMP_REG(DSI_PKT_SEQ_4_LO);
-	DUMP_REG(DSI_PKT_SEQ_4_HI);
-	DUMP_REG(DSI_PKT_SEQ_5_LO);
-	DUMP_REG(DSI_PKT_SEQ_5_HI);
-
-	DUMP_REG(DSI_DCS_CMDS);
-
-	DUMP_REG(DSI_PKT_LEN_0_1);
-	DUMP_REG(DSI_PKT_LEN_2_3);
-	DUMP_REG(DSI_PKT_LEN_4_5);
-	DUMP_REG(DSI_PKT_LEN_6_7);
-
-	DUMP_REG(DSI_PHY_TIMING_0);
-	DUMP_REG(DSI_PHY_TIMING_1);
-	DUMP_REG(DSI_PHY_TIMING_2);
-	DUMP_REG(DSI_BTA_TIMING);
-
-	DUMP_REG(DSI_TIMEOUT_0);
-	DUMP_REG(DSI_TIMEOUT_1);
-	DUMP_REG(DSI_TO_TALLY);
-
-	DUMP_REG(DSI_PAD_CONTROL_0);
-	DUMP_REG(DSI_PAD_CONTROL_CD);
-	DUMP_REG(DSI_PAD_CD_STATUS);
-	DUMP_REG(DSI_VIDEO_MODE_CONTROL);
-	DUMP_REG(DSI_PAD_CONTROL_1);
-	DUMP_REG(DSI_PAD_CONTROL_2);
-	DUMP_REG(DSI_PAD_CONTROL_3);
-	DUMP_REG(DSI_PAD_CONTROL_4);
-
-	DUMP_REG(DSI_GANGED_MODE_CONTROL);
-	DUMP_REG(DSI_GANGED_MODE_START);
-	DUMP_REG(DSI_GANGED_MODE_SIZE);
-
-	DUMP_REG(DSI_RAW_DATA_BYTE_COUNT);
-	DUMP_REG(DSI_ULTRA_LOW_POWER_CONTROL);
-
-	DUMP_REG(DSI_INIT_SEQ_DATA_8);
-	DUMP_REG(DSI_INIT_SEQ_DATA_9);
-	DUMP_REG(DSI_INIT_SEQ_DATA_10);
-	DUMP_REG(DSI_INIT_SEQ_DATA_11);
-	DUMP_REG(DSI_INIT_SEQ_DATA_12);
-	DUMP_REG(DSI_INIT_SEQ_DATA_13);
-	DUMP_REG(DSI_INIT_SEQ_DATA_14);
-	DUMP_REG(DSI_INIT_SEQ_DATA_15);
-
-#undef DUMP_REG
+	for (i = 0; i < ARRAY_SIZE(tegra_dsi_regs); i++) {
+		unsigned int offset = tegra_dsi_regs[i].offset;
+
+		seq_printf(s, "%-32s %#05x %08x\n", tegra_dsi_regs[i].name,
+			   offset, tegra_dsi_readl(dsi, offset));
+	}
 
 unlock:
 	drm_modeset_unlock_all(drm);
@@ -234,58 +228,46 @@ static struct drm_info_list debugfs_files[] = {
 	{ "regs", tegra_dsi_show_regs, 0, NULL },
 };
 
-static int tegra_dsi_debugfs_init(struct tegra_dsi *dsi,
-				  struct drm_minor *minor)
+static int tegra_dsi_late_register(struct drm_connector *connector)
 {
-	const char *name = dev_name(dsi->dev);
-	unsigned int i;
+	struct tegra_output *output = connector_to_output(connector);
+	unsigned int i, count = ARRAY_SIZE(debugfs_files);
+	struct drm_minor *minor = connector->dev->primary;
+	struct dentry *root = connector->debugfs_entry;
+	struct tegra_dsi *dsi = to_dsi(output);
 	int err;
 
-	dsi->debugfs = debugfs_create_dir(name, minor->debugfs_root);
-	if (!dsi->debugfs)
-		return -ENOMEM;
-
 	dsi->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
 				     GFP_KERNEL);
-	if (!dsi->debugfs_files) {
-		err = -ENOMEM;
-		goto remove;
-	}
+	if (!dsi->debugfs_files)
+		return -ENOMEM;
 
-	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
+	for (i = 0; i < count; i++)
 		dsi->debugfs_files[i].data = dsi;
 
-	err = drm_debugfs_create_files(dsi->debugfs_files,
-				       ARRAY_SIZE(debugfs_files),
-				       dsi->debugfs, minor);
+	err = drm_debugfs_create_files(dsi->debugfs_files, count, root, minor);
 	if (err < 0)
 		goto free;
 
-	dsi->minor = minor;
-
 	return 0;
 
 free:
 	kfree(dsi->debugfs_files);
 	dsi->debugfs_files = NULL;
-remove:
-	debugfs_remove(dsi->debugfs);
-	dsi->debugfs = NULL;
 
 	return err;
 }
 
-static void tegra_dsi_debugfs_exit(struct tegra_dsi *dsi)
+static void tegra_dsi_early_unregister(struct drm_connector *connector)
 {
-	drm_debugfs_remove_files(dsi->debugfs_files, ARRAY_SIZE(debugfs_files),
-				 dsi->minor);
-	dsi->minor = NULL;
+	struct tegra_output *output = connector_to_output(connector);
+	unsigned int count = ARRAY_SIZE(debugfs_files);
+	struct tegra_dsi *dsi = to_dsi(output);
 
+	drm_debugfs_remove_files(dsi->debugfs_files, count,
+				 connector->dev->primary);
 	kfree(dsi->debugfs_files);
 	dsi->debugfs_files = NULL;
-
-	debugfs_remove(dsi->debugfs);
-	dsi->debugfs = NULL;
 }
 
 #define PKT_ID0(id)	((((id) & 0x3f) <<  3) | (1 <<  9))
@@ -827,6 +809,8 @@ static const struct drm_connector_funcs tegra_dsi_connector_funcs = {
 	.destroy = tegra_output_connector_destroy,
 	.atomic_duplicate_state = tegra_dsi_connector_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+	.late_register = tegra_dsi_late_register,
+	.early_unregister = tegra_dsi_early_unregister,
 };
 
 static enum drm_mode_status
@@ -1080,12 +1064,6 @@ static int tegra_dsi_init(struct host1x_client *client)
 		dsi->output.encoder.possible_crtcs = 0x3;
 	}
 
-	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_dsi_debugfs_init(dsi, drm->primary);
-		if (err < 0)
-			dev_err(dsi->dev, "debugfs setup failed: %d\n", err);
-	}
-
 	return 0;
 }
 
@@ -1094,10 +1072,6 @@ static int tegra_dsi_exit(struct host1x_client *client)
 	struct tegra_dsi *dsi = host1x_client_to_dsi(client);
 
 	tegra_output_exit(&dsi->output);
-
-	if (IS_ENABLED(CONFIG_DEBUG_FS))
-		tegra_dsi_debugfs_exit(dsi);
-
 	regulator_disable(dsi->vdd);
 
 	return 0;
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index 80540c1c66dc3de5385ac4e41dc7d1ca7fd5af85..001cb77e2f596dd93184d21aeea877ce824d1a7c 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -54,17 +54,40 @@ int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer,
 	struct tegra_fb *fb = to_tegra_fb(framebuffer);
 	uint64_t modifier = fb->base.modifier;
 
-	switch (fourcc_mod_tegra_mod(modifier)) {
-	case NV_FORMAT_MOD_TEGRA_TILED:
+	switch (modifier) {
+	case DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED:
 		tiling->mode = TEGRA_BO_TILING_MODE_TILED;
 		tiling->value = 0;
 		break;
 
-	case NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(0):
+	case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0):
 		tiling->mode = TEGRA_BO_TILING_MODE_BLOCK;
-		tiling->value = fourcc_mod_tegra_param(modifier);
-		if (tiling->value > 5)
-			return -EINVAL;
+		tiling->value = 0;
+		break;
+
+	case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1):
+		tiling->mode = TEGRA_BO_TILING_MODE_BLOCK;
+		tiling->value = 1;
+		break;
+
+	case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2):
+		tiling->mode = TEGRA_BO_TILING_MODE_BLOCK;
+		tiling->value = 2;
+		break;
+
+	case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3):
+		tiling->mode = TEGRA_BO_TILING_MODE_BLOCK;
+		tiling->value = 3;
+		break;
+
+	case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4):
+		tiling->mode = TEGRA_BO_TILING_MODE_BLOCK;
+		tiling->value = 4;
+		break;
+
+	case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5):
+		tiling->mode = TEGRA_BO_TILING_MODE_BLOCK;
+		tiling->value = 5;
 		break;
 
 	default:
@@ -230,6 +253,7 @@ static int tegra_fbdev_probe(struct drm_fb_helper *helper,
 	cmd.height = sizes->surface_height;
 	cmd.pitches[0] = round_up(sizes->surface_width * bytes_per_pixel,
 				  tegra->pitch_align);
+
 	cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
 						     sizes->surface_depth);
 
@@ -361,20 +385,6 @@ static void tegra_fbdev_exit(struct tegra_fbdev *fbdev)
 	drm_fb_helper_fini(&fbdev->base);
 	tegra_fbdev_free(fbdev);
 }
-
-void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev)
-{
-	if (fbdev)
-		drm_fb_helper_restore_fbdev_mode_unlocked(&fbdev->base);
-}
-
-void tegra_fb_output_poll_changed(struct drm_device *drm)
-{
-	struct tegra_drm *tegra = drm->dev_private;
-
-	if (tegra->fbdev)
-		drm_fb_helper_hotplug_event(&tegra->fbdev->base);
-}
 #endif
 
 int tegra_drm_fb_prepare(struct drm_device *drm)
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index ab1e53d434e8cfbec5d2545f07d9540dbcdc5205..49b9bf28f87285dd4f2dee538baaa4ce6f18bb17 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -114,7 +114,7 @@ static const struct host1x_bo_ops tegra_bo_ops = {
 static int tegra_bo_iommu_map(struct tegra_drm *tegra, struct tegra_bo *bo)
 {
 	int prot = IOMMU_READ | IOMMU_WRITE;
-	ssize_t err;
+	int err;
 
 	if (bo->mm)
 		return -EBUSY;
@@ -128,22 +128,21 @@ static int tegra_bo_iommu_map(struct tegra_drm *tegra, struct tegra_bo *bo)
 	err = drm_mm_insert_node_generic(&tegra->mm,
 					 bo->mm, bo->gem.size, PAGE_SIZE, 0, 0);
 	if (err < 0) {
-		dev_err(tegra->drm->dev, "out of I/O virtual memory: %zd\n",
+		dev_err(tegra->drm->dev, "out of I/O virtual memory: %d\n",
 			err);
 		goto unlock;
 	}
 
 	bo->paddr = bo->mm->start;
 
-	err = iommu_map_sg(tegra->domain, bo->paddr, bo->sgt->sgl,
-			   bo->sgt->nents, prot);
-	if (err < 0) {
-		dev_err(tegra->drm->dev, "failed to map buffer: %zd\n", err);
+	bo->size = iommu_map_sg(tegra->domain, bo->paddr, bo->sgt->sgl,
+				bo->sgt->nents, prot);
+	if (!bo->size) {
+		dev_err(tegra->drm->dev, "failed to map buffer\n");
+		err = -ENOMEM;
 		goto remove;
 	}
 
-	bo->size = err;
-
 	mutex_unlock(&tegra->mm_lock);
 
 	return 0;
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index 6434b3d3d1ba4da8bf2b1838df90a89267838098..784739a9f497d530e7425a34bd958956033302bb 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -79,8 +79,6 @@ struct tegra_hdmi {
 	bool dvi;
 
 	struct drm_info_list *debugfs_files;
-	struct drm_minor *minor;
-	struct dentry *debugfs;
 };
 
 static inline struct tegra_hdmi *
@@ -910,6 +908,249 @@ tegra_hdmi_connector_detect(struct drm_connector *connector, bool force)
 	return status;
 }
 
+#define DEBUGFS_REG32(_name) { .name = #_name, .offset = _name }
+
+static const struct debugfs_reg32 tegra_hdmi_regs[] = {
+	DEBUGFS_REG32(HDMI_CTXSW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_STATE0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_STATE1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_STATE2),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_AN_MSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_AN_LSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_CN_MSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_CN_LSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_AKSV_MSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_AKSV_LSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_BKSV_MSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_BKSV_LSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_CKSV_MSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_CKSV_LSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_DKSV_MSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_DKSV_LSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_CTRL),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_CMODE),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_MPRIME_MSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_MPRIME_LSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_SPRIME_MSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_SPRIME_LSB2),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_SPRIME_LSB1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_RI),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_CS_MSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_RG_HDCP_CS_LSB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AUDIO_EMU0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AUDIO_EMU_RDATA0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AUDIO_EMU1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AUDIO_EMU2),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_STATUS),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_HEADER),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_SUBPACK0_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_SUBPACK0_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_STATUS),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_HEADER),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK0_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK0_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK1_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK1_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GENERIC_CTRL),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GENERIC_STATUS),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GENERIC_HEADER),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK0_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK0_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK1_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK1_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK2_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK2_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK3_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK3_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_CTRL),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_0320_SUBPACK_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_0320_SUBPACK_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_0882_SUBPACK_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_0882_SUBPACK_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_1764_SUBPACK_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_1764_SUBPACK_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_0480_SUBPACK_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_0480_SUBPACK_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_0960_SUBPACK_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_0960_SUBPACK_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_1920_SUBPACK_LOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_ACR_1920_SUBPACK_HIGH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_CTRL),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_VSYNC_KEEPOUT),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_VSYNC_WINDOW),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GCP_CTRL),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GCP_STATUS),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_GCP_SUBPACK),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_CHANNEL_STATUS1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_CHANNEL_STATUS2),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_EMU0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_EMU1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_EMU1_RDATA),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_SPARE),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_SPDIF_CHN_STATUS1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_SPDIF_CHN_STATUS2),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDMI_HDCPRIF_ROM_CTRL),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_CAP),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_PWR),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_TEST),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_PLL0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_PLL1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_PLL2),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_CSTM),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_LVDS),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_CRCA),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_CRCB),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_BLANK),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_CTL),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(0)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(1)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(2)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(3)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(4)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(5)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(6)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(7)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(8)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(9)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(10)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(11)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(12)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(13)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(14)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_SEQ_INST(15)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_VCRCA0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_VCRCA1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_CCRCA0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_CCRCA1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_EDATAA0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_EDATAA1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_COUNTA0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_COUNTA1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_DEBUGA0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_DEBUGA1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_TRIG),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_MSCHECK),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_LANE_DRIVE_CURRENT),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_DEBUG0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_DEBUG1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_DEBUG2),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_FS(0)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_FS(1)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_FS(2)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_FS(3)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_FS(4)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_FS(5)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_FS(6)),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_PULSE_WIDTH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_THRESHOLD),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_CNTRL0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_AUDIO_N),
+	DEBUGFS_REG32(HDMI_NV_PDISP_HDCPRIF_ROM_TIMING),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_REFCLK),
+	DEBUGFS_REG32(HDMI_NV_PDISP_CRC_CONTROL),
+	DEBUGFS_REG32(HDMI_NV_PDISP_INPUT_CONTROL),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SCRATCH),
+	DEBUGFS_REG32(HDMI_NV_PDISP_PE_CURRENT),
+	DEBUGFS_REG32(HDMI_NV_PDISP_KEY_CTRL),
+	DEBUGFS_REG32(HDMI_NV_PDISP_KEY_DEBUG0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_KEY_DEBUG1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_KEY_DEBUG2),
+	DEBUGFS_REG32(HDMI_NV_PDISP_KEY_HDCP_KEY_0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_KEY_HDCP_KEY_1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_KEY_HDCP_KEY_2),
+	DEBUGFS_REG32(HDMI_NV_PDISP_KEY_HDCP_KEY_3),
+	DEBUGFS_REG32(HDMI_NV_PDISP_KEY_HDCP_KEY_TRIG),
+	DEBUGFS_REG32(HDMI_NV_PDISP_KEY_SKEY_INDEX),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_AUDIO_CNTRL0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_AUDIO_SPARE0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH0),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH1),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE),
+	DEBUGFS_REG32(HDMI_NV_PDISP_INT_STATUS),
+	DEBUGFS_REG32(HDMI_NV_PDISP_INT_MASK),
+	DEBUGFS_REG32(HDMI_NV_PDISP_INT_ENABLE),
+	DEBUGFS_REG32(HDMI_NV_PDISP_SOR_IO_PEAK_CURRENT),
+};
+
+static int tegra_hdmi_show_regs(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct tegra_hdmi *hdmi = node->info_ent->data;
+	struct drm_crtc *crtc = hdmi->output.encoder.crtc;
+	struct drm_device *drm = node->minor->dev;
+	unsigned int i;
+	int err = 0;
+
+	drm_modeset_lock_all(drm);
+
+	if (!crtc || !crtc->state->active) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tegra_hdmi_regs); i++) {
+		unsigned int offset = tegra_hdmi_regs[i].offset;
+
+		seq_printf(s, "%-56s %#05x %08x\n", tegra_hdmi_regs[i].name,
+			   offset, tegra_hdmi_readl(hdmi, offset));
+	}
+
+unlock:
+	drm_modeset_unlock_all(drm);
+	return err;
+}
+
+static struct drm_info_list debugfs_files[] = {
+	{ "regs", tegra_hdmi_show_regs, 0, NULL },
+};
+
+static int tegra_hdmi_late_register(struct drm_connector *connector)
+{
+	struct tegra_output *output = connector_to_output(connector);
+	unsigned int i, count = ARRAY_SIZE(debugfs_files);
+	struct drm_minor *minor = connector->dev->primary;
+	struct dentry *root = connector->debugfs_entry;
+	struct tegra_hdmi *hdmi = to_hdmi(output);
+	int err;
+
+	hdmi->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
+				      GFP_KERNEL);
+	if (!hdmi->debugfs_files)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++)
+		hdmi->debugfs_files[i].data = hdmi;
+
+	err = drm_debugfs_create_files(hdmi->debugfs_files, count, root, minor);
+	if (err < 0)
+		goto free;
+
+	return 0;
+
+free:
+	kfree(hdmi->debugfs_files);
+	hdmi->debugfs_files = NULL;
+
+	return err;
+}
+
+static void tegra_hdmi_early_unregister(struct drm_connector *connector)
+{
+	struct tegra_output *output = connector_to_output(connector);
+	struct drm_minor *minor = connector->dev->primary;
+	unsigned int count = ARRAY_SIZE(debugfs_files);
+	struct tegra_hdmi *hdmi = to_hdmi(output);
+
+	drm_debugfs_remove_files(hdmi->debugfs_files, count, minor);
+	kfree(hdmi->debugfs_files);
+	hdmi->debugfs_files = NULL;
+}
+
 static const struct drm_connector_funcs tegra_hdmi_connector_funcs = {
 	.reset = drm_atomic_helper_connector_reset,
 	.detect = tegra_hdmi_connector_detect,
@@ -917,6 +1158,8 @@ static const struct drm_connector_funcs tegra_hdmi_connector_funcs = {
 	.destroy = tegra_output_connector_destroy,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+	.late_register = tegra_hdmi_late_register,
+	.early_unregister = tegra_hdmi_early_unregister,
 };
 
 static enum drm_mode_status
@@ -1225,254 +1468,6 @@ static const struct drm_encoder_helper_funcs tegra_hdmi_encoder_helper_funcs = {
 	.atomic_check = tegra_hdmi_encoder_atomic_check,
 };
 
-static int tegra_hdmi_show_regs(struct seq_file *s, void *data)
-{
-	struct drm_info_node *node = s->private;
-	struct tegra_hdmi *hdmi = node->info_ent->data;
-	struct drm_crtc *crtc = hdmi->output.encoder.crtc;
-	struct drm_device *drm = node->minor->dev;
-	int err = 0;
-
-	drm_modeset_lock_all(drm);
-
-	if (!crtc || !crtc->state->active) {
-		err = -EBUSY;
-		goto unlock;
-	}
-
-#define DUMP_REG(name)						\
-	seq_printf(s, "%-56s %#05x %08x\n", #name, name,	\
-		   tegra_hdmi_readl(hdmi, name))
-
-	DUMP_REG(HDMI_CTXSW);
-	DUMP_REG(HDMI_NV_PDISP_SOR_STATE0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_STATE1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_STATE2);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_AN_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_AN_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CN_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CN_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_AKSV_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_AKSV_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_BKSV_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_BKSV_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CKSV_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CKSV_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_DKSV_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_DKSV_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CMODE);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_MPRIME_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_MPRIME_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_SPRIME_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_SPRIME_LSB2);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_SPRIME_LSB1);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_RI);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CS_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CS_LSB);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_EMU0);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_EMU_RDATA0);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_EMU1);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_EMU2);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_STATUS);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_HEADER);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_SUBPACK0_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_SUBPACK0_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_STATUS);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_HEADER);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK0_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK0_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK1_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK1_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_STATUS);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_HEADER);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK0_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK0_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK1_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK1_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK2_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK2_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK3_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK3_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0320_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0320_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0882_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0882_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_1764_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_1764_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0480_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0480_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0960_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0960_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_1920_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_1920_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_VSYNC_KEEPOUT);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_VSYNC_WINDOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GCP_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GCP_STATUS);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GCP_SUBPACK);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_CHANNEL_STATUS1);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_CHANNEL_STATUS2);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_EMU0);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_EMU1);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_EMU1_RDATA);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_SPARE);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_SPDIF_CHN_STATUS1);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_SPDIF_CHN_STATUS2);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_HDCPRIF_ROM_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_SOR_CAP);
-	DUMP_REG(HDMI_NV_PDISP_SOR_PWR);
-	DUMP_REG(HDMI_NV_PDISP_SOR_TEST);
-	DUMP_REG(HDMI_NV_PDISP_SOR_PLL0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_PLL1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_PLL2);
-	DUMP_REG(HDMI_NV_PDISP_SOR_CSTM);
-	DUMP_REG(HDMI_NV_PDISP_SOR_LVDS);
-	DUMP_REG(HDMI_NV_PDISP_SOR_CRCA);
-	DUMP_REG(HDMI_NV_PDISP_SOR_CRCB);
-	DUMP_REG(HDMI_NV_PDISP_SOR_BLANK);
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_CTL);
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(0));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(1));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(2));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(3));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(4));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(5));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(6));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(7));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(8));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(9));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(10));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(11));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(12));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(13));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(14));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(15));
-	DUMP_REG(HDMI_NV_PDISP_SOR_VCRCA0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_VCRCA1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_CCRCA0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_CCRCA1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_EDATAA0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_EDATAA1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_COUNTA0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_COUNTA1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_DEBUGA0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_DEBUGA1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_TRIG);
-	DUMP_REG(HDMI_NV_PDISP_SOR_MSCHECK);
-	DUMP_REG(HDMI_NV_PDISP_SOR_LANE_DRIVE_CURRENT);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_DEBUG0);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_DEBUG1);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_DEBUG2);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(0));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(1));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(2));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(3));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(4));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(5));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(6));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_PULSE_WIDTH);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_THRESHOLD);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_CNTRL0);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_N);
-	DUMP_REG(HDMI_NV_PDISP_HDCPRIF_ROM_TIMING);
-	DUMP_REG(HDMI_NV_PDISP_SOR_REFCLK);
-	DUMP_REG(HDMI_NV_PDISP_CRC_CONTROL);
-	DUMP_REG(HDMI_NV_PDISP_INPUT_CONTROL);
-	DUMP_REG(HDMI_NV_PDISP_SCRATCH);
-	DUMP_REG(HDMI_NV_PDISP_PE_CURRENT);
-	DUMP_REG(HDMI_NV_PDISP_KEY_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_KEY_DEBUG0);
-	DUMP_REG(HDMI_NV_PDISP_KEY_DEBUG1);
-	DUMP_REG(HDMI_NV_PDISP_KEY_DEBUG2);
-	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_0);
-	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_1);
-	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_2);
-	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_3);
-	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_TRIG);
-	DUMP_REG(HDMI_NV_PDISP_KEY_SKEY_INDEX);
-	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_CNTRL0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_SPARE0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR);
-	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE);
-	DUMP_REG(HDMI_NV_PDISP_INT_STATUS);
-	DUMP_REG(HDMI_NV_PDISP_INT_MASK);
-	DUMP_REG(HDMI_NV_PDISP_INT_ENABLE);
-	DUMP_REG(HDMI_NV_PDISP_SOR_IO_PEAK_CURRENT);
-
-#undef DUMP_REG
-
-unlock:
-	drm_modeset_unlock_all(drm);
-	return err;
-}
-
-static struct drm_info_list debugfs_files[] = {
-	{ "regs", tegra_hdmi_show_regs, 0, NULL },
-};
-
-static int tegra_hdmi_debugfs_init(struct tegra_hdmi *hdmi,
-				   struct drm_minor *minor)
-{
-	unsigned int i;
-	int err;
-
-	hdmi->debugfs = debugfs_create_dir("hdmi", minor->debugfs_root);
-	if (!hdmi->debugfs)
-		return -ENOMEM;
-
-	hdmi->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
-				      GFP_KERNEL);
-	if (!hdmi->debugfs_files) {
-		err = -ENOMEM;
-		goto remove;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
-		hdmi->debugfs_files[i].data = hdmi;
-
-	err = drm_debugfs_create_files(hdmi->debugfs_files,
-				       ARRAY_SIZE(debugfs_files),
-				       hdmi->debugfs, minor);
-	if (err < 0)
-		goto free;
-
-	hdmi->minor = minor;
-
-	return 0;
-
-free:
-	kfree(hdmi->debugfs_files);
-	hdmi->debugfs_files = NULL;
-remove:
-	debugfs_remove(hdmi->debugfs);
-	hdmi->debugfs = NULL;
-
-	return err;
-}
-
-static void tegra_hdmi_debugfs_exit(struct tegra_hdmi *hdmi)
-{
-	drm_debugfs_remove_files(hdmi->debugfs_files, ARRAY_SIZE(debugfs_files),
-				 hdmi->minor);
-	hdmi->minor = NULL;
-
-	kfree(hdmi->debugfs_files);
-	hdmi->debugfs_files = NULL;
-
-	debugfs_remove(hdmi->debugfs);
-	hdmi->debugfs = NULL;
-}
-
 static int tegra_hdmi_init(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
@@ -1505,12 +1500,6 @@ static int tegra_hdmi_init(struct host1x_client *client)
 
 	hdmi->output.encoder.possible_crtcs = 0x3;
 
-	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_hdmi_debugfs_init(hdmi, drm->primary);
-		if (err < 0)
-			dev_err(client->dev, "debugfs setup failed: %d\n", err);
-	}
-
 	err = regulator_enable(hdmi->hdmi);
 	if (err < 0) {
 		dev_err(client->dev, "failed to enable HDMI regulator: %d\n",
@@ -1543,9 +1532,6 @@ static int tegra_hdmi_exit(struct host1x_client *client)
 	regulator_disable(hdmi->pll);
 	regulator_disable(hdmi->hdmi);
 
-	if (IS_ENABLED(CONFIG_DEBUG_FS))
-		tegra_hdmi_debugfs_exit(hdmi);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
new file mode 100644
index 0000000000000000000000000000000000000000..e10a47d573138b164acc90c690c86ccddf9d0b08
--- /dev/null
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -0,0 +1,806 @@
+/*
+ * Copyright (C) 2017 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/host1x.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_graph.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "drm.h"
+#include "dc.h"
+#include "plane.h"
+
+static const u32 tegra_shared_plane_formats[] = {
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_RGBA5551,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_ABGR8888,
+	/* new on Tegra114 */
+	DRM_FORMAT_ABGR4444,
+	DRM_FORMAT_ABGR1555,
+	DRM_FORMAT_BGRA5551,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGBX5551,
+	DRM_FORMAT_XBGR1555,
+	DRM_FORMAT_BGRX5551,
+	DRM_FORMAT_BGR565,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
+	/* planar formats */
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_YUV420,
+	DRM_FORMAT_YUV422,
+};
+
+static inline unsigned int tegra_plane_offset(struct tegra_plane *plane,
+					      unsigned int offset)
+{
+	if (offset >= 0x500 && offset <= 0x581) {
+		offset = 0x000 + (offset - 0x500);
+		return plane->offset + offset;
+	}
+
+	if (offset >= 0x700 && offset <= 0x73c) {
+		offset = 0x180 + (offset - 0x700);
+		return plane->offset + offset;
+	}
+
+	if (offset >= 0x800 && offset <= 0x83e) {
+		offset = 0x1c0 + (offset - 0x800);
+		return plane->offset + offset;
+	}
+
+	dev_WARN(plane->dc->dev, "invalid offset: %x\n", offset);
+
+	return plane->offset + offset;
+}
+
+static inline u32 tegra_plane_readl(struct tegra_plane *plane,
+				    unsigned int offset)
+{
+	return tegra_dc_readl(plane->dc, tegra_plane_offset(plane, offset));
+}
+
+static inline void tegra_plane_writel(struct tegra_plane *plane, u32 value,
+				      unsigned int offset)
+{
+	tegra_dc_writel(plane->dc, value, tegra_plane_offset(plane, offset));
+}
+
+static int tegra_windowgroup_enable(struct tegra_windowgroup *wgrp)
+{
+	mutex_lock(&wgrp->lock);
+
+	if (wgrp->usecount == 0) {
+		pm_runtime_get_sync(wgrp->parent);
+		reset_control_deassert(wgrp->rst);
+	}
+
+	wgrp->usecount++;
+	mutex_unlock(&wgrp->lock);
+
+	return 0;
+}
+
+static void tegra_windowgroup_disable(struct tegra_windowgroup *wgrp)
+{
+	int err;
+
+	mutex_lock(&wgrp->lock);
+
+	if (wgrp->usecount == 1) {
+		err = reset_control_assert(wgrp->rst);
+		if (err < 0) {
+			pr_err("failed to assert reset for window group %u\n",
+			       wgrp->index);
+		}
+
+		pm_runtime_put(wgrp->parent);
+	}
+
+	wgrp->usecount--;
+	mutex_unlock(&wgrp->lock);
+}
+
+int tegra_display_hub_prepare(struct tegra_display_hub *hub)
+{
+	unsigned int i;
+
+	/*
+	 * XXX Enabling/disabling windowgroups needs to happen when the owner
+	 * display controller is disabled. There's currently no good point at
+	 * which this could be executed, so unconditionally enable all window
+	 * groups for now.
+	 */
+	for (i = 0; i < hub->soc->num_wgrps; i++) {
+		struct tegra_windowgroup *wgrp = &hub->wgrps[i];
+
+		tegra_windowgroup_enable(wgrp);
+	}
+
+	return 0;
+}
+
+void tegra_display_hub_cleanup(struct tegra_display_hub *hub)
+{
+	unsigned int i;
+
+	/*
+	 * XXX Remove this once window groups can be more fine-grainedly
+	 * enabled and disabled.
+	 */
+	for (i = 0; i < hub->soc->num_wgrps; i++) {
+		struct tegra_windowgroup *wgrp = &hub->wgrps[i];
+
+		tegra_windowgroup_disable(wgrp);
+	}
+}
+
+static void tegra_shared_plane_update(struct tegra_plane *plane)
+{
+	struct tegra_dc *dc = plane->dc;
+	unsigned long timeout;
+	u32 mask, value;
+
+	mask = COMMON_UPDATE | WIN_A_UPDATE << plane->base.index;
+	tegra_dc_writel(dc, mask, DC_CMD_STATE_CONTROL);
+
+	timeout = jiffies + msecs_to_jiffies(1000);
+
+	while (time_before(jiffies, timeout)) {
+		value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+		if ((value & mask) == 0)
+			break;
+
+		usleep_range(100, 400);
+	}
+}
+
+static void tegra_shared_plane_activate(struct tegra_plane *plane)
+{
+	struct tegra_dc *dc = plane->dc;
+	unsigned long timeout;
+	u32 mask, value;
+
+	mask = COMMON_ACTREQ | WIN_A_ACT_REQ << plane->base.index;
+	tegra_dc_writel(dc, mask, DC_CMD_STATE_CONTROL);
+
+	timeout = jiffies + msecs_to_jiffies(1000);
+
+	while (time_before(jiffies, timeout)) {
+		value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+		if ((value & mask) == 0)
+			break;
+
+		usleep_range(100, 400);
+	}
+}
+
+static unsigned int
+tegra_shared_plane_get_owner(struct tegra_plane *plane, struct tegra_dc *dc)
+{
+	unsigned int offset =
+		tegra_plane_offset(plane, DC_WIN_CORE_WINDOWGROUP_SET_CONTROL);
+
+	return tegra_dc_readl(dc, offset) & OWNER_MASK;
+}
+
+static bool tegra_dc_owns_shared_plane(struct tegra_dc *dc,
+				       struct tegra_plane *plane)
+{
+	struct device *dev = dc->dev;
+
+	if (tegra_shared_plane_get_owner(plane, dc) == dc->pipe) {
+		if (plane->dc == dc)
+			return true;
+
+		dev_WARN(dev, "head %u owns window %u but is not attached\n",
+			 dc->pipe, plane->index);
+	}
+
+	return false;
+}
+
+static int tegra_shared_plane_set_owner(struct tegra_plane *plane,
+					struct tegra_dc *new)
+{
+	unsigned int offset =
+		tegra_plane_offset(plane, DC_WIN_CORE_WINDOWGROUP_SET_CONTROL);
+	struct tegra_dc *old = plane->dc, *dc = new ? new : old;
+	struct device *dev = new ? new->dev : old->dev;
+	unsigned int owner, index = plane->index;
+	u32 value;
+
+	value = tegra_dc_readl(dc, offset);
+	owner = value & OWNER_MASK;
+
+	if (new && (owner != OWNER_MASK && owner != new->pipe)) {
+		dev_WARN(dev, "window %u owned by head %u\n", index, owner);
+		return -EBUSY;
+	}
+
+	/*
+	 * This seems to happen whenever the head has been disabled with one
+	 * or more windows being active. This is harmless because we'll just
+	 * reassign the window to the new head anyway.
+	 */
+	if (old && owner == OWNER_MASK)
+		dev_dbg(dev, "window %u not owned by head %u but %u\n", index,
+			old->pipe, owner);
+
+	value &= ~OWNER_MASK;
+
+	if (new)
+		value |= OWNER(new->pipe);
+	else
+		value |= OWNER_MASK;
+
+	tegra_dc_writel(dc, value, offset);
+
+	plane->dc = new;
+
+	return 0;
+}
+
+static void tegra_dc_assign_shared_plane(struct tegra_dc *dc,
+					 struct tegra_plane *plane)
+{
+	u32 value;
+	int err;
+
+	if (!tegra_dc_owns_shared_plane(dc, plane)) {
+		err = tegra_shared_plane_set_owner(plane, dc);
+		if (err < 0)
+			return;
+	}
+
+	value = tegra_plane_readl(plane, DC_WIN_CORE_IHUB_LINEBUF_CONFIG);
+	value |= MODE_FOUR_LINES;
+	tegra_plane_writel(plane, value, DC_WIN_CORE_IHUB_LINEBUF_CONFIG);
+
+	value = tegra_plane_readl(plane, DC_WIN_CORE_IHUB_WGRP_FETCH_METER);
+	value = SLOTS(1);
+	tegra_plane_writel(plane, value, DC_WIN_CORE_IHUB_WGRP_FETCH_METER);
+
+	/* disable watermark */
+	value = tegra_plane_readl(plane, DC_WIN_CORE_IHUB_WGRP_LATENCY_CTLA);
+	value &= ~LATENCY_CTL_MODE_ENABLE;
+	tegra_plane_writel(plane, value, DC_WIN_CORE_IHUB_WGRP_LATENCY_CTLA);
+
+	value = tegra_plane_readl(plane, DC_WIN_CORE_IHUB_WGRP_LATENCY_CTLB);
+	value |= WATERMARK_MASK;
+	tegra_plane_writel(plane, value, DC_WIN_CORE_IHUB_WGRP_LATENCY_CTLB);
+
+	/* pipe meter */
+	value = tegra_plane_readl(plane, DC_WIN_CORE_PRECOMP_WGRP_PIPE_METER);
+	value = PIPE_METER_INT(0) | PIPE_METER_FRAC(0);
+	tegra_plane_writel(plane, value, DC_WIN_CORE_PRECOMP_WGRP_PIPE_METER);
+
+	/* mempool entries */
+	value = tegra_plane_readl(plane, DC_WIN_CORE_IHUB_WGRP_POOL_CONFIG);
+	value = MEMPOOL_ENTRIES(0x331);
+	tegra_plane_writel(plane, value, DC_WIN_CORE_IHUB_WGRP_POOL_CONFIG);
+
+	value = tegra_plane_readl(plane, DC_WIN_CORE_IHUB_THREAD_GROUP);
+	value &= ~THREAD_NUM_MASK;
+	value |= THREAD_NUM(plane->base.index);
+	value |= THREAD_GROUP_ENABLE;
+	tegra_plane_writel(plane, value, DC_WIN_CORE_IHUB_THREAD_GROUP);
+
+	tegra_shared_plane_update(plane);
+	tegra_shared_plane_activate(plane);
+}
+
+static void tegra_dc_remove_shared_plane(struct tegra_dc *dc,
+					 struct tegra_plane *plane)
+{
+	tegra_shared_plane_set_owner(plane, NULL);
+}
+
+static int tegra_shared_plane_atomic_check(struct drm_plane *plane,
+					   struct drm_plane_state *state)
+{
+	struct tegra_plane_state *plane_state = to_tegra_plane_state(state);
+	struct tegra_shared_plane *tegra = to_tegra_shared_plane(plane);
+	struct tegra_bo_tiling *tiling = &plane_state->tiling;
+	struct tegra_dc *dc = to_tegra_dc(state->crtc);
+	int err;
+
+	/* no need for further checks if the plane is being disabled */
+	if (!state->crtc || !state->fb)
+		return 0;
+
+	err = tegra_plane_format(state->fb->format->format,
+				 &plane_state->format,
+				 &plane_state->swap);
+	if (err < 0)
+		return err;
+
+	err = tegra_fb_get_tiling(state->fb, tiling);
+	if (err < 0)
+		return err;
+
+	if (tiling->mode == TEGRA_BO_TILING_MODE_BLOCK &&
+	    !dc->soc->supports_block_linear) {
+		DRM_ERROR("hardware doesn't support block linear mode\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Tegra doesn't support different strides for U and V planes so we
+	 * error out if the user tries to display a framebuffer with such a
+	 * configuration.
+	 */
+	if (state->fb->format->num_planes > 2) {
+		if (state->fb->pitches[2] != state->fb->pitches[1]) {
+			DRM_ERROR("unsupported UV-plane configuration\n");
+			return -EINVAL;
+		}
+	}
+
+	/* XXX scaling is not yet supported, add a check here */
+
+	err = tegra_plane_state_add(&tegra->base, state);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static void tegra_shared_plane_atomic_disable(struct drm_plane *plane,
+					      struct drm_plane_state *old_state)
+{
+	struct tegra_dc *dc = to_tegra_dc(old_state->crtc);
+	struct tegra_plane *p = to_tegra_plane(plane);
+	u32 value;
+
+	/* rien ne va plus */
+	if (!old_state || !old_state->crtc)
+		return;
+
+	/*
+	 * XXX Legacy helpers seem to sometimes call ->atomic_disable() even
+	 * on planes that are already disabled. Make sure we fallback to the
+	 * head for this particular state instead of crashing.
+	 */
+	if (WARN_ON(p->dc == NULL))
+		p->dc = dc;
+
+	pm_runtime_get_sync(dc->dev);
+
+	value = tegra_plane_readl(p, DC_WIN_WIN_OPTIONS);
+	value &= ~WIN_ENABLE;
+	tegra_plane_writel(p, value, DC_WIN_WIN_OPTIONS);
+
+	tegra_dc_remove_shared_plane(dc, p);
+
+	pm_runtime_put(dc->dev);
+}
+
+static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
+					     struct drm_plane_state *old_state)
+{
+	struct tegra_plane_state *state = to_tegra_plane_state(plane->state);
+	struct tegra_dc *dc = to_tegra_dc(plane->state->crtc);
+	unsigned int zpos = plane->state->normalized_zpos;
+	struct drm_framebuffer *fb = plane->state->fb;
+	struct tegra_plane *p = to_tegra_plane(plane);
+	struct tegra_bo *bo;
+	dma_addr_t base;
+	u32 value;
+
+	/* rien ne va plus */
+	if (!plane->state->crtc || !plane->state->fb)
+		return;
+
+	if (!plane->state->visible) {
+		tegra_shared_plane_atomic_disable(plane, old_state);
+		return;
+	}
+
+	pm_runtime_get_sync(dc->dev);
+
+	tegra_dc_assign_shared_plane(dc, p);
+
+	tegra_plane_writel(p, VCOUNTER, DC_WIN_CORE_ACT_CONTROL);
+
+	/* blending */
+	value = BLEND_FACTOR_DST_ALPHA_ZERO | BLEND_FACTOR_SRC_ALPHA_K2 |
+		BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_SRC |
+		BLEND_FACTOR_SRC_COLOR_K1_TIMES_SRC;
+	tegra_plane_writel(p, value, DC_WIN_BLEND_MATCH_SELECT);
+
+	value = BLEND_FACTOR_DST_ALPHA_ZERO | BLEND_FACTOR_SRC_ALPHA_K2 |
+		BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_SRC |
+		BLEND_FACTOR_SRC_COLOR_K1_TIMES_SRC;
+	tegra_plane_writel(p, value, DC_WIN_BLEND_NOMATCH_SELECT);
+
+	value = K2(255) | K1(255) | WINDOW_LAYER_DEPTH(255 - zpos);
+	tegra_plane_writel(p, value, DC_WIN_BLEND_LAYER_CONTROL);
+
+	/* bypass scaling */
+	value = HORIZONTAL_TAPS_5 | VERTICAL_TAPS_5;
+	tegra_plane_writel(p, value, DC_WIN_WINDOWGROUP_SET_CONTROL_INPUT_SCALER);
+
+	value = INPUT_SCALER_VBYPASS | INPUT_SCALER_HBYPASS;
+	tegra_plane_writel(p, value, DC_WIN_WINDOWGROUP_SET_INPUT_SCALER_USAGE);
+
+	/* disable compression */
+	tegra_plane_writel(p, 0, DC_WINBUF_CDE_CONTROL);
+
+	bo = tegra_fb_get_plane(fb, 0);
+	base = bo->paddr;
+
+	tegra_plane_writel(p, state->format, DC_WIN_COLOR_DEPTH);
+	tegra_plane_writel(p, 0, DC_WIN_PRECOMP_WGRP_PARAMS);
+
+	value = V_POSITION(plane->state->crtc_y) |
+		H_POSITION(plane->state->crtc_x);
+	tegra_plane_writel(p, value, DC_WIN_POSITION);
+
+	value = V_SIZE(plane->state->crtc_h) | H_SIZE(plane->state->crtc_w);
+	tegra_plane_writel(p, value, DC_WIN_SIZE);
+
+	value = WIN_ENABLE | COLOR_EXPAND;
+	tegra_plane_writel(p, value, DC_WIN_WIN_OPTIONS);
+
+	value = V_SIZE(plane->state->crtc_h) | H_SIZE(plane->state->crtc_w);
+	tegra_plane_writel(p, value, DC_WIN_CROPPED_SIZE);
+
+	tegra_plane_writel(p, upper_32_bits(base), DC_WINBUF_START_ADDR_HI);
+	tegra_plane_writel(p, lower_32_bits(base), DC_WINBUF_START_ADDR);
+
+	value = PITCH(fb->pitches[0]);
+	tegra_plane_writel(p, value, DC_WIN_PLANAR_STORAGE);
+
+	value = CLAMP_BEFORE_BLEND | DEGAMMA_SRGB | INPUT_RANGE_FULL;
+	tegra_plane_writel(p, value, DC_WIN_SET_PARAMS);
+
+	value = OFFSET_X(plane->state->src_y >> 16) |
+		OFFSET_Y(plane->state->src_x >> 16);
+	tegra_plane_writel(p, value, DC_WINBUF_CROPPED_POINT);
+
+	if (dc->soc->supports_block_linear) {
+		unsigned long height = state->tiling.value;
+
+		/* XXX */
+		switch (state->tiling.mode) {
+		case TEGRA_BO_TILING_MODE_PITCH:
+			value = DC_WINBUF_SURFACE_KIND_BLOCK_HEIGHT(0) |
+				DC_WINBUF_SURFACE_KIND_PITCH;
+			break;
+
+		/* XXX not supported on Tegra186 and later */
+		case TEGRA_BO_TILING_MODE_TILED:
+			value = DC_WINBUF_SURFACE_KIND_TILED;
+			break;
+
+		case TEGRA_BO_TILING_MODE_BLOCK:
+			value = DC_WINBUF_SURFACE_KIND_BLOCK_HEIGHT(height) |
+				DC_WINBUF_SURFACE_KIND_BLOCK;
+			break;
+		}
+
+		tegra_plane_writel(p, value, DC_WINBUF_SURFACE_KIND);
+	}
+
+	/* disable gamut CSC */
+	value = tegra_plane_readl(p, DC_WIN_WINDOW_SET_CONTROL);
+	value &= ~CONTROL_CSC_ENABLE;
+	tegra_plane_writel(p, value, DC_WIN_WINDOW_SET_CONTROL);
+
+	pm_runtime_put(dc->dev);
+}
+
+static const struct drm_plane_helper_funcs tegra_shared_plane_helper_funcs = {
+	.atomic_check = tegra_shared_plane_atomic_check,
+	.atomic_update = tegra_shared_plane_atomic_update,
+	.atomic_disable = tegra_shared_plane_atomic_disable,
+};
+
+struct drm_plane *tegra_shared_plane_create(struct drm_device *drm,
+					    struct tegra_dc *dc,
+					    unsigned int wgrp,
+					    unsigned int index)
+{
+	enum drm_plane_type type = DRM_PLANE_TYPE_OVERLAY;
+	struct tegra_drm *tegra = drm->dev_private;
+	struct tegra_display_hub *hub = tegra->hub;
+	/* planes can be assigned to arbitrary CRTCs */
+	unsigned int possible_crtcs = 0x7;
+	struct tegra_shared_plane *plane;
+	unsigned int num_formats;
+	struct drm_plane *p;
+	const u32 *formats;
+	int err;
+
+	plane = kzalloc(sizeof(*plane), GFP_KERNEL);
+	if (!plane)
+		return ERR_PTR(-ENOMEM);
+
+	plane->base.offset = 0x0a00 + 0x0300 * index;
+	plane->base.index = index;
+
+	plane->wgrp = &hub->wgrps[wgrp];
+	plane->wgrp->parent = dc->dev;
+
+	p = &plane->base.base;
+
+	num_formats = ARRAY_SIZE(tegra_shared_plane_formats);
+	formats = tegra_shared_plane_formats;
+
+	err = drm_universal_plane_init(drm, p, possible_crtcs,
+				       &tegra_plane_funcs, formats,
+				       num_formats, NULL, type, NULL);
+	if (err < 0) {
+		kfree(plane);
+		return ERR_PTR(err);
+	}
+
+	drm_plane_helper_add(p, &tegra_shared_plane_helper_funcs);
+	drm_plane_create_zpos_property(p, 0, 0, 255);
+
+	return p;
+}
+
+static void tegra_display_hub_update(struct tegra_dc *dc)
+{
+	u32 value;
+
+	pm_runtime_get_sync(dc->dev);
+
+	value = tegra_dc_readl(dc, DC_CMD_IHUB_COMMON_MISC_CTL);
+	value &= ~LATENCY_EVENT;
+	tegra_dc_writel(dc, value, DC_CMD_IHUB_COMMON_MISC_CTL);
+
+	value = tegra_dc_readl(dc, DC_DISP_IHUB_COMMON_DISPLAY_FETCH_METER);
+	value = CURS_SLOTS(1) | WGRP_SLOTS(1);
+	tegra_dc_writel(dc, value, DC_DISP_IHUB_COMMON_DISPLAY_FETCH_METER);
+
+	tegra_dc_writel(dc, COMMON_UPDATE, DC_CMD_STATE_CONTROL);
+	tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+	tegra_dc_writel(dc, COMMON_ACTREQ, DC_CMD_STATE_CONTROL);
+	tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+
+	pm_runtime_put(dc->dev);
+}
+
+void tegra_display_hub_atomic_commit(struct drm_device *drm,
+				     struct drm_atomic_state *state)
+{
+	struct tegra_atomic_state *s = to_tegra_atomic_state(state);
+	struct tegra_drm *tegra = drm->dev_private;
+	struct tegra_display_hub *hub = tegra->hub;
+	struct device *dev = hub->client.dev;
+	int err;
+
+	if (s->clk_disp) {
+		err = clk_set_rate(s->clk_disp, s->rate);
+		if (err < 0)
+			dev_err(dev, "failed to set rate of %pC to %lu Hz\n",
+				s->clk_disp, s->rate);
+
+		err = clk_set_parent(hub->clk_disp, s->clk_disp);
+		if (err < 0)
+			dev_err(dev, "failed to set parent of %pC to %pC: %d\n",
+				hub->clk_disp, s->clk_disp, err);
+	}
+
+	if (s->dc)
+		tegra_display_hub_update(s->dc);
+}
+
+static int tegra_display_hub_init(struct host1x_client *client)
+{
+	struct tegra_display_hub *hub = to_tegra_display_hub(client);
+	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct tegra_drm *tegra = drm->dev_private;
+
+	tegra->hub = hub;
+
+	return 0;
+}
+
+static int tegra_display_hub_exit(struct host1x_client *client)
+{
+	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct tegra_drm *tegra = drm->dev_private;
+
+	tegra->hub = NULL;
+
+	return 0;
+}
+
+static const struct host1x_client_ops tegra_display_hub_ops = {
+	.init = tegra_display_hub_init,
+	.exit = tegra_display_hub_exit,
+};
+
+static int tegra_display_hub_probe(struct platform_device *pdev)
+{
+	struct tegra_display_hub *hub;
+	unsigned int i;
+	int err;
+
+	hub = devm_kzalloc(&pdev->dev, sizeof(*hub), GFP_KERNEL);
+	if (!hub)
+		return -ENOMEM;
+
+	hub->soc = of_device_get_match_data(&pdev->dev);
+
+	hub->clk_disp = devm_clk_get(&pdev->dev, "disp");
+	if (IS_ERR(hub->clk_disp)) {
+		err = PTR_ERR(hub->clk_disp);
+		return err;
+	}
+
+	hub->clk_dsc = devm_clk_get(&pdev->dev, "dsc");
+	if (IS_ERR(hub->clk_dsc)) {
+		err = PTR_ERR(hub->clk_dsc);
+		return err;
+	}
+
+	hub->clk_hub = devm_clk_get(&pdev->dev, "hub");
+	if (IS_ERR(hub->clk_hub)) {
+		err = PTR_ERR(hub->clk_hub);
+		return err;
+	}
+
+	hub->rst = devm_reset_control_get(&pdev->dev, "misc");
+	if (IS_ERR(hub->rst)) {
+		err = PTR_ERR(hub->rst);
+		return err;
+	}
+
+	hub->wgrps = devm_kcalloc(&pdev->dev, hub->soc->num_wgrps,
+				  sizeof(*hub->wgrps), GFP_KERNEL);
+	if (!hub->wgrps)
+		return -ENOMEM;
+
+	for (i = 0; i < hub->soc->num_wgrps; i++) {
+		struct tegra_windowgroup *wgrp = &hub->wgrps[i];
+		char id[8];
+
+		snprintf(id, sizeof(id), "wgrp%u", i);
+		mutex_init(&wgrp->lock);
+		wgrp->usecount = 0;
+		wgrp->index = i;
+
+		wgrp->rst = devm_reset_control_get(&pdev->dev, id);
+		if (IS_ERR(wgrp->rst))
+			return PTR_ERR(wgrp->rst);
+
+		err = reset_control_assert(wgrp->rst);
+		if (err < 0)
+			return err;
+	}
+
+	/* XXX: enable clock across reset? */
+	err = reset_control_assert(hub->rst);
+	if (err < 0)
+		return err;
+
+	platform_set_drvdata(pdev, hub);
+	pm_runtime_enable(&pdev->dev);
+
+	INIT_LIST_HEAD(&hub->client.list);
+	hub->client.ops = &tegra_display_hub_ops;
+	hub->client.dev = &pdev->dev;
+
+	err = host1x_client_register(&hub->client);
+	if (err < 0)
+		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
+			err);
+
+	return err;
+}
+
+static int tegra_display_hub_remove(struct platform_device *pdev)
+{
+	struct tegra_display_hub *hub = platform_get_drvdata(pdev);
+	int err;
+
+	err = host1x_client_unregister(&hub->client);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
+			err);
+	}
+
+	pm_runtime_disable(&pdev->dev);
+
+	return err;
+}
+
+static int __maybe_unused tegra_display_hub_suspend(struct device *dev)
+{
+	struct tegra_display_hub *hub = dev_get_drvdata(dev);
+	int err;
+
+	err = reset_control_assert(hub->rst);
+	if (err < 0)
+		return err;
+
+	clk_disable_unprepare(hub->clk_hub);
+	clk_disable_unprepare(hub->clk_dsc);
+	clk_disable_unprepare(hub->clk_disp);
+
+	return 0;
+}
+
+static int __maybe_unused tegra_display_hub_resume(struct device *dev)
+{
+	struct tegra_display_hub *hub = dev_get_drvdata(dev);
+	int err;
+
+	err = clk_prepare_enable(hub->clk_disp);
+	if (err < 0)
+		return err;
+
+	err = clk_prepare_enable(hub->clk_dsc);
+	if (err < 0)
+		goto disable_disp;
+
+	err = clk_prepare_enable(hub->clk_hub);
+	if (err < 0)
+		goto disable_dsc;
+
+	err = reset_control_deassert(hub->rst);
+	if (err < 0)
+		goto disable_hub;
+
+	return 0;
+
+disable_hub:
+	clk_disable_unprepare(hub->clk_hub);
+disable_dsc:
+	clk_disable_unprepare(hub->clk_dsc);
+disable_disp:
+	clk_disable_unprepare(hub->clk_disp);
+	return err;
+}
+
+static const struct dev_pm_ops tegra_display_hub_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_display_hub_suspend,
+			   tegra_display_hub_resume, NULL)
+};
+
+static const struct tegra_display_hub_soc tegra186_display_hub = {
+	.num_wgrps = 6,
+};
+
+static const struct of_device_id tegra_display_hub_of_match[] = {
+	{
+		.compatible = "nvidia,tegra186-display",
+		.data = &tegra186_display_hub
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(of, tegra_display_hub_of_match);
+
+struct platform_driver tegra_display_hub_driver = {
+	.driver = {
+		.name = "tegra-display-hub",
+		.of_match_table = tegra_display_hub_of_match,
+		.pm = &tegra_display_hub_pm_ops,
+	},
+	.probe = tegra_display_hub_probe,
+	.remove = tegra_display_hub_remove,
+};
diff --git a/drivers/gpu/drm/tegra/hub.h b/drivers/gpu/drm/tegra/hub.h
new file mode 100644
index 0000000000000000000000000000000000000000..890a47cd05c310aa507d2a95a363dfaec73471da
--- /dev/null
+++ b/drivers/gpu/drm/tegra/hub.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2017 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef TEGRA_HUB_H
+#define TEGRA_HUB_H 1
+
+#include <drm/drmP.h>
+#include <drm/drm_plane.h>
+
+#include "plane.h"
+
+struct tegra_dc;
+
+struct tegra_windowgroup {
+	unsigned int usecount;
+	struct mutex lock;
+
+	unsigned int index;
+	struct device *parent;
+	struct reset_control *rst;
+};
+
+struct tegra_shared_plane {
+	struct tegra_plane base;
+	struct tegra_windowgroup *wgrp;
+};
+
+static inline struct tegra_shared_plane *
+to_tegra_shared_plane(struct drm_plane *plane)
+{
+	return container_of(plane, struct tegra_shared_plane, base.base);
+}
+
+struct tegra_display_hub_soc {
+	unsigned int num_wgrps;
+};
+
+struct tegra_display_hub {
+	struct host1x_client client;
+	struct clk *clk_disp;
+	struct clk *clk_dsc;
+	struct clk *clk_hub;
+	struct reset_control *rst;
+
+	const struct tegra_display_hub_soc *soc;
+	struct tegra_windowgroup *wgrps;
+};
+
+static inline struct tegra_display_hub *
+to_tegra_display_hub(struct host1x_client *client)
+{
+	return container_of(client, struct tegra_display_hub, client);
+}
+
+struct tegra_dc;
+struct tegra_plane;
+
+int tegra_display_hub_prepare(struct tegra_display_hub *hub);
+void tegra_display_hub_cleanup(struct tegra_display_hub *hub);
+
+struct drm_plane *tegra_shared_plane_create(struct drm_device *drm,
+					    struct tegra_dc *dc,
+					    unsigned int wgrp,
+					    unsigned int index);
+
+void tegra_display_hub_atomic_commit(struct drm_device *drm,
+				     struct drm_atomic_state *state);
+
+#define DC_CMD_IHUB_COMMON_MISC_CTL 0x068
+#define  LATENCY_EVENT (1 << 3)
+
+#define DC_DISP_IHUB_COMMON_DISPLAY_FETCH_METER 0x451
+#define  CURS_SLOTS(x) (((x) & 0xff) << 8)
+#define  WGRP_SLOTS(x) (((x) & 0xff) << 0)
+
+#endif /* TEGRA_HUB_H */
diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index 24f8a3b712b4afb0fd6ac98afdf285b17c176e40..ffe34bd0bb9d1f1a694299c1162767604a02c8bd 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -9,7 +9,9 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_panel.h>
+
 #include "drm.h"
+#include "dc.h"
 
 #include <media/cec-notifier.h>
 
@@ -218,3 +220,25 @@ void tegra_output_exit(struct tegra_output *output)
 	if (output->panel)
 		drm_panel_detach(output->panel);
 }
+
+void tegra_output_find_possible_crtcs(struct tegra_output *output,
+				      struct drm_device *drm)
+{
+	struct device *dev = output->dev;
+	struct drm_crtc *crtc;
+	unsigned int mask = 0;
+
+	drm_for_each_crtc(crtc, drm) {
+		struct tegra_dc *dc = to_tegra_dc(crtc);
+
+		if (tegra_dc_has_output(dc, dev))
+			mask |= drm_crtc_mask(crtc);
+	}
+
+	if (mask == 0) {
+		dev_warn(dev, "missing output definition for heads in DT\n");
+		mask = 0x3;
+	}
+
+	output->encoder.possible_crtcs = mask;
+}
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
new file mode 100644
index 0000000000000000000000000000000000000000..36a06a99369821aed88bbf9701550848b420493e
--- /dev/null
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) 2017 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+
+#include "dc.h"
+#include "plane.h"
+
+static void tegra_plane_destroy(struct drm_plane *plane)
+{
+	struct tegra_plane *p = to_tegra_plane(plane);
+
+	drm_plane_cleanup(plane);
+	kfree(p);
+}
+
+static void tegra_plane_reset(struct drm_plane *plane)
+{
+	struct tegra_plane_state *state;
+
+	if (plane->state)
+		__drm_atomic_helper_plane_destroy_state(plane->state);
+
+	kfree(plane->state);
+	plane->state = NULL;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (state) {
+		plane->state = &state->base;
+		plane->state->plane = plane;
+	}
+}
+
+static struct drm_plane_state *
+tegra_plane_atomic_duplicate_state(struct drm_plane *plane)
+{
+	struct tegra_plane_state *state = to_tegra_plane_state(plane->state);
+	struct tegra_plane_state *copy;
+	unsigned int i;
+
+	copy = kmalloc(sizeof(*copy), GFP_KERNEL);
+	if (!copy)
+		return NULL;
+
+	__drm_atomic_helper_plane_duplicate_state(plane, &copy->base);
+	copy->tiling = state->tiling;
+	copy->format = state->format;
+	copy->swap = state->swap;
+	copy->opaque = state->opaque;
+
+	for (i = 0; i < 3; i++)
+		copy->dependent[i] = state->dependent[i];
+
+	return &copy->base;
+}
+
+static void tegra_plane_atomic_destroy_state(struct drm_plane *plane,
+					     struct drm_plane_state *state)
+{
+	__drm_atomic_helper_plane_destroy_state(state);
+	kfree(state);
+}
+
+const struct drm_plane_funcs tegra_plane_funcs = {
+	.update_plane = drm_atomic_helper_update_plane,
+	.disable_plane = drm_atomic_helper_disable_plane,
+	.destroy = tegra_plane_destroy,
+	.reset = tegra_plane_reset,
+	.atomic_duplicate_state = tegra_plane_atomic_duplicate_state,
+	.atomic_destroy_state = tegra_plane_atomic_destroy_state,
+};
+
+int tegra_plane_state_add(struct tegra_plane *plane,
+			  struct drm_plane_state *state)
+{
+	struct drm_crtc_state *crtc_state;
+	struct tegra_dc_state *tegra;
+	struct drm_rect clip;
+	int err;
+
+	/* Propagate errors from allocation or locking failures. */
+	crtc_state = drm_atomic_get_crtc_state(state->state, state->crtc);
+	if (IS_ERR(crtc_state))
+		return PTR_ERR(crtc_state);
+
+	clip.x1 = 0;
+	clip.y1 = 0;
+	clip.x2 = crtc_state->mode.hdisplay;
+	clip.y2 = crtc_state->mode.vdisplay;
+
+	/* Check plane state for visibility and calculate clipping bounds */
+	err = drm_atomic_helper_check_plane_state(state, crtc_state, &clip,
+						  0, INT_MAX, true, true);
+	if (err < 0)
+		return err;
+
+	tegra = to_dc_state(crtc_state);
+
+	tegra->planes |= WIN_A_ACT_REQ << plane->index;
+
+	return 0;
+}
+
+int tegra_plane_format(u32 fourcc, u32 *format, u32 *swap)
+{
+	/* assume no swapping of fetched data */
+	if (swap)
+		*swap = BYTE_SWAP_NOSWAP;
+
+	switch (fourcc) {
+	case DRM_FORMAT_ARGB4444:
+		*format = WIN_COLOR_DEPTH_B4G4R4A4;
+		break;
+
+	case DRM_FORMAT_ARGB1555:
+		*format = WIN_COLOR_DEPTH_B5G5R5A1;
+		break;
+
+	case DRM_FORMAT_RGB565:
+		*format = WIN_COLOR_DEPTH_B5G6R5;
+		break;
+
+	case DRM_FORMAT_RGBA5551:
+		*format = WIN_COLOR_DEPTH_A1B5G5R5;
+		break;
+
+	case DRM_FORMAT_ARGB8888:
+		*format = WIN_COLOR_DEPTH_B8G8R8A8;
+		break;
+
+	case DRM_FORMAT_ABGR8888:
+		*format = WIN_COLOR_DEPTH_R8G8B8A8;
+		break;
+
+	case DRM_FORMAT_ABGR4444:
+		*format = WIN_COLOR_DEPTH_R4G4B4A4;
+		break;
+
+	case DRM_FORMAT_ABGR1555:
+		*format = WIN_COLOR_DEPTH_R5G5B5A;
+		break;
+
+	case DRM_FORMAT_BGRA5551:
+		*format = WIN_COLOR_DEPTH_AR5G5B5;
+		break;
+
+	case DRM_FORMAT_XRGB1555:
+		*format = WIN_COLOR_DEPTH_B5G5R5X1;
+		break;
+
+	case DRM_FORMAT_RGBX5551:
+		*format = WIN_COLOR_DEPTH_X1B5G5R5;
+		break;
+
+	case DRM_FORMAT_XBGR1555:
+		*format = WIN_COLOR_DEPTH_R5G5B5X1;
+		break;
+
+	case DRM_FORMAT_BGRX5551:
+		*format = WIN_COLOR_DEPTH_X1R5G5B5;
+		break;
+
+	case DRM_FORMAT_BGR565:
+		*format = WIN_COLOR_DEPTH_R5G6B5;
+		break;
+
+	case DRM_FORMAT_BGRA8888:
+		*format = WIN_COLOR_DEPTH_A8R8G8B8;
+		break;
+
+	case DRM_FORMAT_RGBA8888:
+		*format = WIN_COLOR_DEPTH_A8B8G8R8;
+		break;
+
+	case DRM_FORMAT_XRGB8888:
+		*format = WIN_COLOR_DEPTH_B8G8R8X8;
+		break;
+
+	case DRM_FORMAT_XBGR8888:
+		*format = WIN_COLOR_DEPTH_R8G8B8X8;
+		break;
+
+	case DRM_FORMAT_UYVY:
+		*format = WIN_COLOR_DEPTH_YCbCr422;
+		break;
+
+	case DRM_FORMAT_YUYV:
+		if (!swap)
+			return -EINVAL;
+
+		*format = WIN_COLOR_DEPTH_YCbCr422;
+		*swap = BYTE_SWAP_SWAP2;
+		break;
+
+	case DRM_FORMAT_YUV420:
+		*format = WIN_COLOR_DEPTH_YCbCr420P;
+		break;
+
+	case DRM_FORMAT_YUV422:
+		*format = WIN_COLOR_DEPTH_YCbCr422P;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+bool tegra_plane_format_is_yuv(unsigned int format, bool *planar)
+{
+	switch (format) {
+	case WIN_COLOR_DEPTH_YCbCr422:
+	case WIN_COLOR_DEPTH_YUV422:
+		if (planar)
+			*planar = false;
+
+		return true;
+
+	case WIN_COLOR_DEPTH_YCbCr420P:
+	case WIN_COLOR_DEPTH_YUV420P:
+	case WIN_COLOR_DEPTH_YCbCr422P:
+	case WIN_COLOR_DEPTH_YUV422P:
+	case WIN_COLOR_DEPTH_YCbCr422R:
+	case WIN_COLOR_DEPTH_YUV422R:
+	case WIN_COLOR_DEPTH_YCbCr422RA:
+	case WIN_COLOR_DEPTH_YUV422RA:
+		if (planar)
+			*planar = true;
+
+		return true;
+	}
+
+	if (planar)
+		*planar = false;
+
+	return false;
+}
+
+static bool __drm_format_has_alpha(u32 format)
+{
+	switch (format) {
+	case DRM_FORMAT_ARGB1555:
+	case DRM_FORMAT_RGBA5551:
+	case DRM_FORMAT_ABGR8888:
+	case DRM_FORMAT_ARGB8888:
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * This is applicable to Tegra20 and Tegra30 only where the opaque formats can
+ * be emulated using the alpha formats and alpha blending disabled.
+ */
+bool tegra_plane_format_has_alpha(unsigned int format)
+{
+	switch (format) {
+	case WIN_COLOR_DEPTH_B5G5R5A1:
+	case WIN_COLOR_DEPTH_A1B5G5R5:
+	case WIN_COLOR_DEPTH_R8G8B8A8:
+	case WIN_COLOR_DEPTH_B8G8R8A8:
+		return true;
+	}
+
+	return false;
+}
+
+int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha)
+{
+	if (tegra_plane_format_is_yuv(opaque, NULL)) {
+		*alpha = opaque;
+		return 0;
+	}
+
+	switch (opaque) {
+	case WIN_COLOR_DEPTH_B5G5R5X1:
+		*alpha = WIN_COLOR_DEPTH_B5G5R5A1;
+		return 0;
+
+	case WIN_COLOR_DEPTH_X1B5G5R5:
+		*alpha = WIN_COLOR_DEPTH_A1B5G5R5;
+		return 0;
+
+	case WIN_COLOR_DEPTH_R8G8B8X8:
+		*alpha = WIN_COLOR_DEPTH_R8G8B8A8;
+		return 0;
+
+	case WIN_COLOR_DEPTH_B8G8R8X8:
+		*alpha = WIN_COLOR_DEPTH_B8G8R8A8;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+unsigned int tegra_plane_get_overlap_index(struct tegra_plane *plane,
+					   struct tegra_plane *other)
+{
+	unsigned int index = 0, i;
+
+	WARN_ON(plane == other);
+
+	for (i = 0; i < 3; i++) {
+		if (i == plane->index)
+			continue;
+
+		if (i == other->index)
+			break;
+
+		index++;
+	}
+
+	return index;
+}
+
+void tegra_plane_check_dependent(struct tegra_plane *tegra,
+				 struct tegra_plane_state *state)
+{
+	struct drm_plane_state *old, *new;
+	struct drm_plane *plane;
+	unsigned int zpos[2];
+	unsigned int i;
+
+	for (i = 0; i < 3; i++)
+		state->dependent[i] = false;
+
+	for (i = 0; i < 2; i++)
+		zpos[i] = 0;
+
+	for_each_oldnew_plane_in_state(state->base.state, plane, old, new, i) {
+		struct tegra_plane *p = to_tegra_plane(plane);
+		unsigned index;
+
+		/* skip this plane and planes on different CRTCs */
+		if (p == tegra || new->crtc != state->base.crtc)
+			continue;
+
+		index = tegra_plane_get_overlap_index(tegra, p);
+
+		/*
+		 * If any of the other planes is on top of this plane and uses
+		 * a format with an alpha component, mark this plane as being
+		 * dependent, meaning it's alpha value will be 1 minus the sum
+		 * of alpha components of the overlapping planes.
+		 */
+		if (p->index > tegra->index) {
+			if (__drm_format_has_alpha(new->fb->format->format))
+				state->dependent[index] = true;
+
+			/* keep track of the Z position */
+			zpos[index] = p->index;
+		}
+	}
+
+	/*
+	 * The region where three windows overlap is the intersection of the
+	 * two regions where two windows overlap. It contributes to the area
+	 * if any of the windows on top of it have an alpha component.
+	 */
+	for (i = 0; i < 2; i++)
+		state->dependent[2] = state->dependent[2] ||
+				      state->dependent[i];
+
+	/*
+	 * However, if any of the windows on top of this window is opaque, it
+	 * will completely conceal this window within that area, so avoid the
+	 * window from contributing to the area.
+	 */
+	for (i = 0; i < 2; i++) {
+		if (zpos[i] > tegra->index)
+			state->dependent[2] = state->dependent[2] &&
+					      state->dependent[i];
+	}
+}
diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h
new file mode 100644
index 0000000000000000000000000000000000000000..6938719e7e5dd876d0b2e92df47f77dc14d5ec0e
--- /dev/null
+++ b/drivers/gpu/drm/tegra/plane.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2017 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef TEGRA_PLANE_H
+#define TEGRA_PLANE_H 1
+
+#include <drm/drm_plane.h>
+
+struct tegra_bo;
+struct tegra_dc;
+
+struct tegra_plane {
+	struct drm_plane base;
+	struct tegra_dc *dc;
+	unsigned int offset;
+	unsigned int index;
+};
+
+struct tegra_cursor {
+	struct tegra_plane base;
+
+	struct tegra_bo *bo;
+	unsigned int width;
+	unsigned int height;
+};
+
+static inline struct tegra_plane *to_tegra_plane(struct drm_plane *plane)
+{
+	return container_of(plane, struct tegra_plane, base);
+}
+
+struct tegra_plane_state {
+	struct drm_plane_state base;
+
+	struct tegra_bo_tiling tiling;
+	u32 format;
+	u32 swap;
+
+	/* used for legacy blending support only */
+	bool opaque;
+	bool dependent[3];
+};
+
+static inline struct tegra_plane_state *
+to_tegra_plane_state(struct drm_plane_state *state)
+{
+	if (state)
+		return container_of(state, struct tegra_plane_state, base);
+
+	return NULL;
+}
+
+extern const struct drm_plane_funcs tegra_plane_funcs;
+
+int tegra_plane_state_add(struct tegra_plane *plane,
+			  struct drm_plane_state *state);
+
+int tegra_plane_format(u32 fourcc, u32 *format, u32 *swap);
+bool tegra_plane_format_is_yuv(unsigned int format, bool *planar);
+bool tegra_plane_format_has_alpha(unsigned int format);
+int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha);
+void tegra_plane_check_dependent(struct tegra_plane *tegra,
+				 struct tegra_plane_state *state);
+
+#endif /* TEGRA_PLANE_H */
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index b0a1dedac8026e0ad89ee1227cb7b7881d3fee7d..7d2a955fc5152e3cb9dcace93fc877dc15b43e87 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -22,23 +22,37 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_panel.h>
+#include <drm/drm_scdc_helper.h>
 
 #include "dc.h"
 #include "drm.h"
 #include "sor.h"
 #include "trace.h"
 
+/*
+ * XXX Remove this after the commit adding it to soc/tegra/pmc.h has been
+ * merged. Having this around after the commit is merged should be safe since
+ * the preprocessor will effectively replace all occurrences and therefore no
+ * duplicate will be defined.
+ */
+#define TEGRA_IO_PAD_HDMI_DP0 26
+
 #define SOR_REKEY 0x38
 
 struct tegra_sor_hdmi_settings {
 	unsigned long frequency;
 
 	u8 vcocap;
+	u8 filter;
 	u8 ichpmp;
 	u8 loadadj;
-	u8 termadj;
-	u8 tx_pu;
-	u8 bg_vref;
+	u8 tmds_termadj;
+	u8 tx_pu_value;
+	u8 bg_temp_coef;
+	u8 bg_vref_level;
+	u8 avdd10_level;
+	u8 avdd14_level;
+	u8 sparepll;
 
 	u8 drive_current[4];
 	u8 preemphasis[4];
@@ -49,51 +63,76 @@ static const struct tegra_sor_hdmi_settings tegra210_sor_hdmi_defaults[] = {
 	{
 		.frequency = 54000000,
 		.vcocap = 0x0,
+		.filter = 0x0,
 		.ichpmp = 0x1,
 		.loadadj = 0x3,
-		.termadj = 0x9,
-		.tx_pu = 0x10,
-		.bg_vref = 0x8,
+		.tmds_termadj = 0x9,
+		.tx_pu_value = 0x10,
+		.bg_temp_coef = 0x3,
+		.bg_vref_level = 0x8,
+		.avdd10_level = 0x4,
+		.avdd14_level = 0x4,
+		.sparepll = 0x0,
 		.drive_current = { 0x33, 0x3a, 0x3a, 0x3a },
 		.preemphasis = { 0x00, 0x00, 0x00, 0x00 },
 	}, {
 		.frequency = 75000000,
 		.vcocap = 0x3,
+		.filter = 0x0,
 		.ichpmp = 0x1,
 		.loadadj = 0x3,
-		.termadj = 0x9,
-		.tx_pu = 0x40,
-		.bg_vref = 0x8,
+		.tmds_termadj = 0x9,
+		.tx_pu_value = 0x40,
+		.bg_temp_coef = 0x3,
+		.bg_vref_level = 0x8,
+		.avdd10_level = 0x4,
+		.avdd14_level = 0x4,
+		.sparepll = 0x0,
 		.drive_current = { 0x33, 0x3a, 0x3a, 0x3a },
 		.preemphasis = { 0x00, 0x00, 0x00, 0x00 },
 	}, {
 		.frequency = 150000000,
 		.vcocap = 0x3,
+		.filter = 0x0,
 		.ichpmp = 0x1,
 		.loadadj = 0x3,
-		.termadj = 0x9,
-		.tx_pu = 0x66,
-		.bg_vref = 0x8,
+		.tmds_termadj = 0x9,
+		.tx_pu_value = 0x66,
+		.bg_temp_coef = 0x3,
+		.bg_vref_level = 0x8,
+		.avdd10_level = 0x4,
+		.avdd14_level = 0x4,
+		.sparepll = 0x0,
 		.drive_current = { 0x33, 0x3a, 0x3a, 0x3a },
 		.preemphasis = { 0x00, 0x00, 0x00, 0x00 },
 	}, {
 		.frequency = 300000000,
 		.vcocap = 0x3,
+		.filter = 0x0,
 		.ichpmp = 0x1,
 		.loadadj = 0x3,
-		.termadj = 0x9,
-		.tx_pu = 0x66,
-		.bg_vref = 0xa,
+		.tmds_termadj = 0x9,
+		.tx_pu_value = 0x66,
+		.bg_temp_coef = 0x3,
+		.bg_vref_level = 0xa,
+		.avdd10_level = 0x4,
+		.avdd14_level = 0x4,
+		.sparepll = 0x0,
 		.drive_current = { 0x33, 0x3f, 0x3f, 0x3f },
 		.preemphasis = { 0x00, 0x17, 0x17, 0x17 },
 	}, {
 		.frequency = 600000000,
 		.vcocap = 0x3,
+		.filter = 0x0,
 		.ichpmp = 0x1,
 		.loadadj = 0x3,
-		.termadj = 0x9,
-		.tx_pu = 0x66,
-		.bg_vref = 0x8,
+		.tmds_termadj = 0x9,
+		.tx_pu_value = 0x66,
+		.bg_temp_coef = 0x3,
+		.bg_vref_level = 0x8,
+		.avdd10_level = 0x4,
+		.avdd14_level = 0x4,
+		.sparepll = 0x0,
 		.drive_current = { 0x33, 0x3f, 0x3f, 0x3f },
 		.preemphasis = { 0x00, 0x00, 0x00, 0x00 },
 	},
@@ -103,53 +142,170 @@ static const struct tegra_sor_hdmi_settings tegra210_sor_hdmi_defaults[] = {
 	{
 		.frequency = 75000000,
 		.vcocap = 0x3,
+		.filter = 0x0,
 		.ichpmp = 0x1,
 		.loadadj = 0x3,
-		.termadj = 0x9,
-		.tx_pu = 0x40,
-		.bg_vref = 0x8,
+		.tmds_termadj = 0x9,
+		.tx_pu_value = 0x40,
+		.bg_temp_coef = 0x3,
+		.bg_vref_level = 0x8,
+		.avdd10_level = 0x4,
+		.avdd14_level = 0x4,
+		.sparepll = 0x0,
 		.drive_current = { 0x29, 0x29, 0x29, 0x29 },
 		.preemphasis = { 0x00, 0x00, 0x00, 0x00 },
 	}, {
 		.frequency = 150000000,
 		.vcocap = 0x3,
+		.filter = 0x0,
 		.ichpmp = 0x1,
 		.loadadj = 0x3,
-		.termadj = 0x9,
-		.tx_pu = 0x66,
-		.bg_vref = 0x8,
+		.tmds_termadj = 0x9,
+		.tx_pu_value = 0x66,
+		.bg_temp_coef = 0x3,
+		.bg_vref_level = 0x8,
+		.avdd10_level = 0x4,
+		.avdd14_level = 0x4,
+		.sparepll = 0x0,
 		.drive_current = { 0x30, 0x37, 0x37, 0x37 },
 		.preemphasis = { 0x01, 0x02, 0x02, 0x02 },
 	}, {
 		.frequency = 300000000,
 		.vcocap = 0x3,
+		.filter = 0x0,
 		.ichpmp = 0x6,
 		.loadadj = 0x3,
-		.termadj = 0x9,
-		.tx_pu = 0x66,
-		.bg_vref = 0xf,
+		.tmds_termadj = 0x9,
+		.tx_pu_value = 0x66,
+		.bg_temp_coef = 0x3,
+		.bg_vref_level = 0xf,
+		.avdd10_level = 0x4,
+		.avdd14_level = 0x4,
+		.sparepll = 0x0,
 		.drive_current = { 0x30, 0x37, 0x37, 0x37 },
 		.preemphasis = { 0x10, 0x3e, 0x3e, 0x3e },
 	}, {
 		.frequency = 600000000,
 		.vcocap = 0x3,
+		.filter = 0x0,
 		.ichpmp = 0xa,
 		.loadadj = 0x3,
-		.termadj = 0xb,
-		.tx_pu = 0x66,
-		.bg_vref = 0xe,
+		.tmds_termadj = 0xb,
+		.tx_pu_value = 0x66,
+		.bg_temp_coef = 0x3,
+		.bg_vref_level = 0xe,
+		.avdd10_level = 0x4,
+		.avdd14_level = 0x4,
+		.sparepll = 0x0,
 		.drive_current = { 0x35, 0x3e, 0x3e, 0x3e },
 		.preemphasis = { 0x02, 0x3f, 0x3f, 0x3f },
 	},
 };
 #endif
 
+static const struct tegra_sor_hdmi_settings tegra186_sor_hdmi_defaults[] = {
+	{
+		.frequency = 54000000,
+		.vcocap = 0,
+		.filter = 5,
+		.ichpmp = 5,
+		.loadadj = 3,
+		.tmds_termadj = 0xf,
+		.tx_pu_value = 0,
+		.bg_temp_coef = 3,
+		.bg_vref_level = 8,
+		.avdd10_level = 4,
+		.avdd14_level = 4,
+		.sparepll = 0x54,
+		.drive_current = { 0x3a, 0x3a, 0x3a, 0x33 },
+		.preemphasis = { 0x00, 0x00, 0x00, 0x00 },
+	}, {
+		.frequency = 75000000,
+		.vcocap = 1,
+		.filter = 5,
+		.ichpmp = 5,
+		.loadadj = 3,
+		.tmds_termadj = 0xf,
+		.tx_pu_value = 0,
+		.bg_temp_coef = 3,
+		.bg_vref_level = 8,
+		.avdd10_level = 4,
+		.avdd14_level = 4,
+		.sparepll = 0x44,
+		.drive_current = { 0x3a, 0x3a, 0x3a, 0x33 },
+		.preemphasis = { 0x00, 0x00, 0x00, 0x00 },
+	}, {
+		.frequency = 150000000,
+		.vcocap = 3,
+		.filter = 5,
+		.ichpmp = 5,
+		.loadadj = 3,
+		.tmds_termadj = 15,
+		.tx_pu_value = 0x66 /* 0 */,
+		.bg_temp_coef = 3,
+		.bg_vref_level = 8,
+		.avdd10_level = 4,
+		.avdd14_level = 4,
+		.sparepll = 0x00, /* 0x34 */
+		.drive_current = { 0x3a, 0x3a, 0x3a, 0x37 },
+		.preemphasis = { 0x00, 0x00, 0x00, 0x00 },
+	}, {
+		.frequency = 300000000,
+		.vcocap = 3,
+		.filter = 5,
+		.ichpmp = 5,
+		.loadadj = 3,
+		.tmds_termadj = 15,
+		.tx_pu_value = 64,
+		.bg_temp_coef = 3,
+		.bg_vref_level = 8,
+		.avdd10_level = 4,
+		.avdd14_level = 4,
+		.sparepll = 0x34,
+		.drive_current = { 0x3d, 0x3d, 0x3d, 0x33 },
+		.preemphasis = { 0x00, 0x00, 0x00, 0x00 },
+	}, {
+		.frequency = 600000000,
+		.vcocap = 3,
+		.filter = 5,
+		.ichpmp = 5,
+		.loadadj = 3,
+		.tmds_termadj = 12,
+		.tx_pu_value = 96,
+		.bg_temp_coef = 3,
+		.bg_vref_level = 8,
+		.avdd10_level = 4,
+		.avdd14_level = 4,
+		.sparepll = 0x34,
+		.drive_current = { 0x3d, 0x3d, 0x3d, 0x33 },
+		.preemphasis = { 0x00, 0x00, 0x00, 0x00 },
+	}
+};
+
+struct tegra_sor_regs {
+	unsigned int head_state0;
+	unsigned int head_state1;
+	unsigned int head_state2;
+	unsigned int head_state3;
+	unsigned int head_state4;
+	unsigned int head_state5;
+	unsigned int pll0;
+	unsigned int pll1;
+	unsigned int pll2;
+	unsigned int pll3;
+	unsigned int dp_padctl0;
+	unsigned int dp_padctl2;
+};
+
 struct tegra_sor_soc {
 	bool supports_edp;
 	bool supports_lvds;
 	bool supports_hdmi;
 	bool supports_dp;
 
+	const struct tegra_sor_regs *regs;
+	bool has_nvdisplay;
+
 	const struct tegra_sor_hdmi_settings *settings;
 	unsigned int num_settings;
 
@@ -171,6 +327,7 @@ struct tegra_sor {
 
 	const struct tegra_sor_soc *soc;
 	void __iomem *regs;
+	unsigned int index;
 
 	struct reset_control *rst;
 	struct clk *clk_parent;
@@ -183,10 +340,9 @@ struct tegra_sor {
 	struct drm_dp_aux *aux;
 
 	struct drm_info_list *debugfs_files;
-	struct drm_minor *minor;
-	struct dentry *debugfs;
 
 	const struct tegra_sor_ops *ops;
+	enum tegra_io_pad pad;
 
 	/* for HDMI 2.0 */
 	struct tegra_sor_hdmi_settings *settings;
@@ -195,11 +351,16 @@ struct tegra_sor {
 	struct regulator *avdd_io_supply;
 	struct regulator *vdd_pll_supply;
 	struct regulator *hdmi_supply;
+
+	struct delayed_work scdc;
+	bool scdc_enabled;
 };
 
 struct tegra_sor_state {
 	struct drm_connector_state base;
 
+	unsigned int link_speed;
+	unsigned long pclk;
 	unsigned int bpc;
 };
 
@@ -389,23 +550,23 @@ static int tegra_sor_dp_train_fast(struct tegra_sor *sor,
 	/* disable LVDS mode */
 	tegra_sor_writel(sor, 0, SOR_LVDS);
 
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 	value |= SOR_DP_PADCTL_TX_PU_ENABLE;
 	value &= ~SOR_DP_PADCTL_TX_PU_MASK;
 	value |= SOR_DP_PADCTL_TX_PU(2); /* XXX: don't hardcode? */
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 	value |= SOR_DP_PADCTL_CM_TXD_3 | SOR_DP_PADCTL_CM_TXD_2 |
 		 SOR_DP_PADCTL_CM_TXD_1 | SOR_DP_PADCTL_CM_TXD_0;
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
 	usleep_range(10, 100);
 
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 	value &= ~(SOR_DP_PADCTL_CM_TXD_3 | SOR_DP_PADCTL_CM_TXD_2 |
 		   SOR_DP_PADCTL_CM_TXD_1 | SOR_DP_PADCTL_CM_TXD_0);
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
 	err = drm_dp_aux_prepare(sor->aux, DP_SET_ANSI_8B10B);
 	if (err < 0)
@@ -465,47 +626,6 @@ static int tegra_sor_dp_train_fast(struct tegra_sor *sor,
 	return 0;
 }
 
-static void tegra_sor_dp_term_calibrate(struct tegra_sor *sor)
-{
-	u32 mask = 0x08, adj = 0, value;
-
-	/* enable pad calibration logic */
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
-	value &= ~SOR_DP_PADCTL_PAD_CAL_PD;
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
-
-	value = tegra_sor_readl(sor, SOR_PLL1);
-	value |= SOR_PLL1_TMDS_TERM;
-	tegra_sor_writel(sor, value, SOR_PLL1);
-
-	while (mask) {
-		adj |= mask;
-
-		value = tegra_sor_readl(sor, SOR_PLL1);
-		value &= ~SOR_PLL1_TMDS_TERMADJ_MASK;
-		value |= SOR_PLL1_TMDS_TERMADJ(adj);
-		tegra_sor_writel(sor, value, SOR_PLL1);
-
-		usleep_range(100, 200);
-
-		value = tegra_sor_readl(sor, SOR_PLL1);
-		if (value & SOR_PLL1_TERM_COMPOUT)
-			adj &= ~mask;
-
-		mask >>= 1;
-	}
-
-	value = tegra_sor_readl(sor, SOR_PLL1);
-	value &= ~SOR_PLL1_TMDS_TERMADJ_MASK;
-	value |= SOR_PLL1_TMDS_TERMADJ(adj);
-	tegra_sor_writel(sor, value, SOR_PLL1);
-
-	/* disable pad calibration logic */
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
-	value |= SOR_DP_PADCTL_PAD_CAL_PD;
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
-}
-
 static void tegra_sor_super_update(struct tegra_sor *sor)
 {
 	tegra_sor_writel(sor, 0, SOR_SUPER_STATE0);
@@ -897,31 +1017,31 @@ static void tegra_sor_mode_set(struct tegra_sor *sor,
 	 */
 
 	value = ((mode->vtotal & 0x7fff) << 16) | (mode->htotal & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE1(dc->pipe));
+	tegra_sor_writel(sor, value, sor->soc->regs->head_state1 + dc->pipe);
 
 	/* sync end = sync width - 1 */
 	vse = mode->vsync_end - mode->vsync_start - 1;
 	hse = mode->hsync_end - mode->hsync_start - 1;
 
 	value = ((vse & 0x7fff) << 16) | (hse & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE2(dc->pipe));
+	tegra_sor_writel(sor, value, sor->soc->regs->head_state2 + dc->pipe);
 
 	/* blank end = sync end + back porch */
 	vbe = vse + (mode->vtotal - mode->vsync_end);
 	hbe = hse + (mode->htotal - mode->hsync_end);
 
 	value = ((vbe & 0x7fff) << 16) | (hbe & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE3(dc->pipe));
+	tegra_sor_writel(sor, value, sor->soc->regs->head_state3 + dc->pipe);
 
 	/* blank start = blank end + active */
 	vbs = vbe + mode->vdisplay;
 	hbs = hbe + mode->hdisplay;
 
 	value = ((vbs & 0x7fff) << 16) | (hbs & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE4(dc->pipe));
+	tegra_sor_writel(sor, value, sor->soc->regs->head_state4 + dc->pipe);
 
 	/* XXX interlacing support */
-	tegra_sor_writel(sor, 0x001, SOR_HEAD_STATE5(dc->pipe));
+	tegra_sor_writel(sor, 0x001, sor->soc->regs->head_state5 + dc->pipe);
 }
 
 static int tegra_sor_detach(struct tegra_sor *sor)
@@ -1003,10 +1123,10 @@ static int tegra_sor_power_down(struct tegra_sor *sor)
 		return err;
 	}
 
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 	value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_0 |
 		   SOR_DP_PADCTL_PD_TXD_1 | SOR_DP_PADCTL_PD_TXD_2);
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
 	/* stop lane sequencer */
 	value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_UP |
@@ -1026,20 +1146,20 @@ static int tegra_sor_power_down(struct tegra_sor *sor)
 	if ((value & SOR_LANE_SEQ_CTL_TRIGGER) != 0)
 		return -ETIMEDOUT;
 
-	value = tegra_sor_readl(sor, SOR_PLL2);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value |= SOR_PLL2_PORT_POWERDOWN;
-	tegra_sor_writel(sor, value, SOR_PLL2);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
 	usleep_range(20, 100);
 
-	value = tegra_sor_readl(sor, SOR_PLL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
 	value |= SOR_PLL0_VCOPD | SOR_PLL0_PWR;
-	tegra_sor_writel(sor, value, SOR_PLL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
 
-	value = tegra_sor_readl(sor, SOR_PLL2);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value |= SOR_PLL2_SEQ_PLLCAPPD;
 	value |= SOR_PLL2_SEQ_PLLCAPPD_ENFORCE;
-	tegra_sor_writel(sor, value, SOR_PLL2);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
 	usleep_range(20, 100);
 
@@ -1105,12 +1225,133 @@ static int tegra_sor_show_crc(struct seq_file *s, void *data)
 	return err;
 }
 
+#define DEBUGFS_REG32(_name) { .name = #_name, .offset = _name }
+
+static const struct debugfs_reg32 tegra_sor_regs[] = {
+	DEBUGFS_REG32(SOR_CTXSW),
+	DEBUGFS_REG32(SOR_SUPER_STATE0),
+	DEBUGFS_REG32(SOR_SUPER_STATE1),
+	DEBUGFS_REG32(SOR_STATE0),
+	DEBUGFS_REG32(SOR_STATE1),
+	DEBUGFS_REG32(SOR_HEAD_STATE0(0)),
+	DEBUGFS_REG32(SOR_HEAD_STATE0(1)),
+	DEBUGFS_REG32(SOR_HEAD_STATE1(0)),
+	DEBUGFS_REG32(SOR_HEAD_STATE1(1)),
+	DEBUGFS_REG32(SOR_HEAD_STATE2(0)),
+	DEBUGFS_REG32(SOR_HEAD_STATE2(1)),
+	DEBUGFS_REG32(SOR_HEAD_STATE3(0)),
+	DEBUGFS_REG32(SOR_HEAD_STATE3(1)),
+	DEBUGFS_REG32(SOR_HEAD_STATE4(0)),
+	DEBUGFS_REG32(SOR_HEAD_STATE4(1)),
+	DEBUGFS_REG32(SOR_HEAD_STATE5(0)),
+	DEBUGFS_REG32(SOR_HEAD_STATE5(1)),
+	DEBUGFS_REG32(SOR_CRC_CNTRL),
+	DEBUGFS_REG32(SOR_DP_DEBUG_MVID),
+	DEBUGFS_REG32(SOR_CLK_CNTRL),
+	DEBUGFS_REG32(SOR_CAP),
+	DEBUGFS_REG32(SOR_PWR),
+	DEBUGFS_REG32(SOR_TEST),
+	DEBUGFS_REG32(SOR_PLL0),
+	DEBUGFS_REG32(SOR_PLL1),
+	DEBUGFS_REG32(SOR_PLL2),
+	DEBUGFS_REG32(SOR_PLL3),
+	DEBUGFS_REG32(SOR_CSTM),
+	DEBUGFS_REG32(SOR_LVDS),
+	DEBUGFS_REG32(SOR_CRCA),
+	DEBUGFS_REG32(SOR_CRCB),
+	DEBUGFS_REG32(SOR_BLANK),
+	DEBUGFS_REG32(SOR_SEQ_CTL),
+	DEBUGFS_REG32(SOR_LANE_SEQ_CTL),
+	DEBUGFS_REG32(SOR_SEQ_INST(0)),
+	DEBUGFS_REG32(SOR_SEQ_INST(1)),
+	DEBUGFS_REG32(SOR_SEQ_INST(2)),
+	DEBUGFS_REG32(SOR_SEQ_INST(3)),
+	DEBUGFS_REG32(SOR_SEQ_INST(4)),
+	DEBUGFS_REG32(SOR_SEQ_INST(5)),
+	DEBUGFS_REG32(SOR_SEQ_INST(6)),
+	DEBUGFS_REG32(SOR_SEQ_INST(7)),
+	DEBUGFS_REG32(SOR_SEQ_INST(8)),
+	DEBUGFS_REG32(SOR_SEQ_INST(9)),
+	DEBUGFS_REG32(SOR_SEQ_INST(10)),
+	DEBUGFS_REG32(SOR_SEQ_INST(11)),
+	DEBUGFS_REG32(SOR_SEQ_INST(12)),
+	DEBUGFS_REG32(SOR_SEQ_INST(13)),
+	DEBUGFS_REG32(SOR_SEQ_INST(14)),
+	DEBUGFS_REG32(SOR_SEQ_INST(15)),
+	DEBUGFS_REG32(SOR_PWM_DIV),
+	DEBUGFS_REG32(SOR_PWM_CTL),
+	DEBUGFS_REG32(SOR_VCRC_A0),
+	DEBUGFS_REG32(SOR_VCRC_A1),
+	DEBUGFS_REG32(SOR_VCRC_B0),
+	DEBUGFS_REG32(SOR_VCRC_B1),
+	DEBUGFS_REG32(SOR_CCRC_A0),
+	DEBUGFS_REG32(SOR_CCRC_A1),
+	DEBUGFS_REG32(SOR_CCRC_B0),
+	DEBUGFS_REG32(SOR_CCRC_B1),
+	DEBUGFS_REG32(SOR_EDATA_A0),
+	DEBUGFS_REG32(SOR_EDATA_A1),
+	DEBUGFS_REG32(SOR_EDATA_B0),
+	DEBUGFS_REG32(SOR_EDATA_B1),
+	DEBUGFS_REG32(SOR_COUNT_A0),
+	DEBUGFS_REG32(SOR_COUNT_A1),
+	DEBUGFS_REG32(SOR_COUNT_B0),
+	DEBUGFS_REG32(SOR_COUNT_B1),
+	DEBUGFS_REG32(SOR_DEBUG_A0),
+	DEBUGFS_REG32(SOR_DEBUG_A1),
+	DEBUGFS_REG32(SOR_DEBUG_B0),
+	DEBUGFS_REG32(SOR_DEBUG_B1),
+	DEBUGFS_REG32(SOR_TRIG),
+	DEBUGFS_REG32(SOR_MSCHECK),
+	DEBUGFS_REG32(SOR_XBAR_CTRL),
+	DEBUGFS_REG32(SOR_XBAR_POL),
+	DEBUGFS_REG32(SOR_DP_LINKCTL0),
+	DEBUGFS_REG32(SOR_DP_LINKCTL1),
+	DEBUGFS_REG32(SOR_LANE_DRIVE_CURRENT0),
+	DEBUGFS_REG32(SOR_LANE_DRIVE_CURRENT1),
+	DEBUGFS_REG32(SOR_LANE4_DRIVE_CURRENT0),
+	DEBUGFS_REG32(SOR_LANE4_DRIVE_CURRENT1),
+	DEBUGFS_REG32(SOR_LANE_PREEMPHASIS0),
+	DEBUGFS_REG32(SOR_LANE_PREEMPHASIS1),
+	DEBUGFS_REG32(SOR_LANE4_PREEMPHASIS0),
+	DEBUGFS_REG32(SOR_LANE4_PREEMPHASIS1),
+	DEBUGFS_REG32(SOR_LANE_POSTCURSOR0),
+	DEBUGFS_REG32(SOR_LANE_POSTCURSOR1),
+	DEBUGFS_REG32(SOR_DP_CONFIG0),
+	DEBUGFS_REG32(SOR_DP_CONFIG1),
+	DEBUGFS_REG32(SOR_DP_MN0),
+	DEBUGFS_REG32(SOR_DP_MN1),
+	DEBUGFS_REG32(SOR_DP_PADCTL0),
+	DEBUGFS_REG32(SOR_DP_PADCTL1),
+	DEBUGFS_REG32(SOR_DP_PADCTL2),
+	DEBUGFS_REG32(SOR_DP_DEBUG0),
+	DEBUGFS_REG32(SOR_DP_DEBUG1),
+	DEBUGFS_REG32(SOR_DP_SPARE0),
+	DEBUGFS_REG32(SOR_DP_SPARE1),
+	DEBUGFS_REG32(SOR_DP_AUDIO_CTRL),
+	DEBUGFS_REG32(SOR_DP_AUDIO_HBLANK_SYMBOLS),
+	DEBUGFS_REG32(SOR_DP_AUDIO_VBLANK_SYMBOLS),
+	DEBUGFS_REG32(SOR_DP_GENERIC_INFOFRAME_HEADER),
+	DEBUGFS_REG32(SOR_DP_GENERIC_INFOFRAME_SUBPACK0),
+	DEBUGFS_REG32(SOR_DP_GENERIC_INFOFRAME_SUBPACK1),
+	DEBUGFS_REG32(SOR_DP_GENERIC_INFOFRAME_SUBPACK2),
+	DEBUGFS_REG32(SOR_DP_GENERIC_INFOFRAME_SUBPACK3),
+	DEBUGFS_REG32(SOR_DP_GENERIC_INFOFRAME_SUBPACK4),
+	DEBUGFS_REG32(SOR_DP_GENERIC_INFOFRAME_SUBPACK5),
+	DEBUGFS_REG32(SOR_DP_GENERIC_INFOFRAME_SUBPACK6),
+	DEBUGFS_REG32(SOR_DP_TPG),
+	DEBUGFS_REG32(SOR_DP_TPG_CONFIG),
+	DEBUGFS_REG32(SOR_DP_LQ_CSTM0),
+	DEBUGFS_REG32(SOR_DP_LQ_CSTM1),
+	DEBUGFS_REG32(SOR_DP_LQ_CSTM2),
+};
+
 static int tegra_sor_show_regs(struct seq_file *s, void *data)
 {
 	struct drm_info_node *node = s->private;
 	struct tegra_sor *sor = node->info_ent->data;
 	struct drm_crtc *crtc = sor->output.encoder.crtc;
 	struct drm_device *drm = node->minor->dev;
+	unsigned int i;
 	int err = 0;
 
 	drm_modeset_lock_all(drm);
@@ -1120,126 +1361,12 @@ static int tegra_sor_show_regs(struct seq_file *s, void *data)
 		goto unlock;
 	}
 
-#define DUMP_REG(name)						\
-	seq_printf(s, "%-38s %#05x %08x\n", #name, name,	\
-		   tegra_sor_readl(sor, name))
-
-	DUMP_REG(SOR_CTXSW);
-	DUMP_REG(SOR_SUPER_STATE0);
-	DUMP_REG(SOR_SUPER_STATE1);
-	DUMP_REG(SOR_STATE0);
-	DUMP_REG(SOR_STATE1);
-	DUMP_REG(SOR_HEAD_STATE0(0));
-	DUMP_REG(SOR_HEAD_STATE0(1));
-	DUMP_REG(SOR_HEAD_STATE1(0));
-	DUMP_REG(SOR_HEAD_STATE1(1));
-	DUMP_REG(SOR_HEAD_STATE2(0));
-	DUMP_REG(SOR_HEAD_STATE2(1));
-	DUMP_REG(SOR_HEAD_STATE3(0));
-	DUMP_REG(SOR_HEAD_STATE3(1));
-	DUMP_REG(SOR_HEAD_STATE4(0));
-	DUMP_REG(SOR_HEAD_STATE4(1));
-	DUMP_REG(SOR_HEAD_STATE5(0));
-	DUMP_REG(SOR_HEAD_STATE5(1));
-	DUMP_REG(SOR_CRC_CNTRL);
-	DUMP_REG(SOR_DP_DEBUG_MVID);
-	DUMP_REG(SOR_CLK_CNTRL);
-	DUMP_REG(SOR_CAP);
-	DUMP_REG(SOR_PWR);
-	DUMP_REG(SOR_TEST);
-	DUMP_REG(SOR_PLL0);
-	DUMP_REG(SOR_PLL1);
-	DUMP_REG(SOR_PLL2);
-	DUMP_REG(SOR_PLL3);
-	DUMP_REG(SOR_CSTM);
-	DUMP_REG(SOR_LVDS);
-	DUMP_REG(SOR_CRCA);
-	DUMP_REG(SOR_CRCB);
-	DUMP_REG(SOR_BLANK);
-	DUMP_REG(SOR_SEQ_CTL);
-	DUMP_REG(SOR_LANE_SEQ_CTL);
-	DUMP_REG(SOR_SEQ_INST(0));
-	DUMP_REG(SOR_SEQ_INST(1));
-	DUMP_REG(SOR_SEQ_INST(2));
-	DUMP_REG(SOR_SEQ_INST(3));
-	DUMP_REG(SOR_SEQ_INST(4));
-	DUMP_REG(SOR_SEQ_INST(5));
-	DUMP_REG(SOR_SEQ_INST(6));
-	DUMP_REG(SOR_SEQ_INST(7));
-	DUMP_REG(SOR_SEQ_INST(8));
-	DUMP_REG(SOR_SEQ_INST(9));
-	DUMP_REG(SOR_SEQ_INST(10));
-	DUMP_REG(SOR_SEQ_INST(11));
-	DUMP_REG(SOR_SEQ_INST(12));
-	DUMP_REG(SOR_SEQ_INST(13));
-	DUMP_REG(SOR_SEQ_INST(14));
-	DUMP_REG(SOR_SEQ_INST(15));
-	DUMP_REG(SOR_PWM_DIV);
-	DUMP_REG(SOR_PWM_CTL);
-	DUMP_REG(SOR_VCRC_A0);
-	DUMP_REG(SOR_VCRC_A1);
-	DUMP_REG(SOR_VCRC_B0);
-	DUMP_REG(SOR_VCRC_B1);
-	DUMP_REG(SOR_CCRC_A0);
-	DUMP_REG(SOR_CCRC_A1);
-	DUMP_REG(SOR_CCRC_B0);
-	DUMP_REG(SOR_CCRC_B1);
-	DUMP_REG(SOR_EDATA_A0);
-	DUMP_REG(SOR_EDATA_A1);
-	DUMP_REG(SOR_EDATA_B0);
-	DUMP_REG(SOR_EDATA_B1);
-	DUMP_REG(SOR_COUNT_A0);
-	DUMP_REG(SOR_COUNT_A1);
-	DUMP_REG(SOR_COUNT_B0);
-	DUMP_REG(SOR_COUNT_B1);
-	DUMP_REG(SOR_DEBUG_A0);
-	DUMP_REG(SOR_DEBUG_A1);
-	DUMP_REG(SOR_DEBUG_B0);
-	DUMP_REG(SOR_DEBUG_B1);
-	DUMP_REG(SOR_TRIG);
-	DUMP_REG(SOR_MSCHECK);
-	DUMP_REG(SOR_XBAR_CTRL);
-	DUMP_REG(SOR_XBAR_POL);
-	DUMP_REG(SOR_DP_LINKCTL0);
-	DUMP_REG(SOR_DP_LINKCTL1);
-	DUMP_REG(SOR_LANE_DRIVE_CURRENT0);
-	DUMP_REG(SOR_LANE_DRIVE_CURRENT1);
-	DUMP_REG(SOR_LANE4_DRIVE_CURRENT0);
-	DUMP_REG(SOR_LANE4_DRIVE_CURRENT1);
-	DUMP_REG(SOR_LANE_PREEMPHASIS0);
-	DUMP_REG(SOR_LANE_PREEMPHASIS1);
-	DUMP_REG(SOR_LANE4_PREEMPHASIS0);
-	DUMP_REG(SOR_LANE4_PREEMPHASIS1);
-	DUMP_REG(SOR_LANE_POSTCURSOR0);
-	DUMP_REG(SOR_LANE_POSTCURSOR1);
-	DUMP_REG(SOR_DP_CONFIG0);
-	DUMP_REG(SOR_DP_CONFIG1);
-	DUMP_REG(SOR_DP_MN0);
-	DUMP_REG(SOR_DP_MN1);
-	DUMP_REG(SOR_DP_PADCTL0);
-	DUMP_REG(SOR_DP_PADCTL1);
-	DUMP_REG(SOR_DP_DEBUG0);
-	DUMP_REG(SOR_DP_DEBUG1);
-	DUMP_REG(SOR_DP_SPARE0);
-	DUMP_REG(SOR_DP_SPARE1);
-	DUMP_REG(SOR_DP_AUDIO_CTRL);
-	DUMP_REG(SOR_DP_AUDIO_HBLANK_SYMBOLS);
-	DUMP_REG(SOR_DP_AUDIO_VBLANK_SYMBOLS);
-	DUMP_REG(SOR_DP_GENERIC_INFOFRAME_HEADER);
-	DUMP_REG(SOR_DP_GENERIC_INFOFRAME_SUBPACK0);
-	DUMP_REG(SOR_DP_GENERIC_INFOFRAME_SUBPACK1);
-	DUMP_REG(SOR_DP_GENERIC_INFOFRAME_SUBPACK2);
-	DUMP_REG(SOR_DP_GENERIC_INFOFRAME_SUBPACK3);
-	DUMP_REG(SOR_DP_GENERIC_INFOFRAME_SUBPACK4);
-	DUMP_REG(SOR_DP_GENERIC_INFOFRAME_SUBPACK5);
-	DUMP_REG(SOR_DP_GENERIC_INFOFRAME_SUBPACK6);
-	DUMP_REG(SOR_DP_TPG);
-	DUMP_REG(SOR_DP_TPG_CONFIG);
-	DUMP_REG(SOR_DP_LQ_CSTM0);
-	DUMP_REG(SOR_DP_LQ_CSTM1);
-	DUMP_REG(SOR_DP_LQ_CSTM2);
-
-#undef DUMP_REG
+	for (i = 0; i < ARRAY_SIZE(tegra_sor_regs); i++) {
+		unsigned int offset = tegra_sor_regs[i].offset;
+
+		seq_printf(s, "%-38s %#05x %08x\n", tegra_sor_regs[i].name,
+			   offset, tegra_sor_readl(sor, offset));
+	}
 
 unlock:
 	drm_modeset_unlock_all(drm);
@@ -1251,57 +1378,46 @@ static const struct drm_info_list debugfs_files[] = {
 	{ "regs", tegra_sor_show_regs, 0, NULL },
 };
 
-static int tegra_sor_debugfs_init(struct tegra_sor *sor,
-				  struct drm_minor *minor)
+static int tegra_sor_late_register(struct drm_connector *connector)
 {
-	const char *name = sor->soc->supports_dp ? "sor1" : "sor";
-	unsigned int i;
+	struct tegra_output *output = connector_to_output(connector);
+	unsigned int i, count = ARRAY_SIZE(debugfs_files);
+	struct drm_minor *minor = connector->dev->primary;
+	struct dentry *root = connector->debugfs_entry;
+	struct tegra_sor *sor = to_sor(output);
 	int err;
 
-	sor->debugfs = debugfs_create_dir(name, minor->debugfs_root);
-	if (!sor->debugfs)
-		return -ENOMEM;
-
 	sor->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
 				     GFP_KERNEL);
-	if (!sor->debugfs_files) {
-		err = -ENOMEM;
-		goto remove;
-	}
+	if (!sor->debugfs_files)
+		return -ENOMEM;
 
-	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
+	for (i = 0; i < count; i++)
 		sor->debugfs_files[i].data = sor;
 
-	err = drm_debugfs_create_files(sor->debugfs_files,
-				       ARRAY_SIZE(debugfs_files),
-				       sor->debugfs, minor);
+	err = drm_debugfs_create_files(sor->debugfs_files, count, root, minor);
 	if (err < 0)
 		goto free;
 
-	sor->minor = minor;
-
 	return 0;
 
 free:
 	kfree(sor->debugfs_files);
 	sor->debugfs_files = NULL;
-remove:
-	debugfs_remove_recursive(sor->debugfs);
-	sor->debugfs = NULL;
+
 	return err;
 }
 
-static void tegra_sor_debugfs_exit(struct tegra_sor *sor)
+static void tegra_sor_early_unregister(struct drm_connector *connector)
 {
-	drm_debugfs_remove_files(sor->debugfs_files, ARRAY_SIZE(debugfs_files),
-				 sor->minor);
-	sor->minor = NULL;
+	struct tegra_output *output = connector_to_output(connector);
+	unsigned int count = ARRAY_SIZE(debugfs_files);
+	struct tegra_sor *sor = to_sor(output);
 
+	drm_debugfs_remove_files(sor->debugfs_files, count,
+				 connector->dev->primary);
 	kfree(sor->debugfs_files);
 	sor->debugfs_files = NULL;
-
-	debugfs_remove_recursive(sor->debugfs);
-	sor->debugfs = NULL;
 }
 
 static void tegra_sor_connector_reset(struct drm_connector *connector)
@@ -1354,6 +1470,8 @@ static const struct drm_connector_funcs tegra_sor_connector_funcs = {
 	.destroy = tegra_output_connector_destroy,
 	.atomic_duplicate_state = tegra_sor_connector_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+	.late_register = tegra_sor_late_register,
+	.early_unregister = tegra_sor_early_unregister,
 };
 
 static int tegra_sor_connector_get_modes(struct drm_connector *connector)
@@ -1377,10 +1495,6 @@ static enum drm_mode_status
 tegra_sor_connector_mode_valid(struct drm_connector *connector,
 			       struct drm_display_mode *mode)
 {
-	/* HDMI 2.0 modes are not yet supported */
-	if (mode->clock > 340000)
-		return MODE_NOCLOCK;
-
 	return MODE_OK;
 }
 
@@ -1417,7 +1531,7 @@ static void tegra_sor_edp_disable(struct drm_encoder *encoder)
 	 */
 	if (dc) {
 		value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
-		value &= ~SOR_ENABLE;
+		value &= ~SOR_ENABLE(0);
 		tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 
 		tegra_dc_commit(dc);
@@ -1433,9 +1547,9 @@ static void tegra_sor_edp_disable(struct drm_encoder *encoder)
 			dev_err(sor->dev, "failed to disable DP: %d\n", err);
 	}
 
-	err = tegra_io_rail_power_off(TEGRA_IO_RAIL_LVDS);
+	err = tegra_io_pad_power_disable(sor->pad);
 	if (err < 0)
-		dev_err(sor->dev, "failed to power off I/O rail: %d\n", err);
+		dev_err(sor->dev, "failed to power off I/O pad: %d\n", err);
 
 	if (output->panel)
 		drm_panel_unprepare(output->panel);
@@ -1533,40 +1647,40 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK;
 	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
 
-	value = tegra_sor_readl(sor, SOR_PLL2);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value &= ~SOR_PLL2_BANDGAP_POWERDOWN;
-	tegra_sor_writel(sor, value, SOR_PLL2);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 	usleep_range(20, 100);
 
-	value = tegra_sor_readl(sor, SOR_PLL3);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll3);
 	value |= SOR_PLL3_PLL_VDD_MODE_3V3;
-	tegra_sor_writel(sor, value, SOR_PLL3);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll3);
 
 	value = SOR_PLL0_ICHPMP(0xf) | SOR_PLL0_VCOCAP_RST |
 		SOR_PLL0_PLLREG_LEVEL_V45 | SOR_PLL0_RESISTOR_EXT;
-	tegra_sor_writel(sor, value, SOR_PLL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
 
-	value = tegra_sor_readl(sor, SOR_PLL2);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value |= SOR_PLL2_SEQ_PLLCAPPD;
 	value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE;
 	value |= SOR_PLL2_LVDS_ENABLE;
-	tegra_sor_writel(sor, value, SOR_PLL2);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
 	value = SOR_PLL1_TERM_COMPOUT | SOR_PLL1_TMDS_TERM;
-	tegra_sor_writel(sor, value, SOR_PLL1);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll1);
 
 	while (true) {
-		value = tegra_sor_readl(sor, SOR_PLL2);
+		value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 		if ((value & SOR_PLL2_SEQ_PLLCAPPD_ENFORCE) == 0)
 			break;
 
 		usleep_range(250, 1000);
 	}
 
-	value = tegra_sor_readl(sor, SOR_PLL2);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value &= ~SOR_PLL2_POWERDOWN_OVERRIDE;
 	value &= ~SOR_PLL2_PORT_POWERDOWN;
-	tegra_sor_writel(sor, value, SOR_PLL2);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
 	/*
 	 * power up
@@ -1579,49 +1693,49 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
 
 	/* step 1 */
-	value = tegra_sor_readl(sor, SOR_PLL2);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value |= SOR_PLL2_SEQ_PLLCAPPD_ENFORCE | SOR_PLL2_PORT_POWERDOWN |
 		 SOR_PLL2_BANDGAP_POWERDOWN;
-	tegra_sor_writel(sor, value, SOR_PLL2);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
-	value = tegra_sor_readl(sor, SOR_PLL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
 	value |= SOR_PLL0_VCOPD | SOR_PLL0_PWR;
-	tegra_sor_writel(sor, value, SOR_PLL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
 
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 	value &= ~SOR_DP_PADCTL_PAD_CAL_PD;
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
 	/* step 2 */
-	err = tegra_io_rail_power_on(TEGRA_IO_RAIL_LVDS);
+	err = tegra_io_pad_power_enable(sor->pad);
 	if (err < 0)
-		dev_err(sor->dev, "failed to power on I/O rail: %d\n", err);
+		dev_err(sor->dev, "failed to power on I/O pad: %d\n", err);
 
 	usleep_range(5, 100);
 
 	/* step 3 */
-	value = tegra_sor_readl(sor, SOR_PLL2);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value &= ~SOR_PLL2_BANDGAP_POWERDOWN;
-	tegra_sor_writel(sor, value, SOR_PLL2);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
 	usleep_range(20, 100);
 
 	/* step 4 */
-	value = tegra_sor_readl(sor, SOR_PLL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
 	value &= ~SOR_PLL0_VCOPD;
 	value &= ~SOR_PLL0_PWR;
-	tegra_sor_writel(sor, value, SOR_PLL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
 
-	value = tegra_sor_readl(sor, SOR_PLL2);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE;
-	tegra_sor_writel(sor, value, SOR_PLL2);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
 	usleep_range(200, 1000);
 
 	/* step 5 */
-	value = tegra_sor_readl(sor, SOR_PLL2);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value &= ~SOR_PLL2_PORT_POWERDOWN;
-	tegra_sor_writel(sor, value, SOR_PLL2);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
 	/* XXX not in TRM */
 	for (value = 0, i = 0; i < 5; i++)
@@ -1637,7 +1751,7 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 		dev_err(sor->dev, "failed to set parent clock: %d\n", err);
 
 	/* power DP lanes */
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 
 	if (link.num_lanes <= 2)
 		value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_2);
@@ -1654,7 +1768,7 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	else
 		value |= SOR_DP_PADCTL_PD_TXD_0;
 
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
 	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
 	value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK;
@@ -1698,9 +1812,9 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	tegra_sor_writel(sor, value, SOR_DP_TPG);
 
 	/* enable pad calibration logic */
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 	value |= SOR_DP_PADCTL_PAD_CAL_PD;
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
 	err = drm_dp_link_probe(sor->aux, &link);
 	if (err < 0)
@@ -1773,7 +1887,7 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	tegra_sor_update(sor);
 
 	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
-	value |= SOR_ENABLE;
+	value |= SOR_ENABLE(0);
 	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 
 	tegra_dc_commit(dc);
@@ -1805,6 +1919,18 @@ tegra_sor_encoder_atomic_check(struct drm_encoder *encoder,
 
 	info = &output->connector.display_info;
 
+	/*
+	 * For HBR2 modes, the SOR brick needs to use the x20 multiplier, so
+	 * the pixel clock must be corrected accordingly.
+	 */
+	if (pclk >= 340000000) {
+		state->link_speed = 20;
+		state->pclk = pclk / 2;
+	} else {
+		state->link_speed = 10;
+		state->pclk = pclk;
+	}
+
 	err = tegra_dc_state_setup_clock(dc, crtc_state, sor->clk_parent,
 					 pclk, 0);
 	if (err < 0) {
@@ -1955,6 +2081,81 @@ tegra_sor_hdmi_find_settings(struct tegra_sor *sor, unsigned long frequency)
 	return NULL;
 }
 
+static void tegra_sor_hdmi_disable_scrambling(struct tegra_sor *sor)
+{
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_HDMI2_CTRL);
+	value &= ~SOR_HDMI2_CTRL_CLOCK_MODE_DIV_BY_4;
+	value &= ~SOR_HDMI2_CTRL_SCRAMBLE;
+	tegra_sor_writel(sor, value, SOR_HDMI2_CTRL);
+}
+
+static void tegra_sor_hdmi_scdc_disable(struct tegra_sor *sor)
+{
+	struct i2c_adapter *ddc = sor->output.ddc;
+
+	drm_scdc_set_high_tmds_clock_ratio(ddc, false);
+	drm_scdc_set_scrambling(ddc, false);
+
+	tegra_sor_hdmi_disable_scrambling(sor);
+}
+
+static void tegra_sor_hdmi_scdc_stop(struct tegra_sor *sor)
+{
+	if (sor->scdc_enabled) {
+		cancel_delayed_work_sync(&sor->scdc);
+		tegra_sor_hdmi_scdc_disable(sor);
+	}
+}
+
+static void tegra_sor_hdmi_enable_scrambling(struct tegra_sor *sor)
+{
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_HDMI2_CTRL);
+	value |= SOR_HDMI2_CTRL_CLOCK_MODE_DIV_BY_4;
+	value |= SOR_HDMI2_CTRL_SCRAMBLE;
+	tegra_sor_writel(sor, value, SOR_HDMI2_CTRL);
+}
+
+static void tegra_sor_hdmi_scdc_enable(struct tegra_sor *sor)
+{
+	struct i2c_adapter *ddc = sor->output.ddc;
+
+	drm_scdc_set_high_tmds_clock_ratio(ddc, true);
+	drm_scdc_set_scrambling(ddc, true);
+
+	tegra_sor_hdmi_enable_scrambling(sor);
+}
+
+static void tegra_sor_hdmi_scdc_work(struct work_struct *work)
+{
+	struct tegra_sor *sor = container_of(work, struct tegra_sor, scdc.work);
+	struct i2c_adapter *ddc = sor->output.ddc;
+
+	if (!drm_scdc_get_scrambling_status(ddc)) {
+		DRM_DEBUG_KMS("SCDC not scrambled\n");
+		tegra_sor_hdmi_scdc_enable(sor);
+	}
+
+	schedule_delayed_work(&sor->scdc, msecs_to_jiffies(5000));
+}
+
+static void tegra_sor_hdmi_scdc_start(struct tegra_sor *sor)
+{
+	struct drm_scdc *scdc = &sor->output.connector.display_info.hdmi.scdc;
+	struct drm_display_mode *mode;
+
+	mode = &sor->output.encoder.crtc->state->adjusted_mode;
+
+	if (mode->clock >= 340000 && scdc->supported) {
+		schedule_delayed_work(&sor->scdc, msecs_to_jiffies(5000));
+		tegra_sor_hdmi_scdc_enable(sor);
+		sor->scdc_enabled = true;
+	}
+}
+
 static void tegra_sor_hdmi_disable(struct drm_encoder *encoder)
 {
 	struct tegra_output *output = encoder_to_output(encoder);
@@ -1963,6 +2164,8 @@ static void tegra_sor_hdmi_disable(struct drm_encoder *encoder)
 	u32 value;
 	int err;
 
+	tegra_sor_hdmi_scdc_stop(sor);
+
 	err = tegra_sor_detach(sor);
 	if (err < 0)
 		dev_err(sor->dev, "failed to detach SOR: %d\n", err);
@@ -1972,8 +2175,12 @@ static void tegra_sor_hdmi_disable(struct drm_encoder *encoder)
 
 	/* disable display to SOR clock */
 	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
-	value &= ~SOR1_TIMING_CYA;
-	value &= ~SOR1_ENABLE;
+
+	if (!sor->soc->has_nvdisplay)
+		value &= ~(SOR1_TIMING_CYA | SOR_ENABLE(1));
+	else
+		value &= ~SOR_ENABLE(sor->index);
+
 	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 
 	tegra_dc_commit(dc);
@@ -1982,9 +2189,9 @@ static void tegra_sor_hdmi_disable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to power down SOR: %d\n", err);
 
-	err = tegra_io_rail_power_off(TEGRA_IO_RAIL_HDMI);
+	err = tegra_io_pad_power_disable(sor->pad);
 	if (err < 0)
-		dev_err(sor->dev, "failed to power off HDMI rail: %d\n", err);
+		dev_err(sor->dev, "failed to power off I/O pad: %d\n", err);
 
 	pm_runtime_put(sor->dev);
 }
@@ -1998,12 +2205,14 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	struct tegra_sor *sor = to_sor(output);
 	struct tegra_sor_state *state;
 	struct drm_display_mode *mode;
+	unsigned long rate, pclk;
 	unsigned int div, i;
 	u32 value;
 	int err;
 
 	state = to_sor_state(output->connector.state);
 	mode = &encoder->crtc->state->adjusted_mode;
+	pclk = mode->clock * 1000;
 
 	pm_runtime_get_sync(sor->dev);
 
@@ -2016,44 +2225,44 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 
 	div = clk_get_rate(sor->clk) / 1000000 * 4;
 
-	err = tegra_io_rail_power_on(TEGRA_IO_RAIL_HDMI);
+	err = tegra_io_pad_power_enable(sor->pad);
 	if (err < 0)
-		dev_err(sor->dev, "failed to power on HDMI rail: %d\n", err);
+		dev_err(sor->dev, "failed to power on I/O pad: %d\n", err);
 
 	usleep_range(20, 100);
 
-	value = tegra_sor_readl(sor, SOR_PLL2);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value &= ~SOR_PLL2_BANDGAP_POWERDOWN;
-	tegra_sor_writel(sor, value, SOR_PLL2);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
 	usleep_range(20, 100);
 
-	value = tegra_sor_readl(sor, SOR_PLL3);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll3);
 	value &= ~SOR_PLL3_PLL_VDD_MODE_3V3;
-	tegra_sor_writel(sor, value, SOR_PLL3);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll3);
 
-	value = tegra_sor_readl(sor, SOR_PLL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
 	value &= ~SOR_PLL0_VCOPD;
 	value &= ~SOR_PLL0_PWR;
-	tegra_sor_writel(sor, value, SOR_PLL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
 
-	value = tegra_sor_readl(sor, SOR_PLL2);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE;
-	tegra_sor_writel(sor, value, SOR_PLL2);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
 	usleep_range(200, 400);
 
-	value = tegra_sor_readl(sor, SOR_PLL2);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value &= ~SOR_PLL2_POWERDOWN_OVERRIDE;
 	value &= ~SOR_PLL2_PORT_POWERDOWN;
-	tegra_sor_writel(sor, value, SOR_PLL2);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
 	usleep_range(20, 100);
 
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 	value |= SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_0 |
 		 SOR_DP_PADCTL_PD_TXD_1 | SOR_DP_PADCTL_PD_TXD_2;
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
 	while (true) {
 		value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL);
@@ -2079,18 +2288,30 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK;
 	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
 
-	if (mode->clock < 340000)
+	if (mode->clock < 340000) {
+		DRM_DEBUG_KMS("setting 2.7 GHz link speed\n");
 		value |= SOR_CLK_CNTRL_DP_LINK_SPEED_G2_70;
-	else
+	} else {
+		DRM_DEBUG_KMS("setting 5.4 GHz link speed\n");
 		value |= SOR_CLK_CNTRL_DP_LINK_SPEED_G5_40;
+	}
 
 	value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_PCLK;
 	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
 
+	/* SOR pad PLL stabilization time */
+	usleep_range(250, 1000);
+
+	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
+	value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK;
+	value |= SOR_DP_LINKCTL_LANE_COUNT(4);
+	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
+
 	value = tegra_sor_readl(sor, SOR_DP_SPARE0);
-	value |= SOR_DP_SPARE_DISP_VIDEO_PREAMBLE;
+	value &= ~SOR_DP_SPARE_DISP_VIDEO_PREAMBLE;
 	value &= ~SOR_DP_SPARE_PANEL_INTERNAL;
-	value |= SOR_DP_SPARE_SEQ_ENABLE;
+	value &= ~SOR_DP_SPARE_SEQ_ENABLE;
+	value &= ~SOR_DP_SPARE_MACRO_SOR_CLK;
 	tegra_sor_writel(sor, value, SOR_DP_SPARE0);
 
 	value = SOR_SEQ_CTL_PU_PC(0) | SOR_SEQ_CTL_PU_PC_ALT(0) |
@@ -2102,9 +2323,11 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	tegra_sor_writel(sor, value, SOR_SEQ_INST(0));
 	tegra_sor_writel(sor, value, SOR_SEQ_INST(8));
 
-	/* program the reference clock */
-	value = SOR_REFCLK_DIV_INT(div) | SOR_REFCLK_DIV_FRAC(div);
-	tegra_sor_writel(sor, value, SOR_REFCLK);
+	if (!sor->soc->has_nvdisplay) {
+		/* program the reference clock */
+		value = SOR_REFCLK_DIV_INT(div) | SOR_REFCLK_DIV_FRAC(div);
+		tegra_sor_writel(sor, value, SOR_REFCLK);
+	}
 
 	/* XXX not in TRM */
 	for (value = 0, i = 0; i < 5; i++)
@@ -2127,13 +2350,25 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 		return;
 	}
 
-	value = SOR_INPUT_CONTROL_HDMI_SRC_SELECT(dc->pipe);
+	/* adjust clock rate for HDMI 2.0 modes */
+	rate = clk_get_rate(sor->clk_parent);
+
+	if (mode->clock >= 340000)
+		rate /= 2;
 
-	/* XXX is this the proper check? */
-	if (mode->clock < 75000)
-		value |= SOR_INPUT_CONTROL_ARM_VIDEO_RANGE_LIMITED;
+	DRM_DEBUG_KMS("setting clock to %lu Hz, mode: %lu Hz\n", rate, pclk);
 
-	tegra_sor_writel(sor, value, SOR_INPUT_CONTROL);
+	clk_set_rate(sor->clk, rate);
+
+	if (!sor->soc->has_nvdisplay) {
+		value = SOR_INPUT_CONTROL_HDMI_SRC_SELECT(dc->pipe);
+
+		/* XXX is this the proper check? */
+		if (mode->clock < 75000)
+			value |= SOR_INPUT_CONTROL_ARM_VIDEO_RANGE_LIMITED;
+
+		tegra_sor_writel(sor, value, SOR_INPUT_CONTROL);
+	}
 
 	max_ac = ((mode->htotal - mode->hdisplay) - SOR_REKEY - 18) / 32;
 
@@ -2141,20 +2376,23 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 		SOR_HDMI_CTRL_AUDIO_LAYOUT | SOR_HDMI_CTRL_REKEY(SOR_REKEY);
 	tegra_sor_writel(sor, value, SOR_HDMI_CTRL);
 
-	/* H_PULSE2 setup */
-	pulse_start = h_ref_to_sync + (mode->hsync_end - mode->hsync_start) +
-		      (mode->htotal - mode->hsync_end) - 10;
+	if (!dc->soc->has_nvdisplay) {
+		/* H_PULSE2 setup */
+		pulse_start = h_ref_to_sync +
+			      (mode->hsync_end - mode->hsync_start) +
+			      (mode->htotal - mode->hsync_end) - 10;
 
-	value = PULSE_LAST_END_A | PULSE_QUAL_VACTIVE |
-		PULSE_POLARITY_HIGH | PULSE_MODE_NORMAL;
-	tegra_dc_writel(dc, value, DC_DISP_H_PULSE2_CONTROL);
+		value = PULSE_LAST_END_A | PULSE_QUAL_VACTIVE |
+			PULSE_POLARITY_HIGH | PULSE_MODE_NORMAL;
+		tegra_dc_writel(dc, value, DC_DISP_H_PULSE2_CONTROL);
 
-	value = PULSE_END(pulse_start + 8) | PULSE_START(pulse_start);
-	tegra_dc_writel(dc, value, DC_DISP_H_PULSE2_POSITION_A);
+		value = PULSE_END(pulse_start + 8) | PULSE_START(pulse_start);
+		tegra_dc_writel(dc, value, DC_DISP_H_PULSE2_POSITION_A);
 
-	value = tegra_dc_readl(dc, DC_DISP_DISP_SIGNAL_OPTIONS0);
-	value |= H_PULSE2_ENABLE;
-	tegra_dc_writel(dc, value, DC_DISP_DISP_SIGNAL_OPTIONS0);
+		value = tegra_dc_readl(dc, DC_DISP_DISP_SIGNAL_OPTIONS0);
+		value |= H_PULSE2_ENABLE;
+		tegra_dc_writel(dc, value, DC_DISP_DISP_SIGNAL_OPTIONS0);
+	}
 
 	/* infoframe setup */
 	err = tegra_sor_hdmi_setup_avi_infoframe(sor, mode);
@@ -2171,9 +2409,9 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	tegra_sor_writel(sor, value, SOR_STATE1);
 
 	/* power up pad calibration */
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 	value &= ~SOR_DP_PADCTL_PAD_CAL_PD;
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
 	/* production settings */
 	settings = tegra_sor_hdmi_find_settings(sor, mode->clock * 1000);
@@ -2183,51 +2421,68 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 		return;
 	}
 
-	value = tegra_sor_readl(sor, SOR_PLL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
 	value &= ~SOR_PLL0_ICHPMP_MASK;
+	value &= ~SOR_PLL0_FILTER_MASK;
 	value &= ~SOR_PLL0_VCOCAP_MASK;
 	value |= SOR_PLL0_ICHPMP(settings->ichpmp);
+	value |= SOR_PLL0_FILTER(settings->filter);
 	value |= SOR_PLL0_VCOCAP(settings->vcocap);
-	tegra_sor_writel(sor, value, SOR_PLL0);
-
-	tegra_sor_dp_term_calibrate(sor);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
 
-	value = tegra_sor_readl(sor, SOR_PLL1);
+	/* XXX not in TRM */
+	value = tegra_sor_readl(sor, sor->soc->regs->pll1);
 	value &= ~SOR_PLL1_LOADADJ_MASK;
+	value &= ~SOR_PLL1_TMDS_TERMADJ_MASK;
 	value |= SOR_PLL1_LOADADJ(settings->loadadj);
-	tegra_sor_writel(sor, value, SOR_PLL1);
+	value |= SOR_PLL1_TMDS_TERMADJ(settings->tmds_termadj);
+	value |= SOR_PLL1_TMDS_TERM;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll1);
 
-	value = tegra_sor_readl(sor, SOR_PLL3);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll3);
+	value &= ~SOR_PLL3_BG_TEMP_COEF_MASK;
 	value &= ~SOR_PLL3_BG_VREF_LEVEL_MASK;
-	value |= SOR_PLL3_BG_VREF_LEVEL(settings->bg_vref);
-	tegra_sor_writel(sor, value, SOR_PLL3);
-
-	value = settings->drive_current[0] << 24 |
-		settings->drive_current[1] << 16 |
-		settings->drive_current[2] <<  8 |
-		settings->drive_current[3] <<  0;
+	value &= ~SOR_PLL3_AVDD10_LEVEL_MASK;
+	value &= ~SOR_PLL3_AVDD14_LEVEL_MASK;
+	value |= SOR_PLL3_BG_TEMP_COEF(settings->bg_temp_coef);
+	value |= SOR_PLL3_BG_VREF_LEVEL(settings->bg_vref_level);
+	value |= SOR_PLL3_AVDD10_LEVEL(settings->avdd10_level);
+	value |= SOR_PLL3_AVDD14_LEVEL(settings->avdd14_level);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll3);
+
+	value = settings->drive_current[3] << 24 |
+		settings->drive_current[2] << 16 |
+		settings->drive_current[1] <<  8 |
+		settings->drive_current[0] <<  0;
 	tegra_sor_writel(sor, value, SOR_LANE_DRIVE_CURRENT0);
 
-	value = settings->preemphasis[0] << 24 |
-		settings->preemphasis[1] << 16 |
-		settings->preemphasis[2] <<  8 |
-		settings->preemphasis[3] <<  0;
+	value = settings->preemphasis[3] << 24 |
+		settings->preemphasis[2] << 16 |
+		settings->preemphasis[1] <<  8 |
+		settings->preemphasis[0] <<  0;
 	tegra_sor_writel(sor, value, SOR_LANE_PREEMPHASIS0);
 
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 	value &= ~SOR_DP_PADCTL_TX_PU_MASK;
 	value |= SOR_DP_PADCTL_TX_PU_ENABLE;
-	value |= SOR_DP_PADCTL_TX_PU(settings->tx_pu);
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
+	value |= SOR_DP_PADCTL_TX_PU(settings->tx_pu_value);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl2);
+	value &= ~SOR_DP_PADCTL_SPAREPLL_MASK;
+	value |= SOR_DP_PADCTL_SPAREPLL(settings->sparepll);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl2);
 
 	/* power down pad calibration */
-	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 	value |= SOR_DP_PADCTL_PAD_CAL_PD;
-	tegra_sor_writel(sor, value, SOR_DP_PADCTL0);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
-	/* miscellaneous display controller settings */
-	value = VSYNC_H_POSITION(1);
-	tegra_dc_writel(dc, value, DC_DISP_DISP_TIMING_OPTIONS);
+	if (!dc->soc->has_nvdisplay) {
+		/* miscellaneous display controller settings */
+		value = VSYNC_H_POSITION(1);
+		tegra_dc_writel(dc, value, DC_DISP_DISP_TIMING_OPTIONS);
+	}
 
 	value = tegra_dc_readl(dc, DC_DISP_DISP_COLOR_CONTROL);
 	value &= ~DITHER_CONTROL_MASK;
@@ -2242,6 +2497,14 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 		value |= BASE_COLOR_SIZE_888;
 		break;
 
+	case 10:
+		value |= BASE_COLOR_SIZE_101010;
+		break;
+
+	case 12:
+		value |= BASE_COLOR_SIZE_121212;
+		break;
+
 	default:
 		WARN(1, "%u bits-per-color not supported\n", state->bpc);
 		value |= BASE_COLOR_SIZE_888;
@@ -2250,40 +2513,65 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 
 	tegra_dc_writel(dc, value, DC_DISP_DISP_COLOR_CONTROL);
 
+	/* XXX set display head owner */
+	value = tegra_sor_readl(sor, SOR_STATE1);
+	value &= ~SOR_STATE_ASY_OWNER_MASK;
+	value |= SOR_STATE_ASY_OWNER(1 + dc->pipe);
+	tegra_sor_writel(sor, value, SOR_STATE1);
+
 	err = tegra_sor_power_up(sor, 250);
 	if (err < 0)
 		dev_err(sor->dev, "failed to power up SOR: %d\n", err);
 
 	/* configure dynamic range of output */
-	value = tegra_sor_readl(sor, SOR_HEAD_STATE0(dc->pipe));
+	value = tegra_sor_readl(sor, sor->soc->regs->head_state0 + dc->pipe);
 	value &= ~SOR_HEAD_STATE_RANGECOMPRESS_MASK;
 	value &= ~SOR_HEAD_STATE_DYNRANGE_MASK;
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE0(dc->pipe));
+	tegra_sor_writel(sor, value, sor->soc->regs->head_state0 + dc->pipe);
 
 	/* configure colorspace */
-	value = tegra_sor_readl(sor, SOR_HEAD_STATE0(dc->pipe));
+	value = tegra_sor_readl(sor, sor->soc->regs->head_state0 + dc->pipe);
 	value &= ~SOR_HEAD_STATE_COLORSPACE_MASK;
 	value |= SOR_HEAD_STATE_COLORSPACE_RGB;
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE0(dc->pipe));
+	tegra_sor_writel(sor, value, sor->soc->regs->head_state0 + dc->pipe);
 
 	tegra_sor_mode_set(sor, mode, state);
 
 	tegra_sor_update(sor);
 
+	/* program preamble timing in SOR (XXX) */
+	value = tegra_sor_readl(sor, SOR_DP_SPARE0);
+	value &= ~SOR_DP_SPARE_DISP_VIDEO_PREAMBLE;
+	tegra_sor_writel(sor, value, SOR_DP_SPARE0);
+
 	err = tegra_sor_attach(sor);
 	if (err < 0)
 		dev_err(sor->dev, "failed to attach SOR: %d\n", err);
 
 	/* enable display to SOR clock and generate HDMI preamble */
 	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
-	value |= SOR1_ENABLE | SOR1_TIMING_CYA;
+
+	if (!sor->soc->has_nvdisplay)
+		value |= SOR_ENABLE(1) | SOR1_TIMING_CYA;
+	else
+		value |= SOR_ENABLE(sor->index);
+
 	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 
+	if (dc->soc->has_nvdisplay) {
+		value = tegra_dc_readl(dc, DC_DISP_CORE_SOR_SET_CONTROL(sor->index));
+		value &= ~PROTOCOL_MASK;
+		value |= PROTOCOL_SINGLE_TMDS_A;
+		tegra_dc_writel(dc, value, DC_DISP_CORE_SOR_SET_CONTROL(sor->index));
+	}
+
 	tegra_dc_commit(dc);
 
 	err = tegra_sor_wakeup(sor);
 	if (err < 0)
 		dev_err(sor->dev, "failed to wakeup SOR: %d\n", err);
+
+	tegra_sor_hdmi_scdc_start(sor);
 }
 
 static const struct drm_encoder_helper_funcs tegra_sor_hdmi_helpers = {
@@ -2344,13 +2632,7 @@ static int tegra_sor_init(struct host1x_client *client)
 		return err;
 	}
 
-	sor->output.encoder.possible_crtcs = 0x3;
-
-	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_sor_debugfs_init(sor, drm->primary);
-		if (err < 0)
-			dev_err(sor->dev, "debugfs setup failed: %d\n", err);
-	}
+	tegra_output_find_possible_crtcs(&sor->output, drm);
 
 	if (sor->aux) {
 		err = drm_dp_aux_attach(sor->aux, &sor->output);
@@ -2420,9 +2702,6 @@ static int tegra_sor_exit(struct host1x_client *client)
 	clk_disable_unprepare(sor->clk_dp);
 	clk_disable_unprepare(sor->clk);
 
-	if (IS_ENABLED(CONFIG_DEBUG_FS))
-		tegra_sor_debugfs_exit(sor);
-
 	return 0;
 }
 
@@ -2480,6 +2759,8 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
 		return err;
 	}
 
+	INIT_DELAYED_WORK(&sor->scdc, tegra_sor_hdmi_scdc_work);
+
 	return 0;
 }
 
@@ -2502,19 +2783,53 @@ static const u8 tegra124_sor_xbar_cfg[5] = {
 	0, 1, 2, 3, 4
 };
 
+static const struct tegra_sor_regs tegra124_sor_regs = {
+	.head_state0 = 0x05,
+	.head_state1 = 0x07,
+	.head_state2 = 0x09,
+	.head_state3 = 0x0b,
+	.head_state4 = 0x0d,
+	.head_state5 = 0x0f,
+	.pll0 = 0x17,
+	.pll1 = 0x18,
+	.pll2 = 0x19,
+	.pll3 = 0x1a,
+	.dp_padctl0 = 0x5c,
+	.dp_padctl2 = 0x73,
+};
+
 static const struct tegra_sor_soc tegra124_sor = {
 	.supports_edp = true,
 	.supports_lvds = true,
 	.supports_hdmi = false,
 	.supports_dp = false,
+	.regs = &tegra124_sor_regs,
+	.has_nvdisplay = false,
 	.xbar_cfg = tegra124_sor_xbar_cfg,
 };
 
+static const struct tegra_sor_regs tegra210_sor_regs = {
+	.head_state0 = 0x05,
+	.head_state1 = 0x07,
+	.head_state2 = 0x09,
+	.head_state3 = 0x0b,
+	.head_state4 = 0x0d,
+	.head_state5 = 0x0f,
+	.pll0 = 0x17,
+	.pll1 = 0x18,
+	.pll2 = 0x19,
+	.pll3 = 0x1a,
+	.dp_padctl0 = 0x5c,
+	.dp_padctl2 = 0x73,
+};
+
 static const struct tegra_sor_soc tegra210_sor = {
 	.supports_edp = true,
 	.supports_lvds = false,
 	.supports_hdmi = false,
 	.supports_dp = false,
+	.regs = &tegra210_sor_regs,
+	.has_nvdisplay = false,
 	.xbar_cfg = tegra124_sor_xbar_cfg,
 };
 
@@ -2528,13 +2843,60 @@ static const struct tegra_sor_soc tegra210_sor1 = {
 	.supports_hdmi = true,
 	.supports_dp = true,
 
+	.regs = &tegra210_sor_regs,
+	.has_nvdisplay = false,
+
 	.num_settings = ARRAY_SIZE(tegra210_sor_hdmi_defaults),
 	.settings = tegra210_sor_hdmi_defaults,
 
 	.xbar_cfg = tegra210_sor_xbar_cfg,
 };
 
+static const struct tegra_sor_regs tegra186_sor_regs = {
+	.head_state0 = 0x151,
+	.head_state1 = 0x154,
+	.head_state2 = 0x157,
+	.head_state3 = 0x15a,
+	.head_state4 = 0x15d,
+	.head_state5 = 0x160,
+	.pll0 = 0x163,
+	.pll1 = 0x164,
+	.pll2 = 0x165,
+	.pll3 = 0x166,
+	.dp_padctl0 = 0x168,
+	.dp_padctl2 = 0x16a,
+};
+
+static const struct tegra_sor_soc tegra186_sor = {
+	.supports_edp = false,
+	.supports_lvds = false,
+	.supports_hdmi = false,
+	.supports_dp = true,
+
+	.regs = &tegra186_sor_regs,
+	.has_nvdisplay = true,
+
+	.xbar_cfg = tegra124_sor_xbar_cfg,
+};
+
+static const struct tegra_sor_soc tegra186_sor1 = {
+	.supports_edp = false,
+	.supports_lvds = false,
+	.supports_hdmi = true,
+	.supports_dp = true,
+
+	.regs = &tegra186_sor_regs,
+	.has_nvdisplay = true,
+
+	.num_settings = ARRAY_SIZE(tegra186_sor_hdmi_defaults),
+	.settings = tegra186_sor_hdmi_defaults,
+
+	.xbar_cfg = tegra124_sor_xbar_cfg,
+};
+
 static const struct of_device_id tegra_sor_of_match[] = {
+	{ .compatible = "nvidia,tegra186-sor1", .data = &tegra186_sor1 },
+	{ .compatible = "nvidia,tegra186-sor", .data = &tegra186_sor },
 	{ .compatible = "nvidia,tegra210-sor1", .data = &tegra210_sor1 },
 	{ .compatible = "nvidia,tegra210-sor", .data = &tegra210_sor },
 	{ .compatible = "nvidia,tegra124-sor", .data = &tegra124_sor },
@@ -2542,6 +2904,29 @@ static const struct of_device_id tegra_sor_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, tegra_sor_of_match);
 
+static int tegra_sor_parse_dt(struct tegra_sor *sor)
+{
+	struct device_node *np = sor->dev->of_node;
+	u32 value;
+	int err;
+
+	if (sor->soc->has_nvdisplay) {
+		err = of_property_read_u32(np, "nvidia,interface", &value);
+		if (err < 0)
+			return err;
+
+		sor->index = value;
+
+		/*
+		 * override the default that we already set for Tegra210 and
+		 * earlier
+		 */
+		sor->pad = TEGRA_IO_PAD_HDMI_DP0 + sor->index;
+	}
+
+	return 0;
+}
+
 static int tegra_sor_probe(struct platform_device *pdev)
 {
 	struct device_node *np;
@@ -2577,6 +2962,7 @@ static int tegra_sor_probe(struct platform_device *pdev)
 	if (!sor->aux) {
 		if (sor->soc->supports_hdmi) {
 			sor->ops = &tegra_sor_hdmi_ops;
+			sor->pad = TEGRA_IO_PAD_HDMI;
 		} else if (sor->soc->supports_lvds) {
 			dev_err(&pdev->dev, "LVDS not supported yet\n");
 			return -ENODEV;
@@ -2587,6 +2973,7 @@ static int tegra_sor_probe(struct platform_device *pdev)
 	} else {
 		if (sor->soc->supports_edp) {
 			sor->ops = &tegra_sor_edp_ops;
+			sor->pad = TEGRA_IO_PAD_LVDS;
 		} else if (sor->soc->supports_dp) {
 			dev_err(&pdev->dev, "DisplayPort not supported yet\n");
 			return -ENODEV;
@@ -2596,6 +2983,10 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		}
 	}
 
+	err = tegra_sor_parse_dt(sor);
+	if (err < 0)
+		return err;
+
 	err = tegra_output_probe(&sor->output);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to probe output: %d\n", err);
@@ -2656,6 +3047,9 @@ static int tegra_sor_probe(struct platform_device *pdev)
 				name, err);
 			goto remove;
 		}
+	} else {
+		/* fall back to the module clock on SOR0 (eDP/LVDS only) */
+		sor->clk_out = sor->clk;
 	}
 
 	sor->clk_parent = devm_clk_get(&pdev->dev, "parent");
diff --git a/drivers/gpu/drm/tegra/sor.h b/drivers/gpu/drm/tegra/sor.h
index 865c73b489685c74c8b438b145602adb20f8a5a9..fb0854d92a2790f6390bb5ec6fd79dbacea114e6 100644
--- a/drivers/gpu/drm/tegra/sor.h
+++ b/drivers/gpu/drm/tegra/sor.h
@@ -89,6 +89,8 @@
 #define SOR_PLL0 0x17
 #define  SOR_PLL0_ICHPMP_MASK			(0xf << 24)
 #define  SOR_PLL0_ICHPMP(x)			(((x) & 0xf) << 24)
+#define  SOR_PLL0_FILTER_MASK			(0xf << 16)
+#define  SOR_PLL0_FILTER(x)			(((x) & 0xf) << 16)
 #define  SOR_PLL0_VCOCAP_MASK			(0xf << 8)
 #define  SOR_PLL0_VCOCAP(x)			(((x) & 0xf) << 8)
 #define  SOR_PLL0_VCOCAP_RST			SOR_PLL0_VCOCAP(3)
@@ -122,10 +124,16 @@
 #define  SOR_PLL2_SEQ_PLL_PULLDOWN		(1 << 16)
 
 #define SOR_PLL3 0x1a
+#define  SOR_PLL3_BG_TEMP_COEF_MASK		(0xf << 28)
+#define  SOR_PLL3_BG_TEMP_COEF(x)		(((x) & 0xf) << 28)
 #define  SOR_PLL3_BG_VREF_LEVEL_MASK		(0xf << 24)
 #define  SOR_PLL3_BG_VREF_LEVEL(x)		(((x) & 0xf) << 24)
 #define  SOR_PLL3_PLL_VDD_MODE_1V8		(0 << 13)
 #define  SOR_PLL3_PLL_VDD_MODE_3V3		(1 << 13)
+#define  SOR_PLL3_AVDD10_LEVEL_MASK		(0xf << 8)
+#define  SOR_PLL3_AVDD10_LEVEL(x)		(((x) & 0xf) << 8)
+#define  SOR_PLL3_AVDD14_LEVEL_MASK		(0xf << 4)
+#define  SOR_PLL3_AVDD14_LEVEL(x)		(((x) & 0xf) << 4)
 
 #define SOR_CSTM 0x1b
 #define  SOR_CSTM_ROTCLK_MASK			(0xf << 24)
@@ -334,6 +342,10 @@
 #define SOR_DP_LQ_CSTM1 0x70
 #define SOR_DP_LQ_CSTM2 0x71
 
+#define SOR_DP_PADCTL2 0x73
+#define  SOR_DP_PADCTL_SPAREPLL_MASK (0xff << 24)
+#define  SOR_DP_PADCTL_SPAREPLL(x) (((x) & 0xff) << 24)
+
 #define SOR_HDMI_AUDIO_INFOFRAME_CTRL 0x9a
 #define SOR_HDMI_AUDIO_INFOFRAME_STATUS 0x9b
 #define SOR_HDMI_AUDIO_INFOFRAME_HEADER 0x9c
@@ -370,4 +382,8 @@
 #define SOR_HDMI_VSI_INFOFRAME_STATUS 0x124
 #define SOR_HDMI_VSI_INFOFRAME_HEADER 0x125
 
+#define SOR_HDMI2_CTRL 0x13e
+#define  SOR_HDMI2_CTRL_CLOCK_MODE_DIV_BY_4 (1 << 1)
+#define  SOR_HDMI2_CTRL_SCRAMBLE (1 << 0)
+
 #endif
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 18024183aa2b7af497aa1a487e5da2d27c7a38c6..f5794dd49f3b0b344bd16fa9fffbd8a635818d9d 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -115,7 +115,7 @@ static int vic_boot(struct vic *vic)
 }
 
 static void *vic_falcon_alloc(struct falcon *falcon, size_t size,
-			       dma_addr_t *iova)
+			      dma_addr_t *iova)
 {
 	struct tegra_drm *tegra = falcon->data;
 
@@ -138,13 +138,14 @@ static const struct falcon_ops vic_falcon_ops = {
 static int vic_init(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
+	struct iommu_group *group = iommu_group_get(client->dev);
 	struct drm_device *dev = dev_get_drvdata(client->parent);
 	struct tegra_drm *tegra = dev->dev_private;
 	struct vic *vic = to_vic(drm);
 	int err;
 
-	if (tegra->domain) {
-		err = iommu_attach_device(tegra->domain, vic->dev);
+	if (group && tegra->domain) {
+		err = iommu_attach_group(tegra->domain, group);
 		if (err < 0) {
 			dev_err(vic->dev, "failed to attach to domain: %d\n",
 				err);
@@ -158,13 +159,13 @@ static int vic_init(struct host1x_client *client)
 		vic->falcon.data = tegra;
 		err = falcon_load_firmware(&vic->falcon);
 		if (err < 0)
-			goto detach_device;
+			goto detach;
 	}
 
 	vic->channel = host1x_channel_request(client->dev);
 	if (!vic->channel) {
 		err = -ENOMEM;
-		goto detach_device;
+		goto detach;
 	}
 
 	client->syncpts[0] = host1x_syncpt_request(client, 0);
@@ -183,9 +184,9 @@ static int vic_init(struct host1x_client *client)
 	host1x_syncpt_free(client->syncpts[0]);
 free_channel:
 	host1x_channel_put(vic->channel);
-detach_device:
-	if (tegra->domain)
-		iommu_detach_device(tegra->domain, vic->dev);
+detach:
+	if (group && tegra->domain)
+		iommu_detach_group(tegra->domain, group);
 
 	return err;
 }
@@ -193,6 +194,7 @@ static int vic_init(struct host1x_client *client)
 static int vic_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
+	struct iommu_group *group = iommu_group_get(client->dev);
 	struct drm_device *dev = dev_get_drvdata(client->parent);
 	struct tegra_drm *tegra = dev->dev_private;
 	struct vic *vic = to_vic(drm);
@@ -206,7 +208,7 @@ static int vic_exit(struct host1x_client *client)
 	host1x_channel_put(vic->channel);
 
 	if (vic->domain) {
-		iommu_detach_device(vic->domain, vic->dev);
+		iommu_detach_group(vic->domain, group);
 		vic->domain = NULL;
 	}
 
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index 6ef4d1a1e3a9944c9bff444a014a9f90a5825386..8bf6bb93dc790ef90fed82c8efb903f090e7fc7c 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -51,12 +51,8 @@ struct tilcdc_crtc {
 	ktime_t last_vblank;
 	unsigned int hvtotal_us;
 
-	struct drm_framebuffer *curr_fb;
 	struct drm_framebuffer *next_fb;
 
-	/* for deferred fb unref's: */
-	struct drm_flip_work unref_work;
-
 	/* Only set if an external encoder is connected */
 	bool simulate_vesa_sync;
 
@@ -70,20 +66,8 @@ struct tilcdc_crtc {
 };
 #define to_tilcdc_crtc(x) container_of(x, struct tilcdc_crtc, base)
 
-static void unref_worker(struct drm_flip_work *work, void *val)
-{
-	struct tilcdc_crtc *tilcdc_crtc =
-		container_of(work, struct tilcdc_crtc, unref_work);
-	struct drm_device *dev = tilcdc_crtc->base.dev;
-
-	mutex_lock(&dev->mode_config.mutex);
-	drm_framebuffer_put(val);
-	mutex_unlock(&dev->mode_config.mutex);
-}
-
 static void set_scanout(struct drm_crtc *crtc, struct drm_framebuffer *fb)
 {
-	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct tilcdc_drm_private *priv = dev->dev_private;
 	struct drm_gem_cma_object *gem;
@@ -108,12 +92,6 @@ static void set_scanout(struct drm_crtc *crtc, struct drm_framebuffer *fb)
 
 	dma_base_and_ceiling = (u64)end << 32 | start;
 	tilcdc_write64(dev, LCDC_DMA_FB_BASE_ADDR_0_REG, dma_base_and_ceiling);
-
-	if (tilcdc_crtc->curr_fb)
-		drm_flip_work_queue(&tilcdc_crtc->unref_work,
-			tilcdc_crtc->curr_fb);
-
-	tilcdc_crtc->curr_fb = fb;
 }
 
 /*
@@ -294,7 +272,7 @@ static void tilcdc_crtc_set_clk(struct drm_crtc *crtc)
 				LCDC_V2_CORE_CLK_EN);
 }
 
-uint tilcdc_mode_hvtotal(const struct drm_display_mode *mode)
+static uint tilcdc_mode_hvtotal(const struct drm_display_mode *mode)
 {
 	return (uint) div_u64(1000llu * mode->htotal * mode->vtotal,
 			      mode->clock);
@@ -464,8 +442,6 @@ static void tilcdc_crtc_set_mode(struct drm_crtc *crtc)
 
 	set_scanout(crtc, fb);
 
-	drm_framebuffer_get(fb);
-
 	crtc->hwmode = crtc->state->adjusted_mode;
 
 	tilcdc_crtc->hvtotal_us =
@@ -524,7 +500,6 @@ static void tilcdc_crtc_off(struct drm_crtc *crtc, bool shutdown)
 {
 	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
-	struct tilcdc_drm_private *priv = dev->dev_private;
 	int ret;
 
 	mutex_lock(&tilcdc_crtc->enable_lock);
@@ -554,20 +529,6 @@ static void tilcdc_crtc_off(struct drm_crtc *crtc, bool shutdown)
 
 	pm_runtime_put_sync(dev->dev);
 
-	if (tilcdc_crtc->next_fb) {
-		drm_flip_work_queue(&tilcdc_crtc->unref_work,
-				    tilcdc_crtc->next_fb);
-		tilcdc_crtc->next_fb = NULL;
-	}
-
-	if (tilcdc_crtc->curr_fb) {
-		drm_flip_work_queue(&tilcdc_crtc->unref_work,
-				    tilcdc_crtc->curr_fb);
-		tilcdc_crtc->curr_fb = NULL;
-	}
-
-	drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq);
-
 	tilcdc_crtc->enabled = false;
 	mutex_unlock(&tilcdc_crtc->enable_lock);
 }
@@ -614,7 +575,6 @@ static void tilcdc_crtc_recover_work(struct work_struct *work)
 
 static void tilcdc_crtc_destroy(struct drm_crtc *crtc)
 {
-	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
 	struct tilcdc_drm_private *priv = crtc->dev->dev_private;
 
 	tilcdc_crtc_shutdown(crtc);
@@ -623,7 +583,6 @@ static void tilcdc_crtc_destroy(struct drm_crtc *crtc)
 
 	of_node_put(crtc->port);
 	drm_crtc_cleanup(crtc);
-	drm_flip_work_cleanup(&tilcdc_crtc->unref_work);
 }
 
 int tilcdc_crtc_update_fb(struct drm_crtc *crtc,
@@ -638,9 +597,6 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc,
 		return -EBUSY;
 	}
 
-	drm_framebuffer_get(fb);
-
-	crtc->primary->fb = fb;
 	tilcdc_crtc->event = event;
 
 	mutex_lock(&tilcdc_crtc->enable_lock);
@@ -936,8 +892,6 @@ irqreturn_t tilcdc_crtc_irq(struct drm_crtc *crtc)
 
 		now = ktime_get();
 
-		drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq);
-
 		spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
 
 		tilcdc_crtc->last_vblank = now;
@@ -1064,9 +1018,6 @@ int tilcdc_crtc_create(struct drm_device *dev)
 
 	init_waitqueue_head(&tilcdc_crtc->frame_done_wq);
 
-	drm_flip_work_init(&tilcdc_crtc->unref_work,
-			"unref", unref_worker);
-
 	spin_lock_init(&tilcdc_crtc->irq_lock);
 	INIT_WORK(&tilcdc_crtc->recover_work, tilcdc_crtc_recover_work);
 
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index bc4feb3a84b9aa7bbe3806815d2ae67f18a382a7..1afde61f1247d1976014c2d434c5905e038fd7d1 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -69,12 +69,6 @@ static struct drm_framebuffer *tilcdc_fb_create(struct drm_device *dev,
 	return drm_gem_fb_create(dev, file_priv, mode_cmd);
 }
 
-static void tilcdc_fb_output_poll_changed(struct drm_device *dev)
-{
-	struct tilcdc_drm_private *priv = dev->dev_private;
-	drm_fbdev_cma_hotplug_event(priv->fbdev);
-}
-
 static int tilcdc_atomic_check(struct drm_device *dev,
 			       struct drm_atomic_state *state)
 {
@@ -146,7 +140,7 @@ static int tilcdc_commit(struct drm_device *dev,
 
 static const struct drm_mode_config_funcs mode_config_funcs = {
 	.fb_create = tilcdc_fb_create,
-	.output_poll_changed = tilcdc_fb_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = tilcdc_atomic_check,
 	.atomic_commit = tilcdc_commit,
 };
@@ -198,8 +192,7 @@ static void tilcdc_fini(struct drm_device *dev)
 
 	drm_kms_helper_poll_fini(dev);
 
-	if (priv->fbdev)
-		drm_fbdev_cma_fini(priv->fbdev);
+	drm_fb_cma_fbdev_fini(dev);
 
 	drm_irq_uninstall(dev);
 	drm_mode_config_cleanup(dev);
@@ -405,12 +398,9 @@ static int tilcdc_init(struct drm_driver *ddrv, struct device *dev)
 
 	drm_mode_config_reset(ddev);
 
-	priv->fbdev = drm_fbdev_cma_init(ddev, bpp,
-					 ddev->mode_config.num_connector);
-	if (IS_ERR(priv->fbdev)) {
-		ret = PTR_ERR(priv->fbdev);
+	ret = drm_fb_cma_fbdev_init(ddev, bpp, 0);
+	if (ret)
 		goto init_failed;
-	}
 
 	drm_kms_helper_poll_init(ddev);
 
@@ -427,12 +417,6 @@ static int tilcdc_init(struct drm_driver *ddrv, struct device *dev)
 	return ret;
 }
 
-static void tilcdc_lastclose(struct drm_device *dev)
-{
-	struct tilcdc_drm_private *priv = dev->dev_private;
-	drm_fbdev_cma_restore_mode(priv->fbdev);
-}
-
 static irqreturn_t tilcdc_irq(int irq, void *arg)
 {
 	struct drm_device *dev = arg;
@@ -537,7 +521,7 @@ DEFINE_DRM_GEM_CMA_FOPS(fops);
 static struct drm_driver tilcdc_driver = {
 	.driver_features    = (DRIVER_HAVE_IRQ | DRIVER_GEM | DRIVER_MODESET |
 			       DRIVER_PRIME | DRIVER_ATOMIC),
-	.lastclose          = tilcdc_lastclose,
+	.lastclose          = drm_fb_helper_lastclose,
 	.irq_handler        = tilcdc_irq,
 	.gem_free_object_unlocked = drm_gem_cma_free_object,
 	.gem_print_info     = drm_gem_cma_print_info,
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.h b/drivers/gpu/drm/tilcdc/tilcdc_drv.h
index 8caa11bc7aec3c308a699a1a5c6256090f44087b..ead5122166699be40787974f49f5c13a03491a90 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.h
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.h
@@ -79,8 +79,6 @@ struct tilcdc_drm_private {
 
 	struct workqueue_struct *wq;
 
-	struct drm_fbdev_cma *fbdev;
-
 	struct drm_crtc *crtc;
 
 	unsigned int num_encoders;
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_regs.h b/drivers/gpu/drm/tilcdc/tilcdc_regs.h
index 9d528c0a67a4fbffcb193a6b3f2f0843ff1c06f3..5048ebb86835043d263b3ee214fd888e92f8476a 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_regs.h
+++ b/drivers/gpu/drm/tilcdc/tilcdc_regs.h
@@ -133,7 +133,7 @@ static inline void tilcdc_write64(struct drm_device *dev, u32 reg, u64 data)
 	struct tilcdc_drm_private *priv = dev->dev_private;
 	volatile void __iomem *addr = priv->mmio + reg;
 
-#ifdef iowrite64
+#if defined(iowrite64) && !defined(iowrite64_is_nonatomic)
 	iowrite64(data, addr);
 #else
 	__iowmb();
diff --git a/drivers/gpu/drm/tinydrm/Kconfig b/drivers/gpu/drm/tinydrm/Kconfig
index 90c5bd5ef81b210bf2bce0a970b82cbc5cea672b..b0e567d416b36fd71bfae437645cece685239bb9 100644
--- a/drivers/gpu/drm/tinydrm/Kconfig
+++ b/drivers/gpu/drm/tinydrm/Kconfig
@@ -52,3 +52,13 @@ config TINYDRM_ST7586
 	  * LEGO MINDSTORMS EV3
 
 	  If M is selected the module will be called st7586.
+
+config TINYDRM_ST7735R
+	tristate "DRM support for Sitronix ST7735R display panels"
+	depends on DRM_TINYDRM && SPI
+	select TINYDRM_MIPI_DBI
+	help
+	  DRM driver Sitronix ST7735R with one of the following LCDs:
+	  * JD-T18003-T01 1.8" 128x160 TFT
+
+	  If M is selected the module will be called st7735r.
diff --git a/drivers/gpu/drm/tinydrm/Makefile b/drivers/gpu/drm/tinydrm/Makefile
index 8aeee532474f51e8bb53351ca1efd049530cad65..49a111929724a93f16f90953950b770b59ec8743 100644
--- a/drivers/gpu/drm/tinydrm/Makefile
+++ b/drivers/gpu/drm/tinydrm/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_TINYDRM_ILI9225)		+= ili9225.o
 obj-$(CONFIG_TINYDRM_MI0283QT)		+= mi0283qt.o
 obj-$(CONFIG_TINYDRM_REPAPER)		+= repaper.o
 obj-$(CONFIG_TINYDRM_ST7586)		+= st7586.o
+obj-$(CONFIG_TINYDRM_ST7735R)		+= st7735r.o
diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-core.c b/drivers/gpu/drm/tinydrm/core/tinydrm-core.c
index bd7b82824a34c004247db1e7af91effe7bbcac7f..4c6616278c48b3ee2db8cedde3744740549be2c7 100644
--- a/drivers/gpu/drm/tinydrm/core/tinydrm-core.c
+++ b/drivers/gpu/drm/tinydrm/core/tinydrm-core.c
@@ -10,6 +10,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/tinydrm/tinydrm.h>
 #include <linux/device.h>
@@ -35,23 +36,6 @@
  * and registers the DRM device using devm_tinydrm_register().
  */
 
-/**
- * tinydrm_lastclose - DRM lastclose helper
- * @drm: DRM device
- *
- * This function ensures that fbdev is restored when drm_lastclose() is called
- * on the last drm_release(). Drivers can use this as their
- * &drm_driver->lastclose callback.
- */
-void tinydrm_lastclose(struct drm_device *drm)
-{
-	struct tinydrm_device *tdev = drm->dev_private;
-
-	DRM_DEBUG_KMS("\n");
-	drm_fbdev_cma_restore_mode(tdev->fbdev_cma);
-}
-EXPORT_SYMBOL(tinydrm_lastclose);
-
 /**
  * tinydrm_gem_cma_prime_import_sg_table - Produce a CMA GEM object from
  *     another driver's scatter/gather table of pinned pages
@@ -214,35 +198,24 @@ EXPORT_SYMBOL(devm_tinydrm_init);
 static int tinydrm_register(struct tinydrm_device *tdev)
 {
 	struct drm_device *drm = tdev->drm;
-	int bpp = drm->mode_config.preferred_depth;
-	struct drm_fbdev_cma *fbdev;
 	int ret;
 
 	ret = drm_dev_register(tdev->drm, 0);
 	if (ret)
 		return ret;
 
-	fbdev = drm_fbdev_cma_init_with_funcs(drm, bpp ? bpp : 32,
-					      drm->mode_config.num_connector,
-					      tdev->fb_funcs);
-	if (IS_ERR(fbdev))
-		DRM_ERROR("Failed to initialize fbdev: %ld\n", PTR_ERR(fbdev));
-	else
-		tdev->fbdev_cma = fbdev;
+	ret = drm_fb_cma_fbdev_init_with_funcs(drm, 0, 0, tdev->fb_funcs);
+	if (ret)
+		DRM_ERROR("Failed to initialize fbdev: %d\n", ret);
 
 	return 0;
 }
 
 static void tinydrm_unregister(struct tinydrm_device *tdev)
 {
-	struct drm_fbdev_cma *fbdev_cma = tdev->fbdev_cma;
-
 	drm_atomic_helper_shutdown(tdev->drm);
-	/* don't restore fbdev in lastclose, keep pipeline disabled */
-	tdev->fbdev_cma = NULL;
+	drm_fb_cma_fbdev_fini(tdev->drm);
 	drm_dev_unregister(tdev->drm);
-	if (fbdev_cma)
-		drm_fbdev_cma_fini(fbdev_cma);
 }
 
 static void devm_tinydrm_register_release(void *data)
diff --git a/drivers/gpu/drm/tinydrm/ili9225.c b/drivers/gpu/drm/tinydrm/ili9225.c
index 3b766a26aa6124cd9ba100e2d60ab4ea58a61102..c0cf498493023d10f71f6b05970a0ddc1aa5e03f 100644
--- a/drivers/gpu/drm/tinydrm/ili9225.c
+++ b/drivers/gpu/drm/tinydrm/ili9225.c
@@ -20,6 +20,7 @@
 #include <linux/spi/spi.h>
 #include <video/mipi_display.h>
 
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/tinydrm/mipi-dbi.h>
 #include <drm/tinydrm/tinydrm-helpers.h>
@@ -381,7 +382,7 @@ static struct drm_driver ili9225_driver = {
 				  DRIVER_ATOMIC,
 	.fops			= &ili9225_fops,
 	TINYDRM_GEM_DRIVER_OPS,
-	.lastclose		= tinydrm_lastclose,
+	.lastclose		= drm_fb_helper_lastclose,
 	.name			= "ili9225",
 	.desc			= "Ilitek ILI9225",
 	.date			= "20171106",
@@ -390,13 +391,13 @@ static struct drm_driver ili9225_driver = {
 };
 
 static const struct of_device_id ili9225_of_match[] = {
-	{ .compatible = "ilitek,ili9225-2.2in-176x220" },
+	{ .compatible = "vot,v220hf01a-t" },
 	{},
 };
 MODULE_DEVICE_TABLE(of, ili9225_of_match);
 
 static const struct spi_device_id ili9225_id[] = {
-	{ "ili9225-2.2in-176x220", 0 },
+	{ "v220hf01a-t", 0 },
 	{ },
 };
 MODULE_DEVICE_TABLE(spi, ili9225_id);
diff --git a/drivers/gpu/drm/tinydrm/mi0283qt.c b/drivers/gpu/drm/tinydrm/mi0283qt.c
index 70ae4f76f45512cea9b1f5f509bff9917c27b3ea..674d407640bee55305775222cf52a3d319955af8 100644
--- a/drivers/gpu/drm/tinydrm/mi0283qt.c
+++ b/drivers/gpu/drm/tinydrm/mi0283qt.c
@@ -9,6 +9,7 @@
  * (at your option) any later version.
  */
 
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/tinydrm/ili9341.h>
 #include <drm/tinydrm/mipi-dbi.h>
@@ -140,7 +141,7 @@ static struct drm_driver mi0283qt_driver = {
 				  DRIVER_ATOMIC,
 	.fops			= &mi0283qt_fops,
 	TINYDRM_GEM_DRIVER_OPS,
-	.lastclose		= tinydrm_lastclose,
+	.lastclose		= drm_fb_helper_lastclose,
 	.debugfs_init		= mipi_dbi_debugfs_init,
 	.name			= "mi0283qt",
 	.desc			= "Multi-Inno MI0283QT",
diff --git a/drivers/gpu/drm/tinydrm/st7586.c b/drivers/gpu/drm/tinydrm/st7586.c
index 0a2c60da5c0e4b806e8eee167d26bb01bd145954..5aebfceb740e66a2dcc628948edc4082409cde7a 100644
--- a/drivers/gpu/drm/tinydrm/st7586.c
+++ b/drivers/gpu/drm/tinydrm/st7586.c
@@ -17,6 +17,7 @@
 #include <linux/spi/spi.h>
 #include <video/mipi_display.h>
 
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/tinydrm/mipi-dbi.h>
 #include <drm/tinydrm/tinydrm-helpers.h>
@@ -320,7 +321,7 @@ static struct drm_driver st7586_driver = {
 				  DRIVER_ATOMIC,
 	.fops			= &st7586_fops,
 	TINYDRM_GEM_DRIVER_OPS,
-	.lastclose		= tinydrm_lastclose,
+	.lastclose		= drm_fb_helper_lastclose,
 	.debugfs_init		= mipi_dbi_debugfs_init,
 	.name			= "st7586",
 	.desc			= "Sitronix ST7586",
diff --git a/drivers/gpu/drm/tinydrm/st7735r.c b/drivers/gpu/drm/tinydrm/st7735r.c
new file mode 100644
index 0000000000000000000000000000000000000000..98ff447f40b4f6439a7df45276629bddc5240f60
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/st7735r.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * DRM driver for Sitronix ST7735R panels
+ *
+ * Copyright 2017 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-buf.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/property.h>
+#include <linux/spi/spi.h>
+#include <video/mipi_display.h>
+
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/tinydrm/mipi-dbi.h>
+#include <drm/tinydrm/tinydrm-helpers.h>
+
+#define ST7735R_FRMCTR1		0xb1
+#define ST7735R_FRMCTR2		0xb2
+#define ST7735R_FRMCTR3		0xb3
+#define ST7735R_INVCTR		0xb4
+#define ST7735R_PWCTR1		0xc0
+#define ST7735R_PWCTR2		0xc1
+#define ST7735R_PWCTR3		0xc2
+#define ST7735R_PWCTR4		0xc3
+#define ST7735R_PWCTR5		0xc4
+#define ST7735R_VMCTR1		0xc5
+#define ST7735R_GAMCTRP1	0xe0
+#define ST7735R_GAMCTRN1	0xe1
+
+#define ST7735R_MY	BIT(7)
+#define ST7735R_MX	BIT(6)
+#define ST7735R_MV	BIT(5)
+
+static void jd_t18003_t01_pipe_enable(struct drm_simple_display_pipe *pipe,
+				      struct drm_crtc_state *crtc_state)
+{
+	struct tinydrm_device *tdev = pipe_to_tinydrm(pipe);
+	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev);
+	struct device *dev = tdev->drm->dev;
+	int ret;
+	u8 addr_mode;
+
+	DRM_DEBUG_KMS("\n");
+
+	mipi_dbi_hw_reset(mipi);
+
+	ret = mipi_dbi_command(mipi, MIPI_DCS_SOFT_RESET);
+	if (ret) {
+		DRM_DEV_ERROR(dev, "Error sending command %d\n", ret);
+		return;
+	}
+
+	msleep(150);
+
+	mipi_dbi_command(mipi, MIPI_DCS_EXIT_SLEEP_MODE);
+	msleep(500);
+
+	mipi_dbi_command(mipi, ST7735R_FRMCTR1, 0x01, 0x2c, 0x2d);
+	mipi_dbi_command(mipi, ST7735R_FRMCTR2, 0x01, 0x2c, 0x2d);
+	mipi_dbi_command(mipi, ST7735R_FRMCTR3, 0x01, 0x2c, 0x2d, 0x01, 0x2c,
+			 0x2d);
+	mipi_dbi_command(mipi, ST7735R_INVCTR, 0x07);
+	mipi_dbi_command(mipi, ST7735R_PWCTR1, 0xa2, 0x02, 0x84);
+	mipi_dbi_command(mipi, ST7735R_PWCTR2, 0xc5);
+	mipi_dbi_command(mipi, ST7735R_PWCTR3, 0x0a, 0x00);
+	mipi_dbi_command(mipi, ST7735R_PWCTR4, 0x8a, 0x2a);
+	mipi_dbi_command(mipi, ST7735R_PWCTR5, 0x8a, 0xee);
+	mipi_dbi_command(mipi, ST7735R_VMCTR1, 0x0e);
+	mipi_dbi_command(mipi, MIPI_DCS_EXIT_INVERT_MODE);
+	switch (mipi->rotation) {
+	default:
+		addr_mode = ST7735R_MX | ST7735R_MY;
+		break;
+	case 90:
+		addr_mode = ST7735R_MX | ST7735R_MV;
+		break;
+	case 180:
+		addr_mode = 0;
+		break;
+	case 270:
+		addr_mode = ST7735R_MY | ST7735R_MV;
+		break;
+	}
+	mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode);
+	mipi_dbi_command(mipi, MIPI_DCS_SET_PIXEL_FORMAT,
+			 MIPI_DCS_PIXEL_FMT_16BIT);
+	mipi_dbi_command(mipi, ST7735R_GAMCTRP1, 0x02, 0x1c, 0x07, 0x12, 0x37,
+			 0x32, 0x29, 0x2d, 0x29, 0x25, 0x2b, 0x39, 0x00, 0x01,
+			 0x03, 0x10);
+	mipi_dbi_command(mipi, ST7735R_GAMCTRN1, 0x03, 0x1d, 0x07, 0x06, 0x2e,
+			 0x2c, 0x29, 0x2d, 0x2e, 0x2e, 0x37, 0x3f, 0x00, 0x00,
+			 0x02, 0x10);
+	mipi_dbi_command(mipi, MIPI_DCS_SET_DISPLAY_ON);
+
+	msleep(100);
+
+	mipi_dbi_command(mipi, MIPI_DCS_ENTER_NORMAL_MODE);
+
+	msleep(20);
+
+	mipi_dbi_pipe_enable(pipe, crtc_state);
+}
+
+static const struct drm_simple_display_pipe_funcs jd_t18003_t01_pipe_funcs = {
+	.enable		= jd_t18003_t01_pipe_enable,
+	.disable	= mipi_dbi_pipe_disable,
+	.update		= tinydrm_display_pipe_update,
+	.prepare_fb	= tinydrm_display_pipe_prepare_fb,
+};
+
+static const struct drm_display_mode jd_t18003_t01_mode = {
+	TINYDRM_MODE(128, 160, 28, 35),
+};
+
+DEFINE_DRM_GEM_CMA_FOPS(st7735r_fops);
+
+static struct drm_driver st7735r_driver = {
+	.driver_features	= DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME |
+				  DRIVER_ATOMIC,
+	.fops			= &st7735r_fops,
+	TINYDRM_GEM_DRIVER_OPS,
+	.lastclose		= drm_fb_helper_lastclose,
+	.debugfs_init		= mipi_dbi_debugfs_init,
+	.name			= "st7735r",
+	.desc			= "Sitronix ST7735R",
+	.date			= "20171128",
+	.major			= 1,
+	.minor			= 0,
+};
+
+static const struct of_device_id st7735r_of_match[] = {
+	{ .compatible = "jianda,jd-t18003-t01" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, st7735r_of_match);
+
+static const struct spi_device_id st7735r_id[] = {
+	{ "jd-t18003-t01", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, st7735r_id);
+
+static int st7735r_probe(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct mipi_dbi *mipi;
+	struct gpio_desc *dc;
+	u32 rotation = 0;
+	int ret;
+
+	mipi = devm_kzalloc(dev, sizeof(*mipi), GFP_KERNEL);
+	if (!mipi)
+		return -ENOMEM;
+
+	mipi->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(mipi->reset)) {
+		DRM_DEV_ERROR(dev, "Failed to get gpio 'reset'\n");
+		return PTR_ERR(mipi->reset);
+	}
+
+	dc = devm_gpiod_get(dev, "dc", GPIOD_OUT_LOW);
+	if (IS_ERR(dc)) {
+		DRM_DEV_ERROR(dev, "Failed to get gpio 'dc'\n");
+		return PTR_ERR(dc);
+	}
+
+	mipi->backlight = tinydrm_of_find_backlight(dev);
+	if (IS_ERR(mipi->backlight))
+		return PTR_ERR(mipi->backlight);
+
+	device_property_read_u32(dev, "rotation", &rotation);
+
+	ret = mipi_dbi_spi_init(spi, mipi, dc);
+	if (ret)
+		return ret;
+
+	/* Cannot read from Adafruit 1.8" display via SPI */
+	mipi->read_commands = NULL;
+
+	ret = mipi_dbi_init(&spi->dev, mipi, &jd_t18003_t01_pipe_funcs,
+			    &st7735r_driver, &jd_t18003_t01_mode, rotation);
+	if (ret)
+		return ret;
+
+	spi_set_drvdata(spi, mipi);
+
+	return devm_tinydrm_register(&mipi->tinydrm);
+}
+
+static void st7735r_shutdown(struct spi_device *spi)
+{
+	struct mipi_dbi *mipi = spi_get_drvdata(spi);
+
+	tinydrm_shutdown(&mipi->tinydrm);
+}
+
+static struct spi_driver st7735r_spi_driver = {
+	.driver = {
+		.name = "st7735r",
+		.owner = THIS_MODULE,
+		.of_match_table = st7735r_of_match,
+	},
+	.id_table = st7735r_id,
+	.probe = st7735r_probe,
+	.shutdown = st7735r_shutdown,
+};
+module_spi_driver(st7735r_spi_driver);
+
+MODULE_DESCRIPTION("Sitronix ST7735R DRM driver");
+MODULE_AUTHOR("David Lechner <david@lechnology.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
index 028ab6007873c443b6432e4fd5fe0a4ed09d866d..3e795a099d06f118f0380af26cdc8f5e90e0f04d 100644
--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -133,12 +133,12 @@ struct ttm_tt *ttm_agp_tt_create(struct ttm_bo_device *bdev,
 }
 EXPORT_SYMBOL(ttm_agp_tt_create);
 
-int ttm_agp_tt_populate(struct ttm_tt *ttm)
+int ttm_agp_tt_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
 {
 	if (ttm->state != tt_unpopulated)
 		return 0;
 
-	return ttm_pool_populate(ttm);
+	return ttm_pool_populate(ttm, ctx);
 }
 EXPORT_SYMBOL(ttm_agp_tt_populate);
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 97c3da6d5f17b75530ccae023abf357122101210..893003fc76a14d430f1367f4ef4458b21fcdca3e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -42,11 +42,6 @@
 #include <linux/atomic.h>
 #include <linux/reservation.h>
 
-#define TTM_ASSERT_LOCKED(param)
-#define TTM_DEBUG(fmt, arg...)
-#define TTM_BO_HASH_ORDER 13
-
-static int ttm_bo_swapout(struct ttm_mem_shrink *shrink);
 static void ttm_bo_global_kobj_release(struct kobject *kobj);
 
 static struct attribute ttm_bo_count = {
@@ -165,7 +160,7 @@ void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_mem_type_manager *man;
 
-	lockdep_assert_held(&bo->resv->lock.base);
+	reservation_object_assert_held(bo->resv);
 
 	if (!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
 
@@ -175,7 +170,8 @@ void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
 		list_add_tail(&bo->lru, &man->lru[bo->priority]);
 		kref_get(&bo->list_kref);
 
-		if (bo->ttm && !(bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+		if (bo->ttm && !(bo->ttm->page_flags &
+				 (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) {
 			list_add_tail(&bo->swap,
 				      &bo->glob->swap_lru[bo->priority]);
 			kref_get(&bo->list_kref);
@@ -216,7 +212,7 @@ EXPORT_SYMBOL(ttm_bo_del_sub_from_lru);
 
 void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo)
 {
-	lockdep_assert_held(&bo->resv->lock.base);
+	reservation_object_assert_held(bo->resv);
 
 	ttm_bo_del_from_lru(bo);
 	ttm_bo_add_to_lru(bo);
@@ -233,7 +229,7 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
 	int ret = 0;
 	uint32_t page_flags = 0;
 
-	TTM_ASSERT_LOCKED(&bo->mutex);
+	reservation_object_assert_held(bo->resv);
 	bo->ttm = NULL;
 
 	if (bdev->need_dma32)
@@ -305,7 +301,7 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 			goto out_err;
 
 		if (mem->mem_type != TTM_PL_SYSTEM) {
-			ret = ttm_tt_bind(bo->ttm, mem);
+			ret = ttm_tt_bind(bo->ttm, mem, ctx);
 			if (ret)
 				goto out_err;
 		}
@@ -324,13 +320,11 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 
 	if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
 	    !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
-		ret = ttm_bo_move_ttm(bo, ctx->interruptible,
-				      ctx->no_wait_gpu, mem);
+		ret = ttm_bo_move_ttm(bo, ctx, mem);
 	else if (bdev->driver->move)
 		ret = bdev->driver->move(bo, evict, ctx, mem);
 	else
-		ret = ttm_bo_move_memcpy(bo, ctx->interruptible,
-					 ctx->no_wait_gpu, mem);
+		ret = ttm_bo_move_memcpy(bo, ctx, mem);
 
 	if (ret) {
 		if (bdev->driver->move_notify) {
@@ -588,12 +582,19 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
 				      ddestroy);
 		kref_get(&bo->list_kref);
 		list_move_tail(&bo->ddestroy, &removed);
-		spin_unlock(&glob->lru_lock);
 
-		reservation_object_lock(bo->resv, NULL);
+		if (remove_all || bo->resv != &bo->ttm_resv) {
+			spin_unlock(&glob->lru_lock);
+			reservation_object_lock(bo->resv, NULL);
 
-		spin_lock(&glob->lru_lock);
-		ttm_bo_cleanup_refs(bo, false, !remove_all, true);
+			spin_lock(&glob->lru_lock);
+			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
+
+		} else if (reservation_object_trylock(bo->resv)) {
+			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
+		} else {
+			spin_unlock(&glob->lru_lock);
+		}
 
 		kref_put(&bo->list_kref, ttm_bo_release_list);
 		spin_lock(&glob->lru_lock);
@@ -662,7 +663,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo,
 	struct ttm_placement placement;
 	int ret = 0;
 
-	lockdep_assert_held(&bo->resv->lock.base);
+	reservation_object_assert_held(bo->resv);
 
 	evict_mem = bo->mem;
 	evict_mem.mm_node = NULL;
@@ -708,8 +709,35 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 
+/**
+ * Check the target bo is allowable to be evicted or swapout, including cases:
+ *
+ * a. if share same reservation object with ctx->resv, have assumption
+ * reservation objects should already be locked, so not lock again and
+ * return true directly when either the opreation allow_reserved_eviction
+ * or the target bo already is in delayed free list;
+ *
+ * b. Otherwise, trylock it.
+ */
+static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
+			struct ttm_operation_ctx *ctx, bool *locked)
+{
+	bool ret = false;
+
+	*locked = false;
+	if (bo->resv == ctx->resv) {
+		reservation_object_assert_held(bo->resv);
+		if (ctx->allow_reserved_eviction || !list_empty(&bo->ddestroy))
+			ret = true;
+	} else {
+		*locked = reservation_object_trylock(bo->resv);
+		ret = *locked;
+	}
+
+	return ret;
+}
+
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
-			       struct reservation_object *resv,
 			       uint32_t mem_type,
 			       const struct ttm_place *place,
 			       struct ttm_operation_ctx *ctx)
@@ -724,14 +752,8 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 	spin_lock(&glob->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &man->lru[i], lru) {
-			if (bo->resv == resv) {
-				if (list_empty(&bo->ddestroy))
-					continue;
-			} else {
-				locked = reservation_object_trylock(bo->resv);
-				if (!locked)
-					continue;
-			}
+			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
+				continue;
 
 			if (place && !bdev->driver->eviction_valuable(bo,
 								      place)) {
@@ -836,7 +858,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 			return ret;
 		if (mem->mm_node)
 			break;
-		ret = ttm_mem_evict_first(bdev, bo->resv, mem_type, place, ctx);
+		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
 		if (unlikely(ret != 0))
 			return ret;
 	} while (1);
@@ -1018,7 +1040,7 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 	int ret = 0;
 	struct ttm_mem_reg mem;
 
-	lockdep_assert_held(&bo->resv->lock.base);
+	reservation_object_assert_held(bo->resv);
 
 	mem.num_pages = bo->num_pages;
 	mem.size = mem.num_pages << PAGE_SHIFT;
@@ -1088,7 +1110,7 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 	int ret;
 	uint32_t new_flags;
 
-	lockdep_assert_held(&bo->resv->lock.base);
+	reservation_object_assert_held(bo->resv);
 	/*
 	 * Check whether we need to move buffer.
 	 */
@@ -1134,7 +1156,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
 	struct ttm_mem_global *mem_glob = bdev->glob->mem_glob;
 	bool locked;
 
-	ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false);
+	ret = ttm_mem_global_alloc(mem_glob, acc_size, ctx);
 	if (ret) {
 		pr_err("Out of kernel memory\n");
 		if (destroy)
@@ -1182,7 +1204,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
 	bo->sg = sg;
 	if (resv) {
 		bo->resv = resv;
-		lockdep_assert_held(&bo->resv->lock.base);
+		reservation_object_assert_held(bo->resv);
 	} else {
 		bo->resv = &bo->ttm_resv;
 	}
@@ -1204,7 +1226,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
 	 * since otherwise lockdep will be angered in radeon.
 	 */
 	if (!resv) {
-		locked = ww_mutex_trylock(&bo->resv->lock);
+		locked = reservation_object_trylock(bo->resv);
 		WARN_ON(!locked);
 	}
 
@@ -1333,8 +1355,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		while (!list_empty(&man->lru[i])) {
 			spin_unlock(&glob->lru_lock);
-			ret = ttm_mem_evict_first(bdev, NULL, mem_type,
-						  NULL, &ctx);
+			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
 			if (ret)
 				return ret;
 			spin_lock(&glob->lru_lock);
@@ -1455,7 +1476,6 @@ static void ttm_bo_global_kobj_release(struct kobject *kobj)
 	struct ttm_bo_global *glob =
 		container_of(kobj, struct ttm_bo_global, kobj);
 
-	ttm_mem_unregister_shrink(glob->mem_glob, &glob->shrink);
 	__free_page(glob->dummy_read_page);
 	kfree(glob);
 }
@@ -1480,6 +1500,7 @@ int ttm_bo_global_init(struct drm_global_reference *ref)
 	mutex_init(&glob->device_list_mutex);
 	spin_lock_init(&glob->lru_lock);
 	glob->mem_glob = bo_ref->mem_glob;
+	glob->mem_glob->bo_glob = glob;
 	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
 
 	if (unlikely(glob->dummy_read_page == NULL)) {
@@ -1490,14 +1511,6 @@ int ttm_bo_global_init(struct drm_global_reference *ref)
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		INIT_LIST_HEAD(&glob->swap_lru[i]);
 	INIT_LIST_HEAD(&glob->device_list);
-
-	ttm_mem_init_shrink(&glob->shrink, ttm_bo_swapout);
-	ret = ttm_mem_register_shrink(glob->mem_glob, &glob->shrink);
-	if (unlikely(ret != 0)) {
-		pr_err("Could not register buffer object swapout\n");
-		goto out_no_shrink;
-	}
-
 	atomic_set(&glob->bo_count, 0);
 
 	ret = kobject_init_and_add(
@@ -1505,8 +1518,6 @@ int ttm_bo_global_init(struct drm_global_reference *ref)
 	if (unlikely(ret != 0))
 		kobject_put(&glob->kobj);
 	return ret;
-out_no_shrink:
-	__free_page(glob->dummy_read_page);
 out_no_drp:
 	kfree(glob);
 	return ret;
@@ -1541,12 +1552,12 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev)
 	cancel_delayed_work_sync(&bdev->wq);
 
 	if (ttm_bo_delayed_delete(bdev, true))
-		TTM_DEBUG("Delayed destroy list was clean\n");
+		pr_debug("Delayed destroy list was clean\n");
 
 	spin_lock(&glob->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		if (list_empty(&bdev->man[0].lru[0]))
-			TTM_DEBUG("Swap list %d was clean\n", i);
+			pr_debug("Swap list %d was clean\n", i);
 	spin_unlock(&glob->lru_lock);
 
 	drm_vma_offset_manager_destroy(&bdev->vma_manager);
@@ -1689,21 +1700,20 @@ EXPORT_SYMBOL(ttm_bo_synccpu_write_release);
  * A buffer object shrink method that tries to swap out the first
  * buffer object on the bo_global::swap_lru list.
  */
-
-static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
+int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
 {
-	struct ttm_bo_global *glob =
-	    container_of(shrink, struct ttm_bo_global, shrink);
 	struct ttm_buffer_object *bo;
 	int ret = -EBUSY;
+	bool locked;
 	unsigned i;
 
 	spin_lock(&glob->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
-			ret = reservation_object_trylock(bo->resv) ? 0 : -EBUSY;
-			if (!ret)
+			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
+				ret = 0;
 				break;
+			}
 		}
 		if (!ret)
 			break;
@@ -1770,15 +1780,21 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	 * Unreserve without putting on LRU to avoid swapping out an
 	 * already swapped buffer.
 	 */
-
-	reservation_object_unlock(bo->resv);
+	if (locked)
+		reservation_object_unlock(bo->resv);
 	kref_put(&bo->list_kref, ttm_bo_release_list);
 	return ret;
 }
+EXPORT_SYMBOL(ttm_bo_swapout);
 
 void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
 {
-	while (ttm_bo_swapout(&bdev->glob->shrink) == 0)
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false
+	};
+
+	while (ttm_bo_swapout(bdev->glob, &ctx) == 0)
 		;
 }
 EXPORT_SYMBOL(ttm_bo_swapout_all);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index e7a519f1849b7b72d0d21fd3af44c753a5dfb81c..153de1bf0232fdeb22bbfe70e40d3f501d73b1cd 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -45,7 +45,7 @@ void ttm_bo_free_old_node(struct ttm_buffer_object *bo)
 }
 
 int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
-		    bool interruptible, bool no_wait_gpu,
+		   struct ttm_operation_ctx *ctx,
 		    struct ttm_mem_reg *new_mem)
 {
 	struct ttm_tt *ttm = bo->ttm;
@@ -53,7 +53,7 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
 	int ret;
 
 	if (old_mem->mem_type != TTM_PL_SYSTEM) {
-		ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+		ret = ttm_bo_wait(bo, ctx->interruptible, ctx->no_wait_gpu);
 
 		if (unlikely(ret != 0)) {
 			if (ret != -ERESTARTSYS)
@@ -73,7 +73,7 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
 		return ret;
 
 	if (new_mem->mem_type != TTM_PL_SYSTEM) {
-		ret = ttm_tt_bind(ttm, new_mem);
+		ret = ttm_tt_bind(ttm, new_mem, ctx);
 		if (unlikely(ret != 0))
 			return ret;
 	}
@@ -329,7 +329,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst,
 }
 
 int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
-		       bool interruptible, bool no_wait_gpu,
+		       struct ttm_operation_ctx *ctx,
 		       struct ttm_mem_reg *new_mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -345,7 +345,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	unsigned long add = 0;
 	int dir;
 
-	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	ret = ttm_bo_wait(bo, ctx->interruptible, ctx->no_wait_gpu);
 	if (ret)
 		return ret;
 
@@ -376,7 +376,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	 * TTM might be null for moves within the same region.
 	 */
 	if (ttm && ttm->state == tt_unpopulated) {
-		ret = ttm->bdev->driver->ttm_tt_populate(ttm);
+		ret = ttm->bdev->driver->ttm_tt_populate(ttm, ctx);
 		if (ret)
 			goto out1;
 	}
@@ -485,7 +485,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	fbo->acc_size = 0;
 	fbo->resv = &fbo->ttm_resv;
 	reservation_object_init(fbo->resv);
-	ret = ww_mutex_trylock(&fbo->resv->lock);
+	ret = reservation_object_trylock(fbo->resv);
 	WARN_ON(!ret);
 
 	*new_obj = fbo;
@@ -545,14 +545,19 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
 			   unsigned long num_pages,
 			   struct ttm_bo_kmap_obj *map)
 {
-	struct ttm_mem_reg *mem = &bo->mem; pgprot_t prot;
+	struct ttm_mem_reg *mem = &bo->mem;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false
+	};
 	struct ttm_tt *ttm = bo->ttm;
+	pgprot_t prot;
 	int ret;
 
 	BUG_ON(!ttm);
 
 	if (ttm->state == tt_unpopulated) {
-		ret = ttm->bdev->driver->ttm_tt_populate(ttm);
+		ret = ttm->bdev->driver->ttm_tt_populate(ttm, &ctx);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index c8ebb757e36b524f2074d67067888b61a012d9cd..08a3c324242e6fe705dc989e9888ac3b4fa23137 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -92,6 +92,18 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 	return ret;
 }
 
+static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
+				       unsigned long page_offset)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+
+	if (bdev->driver->io_mem_pfn)
+		return bdev->driver->io_mem_pfn(bo, page_offset);
+
+	return ((bo->mem.bus.base + bo->mem.bus.offset) >> PAGE_SHIFT)
+		+ page_offset;
+}
+
 static int ttm_bo_vm_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -215,12 +227,17 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf)
 		cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
 						cvma.vm_page_prot);
 	} else {
+		struct ttm_operation_ctx ctx = {
+			.interruptible = false,
+			.no_wait_gpu = false
+		};
+
 		ttm = bo->ttm;
 		cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
 						cvma.vm_page_prot);
 
 		/* Allocate all page at once, most common usage */
-		if (ttm->bdev->driver->ttm_tt_populate(ttm)) {
+		if (ttm->bdev->driver->ttm_tt_populate(ttm, &ctx)) {
 			retval = VM_FAULT_OOM;
 			goto out_io_unlock;
 		}
@@ -234,7 +251,7 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf)
 		if (bo->mem.bus.is_iomem) {
 			/* Iomem should not be marked encrypted */
 			cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
-			pfn = bdev->driver->io_mem_pfn(bo, page_offset);
+			pfn = ttm_bo_io_mem_pfn(bo, page_offset);
 		} else {
 			page = ttm->pages[page_offset];
 			if (unlikely(!page && i == 0)) {
@@ -404,14 +421,6 @@ static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
 	return bo;
 }
 
-unsigned long ttm_bo_default_io_mem_pfn(struct ttm_buffer_object *bo,
-					unsigned long page_offset)
-{
-	return ((bo->mem.bus.base + bo->mem.bus.offset) >> PAGE_SHIFT)
-		+ page_offset;
-}
-EXPORT_SYMBOL(ttm_bo_default_io_mem_pfn);
-
 int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
 		struct ttm_bo_device *bdev)
 {
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index e963749903986b8c3ea1296052422a79fc71646d..aa0c381369586b78504b7ae448e64cefbb2d5a41 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -211,35 +211,33 @@ static bool ttm_zones_above_swap_target(struct ttm_mem_global *glob,
  */
 
 static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
-		       uint64_t extra)
+			uint64_t extra, struct ttm_operation_ctx *ctx)
 {
 	int ret;
-	struct ttm_mem_shrink *shrink;
 
 	spin_lock(&glob->lock);
-	if (glob->shrink == NULL)
-		goto out;
 
 	while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
-		shrink = glob->shrink;
 		spin_unlock(&glob->lock);
-		ret = shrink->do_shrink(shrink);
+		ret = ttm_bo_swapout(glob->bo_glob, ctx);
 		spin_lock(&glob->lock);
 		if (unlikely(ret != 0))
-			goto out;
+			break;
 	}
-out:
+
 	spin_unlock(&glob->lock);
 }
 
-
-
 static void ttm_shrink_work(struct work_struct *work)
 {
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false
+	};
 	struct ttm_mem_global *glob =
 	    container_of(work, struct ttm_mem_global, work);
 
-	ttm_shrink(glob, true, 0ULL);
+	ttm_shrink(glob, true, 0ULL, &ctx);
 }
 
 static int ttm_mem_init_kernel_zone(struct ttm_mem_global *glob,
@@ -514,7 +512,7 @@ static int ttm_mem_global_reserve(struct ttm_mem_global *glob,
 static int ttm_mem_global_alloc_zone(struct ttm_mem_global *glob,
 				     struct ttm_mem_zone *single_zone,
 				     uint64_t memory,
-				     bool no_wait, bool interruptible)
+				     struct ttm_operation_ctx *ctx)
 {
 	int count = TTM_MEMORY_ALLOC_RETRIES;
 
@@ -522,33 +520,32 @@ static int ttm_mem_global_alloc_zone(struct ttm_mem_global *glob,
 					       single_zone,
 					       memory, true)
 			!= 0)) {
-		if (no_wait)
+		if (ctx->no_wait_gpu)
 			return -ENOMEM;
 		if (unlikely(count-- == 0))
 			return -ENOMEM;
-		ttm_shrink(glob, false, memory + (memory >> 2) + 16);
+		ttm_shrink(glob, false, memory + (memory >> 2) + 16, ctx);
 	}
 
 	return 0;
 }
 
 int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory,
-			 bool no_wait, bool interruptible)
+			 struct ttm_operation_ctx *ctx)
 {
 	/**
 	 * Normal allocations of kernel memory are registered in
 	 * all zones.
 	 */
 
-	return ttm_mem_global_alloc_zone(glob, NULL, memory, no_wait,
-					 interruptible);
+	return ttm_mem_global_alloc_zone(glob, NULL, memory, ctx);
 }
 EXPORT_SYMBOL(ttm_mem_global_alloc);
 
 int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
-			      struct page *page, uint64_t size)
+			      struct page *page, uint64_t size,
+			      struct ttm_operation_ctx *ctx)
 {
-
 	struct ttm_mem_zone *zone = NULL;
 
 	/**
@@ -563,7 +560,7 @@ int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
 	if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL)
 		zone = glob->zone_kernel;
 #endif
-	return ttm_mem_global_alloc_zone(glob, zone, size, false, false);
+	return ttm_mem_global_alloc_zone(glob, zone, size, ctx);
 }
 
 void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct page *page,
diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c
index 26a7ad0f478978205be87f9409f309021b610884..1aa2baa83959c3a3c60bb7fd891783410ec14882 100644
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -325,6 +325,10 @@ int ttm_ref_object_add(struct ttm_object_file *tfile,
 	struct ttm_ref_object *ref;
 	struct drm_hash_item *hash;
 	struct ttm_mem_global *mem_glob = tfile->tdev->mem_glob;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false
+	};
 	int ret = -EINVAL;
 
 	if (base->tfile != tfile && !base->shareable)
@@ -350,7 +354,7 @@ int ttm_ref_object_add(struct ttm_object_file *tfile,
 			return -EPERM;
 
 		ret = ttm_mem_global_alloc(mem_glob, sizeof(*ref),
-					   false, false);
+					   &ctx);
 		if (unlikely(ret != 0))
 			return ret;
 		ref = kmalloc(sizeof(*ref), GFP_KERNEL);
@@ -686,7 +690,10 @@ int ttm_prime_handle_to_fd(struct ttm_object_file *tfile,
 	dma_buf = prime->dma_buf;
 	if (!dma_buf || !get_dma_buf_unless_doomed(dma_buf)) {
 		DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
-
+		struct ttm_operation_ctx ctx = {
+			.interruptible = true,
+			.no_wait_gpu = false
+		};
 		exp_info.ops = &tdev->ops;
 		exp_info.size = prime->size;
 		exp_info.flags = flags;
@@ -696,7 +703,7 @@ int ttm_prime_handle_to_fd(struct ttm_object_file *tfile,
 		 * Need to create a new dma_buf, with memory accounting.
 		 */
 		ret = ttm_mem_global_alloc(tdev->mem_glob, tdev->dma_buf_size,
-					   false, true);
+					   &ctx);
 		if (unlikely(ret != 0)) {
 			mutex_unlock(&prime->mutex);
 			goto out_unref;
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 44343a2bf55c65458a196b5968b0c494f1c569b0..2b12c55a3bff82e48b581a60dba23b5c4c33b7a2 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -455,6 +455,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 		freed += (nr_free_pool - shrink_pages) << pool->order;
 		if (freed >= sc->nr_to_scan)
 			break;
+		shrink_pages <<= pool->order;
 	}
 	mutex_unlock(&lock);
 	return freed;
@@ -476,12 +477,12 @@ ttm_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 	return count;
 }
 
-static void ttm_pool_mm_shrink_init(struct ttm_pool_manager *manager)
+static int ttm_pool_mm_shrink_init(struct ttm_pool_manager *manager)
 {
 	manager->mm_shrink.count_objects = ttm_pool_shrink_count;
 	manager->mm_shrink.scan_objects = ttm_pool_shrink_scan;
 	manager->mm_shrink.seeks = 1;
-	register_shrinker(&manager->mm_shrink);
+	return register_shrinker(&manager->mm_shrink);
 }
 
 static void ttm_pool_mm_shrink_fini(struct ttm_pool_manager *manager)
@@ -543,7 +544,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
 	int r = 0;
 	unsigned i, j, cpages;
 	unsigned npages = 1 << order;
-	unsigned max_cpages = min(count, (unsigned)NUM_PAGES_TO_ALLOC);
+	unsigned max_cpages = min(count << order, (unsigned)NUM_PAGES_TO_ALLOC);
 
 	/* allocate array for page caching change */
 	caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL);
@@ -1006,6 +1007,8 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages)
 	pr_info("Initializing pool allocator\n");
 
 	_manager = kzalloc(sizeof(*_manager), GFP_KERNEL);
+	if (!_manager)
+		return -ENOMEM;
 
 	ttm_page_pool_init_locked(&_manager->wc_pool, GFP_HIGHUSER, "wc", 0);
 
@@ -1031,15 +1034,18 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages)
 
 	ret = kobject_init_and_add(&_manager->kobj, &ttm_pool_kobj_type,
 				   &glob->kobj, "pool");
-	if (unlikely(ret != 0)) {
-		kobject_put(&_manager->kobj);
-		_manager = NULL;
-		return ret;
-	}
-
-	ttm_pool_mm_shrink_init(_manager);
+	if (unlikely(ret != 0))
+		goto error;
 
+	ret = ttm_pool_mm_shrink_init(_manager);
+	if (unlikely(ret != 0))
+		goto error;
 	return 0;
+
+error:
+	kobject_put(&_manager->kobj);
+	_manager = NULL;
+	return ret;
 }
 
 void ttm_page_alloc_fini(void)
@@ -1057,7 +1063,7 @@ void ttm_page_alloc_fini(void)
 	_manager = NULL;
 }
 
-int ttm_pool_populate(struct ttm_tt *ttm)
+int ttm_pool_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
 {
 	struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
 	unsigned i;
@@ -1069,15 +1075,17 @@ int ttm_pool_populate(struct ttm_tt *ttm)
 	ret = ttm_get_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
 			    ttm->caching_state);
 	if (unlikely(ret != 0)) {
-		ttm_pool_unpopulate(ttm);
+		ttm_put_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
+			      ttm->caching_state);
 		return ret;
 	}
 
 	for (i = 0; i < ttm->num_pages; ++i) {
 		ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
-						PAGE_SIZE);
+						PAGE_SIZE, ctx);
 		if (unlikely(ret != 0)) {
-			ttm_pool_unpopulate(ttm);
+			ttm_put_pages(ttm->pages, ttm->num_pages,
+				      ttm->page_flags, ttm->caching_state);
 			return -ENOMEM;
 		}
 	}
@@ -1112,12 +1120,13 @@ void ttm_pool_unpopulate(struct ttm_tt *ttm)
 }
 EXPORT_SYMBOL(ttm_pool_unpopulate);
 
-int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt)
+int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt,
+					struct ttm_operation_ctx *ctx)
 {
 	unsigned i, j;
 	int r;
 
-	r = ttm_pool_populate(&tt->ttm);
+	r = ttm_pool_populate(&tt->ttm, ctx);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index bda00b2ab51c1a84cd7b4d0d57bae76512d30796..a88051552acee7c90a526486a0632f1b2be1f98a 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -61,6 +61,7 @@
 #define SMALL_ALLOCATION		4
 #define FREE_ALL_PAGES			(~0U)
 #define VADDR_FLAG_HUGE_POOL		1UL
+#define VADDR_FLAG_UPDATED_COUNT	2UL
 
 enum pool_type {
 	IS_UNDEFINED	= 0,
@@ -333,14 +334,18 @@ static void __ttm_dma_free_page(struct dma_pool *pool, struct dma_page *d_page)
 static struct dma_page *__ttm_dma_alloc_page(struct dma_pool *pool)
 {
 	struct dma_page *d_page;
+	unsigned long attrs = 0;
 	void *vaddr;
 
 	d_page = kmalloc(sizeof(struct dma_page), GFP_KERNEL);
 	if (!d_page)
 		return NULL;
 
-	vaddr = dma_alloc_coherent(pool->dev, pool->size, &d_page->dma,
-				   pool->gfp_flags);
+	if (pool->type & IS_HUGE)
+		attrs = DMA_ATTR_NO_WARN;
+
+	vaddr = dma_alloc_attrs(pool->dev, pool->size, &d_page->dma,
+				pool->gfp_flags, attrs);
 	if (vaddr) {
 		if (is_vmalloc_addr(vaddr))
 			d_page->p = vmalloc_to_page(vaddr);
@@ -870,18 +875,18 @@ static int ttm_dma_page_pool_fill_locked(struct dma_pool *pool,
 }
 
 /*
- * @return count of pages still required to fulfill the request.
  * The populate list is actually a stack (not that is matters as TTM
  * allocates one page at a time.
+ * return dma_page pointer if success, otherwise NULL.
  */
-static int ttm_dma_pool_get_pages(struct dma_pool *pool,
+static struct dma_page *ttm_dma_pool_get_pages(struct dma_pool *pool,
 				  struct ttm_dma_tt *ttm_dma,
 				  unsigned index)
 {
-	struct dma_page *d_page;
+	struct dma_page *d_page = NULL;
 	struct ttm_tt *ttm = &ttm_dma->ttm;
 	unsigned long irq_flags;
-	int count, r = -ENOMEM;
+	int count;
 
 	spin_lock_irqsave(&pool->lock, irq_flags);
 	count = ttm_dma_page_pool_fill_locked(pool, &irq_flags);
@@ -890,12 +895,11 @@ static int ttm_dma_pool_get_pages(struct dma_pool *pool,
 		ttm->pages[index] = d_page->p;
 		ttm_dma->dma_address[index] = d_page->dma;
 		list_move_tail(&d_page->page_list, &ttm_dma->pages_list);
-		r = 0;
 		pool->npages_in_use += 1;
 		pool->npages_free -= 1;
 	}
 	spin_unlock_irqrestore(&pool->lock, irq_flags);
-	return r;
+	return d_page;
 }
 
 static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
@@ -923,12 +927,14 @@ static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
  * On success pages list will hold count number of correctly
  * cached pages. On failure will hold the negative return value (-ENOMEM, etc).
  */
-int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
+int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
+			struct ttm_operation_ctx *ctx)
 {
 	struct ttm_tt *ttm = &ttm_dma->ttm;
 	struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
 	unsigned long num_pages = ttm->num_pages;
 	struct dma_pool *pool;
+	struct dma_page *d_page;
 	enum pool_type type;
 	unsigned i;
 	int ret;
@@ -957,17 +963,18 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 	while (num_pages >= HPAGE_PMD_NR) {
 		unsigned j;
 
-		ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
-		if (ret != 0)
+		d_page = ttm_dma_pool_get_pages(pool, ttm_dma, i);
+		if (!d_page)
 			break;
 
 		ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
-						pool->size);
+						pool->size, ctx);
 		if (unlikely(ret != 0)) {
 			ttm_dma_unpopulate(ttm_dma, dev);
 			return -ENOMEM;
 		}
 
+		d_page->vaddr |= VADDR_FLAG_UPDATED_COUNT;
 		for (j = i + 1; j < (i + HPAGE_PMD_NR); ++j) {
 			ttm->pages[j] = ttm->pages[j - 1] + 1;
 			ttm_dma->dma_address[j] = ttm_dma->dma_address[j - 1] +
@@ -991,19 +998,20 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 	}
 
 	while (num_pages) {
-		ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
-		if (ret != 0) {
+		d_page = ttm_dma_pool_get_pages(pool, ttm_dma, i);
+		if (!d_page) {
 			ttm_dma_unpopulate(ttm_dma, dev);
 			return -ENOMEM;
 		}
 
 		ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
-						pool->size);
+						pool->size, ctx);
 		if (unlikely(ret != 0)) {
 			ttm_dma_unpopulate(ttm_dma, dev);
 			return -ENOMEM;
 		}
 
+		d_page->vaddr |= VADDR_FLAG_UPDATED_COUNT;
 		++i;
 		--num_pages;
 	}
@@ -1044,8 +1052,11 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 				continue;
 
 			count++;
-			ttm_mem_global_free_page(ttm->glob->mem_glob,
-						 d_page->p, pool->size);
+			if (d_page->vaddr & VADDR_FLAG_UPDATED_COUNT) {
+				ttm_mem_global_free_page(ttm->glob->mem_glob,
+							 d_page->p, pool->size);
+				d_page->vaddr &= ~VADDR_FLAG_UPDATED_COUNT;
+			}
 			ttm_dma_page_put(pool, d_page);
 		}
 
@@ -1065,9 +1076,19 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 
 	/* make sure pages array match list and count number of pages */
 	count = 0;
-	list_for_each_entry(d_page, &ttm_dma->pages_list, page_list) {
+	list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list,
+				 page_list) {
 		ttm->pages[count] = d_page->p;
 		count++;
+
+		if (d_page->vaddr & VADDR_FLAG_UPDATED_COUNT) {
+			ttm_mem_global_free_page(ttm->glob->mem_glob,
+						 d_page->p, pool->size);
+			d_page->vaddr &= ~VADDR_FLAG_UPDATED_COUNT;
+		}
+
+		if (is_cached)
+			ttm_dma_page_put(pool, d_page);
 	}
 
 	spin_lock_irqsave(&pool->lock, irq_flags);
@@ -1087,19 +1108,6 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 	}
 	spin_unlock_irqrestore(&pool->lock, irq_flags);
 
-	if (is_cached) {
-		list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list, page_list) {
-			ttm_mem_global_free_page(ttm->glob->mem_glob,
-						 d_page->p, pool->size);
-			ttm_dma_page_put(pool, d_page);
-		}
-	} else {
-		for (i = 0; i < count; i++) {
-			ttm_mem_global_free_page(ttm->glob->mem_glob,
-						 ttm->pages[i], pool->size);
-		}
-	}
-
 	INIT_LIST_HEAD(&ttm_dma->pages_list);
 	for (i = 0; i < ttm->num_pages; i++) {
 		ttm->pages[i] = NULL;
@@ -1177,12 +1185,12 @@ ttm_dma_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 	return count;
 }
 
-static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager)
+static int ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager)
 {
 	manager->mm_shrink.count_objects = ttm_dma_pool_shrink_count;
 	manager->mm_shrink.scan_objects = &ttm_dma_pool_shrink_scan;
 	manager->mm_shrink.seeks = 1;
-	register_shrinker(&manager->mm_shrink);
+	return register_shrinker(&manager->mm_shrink);
 }
 
 static void ttm_dma_pool_mm_shrink_fini(struct ttm_pool_manager *manager)
@@ -1192,7 +1200,7 @@ static void ttm_dma_pool_mm_shrink_fini(struct ttm_pool_manager *manager)
 
 int ttm_dma_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages)
 {
-	int ret = -ENOMEM;
+	int ret;
 
 	WARN_ON(_manager);
 
@@ -1200,7 +1208,7 @@ int ttm_dma_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages)
 
 	_manager = kzalloc(sizeof(*_manager), GFP_KERNEL);
 	if (!_manager)
-		goto err;
+		return -ENOMEM;
 
 	mutex_init(&_manager->lock);
 	INIT_LIST_HEAD(&_manager->pools);
@@ -1212,13 +1220,17 @@ int ttm_dma_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages)
 	/* This takes care of auto-freeing the _manager */
 	ret = kobject_init_and_add(&_manager->kobj, &ttm_pool_kobj_type,
 				   &glob->kobj, "dma_pool");
-	if (unlikely(ret != 0)) {
-		kobject_put(&_manager->kobj);
-		goto err;
-	}
-	ttm_dma_pool_mm_shrink_init(_manager);
+	if (unlikely(ret != 0))
+		goto error;
+
+	ret = ttm_dma_pool_mm_shrink_init(_manager);
+	if (unlikely(ret != 0))
+		goto error;
 	return 0;
-err:
+
+error:
+	kobject_put(&_manager->kobj);
+	_manager = NULL;
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 8ebc8d3560c3669aab85072877703f977e9cc3ef..5a046a3c543a135df78ca11c2834acad3cac0e75 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -261,7 +261,8 @@ void ttm_tt_unbind(struct ttm_tt *ttm)
 	}
 }
 
-int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
+int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem,
+		struct ttm_operation_ctx *ctx)
 {
 	int ret = 0;
 
@@ -271,7 +272,7 @@ int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
 	if (ttm->state == tt_bound)
 		return 0;
 
-	ret = ttm->bdev->driver->ttm_tt_populate(ttm);
+	ret = ttm->bdev->driver->ttm_tt_populate(ttm, ctx);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/tve200/tve200_drm.h b/drivers/gpu/drm/tve200/tve200_drm.h
index 628b79324c489ed3872ea451d679e1702f64f6da..5c270055bd5855943e70ee25ff48a89f022eb23e 100644
--- a/drivers/gpu/drm/tve200/tve200_drm.h
+++ b/drivers/gpu/drm/tve200/tve200_drm.h
@@ -103,7 +103,6 @@ struct tve200_drm_dev_private {
 	struct drm_panel *panel;
 	struct drm_bridge *bridge;
 	struct drm_simple_display_pipe pipe;
-	struct drm_fbdev_cma *fbdev;
 
 	void *regs;
 	struct clk *pclk;
diff --git a/drivers/gpu/drm/tve200/tve200_drv.c b/drivers/gpu/drm/tve200/tve200_drv.c
index bd6c9454d767d9f3f11b6c5b348abe79f48da815..44911d9218641ae178ee91d0eaa61974e76bd096 100644
--- a/drivers/gpu/drm/tve200/tve200_drv.c
+++ b/drivers/gpu/drm/tve200/tve200_drv.c
@@ -46,6 +46,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_panel.h>
 #include <drm/drm_of.h>
@@ -130,8 +131,7 @@ static int tve200_modeset_init(struct drm_device *dev)
 	 * Passing in 16 here will make the RGB656 mode the default
 	 * Passing in 32 will use XRGB8888 mode
 	 */
-	priv->fbdev = drm_fbdev_cma_init(dev, 16,
-					 dev->mode_config.num_connector);
+	drm_fb_cma_fbdev_init(dev, 16, 0);
 	drm_kms_helper_poll_init(dev);
 
 	goto finish;
@@ -146,17 +146,10 @@ static int tve200_modeset_init(struct drm_device *dev)
 
 DEFINE_DRM_GEM_CMA_FOPS(drm_fops);
 
-static void tve200_lastclose(struct drm_device *dev)
-{
-	struct tve200_drm_dev_private *priv = dev->dev_private;
-
-	drm_fbdev_cma_restore_mode(priv->fbdev);
-}
-
 static struct drm_driver tve200_drm_driver = {
 	.driver_features =
 		DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_ATOMIC,
-	.lastclose = tve200_lastclose,
+	.lastclose = drm_fb_helper_lastclose,
 	.ioctls = NULL,
 	.fops = &drm_fops,
 	.name = "tve200",
@@ -270,8 +263,7 @@ static int tve200_remove(struct platform_device *pdev)
 	struct tve200_drm_dev_private *priv = drm->dev_private;
 
 	drm_dev_unregister(drm);
-	if (priv->fbdev)
-		drm_fbdev_cma_fini(priv->fbdev);
+	drm_fb_cma_fbdev_fini(drm);
 	if (priv->panel)
 		drm_panel_bridge_remove(priv->bridge);
 	drm_mode_config_cleanup(drm);
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 4ae45d7dac428984fa63081338ac23f283fc5ac8..2decc8e2c79f58aad6ea7626a32a0f26665d23eb 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -637,7 +637,8 @@ int vc4_bo_inc_usecnt(struct vc4_bo *bo)
 	mutex_lock(&bo->madv_lock);
 	switch (bo->madv) {
 	case VC4_MADV_WILLNEED:
-		refcount_inc(&bo->usecnt);
+		if (!refcount_inc_not_zero(&bo->usecnt))
+			refcount_set(&bo->usecnt, 1);
 		ret = 0;
 		break;
 	case VC4_MADV_DONTNEED:
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index e3c29729da2ed669076c061a12c00fa73a51c007..ceb385fd69c595ea5acfca2dcd702921226af174 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -111,13 +111,6 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static void vc4_lastclose(struct drm_device *dev)
-{
-	struct vc4_dev *vc4 = to_vc4_dev(dev);
-
-	drm_fbdev_cma_restore_mode(vc4->fbdev);
-}
-
 static const struct vm_operations_struct vc4_vm_ops = {
 	.fault = vc4_fault,
 	.open = drm_gem_vm_open,
@@ -159,7 +152,7 @@ static struct drm_driver vc4_drm_driver = {
 			    DRIVER_HAVE_IRQ |
 			    DRIVER_RENDER |
 			    DRIVER_PRIME),
-	.lastclose = vc4_lastclose,
+	.lastclose = drm_fb_helper_lastclose,
 	.irq_handler = vc4_irq,
 	.irq_preinstall = vc4_irq_preinstall,
 	.irq_postinstall = vc4_irq_postinstall,
@@ -301,12 +294,10 @@ static void vc4_drm_unbind(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct drm_device *drm = platform_get_drvdata(pdev);
-	struct vc4_dev *vc4 = to_vc4_dev(drm);
 
 	drm_dev_unregister(drm);
 
-	if (vc4->fbdev)
-		drm_fbdev_cma_fini(vc4->fbdev);
+	drm_fb_cma_fbdev_fini(drm);
 
 	drm_mode_config_cleanup(drm);
 
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 9c0d380c96f260573b39458b6d8631052e764a71..3af22936d9b3b20dc34eaa7e6b47f18099bfdb51 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -39,8 +39,6 @@ struct vc4_dev {
 	struct vc4_dsi *dsi1;
 	struct vc4_vec *vec;
 
-	struct drm_fbdev_cma *fbdev;
-
 	struct vc4_hang_state *hang_state;
 
 	/* The kernel-space BO cache.  Tracks buffers that have been
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 6c32c89a83a96687d2a817522288ad205ffd5c63..638540943c61a5e095c87be8d2b2bf543ea933b1 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -888,8 +888,10 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 	/* If we got force-completed because of GPU reset rather than
 	 * through our IRQ handler, signal the fence now.
 	 */
-	if (exec->fence)
+	if (exec->fence) {
 		dma_fence_signal(exec->fence);
+		dma_fence_put(exec->fence);
+	}
 
 	if (exec->bo) {
 		for (i = 0; i < exec->bo_count; i++) {
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
index 61b2e5377993dc319cf6b362b66f0c3db9a768cd..3dd62d75f5319dbffcd9d27ea6f60927d193eb92 100644
--- a/drivers/gpu/drm/vc4/vc4_irq.c
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -139,6 +139,7 @@ vc4_irq_finish_render_job(struct drm_device *dev)
 	list_move_tail(&exec->head, &vc4->job_done_list);
 	if (exec->fence) {
 		dma_fence_signal_locked(exec->fence);
+		dma_fence_put(exec->fence);
 		exec->fence = NULL;
 	}
 	vc4_submit_next_render_job(dev);
@@ -208,9 +209,6 @@ vc4_irq_postinstall(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
-	/* Undo the effects of a previous vc4_irq_uninstall. */
-	enable_irq(dev->irq);
-
 	/* Enable both the render done and out of memory interrupts. */
 	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
 
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 50c4959b5bd33efe329119a0aa9fd39b255e098f..4256f294c3468416580c03132726517bca134c21 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -19,17 +19,11 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_plane_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include "vc4_drv.h"
 
-static void vc4_output_poll_changed(struct drm_device *dev)
-{
-	struct vc4_dev *vc4 = to_vc4_dev(dev);
-
-	drm_fbdev_cma_hotplug_event(vc4->fbdev);
-}
-
 static void
 vc4_atomic_complete_commit(struct drm_atomic_state *state)
 {
@@ -194,7 +188,7 @@ static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev,
 }
 
 static const struct drm_mode_config_funcs vc4_mode_funcs = {
-	.output_poll_changed = vc4_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = vc4_atomic_commit,
 	.fb_create = vc4_fb_create,
@@ -224,12 +218,8 @@ int vc4_kms_load(struct drm_device *dev)
 
 	drm_mode_config_reset(dev);
 
-	if (dev->mode_config.num_connector) {
-		vc4->fbdev = drm_fbdev_cma_init(dev, 32,
-						dev->mode_config.num_connector);
-		if (IS_ERR(vc4->fbdev))
-			vc4->fbdev = NULL;
-	}
+	if (dev->mode_config.num_connector)
+		drm_fb_cma_fbdev_init(dev, 32, 0);
 
 	drm_kms_helper_poll_init(dev);
 
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 622cd43840b8c53588396b6d1d82b5d234a69113..493f392b3a0a90e70820d3582c0847ebe3cc0bd7 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -327,6 +327,9 @@ static int vc4_v3d_runtime_resume(struct device *dev)
 		return ret;
 
 	vc4_v3d_init_hw(vc4->dev);
+
+	/* We disabled the IRQ as part of vc4_irq_uninstall in suspend. */
+	enable_irq(vc4->dev->irq);
 	vc4_irq_postinstall(vc4->dev);
 
 	return 0;
diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c
index 6f66b7347cd0d68c1257170e3521bbdd100bbaea..0b90cdb3d9fef2d2dc4717ca450c59acd685c477 100644
--- a/drivers/gpu/drm/virtio/virtgpu_object.c
+++ b/drivers/gpu/drm/virtio/virtgpu_object.c
@@ -124,13 +124,17 @@ int virtio_gpu_object_get_sg_table(struct virtio_gpu_device *qdev,
 	int ret;
 	struct page **pages = bo->tbo.ttm->pages;
 	int nr_pages = bo->tbo.num_pages;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false
+	};
 
 	/* wtf swapping */
 	if (bo->pages)
 		return 0;
 
 	if (bo->tbo.ttm->state == tt_unpopulated)
-		bo->tbo.ttm->bdev->driver->ttm_tt_populate(bo->tbo.ttm);
+		bo->tbo.ttm->bdev->driver->ttm_tt_populate(bo->tbo.ttm, &ctx);
 	bo->pages = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
 	if (!bo->pages)
 		goto out;
diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c
index 488c6bd032fc6ab6bba167a76db8d4a60c28c59d..36655b709eb2e52035554d24caddfe7cb79fe8ea 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ttm.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c
@@ -324,12 +324,13 @@ static struct ttm_backend_func virtio_gpu_backend_func = {
 	.destroy = &virtio_gpu_ttm_backend_destroy,
 };
 
-static int virtio_gpu_ttm_tt_populate(struct ttm_tt *ttm)
+static int virtio_gpu_ttm_tt_populate(struct ttm_tt *ttm,
+		struct ttm_operation_ctx *ctx)
 {
 	if (ttm->state != tt_unpopulated)
 		return 0;
 
-	return ttm_pool_populate(ttm);
+	return ttm_pool_populate(ttm, ctx);
 }
 
 static void virtio_gpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
@@ -430,7 +431,6 @@ static struct ttm_bo_driver virtio_gpu_bo_driver = {
 	.verify_access = &virtio_gpu_verify_access,
 	.io_mem_reserve = &virtio_gpu_ttm_io_mem_reserve,
 	.io_mem_free = &virtio_gpu_ttm_io_mem_free,
-	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 	.move_notify = &virtio_gpu_bo_move_notify,
 	.swap_notify = &virtio_gpu_bo_swap_notify,
 };
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c b/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c
index 9c42e96da510560b78a9727da4ae6d158505fc4b..55d32ae43aa4ace13b6d33ed841f2bd235339f3a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c
@@ -1202,10 +1202,14 @@ struct vmw_ctx_binding_state *
 vmw_binding_state_alloc(struct vmw_private *dev_priv)
 {
 	struct vmw_ctx_binding_state *cbs;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false
+	};
 	int ret;
 
 	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), sizeof(*cbs),
-				   false, false);
+				&ctx);
 	if (ret)
 		return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index c7056322211cc80fa5b42742ffcd496973d19ed6..22231bc9e84511ebea9b50a806971b3fa3a95437 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -394,6 +394,10 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
 	struct vmw_private *dev_priv = vmw_tt->dev_priv;
 	struct ttm_mem_global *glob = vmw_mem_glob(dev_priv);
 	struct vmw_sg_table *vsgt = &vmw_tt->vsgt;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false
+	};
 	struct vmw_piter iter;
 	dma_addr_t old;
 	int ret = 0;
@@ -417,8 +421,7 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
 			sgt_size = ttm_round_pot(sizeof(struct sg_table));
 		}
 		vmw_tt->sg_alloc_size = sgt_size + sgl_size * vsgt->num_pages;
-		ret = ttm_mem_global_alloc(glob, vmw_tt->sg_alloc_size, false,
-					   true);
+		ret = ttm_mem_global_alloc(glob, vmw_tt->sg_alloc_size, &ctx);
 		if (unlikely(ret != 0))
 			return ret;
 
@@ -632,7 +635,7 @@ static void vmw_ttm_destroy(struct ttm_tt *ttm)
 }
 
 
-static int vmw_ttm_populate(struct ttm_tt *ttm)
+static int vmw_ttm_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
 {
 	struct vmw_ttm_tt *vmw_tt =
 		container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm);
@@ -646,15 +649,16 @@ static int vmw_ttm_populate(struct ttm_tt *ttm)
 	if (dev_priv->map_mode == vmw_dma_alloc_coherent) {
 		size_t size =
 			ttm_round_pot(ttm->num_pages * sizeof(dma_addr_t));
-		ret = ttm_mem_global_alloc(glob, size, false, true);
+		ret = ttm_mem_global_alloc(glob, size, ctx);
 		if (unlikely(ret != 0))
 			return ret;
 
-		ret = ttm_dma_populate(&vmw_tt->dma_ttm, dev_priv->dev->dev);
+		ret = ttm_dma_populate(&vmw_tt->dma_ttm, dev_priv->dev->dev,
+					ctx);
 		if (unlikely(ret != 0))
 			ttm_mem_global_free(glob, size);
 	} else
-		ret = ttm_pool_populate(ttm);
+		ret = ttm_pool_populate(ttm, ctx);
 
 	return ret;
 }
@@ -859,5 +863,4 @@ struct ttm_bo_driver vmw_bo_driver = {
 	.fault_reserve_notify = &vmw_ttm_fault_reserve_notify,
 	.io_mem_reserve = &vmw_ttm_io_mem_reserve,
 	.io_mem_free = &vmw_ttm_io_mem_free,
-	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
index 4212b3e673bce2df55c5c6b71894a97fbe9b7c5f..3767ac335acae2a0013b287785c4538806051efc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
@@ -746,6 +746,10 @@ static int vmw_context_define(struct drm_device *dev, void *data,
 	struct vmw_resource *tmp;
 	struct drm_vmw_context_arg *arg = (struct drm_vmw_context_arg *)data;
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct ttm_operation_ctx ttm_opt_ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false
+	};
 	int ret;
 
 	if (!dev_priv->has_dx && dx) {
@@ -768,7 +772,7 @@ static int vmw_context_define(struct drm_device *dev, void *data,
 
 	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
 				   vmw_user_context_size,
-				   false, true);
+				   &ttm_opt_ctx);
 	if (unlikely(ret != 0)) {
 		if (ret != -ERESTARTSYS)
 			DRM_ERROR("Out of graphics memory for context"
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
index 92df0b08c194b430bdb02b7b48ea00fef7944480..cbf54ea7b4c0e05f9efd37053f1ea792d7c68b71 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
@@ -573,6 +573,10 @@ struct vmw_resource *vmw_cotable_alloc(struct vmw_private *dev_priv,
 				       u32 type)
 {
 	struct vmw_cotable *vcotbl;
+	struct ttm_operation_ctx ttm_opt_ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false
+	};
 	int ret;
 	u32 num_entries;
 
@@ -580,7 +584,7 @@ struct vmw_resource *vmw_cotable_alloc(struct vmw_private *dev_priv,
 		cotable_acc_size = ttm_round_pot(sizeof(struct vmw_cotable));
 
 	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
-				   cotable_acc_size, false, true);
+				   cotable_acc_size, &ttm_opt_ctx);
 	if (unlikely(ret))
 		return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index b700667f6f0b0601ee1a3db668aee6868a4cf75c..c9d5cc237124e27abec4821e98b6b651ae104007 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2731,6 +2731,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
 	}
 
 	view_type = vmw_view_cmd_to_type(header->id);
+	if (view_type == vmw_view_max)
+		return -EINVAL;
 	cmd = container_of(header, typeof(*cmd), header);
 	ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
 				user_surface_converter,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index d6b1c509ae019d1aae404ff5163506d5e819155e..6c5c75cf5e6c3fea4a739c86bf1975adffceb262 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -588,6 +588,10 @@ int vmw_user_fence_create(struct drm_file *file_priv,
 	struct vmw_user_fence *ufence;
 	struct vmw_fence_obj *tmp;
 	struct ttm_mem_global *mem_glob = vmw_mem_glob(fman->dev_priv);
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false
+	};
 	int ret;
 
 	/*
@@ -596,7 +600,7 @@ int vmw_user_fence_create(struct drm_file *file_priv,
 	 */
 
 	ret = ttm_mem_global_alloc(mem_glob, fman->user_fence_size,
-				   false, false);
+				   &ctx);
 	if (unlikely(ret != 0))
 		return ret;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index a2a93d7e2a0420612eadc9070cc3b9ffb61986e0..87a443013cbf7ac085f8d3c64570aabf971b75f9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -683,7 +683,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane)
 	vps->pinned = 0;
 
 	/* Mapping is managed by prepare_fb/cleanup_fb */
-	memset(&vps->guest_map, 0, sizeof(vps->guest_map));
 	memset(&vps->host_map, 0, sizeof(vps->host_map));
 	vps->cpp = 0;
 
@@ -746,11 +745,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane,
 
 
 	/* Should have been freed by cleanup_fb */
-	if (vps->guest_map.virtual) {
-		DRM_ERROR("Guest mapping not freed\n");
-		ttm_bo_kunmap(&vps->guest_map);
-	}
-
 	if (vps->host_map.virtual) {
 		DRM_ERROR("Host mapping not freed\n");
 		ttm_bo_kunmap(&vps->host_map);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index ff9c8389ff21c3b2923c8387455976d5425e33f3..cd9da2dd79af1a062d4aaa3d6b6ded468bd4207e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -175,7 +175,7 @@ struct vmw_plane_state {
 	int pinned;
 
 	/* For CPU Blit */
-	struct ttm_bo_kmap_obj host_map, guest_map;
+	struct ttm_bo_kmap_obj host_map;
 	unsigned int cpp;
 };
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
index b17f08fc50d3fa2306505bedf33d6eea1ec7e323..736ca47e28ea6d06fd1bff6ee2a6b30411308604 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
@@ -240,6 +240,10 @@ static int vmw_otable_batch_setup(struct vmw_private *dev_priv,
 	unsigned long offset;
 	unsigned long bo_size;
 	struct vmw_otable *otables = batch->otables;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false
+	};
 	SVGAOTableType i;
 	int ret;
 
@@ -264,7 +268,7 @@ static int vmw_otable_batch_setup(struct vmw_private *dev_priv,
 
 	ret = ttm_bo_reserve(batch->otable_bo, false, true, NULL);
 	BUG_ON(ret != 0);
-	ret = vmw_bo_driver.ttm_tt_populate(batch->otable_bo->ttm);
+	ret = vmw_bo_driver.ttm_tt_populate(batch->otable_bo->ttm, &ctx);
 	if (unlikely(ret != 0))
 		goto out_unreserve;
 	ret = vmw_bo_map_dma(batch->otable_bo);
@@ -430,6 +434,11 @@ static int vmw_mob_pt_populate(struct vmw_private *dev_priv,
 			       struct vmw_mob *mob)
 {
 	int ret;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false
+	};
+
 	BUG_ON(mob->pt_bo != NULL);
 
 	ret = ttm_bo_create(&dev_priv->bdev, mob->num_pages * PAGE_SIZE,
@@ -442,7 +451,7 @@ static int vmw_mob_pt_populate(struct vmw_private *dev_priv,
 	ret = ttm_bo_reserve(mob->pt_bo, false, true, NULL);
 
 	BUG_ON(ret != 0);
-	ret = vmw_bo_driver.ttm_tt_populate(mob->pt_bo->ttm);
+	ret = vmw_bo_driver.ttm_tt_populate(mob->pt_bo->ttm, &ctx);
 	if (unlikely(ret != 0))
 		goto out_unreserve;
 	ret = vmw_bo_map_dma(mob->pt_bo);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
index 004e18b8832c41a40622f446befa889b0640d96a..73b8e9a163685677c938a98126a7ce402bcaf6d8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
@@ -607,6 +607,10 @@ int vmw_dx_shader_add(struct vmw_cmdbuf_res_manager *man,
 	struct vmw_dx_shader *shader;
 	struct vmw_resource *res;
 	struct vmw_private *dev_priv = ctx->dev_priv;
+	struct ttm_operation_ctx ttm_opt_ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false
+	};
 	int ret;
 
 	if (!vmw_shader_dx_size)
@@ -616,7 +620,7 @@ int vmw_dx_shader_add(struct vmw_cmdbuf_res_manager *man,
 		return -EINVAL;
 
 	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), vmw_shader_dx_size,
-				   false, true);
+				   &ttm_opt_ctx);
 	if (ret) {
 		if (ret != -ERESTARTSYS)
 			DRM_ERROR("Out of graphics memory for shader "
@@ -730,6 +734,10 @@ static int vmw_user_shader_alloc(struct vmw_private *dev_priv,
 {
 	struct vmw_user_shader *ushader;
 	struct vmw_resource *res, *tmp;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false
+	};
 	int ret;
 
 	/*
@@ -742,7 +750,7 @@ static int vmw_user_shader_alloc(struct vmw_private *dev_priv,
 
 	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
 				   vmw_user_shader_size,
-				   false, true);
+				   &ctx);
 	if (unlikely(ret != 0)) {
 		if (ret != -ERESTARTSYS)
 			DRM_ERROR("Out of graphics memory for shader "
@@ -800,6 +808,10 @@ static struct vmw_resource *vmw_shader_alloc(struct vmw_private *dev_priv,
 {
 	struct vmw_shader *shader;
 	struct vmw_resource *res;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false
+	};
 	int ret;
 
 	/*
@@ -812,7 +824,7 @@ static struct vmw_resource *vmw_shader_alloc(struct vmw_private *dev_priv,
 
 	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
 				   vmw_shader_size,
-				   false, true);
+				   &ctx);
 	if (unlikely(ret != 0)) {
 		if (ret != -ERESTARTSYS)
 			DRM_ERROR("Out of graphics memory for shader "
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_simple_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_simple_resource.c
index 051d3b39b0eaf3db9886251f647a409d6be430ac..a0cb310665cc50548680ff6e729ea79dfc7269be 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_simple_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_simple_resource.c
@@ -149,6 +149,10 @@ vmw_simple_resource_create_ioctl(struct drm_device *dev, void *data,
 	struct vmw_resource *res;
 	struct vmw_resource *tmp;
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false
+	};
 	size_t alloc_size;
 	size_t account_size;
 	int ret;
@@ -162,7 +166,7 @@ vmw_simple_resource_create_ioctl(struct drm_device *dev, void *data,
 		return ret;
 
 	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), account_size,
-				   false, true);
+				   &ctx);
 	ttm_read_unlock(&dev_priv->reservation_sem);
 	if (ret) {
 		if (ret != -ERESTARTSYS)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_so.c b/drivers/gpu/drm/vmwgfx/vmwgfx_so.c
index 5a73eebd0f35f153d498d29f638d1acf2a59e30b..d3573c37c436575f4fed5fbe20b4cac050172ad8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_so.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_so.c
@@ -329,6 +329,10 @@ int vmw_view_add(struct vmw_cmdbuf_res_manager *man,
 	struct vmw_private *dev_priv = ctx->dev_priv;
 	struct vmw_resource *res;
 	struct vmw_view *view;
+	struct ttm_operation_ctx ttm_opt_ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false
+	};
 	size_t size;
 	int ret;
 
@@ -345,7 +349,7 @@ int vmw_view_add(struct vmw_cmdbuf_res_manager *man,
 
 	size = offsetof(struct vmw_view, cmd) + cmd_size;
 
-	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), size, false, true);
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), size, &ttm_opt_ctx);
 	if (ret) {
 		if (ret != -ERESTARTSYS)
 			DRM_ERROR("Out of graphics memory for view"
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 90b5437fd787e0de7d360b5b697fb33d4a8551c3..b68d74888ab1100be82f8a2a9fdc3234a4e04293 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -114,7 +114,7 @@ struct vmw_screen_target_display_unit {
 	bool defined;
 
 	/* For CPU Blit */
-	struct ttm_bo_kmap_obj host_map, guest_map;
+	struct ttm_bo_kmap_obj host_map;
 	unsigned int cpp;
 };
 
@@ -695,7 +695,8 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 	s32 src_pitch, dst_pitch;
 	u8 *src, *dst;
 	bool not_used;
-
+	struct ttm_bo_kmap_obj guest_map;
+	int ret;
 
 	if (!dirty->num_hits)
 		return;
@@ -706,6 +707,13 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 	if (width == 0 || height == 0)
 		return;
 
+	ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
+			  &guest_map);
+	if (ret) {
+		DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
+			  ret);
+		goto out_cleanup;
+	}
 
 	/* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
 	src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
@@ -713,7 +721,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 	src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
 
 	dst_pitch = ddirty->pitch;
-	dst = ttm_kmap_obj_virtual(&stdu->guest_map, &not_used);
+	dst = ttm_kmap_obj_virtual(&guest_map, &not_used);
 	dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
 
 
@@ -772,6 +780,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 		vmw_fifo_commit(dev_priv, sizeof(*cmd));
 	}
 
+	ttm_bo_kunmap(&guest_map);
 out_cleanup:
 	ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
 	ddirty->right = ddirty->bottom = S32_MIN;
@@ -1109,9 +1118,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
 {
 	struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
 
-	if (vps->guest_map.virtual)
-		ttm_bo_kunmap(&vps->guest_map);
-
 	if (vps->host_map.virtual)
 		ttm_bo_kunmap(&vps->host_map);
 
@@ -1277,33 +1283,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
 	 */
 	if (vps->content_fb_type == SEPARATE_DMA &&
 	    !(dev_priv->capabilities & SVGA_CAP_3D)) {
-
-		struct vmw_framebuffer_dmabuf *new_vfbd;
-
-		new_vfbd = vmw_framebuffer_to_vfbd(new_fb);
-
-		ret = ttm_bo_reserve(&new_vfbd->buffer->base, false, false,
-				     NULL);
-		if (ret)
-			goto out_srf_unpin;
-
-		ret = ttm_bo_kmap(&new_vfbd->buffer->base, 0,
-				  new_vfbd->buffer->base.num_pages,
-				  &vps->guest_map);
-
-		ttm_bo_unreserve(&new_vfbd->buffer->base);
-
-		if (ret) {
-			DRM_ERROR("Failed to map content buffer to CPU\n");
-			goto out_srf_unpin;
-		}
-
 		ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
 				  vps->surf->res.backup->base.num_pages,
 				  &vps->host_map);
 		if (ret) {
 			DRM_ERROR("Failed to map display buffer to CPU\n");
-			ttm_bo_kunmap(&vps->guest_map);
 			goto out_srf_unpin;
 		}
 
@@ -1350,7 +1334,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
 	stdu->display_srf = vps->surf;
 	stdu->content_fb_type = vps->content_fb_type;
 	stdu->cpp = vps->cpp;
-	memcpy(&stdu->guest_map, &vps->guest_map, sizeof(vps->guest_map));
 	memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));
 
 	if (!stdu->defined)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 6ac094ee898356593d73c1b615d8c1b9115fc892..db1bb166845eaf5033e3ca3a100625c8778a46b2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -700,6 +700,10 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	struct drm_vmw_surface_create_req *req = &arg->req;
 	struct drm_vmw_surface_arg *rep = &arg->rep;
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false
+	};
 	int ret;
 	int i, j;
 	uint32_t cur_bo_offset;
@@ -741,7 +745,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 		return ret;
 
 	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
-				   size, false, true);
+				   size, &ctx);
 	if (unlikely(ret != 0)) {
 		if (ret != -ERESTARTSYS)
 			DRM_ERROR("Out of graphics memory for surface"
@@ -1479,6 +1483,10 @@ int vmw_surface_gb_priv_define(struct drm_device *dev,
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
 	struct vmw_user_surface *user_srf;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false
+	};
 	struct vmw_surface *srf;
 	int ret;
 	u32 num_layers;
@@ -1525,7 +1533,7 @@ int vmw_surface_gb_priv_define(struct drm_device *dev,
 		return ret;
 
 	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
-				   user_accounting_size, false, true);
+				   user_accounting_size, &ctx);
 	if (unlikely(ret != 0)) {
 		if (ret != -ERESTARTSYS)
 			DRM_ERROR("Out of graphics memory for surface"
diff --git a/drivers/gpu/drm/zte/zx_drm_drv.c b/drivers/gpu/drm/zte/zx_drm_drv.c
index e8b8266c0cde6fa66ebe15423071630dd41ed1a0..6f4205e80378a673fd05812d51beaacf8832f57c 100644
--- a/drivers/gpu/drm/zte/zx_drm_drv.c
+++ b/drivers/gpu/drm/zte/zx_drm_drv.c
@@ -29,37 +29,19 @@
 #include "zx_drm_drv.h"
 #include "zx_vou.h"
 
-struct zx_drm_private {
-	struct drm_fbdev_cma *fbdev;
-};
-
-static void zx_drm_fb_output_poll_changed(struct drm_device *drm)
-{
-	struct zx_drm_private *priv = drm->dev_private;
-
-	drm_fbdev_cma_hotplug_event(priv->fbdev);
-}
-
 static const struct drm_mode_config_funcs zx_drm_mode_config_funcs = {
 	.fb_create = drm_gem_fb_create,
-	.output_poll_changed = zx_drm_fb_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
 
-static void zx_drm_lastclose(struct drm_device *drm)
-{
-	struct zx_drm_private *priv = drm->dev_private;
-
-	drm_fbdev_cma_restore_mode(priv->fbdev);
-}
-
 DEFINE_DRM_GEM_CMA_FOPS(zx_drm_fops);
 
 static struct drm_driver zx_drm_driver = {
 	.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME |
 			   DRIVER_ATOMIC,
-	.lastclose = zx_drm_lastclose,
+	.lastclose = drm_fb_helper_lastclose,
 	.gem_free_object_unlocked = drm_gem_cma_free_object,
 	.gem_vm_ops = &drm_gem_cma_vm_ops,
 	.dumb_create = drm_gem_cma_dumb_create,
@@ -83,18 +65,12 @@ static struct drm_driver zx_drm_driver = {
 static int zx_drm_bind(struct device *dev)
 {
 	struct drm_device *drm;
-	struct zx_drm_private *priv;
 	int ret;
 
-	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
 	drm = drm_dev_alloc(&zx_drm_driver, dev);
 	if (IS_ERR(drm))
 		return PTR_ERR(drm);
 
-	drm->dev_private = priv;
 	dev_set_drvdata(dev, drm);
 
 	drm_mode_config_init(drm);
@@ -125,12 +101,9 @@ static int zx_drm_bind(struct device *dev)
 	drm_mode_config_reset(drm);
 	drm_kms_helper_poll_init(drm);
 
-	priv->fbdev = drm_fbdev_cma_init(drm, 32,
-					 drm->mode_config.num_connector);
-	if (IS_ERR(priv->fbdev)) {
-		ret = PTR_ERR(priv->fbdev);
+	ret = drm_fb_cma_fbdev_init(drm, 32, 0);
+	if (ret) {
 		DRM_DEV_ERROR(dev, "failed to init cma fbdev: %d\n", ret);
-		priv->fbdev = NULL;
 		goto out_poll_fini;
 	}
 
@@ -141,10 +114,7 @@ static int zx_drm_bind(struct device *dev)
 	return 0;
 
 out_fbdev_fini:
-	if (priv->fbdev) {
-		drm_fbdev_cma_fini(priv->fbdev);
-		priv->fbdev = NULL;
-	}
+	drm_fb_cma_fbdev_fini(drm);
 out_poll_fini:
 	drm_kms_helper_poll_fini(drm);
 	drm_mode_config_cleanup(drm);
@@ -152,7 +122,6 @@ static int zx_drm_bind(struct device *dev)
 	component_unbind_all(dev, drm);
 out_unregister:
 	dev_set_drvdata(dev, NULL);
-	drm->dev_private = NULL;
 	drm_dev_unref(drm);
 	return ret;
 }
@@ -160,18 +129,13 @@ static int zx_drm_bind(struct device *dev)
 static void zx_drm_unbind(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
-	struct zx_drm_private *priv = drm->dev_private;
 
 	drm_dev_unregister(drm);
-	if (priv->fbdev) {
-		drm_fbdev_cma_fini(priv->fbdev);
-		priv->fbdev = NULL;
-	}
+	drm_fb_cma_fbdev_fini(drm);
 	drm_kms_helper_poll_fini(drm);
 	drm_mode_config_cleanup(drm);
 	component_unbind_all(dev, drm);
 	dev_set_drvdata(dev, NULL);
-	drm->dev_private = NULL;
 	drm_dev_unref(drm);
 }
 
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 2e57c9cea696e317ae67a5ee9a7eb2b512fb2cf7..88a3558b7916e454a9cf5e365916b0751cc531f6 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -211,8 +211,7 @@ int host1x_device_init(struct host1x_device *device)
 				dev_err(&device->dev,
 					"failed to initialize %s: %d\n",
 					dev_name(client->dev), err);
-				mutex_unlock(&device->clients_lock);
-				return err;
+				goto teardown;
 			}
 		}
 	}
@@ -220,6 +219,14 @@ int host1x_device_init(struct host1x_device *device)
 	mutex_unlock(&device->clients_lock);
 
 	return 0;
+
+teardown:
+	list_for_each_entry_continue_reverse(client, &device->clients, list)
+		if (client->ops->exit)
+			client->ops->exit(client);
+
+	mutex_unlock(&device->clients_lock);
+	return err;
 }
 EXPORT_SYMBOL(host1x_device_init);
 
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index bf67c3aeb6342e0554d5e568365f2272bf35ef73..03db71173f5ded9cd0448bd441cf8c0378b18b16 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -218,20 +218,27 @@ static int host1x_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	if (iommu_present(&platform_bus_type)) {
+	host->group = iommu_group_get(&pdev->dev);
+	if (host->group) {
 		struct iommu_domain_geometry *geometry;
 		unsigned long order;
 
 		host->domain = iommu_domain_alloc(&platform_bus_type);
-		if (!host->domain)
-			return -ENOMEM;
-
-		err = iommu_attach_device(host->domain, &pdev->dev);
-		if (err == -ENODEV) {
-			iommu_domain_free(host->domain);
-			host->domain = NULL;
-			goto skip_iommu;
-		} else if (err) {
+		if (!host->domain) {
+			err = -ENOMEM;
+			goto put_group;
+		}
+
+		err = iommu_attach_group(host->domain, host->group);
+		if (err) {
+			if (err == -ENODEV) {
+				iommu_domain_free(host->domain);
+				host->domain = NULL;
+				iommu_group_put(host->group);
+				host->group = NULL;
+				goto skip_iommu;
+			}
+
 			goto fail_free_domain;
 		}
 
@@ -294,13 +301,15 @@ static int host1x_probe(struct platform_device *pdev)
 fail_free_channels:
 	host1x_channel_list_free(&host->channel_list);
 fail_detach_device:
-	if (host->domain) {
+	if (host->group && host->domain) {
 		put_iova_domain(&host->iova);
-		iommu_detach_device(host->domain, &pdev->dev);
+		iommu_detach_group(host->domain, host->group);
 	}
 fail_free_domain:
 	if (host->domain)
 		iommu_domain_free(host->domain);
+put_group:
+	iommu_group_put(host->group);
 
 	return err;
 }
@@ -317,8 +326,9 @@ static int host1x_remove(struct platform_device *pdev)
 
 	if (host->domain) {
 		put_iova_domain(&host->iova);
-		iommu_detach_device(host->domain, &pdev->dev);
+		iommu_detach_group(host->domain, host->group);
 		iommu_domain_free(host->domain);
+		iommu_group_put(host->group);
 	}
 
 	return 0;
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 50276972648060cc81d6ba0a12c42998904c019d..43e9fabb43a1f5bdf2f3e7e5ca37451934aba70d 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -117,6 +117,7 @@ struct host1x {
 	struct clk *clk;
 	struct reset_control *rst;
 
+	struct iommu_group *group;
 	struct iommu_domain *domain;
 	struct iova_domain iova;
 	dma_addr_t iova_end;
diff --git a/drivers/gpu/ipu-v3/Kconfig b/drivers/gpu/ipu-v3/Kconfig
index 87a20b3dcf7a53a64fc464a3c8834d5c0a167781..fe6f8c5b444583cf0d4c65d7a640abc43ec6f150 100644
--- a/drivers/gpu/ipu-v3/Kconfig
+++ b/drivers/gpu/ipu-v3/Kconfig
@@ -1,7 +1,9 @@
 config IMX_IPUV3_CORE
 	tristate "IPUv3 core support"
-	depends on SOC_IMX5 || SOC_IMX6Q || ARCH_MULTIPLATFORM
+	depends on SOC_IMX5 || SOC_IMX6Q || ARCH_MULTIPLATFORM || COMPILE_TEST
 	depends on DRM || !DRM # if DRM=m, this can't be 'y'
+	select BITREVERSE
+	select GENERIC_ALLOCATOR if DRM
 	select GENERIC_IRQ_CHIP
 	help
 	  Choose this if you have a i.MX5/6 system and want to use the Image
diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c
index 1cb82f445f9178249f5652467a2ae99fb04461b4..bb9c087e6c0d2e81dddabc47f4174a91af67220c 100644
--- a/drivers/gpu/ipu-v3/ipu-cpmem.c
+++ b/drivers/gpu/ipu-v3/ipu-cpmem.c
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/bitrev.h>
 #include <linux/io.h>
+#include <linux/sizes.h>
 #include <drm/drm_fourcc.h>
 #include "ipu-prv.h"
 
diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c
index 321eb983c2f59e4a3841e4a979eb1f15c2285235..67cc820253a99b84341825437e9e0bfce3cd16d6 100644
--- a/drivers/gpu/ipu-v3/ipu-ic.c
+++ b/drivers/gpu/ipu-v3/ipu-ic.c
@@ -17,6 +17,7 @@
 #include <linux/bitrev.h>
 #include <linux/io.h>
 #include <linux/err.h>
+#include <linux/sizes.h>
 #include "ipu-prv.h"
 
 /* IC Register Offsets */
diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c
index c860a7997cb59c981557e01021d6a4278b2470ea..f1cec3d70498ab526f9765f66422e0835f338dc0 100644
--- a/drivers/gpu/ipu-v3/ipu-pre.c
+++ b/drivers/gpu/ipu-v3/ipu-pre.c
@@ -49,6 +49,10 @@
 #define IPU_PRE_TPR_CTRL				0x070
 #define  IPU_PRE_TPR_CTRL_TILE_FORMAT(v)		((v & 0xff) << 0)
 #define  IPU_PRE_TPR_CTRL_TILE_FORMAT_MASK		0xff
+#define  IPU_PRE_TPR_CTRL_TILE_FORMAT_16_BIT		(1 << 0)
+#define  IPU_PRE_TPR_CTRL_TILE_FORMAT_SPLIT_BUF		(1 << 4)
+#define  IPU_PRE_TPR_CTRL_TILE_FORMAT_SINGLE_BUF	(1 << 5)
+#define  IPU_PRE_TPR_CTRL_TILE_FORMAT_SUPER_TILED	(1 << 6)
 
 #define IPU_PRE_PREFETCH_ENG_CTRL			0x080
 #define  IPU_PRE_PREF_ENG_CTRL_PREFETCH_EN		(1 << 0)
@@ -147,7 +151,7 @@ int ipu_pre_get(struct ipu_pre *pre)
 	val = IPU_PRE_CTRL_HANDSHAKE_ABORT_SKIP_EN |
 	      IPU_PRE_CTRL_HANDSHAKE_EN |
 	      IPU_PRE_CTRL_TPR_REST_SEL |
-	      IPU_PRE_CTRL_BLOCK_16 | IPU_PRE_CTRL_SDW_UPDATE;
+	      IPU_PRE_CTRL_SDW_UPDATE;
 	writel(val, pre->regs + IPU_PRE_CTRL);
 
 	pre->in_use = true;
@@ -163,14 +167,17 @@ void ipu_pre_put(struct ipu_pre *pre)
 
 void ipu_pre_configure(struct ipu_pre *pre, unsigned int width,
 		       unsigned int height, unsigned int stride, u32 format,
-		       unsigned int bufaddr)
+		       uint64_t modifier, unsigned int bufaddr)
 {
 	const struct drm_format_info *info = drm_format_info(format);
 	u32 active_bpp = info->cpp[0] >> 1;
 	u32 val;
 
 	/* calculate safe window for ctrl register updates */
-	pre->safe_window_end = height - 2;
+	if (modifier == DRM_FORMAT_MOD_LINEAR)
+		pre->safe_window_end = height - 2;
+	else
+		pre->safe_window_end = DIV_ROUND_UP(height, 4) - 1;
 
 	writel(bufaddr, pre->regs + IPU_PRE_CUR_BUF);
 	writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF);
@@ -203,9 +210,25 @@ void ipu_pre_configure(struct ipu_pre *pre, unsigned int width,
 
 	writel(pre->buffer_paddr, pre->regs + IPU_PRE_STORE_ENG_ADDR);
 
+	val = readl(pre->regs + IPU_PRE_TPR_CTRL);
+	val &= ~IPU_PRE_TPR_CTRL_TILE_FORMAT_MASK;
+	if (modifier != DRM_FORMAT_MOD_LINEAR) {
+		/* only support single buffer formats for now */
+		val |= IPU_PRE_TPR_CTRL_TILE_FORMAT_SINGLE_BUF;
+		if (modifier == DRM_FORMAT_MOD_VIVANTE_SUPER_TILED)
+			val |= IPU_PRE_TPR_CTRL_TILE_FORMAT_SUPER_TILED;
+		if (info->cpp[0] == 2)
+			val |= IPU_PRE_TPR_CTRL_TILE_FORMAT_16_BIT;
+	}
+	writel(val, pre->regs + IPU_PRE_TPR_CTRL);
+
 	val = readl(pre->regs + IPU_PRE_CTRL);
 	val |= IPU_PRE_CTRL_EN_REPEAT | IPU_PRE_CTRL_ENABLE |
 	       IPU_PRE_CTRL_SDW_UPDATE;
+	if (modifier == DRM_FORMAT_MOD_LINEAR)
+		val &= ~IPU_PRE_CTRL_BLOCK_EN;
+	else
+		val |= IPU_PRE_CTRL_BLOCK_EN;
 	writel(val, pre->regs + IPU_PRE_CTRL);
 }
 
diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c
index 0013ca9f72c83e8f85b3c91aa62aaf619690f234..067365c733c63b257f3be41102d7f54354d52f18 100644
--- a/drivers/gpu/ipu-v3/ipu-prg.c
+++ b/drivers/gpu/ipu-v3/ipu-prg.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/regmap.h>
 #include <video/imx-ipu-v3.h>
 
@@ -132,28 +133,25 @@ bool ipu_prg_format_supported(struct ipu_soc *ipu, uint32_t format,
 	if (info->num_planes != 1)
 		return false;
 
-	return true;
+	switch (modifier) {
+	case DRM_FORMAT_MOD_LINEAR:
+	case DRM_FORMAT_MOD_VIVANTE_TILED:
+	case DRM_FORMAT_MOD_VIVANTE_SUPER_TILED:
+		return true;
+	default:
+		return false;
+	}
 }
 EXPORT_SYMBOL_GPL(ipu_prg_format_supported);
 
 int ipu_prg_enable(struct ipu_soc *ipu)
 {
 	struct ipu_prg *prg = ipu->prg_priv;
-	int ret;
 
 	if (!prg)
 		return 0;
 
-	ret = clk_prepare_enable(prg->clk_axi);
-	if (ret)
-		goto fail_disable_ipg;
-
-	return 0;
-
-fail_disable_ipg:
-	clk_disable_unprepare(prg->clk_ipg);
-
-	return ret;
+	return pm_runtime_get_sync(prg->dev);
 }
 EXPORT_SYMBOL_GPL(ipu_prg_enable);
 
@@ -164,7 +162,7 @@ void ipu_prg_disable(struct ipu_soc *ipu)
 	if (!prg)
 		return;
 
-	clk_disable_unprepare(prg->clk_axi);
+	pm_runtime_put(prg->dev);
 }
 EXPORT_SYMBOL_GPL(ipu_prg_disable);
 
@@ -255,7 +253,7 @@ void ipu_prg_channel_disable(struct ipuv3_channel *ipu_chan)
 	if (!chan->enabled || prg_chan < 0)
 		return;
 
-	clk_prepare_enable(prg->clk_ipg);
+	pm_runtime_get_sync(prg->dev);
 
 	val = readl(prg->regs + IPU_PRG_CTL);
 	val |= IPU_PRG_CTL_BYPASS(prg_chan);
@@ -264,7 +262,7 @@ void ipu_prg_channel_disable(struct ipuv3_channel *ipu_chan)
 	val = IPU_PRG_REG_UPDATE_REG_UPDATE;
 	writel(val, prg->regs + IPU_PRG_REG_UPDATE);
 
-	clk_disable_unprepare(prg->clk_ipg);
+	pm_runtime_put(prg->dev);
 
 	ipu_prg_put_pre(prg, prg_chan);
 
@@ -275,7 +273,7 @@ EXPORT_SYMBOL_GPL(ipu_prg_channel_disable);
 int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
 			      unsigned int axi_id, unsigned int width,
 			      unsigned int height, unsigned int stride,
-			      u32 format, unsigned long *eba)
+			      u32 format, uint64_t modifier, unsigned long *eba)
 {
 	int prg_chan = ipu_prg_ipu_to_prg_chan(ipu_chan->num);
 	struct ipu_prg *prg = ipu_chan->ipu->prg_priv;
@@ -296,14 +294,10 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
 		return ret;
 
 	ipu_pre_configure(prg->pres[chan->used_pre],
-			  width, height, stride, format, *eba);
+			  width, height, stride, format, modifier, *eba);
 
 
-	ret = clk_prepare_enable(prg->clk_ipg);
-	if (ret) {
-		ipu_prg_put_pre(prg, prg_chan);
-		return ret;
-	}
+	pm_runtime_get_sync(prg->dev);
 
 	val = (stride - 1) & IPU_PRG_STRIDE_STRIDE_MASK;
 	writel(val, prg->regs + IPU_PRG_STRIDE(prg_chan));
@@ -336,7 +330,7 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
 			   (val & IPU_PRG_STATUS_BUFFER1_READY(prg_chan)),
 			   5, 1000);
 
-	clk_disable_unprepare(prg->clk_ipg);
+	pm_runtime_put(prg->dev);
 
 	chan->enabled = true;
 	return 0;
@@ -384,6 +378,12 @@ static int ipu_prg_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	ret = clk_prepare_enable(prg->clk_axi);
+	if (ret) {
+		clk_disable_unprepare(prg->clk_ipg);
+		return ret;
+	}
+
 	/* init to free running mode */
 	val = readl(prg->regs + IPU_PRG_CTL);
 	val |= IPU_PRG_CTL_SHADOW_EN;
@@ -392,7 +392,8 @@ static int ipu_prg_probe(struct platform_device *pdev)
 	/* disable address threshold */
 	writel(0xffffffff, prg->regs + IPU_PRG_THD);
 
-	clk_disable_unprepare(prg->clk_ipg);
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
 
 	prg->dev = dev;
 	platform_set_drvdata(pdev, prg);
@@ -414,6 +415,40 @@ static int ipu_prg_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int prg_suspend(struct device *dev)
+{
+	struct ipu_prg *prg = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(prg->clk_axi);
+	clk_disable_unprepare(prg->clk_ipg);
+
+	return 0;
+}
+
+static int prg_resume(struct device *dev)
+{
+	struct ipu_prg *prg = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(prg->clk_ipg);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(prg->clk_axi);
+	if (ret) {
+		clk_disable_unprepare(prg->clk_ipg);
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops prg_pm_ops = {
+	SET_RUNTIME_PM_OPS(prg_suspend, prg_resume, NULL)
+};
+
 static const struct of_device_id ipu_prg_dt_ids[] = {
 	{ .compatible = "fsl,imx6qp-prg", },
 	{ /* sentinel */ },
@@ -424,6 +459,7 @@ struct platform_driver ipu_prg_drv = {
 	.remove		= ipu_prg_remove,
 	.driver		= {
 		.name	= "imx-ipu-prg",
+		.pm	= &prg_pm_ops,
 		.of_match_table = ipu_prg_dt_ids,
 	},
 };
diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h
index ac4b8d658500865886716c6da552e6918e308de1..d6beee99b6b8c14a8bc31062656a62c2dd86354b 100644
--- a/drivers/gpu/ipu-v3/ipu-prv.h
+++ b/drivers/gpu/ipu-v3/ipu-prv.h
@@ -269,8 +269,8 @@ int ipu_pre_get(struct ipu_pre *pre);
 void ipu_pre_put(struct ipu_pre *pre);
 u32 ipu_pre_get_baddr(struct ipu_pre *pre);
 void ipu_pre_configure(struct ipu_pre *pre, unsigned int width,
-		       unsigned int height,
-		       unsigned int stride, u32 format, unsigned int bufaddr);
+		       unsigned int height, unsigned int stride, u32 format,
+		       uint64_t modifier, unsigned int bufaddr);
 void ipu_pre_update(struct ipu_pre *pre, unsigned int bufaddr);
 
 struct ipu_prg *ipu_prg_lookup_by_phandle(struct device *dev, const char *name,
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index f3fcb836a1f9edd0af5b1f31362987d22eff11b2..0c3f608131cff48fdbec637da27135d970534807 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -551,7 +551,7 @@ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item)
 		ret = hid_add_field(parser, HID_FEATURE_REPORT, data);
 		break;
 	default:
-		hid_err(parser->device, "unknown main item tag 0x%x\n", item->tag);
+		hid_warn(parser->device, "unknown main item tag 0x%x\n", item->tag);
 		ret = 0;
 	}
 
diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c
index 68cdc962265b1051d7e399e0c6a181a4426673ed..271f31461da427d93459632b096c578d71f3ee44 100644
--- a/drivers/hid/hid-cp2112.c
+++ b/drivers/hid/hid-cp2112.c
@@ -696,8 +696,16 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr,
 					      (u8 *)&word, 2);
 		break;
 	case I2C_SMBUS_I2C_BLOCK_DATA:
-		size = I2C_SMBUS_BLOCK_DATA;
-		/* fallthrough */
+		if (read_write == I2C_SMBUS_READ) {
+			read_length = data->block[0];
+			count = cp2112_write_read_req(buf, addr, read_length,
+						      command, NULL, 0);
+		} else {
+			count = cp2112_write_req(buf, addr, command,
+						 data->block + 1,
+						 data->block[0]);
+		}
+		break;
 	case I2C_SMBUS_BLOCK_DATA:
 		if (I2C_SMBUS_READ == read_write) {
 			count = cp2112_write_read_req(buf, addr,
@@ -785,6 +793,9 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr,
 	case I2C_SMBUS_WORD_DATA:
 		data->word = le16_to_cpup((__le16 *)buf);
 		break;
+	case I2C_SMBUS_I2C_BLOCK_DATA:
+		memcpy(data->block + 1, buf, read_length);
+		break;
 	case I2C_SMBUS_BLOCK_DATA:
 		if (read_length > I2C_SMBUS_BLOCK_MAX) {
 			ret = -EPROTO;
diff --git a/drivers/hid/hid-holtekff.c b/drivers/hid/hid-holtekff.c
index 9325545fc3ae1cac4e3919c1120af6841590d4f4..edc0f64bb584806f080c1b6f682c53a20acc8466 100644
--- a/drivers/hid/hid-holtekff.c
+++ b/drivers/hid/hid-holtekff.c
@@ -32,10 +32,6 @@
 
 #ifdef CONFIG_HOLTEK_FF
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>");
-MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices");
-
 /*
  * These commands and parameters are currently known:
  *
@@ -223,3 +219,7 @@ static struct hid_driver holtek_driver = {
 	.probe = holtek_probe,
 };
 module_hid_driver(holtek_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>");
+MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices");
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 19f0cf37e0ed0748be1c0c5cd8cf482cb3384eec..ba0a092ae085d64e309ec9c5b19a5d80d6372a93 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -659,22 +659,28 @@ void vmbus_close(struct vmbus_channel *channel)
 		 */
 		return;
 	}
-	mutex_lock(&vmbus_connection.channel_mutex);
 	/*
 	 * Close all the sub-channels first and then close the
 	 * primary channel.
 	 */
 	list_for_each_safe(cur, tmp, &channel->sc_list) {
 		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
-		vmbus_close_internal(cur_channel);
 		if (cur_channel->rescind) {
+			wait_for_completion(&cur_channel->rescind_event);
+			mutex_lock(&vmbus_connection.channel_mutex);
+			vmbus_close_internal(cur_channel);
 			hv_process_channel_removal(
 					   cur_channel->offermsg.child_relid);
+		} else {
+			mutex_lock(&vmbus_connection.channel_mutex);
+			vmbus_close_internal(cur_channel);
 		}
+		mutex_unlock(&vmbus_connection.channel_mutex);
 	}
 	/*
 	 * Now close the primary.
 	 */
+	mutex_lock(&vmbus_connection.channel_mutex);
 	vmbus_close_internal(channel);
 	mutex_unlock(&vmbus_connection.channel_mutex);
 }
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index ec5454f3f4a698219fe42e1899537c44f8397a45..c21020b69114b18648ff83562aa97ab95b3b65b3 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -333,6 +333,7 @@ static struct vmbus_channel *alloc_channel(void)
 		return NULL;
 
 	spin_lock_init(&channel->lock);
+	init_completion(&channel->rescind_event);
 
 	INIT_LIST_HEAD(&channel->sc_list);
 	INIT_LIST_HEAD(&channel->percpu_list);
@@ -898,6 +899,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 	/*
 	 * Now wait for offer handling to complete.
 	 */
+	vmbus_rescind_cleanup(channel);
 	while (READ_ONCE(channel->probe_done) == false) {
 		/*
 		 * We wait here until any channel offer is currently
@@ -913,7 +915,6 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 	if (channel->device_obj) {
 		if (channel->chn_rescind_callback) {
 			channel->chn_rescind_callback(channel);
-			vmbus_rescind_cleanup(channel);
 			return;
 		}
 		/*
@@ -922,7 +923,6 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 		 */
 		dev = get_device(&channel->device_obj->device);
 		if (dev) {
-			vmbus_rescind_cleanup(channel);
 			vmbus_device_unregister(channel->device_obj);
 			put_device(dev);
 		}
@@ -936,13 +936,14 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 		 * 2. Then close the primary channel.
 		 */
 		mutex_lock(&vmbus_connection.channel_mutex);
-		vmbus_rescind_cleanup(channel);
 		if (channel->state == CHANNEL_OPEN_STATE) {
 			/*
 			 * The channel is currently not open;
 			 * it is safe for us to cleanup the channel.
 			 */
 			hv_process_channel_removal(rescind->child_relid);
+		} else {
+			complete(&channel->rescind_event);
 		}
 		mutex_unlock(&vmbus_connection.channel_mutex);
 	}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 76ed9a216f101e90ddaa3bb38a1e0e7175ea81d4..610223f0e94530635ac94ccf40b9f582c57d6d62 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1378,6 +1378,8 @@ void vmbus_device_unregister(struct hv_device *device_obj)
 	pr_debug("child device %s unregistered\n",
 		dev_name(&device_obj->device));
 
+	kset_unregister(device_obj->channels_kset);
+
 	/*
 	 * Kick off the process of unregistering the device.
 	 * This will call vmbus_remove() and eventually vmbus_device_release()
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index c9790e2c344016e91c26d02f4a69e6a15035b62a..af5123042990281c694311da2819107dd8336afb 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -143,6 +143,7 @@ static int hwmon_thermal_add_sensor(struct device *dev,
 				    struct hwmon_device *hwdev, int index)
 {
 	struct hwmon_thermal_data *tdata;
+	struct thermal_zone_device *tzd;
 
 	tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL);
 	if (!tdata)
@@ -151,8 +152,14 @@ static int hwmon_thermal_add_sensor(struct device *dev,
 	tdata->hwdev = hwdev;
 	tdata->index = index;
 
-	devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata,
-					     &hwmon_thermal_ops);
+	tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata,
+						   &hwmon_thermal_ops);
+	/*
+	 * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV,
+	 * so ignore that error but forward any other error.
+	 */
+	if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV))
+		return PTR_ERR(tzd);
 
 	return 0;
 }
@@ -621,14 +628,20 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
 				if (!chip->ops->is_visible(drvdata, hwmon_temp,
 							   hwmon_temp_input, j))
 					continue;
-				if (info[i]->config[j] & HWMON_T_INPUT)
-					hwmon_thermal_add_sensor(dev, hwdev, j);
+				if (info[i]->config[j] & HWMON_T_INPUT) {
+					err = hwmon_thermal_add_sensor(dev,
+								hwdev, j);
+					if (err)
+						goto free_device;
+				}
 			}
 		}
 	}
 
 	return hdev;
 
+free_device:
+	device_unregister(hdev);
 free_hwmon:
 	kfree(hwdev);
 ida_remove:
diff --git a/drivers/hwtracing/stm/ftrace.c b/drivers/hwtracing/stm/ftrace.c
index bd126a7c6da2aa1c16aaa5456644198cacfb5222..7da75644c7507c1f6161ea784a318b1aef76aac4 100644
--- a/drivers/hwtracing/stm/ftrace.c
+++ b/drivers/hwtracing/stm/ftrace.c
@@ -42,9 +42,11 @@ static struct stm_ftrace {
  * @len:	length of the data packet
  */
 static void notrace
-stm_ftrace_write(const void *buf, unsigned int len)
+stm_ftrace_write(struct trace_export *export, const void *buf, unsigned int len)
 {
-	stm_source_write(&stm_ftrace.data, STM_FTRACE_CHAN, buf, len);
+	struct stm_ftrace *stm = container_of(export, struct stm_ftrace, ftrace);
+
+	stm_source_write(&stm->data, STM_FTRACE_CHAN, buf, len);
 }
 
 static int stm_ftrace_link(struct stm_source_data *data)
diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c
index 0d05dadb2dc58a1d8599869cab82baed63ad165c..44cffad43701f4839096bbde5c5937ee22cce135 100644
--- a/drivers/i2c/busses/i2c-cht-wc.c
+++ b/drivers/i2c/busses/i2c-cht-wc.c
@@ -379,7 +379,7 @@ static int cht_wc_i2c_adap_i2c_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static struct platform_device_id cht_wc_i2c_adap_id_table[] = {
+static const struct platform_device_id cht_wc_i2c_adap_id_table[] = {
 	{ .name = "cht_wcove_ext_chgr" },
 	{},
 };
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index 174579d32e5f39ecdc44d2c230b55fbfb5d073e2..462948e2c5354e64a09fa769eb1519f9d421d629 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -983,7 +983,7 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
 
 	if (adapdata->smba) {
 		i2c_del_adapter(adap);
-		if (adapdata->port == (0 << 1)) {
+		if (adapdata->port == (0 << piix4_port_shift_sb800)) {
 			release_region(adapdata->smba, SMBIOSIZE);
 			if (adapdata->sb800_main)
 				release_region(SB800_PIIX4_SMB_IDX, 2);
diff --git a/drivers/i2c/busses/i2c-stm32.h b/drivers/i2c/busses/i2c-stm32.h
index dab51761f8c52b0aab12e4b49334aa1607d0a7a0..d4f9cef251acf457f1dba743e141dadc7dc59d1b 100644
--- a/drivers/i2c/busses/i2c-stm32.h
+++ b/drivers/i2c/busses/i2c-stm32.h
@@ -1,10 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * i2c-stm32.h
  *
  * Copyright (C) M'boumba Cedric Madianga 2017
+ * Copyright (C) STMicroelectronics 2017
  * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
  *
- * License terms:  GNU General Public License (GPL), version 2
  */
 
 #ifndef _I2C_STM32_H
diff --git a/drivers/i2c/busses/i2c-stm32f4.c b/drivers/i2c/busses/i2c-stm32f4.c
index 4ec108496f15cdf5dcedd3bad1b368cac5482bd8..47c8d00de53f95377e857633035118ba9ed5be31 100644
--- a/drivers/i2c/busses/i2c-stm32f4.c
+++ b/drivers/i2c/busses/i2c-stm32f4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Driver for STMicroelectronics STM32 I2C controller
  *
@@ -6,11 +7,11 @@
  * http://www.st.com/resource/en/reference_manual/DM00031020.pdf
  *
  * Copyright (C) M'boumba Cedric Madianga 2016
+ * Copyright (C) STMicroelectronics 2017
  * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
  *
  * This driver is based on i2c-st.c
  *
- * License terms:  GNU General Public License (GPL), version 2
  */
 
 #include <linux/clk.h>
diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c
index d4a6e9c2e9aaeaa679bb159ade420bcdb37e6988..b445b3bb0bb11fe262363042a2eb56e458f6ffc2 100644
--- a/drivers/i2c/busses/i2c-stm32f7.c
+++ b/drivers/i2c/busses/i2c-stm32f7.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Driver for STMicroelectronics STM32F7 I2C controller
  *
@@ -7,11 +8,11 @@
  * http://www.st.com/resource/en/reference_manual/dm00124865.pdf
  *
  * Copyright (C) M'boumba Cedric Madianga 2017
+ * Copyright (C) STMicroelectronics 2017
  * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
  *
  * This driver is based on i2c-stm32f4.c
  *
- * License terms:  GNU General Public License (GPL), version 2
  */
 #include <linux/clk.h>
 #include <linux/delay.h>
diff --git a/drivers/iio/adc/cpcap-adc.c b/drivers/iio/adc/cpcap-adc.c
index 3576ec73ec232a9b33538377bf9a924e743e26bf..9ad60421d360539db31636b65baf42fe4bf67824 100644
--- a/drivers/iio/adc/cpcap-adc.c
+++ b/drivers/iio/adc/cpcap-adc.c
@@ -1011,7 +1011,7 @@ static int cpcap_adc_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, indio_dev);
 
 	ddata->irq = platform_get_irq_byname(pdev, "adcdone");
-	if (!ddata->irq)
+	if (ddata->irq < 0)
 		return -ENODEV;
 
 	error = devm_request_threaded_irq(&pdev->dev, ddata->irq, NULL,
diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c
index 9c6932ffc0afdbc66b3dd1cb7032adf84fb8720e..36047147ce7c727003a0f08df34880302ebd2e08 100644
--- a/drivers/iio/adc/meson_saradc.c
+++ b/drivers/iio/adc/meson_saradc.c
@@ -221,8 +221,10 @@ enum meson_sar_adc_chan7_mux_sel {
 
 struct meson_sar_adc_data {
 	bool					has_bl30_integration;
+	u32					bandgap_reg;
 	unsigned int				resolution;
 	const char				*name;
+	const struct regmap_config		*regmap_config;
 };
 
 struct meson_sar_adc_priv {
@@ -242,13 +244,20 @@ struct meson_sar_adc_priv {
 	int					calibscale;
 };
 
-static const struct regmap_config meson_sar_adc_regmap_config = {
+static const struct regmap_config meson_sar_adc_regmap_config_gxbb = {
 	.reg_bits = 8,
 	.val_bits = 32,
 	.reg_stride = 4,
 	.max_register = MESON_SAR_ADC_REG13,
 };
 
+static const struct regmap_config meson_sar_adc_regmap_config_meson8 = {
+	.reg_bits = 8,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.max_register = MESON_SAR_ADC_DELTA_10,
+};
+
 static unsigned int meson_sar_adc_get_fifo_count(struct iio_dev *indio_dev)
 {
 	struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
@@ -600,7 +609,7 @@ static int meson_sar_adc_clk_init(struct iio_dev *indio_dev,
 	init.num_parents = 1;
 
 	priv->clk_gate.reg = base + MESON_SAR_ADC_REG3;
-	priv->clk_gate.bit_idx = fls(MESON_SAR_ADC_REG3_CLK_EN);
+	priv->clk_gate.bit_idx = __ffs(MESON_SAR_ADC_REG3_CLK_EN);
 	priv->clk_gate.hw.init = &init;
 
 	priv->adc_clk = devm_clk_register(&indio_dev->dev, &priv->clk_gate.hw);
@@ -685,6 +694,20 @@ static int meson_sar_adc_init(struct iio_dev *indio_dev)
 	return 0;
 }
 
+static void meson_sar_adc_set_bandgap(struct iio_dev *indio_dev, bool on_off)
+{
+	struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
+	u32 enable_mask;
+
+	if (priv->data->bandgap_reg == MESON_SAR_ADC_REG11)
+		enable_mask = MESON_SAR_ADC_REG11_BANDGAP_EN;
+	else
+		enable_mask = MESON_SAR_ADC_DELTA_10_TS_VBG_EN;
+
+	regmap_update_bits(priv->regmap, priv->data->bandgap_reg, enable_mask,
+			   on_off ? enable_mask : 0);
+}
+
 static int meson_sar_adc_hw_enable(struct iio_dev *indio_dev)
 {
 	struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
@@ -717,9 +740,9 @@ static int meson_sar_adc_hw_enable(struct iio_dev *indio_dev)
 	regval = FIELD_PREP(MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, 1);
 	regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG0,
 			   MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, regval);
-	regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG11,
-			   MESON_SAR_ADC_REG11_BANDGAP_EN,
-			   MESON_SAR_ADC_REG11_BANDGAP_EN);
+
+	meson_sar_adc_set_bandgap(indio_dev, true);
+
 	regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG3,
 			   MESON_SAR_ADC_REG3_ADC_EN,
 			   MESON_SAR_ADC_REG3_ADC_EN);
@@ -739,8 +762,7 @@ static int meson_sar_adc_hw_enable(struct iio_dev *indio_dev)
 err_adc_clk:
 	regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG3,
 			   MESON_SAR_ADC_REG3_ADC_EN, 0);
-	regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG11,
-			   MESON_SAR_ADC_REG11_BANDGAP_EN, 0);
+	meson_sar_adc_set_bandgap(indio_dev, false);
 	clk_disable_unprepare(priv->sana_clk);
 err_sana_clk:
 	clk_disable_unprepare(priv->core_clk);
@@ -765,8 +787,8 @@ static int meson_sar_adc_hw_disable(struct iio_dev *indio_dev)
 
 	regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG3,
 			   MESON_SAR_ADC_REG3_ADC_EN, 0);
-	regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG11,
-			   MESON_SAR_ADC_REG11_BANDGAP_EN, 0);
+
+	meson_sar_adc_set_bandgap(indio_dev, false);
 
 	clk_disable_unprepare(priv->sana_clk);
 	clk_disable_unprepare(priv->core_clk);
@@ -844,30 +866,40 @@ static const struct iio_info meson_sar_adc_iio_info = {
 
 static const struct meson_sar_adc_data meson_sar_adc_meson8_data = {
 	.has_bl30_integration = false,
+	.bandgap_reg = MESON_SAR_ADC_DELTA_10,
+	.regmap_config = &meson_sar_adc_regmap_config_meson8,
 	.resolution = 10,
 	.name = "meson-meson8-saradc",
 };
 
 static const struct meson_sar_adc_data meson_sar_adc_meson8b_data = {
 	.has_bl30_integration = false,
+	.bandgap_reg = MESON_SAR_ADC_DELTA_10,
+	.regmap_config = &meson_sar_adc_regmap_config_meson8,
 	.resolution = 10,
 	.name = "meson-meson8b-saradc",
 };
 
 static const struct meson_sar_adc_data meson_sar_adc_gxbb_data = {
 	.has_bl30_integration = true,
+	.bandgap_reg = MESON_SAR_ADC_REG11,
+	.regmap_config = &meson_sar_adc_regmap_config_gxbb,
 	.resolution = 10,
 	.name = "meson-gxbb-saradc",
 };
 
 static const struct meson_sar_adc_data meson_sar_adc_gxl_data = {
 	.has_bl30_integration = true,
+	.bandgap_reg = MESON_SAR_ADC_REG11,
+	.regmap_config = &meson_sar_adc_regmap_config_gxbb,
 	.resolution = 12,
 	.name = "meson-gxl-saradc",
 };
 
 static const struct meson_sar_adc_data meson_sar_adc_gxm_data = {
 	.has_bl30_integration = true,
+	.bandgap_reg = MESON_SAR_ADC_REG11,
+	.regmap_config = &meson_sar_adc_regmap_config_gxbb,
 	.resolution = 12,
 	.name = "meson-gxm-saradc",
 };
@@ -945,7 +977,7 @@ static int meson_sar_adc_probe(struct platform_device *pdev)
 		return ret;
 
 	priv->regmap = devm_regmap_init_mmio(&pdev->dev, base,
-					     &meson_sar_adc_regmap_config);
+					     priv->data->regmap_config);
 	if (IS_ERR(priv->regmap))
 		return PTR_ERR(priv->regmap);
 
diff --git a/drivers/iio/health/max30102.c b/drivers/iio/health/max30102.c
index 203ffb9cad6a2a3a98ac1eaf71951039303099ea..147a8c14235f3f8b39302c8f78beec811f6871dd 100644
--- a/drivers/iio/health/max30102.c
+++ b/drivers/iio/health/max30102.c
@@ -371,7 +371,7 @@ static int max30102_read_raw(struct iio_dev *indio_dev,
 		mutex_unlock(&indio_dev->mlock);
 		break;
 	case IIO_CHAN_INFO_SCALE:
-		*val = 1;  /* 0.0625 */
+		*val = 1000;  /* 62.5 */
 		*val2 = 16;
 		ret = IIO_VAL_FRACTIONAL;
 		break;
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 9c4cfd19b7398677a8b1ac3c0fee5985160880e0..2f0998ebeed214dc0f062664595e1d4362d27e3e 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -631,7 +631,7 @@ static ssize_t __iio_format_value(char *buf, size_t len, unsigned int type,
  * iio_format_value() - Formats a IIO value into its string representation
  * @buf:	The buffer to which the formatted value gets written
  *		which is assumed to be big enough (i.e. PAGE_SIZE).
- * @type:	One of the IIO_VAL_... constants. This decides how the val
+ * @type:	One of the IIO_VAL_* constants. This decides how the val
  *		and val2 parameters are formatted.
  * @size:	Number of IIO value entries contained in vals
  * @vals:	Pointer to the values, exact meaning depends on the
@@ -639,7 +639,7 @@ static ssize_t __iio_format_value(char *buf, size_t len, unsigned int type,
  *
  * Return: 0 by default, a negative number on failure or the
  *	   total number of characters written for a type that belongs
- *	   to the IIO_VAL_... constant.
+ *	   to the IIO_VAL_* constant.
  */
 ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals)
 {
diff --git a/drivers/iio/proximity/sx9500.c b/drivers/iio/proximity/sx9500.c
index 53c5d653e7809560ea7393f249e4d15a7e22bce0..df23dbcc030aea5b2d6e0ddde7237b4d906bf365 100644
--- a/drivers/iio/proximity/sx9500.c
+++ b/drivers/iio/proximity/sx9500.c
@@ -869,6 +869,7 @@ static int sx9500_init_device(struct iio_dev *indio_dev)
 static void sx9500_gpio_probe(struct i2c_client *client,
 			      struct sx9500_data *data)
 {
+	struct gpio_desc *gpiod_int;
 	struct device *dev;
 
 	if (!client)
@@ -876,6 +877,14 @@ static void sx9500_gpio_probe(struct i2c_client *client,
 
 	dev = &client->dev;
 
+	if (client->irq <= 0) {
+		gpiod_int = devm_gpiod_get(dev, SX9500_GPIO_INT, GPIOD_IN);
+		if (IS_ERR(gpiod_int))
+			dev_err(dev, "gpio get irq failed\n");
+		else
+			client->irq = gpiod_to_irq(gpiod_int);
+	}
+
 	data->gpiod_rst = devm_gpiod_get(dev, SX9500_GPIO_RESET, GPIOD_OUT_HIGH);
 	if (IS_ERR(data->gpiod_rst)) {
 		dev_warn(dev, "gpio get reset pin failed\n");
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 98ac46ed7214f574fbe13d5f617b9f2b0836bc40..cbf186522016f97f3024ac84bdac4330ddf1a26f 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -1,6 +1,6 @@
 menuconfig INFINIBAND
 	tristate "InfiniBand support"
-	depends on HAS_IOMEM
+	depends on HAS_IOMEM && HAS_DMA
 	depends on NET
 	depends on INET
 	depends on m || IPV6 != m
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 1fdb473b5df7be38c09ecd49121d516b4893ef13..6294a7001d33bee54b4c516fcfa5faff7556b28b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -801,6 +801,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
 	INIT_LIST_HEAD(&id_priv->mc_list);
 	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
 	id_priv->id.route.addr.dev_addr.net = get_net(net);
+	id_priv->seq_num &= 0x00ffffff;
 
 	return &id_priv->id;
 }
@@ -4457,7 +4458,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-static const struct rdma_nl_cbs cma_cb_table[] = {
+static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = {
 	[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
 };
 
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index a1d687a664f85edc4eeb1bf50360cb02c65faa45..66f0268f37a6ca143f558a6c02080c1b702afcc3 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -314,7 +314,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
 }
 #endif
 
-struct ib_device *__ib_device_get_by_index(u32 ifindex);
+struct ib_device *ib_device_get_by_index(u32 ifindex);
 /* RDMA device netlink */
 void nldev_init(void);
 void nldev_exit(void);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 84fc32a2c8b3e8fc4dfed5be20cbea0921ee737e..465520627e4b6b93c85aae4c276be44607eb7400 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -134,7 +134,7 @@ static int ib_device_check_mandatory(struct ib_device *device)
 	return 0;
 }
 
-struct ib_device *__ib_device_get_by_index(u32 index)
+static struct ib_device *__ib_device_get_by_index(u32 index)
 {
 	struct ib_device *device;
 
@@ -145,6 +145,22 @@ struct ib_device *__ib_device_get_by_index(u32 index)
 	return NULL;
 }
 
+/*
+ * Caller is responsible to return refrerence count by calling put_device()
+ */
+struct ib_device *ib_device_get_by_index(u32 index)
+{
+	struct ib_device *device;
+
+	down_read(&lists_rwsem);
+	device = __ib_device_get_by_index(index);
+	if (device)
+		get_device(&device->dev);
+
+	up_read(&lists_rwsem);
+	return device;
+}
+
 static struct ib_device *__ib_device_get_by_name(const char *name)
 {
 	struct ib_device *device;
@@ -1146,7 +1162,7 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
 }
 EXPORT_SYMBOL(ib_get_net_dev_by_params);
 
-static const struct rdma_nl_cbs ibnl_ls_cb_table[] = {
+static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
 	[RDMA_NL_LS_OP_RESOLVE] = {
 		.doit = ib_nl_handle_resolve_resp,
 		.flags = RDMA_NL_ADMIN_PERM,
@@ -1253,5 +1269,5 @@ static void __exit ib_core_cleanup(void)
 
 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4);
 
-module_init(ib_core_init);
+subsys_initcall(ib_core_init);
 module_exit(ib_core_cleanup);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index e9e189ec7502ca90ccdb25a58ade3bb0167ee731..5d676cff41f496ce519f4dc000eda17f6fd43999 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -80,7 +80,7 @@ const char *__attribute_const__ iwcm_reject_msg(int reason)
 }
 EXPORT_SYMBOL(iwcm_reject_msg);
 
-static struct rdma_nl_cbs iwcm_nl_cb_table[] = {
+static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = {
 	[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
 	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
 	[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 2fae850a3eff6a92703aed33975af9b1e0fc3835..0dcd1aa6f683e8bc73901ecb4fee08c8a167801e 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -142,27 +142,34 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
 
-	device = __ib_device_get_by_index(index);
+	device = ib_device_get_by_index(index);
 	if (!device)
 		return -EINVAL;
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
+	if (!msg) {
+		err = -ENOMEM;
+		goto err;
+	}
 
 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
 			0, 0);
 
 	err = fill_dev_info(msg, device);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
+	if (err)
+		goto err_free;
 
 	nlmsg_end(msg, nlh);
 
+	put_device(&device->dev);
 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_free:
+	nlmsg_free(msg);
+err:
+	put_device(&device->dev);
+	return err;
 }
 
 static int _nldev_get_dumpit(struct ib_device *device,
@@ -220,31 +227,40 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 
 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
-	device = __ib_device_get_by_index(index);
+	device = ib_device_get_by_index(index);
 	if (!device)
 		return -EINVAL;
 
 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
-	if (!rdma_is_port_valid(device, port))
-		return -EINVAL;
+	if (!rdma_is_port_valid(device, port)) {
+		err = -EINVAL;
+		goto err;
+	}
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
+	if (!msg) {
+		err = -ENOMEM;
+		goto err;
+	}
 
 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
 			0, 0);
 
 	err = fill_port_info(msg, device, port);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
+	if (err)
+		goto err_free;
 
 	nlmsg_end(msg, nlh);
+	put_device(&device->dev);
 
 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_free:
+	nlmsg_free(msg);
+err:
+	put_device(&device->dev);
+	return err;
 }
 
 static int nldev_port_get_dumpit(struct sk_buff *skb,
@@ -265,7 +281,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
 		return -EINVAL;
 
 	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
-	device = __ib_device_get_by_index(ifindex);
+	device = ib_device_get_by_index(ifindex);
 	if (!device)
 		return -EINVAL;
 
@@ -299,11 +315,13 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
 		nlmsg_end(skb, nlh);
 	}
 
-out:	cb->args[0] = idx;
+out:
+	put_device(&device->dev);
+	cb->args[0] = idx;
 	return skb->len;
 }
 
-static const struct rdma_nl_cbs nldev_cb_table[] = {
+static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 	[RDMA_NLDEV_CMD_GET] = {
 		.doit = nldev_get_doit,
 		.dump = nldev_get_dumpit,
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 23278ed5be4517fd42d8bba6eb07ccd485e5f7f6..59b2f96d986aa2b86491d29b8d7c3e25f244ba6f 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -386,6 +386,9 @@ int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev)
 	if (ret)
 		return ret;
 
+	if (!qp->qp_sec)
+		return 0;
+
 	mutex_lock(&real_qp->qp_sec->mutex);
 	ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys,
 					  qp->qp_sec);
@@ -417,8 +420,17 @@ void ib_close_shared_qp_security(struct ib_qp_security *sec)
 
 int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
 {
+	u8 i = rdma_start_port(dev);
+	bool is_ib = false;
 	int ret;
 
+	while (i <= rdma_end_port(dev) && !is_ib)
+		is_ib = rdma_protocol_ib(dev, i++);
+
+	/* If this isn't an IB device don't create the security context */
+	if (!is_ib)
+		return 0;
+
 	qp->qp_sec = kzalloc(sizeof(*qp->qp_sec), GFP_KERNEL);
 	if (!qp->qp_sec)
 		return -ENOMEM;
@@ -441,6 +453,10 @@ EXPORT_SYMBOL(ib_create_qp_security);
 
 void ib_destroy_qp_security_begin(struct ib_qp_security *sec)
 {
+	/* Return if not IB */
+	if (!sec)
+		return;
+
 	mutex_lock(&sec->mutex);
 
 	/* Remove the QP from the lists so it won't get added to
@@ -470,6 +486,10 @@ void ib_destroy_qp_security_abort(struct ib_qp_security *sec)
 	int ret;
 	int i;
 
+	/* Return if not IB */
+	if (!sec)
+		return;
+
 	/* If a concurrent cache update is in progress this
 	 * QP security could be marked for an error state
 	 * transition.  Wait for this to complete.
@@ -505,6 +525,10 @@ void ib_destroy_qp_security_end(struct ib_qp_security *sec)
 {
 	int i;
 
+	/* Return if not IB */
+	if (!sec)
+		return;
+
 	/* If a concurrent cache update is occurring we must
 	 * wait until this QP security structure is processed
 	 * in the QP to error flow before destroying it because
@@ -557,7 +581,7 @@ int ib_security_modify_qp(struct ib_qp *qp,
 {
 	int ret = 0;
 	struct ib_ports_pkeys *tmp_pps;
-	struct ib_ports_pkeys *new_pps;
+	struct ib_ports_pkeys *new_pps = NULL;
 	struct ib_qp *real_qp = qp->real_qp;
 	bool special_qp = (real_qp->qp_type == IB_QPT_SMI ||
 			   real_qp->qp_type == IB_QPT_GSI ||
@@ -565,18 +589,27 @@ int ib_security_modify_qp(struct ib_qp *qp,
 	bool pps_change = ((qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) ||
 			   (qp_attr_mask & IB_QP_ALT_PATH));
 
+	WARN_ONCE((qp_attr_mask & IB_QP_PORT &&
+		   rdma_protocol_ib(real_qp->device, qp_attr->port_num) &&
+		   !real_qp->qp_sec),
+		   "%s: QP security is not initialized for IB QP: %d\n",
+		   __func__, real_qp->qp_num);
+
 	/* The port/pkey settings are maintained only for the real QP. Open
 	 * handles on the real QP will be in the shared_qp_list. When
 	 * enforcing security on the real QP all the shared QPs will be
 	 * checked as well.
 	 */
 
-	if (pps_change && !special_qp) {
+	if (pps_change && !special_qp && real_qp->qp_sec) {
 		mutex_lock(&real_qp->qp_sec->mutex);
 		new_pps = get_new_pps(real_qp,
 				      qp_attr,
 				      qp_attr_mask);
-
+		if (!new_pps) {
+			mutex_unlock(&real_qp->qp_sec->mutex);
+			return -ENOMEM;
+		}
 		/* Add this QP to the lists for the new port
 		 * and pkey settings before checking for permission
 		 * in case there is a concurrent cache update
@@ -600,7 +633,7 @@ int ib_security_modify_qp(struct ib_qp *qp,
 						 qp_attr_mask,
 						 udata);
 
-	if (pps_change && !special_qp) {
+	if (new_pps) {
 		/* Clean up the lists and free the appropriate
 		 * ports_pkeys structure.
 		 */
@@ -631,6 +664,9 @@ int ib_security_pkey_access(struct ib_device *dev,
 	u16 pkey;
 	int ret;
 
+	if (!rdma_protocol_ib(dev, port_num))
+		return 0;
+
 	ret = ib_get_cached_pkey(dev, port_num, pkey_index, &pkey);
 	if (ret)
 		return ret;
@@ -665,6 +701,9 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
 {
 	int ret;
 
+	if (!rdma_protocol_ib(agent->device, agent->port_num))
+		return 0;
+
 	ret = security_ib_alloc_security(&agent->security);
 	if (ret)
 		return ret;
@@ -690,6 +729,9 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
 
 void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
 {
+	if (!rdma_protocol_ib(agent->device, agent->port_num))
+		return;
+
 	security_ib_free_security(agent->security);
 	if (agent->lsm_nb_reg)
 		unregister_lsm_notifier(&agent->lsm_nb);
@@ -697,8 +739,14 @@ void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
 
 int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
 {
-	if (map->agent.qp->qp_type == IB_QPT_SMI && !map->agent.smp_allowed)
-		return -EACCES;
+	if (!rdma_protocol_ib(map->agent.device, map->agent.port_num))
+		return 0;
+
+	if (map->agent.qp->qp_type == IB_QPT_SMI) {
+		if (!map->agent.smp_allowed)
+			return -EACCES;
+		return 0;
+	}
 
 	return ib_security_pkey_access(map->agent.device,
 				       map->agent.port_num,
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 16d55710b1162aa11ca2d8ed3dd1c1690f60acc8..840b24096690ddcdef6ce894c5ee21b6285bf238 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1971,6 +1971,12 @@ static int modify_qp(struct ib_uverbs_file *file,
 		goto release_qp;
 	}
 
+	if ((cmd->base.attr_mask & IB_QP_ALT_PATH) &&
+	    !rdma_is_port_valid(qp->device, cmd->base.alt_port_num)) {
+		ret = -EINVAL;
+		goto release_qp;
+	}
+
 	attr->qp_state		  = cmd->base.qp_state;
 	attr->cur_qp_state	  = cmd->base.cur_qp_state;
 	attr->path_mtu		  = cmd->base.path_mtu;
@@ -2068,8 +2074,8 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
 		return -EOPNOTSUPP;
 
 	if (ucore->inlen > sizeof(cmd)) {
-		if (ib_is_udata_cleared(ucore, sizeof(cmd),
-					ucore->inlen - sizeof(cmd)))
+		if (!ib_is_udata_cleared(ucore, sizeof(cmd),
+					 ucore->inlen - sizeof(cmd)))
 			return -EOPNOTSUPP;
 	}
 
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 3fb8fb6cc824ef09f9c9c229e5a99a3e4801a65e..e36d27ed4daae3d46cb88b3ab4f747c34e6abb49 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1438,7 +1438,8 @@ int ib_close_qp(struct ib_qp *qp)
 	spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
 
 	atomic_dec(&real_qp->usecnt);
-	ib_close_shared_qp_security(qp->qp_sec);
+	if (qp->qp_sec)
+		ib_close_shared_qp_security(qp->qp_sec);
 	kfree(qp);
 
 	return 0;
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index ea55e95cd2c5df33bf9de567eb685fae3cbaea1c..6f2b26126c64a4503b6a3bf8b8c3991b65b65012 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -395,6 +395,11 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
 
 static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
 {
+	if (DRAIN_CQE(cqe)) {
+		WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
+		return 0;
+	}
+
 	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
 		return 0;
 
@@ -489,7 +494,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 	/*
 	 * Special cqe for drain WR completions...
 	 */
-	if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
+	if (DRAIN_CQE(hw_cqe)) {
 		*cookie = CQE_DRAIN_COOKIE(hw_cqe);
 		*cqe = *hw_cqe;
 		goto skip_cqe;
@@ -566,10 +571,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 			ret = -EAGAIN;
 			goto skip_cqe;
 		}
-		if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
+		if (unlikely(!CQE_STATUS(hw_cqe) &&
+			     CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
 			t4_set_wq_in_error(wq);
-			hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN));
-			goto proc_cqe;
+			hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
 		}
 		goto proc_cqe;
 	}
@@ -743,9 +748,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 				c4iw_invalidate_mr(qhp->rhp,
 						   CQE_WRID_FR_STAG(&cqe));
 			break;
-		case C4IW_DRAIN_OPCODE:
-			wc->opcode = IB_WC_SEND;
-			break;
 		default:
 			pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
 			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 470f97a79ebb7f90e649179ab34b30ad0c089df7..65dd3726ca024db4e0fabff5c4527c676757065e 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -693,8 +693,6 @@ static inline int to_ib_qp_state(int c4iw_qp_state)
 	return IB_QPS_ERR;
 }
 
-#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN
-
 static inline u32 c4iw_ib_to_tpt_access(int a)
 {
 	return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 5ee7fe433136bc22dc7a55ce9bad4930ee49ae1d..d5c92fc520d6471f5c1f41d77dd15b280a8a651b 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -790,21 +790,57 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
 	return 0;
 }
 
-static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
+static int ib_to_fw_opcode(int ib_opcode)
+{
+	int opcode;
+
+	switch (ib_opcode) {
+	case IB_WR_SEND_WITH_INV:
+		opcode = FW_RI_SEND_WITH_INV;
+		break;
+	case IB_WR_SEND:
+		opcode = FW_RI_SEND;
+		break;
+	case IB_WR_RDMA_WRITE:
+		opcode = FW_RI_RDMA_WRITE;
+		break;
+	case IB_WR_RDMA_READ:
+	case IB_WR_RDMA_READ_WITH_INV:
+		opcode = FW_RI_READ_REQ;
+		break;
+	case IB_WR_REG_MR:
+		opcode = FW_RI_FAST_REGISTER;
+		break;
+	case IB_WR_LOCAL_INV:
+		opcode = FW_RI_LOCAL_INV;
+		break;
+	default:
+		opcode = -EINVAL;
+	}
+	return opcode;
+}
+
+static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
 {
 	struct t4_cqe cqe = {};
 	struct c4iw_cq *schp;
 	unsigned long flag;
 	struct t4_cq *cq;
+	int opcode;
 
 	schp = to_c4iw_cq(qhp->ibqp.send_cq);
 	cq = &schp->cq;
 
+	opcode = ib_to_fw_opcode(wr->opcode);
+	if (opcode < 0)
+		return opcode;
+
 	cqe.u.drain_cookie = wr->wr_id;
 	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
-				 CQE_OPCODE_V(C4IW_DRAIN_OPCODE) |
+				 CQE_OPCODE_V(opcode) |
 				 CQE_TYPE_V(1) |
 				 CQE_SWCQE_V(1) |
+				 CQE_DRAIN_V(1) |
 				 CQE_QPID_V(qhp->wq.sq.qid));
 
 	spin_lock_irqsave(&schp->lock, flag);
@@ -819,6 +855,23 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
 					   schp->ibcq.cq_context);
 		spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
 	}
+	return 0;
+}
+
+static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
+				struct ib_send_wr **bad_wr)
+{
+	int ret = 0;
+
+	while (wr) {
+		ret = complete_sq_drain_wr(qhp, wr);
+		if (ret) {
+			*bad_wr = wr;
+			break;
+		}
+		wr = wr->next;
+	}
+	return ret;
 }
 
 static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
@@ -833,9 +886,10 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
 
 	cqe.u.drain_cookie = wr->wr_id;
 	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
-				 CQE_OPCODE_V(C4IW_DRAIN_OPCODE) |
+				 CQE_OPCODE_V(FW_RI_SEND) |
 				 CQE_TYPE_V(0) |
 				 CQE_SWCQE_V(1) |
+				 CQE_DRAIN_V(1) |
 				 CQE_QPID_V(qhp->wq.sq.qid));
 
 	spin_lock_irqsave(&rchp->lock, flag);
@@ -852,6 +906,14 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
 	}
 }
 
+static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
+{
+	while (wr) {
+		complete_rq_drain_wr(qhp, wr);
+		wr = wr->next;
+	}
+}
+
 int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		   struct ib_send_wr **bad_wr)
 {
@@ -868,9 +930,14 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 	qhp = to_c4iw_qp(ibqp);
 	spin_lock_irqsave(&qhp->lock, flag);
-	if (t4_wq_in_error(&qhp->wq)) {
+
+	/*
+	 * If the qp has been flushed, then just insert a special
+	 * drain cqe.
+	 */
+	if (qhp->wq.flushed) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
-		complete_sq_drain_wr(qhp, wr);
+		err = complete_sq_drain_wrs(qhp, wr, bad_wr);
 		return err;
 	}
 	num_wrs = t4_sq_avail(&qhp->wq);
@@ -1011,9 +1078,14 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
 	qhp = to_c4iw_qp(ibqp);
 	spin_lock_irqsave(&qhp->lock, flag);
-	if (t4_wq_in_error(&qhp->wq)) {
+
+	/*
+	 * If the qp has been flushed, then just insert a special
+	 * drain cqe.
+	 */
+	if (qhp->wq.flushed) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
-		complete_rq_drain_wr(qhp, wr);
+		complete_rq_drain_wrs(qhp, wr);
 		return err;
 	}
 	num_wrs = t4_rq_avail(&qhp->wq);
@@ -1285,21 +1357,21 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 	spin_unlock_irqrestore(&rchp->lock, flag);
 
 	if (schp == rchp) {
-		if (t4_clear_cq_armed(&rchp->cq) &&
-		    (rq_flushed || sq_flushed)) {
+		if ((rq_flushed || sq_flushed) &&
+		    t4_clear_cq_armed(&rchp->cq)) {
 			spin_lock_irqsave(&rchp->comp_handler_lock, flag);
 			(*rchp->ibcq.comp_handler)(&rchp->ibcq,
 						   rchp->ibcq.cq_context);
 			spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
 		}
 	} else {
-		if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) {
+		if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) {
 			spin_lock_irqsave(&rchp->comp_handler_lock, flag);
 			(*rchp->ibcq.comp_handler)(&rchp->ibcq,
 						   rchp->ibcq.cq_context);
 			spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
 		}
-		if (t4_clear_cq_armed(&schp->cq) && sq_flushed) {
+		if (sq_flushed && t4_clear_cq_armed(&schp->cq)) {
 			spin_lock_irqsave(&schp->comp_handler_lock, flag);
 			(*schp->ibcq.comp_handler)(&schp->ibcq,
 						   schp->ibcq.cq_context);
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index e9ea94268d51545b07400ed9cba095d8461fb2a1..79e8ee12c391cf6d800911d4b2f0e16063c6de39 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -197,6 +197,11 @@ struct t4_cqe {
 #define CQE_SWCQE_G(x)    ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M)
 #define CQE_SWCQE_V(x)	  ((x)<<CQE_SWCQE_S)
 
+#define CQE_DRAIN_S       10
+#define CQE_DRAIN_M       0x1
+#define CQE_DRAIN_G(x)    ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M)
+#define CQE_DRAIN_V(x)	  ((x)<<CQE_DRAIN_S)
+
 #define CQE_STATUS_S      5
 #define CQE_STATUS_M      0x1F
 #define CQE_STATUS_G(x)   ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M)
@@ -213,6 +218,7 @@ struct t4_cqe {
 #define CQE_OPCODE_V(x)   ((x)<<CQE_OPCODE_S)
 
 #define SW_CQE(x)         (CQE_SWCQE_G(be32_to_cpu((x)->header)))
+#define DRAIN_CQE(x)      (CQE_DRAIN_G(be32_to_cpu((x)->header)))
 #define CQE_QPID(x)       (CQE_QPID_G(be32_to_cpu((x)->header)))
 #define CQE_TYPE(x)       (CQE_TYPE_G(be32_to_cpu((x)->header)))
 #define SQ_TYPE(x)	  (CQE_TYPE((x)))
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 4a9b4d7efe6362f31457672882e882af3bfdfc46..8ce9118d4a7fbac52494de9e5b489dc44365706b 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1131,7 +1131,6 @@ struct hfi1_devdata {
 	u16 pcie_lnkctl;
 	u16 pcie_devctl2;
 	u32 pci_msix0;
-	u32 pci_lnkctl3;
 	u32 pci_tph2;
 
 	/*
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 09e50fd2a08f07bf7b2d42d3d4b4a1a00644b2ce..8c7e7a60b71584d5e587f6ef664cc773ab991991 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -411,15 +411,12 @@ int restore_pci_variables(struct hfi1_devdata *dd)
 	if (ret)
 		goto error;
 
-	ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-				     dd->pci_lnkctl3);
-	if (ret)
-		goto error;
-
-	ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
-	if (ret)
-		goto error;
-
+	if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
+		ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2,
+					     dd->pci_tph2);
+		if (ret)
+			goto error;
+	}
 	return 0;
 
 error:
@@ -469,15 +466,12 @@ int save_pci_variables(struct hfi1_devdata *dd)
 	if (ret)
 		goto error;
 
-	ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-				    &dd->pci_lnkctl3);
-	if (ret)
-		goto error;
-
-	ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
-	if (ret)
-		goto error;
-
+	if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
+		ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2,
+					    &dd->pci_tph2);
+		if (ret)
+			goto error;
+	}
 	return 0;
 
 error:
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index fd01a760259fa1887d2f233a2fcbb3ee581e6f5f..af5f7936f7e5ed9eac26598a76fa31e5f5155046 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -814,7 +814,7 @@ static inline void hfi1_make_rc_ack_16B(struct rvt_qp *qp,
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct hfi1_16b_header *hdr = &opa_hdr->opah;
 	struct ib_other_headers *ohdr;
-	u32 bth0, bth1;
+	u32 bth0, bth1 = 0;
 	u16 len, pkey;
 	u8 becn = !!is_fecn;
 	u8 l4 = OPA_16B_L4_IB_LOCAL;
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 3e4c5253ab5c23d5cd9d7b8789f38fef205b02f2..a40ec939ece58236cfcbbf285fb2a389b23ccadc 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -162,14 +162,10 @@ void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
 {
 	int i;
 	struct device *dev = hr_dev->dev;
-	u32 bits_per_long = BITS_PER_LONG;
 
 	if (buf->nbufs == 1) {
 		dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map);
 	} else {
-		if (bits_per_long == 64 && buf->page_shift == PAGE_SHIFT)
-			vunmap(buf->direct.buf);
-
 		for (i = 0; i < buf->nbufs; ++i)
 			if (buf->page_list[i].buf)
 				dma_free_coherent(dev, 1 << buf->page_shift,
@@ -185,9 +181,7 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
 {
 	int i = 0;
 	dma_addr_t t;
-	struct page **pages;
 	struct device *dev = hr_dev->dev;
-	u32 bits_per_long = BITS_PER_LONG;
 	u32 page_size = 1 << page_shift;
 	u32 order;
 
@@ -236,23 +230,6 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
 			buf->page_list[i].map = t;
 			memset(buf->page_list[i].buf, 0, page_size);
 		}
-		if (bits_per_long == 64 && page_shift == PAGE_SHIFT) {
-			pages = kmalloc_array(buf->nbufs, sizeof(*pages),
-					      GFP_KERNEL);
-			if (!pages)
-				goto err_free;
-
-			for (i = 0; i < buf->nbufs; ++i)
-				pages[i] = virt_to_page(buf->page_list[i].buf);
-
-			buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP,
-					       PAGE_KERNEL);
-			kfree(pages);
-			if (!buf->direct.buf)
-				goto err_free;
-		} else {
-			buf->direct.buf = NULL;
-		}
 	}
 
 	return 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 01d3d695cbba1f2926929f0b59b47691cb08dd18..b154ce40cded846676feba3cabfdbd5240ad0b5a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -726,11 +726,9 @@ static inline struct hns_roce_qp
 
 static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset)
 {
-	u32 bits_per_long_val = BITS_PER_LONG;
 	u32 page_size = 1 << buf->page_shift;
 
-	if ((bits_per_long_val == 64 && buf->page_shift == PAGE_SHIFT) ||
-	    buf->nbufs == 1)
+	if (buf->nbufs == 1)
 		return (char *)(buf->direct.buf) + offset;
 	else
 		return (char *)(buf->page_list[offset >> buf->page_shift].buf) +
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 8b733a66fae5f27da8c3b183ba8e4b0ad60d018d..0eeabfbee192efed31c46d948ee6db264d1085fd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -224,6 +224,7 @@ static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
 			sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN);
 			chunk->npages = 0;
 			chunk->nsg = 0;
+			memset(chunk->buf, 0, sizeof(chunk->buf));
 			list_add_tail(&chunk->list, &hem->chunk_list);
 		}
 
@@ -240,8 +241,7 @@ static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
 		if (!buf)
 			goto fail;
 
-		sg_set_buf(mem, buf, PAGE_SIZE << order);
-		WARN_ON(mem->offset);
+		chunk->buf[chunk->npages] = buf;
 		sg_dma_len(mem) = PAGE_SIZE << order;
 
 		++chunk->npages;
@@ -267,8 +267,8 @@ void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem)
 	list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) {
 		for (i = 0; i < chunk->npages; ++i)
 			dma_free_coherent(hr_dev->dev,
-				   chunk->mem[i].length,
-				   lowmem_page_address(sg_page(&chunk->mem[i])),
+				   sg_dma_len(&chunk->mem[i]),
+				   chunk->buf[i],
 				   sg_dma_address(&chunk->mem[i]));
 		kfree(chunk);
 	}
@@ -722,11 +722,12 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
 	struct hns_roce_hem_chunk *chunk;
 	struct hns_roce_hem_mhop mhop;
 	struct hns_roce_hem *hem;
-	struct page *page = NULL;
+	void *addr = NULL;
 	unsigned long mhop_obj = obj;
 	unsigned long obj_per_chunk;
 	unsigned long idx_offset;
 	int offset, dma_offset;
+	int length;
 	int i, j;
 	u32 hem_idx = 0;
 
@@ -763,25 +764,25 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
 
 	list_for_each_entry(chunk, &hem->chunk_list, list) {
 		for (i = 0; i < chunk->npages; ++i) {
+			length = sg_dma_len(&chunk->mem[i]);
 			if (dma_handle && dma_offset >= 0) {
-				if (sg_dma_len(&chunk->mem[i]) >
-				    (u32)dma_offset)
+				if (length > (u32)dma_offset)
 					*dma_handle = sg_dma_address(
 						&chunk->mem[i]) + dma_offset;
-				dma_offset -= sg_dma_len(&chunk->mem[i]);
+				dma_offset -= length;
 			}
 
-			if (chunk->mem[i].length > (u32)offset) {
-				page = sg_page(&chunk->mem[i]);
+			if (length > (u32)offset) {
+				addr = chunk->buf[i] + offset;
 				goto out;
 			}
-			offset -= chunk->mem[i].length;
+			offset -= length;
 		}
 	}
 
 out:
 	mutex_unlock(&table->mutex);
-	return page ? lowmem_page_address(page) + offset : NULL;
+	return addr;
 }
 EXPORT_SYMBOL_GPL(hns_roce_table_find);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index db66db12075e2b42151fbc24d148615e0841b8f9..e8850d59e7804caa45dd5e2cd77b140c7bfd7047 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -78,6 +78,7 @@ struct hns_roce_hem_chunk {
 	int			 npages;
 	int			 nsg;
 	struct scatterlist	 mem[HNS_ROCE_HEM_CHUNK_LEN];
+	void			 *buf[HNS_ROCE_HEM_CHUNK_LEN];
 };
 
 struct hns_roce_hem {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 8f719c00467b833e15a507e522ac05c7857c5940..8e18445714a96db307d6e4cdda09117333c567ae 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1126,9 +1126,11 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 {
 	struct hns_roce_v2_mpt_entry *mpt_entry;
 	struct scatterlist *sg;
+	u64 page_addr;
 	u64 *pages;
+	int i, j;
+	int len;
 	int entry;
-	int i;
 
 	mpt_entry = mb_buf;
 	memset(mpt_entry, 0, sizeof(*mpt_entry));
@@ -1186,14 +1188,20 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 
 	i = 0;
 	for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
-		pages[i] = ((u64)sg_dma_address(sg)) >> 6;
-
-		/* Record the first 2 entry directly to MTPT table */
-		if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1)
-			break;
-		i++;
+		len = sg_dma_len(sg) >> PAGE_SHIFT;
+		for (j = 0; j < len; ++j) {
+			page_addr = sg_dma_address(sg) +
+				    (j << mr->umem->page_shift);
+			pages[i] = page_addr >> 6;
+
+			/* Record the first 2 entry directly to MTPT table */
+			if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1)
+				goto found;
+			i++;
+		}
 	}
 
+found:
 	mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
 	roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M,
 		       V2_MPT_BYTE_56_PA0_H_S,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 493d6ef3d2d57e4f1e020fd680ab4fc009f8cb7e..77870f9e173684d91f99a5a0627d0490b14342e9 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -1043,7 +1043,7 @@ static int i40iw_parse_mpa(struct i40iw_cm_node *cm_node, u8 *buffer, u32 *type,
  * i40iw_schedule_cm_timer
  * @@cm_node: connection's node
  * @sqbuf: buffer to send
- * @type: if it es send ot close
+ * @type: if it is send or close
  * @send_retrans: if rexmits to be done
  * @close_when_complete: is cm_node to be removed
  *
@@ -1067,7 +1067,8 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
 
 	new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
 	if (!new_send) {
-		i40iw_free_sqbuf(vsi, (void *)sqbuf);
+		if (type != I40IW_TIMER_TYPE_CLOSE)
+			i40iw_free_sqbuf(vsi, (void *)sqbuf);
 		return -ENOMEM;
 	}
 	new_send->retrycount = I40IW_DEFAULT_RETRYS;
@@ -1082,7 +1083,6 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
 		new_send->timetosend += (HZ / 10);
 		if (cm_node->close_entry) {
 			kfree(new_send);
-			i40iw_free_sqbuf(vsi, (void *)sqbuf);
 			i40iw_pr_err("already close entry\n");
 			return -EINVAL;
 		}
@@ -2947,8 +2947,6 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
 			loopback_remotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
 			cm_node->tcp_cntxt.snd_wscale = loopback_remotenode->tcp_cntxt.rcv_wscale;
 			loopback_remotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale;
-			loopback_remotenode->state = I40IW_CM_STATE_MPAREQ_RCVD;
-			i40iw_create_event(loopback_remotenode, I40IW_CM_EVENT_MPA_REQ);
 		}
 		return cm_node;
 	}
@@ -3689,11 +3687,16 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	cm_id->add_ref(cm_id);
 	i40iw_add_ref(&iwqp->ibqp);
 
-	i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
-
 	attr.qp_state = IB_QPS_RTS;
 	cm_node->qhash_set = false;
 	i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+
+	cm_node->accelerated = 1;
+	status =
+		i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
+	if (status)
+		i40iw_debug(dev, I40IW_DEBUG_CM, "error sending cm event - ESTABLISHED\n");
+
 	if (cm_node->loopbackpartner) {
 		cm_node->loopbackpartner->pdata.size = conn_param->private_data_len;
 
@@ -3704,7 +3707,6 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		i40iw_create_event(cm_node->loopbackpartner, I40IW_CM_EVENT_CONNECTED);
 	}
 
-	cm_node->accelerated = 1;
 	if (cm_node->accept_pend) {
 		atomic_dec(&cm_node->listener->pend_accepts_cnt);
 		cm_node->accept_pend = 0;
@@ -3864,6 +3866,12 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 			goto err;
 	}
 
+	if (cm_node->loopbackpartner) {
+		cm_node->loopbackpartner->state = I40IW_CM_STATE_MPAREQ_RCVD;
+		i40iw_create_event(cm_node->loopbackpartner,
+				   I40IW_CM_EVENT_MPA_REQ);
+	}
+
 	i40iw_debug(cm_node->dev,
 		    I40IW_DEBUG_CM,
 		    "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n",
@@ -4044,9 +4052,6 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
 	dev->iw_priv_qp_ops->qp_send_rtt(&iwqp->sc_qp, read0);
 	if (iwqp->page)
 		kunmap(iwqp->page);
-	status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0);
-	if (status)
-		i40iw_pr_err("send cm event\n");
 
 	memset(&attr, 0, sizeof(attr));
 	attr.qp_state = IB_QPS_RTS;
@@ -4054,6 +4059,10 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
 	i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
 
 	cm_node->accelerated = 1;
+	status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
+				     0);
+	if (status)
+		i40iw_debug(dev, I40IW_DEBUG_CM, "error sending cm event - CONNECT_REPLY\n");
 
 	return;
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index d88c6cf47cf275565ea98aba66f9638387db702c..da9821a10e0dfe8e3cb2fcede8f34b92f8a8f41e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -513,7 +513,7 @@ static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp,
 
 	ret_code = i40iw_allocate_dma_mem(cqp->dev->hw,
 					  &cqp->sdbuf,
-					  128,
+					  I40IW_UPDATE_SD_BUF_SIZE * cqp->sq_size,
 					  I40IW_SD_BUF_ALIGNMENT);
 
 	if (ret_code)
@@ -596,14 +596,15 @@ void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp)
 }
 
 /**
- * i40iw_sc_cqp_get_next_send_wqe - get next wqe on cqp sq
- * @cqp: struct for cqp hw
- * @wqe_idx: we index of cqp ring
+ * i40iw_sc_cqp_get_next_send_wqe_idx - get next WQE on CQP SQ and pass back the index
+ * @cqp: pointer to CQP structure
+ * @scratch: private data for CQP WQE
+ * @wqe_idx: WQE index for next WQE on CQP SQ
  */
-u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch)
+static u64 *i40iw_sc_cqp_get_next_send_wqe_idx(struct i40iw_sc_cqp *cqp,
+					       u64 scratch, u32 *wqe_idx)
 {
 	u64 *wqe = NULL;
-	u32	wqe_idx;
 	enum i40iw_status_code ret_code;
 
 	if (I40IW_RING_FULL_ERR(cqp->sq_ring)) {
@@ -616,20 +617,32 @@ u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch)
 			    cqp->sq_ring.size);
 		return NULL;
 	}
-	I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, wqe_idx, ret_code);
+	I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, *wqe_idx, ret_code);
 	cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS]++;
 	if (ret_code)
 		return NULL;
-	if (!wqe_idx)
+	if (!*wqe_idx)
 		cqp->polarity = !cqp->polarity;
 
-	wqe = cqp->sq_base[wqe_idx].elem;
-	cqp->scratch_array[wqe_idx] = scratch;
+	wqe = cqp->sq_base[*wqe_idx].elem;
+	cqp->scratch_array[*wqe_idx] = scratch;
 	I40IW_CQP_INIT_WQE(wqe);
 
 	return wqe;
 }
 
+/**
+ * i40iw_sc_cqp_get_next_send_wqe - get next wqe on cqp sq
+ * @cqp: struct for cqp hw
+ * @scratch: private data for CQP WQE
+ */
+u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch)
+{
+	u32 wqe_idx;
+
+	return i40iw_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx);
+}
+
 /**
  * i40iw_sc_cqp_destroy - destroy cqp during close
  * @cqp: struct for cqp hw
@@ -3587,8 +3600,10 @@ static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp,
 	u64 *wqe;
 	int mem_entries, wqe_entries;
 	struct i40iw_dma_mem *sdbuf = &cqp->sdbuf;
+	u64 offset;
+	u32 wqe_idx;
 
-	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	wqe = i40iw_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx);
 	if (!wqe)
 		return I40IW_ERR_RING_FULL;
 
@@ -3601,8 +3616,10 @@ static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp,
 		 LS_64(mem_entries, I40IW_CQPSQ_UPESD_ENTRY_COUNT);
 
 	if (mem_entries) {
-		memcpy(sdbuf->va, &info->entry[3], (mem_entries << 4));
-		data = sdbuf->pa;
+		offset = wqe_idx * I40IW_UPDATE_SD_BUF_SIZE;
+		memcpy((char *)sdbuf->va + offset, &info->entry[3],
+		       mem_entries << 4);
+		data = (u64)sdbuf->pa + offset;
 	} else {
 		data = 0;
 	}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index 65ec39e3746b42fe351e3c8efaf1fc4e9960f3fa..029083cb81d53d1809c3ea0967fa95a2230d3345 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -1114,7 +1114,7 @@
 #define I40IWQPC_VLANTAG_MASK (0xffffULL << I40IWQPC_VLANTAG_SHIFT)
 
 #define I40IWQPC_ARPIDX_SHIFT 48
-#define I40IWQPC_ARPIDX_MASK (0xfffULL << I40IWQPC_ARPIDX_SHIFT)
+#define I40IWQPC_ARPIDX_MASK (0xffffULL << I40IWQPC_ARPIDX_SHIFT)
 
 #define I40IWQPC_FLOWLABEL_SHIFT 0
 #define I40IWQPC_FLOWLABEL_MASK (0xfffffUL << I40IWQPC_FLOWLABEL_SHIFT)
@@ -1526,7 +1526,7 @@ enum i40iw_alignment {
 	I40IW_AEQ_ALIGNMENT =		0x100,
 	I40IW_CEQ_ALIGNMENT =		0x100,
 	I40IW_CQ0_ALIGNMENT =		0x100,
-	I40IW_SD_BUF_ALIGNMENT =	0x100
+	I40IW_SD_BUF_ALIGNMENT =	0x80
 };
 
 #define I40IW_WQE_SIZE_64	64
@@ -1534,6 +1534,8 @@ enum i40iw_alignment {
 #define I40IW_QP_WQE_MIN_SIZE	32
 #define I40IW_QP_WQE_MAX_SIZE	128
 
+#define I40IW_UPDATE_SD_BUF_SIZE 128
+
 #define I40IW_CQE_QTYPE_RQ 0
 #define I40IW_CQE_QTYPE_SQ 1
 
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 313bfb9ccb71a3b42ebacbaa9b98b91daca51a19..4975f3e6596e49aaa8586ad9a13da719ef4dd6ab 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -642,7 +642,6 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
 		goto err_free_mr;
 
 	mr->max_pages = max_num_sg;
-
 	err = mlx4_mr_enable(dev->dev, &mr->mmr);
 	if (err)
 		goto err_free_pl;
@@ -653,6 +652,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
 	return &mr->ibmr;
 
 err_free_pl:
+	mr->ibmr.device = pd->device;
 	mlx4_free_priv_pages(mr);
 err_free_mr:
 	(void) mlx4_mr_free(dev->dev, &mr->mmr);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 013049bcdb53d5fb95ca61d1ab9640aaa62b0756..caf490ab24c809e403f07bf2471bd2e1290e36b7 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -666,6 +666,19 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
 		return (-EOPNOTSUPP);
 	}
 
+	if (ucmd->rx_hash_fields_mask & ~(MLX4_IB_RX_HASH_SRC_IPV4	|
+					  MLX4_IB_RX_HASH_DST_IPV4	|
+					  MLX4_IB_RX_HASH_SRC_IPV6	|
+					  MLX4_IB_RX_HASH_DST_IPV6	|
+					  MLX4_IB_RX_HASH_SRC_PORT_TCP	|
+					  MLX4_IB_RX_HASH_DST_PORT_TCP	|
+					  MLX4_IB_RX_HASH_SRC_PORT_UDP	|
+					  MLX4_IB_RX_HASH_DST_PORT_UDP)) {
+		pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n",
+			 ucmd->rx_hash_fields_mask);
+		return (-EOPNOTSUPP);
+	}
+
 	if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) &&
 	    (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) {
 		rss_ctx->flags = MLX4_RSS_IPV4;
@@ -691,11 +704,11 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
 			return (-EOPNOTSUPP);
 		}
 
-		if (rss_ctx->flags & MLX4_RSS_IPV4) {
+		if (rss_ctx->flags & MLX4_RSS_IPV4)
 			rss_ctx->flags |= MLX4_RSS_UDP_IPV4;
-		} else if (rss_ctx->flags & MLX4_RSS_IPV6) {
+		if (rss_ctx->flags & MLX4_RSS_IPV6)
 			rss_ctx->flags |= MLX4_RSS_UDP_IPV6;
-		} else {
+		if (!(rss_ctx->flags & (MLX4_RSS_IPV6 | MLX4_RSS_IPV4))) {
 			pr_debug("RX Hash fields_mask is not supported - UDP must be set with IPv4 or IPv6\n");
 			return (-EOPNOTSUPP);
 		}
@@ -707,15 +720,14 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
 
 	if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) &&
 	    (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) {
-		if (rss_ctx->flags & MLX4_RSS_IPV4) {
+		if (rss_ctx->flags & MLX4_RSS_IPV4)
 			rss_ctx->flags |= MLX4_RSS_TCP_IPV4;
-		} else if (rss_ctx->flags & MLX4_RSS_IPV6) {
+		if (rss_ctx->flags & MLX4_RSS_IPV6)
 			rss_ctx->flags |= MLX4_RSS_TCP_IPV6;
-		} else {
+		if (!(rss_ctx->flags & (MLX4_RSS_IPV6 | MLX4_RSS_IPV4))) {
 			pr_debug("RX Hash fields_mask is not supported - TCP must be set with IPv4 or IPv6\n");
 			return (-EOPNOTSUPP);
 		}
-
 	} else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) ||
 		   (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) {
 		pr_debug("RX Hash fields_mask is not supported - both TCP SRC and DST must be set\n");
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 470995fa38d23ee9c5917f9986c53c582db8d542..6f6712f87a730de3cc8c05aeb73f50de73c07487 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -47,17 +47,6 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
 	return err;
 }
 
-int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
-				bool reset, void *out, int out_size)
-{
-	u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
-
-	MLX5_SET(query_cong_statistics_in, in, opcode,
-		 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
-	MLX5_SET(query_cong_statistics_in, in, clear, reset);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
-}
-
 int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
 			       void *out, int out_size)
 {
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index af4c24596274deabd07ea0d95b719f4fd5d990e1..78ffded7cc2c59e2744d6279d96adc6e10cd1348 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -37,8 +37,6 @@
 #include <linux/mlx5/driver.h>
 
 int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
-int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
-				bool reset, void *out, int out_size);
 int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
 			       void *out, int out_size);
 int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 543d0a4c8bf36eb43af6d9047c7eaa6cd75d9638..8ac50de2b2421ea66cb23eff88c09f8ecf3a0f94 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1463,6 +1463,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	}
 
 	INIT_LIST_HEAD(&context->vma_private_list);
+	mutex_init(&context->vma_private_list_mutex);
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
@@ -1624,7 +1625,9 @@ static void  mlx5_ib_vma_close(struct vm_area_struct *area)
 	 * mlx5_ib_disassociate_ucontext().
 	 */
 	mlx5_ib_vma_priv_data->vma = NULL;
+	mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
 	list_del(&mlx5_ib_vma_priv_data->list);
+	mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
 	kfree(mlx5_ib_vma_priv_data);
 }
 
@@ -1644,10 +1647,13 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
 		return -ENOMEM;
 
 	vma_prv->vma = vma;
+	vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
 	vma->vm_private_data = vma_prv;
 	vma->vm_ops =  &mlx5_ib_vm_ops;
 
+	mutex_lock(&ctx->vma_private_list_mutex);
 	list_add(&vma_prv->list, vma_head);
+	mutex_unlock(&ctx->vma_private_list_mutex);
 
 	return 0;
 }
@@ -1690,6 +1696,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 	 * mlx5_ib_vma_close.
 	 */
 	down_write(&owning_mm->mmap_sem);
+	mutex_lock(&context->vma_private_list_mutex);
 	list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
 				 list) {
 		vma = vma_private->vma;
@@ -1704,6 +1711,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 		list_del(&vma_private->list);
 		kfree(vma_private);
 	}
+	mutex_unlock(&context->vma_private_list_mutex);
 	up_write(&owning_mm->mmap_sem);
 	mmput(owning_mm);
 	put_task_struct(owning_process);
@@ -3737,34 +3745,6 @@ static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev,
 	return ret;
 }
 
-static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
-				       struct mlx5_ib_port *port,
-				       struct rdma_hw_stats *stats)
-{
-	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
-	void *out;
-	int ret, i;
-	int offset = port->cnts.num_q_counters;
-
-	out = kvzalloc(outlen, GFP_KERNEL);
-	if (!out)
-		return -ENOMEM;
-
-	ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
-	if (ret)
-		goto free;
-
-	for (i = 0; i < port->cnts.num_cong_counters; i++) {
-		stats->value[i + offset] =
-			be64_to_cpup((__be64 *)(out +
-				     port->cnts.offsets[i + offset]));
-	}
-
-free:
-	kvfree(out);
-	return ret;
-}
-
 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 				struct rdma_hw_stats *stats,
 				u8 port_num, int index)
@@ -3782,7 +3762,12 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 	num_counters = port->cnts.num_q_counters;
 
 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
-		ret = mlx5_ib_query_cong_counters(dev, port, stats);
+		ret = mlx5_lag_query_cong_counters(dev->mdev,
+						   stats->value +
+						   port->cnts.num_q_counters,
+						   port->cnts.num_cong_counters,
+						   port->cnts.offsets +
+						   port->cnts.num_q_counters);
 		if (ret)
 			return ret;
 		num_counters += port->cnts.num_cong_counters;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 6dd8cac78de2c44854f1d5006dd058d2bc81e170..2c5f3533bbc9cbf08ce4fc816ecb86bc5ff04612 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -115,6 +115,8 @@ enum {
 struct mlx5_ib_vma_private_data {
 	struct list_head list;
 	struct vm_area_struct *vma;
+	/* protect vma_private_list add/del */
+	struct mutex *vma_private_list_mutex;
 };
 
 struct mlx5_ib_ucontext {
@@ -129,6 +131,8 @@ struct mlx5_ib_ucontext {
 	/* Transport Domain number */
 	u32			tdn;
 	struct list_head	vma_private_list;
+	/* protect vma_private_list add/del */
+	struct mutex		vma_private_list_mutex;
 
 	unsigned long		upd_xlt_page;
 	/* protect ODP/KSM */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index ee0ee1f9994b4fae933d8590b72ba138d29e176d..d109fe8290a70964d71e44ceb1408215f24d2058 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1637,6 +1637,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 	MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
 	MLX5_SET(mkc, mkc, umr_en, 1);
 
+	mr->ibmr.device = pd->device;
 	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
 	if (err)
 		goto err_destroy_psv;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 63bc2efc34eb57e37f364e5b73e5a157e6040e47..4f7bd3b6a3152937a8ec2fa4de8637e3d9391a9a 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -94,7 +94,7 @@ struct pvrdma_cq {
 	u32 cq_handle;
 	bool is_kernel;
 	atomic_t refcnt;
-	wait_queue_head_t wait;
+	struct completion free;
 };
 
 struct pvrdma_id_table {
@@ -175,7 +175,7 @@ struct pvrdma_srq {
 	u32 srq_handle;
 	int npages;
 	refcount_t refcnt;
-	wait_queue_head_t wait;
+	struct completion free;
 };
 
 struct pvrdma_qp {
@@ -197,7 +197,7 @@ struct pvrdma_qp {
 	bool is_kernel;
 	struct mutex mutex; /* QP state mutex. */
 	atomic_t refcnt;
-	wait_queue_head_t wait;
+	struct completion free;
 };
 
 struct pvrdma_dev {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index 3562c0c30492d07a7d1eddacb033ef769a1f9264..e529622cefad6a501492dc165458b738636842b3 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -179,7 +179,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
 		pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
 
 	atomic_set(&cq->refcnt, 1);
-	init_waitqueue_head(&cq->wait);
+	init_completion(&cq->free);
 	spin_lock_init(&cq->cq_lock);
 
 	memset(cmd, 0, sizeof(*cmd));
@@ -230,8 +230,9 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
 
 static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
 {
-	atomic_dec(&cq->refcnt);
-	wait_event(cq->wait, !atomic_read(&cq->refcnt));
+	if (atomic_dec_and_test(&cq->refcnt))
+		complete(&cq->free);
+	wait_for_completion(&cq->free);
 
 	if (!cq->is_kernel)
 		ib_umem_release(cq->umem);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 1f4e18717a006da9cd3566318123f0fff1038df9..e92681878c93f4b0d4f29724700cc7ba474d7de4 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -346,9 +346,8 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
 		ibqp->event_handler(&e, ibqp->qp_context);
 	}
 	if (qp) {
-		atomic_dec(&qp->refcnt);
-		if (atomic_read(&qp->refcnt) == 0)
-			wake_up(&qp->wait);
+		if (atomic_dec_and_test(&qp->refcnt))
+			complete(&qp->free);
 	}
 }
 
@@ -373,9 +372,8 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
 		ibcq->event_handler(&e, ibcq->cq_context);
 	}
 	if (cq) {
-		atomic_dec(&cq->refcnt);
-		if (atomic_read(&cq->refcnt) == 0)
-			wake_up(&cq->wait);
+		if (atomic_dec_and_test(&cq->refcnt))
+			complete(&cq->free);
 	}
 }
 
@@ -404,7 +402,7 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
 	}
 	if (srq) {
 		if (refcount_dec_and_test(&srq->refcnt))
-			wake_up(&srq->wait);
+			complete(&srq->free);
 	}
 }
 
@@ -539,9 +537,8 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
 		if (cq && cq->ibcq.comp_handler)
 			cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
 		if (cq) {
-			atomic_dec(&cq->refcnt);
-			if (atomic_read(&cq->refcnt))
-				wake_up(&cq->wait);
+			if (atomic_dec_and_test(&cq->refcnt))
+				complete(&cq->free);
 		}
 		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
 	}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 10420a18d02f46dc07f1698387e11105ab76c004..4059308e1454a5bfda72a31d9931afaa50d39441 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -246,7 +246,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 		spin_lock_init(&qp->rq.lock);
 		mutex_init(&qp->mutex);
 		atomic_set(&qp->refcnt, 1);
-		init_waitqueue_head(&qp->wait);
+		init_completion(&qp->free);
 
 		qp->state = IB_QPS_RESET;
 
@@ -428,8 +428,16 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp)
 
 	pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
 
-	atomic_dec(&qp->refcnt);
-	wait_event(qp->wait, !atomic_read(&qp->refcnt));
+	if (atomic_dec_and_test(&qp->refcnt))
+		complete(&qp->free);
+	wait_for_completion(&qp->free);
+
+	if (!qp->is_kernel) {
+		if (qp->rumem)
+			ib_umem_release(qp->rumem);
+		if (qp->sumem)
+			ib_umem_release(qp->sumem);
+	}
 
 	pvrdma_page_dir_cleanup(dev, &qp->pdir);
 
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index 826ccb864596dc19879211f9cdeefee47e536d5c..5acebb1ef631ae0070d28917530345689a2654de 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -149,7 +149,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
 
 	spin_lock_init(&srq->lock);
 	refcount_set(&srq->refcnt, 1);
-	init_waitqueue_head(&srq->wait);
+	init_completion(&srq->free);
 
 	dev_dbg(&dev->pdev->dev,
 		"create shared receive queue from user space\n");
@@ -236,8 +236,9 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
 	dev->srq_tbl[srq->srq_handle] = NULL;
 	spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);
 
-	refcount_dec(&srq->refcnt);
-	wait_event(srq->wait, !refcount_read(&srq->refcnt));
+	if (refcount_dec_and_test(&srq->refcnt))
+		complete(&srq->free);
+	wait_for_completion(&srq->free);
 
 	/* There is no support for kernel clients, so this is safe. */
 	ib_umem_release(srq->umem);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 87f4bd99cdf7102e1e6e43c23a03615103d13b2c..2c13123bfd69499e3ac7661871d176c57979664b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1145,6 +1145,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
 	noio_flag = memalloc_noio_save();
 	p->tx_ring = vzalloc(ipoib_sendq_size * sizeof(*p->tx_ring));
 	if (!p->tx_ring) {
+		memalloc_noio_restore(noio_flag);
 		ret = -ENOMEM;
 		goto err_tx;
 	}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 3b96cdaf9a835d0faebad2b6727dde1132f109f1..e6151a29c412a36d7d9db66833ad659ab853656b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1236,13 +1236,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
 		ipoib_ib_dev_down(dev);
 
 	if (level == IPOIB_FLUSH_HEAVY) {
-		rtnl_lock();
 		if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
 			ipoib_ib_dev_stop(dev);
 
-		result = ipoib_ib_dev_open(dev);
-		rtnl_unlock();
-		if (result)
+		if (ipoib_ib_dev_open(dev))
 			return;
 
 		if (netif_queue_stopped(dev))
@@ -1282,7 +1279,9 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
 	struct ipoib_dev_priv *priv =
 		container_of(work, struct ipoib_dev_priv, flush_heavy);
 
+	rtnl_lock();
 	__ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0);
+	rtnl_unlock();
 }
 
 void ipoib_ib_dev_cleanup(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 12b7f911f0e5b9b9fce4f034bde66db4fcc26e81..8880351df179382fa6d64b4b6e698c2fdb563e94 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -902,8 +902,8 @@ static int path_rec_start(struct net_device *dev,
 	return 0;
 }
 
-static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
-			   struct net_device *dev)
+static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
+					  struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct rdma_netdev *rn = netdev_priv(dev);
@@ -917,7 +917,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 		spin_unlock_irqrestore(&priv->lock, flags);
 		++dev->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
-		return;
+		return NULL;
+	}
+
+	/* To avoid race condition, make sure that the
+	 * neigh will be added only once.
+	 */
+	if (unlikely(!list_empty(&neigh->list))) {
+		spin_unlock_irqrestore(&priv->lock, flags);
+		return neigh;
 	}
 
 	path = __path_find(dev, daddr + 4);
@@ -956,7 +964,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 			path->ah->last_send = rn->send(dev, skb, path->ah->ah,
 						       IPOIB_QPN(daddr));
 			ipoib_neigh_put(neigh);
-			return;
+			return NULL;
 		}
 	} else {
 		neigh->ah  = NULL;
@@ -973,7 +981,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 	ipoib_neigh_put(neigh);
-	return;
+	return NULL;
 
 err_path:
 	ipoib_neigh_free(neigh);
@@ -983,6 +991,8 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 	ipoib_neigh_put(neigh);
+
+	return NULL;
 }
 
 static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
@@ -1091,8 +1101,9 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	case htons(ETH_P_TIPC):
 		neigh = ipoib_neigh_get(dev, phdr->hwaddr);
 		if (unlikely(!neigh)) {
-			neigh_add_path(skb, phdr->hwaddr, dev);
-			return NETDEV_TX_OK;
+			neigh = neigh_add_path(skb, phdr->hwaddr, dev);
+			if (likely(!neigh))
+				return NETDEV_TX_OK;
 		}
 		break;
 	case htons(ETH_P_ARP):
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 93e149efc1f5fc0382b61dcfc9f84d786d8b52ca..9b3f47ae201603e8bc4e1527a0b3546a0de69878 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -816,7 +816,10 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 		spin_lock_irqsave(&priv->lock, flags);
 		if (!neigh) {
 			neigh = ipoib_neigh_alloc(daddr, dev);
-			if (neigh) {
+			/* Make sure that the neigh will be added only
+			 * once to mcast list.
+			 */
+			if (neigh && list_empty(&neigh->list)) {
 				kref_get(&mcast->ah->ref);
 				neigh->ah	= mcast->ah;
 				list_add_tail(&neigh->list, &mcast->neigh_list);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 8a1bd354b1cc1cb195d77be4d545a11c1bc93ac4..bfa576aa9f03c75727b9b61db67d47bfc8846797 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1013,8 +1013,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
 		return -ENOMEM;
 
 	attr->qp_state = IB_QPS_INIT;
-	attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
-	    IB_ACCESS_REMOTE_WRITE;
+	attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
 	attr->port_num = ch->sport->port;
 	attr->pkey_index = 0;
 
@@ -2078,7 +2077,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
 		goto destroy_ib;
 	}
 
-	guid = (__be16 *)&param->primary_path->sgid.global.interface_id;
+	guid = (__be16 *)&param->primary_path->dgid.global.interface_id;
 	snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
 		 be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
 		 be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index 3d8ff09eba57696677b242d29e33fb1bac3f592d..c868a878c84f1dd5de44729af5531473699a54bb 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -163,7 +163,7 @@ static unsigned int get_time_pit(void)
 #define GET_TIME(x)	do { x = (unsigned int)rdtsc(); } while (0)
 #define DELTA(x,y)	((y)-(x))
 #define TIME_NAME	"TSC"
-#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE)
+#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV) || defined(CONFIG_TILE)
 #define GET_TIME(x)	do { x = get_cycles(); } while (0)
 #define DELTA(x,y)	((y)-(x))
 #define TIME_NAME	"get_cycles"
diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c
index ae473123583bb22bedbb33426f11eb8f802ea368..3d51175c4d7207aac2b7d625a2c9af26ec558cf0 100644
--- a/drivers/input/misc/ims-pcu.c
+++ b/drivers/input/misc/ims-pcu.c
@@ -1651,7 +1651,7 @@ ims_pcu_get_cdc_union_desc(struct usb_interface *intf)
 				return union_desc;
 
 			dev_err(&intf->dev,
-				"Union descriptor to short (%d vs %zd\n)",
+				"Union descriptor too short (%d vs %zd)\n",
 				union_desc->bLength, sizeof(*union_desc));
 			return NULL;
 		}
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index 6bf56bb5f8d97dd4b5835e54c7e7ca764db98434..d91f3b1c53755f44dc9e6a469359d0173dbbf264 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -326,8 +326,6 @@ static int xenkbd_probe(struct xenbus_device *dev,
 				     0, width, 0, 0);
 		input_set_abs_params(mtouch, ABS_MT_POSITION_Y,
 				     0, height, 0, 0);
-		input_set_abs_params(mtouch, ABS_MT_PRESSURE,
-				     0, 255, 0, 0);
 
 		ret = input_mt_init_slots(mtouch, num_cont, INPUT_MT_DIRECT);
 		if (ret) {
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index b84cd978fce2da18df706ee0a6ef72ab0a45cb5b..a4aaa748e987f7e4db918e290d20d356f9eec21b 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1613,7 +1613,7 @@ static int elantech_set_properties(struct elantech_data *etd)
 		case 5:
 			etd->hw_version = 3;
 			break;
-		case 6 ... 14:
+		case 6 ... 15:
 			etd->hw_version = 4;
 			break;
 		default:
diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c
index e102d7764bc25e50649e34e229afda59552dfe8b..a458e5ec9e41eaee8a3e04c6643aca614c79a1c3 100644
--- a/drivers/input/touchscreen/elants_i2c.c
+++ b/drivers/input/touchscreen/elants_i2c.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/platform_device.h>
 #include <linux/async.h>
 #include <linux/i2c.h>
@@ -1261,10 +1262,13 @@ static int elants_i2c_probe(struct i2c_client *client,
 	}
 
 	/*
-	 * Systems using device tree should set up interrupt via DTS,
-	 * the rest will use the default falling edge interrupts.
+	 * Platform code (ACPI, DTS) should normally set up interrupt
+	 * for us, but in case it did not let's fall back to using falling
+	 * edge to be compatible with older Chromebooks.
 	 */
-	irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING;
+	irqflags = irq_get_trigger_type(client->irq);
+	if (!irqflags)
+		irqflags = IRQF_TRIGGER_FALLING;
 
 	error = devm_request_threaded_irq(&client->dev, client->irq,
 					  NULL, elants_i2c_irq,
diff --git a/drivers/input/touchscreen/hideep.c b/drivers/input/touchscreen/hideep.c
index fc080a7c2e1fbb8c17275d5d7e1c77462015a3d3..f1cd4dd9a4a3459a55ce66c79d39405972201e9d 100644
--- a/drivers/input/touchscreen/hideep.c
+++ b/drivers/input/touchscreen/hideep.c
@@ -10,8 +10,7 @@
 #include <linux/of.h>
 #include <linux/firmware.h>
 #include <linux/delay.h>
-#include <linux/gpio.h>
-#include <linux/gpio/machine.h>
+#include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/acpi.h>
 #include <linux/interrupt.h>
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 7d5eb004091d1d64f6956825e9dd14bf975a8b68..97baf88d950589afa9a712005d903fa1603eea14 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4184,7 +4184,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
 			       struct irq_cfg *cfg);
 
 static int irq_remapping_activate(struct irq_domain *domain,
-				  struct irq_data *irq_data, bool early)
+				  struct irq_data *irq_data, bool reserve)
 {
 	struct amd_ir_data *data = irq_data->chip_data;
 	struct irq_2_irte *irte_info = &data->irq_2_irte;
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index f122071688fd530b3fedd5e21ddbaf0ccd4bc203..744592d330ca13f2734470c3992f5bb5b1b75088 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1698,13 +1698,15 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
 	domain->geometry.aperture_end = (1UL << ias) - 1;
 	domain->geometry.force_aperture = true;
-	smmu_domain->pgtbl_ops = pgtbl_ops;
 
 	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
-	if (ret < 0)
+	if (ret < 0) {
 		free_io_pgtable_ops(pgtbl_ops);
+		return ret;
+	}
 
-	return ret;
+	smmu_domain->pgtbl_ops = pgtbl_ops;
+	return 0;
 }
 
 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
@@ -1731,7 +1733,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
 
 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
 {
-	int i;
+	int i, j;
 	struct arm_smmu_master_data *master = fwspec->iommu_priv;
 	struct arm_smmu_device *smmu = master->smmu;
 
@@ -1739,6 +1741,13 @@ static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
 		u32 sid = fwspec->ids[i];
 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
 
+		/* Bridged PCI devices may end up with duplicated IDs */
+		for (j = 0; j < i; j++)
+			if (fwspec->ids[j] == sid)
+				break;
+		if (j < i)
+			continue;
+
 		arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
 	}
 }
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a0babdbf71460dda5dc156bfe7b7b9313e0da679..4a2de34895ec3177eb07082afe46a8921fa9f958 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2250,10 +2250,12 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 		uint64_t tmp;
 
 		if (!sg_res) {
+			unsigned int pgoff = sg->offset & ~PAGE_MASK;
+
 			sg_res = aligned_nrpages(sg->offset, sg->length);
-			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
+			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
 			sg->dma_length = sg->length;
-			pteval = page_to_phys(sg_page(sg)) | prot;
+			pteval = (sg_phys(sg) - pgoff) | prot;
 			phys_pfn = pteval >> VTD_PAGE_SHIFT;
 		}
 
@@ -3787,7 +3789,7 @@ static int intel_nontranslate_map_sg(struct device *hddev,
 
 	for_each_sg(sglist, sg, nelems, i) {
 		BUG_ON(!sg_page(sg));
-		sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
+		sg->dma_address = sg_phys(sg);
 		sg->dma_length = sg->length;
 	}
 	return nelems;
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 76a193c7fcfc69b012d3e6e1f1dd246ab8d5acc8..66f69af2c2191f6a247a82acc227ed64b85c6bff 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1397,7 +1397,7 @@ static void intel_irq_remapping_free(struct irq_domain *domain,
 }
 
 static int intel_irq_remapping_activate(struct irq_domain *domain,
-					struct irq_data *irq_data, bool early)
+					struct irq_data *irq_data, bool reserve)
 {
 	intel_ir_reconfigure_irte(irq_data, true);
 	return 0;
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 4039e64cd34211db8fac8ebc2f993c5e081e9c83..06f025fd5726f6b230d51c880e7b8accf9e8c738 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2303,7 +2303,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
 }
 
 static int its_irq_domain_activate(struct irq_domain *domain,
-				   struct irq_data *d, bool early)
+				   struct irq_data *d, bool reserve)
 {
 	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
 	u32 event = its_get_event_id(d);
@@ -2818,7 +2818,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
 }
 
 static int its_vpe_irq_domain_activate(struct irq_domain *domain,
-				       struct irq_data *d, bool early)
+				       struct irq_data *d, bool reserve)
 {
 	struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
 	struct its_node *its;
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index 06f29cf5018a151f7d35641d90b8d043aec85516..cee59fe1321c44f9e37c9b6e5bcfc1a57fb98f41 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -342,6 +342,9 @@ static irqreturn_t intc_irqpin_shared_irq_handler(int irq, void *dev_id)
  */
 static struct lock_class_key intc_irqpin_irq_lock_class;
 
+/* And this is for the request mutex */
+static struct lock_class_key intc_irqpin_irq_request_class;
+
 static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq,
 				      irq_hw_number_t hw)
 {
@@ -352,7 +355,8 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq,
 
 	intc_irqpin_dbg(&p->irq[hw], "map");
 	irq_set_chip_data(virq, h->host_data);
-	irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class);
+	irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class,
+			      &intc_irqpin_irq_request_class);
 	irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq);
 	return 0;
 }
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index fd83c7f77a95d4998cff58be3d78ba467faddeab..ede4fa0ac2cceb736f69890a8823909a5d162f38 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -188,6 +188,7 @@ void led_blink_set(struct led_classdev *led_cdev,
 {
 	del_timer_sync(&led_cdev->blink_timer);
 
+	clear_bit(LED_BLINK_SW, &led_cdev->work_flags);
 	clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags);
 	clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags);
 
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index b8ac591aaaa7070bfbd6d32c20993fb9130961f8..c546b567f3b50a3f43b0c074e9319ca908ec5971 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1611,7 +1611,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
 	int l;
 	struct dm_buffer *b, *tmp;
 	unsigned long freed = 0;
-	unsigned long count = nr_to_scan;
+	unsigned long count = c->n_buffers[LIST_CLEAN] +
+			      c->n_buffers[LIST_DIRTY];
 	unsigned long retain_target = get_retain_buffers(c);
 
 	for (l = 0; l < LIST_SIZE; l++) {
@@ -1647,8 +1648,11 @@ static unsigned long
 dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
+	unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
+			      READ_ONCE(c->n_buffers[LIST_DIRTY]);
+	unsigned long retain_target = get_retain_buffers(c);
 
-	return READ_ONCE(c->n_buffers[LIST_CLEAN]) + READ_ONCE(c->n_buffers[LIST_DIRTY]);
+	return (count < retain_target) ? 0 : (count - retain_target);
 }
 
 /*
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index cf23a14f9c6a6572955746f040802dcfd34d801d..47407e43b96a168eed8660a7284a60132284d189 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -3472,18 +3472,18 @@ static int __init dm_cache_init(void)
 {
 	int r;
 
-	r = dm_register_target(&cache_target);
-	if (r) {
-		DMERR("cache target registration failed: %d", r);
-		return r;
-	}
-
 	migration_cache = KMEM_CACHE(dm_cache_migration, 0);
 	if (!migration_cache) {
 		dm_unregister_target(&cache_target);
 		return -ENOMEM;
 	}
 
+	r = dm_register_target(&cache_target);
+	if (r) {
+		DMERR("cache target registration failed: %d", r);
+		return r;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c8faa2b8584268f75a8177f39677b94edba55289..f7810cc869ac883e11b60e0ad3f29253444d411a 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -457,6 +457,38 @@ do {									\
 		 dm_noflush_suspending((m)->ti));			\
 } while (0)
 
+/*
+ * Check whether bios must be queued in the device-mapper core rather
+ * than here in the target.
+ *
+ * If MPATHF_QUEUE_IF_NO_PATH and MPATHF_SAVED_QUEUE_IF_NO_PATH hold
+ * the same value then we are not between multipath_presuspend()
+ * and multipath_resume() calls and we have no need to check
+ * for the DMF_NOFLUSH_SUSPENDING flag.
+ */
+static bool __must_push_back(struct multipath *m, unsigned long flags)
+{
+	return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) !=
+		 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &flags)) &&
+		dm_noflush_suspending(m->ti));
+}
+
+/*
+ * Following functions use READ_ONCE to get atomic access to
+ * all m->flags to avoid taking spinlock
+ */
+static bool must_push_back_rq(struct multipath *m)
+{
+	unsigned long flags = READ_ONCE(m->flags);
+	return test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) || __must_push_back(m, flags);
+}
+
+static bool must_push_back_bio(struct multipath *m)
+{
+	unsigned long flags = READ_ONCE(m->flags);
+	return __must_push_back(m, flags);
+}
+
 /*
  * Map cloned requests (request-based multipath)
  */
@@ -478,7 +510,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
 		pgpath = choose_pgpath(m, nr_bytes);
 
 	if (!pgpath) {
-		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
+		if (must_push_back_rq(m))
 			return DM_MAPIO_DELAY_REQUEUE;
 		dm_report_EIO(m);	/* Failed */
 		return DM_MAPIO_KILL;
@@ -553,7 +585,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
 	}
 
 	if (!pgpath) {
-		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
+		if (must_push_back_bio(m))
 			return DM_MAPIO_REQUEUE;
 		dm_report_EIO(m);
 		return DM_MAPIO_KILL;
@@ -651,8 +683,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
 	assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags,
 		   (save_old_value && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) ||
 		   (!save_old_value && queue_if_no_path));
-	assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags,
-		   queue_if_no_path || dm_noflush_suspending(m->ti));
+	assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path);
 	spin_unlock_irqrestore(&m->lock, flags);
 
 	if (!queue_if_no_path) {
@@ -1486,7 +1517,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 			fail_path(pgpath);
 
 		if (atomic_read(&m->nr_valid_paths) == 0 &&
-		    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+		    !must_push_back_rq(m)) {
 			if (error == BLK_STS_IOERR)
 				dm_report_EIO(m);
 			/* complete with the original error */
@@ -1521,8 +1552,12 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
 
 	if (atomic_read(&m->nr_valid_paths) == 0 &&
 	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
-		dm_report_EIO(m);
-		*error = BLK_STS_IOERR;
+		if (must_push_back_bio(m)) {
+			r = DM_ENDIO_REQUEUE;
+		} else {
+			dm_report_EIO(m);
+			*error = BLK_STS_IOERR;
+		}
 		goto done;
 	}
 
@@ -1957,13 +1992,6 @@ static int __init dm_multipath_init(void)
 {
 	int r;
 
-	r = dm_register_target(&multipath_target);
-	if (r < 0) {
-		DMERR("request-based register failed %d", r);
-		r = -EINVAL;
-		goto bad_register_target;
-	}
-
 	kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
 	if (!kmultipathd) {
 		DMERR("failed to create workqueue kmpathd");
@@ -1985,13 +2013,20 @@ static int __init dm_multipath_init(void)
 		goto bad_alloc_kmpath_handlerd;
 	}
 
+	r = dm_register_target(&multipath_target);
+	if (r < 0) {
+		DMERR("request-based register failed %d", r);
+		r = -EINVAL;
+		goto bad_register_target;
+	}
+
 	return 0;
 
+bad_register_target:
+	destroy_workqueue(kmpath_handlerd);
 bad_alloc_kmpath_handlerd:
 	destroy_workqueue(kmultipathd);
 bad_alloc_kmultipathd:
-	dm_unregister_target(&multipath_target);
-bad_register_target:
 	return r;
 }
 
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 1113b42e1edae4029f550b71c635ea80c76a46b9..a0613bd8ed00efc17d545a3335c39cb6cfb83919 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -2411,24 +2411,6 @@ static int __init dm_snapshot_init(void)
 		return r;
 	}
 
-	r = dm_register_target(&snapshot_target);
-	if (r < 0) {
-		DMERR("snapshot target register failed %d", r);
-		goto bad_register_snapshot_target;
-	}
-
-	r = dm_register_target(&origin_target);
-	if (r < 0) {
-		DMERR("Origin target register failed %d", r);
-		goto bad_register_origin_target;
-	}
-
-	r = dm_register_target(&merge_target);
-	if (r < 0) {
-		DMERR("Merge target register failed %d", r);
-		goto bad_register_merge_target;
-	}
-
 	r = init_origin_hash();
 	if (r) {
 		DMERR("init_origin_hash failed.");
@@ -2449,19 +2431,37 @@ static int __init dm_snapshot_init(void)
 		goto bad_pending_cache;
 	}
 
+	r = dm_register_target(&snapshot_target);
+	if (r < 0) {
+		DMERR("snapshot target register failed %d", r);
+		goto bad_register_snapshot_target;
+	}
+
+	r = dm_register_target(&origin_target);
+	if (r < 0) {
+		DMERR("Origin target register failed %d", r);
+		goto bad_register_origin_target;
+	}
+
+	r = dm_register_target(&merge_target);
+	if (r < 0) {
+		DMERR("Merge target register failed %d", r);
+		goto bad_register_merge_target;
+	}
+
 	return 0;
 
-bad_pending_cache:
-	kmem_cache_destroy(exception_cache);
-bad_exception_cache:
-	exit_origin_hash();
-bad_origin_hash:
-	dm_unregister_target(&merge_target);
 bad_register_merge_target:
 	dm_unregister_target(&origin_target);
 bad_register_origin_target:
 	dm_unregister_target(&snapshot_target);
 bad_register_snapshot_target:
+	kmem_cache_destroy(pending_cache);
+bad_pending_cache:
+	kmem_cache_destroy(exception_cache);
+bad_exception_cache:
+	exit_origin_hash();
+bad_origin_hash:
 	dm_exception_store_exit();
 
 	return r;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 88130b5d95f909ead8441dec7f3fb5d80a7914c7..aaffd0c0ee9a76c71f23f9bb1074ec8057b8c6f7 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -453,14 +453,15 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
 
 		refcount_set(&dd->count, 1);
 		list_add(&dd->list, &t->devices);
+		goto out;
 
 	} else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
 		r = upgrade_mode(dd, mode, t->md);
 		if (r)
 			return r;
-		refcount_inc(&dd->count);
 	}
-
+	refcount_inc(&dd->count);
+out:
 	*result = dd->dm_dev;
 	return 0;
 }
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 89e5dff9b4cfc1b87049529238c5c01978345b81..f91d771fff4b6e9d9a488a7a67916326a1e85897 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4355,30 +4355,28 @@ static struct target_type thin_target = {
 
 static int __init dm_thin_init(void)
 {
-	int r;
+	int r = -ENOMEM;
 
 	pool_table_init();
 
+	_new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
+	if (!_new_mapping_cache)
+		return r;
+
 	r = dm_register_target(&thin_target);
 	if (r)
-		return r;
+		goto bad_new_mapping_cache;
 
 	r = dm_register_target(&pool_target);
 	if (r)
-		goto bad_pool_target;
-
-	r = -ENOMEM;
-
-	_new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
-	if (!_new_mapping_cache)
-		goto bad_new_mapping_cache;
+		goto bad_thin_target;
 
 	return 0;
 
-bad_new_mapping_cache:
-	dm_unregister_target(&pool_target);
-bad_pool_target:
+bad_thin_target:
 	dm_unregister_target(&thin_target);
+bad_new_mapping_cache:
+	kmem_cache_destroy(_new_mapping_cache);
 
 	return r;
 }
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 41c050b59ec454f139b359fdcdc116d964103d61..4e4dee0ec2de336eba90e2400f1c051b2fff1733 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7605,7 +7605,9 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
 		if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
 			/* Still cleaning up */
 			resync = max_sectors;
-	} else
+	} else if (resync > max_sectors)
+		resync = max_sectors;
+	else
 		resync -= atomic_read(&mddev->recovery_active);
 
 	if (resync == 0) {
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index cc9d337a1ed37e84994622f5de76717734cca4a1..6df398e3a008801f91198b9d9556776d919deb41 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -809,11 +809,15 @@ static void flush_pending_writes(struct r1conf *conf)
 	spin_lock_irq(&conf->device_lock);
 
 	if (conf->pending_bio_list.head) {
+		struct blk_plug plug;
 		struct bio *bio;
+
 		bio = bio_list_get(&conf->pending_bio_list);
 		conf->pending_count = 0;
 		spin_unlock_irq(&conf->device_lock);
+		blk_start_plug(&plug);
 		flush_bio_list(conf, bio);
+		blk_finish_plug(&plug);
 	} else
 		spin_unlock_irq(&conf->device_lock);
 }
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index b9edbc747a95932424b42c5d16c1e9d5d61e9491..c131835cf008c2df85e240259cac8182115e4e62 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -894,10 +894,13 @@ static void flush_pending_writes(struct r10conf *conf)
 	spin_lock_irq(&conf->device_lock);
 
 	if (conf->pending_bio_list.head) {
+		struct blk_plug plug;
 		struct bio *bio;
+
 		bio = bio_list_get(&conf->pending_bio_list);
 		conf->pending_count = 0;
 		spin_unlock_irq(&conf->device_lock);
+		blk_start_plug(&plug);
 		/* flush any pending bitmap writes to disk
 		 * before proceeding w/ I/O */
 		bitmap_unplug(conf->mddev->bitmap);
@@ -918,6 +921,7 @@ static void flush_pending_writes(struct r10conf *conf)
 				generic_make_request(bio);
 			bio = next;
 		}
+		blk_finish_plug(&plug);
 	} else
 		spin_unlock_irq(&conf->device_lock);
 }
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index f1c86d938502e61c93d8951680767f1af5de98e5..39f31f07ffe9ece14d9764f0eb511ea00f812618 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2577,31 +2577,22 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
 int r5c_journal_mode_set(struct mddev *mddev, int mode)
 {
 	struct r5conf *conf;
-	int err;
 
 	if (mode < R5C_JOURNAL_MODE_WRITE_THROUGH ||
 	    mode > R5C_JOURNAL_MODE_WRITE_BACK)
 		return -EINVAL;
 
-	err = mddev_lock(mddev);
-	if (err)
-		return err;
 	conf = mddev->private;
-	if (!conf || !conf->log) {
-		mddev_unlock(mddev);
+	if (!conf || !conf->log)
 		return -ENODEV;
-	}
 
 	if (raid5_calc_degraded(conf) > 0 &&
-	    mode == R5C_JOURNAL_MODE_WRITE_BACK) {
-		mddev_unlock(mddev);
+	    mode == R5C_JOURNAL_MODE_WRITE_BACK)
 		return -EINVAL;
-	}
 
 	mddev_suspend(mddev);
 	conf->log->r5c_journal_mode = mode;
 	mddev_resume(mddev);
-	mddev_unlock(mddev);
 
 	pr_debug("md/raid:%s: setting r5c cache mode to %d: %s\n",
 		 mdname(mddev), mode, r5c_journal_mode_str[mode]);
@@ -2614,6 +2605,7 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev,
 {
 	int mode = ARRAY_SIZE(r5c_journal_mode_str);
 	size_t len = length;
+	int ret;
 
 	if (len < 2)
 		return -EINVAL;
@@ -2625,8 +2617,12 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev,
 		if (strlen(r5c_journal_mode_str[mode]) == len &&
 		    !strncmp(page, r5c_journal_mode_str[mode], len))
 			break;
-
-	return r5c_journal_mode_set(mddev, mode) ?: length;
+	ret = mddev_lock(mddev);
+	if (ret)
+		return ret;
+	ret = r5c_journal_mode_set(mddev, mode);
+	mddev_unlock(mddev);
+	return ret ?: length;
 }
 
 struct md_sysfs_entry
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 31dc25e2871ad3942a1fba66f1c7c999612ce83b..98ce4272ace9631045f6e00cf3fd7ea8f96b8d77 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2677,13 +2677,13 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 	pr_debug("raid456: error called\n");
 
 	spin_lock_irqsave(&conf->device_lock, flags);
+	set_bit(Faulty, &rdev->flags);
 	clear_bit(In_sync, &rdev->flags);
 	mddev->degraded = raid5_calc_degraded(conf);
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 
 	set_bit(Blocked, &rdev->flags);
-	set_bit(Faulty, &rdev->flags);
 	set_mask_bits(&mddev->sb_flags, 0,
 		      BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
 	pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
diff --git a/drivers/media/common/siano/smscoreapi.c b/drivers/media/common/siano/smscoreapi.c
index e4ea2a0c7a240abb9b57deffc9bb988eda407bc7..c5c827e11b640dc40a762550be7677f5ac6db5b6 100644
--- a/drivers/media/common/siano/smscoreapi.c
+++ b/drivers/media/common/siano/smscoreapi.c
@@ -521,13 +521,13 @@ static void list_add_locked(struct list_head *new, struct list_head *head,
 	spin_unlock_irqrestore(lock, flags);
 }
 
-/**
+/*
  * register a client callback that called when device plugged in/unplugged
  * NOTE: if devices exist callback is called immediately for each device
  *
  * @param hotplug callback
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 int smscore_register_hotplug(hotplug_t hotplug)
 {
@@ -562,7 +562,7 @@ int smscore_register_hotplug(hotplug_t hotplug)
 }
 EXPORT_SYMBOL_GPL(smscore_register_hotplug);
 
-/**
+/*
  * unregister a client callback that called when device plugged in/unplugged
  *
  * @param hotplug callback
@@ -636,7 +636,7 @@ smscore_buffer_t *smscore_createbuffer(u8 *buffer, void *common_buffer,
 	return cb;
 }
 
-/**
+/*
  * creates coredev object for a device, prepares buffers,
  * creates buffer mappings, notifies registered hotplugs about new device.
  *
@@ -644,7 +644,7 @@ smscore_buffer_t *smscore_createbuffer(u8 *buffer, void *common_buffer,
  *               and handlers
  * @param coredev pointer to a value that receives created coredev object
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 int smscore_register_device(struct smsdevice_params_t *params,
 			    struct smscore_device_t **coredev,
@@ -764,10 +764,10 @@ static int smscore_sendrequest_and_wait(struct smscore_device_t *coredev,
 			0 : -ETIME;
 }
 
-/**
+/*
  * Starts & enables IR operations
  *
- * @return 0 on success, < 0 on error.
+ * return: 0 on success, < 0 on error.
  */
 static int smscore_init_ir(struct smscore_device_t *coredev)
 {
@@ -812,13 +812,13 @@ static int smscore_init_ir(struct smscore_device_t *coredev)
 	return 0;
 }
 
-/**
+/*
  * configures device features according to board configuration structure.
  *
  * @param coredev pointer to a coredev object returned by
  *                smscore_register_device
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 static int smscore_configure_board(struct smscore_device_t *coredev)
 {
@@ -861,13 +861,13 @@ static int smscore_configure_board(struct smscore_device_t *coredev)
 	return 0;
 }
 
-/**
+/*
  * sets initial device mode and notifies client hotplugs that device is ready
  *
  * @param coredev pointer to a coredev object returned by
  *		  smscore_register_device
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 int smscore_start_device(struct smscore_device_t *coredev)
 {
@@ -1087,7 +1087,7 @@ static char *smscore_fw_lkup[][DEVICE_MODE_MAX] = {
 	},
 };
 
-/**
+/*
  * get firmware file name from one of the two mechanisms : sms_boards or
  * smscore_fw_lkup.
  * @param coredev pointer to a coredev object returned by
@@ -1096,7 +1096,7 @@ static char *smscore_fw_lkup[][DEVICE_MODE_MAX] = {
  * @param lookup if 1, always get the fw filename from smscore_fw_lkup
  *	 table. if 0, try first to get from sms_boards
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 static char *smscore_get_fw_filename(struct smscore_device_t *coredev,
 			      int mode)
@@ -1125,7 +1125,7 @@ static char *smscore_get_fw_filename(struct smscore_device_t *coredev,
 	return fw[mode];
 }
 
-/**
+/*
  * loads specified firmware into a buffer and calls device loadfirmware_handler
  *
  * @param coredev pointer to a coredev object returned by
@@ -1133,7 +1133,7 @@ static char *smscore_get_fw_filename(struct smscore_device_t *coredev,
  * @param filename null-terminated string specifies firmware file name
  * @param loadfirmware_handler device handler that loads firmware
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 static int smscore_load_firmware_from_file(struct smscore_device_t *coredev,
 					   int mode,
@@ -1182,14 +1182,14 @@ static int smscore_load_firmware_from_file(struct smscore_device_t *coredev,
 	return rc;
 }
 
-/**
+/*
  * notifies all clients registered with the device, notifies hotplugs,
  * frees all buffers and coredev object
  *
  * @param coredev pointer to a coredev object returned by
  *                smscore_register_device
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 void smscore_unregister_device(struct smscore_device_t *coredev)
 {
@@ -1282,14 +1282,14 @@ static int smscore_detect_mode(struct smscore_device_t *coredev)
 	return rc;
 }
 
-/**
+/*
  * send init device request and wait for response
  *
  * @param coredev pointer to a coredev object returned by
  *                smscore_register_device
  * @param mode requested mode of operation
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 static int smscore_init_device(struct smscore_device_t *coredev, int mode)
 {
@@ -1315,7 +1315,7 @@ static int smscore_init_device(struct smscore_device_t *coredev, int mode)
 	return rc;
 }
 
-/**
+/*
  * calls device handler to change mode of operation
  * NOTE: stellar/usb may disconnect when changing mode
  *
@@ -1323,7 +1323,7 @@ static int smscore_init_device(struct smscore_device_t *coredev, int mode)
  *                smscore_register_device
  * @param mode requested mode of operation
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 int smscore_set_device_mode(struct smscore_device_t *coredev, int mode)
 {
@@ -1411,13 +1411,13 @@ int smscore_set_device_mode(struct smscore_device_t *coredev, int mode)
 	return rc;
 }
 
-/**
+/*
  * calls device handler to get current mode of operation
  *
  * @param coredev pointer to a coredev object returned by
  *                smscore_register_device
  *
- * @return current mode
+ * return: current mode
  */
 int smscore_get_device_mode(struct smscore_device_t *coredev)
 {
@@ -1425,7 +1425,7 @@ int smscore_get_device_mode(struct smscore_device_t *coredev)
 }
 EXPORT_SYMBOL_GPL(smscore_get_device_mode);
 
-/**
+/*
  * find client by response id & type within the clients list.
  * return client handle or NULL.
  *
@@ -1462,7 +1462,7 @@ smscore_client_t *smscore_find_client(struct smscore_device_t *coredev,
 	return client;
 }
 
-/**
+/*
  * find client by response id/type, call clients onresponse handler
  * return buffer to pool on error
  *
@@ -1615,13 +1615,13 @@ void smscore_onresponse(struct smscore_device_t *coredev,
 }
 EXPORT_SYMBOL_GPL(smscore_onresponse);
 
-/**
+/*
  * return pointer to next free buffer descriptor from core pool
  *
  * @param coredev pointer to a coredev object returned by
  *                smscore_register_device
  *
- * @return pointer to descriptor on success, NULL on error.
+ * return: pointer to descriptor on success, NULL on error.
  */
 
 static struct smscore_buffer_t *get_entry(struct smscore_device_t *coredev)
@@ -1648,7 +1648,7 @@ struct smscore_buffer_t *smscore_getbuffer(struct smscore_device_t *coredev)
 }
 EXPORT_SYMBOL_GPL(smscore_getbuffer);
 
-/**
+/*
  * return buffer descriptor to a pool
  *
  * @param coredev pointer to a coredev object returned by
@@ -1693,7 +1693,7 @@ static int smscore_validate_client(struct smscore_device_t *coredev,
 	return 0;
 }
 
-/**
+/*
  * creates smsclient object, check that id is taken by another client
  *
  * @param coredev pointer to a coredev object from clients hotplug
@@ -1705,7 +1705,7 @@ static int smscore_validate_client(struct smscore_device_t *coredev,
  * @param context client-specific context
  * @param client pointer to a value that receives created smsclient object
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 int smscore_register_client(struct smscore_device_t *coredev,
 			    struct smsclient_params_t *params,
@@ -1740,7 +1740,7 @@ int smscore_register_client(struct smscore_device_t *coredev,
 }
 EXPORT_SYMBOL_GPL(smscore_register_client);
 
-/**
+/*
  * frees smsclient object and all subclients associated with it
  *
  * @param client pointer to smsclient object returned by
@@ -1771,7 +1771,7 @@ void smscore_unregister_client(struct smscore_client_t *client)
 }
 EXPORT_SYMBOL_GPL(smscore_unregister_client);
 
-/**
+/*
  * verifies that source id is not taken by another client,
  * calls device handler to send requests to the device
  *
@@ -1780,7 +1780,7 @@ EXPORT_SYMBOL_GPL(smscore_unregister_client);
  * @param buffer pointer to a request buffer
  * @param size size (in bytes) of request buffer
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 int smsclient_sendrequest(struct smscore_client_t *client,
 			  void *buffer, size_t size)
diff --git a/drivers/media/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb-core/dvb_ca_en50221.c
index 95b3723282f4623b725e1e1c22214f653fa65c74..d48b61eb01f4cdc7f11913208a6beef0136a7bff 100644
--- a/drivers/media/dvb-core/dvb_ca_en50221.c
+++ b/drivers/media/dvb-core/dvb_ca_en50221.c
@@ -206,7 +206,7 @@ static int dvb_ca_en50221_write_data(struct dvb_ca_private *ca, int slot,
  * @hlen: Number of bytes in haystack.
  * @needle: Buffer to find.
  * @nlen: Number of bytes in needle.
- * @return Pointer into haystack needle was found at, or NULL if not found.
+ * return: Pointer into haystack needle was found at, or NULL if not found.
  */
 static char *findstr(char *haystack, int hlen, char *needle, int nlen)
 {
@@ -226,7 +226,7 @@ static char *findstr(char *haystack, int hlen, char *needle, int nlen)
 /* ************************************************************************** */
 /* EN50221 physical interface functions */
 
-/**
+/*
  * dvb_ca_en50221_check_camstatus - Check CAM status.
  */
 static int dvb_ca_en50221_check_camstatus(struct dvb_ca_private *ca, int slot)
@@ -275,9 +275,9 @@ static int dvb_ca_en50221_check_camstatus(struct dvb_ca_private *ca, int slot)
  * @ca: CA instance.
  * @slot: Slot on interface.
  * @waitfor: Flags to wait for.
- * @timeout_ms: Timeout in milliseconds.
+ * @timeout_hz: Timeout in milliseconds.
  *
- * @return 0 on success, nonzero on error.
+ * return: 0 on success, nonzero on error.
  */
 static int dvb_ca_en50221_wait_if_status(struct dvb_ca_private *ca, int slot,
 					 u8 waitfor, int timeout_hz)
@@ -325,7 +325,7 @@ static int dvb_ca_en50221_wait_if_status(struct dvb_ca_private *ca, int slot,
  * @ca: CA instance.
  * @slot: Slot id.
  *
- * @return 0 on success, nonzero on failure.
+ * return: 0 on success, nonzero on failure.
  */
 static int dvb_ca_en50221_link_init(struct dvb_ca_private *ca, int slot)
 {
@@ -397,11 +397,11 @@ static int dvb_ca_en50221_link_init(struct dvb_ca_private *ca, int slot)
  * @ca: CA instance.
  * @slot: Slot id.
  * @address: Address to read from. Updated.
- * @tupleType: Tuple id byte. Updated.
- * @tupleLength: Tuple length. Updated.
+ * @tuple_type: Tuple id byte. Updated.
+ * @tuple_length: Tuple length. Updated.
  * @tuple: Dest buffer for tuple (must be 256 bytes). Updated.
  *
- * @return 0 on success, nonzero on error.
+ * return: 0 on success, nonzero on error.
  */
 static int dvb_ca_en50221_read_tuple(struct dvb_ca_private *ca, int slot,
 				     int *address, int *tuple_type,
@@ -455,7 +455,7 @@ static int dvb_ca_en50221_read_tuple(struct dvb_ca_private *ca, int slot,
  * @ca: CA instance.
  * @slot: Slot id.
  *
- * @return 0 on success, <0 on failure.
+ * return: 0 on success, <0 on failure.
  */
 static int dvb_ca_en50221_parse_attributes(struct dvb_ca_private *ca, int slot)
 {
@@ -632,10 +632,11 @@ static int dvb_ca_en50221_set_configoption(struct dvb_ca_private *ca, int slot)
  * @ca: CA instance.
  * @slot: Slot to read from.
  * @ebuf: If non-NULL, the data will be written to this buffer. If NULL,
- * the data will be added into the buffering system as a normal fragment.
+ *	  the data will be added into the buffering system as a normal
+ *	  fragment.
  * @ecount: Size of ebuf. Ignored if ebuf is NULL.
  *
- * @return Number of bytes read, or < 0 on error
+ * return: Number of bytes read, or < 0 on error
  */
 static int dvb_ca_en50221_read_data(struct dvb_ca_private *ca, int slot,
 				    u8 *ebuf, int ecount)
@@ -784,11 +785,11 @@ static int dvb_ca_en50221_read_data(struct dvb_ca_private *ca, int slot,
  *
  * @ca: CA instance.
  * @slot: Slot to write to.
- * @ebuf: The data in this buffer is treated as a complete link-level packet to
- * be written.
- * @count: Size of ebuf.
+ * @buf: The data in this buffer is treated as a complete link-level packet to
+ * 	 be written.
+ * @bytes_write: Size of ebuf.
  *
- * @return Number of bytes written, or < 0 on error.
+ * return: Number of bytes written, or < 0 on error.
  */
 static int dvb_ca_en50221_write_data(struct dvb_ca_private *ca, int slot,
 				     u8 *buf, int bytes_write)
@@ -933,7 +934,7 @@ static int dvb_ca_en50221_slot_shutdown(struct dvb_ca_private *ca, int slot)
 /**
  * dvb_ca_en50221_camchange_irq - A CAMCHANGE IRQ has occurred.
  *
- * @ca: CA instance.
+ * @pubca: CA instance.
  * @slot: Slot concerned.
  * @change_type: One of the DVB_CA_CAMCHANGE_* values.
  */
@@ -963,7 +964,7 @@ EXPORT_SYMBOL(dvb_ca_en50221_camchange_irq);
 /**
  * dvb_ca_en50221_camready_irq - A CAMREADY IRQ has occurred.
  *
- * @ca: CA instance.
+ * @pubca: CA instance.
  * @slot: Slot concerned.
  */
 void dvb_ca_en50221_camready_irq(struct dvb_ca_en50221 *pubca, int slot)
@@ -983,7 +984,7 @@ EXPORT_SYMBOL(dvb_ca_en50221_camready_irq);
 /**
  * dvb_ca_en50221_frda_irq - An FR or DA IRQ has occurred.
  *
- * @ca: CA instance.
+ * @pubca: CA instance.
  * @slot: Slot concerned.
  */
 void dvb_ca_en50221_frda_irq(struct dvb_ca_en50221 *pubca, int slot)
@@ -1091,7 +1092,7 @@ static void dvb_ca_en50221_thread_update_delay(struct dvb_ca_private *ca)
  *
  * @ca: CA instance.
  * @slot: Slot to process.
- * @return: 0 .. no change
+ * return:: 0 .. no change
  *          1 .. CAM state changed
  */
 
@@ -1296,7 +1297,7 @@ static void dvb_ca_en50221_thread_state_machine(struct dvb_ca_private *ca,
 	mutex_unlock(&sl->slot_lock);
 }
 
-/**
+/*
  * Kernel thread which monitors CA slots for CAM changes, and performs data
  * transfers.
  */
@@ -1336,12 +1337,11 @@ static int dvb_ca_en50221_thread(void *data)
  * Real ioctl implementation.
  * NOTE: CA_SEND_MSG/CA_GET_MSG ioctls have userspace buffers passed to them.
  *
- * @inode: Inode concerned.
  * @file: File concerned.
  * @cmd: IOCTL command.
- * @arg: Associated argument.
+ * @parg: Associated argument.
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 static int dvb_ca_en50221_io_do_ioctl(struct file *file,
 				      unsigned int cmd, void *parg)
@@ -1420,12 +1420,11 @@ static int dvb_ca_en50221_io_do_ioctl(struct file *file,
 /**
  * Wrapper for ioctl implementation.
  *
- * @inode: Inode concerned.
  * @file: File concerned.
  * @cmd: IOCTL command.
  * @arg: Associated argument.
  *
- * @return 0 on success, <0 on error.
+ * return: 0 on success, <0 on error.
  */
 static long dvb_ca_en50221_io_ioctl(struct file *file,
 				    unsigned int cmd, unsigned long arg)
@@ -1441,7 +1440,7 @@ static long dvb_ca_en50221_io_ioctl(struct file *file,
  * @count: Size of source buffer.
  * @ppos: Position in file (ignored).
  *
- * @return Number of bytes read, or <0 on error.
+ * return: Number of bytes read, or <0 on error.
  */
 static ssize_t dvb_ca_en50221_io_write(struct file *file,
 				       const char __user *buf, size_t count,
@@ -1536,7 +1535,7 @@ static ssize_t dvb_ca_en50221_io_write(struct file *file,
 	return status;
 }
 
-/**
+/*
  * Condition for waking up in dvb_ca_en50221_io_read_condition
  */
 static int dvb_ca_en50221_io_read_condition(struct dvb_ca_private *ca,
@@ -1593,7 +1592,7 @@ static int dvb_ca_en50221_io_read_condition(struct dvb_ca_private *ca,
  * @count: Size of destination buffer.
  * @ppos: Position in file (ignored).
  *
- * @return Number of bytes read, or <0 on error.
+ * return: Number of bytes read, or <0 on error.
  */
 static ssize_t dvb_ca_en50221_io_read(struct file *file, char __user *buf,
 				      size_t count, loff_t *ppos)
@@ -1702,7 +1701,7 @@ static ssize_t dvb_ca_en50221_io_read(struct file *file, char __user *buf,
  * @inode: Inode concerned.
  * @file: File concerned.
  *
- * @return 0 on success, <0 on failure.
+ * return: 0 on success, <0 on failure.
  */
 static int dvb_ca_en50221_io_open(struct inode *inode, struct file *file)
 {
@@ -1752,7 +1751,7 @@ static int dvb_ca_en50221_io_open(struct inode *inode, struct file *file)
  * @inode: Inode concerned.
  * @file: File concerned.
  *
- * @return 0 on success, <0 on failure.
+ * return: 0 on success, <0 on failure.
  */
 static int dvb_ca_en50221_io_release(struct inode *inode, struct file *file)
 {
@@ -1781,7 +1780,7 @@ static int dvb_ca_en50221_io_release(struct inode *inode, struct file *file)
  * @file: File concerned.
  * @wait: poll wait table.
  *
- * @return Standard poll mask.
+ * return: Standard poll mask.
  */
 static unsigned int dvb_ca_en50221_io_poll(struct file *file, poll_table *wait)
 {
@@ -1838,11 +1837,11 @@ static const struct dvb_device dvbdev_ca = {
  * Initialise a new DVB CA EN50221 interface device.
  *
  * @dvb_adapter: DVB adapter to attach the new CA device to.
- * @ca: The dvb_ca instance.
+ * @pubca: The dvb_ca instance.
  * @flags: Flags describing the CA device (DVB_CA_FLAG_*).
  * @slot_count: Number of slots supported.
  *
- * @return 0 on success, nonzero on failure
+ * return: 0 on success, nonzero on failure
  */
 int dvb_ca_en50221_init(struct dvb_adapter *dvb_adapter,
 			struct dvb_ca_en50221 *pubca, int flags, int slot_count)
@@ -1929,8 +1928,7 @@ EXPORT_SYMBOL(dvb_ca_en50221_init);
 /**
  * Release a DVB CA EN50221 interface device.
  *
- * @ca_dev: The dvb_device_t instance for the CA device.
- * @ca: The associated dvb_ca instance.
+ * @pubca: The associated dvb_ca instance.
  */
 void dvb_ca_en50221_release(struct dvb_ca_en50221 *pubca)
 {
diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
index 3ad83359098bde793f9789aaeb800bebc76ddd31..2afaa82263421b3e729dd41ac5d951084d4f003c 100644
--- a/drivers/media/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb-core/dvb_frontend.c
@@ -369,11 +369,14 @@ static void dvb_frontend_swzigzag_update_delay(struct dvb_frontend_private *fepr
 }
 
 /**
- * Performs automatic twiddling of frontend parameters.
+ * dvb_frontend_swzigzag_autotune - Performs automatic twiddling of frontend
+ *	parameters.
  *
- * @param fe The frontend concerned.
- * @param check_wrapped Checks if an iteration has completed. DO NOT SET ON THE FIRST ATTEMPT
- * @returns Number of complete iterations that have been performed.
+ * @fe: The frontend concerned.
+ * @check_wrapped: Checks if an iteration has completed.
+ *		   DO NOT SET ON THE FIRST ATTEMPT.
+ *
+ * return: Number of complete iterations that have been performed.
  */
 static int dvb_frontend_swzigzag_autotune(struct dvb_frontend *fe, int check_wrapped)
 {
@@ -1253,7 +1256,7 @@ dtv_property_legacy_params_sync(struct dvb_frontend *fe,
  * dtv_get_frontend - calls a callback for retrieving DTV parameters
  * @fe:		struct dvb_frontend pointer
  * @c:		struct dtv_frontend_properties pointer (DVBv5 cache)
- * @p_out	struct dvb_frontend_parameters pointer (DVBv3 FE struct)
+ * @p_out:	struct dvb_frontend_parameters pointer (DVBv3 FE struct)
  *
  * This routine calls either the DVBv3 or DVBv5 get_frontend call.
  * If c is not null, it will update the DVBv5 cache struct pointed by it.
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
index 06b0dcc13695af9ff25292de21ef1c3859b732d5..c018e3c06d5df229a4dd23527fd18383ca9742bb 100644
--- a/drivers/media/dvb-core/dvb_net.c
+++ b/drivers/media/dvb-core/dvb_net.c
@@ -125,7 +125,7 @@ struct dvb_net_priv {
 };
 
 
-/**
+/*
  *	Determine the packet's protocol ID. The rule here is that we
  *	assume 802.3 if the type field is short enough to be a length.
  *	This is normal practice and works for any 'now in use' protocol.
@@ -155,7 +155,7 @@ static __be16 dvb_net_eth_type_trans(struct sk_buff *skb,
 
 	rawp = skb->data;
 
-	/**
+	/*
 	 *	This is a magic hack to spot IPX packets. Older Novell breaks
 	 *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
 	 *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
@@ -164,7 +164,7 @@ static __be16 dvb_net_eth_type_trans(struct sk_buff *skb,
 	if (*(unsigned short *)rawp == 0xFFFF)
 		return htons(ETH_P_802_3);
 
-	/**
+	/*
 	 *	Real 802.2 LLC
 	 */
 	return htons(ETH_P_802_2);
@@ -215,7 +215,8 @@ static int ule_exthdr_padding(struct dvb_net_priv *p)
 	return 0;
 }
 
-/** Handle ULE extension headers.
+/*
+ * Handle ULE extension headers.
  *  Function is called after a successful CRC32 verification of an ULE SNDU to complete its decoding.
  *  Returns: >= 0: nr. of bytes consumed by next extension header
  *	     -1:   Mandatory extension header that is not recognized or TEST SNDU; discard.
@@ -291,7 +292,7 @@ static int handle_ule_extensions( struct dvb_net_priv *p )
 }
 
 
-/** Prepare for a new ULE SNDU: reset the decoder state. */
+/* Prepare for a new ULE SNDU: reset the decoder state. */
 static inline void reset_ule( struct dvb_net_priv *p )
 {
 	p->ule_skb = NULL;
@@ -304,7 +305,7 @@ static inline void reset_ule( struct dvb_net_priv *p )
 	p->ule_bridged = 0;
 }
 
-/**
+/*
  * Decode ULE SNDUs according to draft-ietf-ipdvb-ule-03.txt from a sequence of
  * TS cells of a single PID.
  */
@@ -1005,7 +1006,7 @@ static int dvb_net_sec_callback(const u8 *buffer1, size_t buffer1_len,
 {
 	struct net_device *dev = filter->priv;
 
-	/**
+	/*
 	 * we rely on the DVB API definition where exactly one complete
 	 * section is delivered in buffer1
 	 */
diff --git a/drivers/media/dvb-frontends/af9013.h b/drivers/media/dvb-frontends/af9013.h
index 353274524f1b29d72fa06dc66cd0466e1b8189ef..a290722c04fd2167d7908e110cab56bb47a754e1 100644
--- a/drivers/media/dvb-frontends/af9013.h
+++ b/drivers/media/dvb-frontends/af9013.h
@@ -38,6 +38,13 @@
  * @api_version: Firmware API version.
  * @gpio: GPIOs.
  * @get_dvb_frontend: Get DVB frontend callback.
+ *
+ * AF9013/5 GPIOs (mostly guessed):
+ *   * demod#1-gpio#0 - set demod#2 i2c-addr for dual devices
+ *   * demod#1-gpio#1 - xtal setting (?)
+ *   * demod#1-gpio#3 - tuner#1
+ *   * demod#2-gpio#0 - tuner#2
+ *   * demod#2-gpio#1 - xtal setting (?)
  */
 struct af9013_platform_data {
 	/*
@@ -89,16 +96,15 @@ struct af9013_platform_data {
 #define AF9013_TS_PARALLEL  AF9013_TS_MODE_PARALLEL
 #define AF9013_TS_SERIAL    AF9013_TS_MODE_SERIAL
 
-/*
- * AF9013/5 GPIOs (mostly guessed)
- * demod#1-gpio#0 - set demod#2 i2c-addr for dual devices
- * demod#1-gpio#1 - xtal setting (?)
- * demod#1-gpio#3 - tuner#1
- * demod#2-gpio#0 - tuner#2
- * demod#2-gpio#1 - xtal setting (?)
- */
-
 #if IS_REACHABLE(CONFIG_DVB_AF9013)
+/**
+ * Attach an af9013 demod
+ *
+ * @config: pointer to &struct af9013_config with demod configuration.
+ * @i2c: i2c adapter to use.
+ *
+ * return: FE pointer on success, NULL on failure.
+ */
 extern struct dvb_frontend *af9013_attach(const struct af9013_config *config,
 	struct i2c_adapter *i2c);
 #else
diff --git a/drivers/media/dvb-frontends/ascot2e.h b/drivers/media/dvb-frontends/ascot2e.h
index dc61bf7d1b0982e10b969976dbc5b014a8ee7ead..418c565baf83ff04b0037e60eb6f4a08b1de859a 100644
--- a/drivers/media/dvb-frontends/ascot2e.h
+++ b/drivers/media/dvb-frontends/ascot2e.h
@@ -41,6 +41,15 @@ struct ascot2e_config {
 };
 
 #if IS_REACHABLE(CONFIG_DVB_ASCOT2E)
+/**
+ * Attach an ascot2e tuner
+ *
+ * @fe: frontend to be attached
+ * @config: pointer to &struct ascot2e_config with tuner configuration.
+ * @i2c: i2c adapter to use.
+ *
+ * return: FE pointer on success, NULL on failure.
+ */
 extern struct dvb_frontend *ascot2e_attach(struct dvb_frontend *fe,
 					const struct ascot2e_config *config,
 					struct i2c_adapter *i2c);
diff --git a/drivers/media/dvb-frontends/cxd2820r.h b/drivers/media/dvb-frontends/cxd2820r.h
index f3ff8f6eb3bb91ab7eab2695a7374acf1ab15641..a49400c0e28eac26d2a689080bb7eb209c855377 100644
--- a/drivers/media/dvb-frontends/cxd2820r.h
+++ b/drivers/media/dvb-frontends/cxd2820r.h
@@ -49,7 +49,6 @@
  * @gpio_chip_base: GPIO.
  * @get_dvb_frontend: Get DVB frontend.
  */
-
 struct cxd2820r_platform_data {
 	u8 ts_mode;
 	bool ts_clk_inv;
@@ -62,6 +61,17 @@ struct cxd2820r_platform_data {
 	bool attach_in_use;
 };
 
+/**
+ * struct cxd2820r_config - configuration for cxd2020r demod
+ *
+ * @i2c_address: Demodulator I2C address. Driver determines DVB-C slave I2C
+ *		 address automatically from master address.
+ *		 Default: none, must set. Values: 0x6c, 0x6d.
+ * @ts_mode:	TS output mode. Default: none, must set. Values: FIXME?
+ * @ts_clock_inv: TS clock inverted. Default: 0. Values: 0, 1.
+ * @if_agc_polarity: Default: 0. Values: 0, 1
+ * @spec_inv:	Spectrum inversion. Default: 0. Values: 0, 1.
+ */
 struct cxd2820r_config {
 	/* Demodulator I2C address.
 	 * Driver determines DVB-C slave I2C address automatically from master
@@ -98,6 +108,18 @@ struct cxd2820r_config {
 
 
 #if IS_REACHABLE(CONFIG_DVB_CXD2820R)
+/**
+ * Attach a cxd2820r demod
+ *
+ * @config: pointer to &struct cxd2820r_config with demod configuration.
+ * @i2c: i2c adapter to use.
+ * @gpio_chip_base: if zero, disables GPIO setting. Otherwise, if
+ *		    CONFIG_GPIOLIB is set dynamically allocate
+ *		    gpio base; if is not set, use its value to
+ *		    setup the GPIO pins.
+ *
+ * return: FE pointer on success, NULL on failure.
+ */
 extern struct dvb_frontend *cxd2820r_attach(
 	const struct cxd2820r_config *config,
 	struct i2c_adapter *i2c,
diff --git a/drivers/media/dvb-frontends/drx39xyj/bsp_i2c.h b/drivers/media/dvb-frontends/drx39xyj/bsp_i2c.h
index 5b5421f703886a68da09c1e4a64a92f94d43ee0d..2b3af247a1f120d2e36c734d9e83167618bd8a48 100644
--- a/drivers/media/dvb-frontends/drx39xyj/bsp_i2c.h
+++ b/drivers/media/dvb-frontends/drx39xyj/bsp_i2c.h
@@ -52,7 +52,7 @@ struct i2c_device_addr {
 };
 
 
-/**
+/*
 * \def IS_I2C_10BIT( addr )
 * \brief Determine if I2C address 'addr' is a 10 bits address or not.
 * \param addr The I2C address.
@@ -67,7 +67,7 @@ struct i2c_device_addr {
 Exported FUNCTIONS
 ------------------------------------------------------------------------------*/
 
-/**
+/*
 * \fn drxbsp_i2c_init()
 * \brief Initialize I2C communication module.
 * \return drx_status_t Return status.
@@ -76,7 +76,7 @@ Exported FUNCTIONS
 */
 	drx_status_t drxbsp_i2c_init(void);
 
-/**
+/*
 * \fn drxbsp_i2c_term()
 * \brief Terminate I2C communication module.
 * \return drx_status_t Return status.
@@ -85,7 +85,7 @@ Exported FUNCTIONS
 */
 	drx_status_t drxbsp_i2c_term(void);
 
-/**
+/*
 * \fn drx_status_t drxbsp_i2c_write_read( struct i2c_device_addr *w_dev_addr,
 *                                       u16 w_count,
 *                                       u8 *wData,
@@ -121,7 +121,7 @@ Exported FUNCTIONS
 					 struct i2c_device_addr *r_dev_addr,
 					 u16 r_count, u8 *r_data);
 
-/**
+/*
 * \fn drxbsp_i2c_error_text()
 * \brief Returns a human readable error.
 * Counter part of numerical drx_i2c_error_g.
@@ -130,7 +130,7 @@ Exported FUNCTIONS
 */
 	char *drxbsp_i2c_error_text(void);
 
-/**
+/*
 * \var drx_i2c_error_g;
 * \brief I2C specific error codes, platform dependent.
 */
diff --git a/drivers/media/dvb-frontends/drx39xyj/drx_driver.h b/drivers/media/dvb-frontends/drx39xyj/drx_driver.h
index cd69e187ba7a9c0eea95855531a82db3e09a528b..855685b6b38669d035d9b14b420033f4a1d1c487 100644
--- a/drivers/media/dvb-frontends/drx39xyj/drx_driver.h
+++ b/drivers/media/dvb-frontends/drx39xyj/drx_driver.h
@@ -46,7 +46,7 @@ struct i2c_device_addr {
 	void *user_data;		/* User data pointer */
 };
 
-/**
+/*
 * \def IS_I2C_10BIT( addr )
 * \brief Determine if I2C address 'addr' is a 10 bits address or not.
 * \param addr The I2C address.
@@ -61,7 +61,7 @@ struct i2c_device_addr {
 Exported FUNCTIONS
 ------------------------------------------------------------------------------*/
 
-/**
+/*
 * \fn drxbsp_i2c_init()
 * \brief Initialize I2C communication module.
 * \return int Return status.
@@ -70,7 +70,7 @@ Exported FUNCTIONS
 */
 int drxbsp_i2c_init(void);
 
-/**
+/*
 * \fn drxbsp_i2c_term()
 * \brief Terminate I2C communication module.
 * \return int Return status.
@@ -79,7 +79,7 @@ int drxbsp_i2c_init(void);
 */
 int drxbsp_i2c_term(void);
 
-/**
+/*
 * \fn int drxbsp_i2c_write_read( struct i2c_device_addr *w_dev_addr,
 *                                       u16 w_count,
 *                                       u8 * wData,
@@ -115,7 +115,7 @@ int drxbsp_i2c_write_read(struct i2c_device_addr *w_dev_addr,
 					struct i2c_device_addr *r_dev_addr,
 					u16 r_count, u8 *r_data);
 
-/**
+/*
 * \fn drxbsp_i2c_error_text()
 * \brief Returns a human readable error.
 * Counter part of numerical drx_i2c_error_g.
@@ -124,7 +124,7 @@ int drxbsp_i2c_write_read(struct i2c_device_addr *w_dev_addr,
 */
 char *drxbsp_i2c_error_text(void);
 
-/**
+/*
 * \var drx_i2c_error_g;
 * \brief I2C specific error codes, platform dependent.
 */
@@ -241,13 +241,13 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner,
 						struct i2c_device_addr *r_dev_addr,
 						u16 r_count, u8 *r_data);
 
-/**************
+/*************
 *
 * This section configures the DRX Data Access Protocols (DAPs).
 *
 **************/
 
-/**
+/*
 * \def DRXDAP_SINGLE_MASTER
 * \brief Enable I2C single or I2C multimaster mode on host.
 *
@@ -262,7 +262,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner,
 #define DRXDAP_SINGLE_MASTER 1
 #endif
 
-/**
+/*
 * \def DRXDAP_MAX_WCHUNKSIZE
 * \brief Defines maximum chunksize of an i2c write action by host.
 *
@@ -282,7 +282,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner,
 #define  DRXDAP_MAX_WCHUNKSIZE 60
 #endif
 
-/**
+/*
 * \def DRXDAP_MAX_RCHUNKSIZE
 * \brief Defines maximum chunksize of an i2c read action by host.
 *
@@ -297,13 +297,13 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner,
 #define  DRXDAP_MAX_RCHUNKSIZE 60
 #endif
 
-/**************
+/*************
 *
 * This section describes drxdriver defines.
 *
 **************/
 
-/**
+/*
 * \def DRX_UNKNOWN
 * \brief Generic UNKNOWN value for DRX enumerated types.
 *
@@ -313,7 +313,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner,
 #define DRX_UNKNOWN (254)
 #endif
 
-/**
+/*
 * \def DRX_AUTO
 * \brief Generic AUTO value for DRX enumerated types.
 *
@@ -324,104 +324,104 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner,
 #define DRX_AUTO    (255)
 #endif
 
-/**************
+/*************
 *
 * This section describes flag definitions for the device capbilities.
 *
 **************/
 
-/**
+/*
 * \brief LNA capability flag
 *
 * Device has a Low Noise Amplifier
 *
 */
 #define DRX_CAPABILITY_HAS_LNA           (1UL <<  0)
-/**
+/*
 * \brief OOB-RX capability flag
 *
 * Device has OOB-RX
 *
 */
 #define DRX_CAPABILITY_HAS_OOBRX         (1UL <<  1)
-/**
+/*
 * \brief ATV capability flag
 *
 * Device has ATV
 *
 */
 #define DRX_CAPABILITY_HAS_ATV           (1UL <<  2)
-/**
+/*
 * \brief DVB-T capability flag
 *
 * Device has DVB-T
 *
 */
 #define DRX_CAPABILITY_HAS_DVBT          (1UL <<  3)
-/**
+/*
 * \brief  ITU-B capability flag
 *
 * Device has ITU-B
 *
 */
 #define DRX_CAPABILITY_HAS_ITUB          (1UL <<  4)
-/**
+/*
 * \brief  Audio capability flag
 *
 * Device has Audio
 *
 */
 #define DRX_CAPABILITY_HAS_AUD           (1UL <<  5)
-/**
+/*
 * \brief  SAW switch capability flag
 *
 * Device has SAW switch
 *
 */
 #define DRX_CAPABILITY_HAS_SAWSW         (1UL <<  6)
-/**
+/*
 * \brief  GPIO1 capability flag
 *
 * Device has GPIO1
 *
 */
 #define DRX_CAPABILITY_HAS_GPIO1         (1UL <<  7)
-/**
+/*
 * \brief  GPIO2 capability flag
 *
 * Device has GPIO2
 *
 */
 #define DRX_CAPABILITY_HAS_GPIO2         (1UL <<  8)
-/**
+/*
 * \brief  IRQN capability flag
 *
 * Device has IRQN
 *
 */
 #define DRX_CAPABILITY_HAS_IRQN          (1UL <<  9)
-/**
+/*
 * \brief  8VSB capability flag
 *
 * Device has 8VSB
 *
 */
 #define DRX_CAPABILITY_HAS_8VSB          (1UL << 10)
-/**
+/*
 * \brief  SMA-TX capability flag
 *
 * Device has SMATX
 *
 */
 #define DRX_CAPABILITY_HAS_SMATX         (1UL << 11)
-/**
+/*
 * \brief  SMA-RX capability flag
 *
 * Device has SMARX
 *
 */
 #define DRX_CAPABILITY_HAS_SMARX         (1UL << 12)
-/**
+/*
 * \brief  ITU-A/C capability flag
 *
 * Device has ITU-A/C
@@ -439,7 +439,7 @@ MACROS
 	 DRX_VERSIONSTRING_HELP(PATCH)
 #define DRX_VERSIONSTRING_HELP(NUM) #NUM
 
-/**
+/*
 * \brief Macro to create byte array elements from 16 bit integers.
 * This macro is used to create byte arrays for block writes.
 * Block writes speed up I2C traffic between host and demod.
@@ -449,7 +449,7 @@ MACROS
 #define DRX_16TO8(x) ((u8) (((u16)x) & 0xFF)), \
 			((u8)((((u16)x)>>8)&0xFF))
 
-/**
+/*
 * \brief Macro to convert 16 bit register value to a s32
 */
 #define DRX_U16TODRXFREQ(x)   ((x & 0x8000) ? \
@@ -461,191 +461,191 @@ MACROS
 ENUM
 -------------------------------------------------------------------------*/
 
-/**
+/*
 * \enum enum drx_standard
 * \brief Modulation standards.
 */
 enum drx_standard {
-	DRX_STANDARD_DVBT = 0, /**< Terrestrial DVB-T.               */
-	DRX_STANDARD_8VSB,     /**< Terrestrial 8VSB.                */
-	DRX_STANDARD_NTSC,     /**< Terrestrial\Cable analog NTSC.   */
+	DRX_STANDARD_DVBT = 0, /*< Terrestrial DVB-T.               */
+	DRX_STANDARD_8VSB,     /*< Terrestrial 8VSB.                */
+	DRX_STANDARD_NTSC,     /*< Terrestrial\Cable analog NTSC.   */
 	DRX_STANDARD_PAL_SECAM_BG,
-				/**< Terrestrial analog PAL/SECAM B/G */
+				/*< Terrestrial analog PAL/SECAM B/G */
 	DRX_STANDARD_PAL_SECAM_DK,
-				/**< Terrestrial analog PAL/SECAM D/K */
+				/*< Terrestrial analog PAL/SECAM D/K */
 	DRX_STANDARD_PAL_SECAM_I,
-				/**< Terrestrial analog PAL/SECAM I   */
+				/*< Terrestrial analog PAL/SECAM I   */
 	DRX_STANDARD_PAL_SECAM_L,
-				/**< Terrestrial analog PAL/SECAM L
+				/*< Terrestrial analog PAL/SECAM L
 					with negative modulation        */
 	DRX_STANDARD_PAL_SECAM_LP,
-				/**< Terrestrial analog PAL/SECAM L
+				/*< Terrestrial analog PAL/SECAM L
 					with positive modulation        */
-	DRX_STANDARD_ITU_A,    /**< Cable ITU ANNEX A.               */
-	DRX_STANDARD_ITU_B,    /**< Cable ITU ANNEX B.               */
-	DRX_STANDARD_ITU_C,    /**< Cable ITU ANNEX C.               */
-	DRX_STANDARD_ITU_D,    /**< Cable ITU ANNEX D.               */
-	DRX_STANDARD_FM,       /**< Terrestrial\Cable FM radio       */
-	DRX_STANDARD_DTMB,     /**< Terrestrial DTMB standard (China)*/
+	DRX_STANDARD_ITU_A,    /*< Cable ITU ANNEX A.               */
+	DRX_STANDARD_ITU_B,    /*< Cable ITU ANNEX B.               */
+	DRX_STANDARD_ITU_C,    /*< Cable ITU ANNEX C.               */
+	DRX_STANDARD_ITU_D,    /*< Cable ITU ANNEX D.               */
+	DRX_STANDARD_FM,       /*< Terrestrial\Cable FM radio       */
+	DRX_STANDARD_DTMB,     /*< Terrestrial DTMB standard (China)*/
 	DRX_STANDARD_UNKNOWN = DRX_UNKNOWN,
-				/**< Standard unknown.                */
+				/*< Standard unknown.                */
 	DRX_STANDARD_AUTO = DRX_AUTO
-				/**< Autodetect standard.             */
+				/*< Autodetect standard.             */
 };
 
-/**
+/*
 * \enum enum drx_standard
 * \brief Modulation sub-standards.
 */
 enum drx_substandard {
-	DRX_SUBSTANDARD_MAIN = 0, /**< Main subvariant of standard   */
+	DRX_SUBSTANDARD_MAIN = 0, /*< Main subvariant of standard   */
 	DRX_SUBSTANDARD_ATV_BG_SCANDINAVIA,
 	DRX_SUBSTANDARD_ATV_DK_POLAND,
 	DRX_SUBSTANDARD_ATV_DK_CHINA,
 	DRX_SUBSTANDARD_UNKNOWN = DRX_UNKNOWN,
-					/**< Sub-standard unknown.         */
+					/*< Sub-standard unknown.         */
 	DRX_SUBSTANDARD_AUTO = DRX_AUTO
-					/**< Auto (default) sub-standard   */
+					/*< Auto (default) sub-standard   */
 };
 
-/**
+/*
 * \enum enum drx_bandwidth
 * \brief Channel bandwidth or channel spacing.
 */
 enum drx_bandwidth {
-	DRX_BANDWIDTH_8MHZ = 0,	 /**< Bandwidth 8 MHz.   */
-	DRX_BANDWIDTH_7MHZ,	 /**< Bandwidth 7 MHz.   */
-	DRX_BANDWIDTH_6MHZ,	 /**< Bandwidth 6 MHz.   */
+	DRX_BANDWIDTH_8MHZ = 0,	 /*< Bandwidth 8 MHz.   */
+	DRX_BANDWIDTH_7MHZ,	 /*< Bandwidth 7 MHz.   */
+	DRX_BANDWIDTH_6MHZ,	 /*< Bandwidth 6 MHz.   */
 	DRX_BANDWIDTH_UNKNOWN = DRX_UNKNOWN,
-					/**< Bandwidth unknown. */
+					/*< Bandwidth unknown. */
 	DRX_BANDWIDTH_AUTO = DRX_AUTO
-					/**< Auto Set Bandwidth */
+					/*< Auto Set Bandwidth */
 };
 
-/**
+/*
 * \enum enum drx_mirror
 * \brief Indicate if channel spectrum is mirrored or not.
 */
 enum drx_mirror {
-	DRX_MIRROR_NO = 0,   /**< Spectrum is not mirrored.           */
-	DRX_MIRROR_YES,	     /**< Spectrum is mirrored.               */
+	DRX_MIRROR_NO = 0,   /*< Spectrum is not mirrored.           */
+	DRX_MIRROR_YES,	     /*< Spectrum is mirrored.               */
 	DRX_MIRROR_UNKNOWN = DRX_UNKNOWN,
-				/**< Unknown if spectrum is mirrored.    */
+				/*< Unknown if spectrum is mirrored.    */
 	DRX_MIRROR_AUTO = DRX_AUTO
-				/**< Autodetect if spectrum is mirrored. */
+				/*< Autodetect if spectrum is mirrored. */
 };
 
-/**
+/*
 * \enum enum drx_modulation
 * \brief Constellation type of the channel.
 */
 enum drx_modulation {
-	DRX_CONSTELLATION_BPSK = 0,  /**< Modulation is BPSK.       */
-	DRX_CONSTELLATION_QPSK,	     /**< Constellation is QPSK.    */
-	DRX_CONSTELLATION_PSK8,	     /**< Constellation is PSK8.    */
-	DRX_CONSTELLATION_QAM16,     /**< Constellation is QAM16.   */
-	DRX_CONSTELLATION_QAM32,     /**< Constellation is QAM32.   */
-	DRX_CONSTELLATION_QAM64,     /**< Constellation is QAM64.   */
-	DRX_CONSTELLATION_QAM128,    /**< Constellation is QAM128.  */
-	DRX_CONSTELLATION_QAM256,    /**< Constellation is QAM256.  */
-	DRX_CONSTELLATION_QAM512,    /**< Constellation is QAM512.  */
-	DRX_CONSTELLATION_QAM1024,   /**< Constellation is QAM1024. */
-	DRX_CONSTELLATION_QPSK_NR,   /**< Constellation is QPSK_NR  */
+	DRX_CONSTELLATION_BPSK = 0,  /*< Modulation is BPSK.       */
+	DRX_CONSTELLATION_QPSK,	     /*< Constellation is QPSK.    */
+	DRX_CONSTELLATION_PSK8,	     /*< Constellation is PSK8.    */
+	DRX_CONSTELLATION_QAM16,     /*< Constellation is QAM16.   */
+	DRX_CONSTELLATION_QAM32,     /*< Constellation is QAM32.   */
+	DRX_CONSTELLATION_QAM64,     /*< Constellation is QAM64.   */
+	DRX_CONSTELLATION_QAM128,    /*< Constellation is QAM128.  */
+	DRX_CONSTELLATION_QAM256,    /*< Constellation is QAM256.  */
+	DRX_CONSTELLATION_QAM512,    /*< Constellation is QAM512.  */
+	DRX_CONSTELLATION_QAM1024,   /*< Constellation is QAM1024. */
+	DRX_CONSTELLATION_QPSK_NR,   /*< Constellation is QPSK_NR  */
 	DRX_CONSTELLATION_UNKNOWN = DRX_UNKNOWN,
-					/**< Constellation unknown.    */
+					/*< Constellation unknown.    */
 	DRX_CONSTELLATION_AUTO = DRX_AUTO
-					/**< Autodetect constellation. */
+					/*< Autodetect constellation. */
 };
 
-/**
+/*
 * \enum enum drx_hierarchy
 * \brief Hierarchy of the channel.
 */
 enum drx_hierarchy {
-	DRX_HIERARCHY_NONE = 0,	/**< None hierarchical channel.     */
-	DRX_HIERARCHY_ALPHA1,	/**< Hierarchical channel, alpha=1. */
-	DRX_HIERARCHY_ALPHA2,	/**< Hierarchical channel, alpha=2. */
-	DRX_HIERARCHY_ALPHA4,	/**< Hierarchical channel, alpha=4. */
+	DRX_HIERARCHY_NONE = 0,	/*< None hierarchical channel.     */
+	DRX_HIERARCHY_ALPHA1,	/*< Hierarchical channel, alpha=1. */
+	DRX_HIERARCHY_ALPHA2,	/*< Hierarchical channel, alpha=2. */
+	DRX_HIERARCHY_ALPHA4,	/*< Hierarchical channel, alpha=4. */
 	DRX_HIERARCHY_UNKNOWN = DRX_UNKNOWN,
-				/**< Hierarchy unknown.             */
+				/*< Hierarchy unknown.             */
 	DRX_HIERARCHY_AUTO = DRX_AUTO
-				/**< Autodetect hierarchy.          */
+				/*< Autodetect hierarchy.          */
 };
 
-/**
+/*
 * \enum enum drx_priority
 * \brief Channel priority in case of hierarchical transmission.
 */
 enum drx_priority {
-	DRX_PRIORITY_LOW = 0,  /**< Low priority channel.  */
-	DRX_PRIORITY_HIGH,     /**< High priority channel. */
+	DRX_PRIORITY_LOW = 0,  /*< Low priority channel.  */
+	DRX_PRIORITY_HIGH,     /*< High priority channel. */
 	DRX_PRIORITY_UNKNOWN = DRX_UNKNOWN
-				/**< Priority unknown.      */
+				/*< Priority unknown.      */
 };
 
-/**
+/*
 * \enum enum drx_coderate
 * \brief Channel priority in case of hierarchical transmission.
 */
 enum drx_coderate {
-		DRX_CODERATE_1DIV2 = 0,	/**< Code rate 1/2nd.      */
-		DRX_CODERATE_2DIV3,	/**< Code rate 2/3nd.      */
-		DRX_CODERATE_3DIV4,	/**< Code rate 3/4nd.      */
-		DRX_CODERATE_5DIV6,	/**< Code rate 5/6nd.      */
-		DRX_CODERATE_7DIV8,	/**< Code rate 7/8nd.      */
+		DRX_CODERATE_1DIV2 = 0,	/*< Code rate 1/2nd.      */
+		DRX_CODERATE_2DIV3,	/*< Code rate 2/3nd.      */
+		DRX_CODERATE_3DIV4,	/*< Code rate 3/4nd.      */
+		DRX_CODERATE_5DIV6,	/*< Code rate 5/6nd.      */
+		DRX_CODERATE_7DIV8,	/*< Code rate 7/8nd.      */
 		DRX_CODERATE_UNKNOWN = DRX_UNKNOWN,
-					/**< Code rate unknown.    */
+					/*< Code rate unknown.    */
 		DRX_CODERATE_AUTO = DRX_AUTO
-					/**< Autodetect code rate. */
+					/*< Autodetect code rate. */
 };
 
-/**
+/*
 * \enum enum drx_guard
 * \brief Guard interval of a channel.
 */
 enum drx_guard {
-	DRX_GUARD_1DIV32 = 0, /**< Guard interval 1/32nd.     */
-	DRX_GUARD_1DIV16,     /**< Guard interval 1/16th.     */
-	DRX_GUARD_1DIV8,      /**< Guard interval 1/8th.      */
-	DRX_GUARD_1DIV4,      /**< Guard interval 1/4th.      */
+	DRX_GUARD_1DIV32 = 0, /*< Guard interval 1/32nd.     */
+	DRX_GUARD_1DIV16,     /*< Guard interval 1/16th.     */
+	DRX_GUARD_1DIV8,      /*< Guard interval 1/8th.      */
+	DRX_GUARD_1DIV4,      /*< Guard interval 1/4th.      */
 	DRX_GUARD_UNKNOWN = DRX_UNKNOWN,
-				/**< Guard interval unknown.    */
+				/*< Guard interval unknown.    */
 	DRX_GUARD_AUTO = DRX_AUTO
-				/**< Autodetect guard interval. */
+				/*< Autodetect guard interval. */
 };
 
-/**
+/*
 * \enum enum drx_fft_mode
 * \brief FFT mode.
 */
 enum drx_fft_mode {
-	DRX_FFTMODE_2K = 0,    /**< 2K FFT mode.         */
-	DRX_FFTMODE_4K,	       /**< 4K FFT mode.         */
-	DRX_FFTMODE_8K,	       /**< 8K FFT mode.         */
+	DRX_FFTMODE_2K = 0,    /*< 2K FFT mode.         */
+	DRX_FFTMODE_4K,	       /*< 4K FFT mode.         */
+	DRX_FFTMODE_8K,	       /*< 8K FFT mode.         */
 	DRX_FFTMODE_UNKNOWN = DRX_UNKNOWN,
-				/**< FFT mode unknown.    */
+				/*< FFT mode unknown.    */
 	DRX_FFTMODE_AUTO = DRX_AUTO
-				/**< Autodetect FFT mode. */
+				/*< Autodetect FFT mode. */
 };
 
-/**
+/*
 * \enum enum drx_classification
 * \brief Channel classification.
 */
 enum drx_classification {
-	DRX_CLASSIFICATION_GAUSS = 0, /**< Gaussion noise.            */
-	DRX_CLASSIFICATION_HVY_GAUSS, /**< Heavy Gaussion noise.      */
-	DRX_CLASSIFICATION_COCHANNEL, /**< Co-channel.                */
-	DRX_CLASSIFICATION_STATIC,    /**< Static echo.               */
-	DRX_CLASSIFICATION_MOVING,    /**< Moving echo.               */
-	DRX_CLASSIFICATION_ZERODB,    /**< Zero dB echo.              */
+	DRX_CLASSIFICATION_GAUSS = 0, /*< Gaussion noise.            */
+	DRX_CLASSIFICATION_HVY_GAUSS, /*< Heavy Gaussion noise.      */
+	DRX_CLASSIFICATION_COCHANNEL, /*< Co-channel.                */
+	DRX_CLASSIFICATION_STATIC,    /*< Static echo.               */
+	DRX_CLASSIFICATION_MOVING,    /*< Moving echo.               */
+	DRX_CLASSIFICATION_ZERODB,    /*< Zero dB echo.              */
 	DRX_CLASSIFICATION_UNKNOWN = DRX_UNKNOWN,
-					/**< Unknown classification     */
+					/*< Unknown classification     */
 	DRX_CLASSIFICATION_AUTO = DRX_AUTO
-					/**< Autodetect classification. */
+					/*< Autodetect classification. */
 };
 
-/**
+/*
 * /enum enum drx_interleave_mode
 * /brief Interleave modes
 */
@@ -673,80 +673,80 @@ enum drx_interleave_mode {
 	DRX_INTERLEAVEMODE_B52_M48,
 	DRX_INTERLEAVEMODE_B52_M0,
 	DRX_INTERLEAVEMODE_UNKNOWN = DRX_UNKNOWN,
-					/**< Unknown interleave mode    */
+					/*< Unknown interleave mode    */
 	DRX_INTERLEAVEMODE_AUTO = DRX_AUTO
-					/**< Autodetect interleave mode */
+					/*< Autodetect interleave mode */
 };
 
-/**
+/*
 * \enum enum drx_carrier_mode
 * \brief Channel Carrier Mode.
 */
 enum drx_carrier_mode {
-	DRX_CARRIER_MULTI = 0,		/**< Multi carrier mode       */
-	DRX_CARRIER_SINGLE,		/**< Single carrier mode      */
+	DRX_CARRIER_MULTI = 0,		/*< Multi carrier mode       */
+	DRX_CARRIER_SINGLE,		/*< Single carrier mode      */
 	DRX_CARRIER_UNKNOWN = DRX_UNKNOWN,
-					/**< Carrier mode unknown.    */
-	DRX_CARRIER_AUTO = DRX_AUTO	/**< Autodetect carrier mode  */
+					/*< Carrier mode unknown.    */
+	DRX_CARRIER_AUTO = DRX_AUTO	/*< Autodetect carrier mode  */
 };
 
-/**
+/*
 * \enum enum drx_frame_mode
 * \brief Channel Frame Mode.
 */
 enum drx_frame_mode {
-	DRX_FRAMEMODE_420 = 0,	 /**< 420 with variable PN  */
-	DRX_FRAMEMODE_595,	 /**< 595                   */
-	DRX_FRAMEMODE_945,	 /**< 945 with variable PN  */
+	DRX_FRAMEMODE_420 = 0,	 /*< 420 with variable PN  */
+	DRX_FRAMEMODE_595,	 /*< 595                   */
+	DRX_FRAMEMODE_945,	 /*< 945 with variable PN  */
 	DRX_FRAMEMODE_420_FIXED_PN,
-					/**< 420 with fixed PN     */
+					/*< 420 with fixed PN     */
 	DRX_FRAMEMODE_945_FIXED_PN,
-					/**< 945 with fixed PN     */
+					/*< 945 with fixed PN     */
 	DRX_FRAMEMODE_UNKNOWN = DRX_UNKNOWN,
-					/**< Frame mode unknown.   */
+					/*< Frame mode unknown.   */
 	DRX_FRAMEMODE_AUTO = DRX_AUTO
-					/**< Autodetect frame mode */
+					/*< Autodetect frame mode */
 };
 
-/**
+/*
 * \enum enum drx_tps_frame
 * \brief Frame number in current super-frame.
 */
 enum drx_tps_frame {
-	DRX_TPS_FRAME1 = 0,	  /**< TPS frame 1.       */
-	DRX_TPS_FRAME2,		  /**< TPS frame 2.       */
-	DRX_TPS_FRAME3,		  /**< TPS frame 3.       */
-	DRX_TPS_FRAME4,		  /**< TPS frame 4.       */
+	DRX_TPS_FRAME1 = 0,	  /*< TPS frame 1.       */
+	DRX_TPS_FRAME2,		  /*< TPS frame 2.       */
+	DRX_TPS_FRAME3,		  /*< TPS frame 3.       */
+	DRX_TPS_FRAME4,		  /*< TPS frame 4.       */
 	DRX_TPS_FRAME_UNKNOWN = DRX_UNKNOWN
-					/**< TPS frame unknown. */
+					/*< TPS frame unknown. */
 };
 
-/**
+/*
 * \enum enum drx_ldpc
 * \brief TPS LDPC .
 */
 enum drx_ldpc {
-	DRX_LDPC_0_4 = 0,	  /**< LDPC 0.4           */
-	DRX_LDPC_0_6,		  /**< LDPC 0.6           */
-	DRX_LDPC_0_8,		  /**< LDPC 0.8           */
+	DRX_LDPC_0_4 = 0,	  /*< LDPC 0.4           */
+	DRX_LDPC_0_6,		  /*< LDPC 0.6           */
+	DRX_LDPC_0_8,		  /*< LDPC 0.8           */
 	DRX_LDPC_UNKNOWN = DRX_UNKNOWN,
-					/**< LDPC unknown.      */
-	DRX_LDPC_AUTO = DRX_AUTO  /**< Autodetect LDPC    */
+					/*< LDPC unknown.      */
+	DRX_LDPC_AUTO = DRX_AUTO  /*< Autodetect LDPC    */
 };
 
-/**
+/*
 * \enum enum drx_pilot_mode
 * \brief Pilot modes in DTMB.
 */
 enum drx_pilot_mode {
-	DRX_PILOT_ON = 0,	  /**< Pilot On             */
-	DRX_PILOT_OFF,		  /**< Pilot Off            */
+	DRX_PILOT_ON = 0,	  /*< Pilot On             */
+	DRX_PILOT_OFF,		  /*< Pilot Off            */
 	DRX_PILOT_UNKNOWN = DRX_UNKNOWN,
-					/**< Pilot unknown.       */
-	DRX_PILOT_AUTO = DRX_AUTO /**< Autodetect Pilot     */
+					/*< Pilot unknown.       */
+	DRX_PILOT_AUTO = DRX_AUTO /*< Autodetect Pilot     */
 };
 
-/**
+/*
  * enum drxu_code_action - indicate if firmware has to be uploaded or verified.
  * @UCODE_UPLOAD:	Upload the microcode image to device
  * @UCODE_VERIFY:	Compare microcode image with code on device
@@ -756,7 +756,7 @@ enum drxu_code_action {
 	UCODE_VERIFY
 };
 
-/**
+/*
 * \enum enum drx_lock_status * \brief Used to reflect current lock status of demodulator.
 *
 * The generic lock states have device dependent semantics.
@@ -801,7 +801,7 @@ enum drx_lock_status {
 	DRX_LOCKED
 };
 
-/**
+/*
 * \enum enum drx_uio* \brief Used to address a User IO (UIO).
 */
 enum drx_uio {
@@ -840,7 +840,7 @@ enum drx_uio {
 	DRX_UIO_MAX = DRX_UIO32
 };
 
-/**
+/*
 * \enum enum drxuio_mode * \brief Used to configure the modus oprandi of a UIO.
 *
 * DRX_UIO_MODE_FIRMWARE is an old uio mode.
@@ -850,37 +850,37 @@ enum drx_uio {
 */
 enum drxuio_mode {
 	DRX_UIO_MODE_DISABLE = 0x01,
-			    /**< not used, pin is configured as input */
+			    /*< not used, pin is configured as input */
 	DRX_UIO_MODE_READWRITE = 0x02,
-			    /**< used for read/write by application   */
+			    /*< used for read/write by application   */
 	DRX_UIO_MODE_FIRMWARE = 0x04,
-			    /**< controlled by firmware, function 0   */
+			    /*< controlled by firmware, function 0   */
 	DRX_UIO_MODE_FIRMWARE0 = DRX_UIO_MODE_FIRMWARE,
-					    /**< same as above        */
+					    /*< same as above        */
 	DRX_UIO_MODE_FIRMWARE1 = 0x08,
-			    /**< controlled by firmware, function 1   */
+			    /*< controlled by firmware, function 1   */
 	DRX_UIO_MODE_FIRMWARE2 = 0x10,
-			    /**< controlled by firmware, function 2   */
+			    /*< controlled by firmware, function 2   */
 	DRX_UIO_MODE_FIRMWARE3 = 0x20,
-			    /**< controlled by firmware, function 3   */
+			    /*< controlled by firmware, function 3   */
 	DRX_UIO_MODE_FIRMWARE4 = 0x40,
-			    /**< controlled by firmware, function 4   */
+			    /*< controlled by firmware, function 4   */
 	DRX_UIO_MODE_FIRMWARE5 = 0x80
-			    /**< controlled by firmware, function 5   */
+			    /*< controlled by firmware, function 5   */
 };
 
-/**
+/*
 * \enum enum drxoob_downstream_standard * \brief Used to select OOB standard.
 *
 * Based on ANSI 55-1 and 55-2
 */
 enum drxoob_downstream_standard {
 	DRX_OOB_MODE_A = 0,
-		       /**< ANSI 55-1   */
+		       /*< ANSI 55-1   */
 	DRX_OOB_MODE_B_GRADE_A,
-		       /**< ANSI 55-2 A */
+		       /*< ANSI 55-2 A */
 	DRX_OOB_MODE_B_GRADE_B
-		       /**< ANSI 55-2 B */
+		       /*< ANSI 55-2 B */
 };
 
 /*-------------------------------------------------------------------------
@@ -924,7 +924,7 @@ STRUCTS
 /*============================================================================*/
 /*============================================================================*/
 
-/**
+/*
  * struct drxu_code_info	Parameters for microcode upload and verfiy.
  *
  * @mc_file:	microcode file name
@@ -935,7 +935,7 @@ struct drxu_code_info {
 	char 			*mc_file;
 };
 
-/**
+/*
 * \struct drx_mc_version_rec_t
 * \brief Microcode version record
 * Version numbers are stored in BCD format, as usual:
@@ -963,7 +963,7 @@ struct drx_mc_version_rec {
 
 /*========================================*/
 
-/**
+/*
 * \struct drx_filter_info_t
 * \brief Parameters for loading filter coefficients
 *
@@ -971,18 +971,18 @@ struct drx_mc_version_rec {
 */
 struct drx_filter_info {
 	u8 *data_re;
-	      /**< pointer to coefficients for RE */
+	      /*< pointer to coefficients for RE */
 	u8 *data_im;
-	      /**< pointer to coefficients for IM */
+	      /*< pointer to coefficients for IM */
 	u16 size_re;
-	      /**< size of coefficients for RE    */
+	      /*< size of coefficients for RE    */
 	u16 size_im;
-	      /**< size of coefficients for IM    */
+	      /*< size of coefficients for IM    */
 };
 
 /*========================================*/
 
-/**
+/*
 * \struct struct drx_channel * \brief The set of parameters describing a single channel.
 *
 * Used by DRX_CTRL_SET_CHANNEL and DRX_CTRL_GET_CHANNEL.
@@ -991,29 +991,29 @@ struct drx_filter_info {
 */
 struct drx_channel {
 	s32 frequency;
-				/**< frequency in kHz                 */
+				/*< frequency in kHz                 */
 	enum drx_bandwidth bandwidth;
-				/**< bandwidth                        */
-	enum drx_mirror mirror;	/**< mirrored or not on RF            */
+				/*< bandwidth                        */
+	enum drx_mirror mirror;	/*< mirrored or not on RF            */
 	enum drx_modulation constellation;
-				/**< constellation                    */
+				/*< constellation                    */
 	enum drx_hierarchy hierarchy;
-				/**< hierarchy                        */
-	enum drx_priority priority;	/**< priority                         */
-	enum drx_coderate coderate;	/**< coderate                         */
-	enum drx_guard guard;	/**< guard interval                   */
-	enum drx_fft_mode fftmode;	/**< fftmode                          */
+				/*< hierarchy                        */
+	enum drx_priority priority;	/*< priority                         */
+	enum drx_coderate coderate;	/*< coderate                         */
+	enum drx_guard guard;	/*< guard interval                   */
+	enum drx_fft_mode fftmode;	/*< fftmode                          */
 	enum drx_classification classification;
-				/**< classification                   */
+				/*< classification                   */
 	u32 symbolrate;
-				/**< symbolrate in symbols/sec        */
+				/*< symbolrate in symbols/sec        */
 	enum drx_interleave_mode interleavemode;
-				/**< interleaveMode QAM               */
-	enum drx_ldpc ldpc;		/**< ldpc                             */
-	enum drx_carrier_mode carrier;	/**< carrier                          */
+				/*< interleaveMode QAM               */
+	enum drx_ldpc ldpc;		/*< ldpc                             */
+	enum drx_carrier_mode carrier;	/*< carrier                          */
 	enum drx_frame_mode framemode;
-				/**< frame mode                       */
-	enum drx_pilot_mode pilot;	/**< pilot mode                       */
+				/*< frame mode                       */
+	enum drx_pilot_mode pilot;	/*< pilot mode                       */
 };
 
 /*========================================*/
@@ -1027,74 +1027,74 @@ enum drx_cfg_sqi_speed {
 
 /*========================================*/
 
-/**
+/*
 * \struct struct drx_complex * A complex number.
 *
 * Used by DRX_CTRL_CONSTEL.
 */
 struct drx_complex {
 	s16 im;
-     /**< Imaginary part. */
+     /*< Imaginary part. */
 	s16 re;
-     /**< Real part.      */
+     /*< Real part.      */
 };
 
 /*========================================*/
 
-/**
+/*
 * \struct struct drx_frequency_plan * Array element of a frequency plan.
 *
 * Used by DRX_CTRL_SCAN_INIT.
 */
 struct drx_frequency_plan {
 	s32 first;
-		     /**< First centre frequency in this band        */
+		     /*< First centre frequency in this band        */
 	s32 last;
-		     /**< Last centre frequency in this band         */
+		     /*< Last centre frequency in this band         */
 	s32 step;
-		     /**< Stepping frequency in this band            */
+		     /*< Stepping frequency in this band            */
 	enum drx_bandwidth bandwidth;
-		     /**< Bandwidth within this frequency band       */
+		     /*< Bandwidth within this frequency band       */
 	u16 ch_number;
-		     /**< First channel number in this band, or first
+		     /*< First channel number in this band, or first
 			    index in ch_names                         */
 	char **ch_names;
-		     /**< Optional list of channel names in this
+		     /*< Optional list of channel names in this
 			    band                                     */
 };
 
 /*========================================*/
 
-/**
+/*
 * \struct struct drx_scan_param * Parameters for channel scan.
 *
 * Used by DRX_CTRL_SCAN_INIT.
 */
 struct drx_scan_param {
 	struct drx_frequency_plan *frequency_plan;
-				  /**< Frequency plan (array)*/
-	u16 frequency_plan_size;  /**< Number of bands       */
-	u32 num_tries;		  /**< Max channels tried    */
-	s32 skip;	  /**< Minimum frequency step to take
+				  /*< Frequency plan (array)*/
+	u16 frequency_plan_size;  /*< Number of bands       */
+	u32 num_tries;		  /*< Max channels tried    */
+	s32 skip;	  /*< Minimum frequency step to take
 					after a channel is found */
-	void *ext_params;	  /**< Standard specific params */
+	void *ext_params;	  /*< Standard specific params */
 };
 
 /*========================================*/
 
-/**
+/*
 * \brief Scan commands.
 * Used by scanning algorithms.
 */
 enum drx_scan_command {
-		DRX_SCAN_COMMAND_INIT = 0,/**< Initialize scanning */
-		DRX_SCAN_COMMAND_NEXT,	  /**< Next scan           */
-		DRX_SCAN_COMMAND_STOP	  /**< Stop scanning       */
+		DRX_SCAN_COMMAND_INIT = 0,/*< Initialize scanning */
+		DRX_SCAN_COMMAND_NEXT,	  /*< Next scan           */
+		DRX_SCAN_COMMAND_STOP	  /*< Stop scanning       */
 };
 
 /*========================================*/
 
-/**
+/*
 * \brief Inner scan function prototype.
 */
 typedef int(*drx_scan_func_t) (void *scan_context,
@@ -1104,77 +1104,77 @@ typedef int(*drx_scan_func_t) (void *scan_context,
 
 /*========================================*/
 
-/**
+/*
 * \struct struct drxtps_info * TPS information, DVB-T specific.
 *
 * Used by DRX_CTRL_TPS_INFO.
 */
 	struct drxtps_info {
-		enum drx_fft_mode fftmode;	/**< Fft mode       */
-		enum drx_guard guard;	/**< Guard interval */
+		enum drx_fft_mode fftmode;	/*< Fft mode       */
+		enum drx_guard guard;	/*< Guard interval */
 		enum drx_modulation constellation;
-					/**< Constellation  */
+					/*< Constellation  */
 		enum drx_hierarchy hierarchy;
-					/**< Hierarchy      */
+					/*< Hierarchy      */
 		enum drx_coderate high_coderate;
-					/**< High code rate */
+					/*< High code rate */
 		enum drx_coderate low_coderate;
-					/**< Low cod rate   */
-		enum drx_tps_frame frame;	/**< Tps frame      */
-		u8 length;		/**< Length         */
-		u16 cell_id;		/**< Cell id        */
+					/*< Low cod rate   */
+		enum drx_tps_frame frame;	/*< Tps frame      */
+		u8 length;		/*< Length         */
+		u16 cell_id;		/*< Cell id        */
 	};
 
 /*========================================*/
 
-/**
+/*
 * \brief Power mode of device.
 *
 * Used by DRX_CTRL_SET_POWER_MODE.
 */
 	enum drx_power_mode {
 		DRX_POWER_UP = 0,
-			 /**< Generic         , Power Up Mode   */
+			 /*< Generic         , Power Up Mode   */
 		DRX_POWER_MODE_1,
-			 /**< Device specific , Power Up Mode   */
+			 /*< Device specific , Power Up Mode   */
 		DRX_POWER_MODE_2,
-			 /**< Device specific , Power Up Mode   */
+			 /*< Device specific , Power Up Mode   */
 		DRX_POWER_MODE_3,
-			 /**< Device specific , Power Up Mode   */
+			 /*< Device specific , Power Up Mode   */
 		DRX_POWER_MODE_4,
-			 /**< Device specific , Power Up Mode   */
+			 /*< Device specific , Power Up Mode   */
 		DRX_POWER_MODE_5,
-			 /**< Device specific , Power Up Mode   */
+			 /*< Device specific , Power Up Mode   */
 		DRX_POWER_MODE_6,
-			 /**< Device specific , Power Up Mode   */
+			 /*< Device specific , Power Up Mode   */
 		DRX_POWER_MODE_7,
-			 /**< Device specific , Power Up Mode   */
+			 /*< Device specific , Power Up Mode   */
 		DRX_POWER_MODE_8,
-			 /**< Device specific , Power Up Mode   */
+			 /*< Device specific , Power Up Mode   */
 
 		DRX_POWER_MODE_9,
-			 /**< Device specific , Power Down Mode */
+			 /*< Device specific , Power Down Mode */
 		DRX_POWER_MODE_10,
-			 /**< Device specific , Power Down Mode */
+			 /*< Device specific , Power Down Mode */
 		DRX_POWER_MODE_11,
-			 /**< Device specific , Power Down Mode */
+			 /*< Device specific , Power Down Mode */
 		DRX_POWER_MODE_12,
-			 /**< Device specific , Power Down Mode */
+			 /*< Device specific , Power Down Mode */
 		DRX_POWER_MODE_13,
-			 /**< Device specific , Power Down Mode */
+			 /*< Device specific , Power Down Mode */
 		DRX_POWER_MODE_14,
-			 /**< Device specific , Power Down Mode */
+			 /*< Device specific , Power Down Mode */
 		DRX_POWER_MODE_15,
-			 /**< Device specific , Power Down Mode */
+			 /*< Device specific , Power Down Mode */
 		DRX_POWER_MODE_16,
-			 /**< Device specific , Power Down Mode */
+			 /*< Device specific , Power Down Mode */
 		DRX_POWER_DOWN = 255
-			 /**< Generic         , Power Down Mode */
+			 /*< Generic         , Power Down Mode */
 	};
 
 /*========================================*/
 
-/**
+/*
 * \enum enum drx_module * \brief Software module identification.
 *
 * Used by DRX_CTRL_VERSION.
@@ -1191,93 +1191,93 @@ typedef int(*drx_scan_func_t) (void *scan_context,
 		DRX_MODULE_UNKNOWN
 	};
 
-/**
+/*
 * \enum struct drx_version * \brief Version information of one software module.
 *
 * Used by DRX_CTRL_VERSION.
 */
 	struct drx_version {
 		enum drx_module module_type;
-			       /**< Type identifier of the module */
+			       /*< Type identifier of the module */
 		char *module_name;
-			       /**< Name or description of module */
-		u16 v_major;  /**< Major version number          */
-		u16 v_minor;  /**< Minor version number          */
-		u16 v_patch;  /**< Patch version number          */
-		char *v_string; /**< Version as text string        */
+			       /*< Name or description of module */
+		u16 v_major;  /*< Major version number          */
+		u16 v_minor;  /*< Minor version number          */
+		u16 v_patch;  /*< Patch version number          */
+		char *v_string; /*< Version as text string        */
 	};
 
-/**
+/*
 * \enum struct drx_version_list * \brief List element of NULL terminated, linked list for version information.
 *
 * Used by DRX_CTRL_VERSION.
 */
 struct drx_version_list {
-	struct drx_version *version;/**< Version information */
+	struct drx_version *version;/*< Version information */
 	struct drx_version_list *next;
-			      /**< Next list element   */
+			      /*< Next list element   */
 };
 
 /*========================================*/
 
-/**
+/*
 * \brief Parameters needed to confiugure a UIO.
 *
 * Used by DRX_CTRL_UIO_CFG.
 */
 	struct drxuio_cfg {
 		enum drx_uio uio;
-		       /**< UIO identifier       */
+		       /*< UIO identifier       */
 		enum drxuio_mode mode;
-		       /**< UIO operational mode */
+		       /*< UIO operational mode */
 	};
 
 /*========================================*/
 
-/**
+/*
 * \brief Parameters needed to read from or write to a UIO.
 *
 * Used by DRX_CTRL_UIO_READ and DRX_CTRL_UIO_WRITE.
 */
 	struct drxuio_data {
 		enum drx_uio uio;
-		   /**< UIO identifier              */
+		   /*< UIO identifier              */
 		bool value;
-		   /**< UIO value (true=1, false=0) */
+		   /*< UIO value (true=1, false=0) */
 	};
 
 /*========================================*/
 
-/**
+/*
 * \brief Parameters needed to configure OOB.
 *
 * Used by DRX_CTRL_SET_OOB.
 */
 	struct drxoob {
-		s32 frequency;	   /**< Frequency in kHz      */
+		s32 frequency;	   /*< Frequency in kHz      */
 		enum drxoob_downstream_standard standard;
-						   /**< OOB standard          */
-		bool spectrum_inverted;	   /**< If true, then spectrum
+						   /*< OOB standard          */
+		bool spectrum_inverted;	   /*< If true, then spectrum
 							 is inverted          */
 	};
 
 /*========================================*/
 
-/**
+/*
 * \brief Metrics from OOB.
 *
 * Used by DRX_CTRL_GET_OOB.
 */
 	struct drxoob_status {
-		s32 frequency; /**< Frequency in Khz         */
-		enum drx_lock_status lock;	  /**< Lock status              */
-		u32 mer;		  /**< MER                      */
-		s32 symbol_rate_offset;	  /**< Symbolrate offset in ppm */
+		s32 frequency; /*< Frequency in Khz         */
+		enum drx_lock_status lock;	  /*< Lock status              */
+		u32 mer;		  /*< MER                      */
+		s32 symbol_rate_offset;	  /*< Symbolrate offset in ppm */
 	};
 
 /*========================================*/
 
-/**
+/*
 * \brief Device dependent configuration data.
 *
 * Used by DRX_CTRL_SET_CFG and DRX_CTRL_GET_CFG.
@@ -1285,14 +1285,14 @@ struct drx_version_list {
 */
 	struct drx_cfg {
 		u32 cfg_type;
-			  /**< Function identifier */
+			  /*< Function identifier */
 		void *cfg_data;
-			  /**< Function data */
+			  /*< Function data */
 	};
 
 /*========================================*/
 
-/**
+/*
 * /struct DRXMpegStartWidth_t
 * MStart width [nr MCLK cycles] for serial MPEG output.
 */
@@ -1303,7 +1303,7 @@ struct drx_version_list {
 	};
 
 /* CTRL CFG MPEG output */
-/**
+/*
 * \struct struct drx_cfg_mpeg_output * \brief Configuration parameters for MPEG output control.
 *
 * Used by DRX_CFG_MPEG_OUTPUT, in combination with DRX_CTRL_SET_CFG and
@@ -1311,29 +1311,29 @@ struct drx_version_list {
 */
 
 	struct drx_cfg_mpeg_output {
-		bool enable_mpeg_output;/**< If true, enable MPEG output      */
-		bool insert_rs_byte;	/**< If true, insert RS byte          */
-		bool enable_parallel;	/**< If true, parallel out otherwise
+		bool enable_mpeg_output;/*< If true, enable MPEG output      */
+		bool insert_rs_byte;	/*< If true, insert RS byte          */
+		bool enable_parallel;	/*< If true, parallel out otherwise
 								     serial   */
-		bool invert_data;	/**< If true, invert DATA signals     */
-		bool invert_err;	/**< If true, invert ERR signal       */
-		bool invert_str;	/**< If true, invert STR signals      */
-		bool invert_val;	/**< If true, invert VAL signals      */
-		bool invert_clk;	/**< If true, invert CLK signals      */
-		bool static_clk;	/**< If true, static MPEG clockrate
+		bool invert_data;	/*< If true, invert DATA signals     */
+		bool invert_err;	/*< If true, invert ERR signal       */
+		bool invert_str;	/*< If true, invert STR signals      */
+		bool invert_val;	/*< If true, invert VAL signals      */
+		bool invert_clk;	/*< If true, invert CLK signals      */
+		bool static_clk;	/*< If true, static MPEG clockrate
 					     will be used, otherwise clockrate
 					     will adapt to the bitrate of the
 					     TS                               */
-		u32 bitrate;		/**< Maximum bitrate in b/s in case
+		u32 bitrate;		/*< Maximum bitrate in b/s in case
 					     static clockrate is selected     */
 		enum drxmpeg_str_width width_str;
-					/**< MPEG start width                 */
+					/*< MPEG start width                 */
 	};
 
 
 /*========================================*/
 
-/**
+/*
 * \struct struct drxi2c_data * \brief Data for I2C via 2nd or 3rd or etc I2C port.
 *
 * Used by DRX_CTRL_I2C_READWRITE.
@@ -1341,187 +1341,187 @@ struct drx_version_list {
 *
 */
 	struct drxi2c_data {
-		u16 port_nr;	/**< I2C port number               */
+		u16 port_nr;	/*< I2C port number               */
 		struct i2c_device_addr *w_dev_addr;
-				/**< Write device address          */
-		u16 w_count;	/**< Size of write data in bytes   */
-		u8 *wData;	/**< Pointer to write data         */
+				/*< Write device address          */
+		u16 w_count;	/*< Size of write data in bytes   */
+		u8 *wData;	/*< Pointer to write data         */
 		struct i2c_device_addr *r_dev_addr;
-				/**< Read device address           */
-		u16 r_count;	/**< Size of data to read in bytes */
-		u8 *r_data;	/**< Pointer to read buffer        */
+				/*< Read device address           */
+		u16 r_count;	/*< Size of data to read in bytes */
+		u8 *r_data;	/*< Pointer to read buffer        */
 	};
 
 /*========================================*/
 
-/**
+/*
 * \enum enum drx_aud_standard * \brief Audio standard identifier.
 *
 * Used by DRX_CTRL_SET_AUD.
 */
 	enum drx_aud_standard {
-		DRX_AUD_STANDARD_BTSC,	   /**< set BTSC standard (USA)       */
-		DRX_AUD_STANDARD_A2,	   /**< set A2-Korea FM Stereo        */
-		DRX_AUD_STANDARD_EIAJ,	   /**< set to Japanese FM Stereo     */
-		DRX_AUD_STANDARD_FM_STEREO,/**< set to FM-Stereo Radio        */
-		DRX_AUD_STANDARD_M_MONO,   /**< for 4.5 MHz mono detected     */
-		DRX_AUD_STANDARD_D_K_MONO, /**< for 6.5 MHz mono detected     */
-		DRX_AUD_STANDARD_BG_FM,	   /**< set BG_FM standard            */
-		DRX_AUD_STANDARD_D_K1,	   /**< set D_K1 standard             */
-		DRX_AUD_STANDARD_D_K2,	   /**< set D_K2 standard             */
-		DRX_AUD_STANDARD_D_K3,	   /**< set D_K3 standard             */
+		DRX_AUD_STANDARD_BTSC,	   /*< set BTSC standard (USA)       */
+		DRX_AUD_STANDARD_A2,	   /*< set A2-Korea FM Stereo        */
+		DRX_AUD_STANDARD_EIAJ,	   /*< set to Japanese FM Stereo     */
+		DRX_AUD_STANDARD_FM_STEREO,/*< set to FM-Stereo Radio        */
+		DRX_AUD_STANDARD_M_MONO,   /*< for 4.5 MHz mono detected     */
+		DRX_AUD_STANDARD_D_K_MONO, /*< for 6.5 MHz mono detected     */
+		DRX_AUD_STANDARD_BG_FM,	   /*< set BG_FM standard            */
+		DRX_AUD_STANDARD_D_K1,	   /*< set D_K1 standard             */
+		DRX_AUD_STANDARD_D_K2,	   /*< set D_K2 standard             */
+		DRX_AUD_STANDARD_D_K3,	   /*< set D_K3 standard             */
 		DRX_AUD_STANDARD_BG_NICAM_FM,
-					   /**< set BG_NICAM_FM standard      */
+					   /*< set BG_NICAM_FM standard      */
 		DRX_AUD_STANDARD_L_NICAM_AM,
-					   /**< set L_NICAM_AM standard       */
+					   /*< set L_NICAM_AM standard       */
 		DRX_AUD_STANDARD_I_NICAM_FM,
-					   /**< set I_NICAM_FM standard       */
+					   /*< set I_NICAM_FM standard       */
 		DRX_AUD_STANDARD_D_K_NICAM_FM,
-					   /**< set D_K_NICAM_FM standard     */
-		DRX_AUD_STANDARD_NOT_READY,/**< used to detect audio standard */
+					   /*< set D_K_NICAM_FM standard     */
+		DRX_AUD_STANDARD_NOT_READY,/*< used to detect audio standard */
 		DRX_AUD_STANDARD_AUTO = DRX_AUTO,
-					   /**< Automatic Standard Detection  */
+					   /*< Automatic Standard Detection  */
 		DRX_AUD_STANDARD_UNKNOWN = DRX_UNKNOWN
-					   /**< used as auto and for readback */
+					   /*< used as auto and for readback */
 	};
 
 /* CTRL_AUD_GET_STATUS    - struct drx_aud_status */
-/**
+/*
 * \enum enum drx_aud_nicam_status * \brief Status of NICAM carrier.
 */
 	enum drx_aud_nicam_status {
 		DRX_AUD_NICAM_DETECTED = 0,
-					  /**< NICAM carrier detected         */
+					  /*< NICAM carrier detected         */
 		DRX_AUD_NICAM_NOT_DETECTED,
-					  /**< NICAM carrier not detected     */
-		DRX_AUD_NICAM_BAD	  /**< NICAM carrier bad quality      */
+					  /*< NICAM carrier not detected     */
+		DRX_AUD_NICAM_BAD	  /*< NICAM carrier bad quality      */
 	};
 
-/**
+/*
 * \struct struct drx_aud_status * \brief Audio status characteristics.
 */
 	struct drx_aud_status {
-		bool stereo;		  /**< stereo detection               */
-		bool carrier_a;	  /**< carrier A detected             */
-		bool carrier_b;	  /**< carrier B detected             */
-		bool sap;		  /**< sap / bilingual detection      */
-		bool rds;		  /**< RDS data array present         */
+		bool stereo;		  /*< stereo detection               */
+		bool carrier_a;	  /*< carrier A detected             */
+		bool carrier_b;	  /*< carrier B detected             */
+		bool sap;		  /*< sap / bilingual detection      */
+		bool rds;		  /*< RDS data array present         */
 		enum drx_aud_nicam_status nicam_status;
-					  /**< status of NICAM carrier        */
-		s8 fm_ident;		  /**< FM Identification value        */
+					  /*< status of NICAM carrier        */
+		s8 fm_ident;		  /*< FM Identification value        */
 	};
 
 /* CTRL_AUD_READ_RDS       - DRXRDSdata_t */
 
-/**
+/*
 * \struct DRXRDSdata_t
 * \brief Raw RDS data array.
 */
 	struct drx_cfg_aud_rds {
-		bool valid;		  /**< RDS data validation            */
-		u16 data[18];		  /**< data from one RDS data array   */
+		bool valid;		  /*< RDS data validation            */
+		u16 data[18];		  /*< data from one RDS data array   */
 	};
 
 /* DRX_CFG_AUD_VOLUME      - struct drx_cfg_aud_volume - set/get */
-/**
+/*
 * \enum DRXAudAVCDecayTime_t
 * \brief Automatic volume control configuration.
 */
 	enum drx_aud_avc_mode {
-		DRX_AUD_AVC_OFF,	  /**< Automatic volume control off   */
-		DRX_AUD_AVC_DECAYTIME_8S, /**< level volume in  8 seconds     */
-		DRX_AUD_AVC_DECAYTIME_4S, /**< level volume in  4 seconds     */
-		DRX_AUD_AVC_DECAYTIME_2S, /**< level volume in  2 seconds     */
-		DRX_AUD_AVC_DECAYTIME_20MS/**< level volume in 20 millisec    */
+		DRX_AUD_AVC_OFF,	  /*< Automatic volume control off   */
+		DRX_AUD_AVC_DECAYTIME_8S, /*< level volume in  8 seconds     */
+		DRX_AUD_AVC_DECAYTIME_4S, /*< level volume in  4 seconds     */
+		DRX_AUD_AVC_DECAYTIME_2S, /*< level volume in  2 seconds     */
+		DRX_AUD_AVC_DECAYTIME_20MS/*< level volume in 20 millisec    */
 	};
 
-/**
+/*
 * /enum DRXAudMaxAVCGain_t
 * /brief Automatic volume control max gain in audio baseband.
 */
 	enum drx_aud_avc_max_gain {
-		DRX_AUD_AVC_MAX_GAIN_0DB, /**< maximum AVC gain  0 dB         */
-		DRX_AUD_AVC_MAX_GAIN_6DB, /**< maximum AVC gain  6 dB         */
-		DRX_AUD_AVC_MAX_GAIN_12DB /**< maximum AVC gain 12 dB         */
+		DRX_AUD_AVC_MAX_GAIN_0DB, /*< maximum AVC gain  0 dB         */
+		DRX_AUD_AVC_MAX_GAIN_6DB, /*< maximum AVC gain  6 dB         */
+		DRX_AUD_AVC_MAX_GAIN_12DB /*< maximum AVC gain 12 dB         */
 	};
 
-/**
+/*
 * /enum DRXAudMaxAVCAtten_t
 * /brief Automatic volume control max attenuation in audio baseband.
 */
 	enum drx_aud_avc_max_atten {
 		DRX_AUD_AVC_MAX_ATTEN_12DB,
-					  /**< maximum AVC attenuation 12 dB  */
+					  /*< maximum AVC attenuation 12 dB  */
 		DRX_AUD_AVC_MAX_ATTEN_18DB,
-					  /**< maximum AVC attenuation 18 dB  */
-		DRX_AUD_AVC_MAX_ATTEN_24DB/**< maximum AVC attenuation 24 dB  */
+					  /*< maximum AVC attenuation 18 dB  */
+		DRX_AUD_AVC_MAX_ATTEN_24DB/*< maximum AVC attenuation 24 dB  */
 	};
-/**
+/*
 * \struct struct drx_cfg_aud_volume * \brief Audio volume configuration.
 */
 	struct drx_cfg_aud_volume {
-		bool mute;		  /**< mute overrides volume setting  */
-		s16 volume;		  /**< volume, range -114 to 12 dB    */
-		enum drx_aud_avc_mode avc_mode;  /**< AVC auto volume control mode   */
-		u16 avc_ref_level;	  /**< AVC reference level            */
+		bool mute;		  /*< mute overrides volume setting  */
+		s16 volume;		  /*< volume, range -114 to 12 dB    */
+		enum drx_aud_avc_mode avc_mode;  /*< AVC auto volume control mode   */
+		u16 avc_ref_level;	  /*< AVC reference level            */
 		enum drx_aud_avc_max_gain avc_max_gain;
-					  /**< AVC max gain selection         */
+					  /*< AVC max gain selection         */
 		enum drx_aud_avc_max_atten avc_max_atten;
-					  /**< AVC max attenuation selection  */
-		s16 strength_left;	  /**< quasi-peak, left speaker       */
-		s16 strength_right;	  /**< quasi-peak, right speaker      */
+					  /*< AVC max attenuation selection  */
+		s16 strength_left;	  /*< quasi-peak, left speaker       */
+		s16 strength_right;	  /*< quasi-peak, right speaker      */
 	};
 
 /* DRX_CFG_I2S_OUTPUT      - struct drx_cfg_i2s_output - set/get */
-/**
+/*
 * \enum enum drxi2s_mode * \brief I2S output mode.
 */
 	enum drxi2s_mode {
-		DRX_I2S_MODE_MASTER,	  /**< I2S is in master mode          */
-		DRX_I2S_MODE_SLAVE	  /**< I2S is in slave mode           */
+		DRX_I2S_MODE_MASTER,	  /*< I2S is in master mode          */
+		DRX_I2S_MODE_SLAVE	  /*< I2S is in slave mode           */
 	};
 
-/**
+/*
 * \enum enum drxi2s_word_length * \brief Width of I2S data.
 */
 	enum drxi2s_word_length {
-		DRX_I2S_WORDLENGTH_32 = 0,/**< I2S data is 32 bit wide        */
-		DRX_I2S_WORDLENGTH_16 = 1 /**< I2S data is 16 bit wide        */
+		DRX_I2S_WORDLENGTH_32 = 0,/*< I2S data is 32 bit wide        */
+		DRX_I2S_WORDLENGTH_16 = 1 /*< I2S data is 16 bit wide        */
 	};
 
-/**
+/*
 * \enum enum drxi2s_format * \brief Data wordstrobe alignment for I2S.
 */
 	enum drxi2s_format {
 		DRX_I2S_FORMAT_WS_WITH_DATA,
-				    /**< I2S data and wordstrobe are aligned  */
+				    /*< I2S data and wordstrobe are aligned  */
 		DRX_I2S_FORMAT_WS_ADVANCED
-				    /**< I2S data one cycle after wordstrobe  */
+				    /*< I2S data one cycle after wordstrobe  */
 	};
 
-/**
+/*
 * \enum enum drxi2s_polarity * \brief Polarity of I2S data.
 */
 	enum drxi2s_polarity {
-		DRX_I2S_POLARITY_RIGHT,/**< wordstrobe - right high, left low */
-		DRX_I2S_POLARITY_LEFT  /**< wordstrobe - right low, left high */
+		DRX_I2S_POLARITY_RIGHT,/*< wordstrobe - right high, left low */
+		DRX_I2S_POLARITY_LEFT  /*< wordstrobe - right low, left high */
 	};
 
-/**
+/*
 * \struct struct drx_cfg_i2s_output * \brief I2S output configuration.
 */
 	struct drx_cfg_i2s_output {
-		bool output_enable;	  /**< I2S output enable              */
-		u32 frequency;	  /**< range from 8000-48000 Hz       */
-		enum drxi2s_mode mode;	  /**< I2S mode, master or slave      */
+		bool output_enable;	  /*< I2S output enable              */
+		u32 frequency;	  /*< range from 8000-48000 Hz       */
+		enum drxi2s_mode mode;	  /*< I2S mode, master or slave      */
 		enum drxi2s_word_length word_length;
-					  /**< I2S wordlength, 16 or 32 bits  */
-		enum drxi2s_polarity polarity;/**< I2S wordstrobe polarity        */
-		enum drxi2s_format format;	  /**< I2S wordstrobe delay to data   */
+					  /*< I2S wordlength, 16 or 32 bits  */
+		enum drxi2s_polarity polarity;/*< I2S wordstrobe polarity        */
+		enum drxi2s_format format;	  /*< I2S wordstrobe delay to data   */
 	};
 
 /* ------------------------------expert interface-----------------------------*/
-/**
+/*
 * /enum enum drx_aud_fm_deemphasis * setting for FM-Deemphasis in audio demodulator.
 *
 */
@@ -1531,7 +1531,7 @@ struct drx_version_list {
 		DRX_AUD_FM_DEEMPH_OFF
 	};
 
-/**
+/*
 * /enum DRXAudDeviation_t
 * setting for deviation mode in audio demodulator.
 *
@@ -1541,7 +1541,7 @@ struct drx_version_list {
 		DRX_AUD_DEVIATION_HIGH
 	};
 
-/**
+/*
 * /enum enum drx_no_carrier_option * setting for carrier, mute/noise.
 *
 */
@@ -1550,7 +1550,7 @@ struct drx_version_list {
 		DRX_NO_CARRIER_NOISE
 	};
 
-/**
+/*
 * \enum DRXAudAutoSound_t
 * \brief Automatic Sound
 */
@@ -1560,7 +1560,7 @@ struct drx_version_list {
 		DRX_AUD_AUTO_SOUND_SELECT_ON_CHANGE_OFF
 	};
 
-/**
+/*
 * \enum DRXAudASSThres_t
 * \brief Automatic Sound Select Thresholds
 */
@@ -1570,7 +1570,7 @@ struct drx_version_list {
 		u16 nicam;	/* Nicam Threshold for ASS configuration */
 	};
 
-/**
+/*
 * \struct struct drx_aud_carrier * \brief Carrier detection related parameters
 */
 	struct drx_aud_carrier {
@@ -1580,7 +1580,7 @@ struct drx_version_list {
 		s32 dco;	/* frequency adjustment (A) */
 	};
 
-/**
+/*
 * \struct struct drx_cfg_aud_carriers * \brief combining carrier A & B to one struct
 */
 	struct drx_cfg_aud_carriers {
@@ -1588,7 +1588,7 @@ struct drx_version_list {
 		struct drx_aud_carrier b;
 	};
 
-/**
+/*
 * /enum enum drx_aud_i2s_src * Selection of audio source
 */
 	enum drx_aud_i2s_src {
@@ -1597,19 +1597,19 @@ struct drx_version_list {
 		DRX_AUD_SRC_STEREO_OR_A,
 		DRX_AUD_SRC_STEREO_OR_B};
 
-/**
+/*
 * \enum enum drx_aud_i2s_matrix * \brief Used for selecting I2S output.
 */
 	enum drx_aud_i2s_matrix {
 		DRX_AUD_I2S_MATRIX_A_MONO,
-					/**< A sound only, stereo or mono     */
+					/*< A sound only, stereo or mono     */
 		DRX_AUD_I2S_MATRIX_B_MONO,
-					/**< B sound only, stereo or mono     */
+					/*< B sound only, stereo or mono     */
 		DRX_AUD_I2S_MATRIX_STEREO,
-					/**< A+B sound, transparant           */
-		DRX_AUD_I2S_MATRIX_MONO	/**< A+B mixed to mono sum, (L+R)/2   */};
+					/*< A+B sound, transparant           */
+		DRX_AUD_I2S_MATRIX_MONO	/*< A+B mixed to mono sum, (L+R)/2   */};
 
-/**
+/*
 * /enum enum drx_aud_fm_matrix * setting for FM-Matrix in audio demodulator.
 *
 */
@@ -1620,7 +1620,7 @@ struct drx_version_list {
 		DRX_AUD_FM_MATRIX_SOUND_A,
 		DRX_AUD_FM_MATRIX_SOUND_B};
 
-/**
+/*
 * \struct DRXAudMatrices_t
 * \brief Mixer settings
 */
@@ -1630,22 +1630,22 @@ struct drx_cfg_aud_mixer {
 	enum drx_aud_fm_matrix matrix_fm;
 };
 
-/**
+/*
 * \enum DRXI2SVidSync_t
 * \brief Audio/video synchronization, interacts with I2S mode.
 * AUTO_1 and AUTO_2 are for automatic video standard detection with preference
 * for NTSC or Monochrome, because the frequencies are too close (59.94 & 60 Hz)
 */
 	enum drx_cfg_aud_av_sync {
-		DRX_AUD_AVSYNC_OFF,/**< audio/video synchronization is off   */
+		DRX_AUD_AVSYNC_OFF,/*< audio/video synchronization is off   */
 		DRX_AUD_AVSYNC_NTSC,
-				   /**< it is an NTSC system                 */
+				   /*< it is an NTSC system                 */
 		DRX_AUD_AVSYNC_MONOCHROME,
-				   /**< it is a MONOCHROME system            */
+				   /*< it is a MONOCHROME system            */
 		DRX_AUD_AVSYNC_PAL_SECAM
-				   /**< it is a PAL/SECAM system             */};
+				   /*< it is a PAL/SECAM system             */};
 
-/**
+/*
 * \struct struct drx_cfg_aud_prescale * \brief Prescalers
 */
 struct drx_cfg_aud_prescale {
@@ -1653,7 +1653,7 @@ struct drx_cfg_aud_prescale {
 	s16 nicam_gain;
 };
 
-/**
+/*
 * \struct struct drx_aud_beep * \brief Beep
 */
 struct drx_aud_beep {
@@ -1662,14 +1662,14 @@ struct drx_aud_beep {
 	bool mute;
 };
 
-/**
+/*
 * \enum enum drx_aud_btsc_detect * \brief BTSC detetcion mode
 */
 	enum drx_aud_btsc_detect {
 		DRX_BTSC_STEREO,
 		DRX_BTSC_MONO_AND_SAP};
 
-/**
+/*
 * \struct struct drx_aud_data * \brief Audio data structure
 */
 struct drx_aud_data {
@@ -1692,7 +1692,7 @@ struct drx_aud_data {
 	bool rds_data_present;
 };
 
-/**
+/*
 * \enum enum drx_qam_lock_range * \brief QAM lock range mode
 */
 	enum drx_qam_lock_range {
@@ -1782,7 +1782,7 @@ struct drx_aud_data {
 							     u32 wdata,	/* data to write               */
 							     u32 *rdata);	/* data to read                */
 
-/**
+/*
 * \struct struct drx_access_func * \brief Interface to an access protocol.
 */
 struct drx_access_func {
@@ -1811,85 +1811,85 @@ struct drx_reg_dump {
 /*============================================================================*/
 /*============================================================================*/
 
-/**
+/*
 * \struct struct drx_common_attr * \brief Set of common attributes, shared by all DRX devices.
 */
 	struct drx_common_attr {
 		/* Microcode (firmware) attributes */
-		char *microcode_file;   /**<  microcode filename           */
+		char *microcode_file;   /*<  microcode filename           */
 		bool verify_microcode;
-				   /**< Use microcode verify or not.          */
+				   /*< Use microcode verify or not.          */
 		struct drx_mc_version_rec mcversion;
-				   /**< Version record of microcode from file */
+				   /*< Version record of microcode from file */
 
 		/* Clocks and tuner attributes */
 		s32 intermediate_freq;
-				     /**< IF,if tuner instance not used. (kHz)*/
+				     /*< IF,if tuner instance not used. (kHz)*/
 		s32 sys_clock_freq;
-				     /**< Systemclock frequency.  (kHz)       */
+				     /*< Systemclock frequency.  (kHz)       */
 		s32 osc_clock_freq;
-				     /**< Oscillator clock frequency.  (kHz)  */
+				     /*< Oscillator clock frequency.  (kHz)  */
 		s16 osc_clock_deviation;
-				     /**< Oscillator clock deviation.  (ppm)  */
+				     /*< Oscillator clock deviation.  (ppm)  */
 		bool mirror_freq_spect;
-				     /**< Mirror IF frequency spectrum or not.*/
+				     /*< Mirror IF frequency spectrum or not.*/
 
 		/* Initial MPEG output attributes */
 		struct drx_cfg_mpeg_output mpeg_cfg;
-				     /**< MPEG configuration                  */
+				     /*< MPEG configuration                  */
 
-		bool is_opened;     /**< if true instance is already opened. */
+		bool is_opened;     /*< if true instance is already opened. */
 
 		/* Channel scan */
 		struct drx_scan_param *scan_param;
-				      /**< scan parameters                    */
+				      /*< scan parameters                    */
 		u16 scan_freq_plan_index;
-				      /**< next index in freq plan            */
+				      /*< next index in freq plan            */
 		s32 scan_next_frequency;
-				      /**< next freq to scan                  */
-		bool scan_ready;     /**< scan ready flag                    */
-		u32 scan_max_channels;/**< number of channels in freqplan     */
+				      /*< next freq to scan                  */
+		bool scan_ready;     /*< scan ready flag                    */
+		u32 scan_max_channels;/*< number of channels in freqplan     */
 		u32 scan_channels_scanned;
-					/**< number of channels scanned       */
+					/*< number of channels scanned       */
 		/* Channel scan - inner loop: demod related */
 		drx_scan_func_t scan_function;
-				      /**< function to check channel          */
+				      /*< function to check channel          */
 		/* Channel scan - inner loop: SYSObj related */
-		void *scan_context;    /**< Context Pointer of SYSObj          */
+		void *scan_context;    /*< Context Pointer of SYSObj          */
 		/* Channel scan - parameters for default DTV scan function in core driver  */
 		u16 scan_demod_lock_timeout;
-					 /**< millisecs to wait for lock      */
+					 /*< millisecs to wait for lock      */
 		enum drx_lock_status scan_desired_lock;
-				      /**< lock requirement for channel found */
+				      /*< lock requirement for channel found */
 		/* scan_active can be used by SetChannel to decide how to program the tuner,
 		   fast or slow (but stable). Usually fast during scan. */
-		bool scan_active;    /**< true when scan routines are active */
+		bool scan_active;    /*< true when scan routines are active */
 
 		/* Power management */
 		enum drx_power_mode current_power_mode;
-				      /**< current power management mode      */
+				      /*< current power management mode      */
 
 		/* Tuner */
-		u8 tuner_port_nr;     /**< nr of I2C port to wich tuner is    */
+		u8 tuner_port_nr;     /*< nr of I2C port to wich tuner is    */
 		s32 tuner_min_freq_rf;
-				      /**< minimum RF input frequency, in kHz */
+				      /*< minimum RF input frequency, in kHz */
 		s32 tuner_max_freq_rf;
-				      /**< maximum RF input frequency, in kHz */
-		bool tuner_rf_agc_pol; /**< if true invert RF AGC polarity     */
-		bool tuner_if_agc_pol; /**< if true invert IF AGC polarity     */
-		bool tuner_slow_mode; /**< if true invert IF AGC polarity     */
+				      /*< maximum RF input frequency, in kHz */
+		bool tuner_rf_agc_pol; /*< if true invert RF AGC polarity     */
+		bool tuner_if_agc_pol; /*< if true invert IF AGC polarity     */
+		bool tuner_slow_mode; /*< if true invert IF AGC polarity     */
 
 		struct drx_channel current_channel;
-				      /**< current channel parameters         */
+				      /*< current channel parameters         */
 		enum drx_standard current_standard;
-				      /**< current standard selection         */
+				      /*< current standard selection         */
 		enum drx_standard prev_standard;
-				      /**< previous standard selection        */
+				      /*< previous standard selection        */
 		enum drx_standard di_cache_standard;
-				      /**< standard in DI cache if available  */
-		bool use_bootloader; /**< use bootloader in open             */
-		u32 capabilities;   /**< capabilities flags                 */
-		u32 product_id;      /**< product ID inc. metal fix number   */};
+				      /*< standard in DI cache if available  */
+		bool use_bootloader; /*< use bootloader in open             */
+		u32 capabilities;   /*< capabilities flags                 */
+		u32 product_id;      /*< product ID inc. metal fix number   */};
 
 /*
 * Generic functions for DRX devices.
@@ -1897,16 +1897,16 @@ struct drx_reg_dump {
 
 struct drx_demod_instance;
 
-/**
+/*
 * \struct struct drx_demod_instance * \brief Top structure of demodulator instance.
 */
 struct drx_demod_instance {
-				/**< data access protocol functions       */
+				/*< data access protocol functions       */
 	struct i2c_device_addr *my_i2c_dev_addr;
-				/**< i2c address and device identifier    */
+				/*< i2c address and device identifier    */
 	struct drx_common_attr *my_common_attr;
-				/**< common DRX attributes                */
-	void *my_ext_attr;    /**< device specific attributes           */
+				/*< common DRX attributes                */
+	void *my_ext_attr;    /*< device specific attributes           */
 	/* generic demodulator data */
 
 	struct i2c_adapter	*i2c;
@@ -2195,7 +2195,7 @@ Conversion from enum values to human readable form.
 Access macros
 -------------------------------------------------------------------------*/
 
-/**
+/*
 * \brief Create a compilable reference to the microcode attribute
 * \param d pointer to demod instance
 *
@@ -2229,7 +2229,7 @@ Access macros
 #define DRX_ATTR_I2CDEVID(d)        ((d)->my_i2c_dev_addr->i2c_dev_id)
 #define DRX_ISMCVERTYPE(x) ((x) == AUX_VER_RECORD)
 
-/**************************/
+/*************************/
 
 /* Macros with device-specific handling are converted to CFG functions */
 
@@ -2285,7 +2285,7 @@ Access macros
 #define DRX_GET_QAM_LOCKRANGE(d, x) DRX_ACCESSMACRO_GET((d), (x), \
 	 DRX_XS_CFG_QAM_LOCKRANGE, enum drx_qam_lock_range, DRX_UNKNOWN)
 
-/**
+/*
 * \brief Macro to check if std is an ATV standard
 * \retval true std is an ATV standard
 * \retval false std is an ATV standard
@@ -2298,7 +2298,7 @@ Access macros
 			      ((std) == DRX_STANDARD_NTSC) || \
 			      ((std) == DRX_STANDARD_FM))
 
-/**
+/*
 * \brief Macro to check if std is an QAM standard
 * \retval true std is an QAM standards
 * \retval false std is an QAM standards
@@ -2308,14 +2308,14 @@ Access macros
 			      ((std) == DRX_STANDARD_ITU_C) || \
 			      ((std) == DRX_STANDARD_ITU_D))
 
-/**
+/*
 * \brief Macro to check if std is VSB standard
 * \retval true std is VSB standard
 * \retval false std is not VSB standard
 */
 #define DRX_ISVSBSTD(std) ((std) == DRX_STANDARD_8VSB)
 
-/**
+/*
 * \brief Macro to check if std is DVBT standard
 * \retval true std is DVBT standard
 * \retval false std is not DVBT standard
diff --git a/drivers/media/dvb-frontends/drx39xyj/drxj.c b/drivers/media/dvb-frontends/drx39xyj/drxj.c
index 499ccff557bff41469c2ccc4b85fdcec8bff1d7b..8cbd8cc21059d8a94112608170e386ebcf7866d5 100644
--- a/drivers/media/dvb-frontends/drx39xyj/drxj.c
+++ b/drivers/media/dvb-frontends/drx39xyj/drxj.c
@@ -73,7 +73,7 @@ INCLUDE FILES
 
 #define DRX39XX_MAIN_FIRMWARE "dvb-fe-drxj-mc-1.0.8.fw"
 
-/**
+/*
 * \brief Maximum u32 value.
 */
 #ifndef MAX_U32
@@ -100,8 +100,8 @@ INCLUDE FILES
 #ifndef OOB_DRX_DRIVE_STRENGTH
 #define OOB_DRX_DRIVE_STRENGTH 0x02
 #endif
-/**** START DJCOMBO patches to DRXJ registermap constants *********************/
-/**** registermap 200706071303 from drxj **************************************/
+/*** START DJCOMBO patches to DRXJ registermap constants *********************/
+/*** registermap 200706071303 from drxj **************************************/
 #define   ATV_TOP_CR_AMP_TH_FM                                              0x0
 #define   ATV_TOP_CR_AMP_TH_L                                               0xA
 #define   ATV_TOP_CR_AMP_TH_LP                                              0xA
@@ -188,7 +188,7 @@ INCLUDE FILES
 #define     IQM_RC_ADJ_SEL_B_OFF                                            0x0
 #define     IQM_RC_ADJ_SEL_B_QAM                                            0x1
 #define     IQM_RC_ADJ_SEL_B_VSB                                            0x2
-/**** END DJCOMBO patches to DRXJ registermap *********************************/
+/*** END DJCOMBO patches to DRXJ registermap *********************************/
 
 #include "drx_driver_version.h"
 
@@ -208,25 +208,25 @@ DEFINES
 #define DRXJ_WAKE_UP_KEY (demod->my_i2c_dev_addr->i2c_addr)
 #endif
 
-/**
+/*
 * \def DRXJ_DEF_I2C_ADDR
 * \brief Default I2C address of a demodulator instance.
 */
 #define DRXJ_DEF_I2C_ADDR (0x52)
 
-/**
+/*
 * \def DRXJ_DEF_DEMOD_DEV_ID
 * \brief Default device identifier of a demodultor instance.
 */
 #define DRXJ_DEF_DEMOD_DEV_ID      (1)
 
-/**
+/*
 * \def DRXJ_SCAN_TIMEOUT
 * \brief Timeout value for waiting on demod lock during channel scan (millisec).
 */
 #define DRXJ_SCAN_TIMEOUT    1000
 
-/**
+/*
 * \def HI_I2C_DELAY
 * \brief HI timing delay for I2C timing (in nano seconds)
 *
@@ -234,7 +234,7 @@ DEFINES
 */
 #define HI_I2C_DELAY    42
 
-/**
+/*
 * \def HI_I2C_BRIDGE_DELAY
 * \brief HI timing delay for I2C timing (in nano seconds)
 *
@@ -242,13 +242,13 @@ DEFINES
 */
 #define HI_I2C_BRIDGE_DELAY   750
 
-/**
+/*
 * \brief Time Window for MER and SER Measurement in Units of Segment duration.
 */
 #define VSB_TOP_MEASUREMENT_PERIOD  64
 #define SYMBOLS_PER_SEGMENT         832
 
-/**
+/*
 * \brief bit rate and segment rate constants used for SER and BER.
 */
 /* values taken from the QAM microcode */
@@ -260,21 +260,21 @@ DEFINES
 #define DRXJ_QAM_SL_SIG_POWER_QAM64       43008
 #define DRXJ_QAM_SL_SIG_POWER_QAM128      20992
 #define DRXJ_QAM_SL_SIG_POWER_QAM256      43520
-/**
+/*
 * \brief Min supported symbolrates.
 */
 #ifndef DRXJ_QAM_SYMBOLRATE_MIN
 #define DRXJ_QAM_SYMBOLRATE_MIN          (520000)
 #endif
 
-/**
+/*
 * \brief Max supported symbolrates.
 */
 #ifndef DRXJ_QAM_SYMBOLRATE_MAX
 #define DRXJ_QAM_SYMBOLRATE_MAX         (7233000)
 #endif
 
-/**
+/*
 * \def DRXJ_QAM_MAX_WAITTIME
 * \brief Maximal wait time for QAM auto constellation in ms
 */
@@ -290,7 +290,7 @@ DEFINES
 #define DRXJ_QAM_DEMOD_LOCK_EXT_WAITTIME 200
 #endif
 
-/**
+/*
 * \def SCU status and results
 * \brief SCU
 */
@@ -299,7 +299,7 @@ DEFINES
 #define FEC_RS_MEASUREMENT_PERIOD   12894	/* 1 sec */
 #define FEC_RS_MEASUREMENT_PRESCALE 1	/* n sec */
 
-/**
+/*
 * \def DRX_AUD_MAX_DEVIATION
 * \brief Needed for calculation of prescale feature in AUD
 */
@@ -307,14 +307,14 @@ DEFINES
 #define DRXJ_AUD_MAX_FM_DEVIATION  100	/* kHz */
 #endif
 
-/**
+/*
 * \brief Needed for calculation of NICAM prescale feature in AUD
 */
 #ifndef DRXJ_AUD_MAX_NICAM_PRESCALE
 #define DRXJ_AUD_MAX_NICAM_PRESCALE  (9)	/* dB */
 #endif
 
-/**
+/*
 * \brief Needed for calculation of NICAM prescale feature in AUD
 */
 #ifndef DRXJ_AUD_MAX_WAITTIME
@@ -371,21 +371,21 @@ DEFINES
 /*============================================================================*/
 /*=== GLOBAL VARIABLEs =======================================================*/
 /*============================================================================*/
-/**
+/*
 */
 
-/**
+/*
 * \brief Temporary register definitions.
 *        (register definitions that are not yet available in register master)
 */
 
-/******************************************************************************/
+/*****************************************************************************/
 /* Audio block 0x103 is write only. To avoid shadowing in driver accessing    */
 /* RAM adresses directly. This must be READ ONLY to avoid problems.           */
 /* Writing to the interface adresses is more than only writing the RAM        */
 /* locations                                                                  */
-/******************************************************************************/
-/**
+/*****************************************************************************/
+/*
 * \brief RAM location of MODUS registers
 */
 #define AUD_DEM_RAM_MODUS_HI__A              0x10204A3
@@ -394,13 +394,13 @@ DEFINES
 #define AUD_DEM_RAM_MODUS_LO__A              0x10204A4
 #define AUD_DEM_RAM_MODUS_LO__M              0x0FFF
 
-/**
+/*
 * \brief RAM location of I2S config registers
 */
 #define AUD_DEM_RAM_I2S_CONFIG1__A           0x10204B1
 #define AUD_DEM_RAM_I2S_CONFIG2__A           0x10204B2
 
-/**
+/*
 * \brief RAM location of DCO config registers
 */
 #define AUD_DEM_RAM_DCO_B_HI__A              0x1020461
@@ -408,20 +408,20 @@ DEFINES
 #define AUD_DEM_RAM_DCO_A_HI__A              0x1020463
 #define AUD_DEM_RAM_DCO_A_LO__A              0x1020464
 
-/**
+/*
 * \brief RAM location of Threshold registers
 */
 #define AUD_DEM_RAM_NICAM_THRSHLD__A         0x102045A
 #define AUD_DEM_RAM_A2_THRSHLD__A            0x10204BB
 #define AUD_DEM_RAM_BTSC_THRSHLD__A          0x10204A6
 
-/**
+/*
 * \brief RAM location of Carrier Threshold registers
 */
 #define AUD_DEM_RAM_CM_A_THRSHLD__A          0x10204AF
 #define AUD_DEM_RAM_CM_B_THRSHLD__A          0x10204B0
 
-/**
+/*
 * \brief FM Matrix register fix
 */
 #ifdef AUD_DEM_WR_FM_MATRIX__A
@@ -430,7 +430,7 @@ DEFINES
 #define AUD_DEM_WR_FM_MATRIX__A              0x105006F
 
 /*============================================================================*/
-/**
+/*
 * \brief Defines required for audio
 */
 #define AUD_VOLUME_ZERO_DB                      115
@@ -443,14 +443,14 @@ DEFINES
 #define AUD_I2S_FREQUENCY_MIN                   12000UL
 #define AUD_RDS_ARRAY_SIZE                      18
 
-/**
+/*
 * \brief Needed for calculation of prescale feature in AUD
 */
 #ifndef DRX_AUD_MAX_FM_DEVIATION
 #define DRX_AUD_MAX_FM_DEVIATION  (100)	/* kHz */
 #endif
 
-/**
+/*
 * \brief Needed for calculation of NICAM prescale feature in AUD
 */
 #ifndef DRX_AUD_MAX_NICAM_PRESCALE
@@ -478,7 +478,7 @@ DEFINES
 /*=== REGISTER ACCESS MACROS =================================================*/
 /*============================================================================*/
 
-/**
+/*
 * This macro is used to create byte arrays for block writes.
 * Block writes speed up I2C traffic between host and demod.
 * The macro takes care of the required byte order in a 16 bits word.
@@ -486,7 +486,7 @@ DEFINES
 */
 #define DRXJ_16TO8(x) ((u8) (((u16)x) & 0xFF)), \
 		       ((u8)((((u16)x)>>8)&0xFF))
-/**
+/*
 * This macro is used to convert byte array to 16 bit register value for block read.
 * Block read speed up I2C traffic between host and demod.
 * The macro takes care of the required byte order in a 16 bits word.
@@ -501,7 +501,7 @@ DEFINES
 /*=== HI COMMAND RELATED DEFINES =============================================*/
 /*============================================================================*/
 
-/**
+/*
 * \brief General maximum number of retries for ucode command interfaces
 */
 #define DRXJ_MAX_RETRIES (100)
@@ -807,7 +807,7 @@ static struct drxj_data drxj_data_g = {
 	 },
 };
 
-/**
+/*
 * \var drxj_default_addr_g
 * \brief Default I2C address and device identifier.
 */
@@ -816,7 +816,7 @@ static struct i2c_device_addr drxj_default_addr_g = {
 	DRXJ_DEF_DEMOD_DEV_ID	/* device id */
 };
 
-/**
+/*
 * \var drxj_default_comm_attr_g
 * \brief Default common attributes of a drxj demodulator instance.
 */
@@ -887,7 +887,7 @@ static struct drx_common_attr drxj_default_comm_attr_g = {
 	0			/* mfx */
 };
 
-/**
+/*
 * \var drxj_default_demod_g
 * \brief Default drxj demodulator instance.
 */
@@ -897,7 +897,7 @@ static struct drx_demod_instance drxj_default_demod_g = {
 	&drxj_data_g		/* demod device specific attributes */
 };
 
-/**
+/*
 * \brief Default audio data structure for DRK demodulator instance.
 *
 * This structure is DRXK specific.
@@ -997,7 +997,7 @@ struct drxj_hi_cmd {
 /*=== MICROCODE RELATED STRUCTURES ===========================================*/
 /*============================================================================*/
 
-/**
+/*
  * struct drxu_code_block_hdr - Structure of the microcode block headers
  *
  * @addr:	Destination address of the data in this block
@@ -1086,7 +1086,7 @@ static u32 frac28(u32 N, u32 D)
 	return Q1;
 }
 
-/**
+/*
 * \fn u32 log1_times100( u32 x)
 * \brief Compute: 100*log10(x)
 * \param x 32 bits
@@ -1198,7 +1198,7 @@ static u32 log1_times100(u32 x)
 
 }
 
-/**
+/*
 * \fn u32 frac_times1e6( u16 N, u32 D)
 * \brief Compute: (N/D) * 1000000.
 * \param N nominator 16-bits.
@@ -1235,7 +1235,7 @@ static u32 frac_times1e6(u32 N, u32 D)
 /*============================================================================*/
 
 
-/**
+/*
 * \brief Values for NICAM prescaler gain. Computed from dB to integer
 *        and rounded. For calc used formula: 16*10^(prescaleGain[dB]/20).
 *
@@ -1280,7 +1280,7 @@ static const u16 nicam_presc_table_val[43] = {
 #define DRXJ_DAP_AUDTRIF_TIMEOUT 80	/* millisec */
 /*============================================================================*/
 
-/**
+/*
 * \fn bool is_handled_by_aud_tr_if( u32 addr )
 * \brief Check if this address is handled by the audio token ring interface.
 * \param addr
@@ -1386,7 +1386,7 @@ int drxbsp_i2c_write_read(struct i2c_device_addr *w_dev_addr,
 
 /*============================================================================*/
 
-/******************************
+/*****************************
 *
 * int drxdap_fasi_read_block (
 *      struct i2c_device_addr *dev_addr,      -- address of I2C device
@@ -1498,7 +1498,7 @@ static int drxdap_fasi_read_block(struct i2c_device_addr *dev_addr,
 }
 
 
-/******************************
+/*****************************
 *
 * int drxdap_fasi_read_reg16 (
 *     struct i2c_device_addr *dev_addr, -- address of I2C device
@@ -1531,7 +1531,7 @@ static int drxdap_fasi_read_reg16(struct i2c_device_addr *dev_addr,
 	return rc;
 }
 
-/******************************
+/*****************************
 *
 * int drxdap_fasi_read_reg32 (
 *     struct i2c_device_addr *dev_addr, -- address of I2C device
@@ -1566,7 +1566,7 @@ static int drxdap_fasi_read_reg32(struct i2c_device_addr *dev_addr,
 	return rc;
 }
 
-/******************************
+/*****************************
 *
 * int drxdap_fasi_write_block (
 *      struct i2c_device_addr *dev_addr,    -- address of I2C device
@@ -1705,7 +1705,7 @@ static int drxdap_fasi_write_block(struct i2c_device_addr *dev_addr,
 	return first_err;
 }
 
-/******************************
+/*****************************
 *
 * int drxdap_fasi_write_reg16 (
 *     struct i2c_device_addr *dev_addr, -- address of I2C device
@@ -1734,7 +1734,7 @@ static int drxdap_fasi_write_reg16(struct i2c_device_addr *dev_addr,
 	return drxdap_fasi_write_block(dev_addr, addr, sizeof(data), buf, flags);
 }
 
-/******************************
+/*****************************
 *
 * int drxdap_fasi_read_modify_write_reg16 (
 *      struct i2c_device_addr *dev_addr,   -- address of I2C device
@@ -1778,7 +1778,7 @@ static int drxdap_fasi_read_modify_write_reg16(struct i2c_device_addr *dev_addr,
 	return rc;
 }
 
-/******************************
+/*****************************
 *
 * int drxdap_fasi_write_reg32 (
 *     struct i2c_device_addr *dev_addr, -- address of I2C device
@@ -1811,7 +1811,7 @@ static int drxdap_fasi_write_reg32(struct i2c_device_addr *dev_addr,
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int drxj_dap_rm_write_reg16short
 * \brief Read modify write 16 bits audio register using short format only.
 * \param dev_addr
@@ -1890,7 +1890,7 @@ static int drxj_dap_read_modify_write_reg16(struct i2c_device_addr *dev_addr,
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int drxj_dap_read_aud_reg16
 * \brief Read 16 bits audio register
 * \param dev_addr
@@ -1997,7 +1997,7 @@ static int drxj_dap_read_reg16(struct i2c_device_addr *dev_addr,
 }
 /*============================================================================*/
 
-/**
+/*
 * \fn int drxj_dap_write_aud_reg16
 * \brief Write 16 bits audio register
 * \param dev_addr
@@ -2086,7 +2086,7 @@ static int drxj_dap_write_reg16(struct i2c_device_addr *dev_addr,
 #define DRXJ_HI_ATOMIC_READ      SIO_HI_RA_RAM_PAR_3_ACP_RW_READ
 #define DRXJ_HI_ATOMIC_WRITE     SIO_HI_RA_RAM_PAR_3_ACP_RW_WRITE
 
-/**
+/*
 * \fn int drxj_dap_atomic_read_write_block()
 * \brief Basic access routine for atomic read or write access
 * \param dev_addr  pointer to i2c dev address
@@ -2168,7 +2168,7 @@ int drxj_dap_atomic_read_write_block(struct i2c_device_addr *dev_addr,
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int drxj_dap_atomic_read_reg32()
 * \brief Atomic read of 32 bits words
 */
@@ -2215,7 +2215,7 @@ int drxj_dap_atomic_read_reg32(struct i2c_device_addr *dev_addr,
 /*============================================================================*/
 /*============================================================================*/
 
-/**
+/*
 * \fn int hi_cfg_command()
 * \brief Configure HI with settings stored in the demod structure.
 * \param demod Demodulator.
@@ -2258,7 +2258,7 @@ static int hi_cfg_command(const struct drx_demod_instance *demod)
 	return rc;
 }
 
-/**
+/*
 * \fn int hi_command()
 * \brief Configure HI with settings stored in the demod structure.
 * \param dev_addr I2C address.
@@ -2369,7 +2369,7 @@ hi_command(struct i2c_device_addr *dev_addr, const struct drxj_hi_cmd *cmd, u16
 	return rc;
 }
 
-/**
+/*
 * \fn int init_hi( const struct drx_demod_instance *demod )
 * \brief Initialise and configurate HI.
 * \param demod pointer to demod data.
@@ -2450,7 +2450,7 @@ static int init_hi(const struct drx_demod_instance *demod)
 /*============================================================================*/
 /*============================================================================*/
 
-/**
+/*
 * \fn int get_device_capabilities()
 * \brief Get and store device capabilities.
 * \param demod  Pointer to demodulator instance.
@@ -2656,7 +2656,7 @@ static int get_device_capabilities(struct drx_demod_instance *demod)
 	return rc;
 }
 
-/**
+/*
 * \fn int power_up_device()
 * \brief Power up device.
 * \param demod  Pointer to demodulator instance.
@@ -2710,7 +2710,7 @@ static int power_up_device(struct drx_demod_instance *demod)
 /*----------------------------------------------------------------------------*/
 /* MPEG Output Configuration Functions - begin                                */
 /*----------------------------------------------------------------------------*/
-/**
+/*
 * \fn int ctrl_set_cfg_mpeg_output()
 * \brief Set MPEG output configuration of the device.
 * \param devmod  Pointer to demodulator instance.
@@ -3356,7 +3356,7 @@ ctrl_set_cfg_mpeg_output(struct drx_demod_instance *demod, struct drx_cfg_mpeg_o
 /* miscellaneous configurations - begin                           */
 /*----------------------------------------------------------------------------*/
 
-/**
+/*
 * \fn int set_mpegtei_handling()
 * \brief Activate MPEG TEI handling settings.
 * \param devmod  Pointer to demodulator instance.
@@ -3429,7 +3429,7 @@ static int set_mpegtei_handling(struct drx_demod_instance *demod)
 }
 
 /*----------------------------------------------------------------------------*/
-/**
+/*
 * \fn int bit_reverse_mpeg_output()
 * \brief Set MPEG output bit-endian settings.
 * \param devmod  Pointer to demodulator instance.
@@ -3472,7 +3472,7 @@ static int bit_reverse_mpeg_output(struct drx_demod_instance *demod)
 }
 
 /*----------------------------------------------------------------------------*/
-/**
+/*
 * \fn int set_mpeg_start_width()
 * \brief Set MPEG start width.
 * \param devmod  Pointer to demodulator instance.
@@ -3522,7 +3522,7 @@ static int set_mpeg_start_width(struct drx_demod_instance *demod)
 /*----------------------------------------------------------------------------*/
 /* UIO Configuration Functions - begin                                        */
 /*----------------------------------------------------------------------------*/
-/**
+/*
 * \fn int ctrl_set_uio_cfg()
 * \brief Configure modus oprandi UIO.
 * \param demod Pointer to demodulator instance.
@@ -3659,7 +3659,7 @@ static int ctrl_set_uio_cfg(struct drx_demod_instance *demod, struct drxuio_cfg
 	return rc;
 }
 
-/**
+/*
 * \fn int ctrl_uio_write()
 * \brief Write to a UIO.
 * \param demod Pointer to demodulator instance.
@@ -3868,7 +3868,7 @@ ctrl_uio_write(struct drx_demod_instance *demod, struct drxuio_data *uio_data)
 /*----------------------------------------------------------------------------*/
 /* I2C Bridge Functions - begin                                               */
 /*----------------------------------------------------------------------------*/
-/**
+/*
 * \fn int ctrl_i2c_bridge()
 * \brief Open or close the I2C switch to tuner.
 * \param demod Pointer to demodulator instance.
@@ -3903,7 +3903,7 @@ ctrl_i2c_bridge(struct drx_demod_instance *demod, bool *bridge_closed)
 /*----------------------------------------------------------------------------*/
 /* Smart antenna Functions - begin                                            */
 /*----------------------------------------------------------------------------*/
-/**
+/*
 * \fn int smart_ant_init()
 * \brief Initialize Smart Antenna.
 * \param pointer to struct drx_demod_instance.
@@ -4116,7 +4116,7 @@ static int scu_command(struct i2c_device_addr *dev_addr, struct drxjscu_cmd *cmd
 	return rc;
 }
 
-/**
+/*
 * \fn int DRXJ_DAP_SCUAtomicReadWriteBlock()
 * \brief Basic access routine for SCU atomic read or write access
 * \param dev_addr  pointer to i2c dev address
@@ -4188,7 +4188,7 @@ int drxj_dap_scu_atomic_read_write_block(struct i2c_device_addr *dev_addr, u32 a
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int DRXJ_DAP_AtomicReadReg16()
 * \brief Atomic read of 16 bits words
 */
@@ -4216,7 +4216,7 @@ int drxj_dap_scu_atomic_read_reg16(struct i2c_device_addr *dev_addr,
 }
 
 /*============================================================================*/
-/**
+/*
 * \fn int drxj_dap_scu_atomic_write_reg16()
 * \brief Atomic read of 16 bits words
 */
@@ -4237,7 +4237,7 @@ int drxj_dap_scu_atomic_write_reg16(struct i2c_device_addr *dev_addr,
 }
 
 /* -------------------------------------------------------------------------- */
-/**
+/*
 * \brief Measure result of ADC synchronisation
 * \param demod demod instance
 * \param count (returned) count
@@ -4297,7 +4297,7 @@ static int adc_sync_measurement(struct drx_demod_instance *demod, u16 *count)
 	return rc;
 }
 
-/**
+/*
 * \brief Synchronize analog and digital clock domains
 * \param demod demod instance
 * \return int.
@@ -4365,7 +4365,7 @@ static int adc_synchronization(struct drx_demod_instance *demod)
 /*==                8VSB & QAM COMMON DATAPATH FUNCTIONS                    ==*/
 /*============================================================================*/
 /*============================================================================*/
-/**
+/*
 * \fn int init_agc ()
 * \brief Initialize AGC for all standards.
 * \param demod instance of demodulator.
@@ -4741,7 +4741,7 @@ static int init_agc(struct drx_demod_instance *demod)
 	return rc;
 }
 
-/**
+/*
 * \fn int set_frequency ()
 * \brief Set frequency shift.
 * \param demod instance of demodulator.
@@ -4839,7 +4839,7 @@ set_frequency(struct drx_demod_instance *demod,
 	return rc;
 }
 
-/**
+/*
 * \fn int get_acc_pkt_err()
 * \brief Retrieve signal strength for VSB and QAM.
 * \param demod Pointer to demod instance
@@ -4891,7 +4891,7 @@ static int get_acc_pkt_err(struct drx_demod_instance *demod, u16 *packet_err)
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int set_agc_rf ()
 * \brief Configure RF AGC
 * \param demod instance of demodulator.
@@ -5105,7 +5105,7 @@ set_agc_rf(struct drx_demod_instance *demod, struct drxj_cfg_agc *agc_settings,
 	return rc;
 }
 
-/**
+/*
 * \fn int set_agc_if ()
 * \brief Configure If AGC
 * \param demod instance of demodulator.
@@ -5334,7 +5334,7 @@ set_agc_if(struct drx_demod_instance *demod, struct drxj_cfg_agc *agc_settings,
 	return rc;
 }
 
-/**
+/*
 * \fn int set_iqm_af ()
 * \brief Configure IQM AF registers
 * \param demod instance of demodulator.
@@ -5380,7 +5380,7 @@ static int set_iqm_af(struct drx_demod_instance *demod, bool active)
 /*============================================================================*/
 /*============================================================================*/
 
-/**
+/*
 * \fn int power_down_vsb ()
 * \brief Powr down QAM related blocks.
 * \param demod instance of demodulator.
@@ -5478,7 +5478,7 @@ static int power_down_vsb(struct drx_demod_instance *demod, bool primary)
 	return rc;
 }
 
-/**
+/*
 * \fn int set_vsb_leak_n_gain ()
 * \brief Set ATSC demod.
 * \param demod instance of demodulator.
@@ -5694,7 +5694,7 @@ static int set_vsb_leak_n_gain(struct drx_demod_instance *demod)
 	return rc;
 }
 
-/**
+/*
 * \fn int set_vsb()
 * \brief Set 8VSB demod.
 * \param demod instance of demodulator.
@@ -6200,7 +6200,7 @@ static int set_vsb(struct drx_demod_instance *demod)
 	return rc;
 }
 
-/**
+/*
 * \fn static short get_vsb_post_rs_pck_err(struct i2c_device_addr *dev_addr, u16 *PckErrs)
 * \brief Get the values of packet error in 8VSB mode
 * \return Error code
@@ -6239,7 +6239,7 @@ static int get_vsb_post_rs_pck_err(struct i2c_device_addr *dev_addr,
 	return rc;
 }
 
-/**
+/*
 * \fn static short GetVSBBer(struct i2c_device_addr *dev_addr, u32 *ber)
 * \brief Get the values of ber in VSB mode
 * \return Error code
@@ -6284,7 +6284,7 @@ static int get_vs_bpost_viterbi_ber(struct i2c_device_addr *dev_addr,
 	return rc;
 }
 
-/**
+/*
 * \fn static short get_vs_bpre_viterbi_ber(struct i2c_device_addr *dev_addr, u32 *ber)
 * \brief Get the values of ber in VSB mode
 * \return Error code
@@ -6306,7 +6306,7 @@ static int get_vs_bpre_viterbi_ber(struct i2c_device_addr *dev_addr,
 	return 0;
 }
 
-/**
+/*
 * \fn static int get_vsbmer(struct i2c_device_addr *dev_addr, u16 *mer)
 * \brief Get the values of MER
 * \return Error code
@@ -6340,7 +6340,7 @@ static int get_vsbmer(struct i2c_device_addr *dev_addr, u16 *mer)
 /*============================================================================*/
 /*============================================================================*/
 
-/**
+/*
 * \fn int power_down_qam ()
 * \brief Powr down QAM related blocks.
 * \param demod instance of demodulator.
@@ -6444,7 +6444,7 @@ static int power_down_qam(struct drx_demod_instance *demod, bool primary)
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int set_qam_measurement ()
 * \brief Setup of the QAM Measuremnt intervals for signal quality
 * \param demod instance of demod.
@@ -6656,7 +6656,7 @@ set_qam_measurement(struct drx_demod_instance *demod,
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int set_qam16 ()
 * \brief QAM16 specific setup
 * \param demod instance of demod.
@@ -6891,7 +6891,7 @@ static int set_qam16(struct drx_demod_instance *demod)
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int set_qam32 ()
 * \brief QAM32 specific setup
 * \param demod instance of demod.
@@ -7126,7 +7126,7 @@ static int set_qam32(struct drx_demod_instance *demod)
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int set_qam64 ()
 * \brief QAM64 specific setup
 * \param demod instance of demod.
@@ -7362,7 +7362,7 @@ static int set_qam64(struct drx_demod_instance *demod)
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int set_qam128 ()
 * \brief QAM128 specific setup
 * \param demod: instance of demod.
@@ -7597,7 +7597,7 @@ static int set_qam128(struct drx_demod_instance *demod)
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int set_qam256 ()
 * \brief QAM256 specific setup
 * \param demod: instance of demod.
@@ -7835,7 +7835,7 @@ static int set_qam256(struct drx_demod_instance *demod)
 #define QAM_SET_OP_CONSTELLATION 0x2
 #define QAM_SET_OP_SPECTRUM 0X4
 
-/**
+/*
 * \fn int set_qam ()
 * \brief Set QAM demod.
 * \param demod:   instance of demod.
@@ -8845,7 +8845,7 @@ static int qam_flip_spec(struct drx_demod_instance *demod, struct drx_channel *c
 #define  DEMOD_LOCKED   0x1
 #define  SYNC_FLIPPED   0x2
 #define  SPEC_MIRRORED  0x4
-/**
+/*
 * \fn int qam64auto ()
 * \brief auto do sync pattern switching and mirroring.
 * \param demod:   instance of demod.
@@ -8993,7 +8993,7 @@ qam64auto(struct drx_demod_instance *demod,
 	return rc;
 }
 
-/**
+/*
 * \fn int qam256auto ()
 * \brief auto do sync pattern switching and mirroring.
 * \param demod:   instance of demod.
@@ -9077,7 +9077,7 @@ qam256auto(struct drx_demod_instance *demod,
 	return rc;
 }
 
-/**
+/*
 * \fn int set_qam_channel ()
 * \brief Set QAM channel according to the requested constellation.
 * \param demod:   instance of demod.
@@ -9284,7 +9284,7 @@ set_qam_channel(struct drx_demod_instance *demod,
 
 /*============================================================================*/
 
-/**
+/*
 * \fn static short get_qamrs_err_count(struct i2c_device_addr *dev_addr)
 * \brief Get RS error count in QAM mode (used for post RS BER calculation)
 * \return Error code
@@ -9355,7 +9355,7 @@ get_qamrs_err_count(struct i2c_device_addr *dev_addr,
 
 /*============================================================================*/
 
-/**
+/*
  * \fn int get_sig_strength()
  * \brief Retrieve signal strength for VSB and QAM.
  * \param demod Pointer to demod instance
@@ -9435,7 +9435,7 @@ static int get_sig_strength(struct drx_demod_instance *demod, u16 *sig_strength)
 	return rc;
 }
 
-/**
+/*
 * \fn int ctrl_get_qam_sig_quality()
 * \brief Retrieve QAM signal quality from device.
 * \param devmod Pointer to demodulator instance.
@@ -9721,7 +9721,7 @@ ctrl_get_qam_sig_quality(struct drx_demod_instance *demod)
 */
 /* -------------------------------------------------------------------------- */
 
-/**
+/*
 * \fn int power_down_atv ()
 * \brief Power down ATV.
 * \param demod instance of demodulator
@@ -9822,7 +9822,7 @@ power_down_atv(struct drx_demod_instance *demod, enum drx_standard standard, boo
 
 /*============================================================================*/
 
-/**
+/*
 * \brief Power up AUD.
 * \param demod instance of demodulator
 * \return int.
@@ -9850,7 +9850,7 @@ static int power_down_aud(struct drx_demod_instance *demod)
 	return rc;
 }
 
-/**
+/*
 * \fn int set_orx_nsu_aox()
 * \brief Configure OrxNsuAox for OOB
 * \param demod instance of demodulator.
@@ -9884,7 +9884,7 @@ static int set_orx_nsu_aox(struct drx_demod_instance *demod, bool active)
 	return rc;
 }
 
-/**
+/*
 * \fn int ctrl_set_oob()
 * \brief Set OOB channel to be used.
 * \param demod instance of demodulator
@@ -9986,9 +9986,9 @@ static int ctrl_set_oob(struct drx_demod_instance *demod, struct drxoob *oob_par
 		    20;
 	}
 
-   /*********/
+   /********/
 	/* Stop  */
-   /*********/
+   /********/
 	rc = drxj_dap_write_reg16(dev_addr, ORX_COMM_EXEC__A, ORX_COMM_EXEC_STOP, 0);
 	if (rc != 0) {
 		pr_err("error %d\n", rc);
@@ -10004,9 +10004,9 @@ static int ctrl_set_oob(struct drx_demod_instance *demod, struct drxoob *oob_par
 		pr_err("error %d\n", rc);
 		goto rw_error;
 	}
-   /*********/
+   /********/
 	/* Reset */
-   /*********/
+   /********/
 	scu_cmd.command = SCU_RAM_COMMAND_STANDARD_OOB
 	    | SCU_RAM_COMMAND_CMD_DEMOD_RESET;
 	scu_cmd.parameter_len = 0;
@@ -10017,9 +10017,9 @@ static int ctrl_set_oob(struct drx_demod_instance *demod, struct drxoob *oob_par
 		pr_err("error %d\n", rc);
 		goto rw_error;
 	}
-   /***********/
+   /**********/
 	/* SET_ENV */
-   /***********/
+   /**********/
 	/* set frequency, spectrum inversion and data rate */
 	scu_cmd.command = SCU_RAM_COMMAND_STANDARD_OOB
 	    | SCU_RAM_COMMAND_CMD_DEMOD_SET_ENV;
@@ -10376,9 +10376,9 @@ static int ctrl_set_oob(struct drx_demod_instance *demod, struct drxoob *oob_par
 		pr_err("error %d\n", rc);
 		goto rw_error;
 	}
-	/*********/
+	/********/
 	/* Start */
-	/*********/
+	/********/
 	scu_cmd.command = SCU_RAM_COMMAND_STANDARD_OOB
 	    | SCU_RAM_COMMAND_CMD_DEMOD_START;
 	scu_cmd.parameter_len = 0;
@@ -10419,7 +10419,7 @@ static int ctrl_set_oob(struct drx_demod_instance *demod, struct drxoob *oob_par
 /*=============================================================================
   ===== ctrl_set_channel() ==========================================================
   ===========================================================================*/
-/**
+/*
 * \fn int ctrl_set_channel()
 * \brief Select a new transmission channel.
 * \param demod instance of demod.
@@ -10652,7 +10652,7 @@ ctrl_set_channel(struct drx_demod_instance *demod, struct drx_channel *channel)
   ===== SigQuality() ==========================================================
   ===========================================================================*/
 
-/**
+/*
 * \fn int ctrl_sig_quality()
 * \brief Retrieve signal quality form device.
 * \param devmod Pointer to demodulator instance.
@@ -10768,7 +10768,7 @@ ctrl_sig_quality(struct drx_demod_instance *demod,
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int ctrl_lock_status()
 * \brief Retrieve lock status .
 * \param dev_addr Pointer to demodulator device address.
@@ -10856,7 +10856,7 @@ ctrl_lock_status(struct drx_demod_instance *demod, enum drx_lock_status *lock_st
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int ctrl_set_standard()
 * \brief Set modulation standard to be used.
 * \param standard Modulation standard.
@@ -11012,7 +11012,7 @@ static void drxj_reset_mode(struct drxj_data *ext_attr)
 	ext_attr->vsb_pre_saw_cfg.use_pre_saw = true;
 }
 
-/**
+/*
 * \fn int ctrl_power_mode()
 * \brief Set the power mode of the device to the specified power mode
 * \param demod Pointer to demodulator instance.
@@ -11171,7 +11171,7 @@ ctrl_power_mode(struct drx_demod_instance *demod, enum drx_power_mode *mode)
 /*== CTRL Set/Get Config related functions ===================================*/
 /*============================================================================*/
 
-/**
+/*
 * \fn int ctrl_set_cfg_pre_saw()
 * \brief Set Pre-saw reference.
 * \param demod demod instance
@@ -11234,7 +11234,7 @@ ctrl_set_cfg_pre_saw(struct drx_demod_instance *demod, struct drxj_cfg_pre_saw *
 
 /*============================================================================*/
 
-/**
+/*
 * \fn int ctrl_set_cfg_afe_gain()
 * \brief Set AFE Gain.
 * \param demod demod instance
@@ -11324,7 +11324,7 @@ static int drx_ctrl_u_code(struct drx_demod_instance *demod,
 		       enum drxu_code_action action);
 static int drxj_set_lna_state(struct drx_demod_instance *demod, bool state);
 
-/**
+/*
 * \fn drxj_open()
 * \brief Open the demod instance, configure device, configure drxdriver
 * \return Status_t Return status.
@@ -11543,7 +11543,7 @@ static int drxj_open(struct drx_demod_instance *demod)
 }
 
 /*============================================================================*/
-/**
+/*
 * \fn drxj_close()
 * \brief Close the demod instance, power down the device
 * \return Status_t Return status.
@@ -11594,7 +11594,7 @@ static int drxj_close(struct drx_demod_instance *demod)
  * Microcode related functions
  */
 
-/**
+/*
  * drx_u_code_compute_crc	- Compute CRC of block of microcode data.
  * @block_data: Pointer to microcode data.
  * @nr_words:   Size of microcode block (number of 16 bits words).
@@ -11622,7 +11622,7 @@ static u16 drx_u_code_compute_crc(u8 *block_data, u16 nr_words)
 	return (u16)(crc_word >> 16);
 }
 
-/**
+/*
  * drx_check_firmware - checks if the loaded firmware is valid
  *
  * @demod:	demod structure
@@ -11708,7 +11708,7 @@ static int drx_check_firmware(struct drx_demod_instance *demod, u8 *mc_data,
 	return -EINVAL;
 }
 
-/**
+/*
  * drx_ctrl_u_code - Handle microcode upload or verify.
  * @dev_addr: Address of device.
  * @mc_info:  Pointer to information about microcode data.
diff --git a/drivers/media/dvb-frontends/drx39xyj/drxj.h b/drivers/media/dvb-frontends/drx39xyj/drxj.h
index 6c5b8f78f9f63bf7d4d938c49c300be7ab14ca0e..d3ee1c23bb2f94619b24e9f58f2bd0dd1a92e0ab 100644
--- a/drivers/media/dvb-frontends/drx39xyj/drxj.h
+++ b/drivers/media/dvb-frontends/drx39xyj/drxj.h
@@ -69,15 +69,15 @@ TYPEDEFS
 
 	struct drxjscu_cmd {
 		u16 command;
-			/**< Command number */
+			/*< Command number */
 		u16 parameter_len;
-			/**< Data length in byte */
+			/*< Data length in byte */
 		u16 result_len;
-			/**< result length in byte */
+			/*< result length in byte */
 		u16 *parameter;
-			/**< General purpous param */
+			/*< General purpous param */
 		u16 *result;
-			/**< General purpous param */};
+			/*< General purpous param */};
 
 /*============================================================================*/
 /*============================================================================*/
@@ -130,7 +130,7 @@ TYPEDEFS
 
 		DRXJ_CFG_MAX	/* dummy, never to be used */};
 
-/**
+/*
 * /struct enum drxj_cfg_smart_ant_io * smart antenna i/o.
 */
 enum drxj_cfg_smart_ant_io {
@@ -138,7 +138,7 @@ enum drxj_cfg_smart_ant_io {
 	DRXJ_SMT_ANT_INPUT
 };
 
-/**
+/*
 * /struct struct drxj_cfg_smart_ant * Set smart antenna.
 */
 	struct drxj_cfg_smart_ant {
@@ -146,7 +146,7 @@ enum drxj_cfg_smart_ant_io {
 		u16 ctrl_data;
 	};
 
-/**
+/*
 * /struct DRXJAGCSTATUS_t
 * AGC status information from the DRXJ-IQM-AF.
 */
@@ -158,7 +158,7 @@ struct drxj_agc_status {
 
 /* DRXJ_CFG_AGC_RF, DRXJ_CFG_AGC_IF */
 
-/**
+/*
 * /struct enum drxj_agc_ctrl_mode * Available AGCs modes in the DRXJ.
 */
 	enum drxj_agc_ctrl_mode {
@@ -166,7 +166,7 @@ struct drxj_agc_status {
 		DRX_AGC_CTRL_USER,
 		DRX_AGC_CTRL_OFF};
 
-/**
+/*
 * /struct struct drxj_cfg_agc * Generic interface for all AGCs present on the DRXJ.
 */
 	struct drxj_cfg_agc {
@@ -182,7 +182,7 @@ struct drxj_agc_status {
 
 /* DRXJ_CFG_PRE_SAW */
 
-/**
+/*
 * /struct struct drxj_cfg_pre_saw * Interface to configure pre SAW sense.
 */
 	struct drxj_cfg_pre_saw {
@@ -192,14 +192,14 @@ struct drxj_agc_status {
 
 /* DRXJ_CFG_AFE_GAIN */
 
-/**
+/*
 * /struct struct drxj_cfg_afe_gain * Interface to configure gain of AFE (LNA + PGA).
 */
 	struct drxj_cfg_afe_gain {
 		enum drx_standard standard;	/* standard to which these settings apply */
 		u16 gain;	/* gain in 0.1 dB steps, DRXJ range 140 .. 335 */};
 
-/**
+/*
 * /struct drxjrs_errors
 * Available failure information in DRXJ_FEC_RS.
 *
@@ -208,25 +208,25 @@ struct drxj_agc_status {
 */
 	struct drxjrs_errors {
 		u16 nr_bit_errors;
-				/**< no of pre RS bit errors          */
+				/*< no of pre RS bit errors          */
 		u16 nr_symbol_errors;
-				/**< no of pre RS symbol errors       */
+				/*< no of pre RS symbol errors       */
 		u16 nr_packet_errors;
-				/**< no of pre RS packet errors       */
+				/*< no of pre RS packet errors       */
 		u16 nr_failures;
-				/**< no of post RS failures to decode */
+				/*< no of post RS failures to decode */
 		u16 nr_snc_par_fail_count;
-				/**< no of post RS bit erros          */
+				/*< no of post RS bit erros          */
 	};
 
-/**
+/*
 * /struct struct drxj_cfg_vsb_misc * symbol error rate
 */
 	struct drxj_cfg_vsb_misc {
 		u32 symb_error;
-			      /**< symbol error rate sps */};
+			      /*< symbol error rate sps */};
 
-/**
+/*
 * /enum enum drxj_mpeg_output_clock_rate * Mpeg output clock rate.
 *
 */
@@ -234,7 +234,7 @@ struct drxj_agc_status {
 		DRXJ_MPEG_START_WIDTH_1CLKCYC,
 		DRXJ_MPEG_START_WIDTH_8CLKCYC};
 
-/**
+/*
 * /enum enum drxj_mpeg_output_clock_rate * Mpeg output clock rate.
 *
 */
@@ -247,20 +247,20 @@ struct drxj_agc_status {
 		DRXJ_MPEGOUTPUT_CLOCK_RATE_25313K,
 		DRXJ_MPEGOUTPUT_CLOCK_RATE_21696K};
 
-/**
+/*
 * /struct DRXJCfgMisc_t
 * Change TEI bit of MPEG output
 * reverse MPEG output bit order
 * set MPEG output clock rate
 */
 	struct drxj_cfg_mpeg_output_misc {
-		bool disable_tei_handling;	      /**< if true pass (not change) TEI bit */
-		bool bit_reverse_mpeg_outout;	      /**< if true, parallel: msb on MD0; serial: lsb out first */
+		bool disable_tei_handling;	      /*< if true pass (not change) TEI bit */
+		bool bit_reverse_mpeg_outout;	      /*< if true, parallel: msb on MD0; serial: lsb out first */
 		enum drxj_mpeg_output_clock_rate mpeg_output_clock_rate;
-						      /**< set MPEG output clock rate that overwirtes the derived one from symbol rate */
-		enum drxj_mpeg_start_width mpeg_start_width;  /**< set MPEG output start width */};
+						      /*< set MPEG output clock rate that overwirtes the derived one from symbol rate */
+		enum drxj_mpeg_start_width mpeg_start_width;  /*< set MPEG output start width */};
 
-/**
+/*
 * /enum enum drxj_xtal_freq * Supported external crystal reference frequency.
 */
 	enum drxj_xtal_freq {
@@ -269,21 +269,21 @@ struct drxj_agc_status {
 		DRXJ_XTAL_FREQ_20P25MHZ,
 		DRXJ_XTAL_FREQ_4MHZ};
 
-/**
+/*
 * /enum enum drxj_xtal_freq * Supported external crystal reference frequency.
 */
 	enum drxji2c_speed {
 		DRXJ_I2C_SPEED_400KBPS,
 		DRXJ_I2C_SPEED_100KBPS};
 
-/**
+/*
 * /struct struct drxj_cfg_hw_cfg * Get hw configuration, such as crystal reference frequency, I2C speed, etc...
 */
 	struct drxj_cfg_hw_cfg {
 		enum drxj_xtal_freq xtal_freq;
-				   /**< crystal reference frequency */
+				   /*< crystal reference frequency */
 		enum drxji2c_speed i2c_speed;
-				   /**< 100 or 400 kbps */};
+				   /*< 100 or 400 kbps */};
 
 /*
  *  DRXJ_CFG_ATV_MISC
@@ -352,7 +352,7 @@ struct drxj_cfg_oob_misc {
  *  DRXJ_CFG_ATV_OUTPUT
  */
 
-/**
+/*
 * /enum DRXJAttenuation_t
 * Attenuation setting for SIF AGC.
 *
@@ -363,7 +363,7 @@ struct drxj_cfg_oob_misc {
 		DRXJ_SIF_ATTENUATION_6DB,
 		DRXJ_SIF_ATTENUATION_9DB};
 
-/**
+/*
 * /struct struct drxj_cfg_atv_output * SIF attenuation setting.
 *
 */
@@ -398,7 +398,7 @@ struct drxj_cfg_atv_output {
 /*============================================================================*/
 
 /*========================================*/
-/**
+/*
 * /struct struct drxj_data * DRXJ specific attributes.
 *
 * Global data container for DRXJ specific data.
@@ -406,93 +406,93 @@ struct drxj_cfg_atv_output {
 */
 	struct drxj_data {
 		/* device capabilties (determined during drx_open()) */
-		bool has_lna;		  /**< true if LNA (aka PGA) present */
-		bool has_oob;		  /**< true if OOB supported */
-		bool has_ntsc;		  /**< true if NTSC supported */
-		bool has_btsc;		  /**< true if BTSC supported */
-		bool has_smatx;	  /**< true if mat_tx is available */
-		bool has_smarx;	  /**< true if mat_rx is available */
-		bool has_gpio;		  /**< true if GPIO is available */
-		bool has_irqn;		  /**< true if IRQN is available */
+		bool has_lna;		  /*< true if LNA (aka PGA) present */
+		bool has_oob;		  /*< true if OOB supported */
+		bool has_ntsc;		  /*< true if NTSC supported */
+		bool has_btsc;		  /*< true if BTSC supported */
+		bool has_smatx;	  /*< true if mat_tx is available */
+		bool has_smarx;	  /*< true if mat_rx is available */
+		bool has_gpio;		  /*< true if GPIO is available */
+		bool has_irqn;		  /*< true if IRQN is available */
 		/* A1/A2/A... */
-		u8 mfx;		  /**< metal fix */
+		u8 mfx;		  /*< metal fix */
 
 		/* tuner settings */
-		bool mirror_freq_spect_oob;/**< tuner inversion (true = tuner mirrors the signal */
+		bool mirror_freq_spect_oob;/*< tuner inversion (true = tuner mirrors the signal */
 
 		/* standard/channel settings */
-		enum drx_standard standard;	  /**< current standard information                     */
+		enum drx_standard standard;	  /*< current standard information                     */
 		enum drx_modulation constellation;
-					  /**< current constellation                            */
-		s32 frequency; /**< center signal frequency in KHz                   */
+					  /*< current constellation                            */
+		s32 frequency; /*< center signal frequency in KHz                   */
 		enum drx_bandwidth curr_bandwidth;
-					  /**< current channel bandwidth                        */
-		enum drx_mirror mirror;	  /**< current channel mirror                           */
+					  /*< current channel bandwidth                        */
+		enum drx_mirror mirror;	  /*< current channel mirror                           */
 
 		/* signal quality information */
-		u32 fec_bits_desired;	  /**< BER accounting period                            */
-		u16 fec_vd_plen;	  /**< no of trellis symbols: VD SER measurement period */
-		u16 qam_vd_prescale;	  /**< Viterbi Measurement Prescale                     */
-		u16 qam_vd_period;	  /**< Viterbi Measurement period                       */
-		u16 fec_rs_plen;	  /**< defines RS BER measurement period                */
-		u16 fec_rs_prescale;	  /**< ReedSolomon Measurement Prescale                 */
-		u16 fec_rs_period;	  /**< ReedSolomon Measurement period                   */
-		bool reset_pkt_err_acc;	  /**< Set a flag to reset accumulated packet error     */
-		u16 pkt_err_acc_start;	  /**< Set a flag to reset accumulated packet error     */
+		u32 fec_bits_desired;	  /*< BER accounting period                            */
+		u16 fec_vd_plen;	  /*< no of trellis symbols: VD SER measurement period */
+		u16 qam_vd_prescale;	  /*< Viterbi Measurement Prescale                     */
+		u16 qam_vd_period;	  /*< Viterbi Measurement period                       */
+		u16 fec_rs_plen;	  /*< defines RS BER measurement period                */
+		u16 fec_rs_prescale;	  /*< ReedSolomon Measurement Prescale                 */
+		u16 fec_rs_period;	  /*< ReedSolomon Measurement period                   */
+		bool reset_pkt_err_acc;	  /*< Set a flag to reset accumulated packet error     */
+		u16 pkt_err_acc_start;	  /*< Set a flag to reset accumulated packet error     */
 
 		/* HI configuration */
-		u16 hi_cfg_timing_div;	  /**< HI Configure() parameter 2                       */
-		u16 hi_cfg_bridge_delay;	  /**< HI Configure() parameter 3                       */
-		u16 hi_cfg_wake_up_key;	  /**< HI Configure() parameter 4                       */
-		u16 hi_cfg_ctrl;	  /**< HI Configure() parameter 5                       */
-		u16 hi_cfg_transmit;	  /**< HI Configure() parameter 6                       */
+		u16 hi_cfg_timing_div;	  /*< HI Configure() parameter 2                       */
+		u16 hi_cfg_bridge_delay;	  /*< HI Configure() parameter 3                       */
+		u16 hi_cfg_wake_up_key;	  /*< HI Configure() parameter 4                       */
+		u16 hi_cfg_ctrl;	  /*< HI Configure() parameter 5                       */
+		u16 hi_cfg_transmit;	  /*< HI Configure() parameter 6                       */
 
 		/* UIO configuration */
-		enum drxuio_mode uio_sma_rx_mode;/**< current mode of SmaRx pin                        */
-		enum drxuio_mode uio_sma_tx_mode;/**< current mode of SmaTx pin                        */
-		enum drxuio_mode uio_gpio_mode; /**< current mode of ASEL pin                         */
-		enum drxuio_mode uio_irqn_mode; /**< current mode of IRQN pin                         */
+		enum drxuio_mode uio_sma_rx_mode;/*< current mode of SmaRx pin                        */
+		enum drxuio_mode uio_sma_tx_mode;/*< current mode of SmaTx pin                        */
+		enum drxuio_mode uio_gpio_mode; /*< current mode of ASEL pin                         */
+		enum drxuio_mode uio_irqn_mode; /*< current mode of IRQN pin                         */
 
 		/* IQM fs frequecy shift and inversion */
-		u32 iqm_fs_rate_ofs;	   /**< frequency shifter setting after setchannel      */
-		bool pos_image;	   /**< Ture: positive image                            */
+		u32 iqm_fs_rate_ofs;	   /*< frequency shifter setting after setchannel      */
+		bool pos_image;	   /*< Ture: positive image                            */
 		/* IQM RC frequecy shift */
-		u32 iqm_rc_rate_ofs;	   /**< frequency shifter setting after setchannel      */
+		u32 iqm_rc_rate_ofs;	   /*< frequency shifter setting after setchannel      */
 
 		/* ATV configuration */
-		u32 atv_cfg_changed_flags; /**< flag: flags cfg changes */
-		s16 atv_top_equ0[DRXJ_COEF_IDX_MAX];	     /**< shadow of ATV_TOP_EQU0__A */
-		s16 atv_top_equ1[DRXJ_COEF_IDX_MAX];	     /**< shadow of ATV_TOP_EQU1__A */
-		s16 atv_top_equ2[DRXJ_COEF_IDX_MAX];	     /**< shadow of ATV_TOP_EQU2__A */
-		s16 atv_top_equ3[DRXJ_COEF_IDX_MAX];	     /**< shadow of ATV_TOP_EQU3__A */
-		bool phase_correction_bypass;/**< flag: true=bypass */
-		s16 atv_top_vid_peak;	  /**< shadow of ATV_TOP_VID_PEAK__A */
-		u16 atv_top_noise_th;	  /**< shadow of ATV_TOP_NOISE_TH__A */
-		bool enable_cvbs_output;  /**< flag CVBS ouput enable */
-		bool enable_sif_output;	  /**< flag SIF ouput enable */
+		u32 atv_cfg_changed_flags; /*< flag: flags cfg changes */
+		s16 atv_top_equ0[DRXJ_COEF_IDX_MAX];	     /*< shadow of ATV_TOP_EQU0__A */
+		s16 atv_top_equ1[DRXJ_COEF_IDX_MAX];	     /*< shadow of ATV_TOP_EQU1__A */
+		s16 atv_top_equ2[DRXJ_COEF_IDX_MAX];	     /*< shadow of ATV_TOP_EQU2__A */
+		s16 atv_top_equ3[DRXJ_COEF_IDX_MAX];	     /*< shadow of ATV_TOP_EQU3__A */
+		bool phase_correction_bypass;/*< flag: true=bypass */
+		s16 atv_top_vid_peak;	  /*< shadow of ATV_TOP_VID_PEAK__A */
+		u16 atv_top_noise_th;	  /*< shadow of ATV_TOP_NOISE_TH__A */
+		bool enable_cvbs_output;  /*< flag CVBS ouput enable */
+		bool enable_sif_output;	  /*< flag SIF ouput enable */
 		 enum drxjsif_attenuation sif_attenuation;
-					  /**< current SIF att setting */
+					  /*< current SIF att setting */
 		/* Agc configuration for QAM and VSB */
-		struct drxj_cfg_agc qam_rf_agc_cfg; /**< qam RF AGC config */
-		struct drxj_cfg_agc qam_if_agc_cfg; /**< qam IF AGC config */
-		struct drxj_cfg_agc vsb_rf_agc_cfg; /**< vsb RF AGC config */
-		struct drxj_cfg_agc vsb_if_agc_cfg; /**< vsb IF AGC config */
+		struct drxj_cfg_agc qam_rf_agc_cfg; /*< qam RF AGC config */
+		struct drxj_cfg_agc qam_if_agc_cfg; /*< qam IF AGC config */
+		struct drxj_cfg_agc vsb_rf_agc_cfg; /*< vsb RF AGC config */
+		struct drxj_cfg_agc vsb_if_agc_cfg; /*< vsb IF AGC config */
 
 		/* PGA gain configuration for QAM and VSB */
-		u16 qam_pga_cfg;	  /**< qam PGA config */
-		u16 vsb_pga_cfg;	  /**< vsb PGA config */
+		u16 qam_pga_cfg;	  /*< qam PGA config */
+		u16 vsb_pga_cfg;	  /*< vsb PGA config */
 
 		/* Pre SAW configuration for QAM and VSB */
 		struct drxj_cfg_pre_saw qam_pre_saw_cfg;
-					  /**< qam pre SAW config */
+					  /*< qam pre SAW config */
 		struct drxj_cfg_pre_saw vsb_pre_saw_cfg;
-					  /**< qam pre SAW config */
+					  /*< qam pre SAW config */
 
 		/* Version information */
-		char v_text[2][12];	  /**< allocated text versions */
-		struct drx_version v_version[2]; /**< allocated versions structs */
+		char v_text[2][12];	  /*< allocated text versions */
+		struct drx_version v_version[2]; /*< allocated versions structs */
 		struct drx_version_list v_list_elements[2];
-					  /**< allocated version list */
+					  /*< allocated version list */
 
 		/* smart antenna configuration */
 		bool smart_ant_inverted;
@@ -502,25 +502,25 @@ struct drxj_cfg_atv_output {
 		bool oob_power_on;
 
 		/* MPEG static bitrate setting */
-		u32 mpeg_ts_static_bitrate;  /**< bitrate static MPEG output */
-		bool disable_te_ihandling;  /**< MPEG TS TEI handling */
-		bool bit_reverse_mpeg_outout;/**< MPEG output bit order */
+		u32 mpeg_ts_static_bitrate;  /*< bitrate static MPEG output */
+		bool disable_te_ihandling;  /*< MPEG TS TEI handling */
+		bool bit_reverse_mpeg_outout;/*< MPEG output bit order */
 		 enum drxj_mpeg_output_clock_rate mpeg_output_clock_rate;
-					    /**< MPEG output clock rate */
+					    /*< MPEG output clock rate */
 		 enum drxj_mpeg_start_width mpeg_start_width;
-					    /**< MPEG Start width */
+					    /*< MPEG Start width */
 
 		/* Pre SAW & Agc configuration for ATV */
 		struct drxj_cfg_pre_saw atv_pre_saw_cfg;
-					  /**< atv pre SAW config */
-		struct drxj_cfg_agc atv_rf_agc_cfg; /**< atv RF AGC config */
-		struct drxj_cfg_agc atv_if_agc_cfg; /**< atv IF AGC config */
-		u16 atv_pga_cfg;	  /**< atv pga config    */
+					  /*< atv pre SAW config */
+		struct drxj_cfg_agc atv_rf_agc_cfg; /*< atv RF AGC config */
+		struct drxj_cfg_agc atv_if_agc_cfg; /*< atv IF AGC config */
+		u16 atv_pga_cfg;	  /*< atv pga config    */
 
 		u32 curr_symbol_rate;
 
 		/* pin-safe mode */
-		bool pdr_safe_mode;	    /**< PDR safe mode activated      */
+		bool pdr_safe_mode;	    /*< PDR safe mode activated      */
 		u16 pdr_safe_restore_val_gpio;
 		u16 pdr_safe_restore_val_v_sync;
 		u16 pdr_safe_restore_val_sma_rx;
@@ -531,12 +531,12 @@ struct drxj_cfg_atv_output {
 		enum drxj_cfg_oob_lo_power oob_lo_pow;
 
 		struct drx_aud_data aud_data;
-				    /**< audio storage                  */};
+				    /*< audio storage                  */};
 
 /*-------------------------------------------------------------------------
 Access MACROS
 -------------------------------------------------------------------------*/
-/**
+/*
 * \brief Compilable references to attributes
 * \param d pointer to demod instance
 *
@@ -554,7 +554,7 @@ Access MACROS
 DEFINES
 -------------------------------------------------------------------------*/
 
-/**
+/*
 * \def DRXJ_NTSC_CARRIER_FREQ_OFFSET
 * \brief Offset from picture carrier to centre frequency in kHz, in RF domain
 *
@@ -569,7 +569,7 @@ DEFINES
 */
 #define DRXJ_NTSC_CARRIER_FREQ_OFFSET           ((s32)(1750))
 
-/**
+/*
 * \def DRXJ_PAL_SECAM_BG_CARRIER_FREQ_OFFSET
 * \brief Offset from picture carrier to centre frequency in kHz, in RF domain
 *
@@ -585,7 +585,7 @@ DEFINES
 */
 #define DRXJ_PAL_SECAM_BG_CARRIER_FREQ_OFFSET   ((s32)(2375))
 
-/**
+/*
 * \def DRXJ_PAL_SECAM_DKIL_CARRIER_FREQ_OFFSET
 * \brief Offset from picture carrier to centre frequency in kHz, in RF domain
 *
@@ -601,7 +601,7 @@ DEFINES
 */
 #define DRXJ_PAL_SECAM_DKIL_CARRIER_FREQ_OFFSET ((s32)(2775))
 
-/**
+/*
 * \def DRXJ_PAL_SECAM_LP_CARRIER_FREQ_OFFSET
 * \brief Offset from picture carrier to centre frequency in kHz, in RF domain
 *
@@ -616,7 +616,7 @@ DEFINES
 */
 #define DRXJ_PAL_SECAM_LP_CARRIER_FREQ_OFFSET   ((s32)(-3255))
 
-/**
+/*
 * \def DRXJ_FM_CARRIER_FREQ_OFFSET
 * \brief Offset from sound carrier to centre frequency in kHz, in RF domain
 *
diff --git a/drivers/media/dvb-frontends/drxk.h b/drivers/media/dvb-frontends/drxk.h
index eb9bdc9f59c4661bef12b63d8d5f8ef41ccb5b14..b16fedbb53a3a23b80f643b737418e182dc7c84f 100644
--- a/drivers/media/dvb-frontends/drxk.h
+++ b/drivers/media/dvb-frontends/drxk.h
@@ -20,17 +20,18 @@
  * @antenna_dvbt:	GPIO bit for changing antenna to DVB-C. A value of 1
  *			means that 1=DVBC, 0 = DVBT. Zero means the opposite.
  * @mpeg_out_clk_strength: DRXK Mpeg output clock drive strength.
+ * @chunk_size:		maximum size for I2C messages
  * @microcode_name:	Name of the firmware file with the microcode
  * @qam_demod_parameter_count:	The number of parameters used for the command
  *				to set the demodulator parameters. All
  *				firmwares are using the 2-parameter commmand.
- *				An exception is the "drxk_a3.mc" firmware,
+ *				An exception is the ``drxk_a3.mc`` firmware,
  *				which uses the 4-parameter command.
  *				A value of 0 (default) or lower indicates that
  *				the correct number of parameters will be
  *				automatically detected.
  *
- * On the *_gpio vars, bit 0 is UIO-1, bit 1 is UIO-2 and bit 2 is
+ * On the ``*_gpio`` vars, bit 0 is UIO-1, bit 1 is UIO-2 and bit 2 is
  * UIO-3.
  */
 struct drxk_config {
@@ -52,6 +53,14 @@ struct drxk_config {
 };
 
 #if IS_REACHABLE(CONFIG_DVB_DRXK)
+/**
+ * Attach a drxk demod
+ *
+ * @config: pointer to &struct drxk_config with demod configuration.
+ * @i2c: i2c adapter to use.
+ *
+ * return: FE pointer on success, NULL on failure.
+ */
 extern struct dvb_frontend *drxk_attach(const struct drxk_config *config,
 					struct i2c_adapter *i2c);
 #else
diff --git a/drivers/media/dvb-frontends/drxk_hard.c b/drivers/media/dvb-frontends/drxk_hard.c
index 48a8aad47a74d00879cac9b328b18e33b175b3be..f59ac2e91c5995fa205d12d428575619323b61e4 100644
--- a/drivers/media/dvb-frontends/drxk_hard.c
+++ b/drivers/media/dvb-frontends/drxk_hard.c
@@ -207,9 +207,9 @@ static inline u32 log10times100(u32 value)
 	return (100L * intlog10(value)) >> 24;
 }
 
-/****************************************************************************/
+/***************************************************************************/
 /* I2C **********************************************************************/
-/****************************************************************************/
+/***************************************************************************/
 
 static int drxk_i2c_lock(struct drxk_state *state)
 {
@@ -3444,7 +3444,7 @@ static int dvbt_ctrl_set_sqi_speed(struct drxk_state *state,
 
 /*============================================================================*/
 
-/**
+/*
 * \brief Activate DVBT specific presets
 * \param demod instance of demodulator.
 * \return DRXStatus_t.
@@ -3484,7 +3484,7 @@ static int dvbt_activate_presets(struct drxk_state *state)
 
 /*============================================================================*/
 
-/**
+/*
 * \brief Initialize channelswitch-independent settings for DVBT.
 * \param demod instance of demodulator.
 * \return DRXStatus_t.
@@ -3696,7 +3696,7 @@ static int set_dvbt_standard(struct drxk_state *state,
 }
 
 /*============================================================================*/
-/**
+/*
 * \brief start dvbt demodulating for channel.
 * \param demod instance of demodulator.
 * \return DRXStatus_t.
@@ -3732,7 +3732,7 @@ static int dvbt_start(struct drxk_state *state)
 
 /*============================================================================*/
 
-/**
+/*
 * \brief Set up dvbt demodulator for channel.
 * \param demod instance of demodulator.
 * \return DRXStatus_t.
@@ -4086,7 +4086,7 @@ static int set_dvbt(struct drxk_state *state, u16 intermediate_freqk_hz,
 
 /*============================================================================*/
 
-/**
+/*
 * \brief Retrieve lock status .
 * \param demod    Pointer to demodulator instance.
 * \param lockStat Pointer to lock status structure.
@@ -4148,7 +4148,7 @@ static int power_up_qam(struct drxk_state *state)
 }
 
 
-/** Power Down QAM */
+/* Power Down QAM */
 static int power_down_qam(struct drxk_state *state)
 {
 	u16 data = 0;
@@ -4186,7 +4186,7 @@ static int power_down_qam(struct drxk_state *state)
 
 /*============================================================================*/
 
-/**
+/*
 * \brief Setup of the QAM Measurement intervals for signal quality
 * \param demod instance of demod.
 * \param modulation current modulation.
@@ -4461,7 +4461,7 @@ static int set_qam16(struct drxk_state *state)
 
 /*============================================================================*/
 
-/**
+/*
 * \brief QAM32 specific setup
 * \param demod instance of demod.
 * \return DRXStatus_t.
@@ -4657,7 +4657,7 @@ static int set_qam32(struct drxk_state *state)
 
 /*============================================================================*/
 
-/**
+/*
 * \brief QAM64 specific setup
 * \param demod instance of demod.
 * \return DRXStatus_t.
@@ -4852,7 +4852,7 @@ static int set_qam64(struct drxk_state *state)
 
 /*============================================================================*/
 
-/**
+/*
 * \brief QAM128 specific setup
 * \param demod: instance of demod.
 * \return DRXStatus_t.
@@ -5049,7 +5049,7 @@ static int set_qam128(struct drxk_state *state)
 
 /*============================================================================*/
 
-/**
+/*
 * \brief QAM256 specific setup
 * \param demod: instance of demod.
 * \return DRXStatus_t.
@@ -5244,7 +5244,7 @@ static int set_qam256(struct drxk_state *state)
 
 
 /*============================================================================*/
-/**
+/*
 * \brief Reset QAM block.
 * \param demod:   instance of demod.
 * \param channel: pointer to channel data.
@@ -5272,7 +5272,7 @@ static int qam_reset_qam(struct drxk_state *state)
 
 /*============================================================================*/
 
-/**
+/*
 * \brief Set QAM symbolrate.
 * \param demod:   instance of demod.
 * \param channel: pointer to channel data.
@@ -5341,7 +5341,7 @@ static int qam_set_symbolrate(struct drxk_state *state)
 
 /*============================================================================*/
 
-/**
+/*
 * \brief Get QAM lock status.
 * \param demod:   instance of demod.
 * \param channel: pointer to channel data.
diff --git a/drivers/media/dvb-frontends/dvb-pll.h b/drivers/media/dvb-frontends/dvb-pll.h
index 6aaa9c6bff9c253122c9b4d43b4fde449db7765e..212e0730f1549014029dfa7028a27d6724a4fd14 100644
--- a/drivers/media/dvb-frontends/dvb-pll.h
+++ b/drivers/media/dvb-frontends/dvb-pll.h
@@ -30,16 +30,17 @@
 #define DVB_PLL_TDEE4		       18
 #define DVB_PLL_THOMSON_DTT7520X       19
 
+#if IS_REACHABLE(CONFIG_DVB_PLL)
 /**
  * Attach a dvb-pll to the supplied frontend structure.
  *
- * @param fe Frontend to attach to.
- * @param pll_addr i2c address of the PLL (if used).
- * @param i2c i2c adapter to use (set to NULL if not used).
- * @param pll_desc_id dvb_pll_desc to use.
- * @return Frontend pointer on success, NULL on failure
+ * @fe: Frontend to attach to.
+ * @pll_addr: i2c address of the PLL (if used).
+ * @i2c: i2c adapter to use (set to NULL if not used).
+ * @pll_desc_id: dvb_pll_desc to use.
+ *
+ * return: Frontend pointer on success, NULL on failure
  */
-#if IS_REACHABLE(CONFIG_DVB_PLL)
 extern struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe,
 					   int pll_addr,
 					   struct i2c_adapter *i2c,
diff --git a/drivers/media/dvb-frontends/helene.h b/drivers/media/dvb-frontends/helene.h
index 333615491d9e8ee8ce1725e82dead3888c0cdb0f..c9fc81c7e4e7e0449daa6ba8d4485614bcb3f803 100644
--- a/drivers/media/dvb-frontends/helene.h
+++ b/drivers/media/dvb-frontends/helene.h
@@ -38,6 +38,7 @@ enum helene_xtal {
  * @set_tuner_priv:	Callback function private context
  * @set_tuner_callback:	Callback function that notifies the parent driver
  *			which tuner is active now
+ * @xtal: Cristal frequency as described by &enum helene_xtal
  */
 struct helene_config {
 	u8	i2c_address;
@@ -48,9 +49,31 @@ struct helene_config {
 };
 
 #if IS_REACHABLE(CONFIG_DVB_HELENE)
+/**
+ * Attach a helene tuner (terrestrial and cable standards)
+ *
+ * @fe: frontend to be attached
+ * @config: pointer to &struct helene_config with tuner configuration.
+ * @i2c: i2c adapter to use.
+ *
+ * return: FE pointer on success, NULL on failure.
+ */
 extern struct dvb_frontend *helene_attach(struct dvb_frontend *fe,
 					const struct helene_config *config,
 					struct i2c_adapter *i2c);
+
+/**
+ * Attach a helene tuner (satellite standards)
+ *
+ * @fe: frontend to be attached
+ * @config: pointer to &struct helene_config with tuner configuration.
+ * @i2c: i2c adapter to use.
+ *
+ * return: FE pointer on success, NULL on failure.
+ */
+extern struct dvb_frontend *helene_attach_s(struct dvb_frontend *fe,
+					const struct helene_config *config,
+					struct i2c_adapter *i2c);
 #else
 static inline struct dvb_frontend *helene_attach(struct dvb_frontend *fe,
 					const struct helene_config *config,
@@ -59,13 +82,6 @@ static inline struct dvb_frontend *helene_attach(struct dvb_frontend *fe,
 	pr_warn("%s: driver disabled by Kconfig\n", __func__);
 	return NULL;
 }
-#endif
-
-#if IS_REACHABLE(CONFIG_DVB_HELENE)
-extern struct dvb_frontend *helene_attach_s(struct dvb_frontend *fe,
-					const struct helene_config *config,
-					struct i2c_adapter *i2c);
-#else
 static inline struct dvb_frontend *helene_attach_s(struct dvb_frontend *fe,
 					const struct helene_config *config,
 					struct i2c_adapter *i2c)
diff --git a/drivers/media/dvb-frontends/horus3a.h b/drivers/media/dvb-frontends/horus3a.h
index 672a556df71a184f1a2a21a0d302378500ac7bfe..9157fd037e2fea82cfa0d3149ecb769d5f936244 100644
--- a/drivers/media/dvb-frontends/horus3a.h
+++ b/drivers/media/dvb-frontends/horus3a.h
@@ -41,6 +41,15 @@ struct horus3a_config {
 };
 
 #if IS_REACHABLE(CONFIG_DVB_HORUS3A)
+/**
+ * Attach a horus3a tuner
+ *
+ * @fe: frontend to be attached
+ * @config: pointer to &struct helene_config with tuner configuration.
+ * @i2c: i2c adapter to use.
+ *
+ * return: FE pointer on success, NULL on failure.
+ */
 extern struct dvb_frontend *horus3a_attach(struct dvb_frontend *fe,
 					const struct horus3a_config *config,
 					struct i2c_adapter *i2c);
diff --git a/drivers/media/dvb-frontends/ix2505v.c b/drivers/media/dvb-frontends/ix2505v.c
index 534b24fa2b95ae021dd4989091c312b8d1f4081e..965012ad5c596b4335abfaefcfde9f28aa2826a0 100644
--- a/drivers/media/dvb-frontends/ix2505v.c
+++ b/drivers/media/dvb-frontends/ix2505v.c
@@ -1,4 +1,4 @@
-/**
+/*
  * Driver for Sharp IX2505V (marked B0017) DVB-S silicon tuner
  *
  * Copyright (C) 2010 Malcolm Priestley
@@ -36,7 +36,7 @@ struct ix2505v_state {
 	u32 frequency;
 };
 
-/**
+/*
  *  Data read format of the Sharp IX2505V B0017
  *
  *  byte1:   1   |   1   |   0   |   0   |   0   |  MA1  |  MA0  |  1
@@ -99,7 +99,7 @@ static void ix2505v_release(struct dvb_frontend *fe)
 
 }
 
-/**
+/*
  *  Data write format of the Sharp IX2505V B0017
  *
  *  byte1:   1   |   1   |   0   |   0   |   0   | 0(MA1)| 0(MA0)|  0
diff --git a/drivers/media/dvb-frontends/ix2505v.h b/drivers/media/dvb-frontends/ix2505v.h
index 0b0a431c74f61a404c7c3c38dce035bc90a826b6..49ed93e754edbfea23d028ef041c863687c2ea3e 100644
--- a/drivers/media/dvb-frontends/ix2505v.h
+++ b/drivers/media/dvb-frontends/ix2505v.h
@@ -20,31 +20,33 @@
 #include "dvb_frontend.h"
 
 /**
- * Attach a ix2505v tuner to the supplied frontend structure.
+ * struct ix2505v_config - ix2505 attachment configuration
  *
- * @param fe Frontend to attach to.
- * @param config ix2505v_config structure
- * @return FE pointer on success, NULL on failure.
+ * @tuner_address: tuner address
+ * @tuner_gain: Baseband AMP gain control 0/1=0dB(default) 2=-2bB 3=-4dB
+ * @tuner_chargepump: Charge pump output +/- 0=120 1=260 2=555 3=1200(default)
+ * @min_delay_ms: delay after tune
+ * @tuner_write_only: disables reads
  */
-
 struct ix2505v_config {
 	u8 tuner_address;
-
-	/*Baseband AMP gain control 0/1=0dB(default) 2=-2bB 3=-4dB */
 	u8 tuner_gain;
-
-	/*Charge pump output +/- 0=120 1=260 2=555 3=1200(default) */
 	u8 tuner_chargepump;
-
-	/* delay after tune */
 	int min_delay_ms;
-
-	/* disables reads*/
 	u8 tuner_write_only;
 
 };
 
 #if IS_REACHABLE(CONFIG_DVB_IX2505V)
+/**
+ * Attach a ix2505v tuner to the supplied frontend structure.
+ *
+ * @fe: Frontend to attach to.
+ * @config: pointer to &struct ix2505v_config
+ * @i2c: pointer to &struct i2c_adapter.
+ *
+ * return: FE pointer on success, NULL on failure.
+ */
 extern struct dvb_frontend *ix2505v_attach(struct dvb_frontend *fe,
 	const struct ix2505v_config *config, struct i2c_adapter *i2c);
 #else
diff --git a/drivers/media/dvb-frontends/l64781.c b/drivers/media/dvb-frontends/l64781.c
index 68923c84679a2ea54aca1d35c6cc559c389c8c5c..e5a6c176666432d3e1c5c18a4c33233626f10e02 100644
--- a/drivers/media/dvb-frontends/l64781.c
+++ b/drivers/media/dvb-frontends/l64781.c
@@ -517,7 +517,7 @@ struct dvb_frontend* l64781_attach(const struct l64781_config* config,
 	state->i2c = i2c;
 	state->first = 1;
 
-	/**
+	/*
 	 *  the L64781 won't show up before we send the reset_and_configure()
 	 *  broadcast. If nothing responds there is no L64781 on the bus...
 	 */
diff --git a/drivers/media/dvb-frontends/m88ds3103.h b/drivers/media/dvb-frontends/m88ds3103.h
index 04b355a005fb539a7285a24a610a77e15ac1a321..1a8964a2265d7ef321a6c311d698b70f11fa4611 100644
--- a/drivers/media/dvb-frontends/m88ds3103.h
+++ b/drivers/media/dvb-frontends/m88ds3103.h
@@ -24,6 +24,34 @@
  * 0x68,
  */
 
+/**
+ * enum m88ds3103_ts_mode - TS connection mode
+ * @M88DS3103_TS_SERIAL:	TS output pin D0, normal
+ * @M88DS3103_TS_SERIAL_D7:	TS output pin D7
+ * @M88DS3103_TS_PARALLEL:	TS Parallel mode
+ * @M88DS3103_TS_CI:		TS CI Mode
+ */
+enum m88ds3103_ts_mode {
+	M88DS3103_TS_SERIAL,
+	M88DS3103_TS_SERIAL_D7,
+	M88DS3103_TS_PARALLEL,
+	M88DS3103_TS_CI
+};
+
+/**
+ * enum m88ds3103_clock_out
+ * @M88DS3103_CLOCK_OUT_DISABLED:	Clock output is disabled
+ * @M88DS3103_CLOCK_OUT_ENABLED:	Clock output is enabled with crystal
+ *					clock.
+ * @M88DS3103_CLOCK_OUT_ENABLED_DIV2:	Clock output is enabled with half
+ *					crystal clock.
+ */
+enum m88ds3103_clock_out {
+	M88DS3103_CLOCK_OUT_DISABLED,
+	M88DS3103_CLOCK_OUT_ENABLED,
+	M88DS3103_CLOCK_OUT_ENABLED_DIV2
+};
+
 /**
  * struct m88ds3103_platform_data - Platform data for the m88ds3103 driver
  * @clk: Clock frequency.
@@ -44,24 +72,16 @@
  * @get_dvb_frontend: Get DVB frontend.
  * @get_i2c_adapter: Get I2C adapter.
  */
-
 struct m88ds3103_platform_data {
 	u32 clk;
 	u16 i2c_wr_max;
-#define M88DS3103_TS_SERIAL             0 /* TS output pin D0, normal */
-#define M88DS3103_TS_SERIAL_D7          1 /* TS output pin D7 */
-#define M88DS3103_TS_PARALLEL           2 /* TS Parallel mode */
-#define M88DS3103_TS_CI                 3 /* TS CI Mode */
-	u8 ts_mode:2;
+	enum m88ds3103_ts_mode ts_mode;
 	u32 ts_clk;
+	enum m88ds3103_clock_out clk_out;
 	u8 ts_clk_pol:1;
 	u8 spec_inv:1;
 	u8 agc;
 	u8 agc_inv:1;
-#define M88DS3103_CLOCK_OUT_DISABLED        0
-#define M88DS3103_CLOCK_OUT_ENABLED         1
-#define M88DS3103_CLOCK_OUT_ENABLED_DIV2    2
-	u8 clk_out:2;
 	u8 envelope_mode:1;
 	u8 lnb_hv_pol:1;
 	u8 lnb_en_pol:1;
@@ -73,105 +93,60 @@ struct m88ds3103_platform_data {
 	u8 attach_in_use:1;
 };
 
-/*
- * Do not add new m88ds3103_attach() users! Use I2C bindings instead.
+/**
+ * struct m88ds3103_config - m88ds3102 configuration
+ *
+ * @i2c_addr:	I2C address. Default: none, must set. Example: 0x68, ...
+ * @clock:	Device's clock. Default: none, must set. Example: 27000000
+ * @i2c_wr_max: Max bytes I2C provider is asked to write at once.
+ *		Default: none, must set. Example: 33, 65, ...
+ * @ts_mode:	TS output mode, as defined by &enum m88ds3103_ts_mode.
+ *		Default: M88DS3103_TS_SERIAL.
+ * @ts_clk:	TS clk in KHz. Default: 0.
+ * @ts_clk_pol:	TS clk polarity.Default: 0.
+ *		1-active at falling edge; 0-active at rising edge.
+ * @spec_inv:	Spectrum inversion. Default: 0.
+ * @agc_inv:	AGC polarity. Default: 0.
+ * @clock_out:	Clock output, as defined by &enum m88ds3103_clock_out.
+ *		Default: M88DS3103_CLOCK_OUT_DISABLED.
+ * @envelope_mode: DiSEqC envelope mode. Default: 0.
+ * @agc:	AGC configuration. Default: none, must set.
+ * @lnb_hv_pol:	LNB H/V pin polarity. Default: 0. Values:
+ *		1: pin high set to VOLTAGE_13, pin low to set VOLTAGE_18;
+ *		0: pin high set to VOLTAGE_18, pin low to set VOLTAGE_13.
+ * @lnb_en_pol:	LNB enable pin polarity. Default: 0. Values:
+ *		1: pin high to enable, pin low to disable;
+ *		0: pin high to disable, pin low to enable.
  */
 struct m88ds3103_config {
-	/*
-	 * I2C address
-	 * Default: none, must set
-	 * 0x68, ...
-	 */
 	u8 i2c_addr;
-
-	/*
-	 * clock
-	 * Default: none, must set
-	 * 27000000
-	 */
 	u32 clock;
-
-	/*
-	 * max bytes I2C provider is asked to write at once
-	 * Default: none, must set
-	 * 33, 65, ...
-	 */
 	u16 i2c_wr_max;
-
-	/*
-	 * TS output mode
-	 * Default: M88DS3103_TS_SERIAL
-	 */
-#define M88DS3103_TS_SERIAL             0 /* TS output pin D0, normal */
-#define M88DS3103_TS_SERIAL_D7          1 /* TS output pin D7 */
-#define M88DS3103_TS_PARALLEL           2 /* TS Parallel mode */
-#define M88DS3103_TS_CI                 3 /* TS CI Mode */
 	u8 ts_mode;
-
-	/*
-	 * TS clk in KHz
-	 * Default: 0.
-	 */
 	u32 ts_clk;
-
-	/*
-	 * TS clk polarity.
-	 * Default: 0. 1-active at falling edge; 0-active at rising edge.
-	 */
 	u8 ts_clk_pol:1;
-
-	/*
-	 * spectrum inversion
-	 * Default: 0
-	 */
 	u8 spec_inv:1;
-
-	/*
-	 * AGC polarity
-	 * Default: 0
-	 */
 	u8 agc_inv:1;
-
-	/*
-	 * clock output
-	 * Default: M88DS3103_CLOCK_OUT_DISABLED
-	 */
-#define M88DS3103_CLOCK_OUT_DISABLED        0
-#define M88DS3103_CLOCK_OUT_ENABLED         1
-#define M88DS3103_CLOCK_OUT_ENABLED_DIV2    2
 	u8 clock_out;
-
-	/*
-	 * DiSEqC envelope mode
-	 * Default: 0
-	 */
 	u8 envelope_mode:1;
-
-	/*
-	 * AGC configuration
-	 * Default: none, must set
-	 */
 	u8 agc;
-
-	/*
-	 * LNB H/V pin polarity
-	 * Default: 0.
-	 * 1: pin high set to VOLTAGE_13, pin low to set VOLTAGE_18.
-	 * 0: pin high set to VOLTAGE_18, pin low to set VOLTAGE_13.
-	 */
 	u8 lnb_hv_pol:1;
-
-	/*
-	 * LNB enable pin polarity
-	 * Default: 0.
-	 * 1: pin high to enable, pin low to disable.
-	 * 0: pin high to disable, pin low to enable.
-	 */
 	u8 lnb_en_pol:1;
 };
 
 #if defined(CONFIG_DVB_M88DS3103) || \
 		(defined(CONFIG_DVB_M88DS3103_MODULE) && defined(MODULE))
+/**
+ * Attach a m88ds3103 demod
+ *
+ * @config: pointer to &struct m88ds3103_config with demod configuration.
+ * @i2c: i2c adapter to use.
+ * @tuner_i2c: on success, returns the I2C adapter associated with
+ *		m88ds3103 tuner.
+ *
+ * return: FE pointer on success, NULL on failure.
+ * Note: Do not add new m88ds3103_attach() users! Use I2C bindings instead.
+ */
 extern struct dvb_frontend *m88ds3103_attach(
 		const struct m88ds3103_config *config,
 		struct i2c_adapter *i2c,
diff --git a/drivers/media/dvb-frontends/mb86a20s.h b/drivers/media/dvb-frontends/mb86a20s.h
index dfb02db2126c1061b00225c698d3305c8d3c9220..05c9725d1c5f94f0b234681c8351f4efc6a98016 100644
--- a/drivers/media/dvb-frontends/mb86a20s.h
+++ b/drivers/media/dvb-frontends/mb86a20s.h
@@ -26,7 +26,6 @@
  * @demod_address:	the demodulator's i2c address
  * @is_serial:		if true, TS is serial. Otherwise, TS is parallel
  */
-
 struct mb86a20s_config {
 	u32	fclk;
 	u8	demod_address;
@@ -34,9 +33,17 @@ struct mb86a20s_config {
 };
 
 #if IS_REACHABLE(CONFIG_DVB_MB86A20S)
+/**
+ * Attach a mb86a20s demod
+ *
+ * @config: pointer to &struct mb86a20s_config with demod configuration.
+ * @i2c: i2c adapter to use.
+ *
+ * return: FE pointer on success, NULL on failure.
+ */
 extern struct dvb_frontend *mb86a20s_attach(const struct mb86a20s_config *config,
 					   struct i2c_adapter *i2c);
-extern struct i2c_adapter *mb86a20s_get_tuner_i2c_adapter(struct dvb_frontend *);
+
 #else
 static inline struct dvb_frontend *mb86a20s_attach(
 	const struct mb86a20s_config *config, struct i2c_adapter *i2c)
@@ -44,12 +51,6 @@ static inline struct dvb_frontend *mb86a20s_attach(
 	printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
 	return NULL;
 }
-static inline struct i2c_adapter *
-	mb86a20s_get_tuner_i2c_adapter(struct dvb_frontend *fe)
-{
-	printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
-	return NULL;
-}
 #endif
 
 #endif /* MB86A20S */
diff --git a/drivers/media/dvb-frontends/mn88472.h b/drivers/media/dvb-frontends/mn88472.h
index 323632523876c6e7a97a4abf7c7b1f2e73b64fc2..8cd5ef61903b986b477eaea36a74d45e782d0655 100644
--- a/drivers/media/dvb-frontends/mn88472.h
+++ b/drivers/media/dvb-frontends/mn88472.h
@@ -19,21 +19,21 @@
 
 #include <linux/dvb/frontend.h>
 
+/* Define old names for backward compatibility */
+#define VARIABLE_TS_CLOCK   MN88472_TS_CLK_VARIABLE
+#define FIXED_TS_CLOCK      MN88472_TS_CLK_FIXED
+#define SERIAL_TS_MODE      MN88472_TS_MODE_SERIAL
+#define PARALLEL_TS_MODE    MN88472_TS_MODE_PARALLEL
+
 /**
  * struct mn88472_config - Platform data for the mn88472 driver
  * @xtal: Clock frequency.
  * @ts_mode: TS mode.
  * @ts_clock: TS clock config.
  * @i2c_wr_max: Max number of bytes driver writes to I2C at once.
- * @get_dvb_frontend: Get DVB frontend.
+ * @fe: pointer to a frontend pointer
+ * @get_dvb_frontend: Get DVB frontend callback.
  */
-
-/* Define old names for backward compatibility */
-#define VARIABLE_TS_CLOCK   MN88472_TS_CLK_VARIABLE
-#define FIXED_TS_CLOCK      MN88472_TS_CLK_FIXED
-#define SERIAL_TS_MODE      MN88472_TS_MODE_SERIAL
-#define PARALLEL_TS_MODE    MN88472_TS_MODE_PARALLEL
-
 struct mn88472_config {
 	unsigned int xtal;
 
diff --git a/drivers/media/dvb-frontends/rtl2830.h b/drivers/media/dvb-frontends/rtl2830.h
index 0cde151e66089c7b3319f5ea1c3ea39860986d36..458ac94e8a8b45da8ae4cde06a2837b4faf3a2fc 100644
--- a/drivers/media/dvb-frontends/rtl2830.h
+++ b/drivers/media/dvb-frontends/rtl2830.h
@@ -32,7 +32,6 @@
  * @pid_filter: Set PID to PID filter.
  * @pid_filter_ctrl: Control PID filter.
  */
-
 struct rtl2830_platform_data {
 	u32 clk;
 	bool spec_inv;
diff --git a/drivers/media/dvb-frontends/rtl2832.h b/drivers/media/dvb-frontends/rtl2832.h
index 03c0de039fa902e5d9ba524e1ee6e0770caf241a..6a124ff71c2b8694f430983210572d810645fba9 100644
--- a/drivers/media/dvb-frontends/rtl2832.h
+++ b/drivers/media/dvb-frontends/rtl2832.h
@@ -35,7 +35,6 @@
  * @pid_filter: Set PID to PID filter.
  * @pid_filter_ctrl: Control PID filter.
  */
-
 struct rtl2832_platform_data {
 	u32 clk;
 	/*
diff --git a/drivers/media/dvb-frontends/rtl2832_sdr.h b/drivers/media/dvb-frontends/rtl2832_sdr.h
index d8fc7e7212e34317d58897cb159dee8191f51c4c..8f88c2fb86273606fac1db738b6ed63c3bd7ce87 100644
--- a/drivers/media/dvb-frontends/rtl2832_sdr.h
+++ b/drivers/media/dvb-frontends/rtl2832_sdr.h
@@ -33,15 +33,11 @@
  * struct rtl2832_sdr_platform_data - Platform data for the rtl2832_sdr driver
  * @clk: Clock frequency (4000000, 16000000, 25000000, 28800000).
  * @tuner: Used tuner model.
- * @i2c_client: rtl2832 demod driver I2C client.
- * @bulk_read: rtl2832 driver private I/O interface.
- * @bulk_write: rtl2832 driver private I/O interface.
- * @update_bits: rtl2832 driver private I/O interface.
+ * @regmap: pointer to &struct regmap.
  * @dvb_frontend: rtl2832 DVB frontend.
  * @v4l2_subdev: Tuner v4l2 controls.
  * @dvb_usb_device: DVB USB interface for USB streaming.
  */
-
 struct rtl2832_sdr_platform_data {
 	u32 clk;
 	/*
diff --git a/drivers/media/dvb-frontends/sp887x.c b/drivers/media/dvb-frontends/sp887x.c
index 7c511c3cd4ca5f1286c6d8f8e9184ed8176d30ba..d2c402b52c6ef9c18b9f88340c489b9b850d0720 100644
--- a/drivers/media/dvb-frontends/sp887x.c
+++ b/drivers/media/dvb-frontends/sp887x.c
@@ -57,7 +57,7 @@ static int sp887x_writereg (struct sp887x_state* state, u16 reg, u16 data)
 	int ret;
 
 	if ((ret = i2c_transfer(state->i2c, &msg, 1)) != 1) {
-		/**
+		/*
 		 *  in case of soft reset we ignore ACK errors...
 		 */
 		if (!(reg == 0xf1a && data == 0x000 &&
@@ -130,7 +130,7 @@ static void sp887x_setup_agc (struct sp887x_state* state)
 
 #define BLOCKSIZE 30
 #define FW_SIZE 0x4000
-/**
+/*
  *  load firmware and setup MPEG interface...
  */
 static int sp887x_initial_setup (struct dvb_frontend* fe, const struct firmware *fw)
@@ -279,7 +279,7 @@ static int configure_reg0xc05(struct dtv_frontend_properties *p, u16 *reg0xc05)
 	return 0;
 }
 
-/**
+/*
  *  estimates division of two 24bit numbers,
  *  derived from the ves1820/stv0299 driver code
  */
diff --git a/drivers/media/dvb-frontends/stb6000.h b/drivers/media/dvb-frontends/stb6000.h
index 78e75dfc317f271e92ff786c34ffa9183bae6830..e94a3d5facf67b60d21f4b1a685805ba4b770c7d 100644
--- a/drivers/media/dvb-frontends/stb6000.h
+++ b/drivers/media/dvb-frontends/stb6000.h
@@ -26,15 +26,16 @@
 #include <linux/i2c.h>
 #include "dvb_frontend.h"
 
+#if IS_REACHABLE(CONFIG_DVB_STB6000)
 /**
  * Attach a stb6000 tuner to the supplied frontend structure.
  *
- * @param fe Frontend to attach to.
- * @param addr i2c address of the tuner.
- * @param i2c i2c adapter to use.
- * @return FE pointer on success, NULL on failure.
+ * @fe: Frontend to attach to.
+ * @addr: i2c address of the tuner.
+ * @i2c: i2c adapter to use.
+ *
+ * return: FE pointer on success, NULL on failure.
  */
-#if IS_REACHABLE(CONFIG_DVB_STB6000)
 extern struct dvb_frontend *stb6000_attach(struct dvb_frontend *fe, int addr,
 					   struct i2c_adapter *i2c);
 #else
diff --git a/drivers/media/dvb-frontends/stv0299.c b/drivers/media/dvb-frontends/stv0299.c
index b36b21a1320142c59a9716f5d1d01161b57d35ec..b1f3d675d316d289c3269e30340a67380d4c390c 100644
--- a/drivers/media/dvb-frontends/stv0299.c
+++ b/drivers/media/dvb-frontends/stv0299.c
@@ -368,7 +368,7 @@ static int stv0299_set_voltage(struct dvb_frontend *fe,
 	reg0x08 = stv0299_readreg (state, 0x08);
 	reg0x0c = stv0299_readreg (state, 0x0c);
 
-	/**
+	/*
 	 *  H/V switching over OP0, OP1 and OP2 are LNB power enable bits
 	 */
 	reg0x0c &= 0x0f;
diff --git a/drivers/media/dvb-frontends/tda10071.h b/drivers/media/dvb-frontends/tda10071.h
index 8f184026ee11058b6baef5d5cc20198353a4b038..da1a87bc160378d58a4c249a579f05c62c63f2af 100644
--- a/drivers/media/dvb-frontends/tda10071.h
+++ b/drivers/media/dvb-frontends/tda10071.h
@@ -38,7 +38,6 @@
  * @tuner_i2c_addr: CX24118A tuner I2C address (0x14, 0x54, ...).
  * @get_dvb_frontend: Get DVB frontend.
  */
-
 struct tda10071_platform_data {
 	u32 clk;
 	u16 i2c_wr_max;
diff --git a/drivers/media/dvb-frontends/tda826x.h b/drivers/media/dvb-frontends/tda826x.h
index 81abe1aebe9fba8d6746a2a34bf8430b1a4332ad..6a7bed12e741b1935486d2348c92e3ebea2dcd44 100644
--- a/drivers/media/dvb-frontends/tda826x.h
+++ b/drivers/media/dvb-frontends/tda826x.h
@@ -29,11 +29,12 @@
 /**
  * Attach a tda826x tuner to the supplied frontend structure.
  *
- * @param fe Frontend to attach to.
- * @param addr i2c address of the tuner.
- * @param i2c i2c adapter to use.
- * @param has_loopthrough Set to 1 if the card has a loopthrough RF connector.
- * @return FE pointer on success, NULL on failure.
+ * @fe: Frontend to attach to.
+ * @addr: i2c address of the tuner.
+ * @i2c: i2c adapter to use.
+ * @has_loopthrough: Set to 1 if the card has a loopthrough RF connector.
+ *
+ * return: FE pointer on success, NULL on failure.
  */
 #if IS_REACHABLE(CONFIG_DVB_TDA826X)
 extern struct dvb_frontend* tda826x_attach(struct dvb_frontend *fe, int addr,
diff --git a/drivers/media/dvb-frontends/tua6100.c b/drivers/media/dvb-frontends/tua6100.c
index 18e6d4c5be21ce75c0d6afbf714cf5d11406b67a..1d41abd47f043432c01c6e937347f67699fc39cc 100644
--- a/drivers/media/dvb-frontends/tua6100.c
+++ b/drivers/media/dvb-frontends/tua6100.c
@@ -1,4 +1,4 @@
-/**
+/*
  * Driver for Infineon tua6100 pll.
  *
  * (c) 2006 Andrew de Quincey
diff --git a/drivers/media/dvb-frontends/tua6100.h b/drivers/media/dvb-frontends/tua6100.h
index 9f15cbdfdeca98b85ec4b28245ca13c1ed557a4c..6c098a894ea65ce6a7b34d74515f2aebe51359d7 100644
--- a/drivers/media/dvb-frontends/tua6100.h
+++ b/drivers/media/dvb-frontends/tua6100.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Driver for Infineon tua6100 PLL.
  *
  * (c) 2006 Andrew de Quincey
diff --git a/drivers/media/dvb-frontends/zd1301_demod.h b/drivers/media/dvb-frontends/zd1301_demod.h
index ceb2e05e873cf9824e7d9283a2a8efa0cda4ddb3..6cd8f6f9c415632700101efecc131fdc59bc916a 100644
--- a/drivers/media/dvb-frontends/zd1301_demod.h
+++ b/drivers/media/dvb-frontends/zd1301_demod.h
@@ -27,7 +27,6 @@
  * @reg_read: Register read callback.
  * @reg_write: Register write callback.
  */
-
 struct zd1301_demod_platform_data {
 	void *reg_priv;
 	int (*reg_read)(void *, u16, u8 *);
@@ -41,8 +40,7 @@ struct zd1301_demod_platform_data {
  *
  * Return: Pointer to DVB frontend which given platform device owns.
  */
-
-struct dvb_frontend *zd1301_demod_get_dvb_frontend(struct platform_device *);
+struct dvb_frontend *zd1301_demod_get_dvb_frontend(struct platform_device *pdev);
 
 /**
  * zd1301_demod_get_i2c_adapter() - Get pointer to I2C adapter
@@ -50,11 +48,16 @@ struct dvb_frontend *zd1301_demod_get_dvb_frontend(struct platform_device *);
  *
  * Return: Pointer to I2C adapter which given platform device owns.
  */
-
-struct i2c_adapter *zd1301_demod_get_i2c_adapter(struct platform_device *);
+struct i2c_adapter *zd1301_demod_get_i2c_adapter(struct platform_device *pdev);
 
 #else
 
+/**
+ * zd1301_demod_get_dvb_frontend() - Attach a zd1301 frontend
+ * @dev: Pointer to platform device
+ *
+ * Return: Pointer to %struct dvb_frontend or NULL if attach fails.
+ */
 static inline struct dvb_frontend *zd1301_demod_get_dvb_frontend(struct platform_device *dev)
 {
 	printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
diff --git a/drivers/media/dvb-frontends/zl10036.c b/drivers/media/dvb-frontends/zl10036.c
index 062282739ce56136f8abd0c2d30d8c4b0a94fcab..89dd65ae88ad479f4c50474437e7dd4a6314221a 100644
--- a/drivers/media/dvb-frontends/zl10036.c
+++ b/drivers/media/dvb-frontends/zl10036.c
@@ -1,4 +1,4 @@
-/**
+/*
  * Driver for Zarlink zl10036 DVB-S silicon tuner
  *
  * Copyright (C) 2006 Tino Reichardt
@@ -157,7 +157,7 @@ static int zl10036_sleep(struct dvb_frontend *fe)
 	return ret;
 }
 
-/**
+/*
  * register map of the ZL10036/ZL10038
  *
  * reg[default] content
@@ -219,7 +219,7 @@ static int zl10036_set_bandwidth(struct zl10036_state *state, u32 fbw)
 	if (fbw <= 28820) {
 		br = _BR_MAXIMUM;
 	} else {
-		/**
+		/*
 		 *  f(bw)=34,6MHz f(xtal)=10.111MHz
 		 *  br = (10111/34600) * 63 * 1/K = 14;
 		 */
@@ -315,7 +315,7 @@ static int zl10036_set_params(struct dvb_frontend *fe)
 	||  (frequency > fe->ops.info.frequency_max))
 		return -EINVAL;
 
-	/**
+	/*
 	 * alpha = 1.35 for dvb-s
 	 * fBW = (alpha*symbolrate)/(2*0.8)
 	 * 1.35 / (2*0.8) = 27 / 32
diff --git a/drivers/media/dvb-frontends/zl10036.h b/drivers/media/dvb-frontends/zl10036.h
index 88751adfecf7c6f5d8bf824f9f7ba1b2a7d4b4dc..ec90ca9277392bba1b8cec2d7b05cb4b32f6a84b 100644
--- a/drivers/media/dvb-frontends/zl10036.h
+++ b/drivers/media/dvb-frontends/zl10036.h
@@ -20,20 +20,20 @@
 #include <linux/i2c.h>
 #include "dvb_frontend.h"
 
-/**
- * Attach a zl10036 tuner to the supplied frontend structure.
- *
- * @param fe Frontend to attach to.
- * @param config zl10036_config structure
- * @return FE pointer on success, NULL on failure.
- */
-
 struct zl10036_config {
 	u8 tuner_address;
 	int rf_loop_enable;
 };
 
 #if IS_REACHABLE(CONFIG_DVB_ZL10036)
+/**
+ * Attach a zl10036 tuner to the supplied frontend structure.
+ *
+ * @fe: Frontend to attach to.
+ * @config: zl10036_config structure.
+ * @i2c: pointer to struct i2c_adapter.
+ * return: FE pointer on success, NULL on failure.
+ */
 extern struct dvb_frontend *zl10036_attach(struct dvb_frontend *fe,
 	const struct zl10036_config *config, struct i2c_adapter *i2c);
 #else
diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig
index 3c6d6428f5258c9f82e9d033c1d8540bc11283f4..cb5d7ff829151ca0940ce14d1372c0747233eccf 100644
--- a/drivers/media/i2c/Kconfig
+++ b/drivers/media/i2c/Kconfig
@@ -676,6 +676,7 @@ config VIDEO_OV13858
 	tristate "OmniVision OV13858 sensor support"
 	depends on I2C && VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API
 	depends on MEDIA_CAMERA_SUPPORT
+	select V4L2_FWNODE
 	---help---
 	  This is a Video4Linux2 sensor-level driver for the OmniVision
 	  OV13858 camera.
diff --git a/drivers/media/i2c/et8ek8/Kconfig b/drivers/media/i2c/et8ek8/Kconfig
index 14399365ad7f93421b32971c14a826721a2cc993..9fe409e956662b1dcc07bb900696b46a722d3bfc 100644
--- a/drivers/media/i2c/et8ek8/Kconfig
+++ b/drivers/media/i2c/et8ek8/Kconfig
@@ -1,6 +1,7 @@
 config VIDEO_ET8EK8
 	tristate "ET8EK8 camera sensor support"
 	depends on I2C && VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API
+	select V4L2_FWNODE
 	---help---
 	  This is a driver for the Toshiba ET8EK8 5 MP camera sensor.
 	  It is used for example in Nokia N900 (RX-51).
diff --git a/drivers/media/i2c/imx274.c b/drivers/media/i2c/imx274.c
index 800b9bf9cdd31c430572f8e0a9490913379aaac0..2f71af2f90bf98978165579f28a5c01680def0c1 100644
--- a/drivers/media/i2c/imx274.c
+++ b/drivers/media/i2c/imx274.c
@@ -1770,8 +1770,7 @@ static int imx274_probe(struct i2c_client *client,
 	return 0;
 
 err_ctrls:
-	v4l2_async_unregister_subdev(sd);
-	v4l2_ctrl_handler_free(sd->ctrl_handler);
+	v4l2_ctrl_handler_free(&imx274->ctrls.handler);
 err_me:
 	media_entity_cleanup(&sd->entity);
 err_regmap:
@@ -1788,7 +1787,7 @@ static int imx274_remove(struct i2c_client *client)
 	imx274_write_table(imx274, mode_table[IMX274_MODE_STOP_STREAM]);
 
 	v4l2_async_unregister_subdev(sd);
-	v4l2_ctrl_handler_free(sd->ctrl_handler);
+	v4l2_ctrl_handler_free(&imx274->ctrls.handler);
 	media_entity_cleanup(&sd->entity);
 	mutex_destroy(&imx274->lock);
 	return 0;
diff --git a/drivers/media/i2c/lm3560.c b/drivers/media/i2c/lm3560.c
index 251a2aaf98c3b2665061266f31a248268d2f051e..b600e03aa94b06dd531f88b78adffd33b03ec051 100644
--- a/drivers/media/i2c/lm3560.c
+++ b/drivers/media/i2c/lm3560.c
@@ -50,6 +50,7 @@ enum led_enable {
 /**
  * struct lm3560_flash
  *
+ * @dev: pointer to &struct device
  * @pdata: platform data
  * @regmap: reg. map for i2c
  * @lock: muxtex for serial access.
diff --git a/drivers/media/i2c/m5mols/m5mols_capture.c b/drivers/media/i2c/m5mols/m5mols_capture.c
index a0cd6dc32eb0d75e55c58866686db9af87f31cf0..0fb457f579957445252ff8dc6cb1ef86146c2cd2 100644
--- a/drivers/media/i2c/m5mols/m5mols_capture.c
+++ b/drivers/media/i2c/m5mols/m5mols_capture.c
@@ -33,6 +33,10 @@
 
 /**
  * m5mols_read_rational - I2C read of a rational number
+ * @sd: sub-device, as pointed by struct v4l2_subdev
+ * @addr_num: numerator register
+ * @addr_den: denominator register
+ * @val: place to store the division result
  *
  * Read numerator and denominator from registers @addr_num and @addr_den
  * respectively and return the division result in @val.
@@ -53,6 +57,7 @@ static int m5mols_read_rational(struct v4l2_subdev *sd, u32 addr_num,
 
 /**
  * m5mols_capture_info - Gather captured image information
+ * @info: M-5MOLS driver data structure
  *
  * For now it gathers only EXIF information and file size.
  */
diff --git a/drivers/media/i2c/m5mols/m5mols_controls.c b/drivers/media/i2c/m5mols/m5mols_controls.c
index c2218c0a9e6fa11c43401265d065671c086e6cac..82eab7c2bc8cb59735db17529730a52c94c879e8 100644
--- a/drivers/media/i2c/m5mols/m5mols_controls.c
+++ b/drivers/media/i2c/m5mols/m5mols_controls.c
@@ -126,6 +126,7 @@ static struct m5mols_scenemode m5mols_default_scenemode[] = {
 
 /**
  * m5mols_do_scenemode() - Change current scenemode
+ * @info: M-5MOLS driver data structure
  * @mode:	Desired mode of the scenemode
  *
  * WARNING: The execution order is important. Do not change the order.
diff --git a/drivers/media/i2c/m5mols/m5mols_core.c b/drivers/media/i2c/m5mols/m5mols_core.c
index 463534d44756e81052967822881de9a1ae68e8ba..12e79f9e32d53c090a001bc1036398a209cb9de1 100644
--- a/drivers/media/i2c/m5mols/m5mols_core.c
+++ b/drivers/media/i2c/m5mols/m5mols_core.c
@@ -114,7 +114,8 @@ static const struct m5mols_resolution m5mols_reg_res[] = {
 
 /**
  * m5mols_swap_byte - an byte array to integer conversion function
- * @size: size in bytes of I2C packet defined in the M-5MOLS datasheet
+ * @data: byte array
+ * @length: size in bytes of I2C packet defined in the M-5MOLS datasheet
  *
  * Convert I2C data byte array with performing any required byte
  * reordering to assure proper values for each data type, regardless
@@ -132,8 +133,9 @@ static u32 m5mols_swap_byte(u8 *data, u8 length)
 
 /**
  * m5mols_read -  I2C read function
- * @reg: combination of size, category and command for the I2C packet
+ * @sd: sub-device, as pointed by struct v4l2_subdev
  * @size: desired size of I2C packet
+ * @reg: combination of size, category and command for the I2C packet
  * @val: read value
  *
  * Returns 0 on success, or else negative errno.
@@ -232,6 +234,7 @@ int m5mols_read_u32(struct v4l2_subdev *sd, u32 reg, u32 *val)
 
 /**
  * m5mols_write - I2C command write function
+ * @sd: sub-device, as pointed by struct v4l2_subdev
  * @reg: combination of size, category and command for the I2C packet
  * @val: value to write
  *
@@ -284,6 +287,7 @@ int m5mols_write(struct v4l2_subdev *sd, u32 reg, u32 val)
 
 /**
  * m5mols_busy_wait - Busy waiting with I2C register polling
+ * @sd: sub-device, as pointed by struct v4l2_subdev
  * @reg: the I2C_REG() address of an 8-bit status register to check
  * @value: expected status register value
  * @mask: bit mask for the read status register value
@@ -316,6 +320,8 @@ int m5mols_busy_wait(struct v4l2_subdev *sd, u32 reg, u32 value, u32 mask,
 
 /**
  * m5mols_enable_interrupt - Clear interrupt pending bits and unmask interrupts
+ * @sd: sub-device, as pointed by struct v4l2_subdev
+ * @reg: combination of size, category and command for the I2C packet
  *
  * Before writing desired interrupt value the INT_FACTOR register should
  * be read to clear pending interrupts.
@@ -349,6 +355,8 @@ int m5mols_wait_interrupt(struct v4l2_subdev *sd, u8 irq_mask, u32 timeout)
 
 /**
  * m5mols_reg_mode - Write the mode and check busy status
+ * @sd: sub-device, as pointed by struct v4l2_subdev
+ * @mode: the required operation mode
  *
  * It always accompanies a little delay changing the M-5MOLS mode, so it is
  * needed checking current busy status to guarantee right mode.
@@ -364,6 +372,7 @@ static int m5mols_reg_mode(struct v4l2_subdev *sd, u8 mode)
 
 /**
  * m5mols_set_mode - set the M-5MOLS controller mode
+ * @info: M-5MOLS driver data structure
  * @mode: the required operation mode
  *
  * The commands of M-5MOLS are grouped into specific modes. Each functionality
@@ -421,6 +430,7 @@ int m5mols_set_mode(struct m5mols_info *info, u8 mode)
 
 /**
  * m5mols_get_version - retrieve full revisions information of M-5MOLS
+ * @sd: sub-device, as pointed by struct v4l2_subdev
  *
  * The version information includes revisions of hardware and firmware,
  * AutoFocus alghorithm version and the version string.
@@ -489,6 +499,7 @@ static enum m5mols_restype __find_restype(u32 code)
 
 /**
  * __find_resolution - Lookup preset and type of M-5MOLS's resolution
+ * @sd: sub-device, as pointed by struct v4l2_subdev
  * @mf: pixel format to find/negotiate the resolution preset for
  * @type: M-5MOLS resolution type
  * @resolution:	M-5MOLS resolution preset register value
@@ -662,6 +673,7 @@ static const struct v4l2_subdev_pad_ops m5mols_pad_ops = {
 
 /**
  * m5mols_restore_controls - Apply current control values to the registers
+ * @info: M-5MOLS driver data structure
  *
  * m5mols_do_scenemode() handles all parameters for which there is yet no
  * individual control. It should be replaced at some point by setting each
@@ -686,6 +698,7 @@ int m5mols_restore_controls(struct m5mols_info *info)
 
 /**
  * m5mols_start_monitor - Start the monitor mode
+ * @info: M-5MOLS driver data structure
  *
  * Before applying the controls setup the resolution and frame rate
  * in PARAMETER mode, and then switch over to MONITOR mode.
@@ -789,6 +802,7 @@ int __attribute__ ((weak)) m5mols_update_fw(struct v4l2_subdev *sd,
 
 /**
  * m5mols_fw_start - M-5MOLS internal ARM controller initialization
+ * @sd: sub-device, as pointed by struct v4l2_subdev
  *
  * Execute the M-5MOLS internal ARM controller initialization sequence.
  * This function should be called after the supply voltage has been
@@ -844,6 +858,8 @@ static int m5mols_auto_focus_stop(struct m5mols_info *info)
 
 /**
  * m5mols_s_power - Main sensor power control function
+ * @sd: sub-device, as pointed by struct v4l2_subdev
+ * @on: if true, powers on the device; powers off otherwise.
  *
  * To prevent breaking the lens when the sensor is powered off the Soft-Landing
  * algorithm is called where available. The Soft-Landing algorithm availability
diff --git a/drivers/media/i2c/ov5647.c b/drivers/media/i2c/ov5647.c
index 34179d232a35d47dcc22f65c6ff1e8f673f4877e..da39c49de503c260b3cc78bed1c7a13c57472bf0 100644
--- a/drivers/media/i2c/ov5647.c
+++ b/drivers/media/i2c/ov5647.c
@@ -428,8 +428,8 @@ static int ov5647_sensor_set_register(struct v4l2_subdev *sd,
 }
 #endif
 
-/**
- * @short Subdev core operations registration
+/*
+ * Subdev core operations registration
  */
 static const struct v4l2_subdev_core_ops ov5647_subdev_core_ops = {
 	.s_power		= ov5647_sensor_power,
diff --git a/drivers/media/i2c/s5k6a3.c b/drivers/media/i2c/s5k6a3.c
index 67dcca76f981cc5a38a23f6a098cc71dc37622e9..2e140272794b2589f40ecf45628e325c06678af1 100644
--- a/drivers/media/i2c/s5k6a3.c
+++ b/drivers/media/i2c/s5k6a3.c
@@ -53,6 +53,9 @@ enum {
  * @gpio_reset: GPIO connected to the sensor's reset pin
  * @lock: mutex protecting the structure's members below
  * @format: media bus format at the sensor's source pad
+ * @clock: pointer to &struct clk.
+ * @clock_frequency: clock frequency
+ * @power_count: stores state if device is powered
  */
 struct s5k6a3 {
 	struct device *dev;
diff --git a/drivers/media/i2c/s5k6aa.c b/drivers/media/i2c/s5k6aa.c
index 9fd254a8e20d2771c2b92cfe29ba1494af1e2d1f..13c10b5e2b451ccedaae039e1d92a9ecc56d9155 100644
--- a/drivers/media/i2c/s5k6aa.c
+++ b/drivers/media/i2c/s5k6aa.c
@@ -421,6 +421,7 @@ static int s5k6aa_set_ahb_address(struct i2c_client *client)
 
 /**
  * s5k6aa_configure_pixel_clock - apply ISP main clock/PLL configuration
+ * @s5k6aa: pointer to &struct s5k6aa describing the device
  *
  * Configure the internal ISP PLL for the required output frequency.
  * Locking: called with s5k6aa.lock mutex held.
@@ -669,6 +670,7 @@ static int s5k6aa_set_input_params(struct s5k6aa *s5k6aa)
 
 /**
  * s5k6aa_configure_video_bus - configure the video output interface
+ * @s5k6aa: pointer to &struct s5k6aa describing the device
  * @bus_type: video bus type: parallel or MIPI-CSI
  * @nlanes: number of MIPI lanes to be used (MIPI-CSI only)
  *
@@ -724,6 +726,8 @@ static int s5k6aa_new_config_sync(struct i2c_client *client, int timeout,
 
 /**
  * s5k6aa_set_prev_config - write user preview register set
+ * @s5k6aa: pointer to &struct s5k6aa describing the device
+ * @preset: s5kaa preset to be applied
  *
  * Configure output resolution and color fromat, pixel clock
  * frequency range, device frame rate type and frame period range.
@@ -777,6 +781,7 @@ static int s5k6aa_set_prev_config(struct s5k6aa *s5k6aa,
 
 /**
  * s5k6aa_initialize_isp - basic ISP MCU initialization
+ * @sd: pointer to V4L2 sub-device descriptor
  *
  * Configure AHB addresses for registers read/write; configure PLLs for
  * required output pixel clock. The ISP power supply needs to be already
diff --git a/drivers/media/i2c/tvp514x.c b/drivers/media/i2c/tvp514x.c
index ad2df998f9c563dc9274d182429ad66eaed45251..d575b3e7e835efe60c94756f94027e55dd30da41 100644
--- a/drivers/media/i2c/tvp514x.c
+++ b/drivers/media/i2c/tvp514x.c
@@ -86,6 +86,7 @@ static int tvp514x_s_stream(struct v4l2_subdev *sd, int enable);
 /**
  * struct tvp514x_decoder - TVP5146/47 decoder object
  * @sd: Subdevice Slave handle
+ * @hdl: embedded &struct v4l2_ctrl_handler
  * @tvp514x_regs: copy of hw's regs with preset values.
  * @pdata: Board specific
  * @ver: Chip version
@@ -98,6 +99,9 @@ static int tvp514x_s_stream(struct v4l2_subdev *sd, int enable);
  * @std_list: Standards list
  * @input: Input routing at chip level
  * @output: Output routing at chip level
+ * @pad: subdev media pad associated with the decoder
+ * @format: media bus frame format
+ * @int_seq: driver's register init sequence
  */
 struct tvp514x_decoder {
 	struct v4l2_subdev sd;
@@ -211,7 +215,7 @@ static struct tvp514x_reg tvp514x_reg_list_default[] = {
 	{TOK_TERM, 0, 0},
 };
 
-/**
+/*
  * List of image formats supported by TVP5146/47 decoder
  * Currently we are using 8 bit mode only, but can be
  * extended to 10/20 bit mode.
@@ -226,7 +230,7 @@ static const struct v4l2_fmtdesc tvp514x_fmt_list[] = {
 	},
 };
 
-/**
+/*
  * Supported standards -
  *
  * Currently supports two standards only, need to add support for rest of the
@@ -931,7 +935,7 @@ static int tvp514x_get_pad_format(struct v4l2_subdev *sd,
  * tvp514x_set_pad_format() - V4L2 decoder interface handler for set pad format
  * @sd: pointer to standard V4L2 sub-device structure
  * @cfg: pad configuration
- * @format: pointer to v4l2_subdev_format structure
+ * @fmt: pointer to v4l2_subdev_format structure
  *
  * Set pad format for the output pad
  */
@@ -1199,7 +1203,7 @@ static const struct tvp514x_reg tvp514xm_init_reg_seq[] = {
 	{TOK_TERM, 0, 0},
 };
 
-/**
+/*
  * I2C Device Table -
  *
  * name - Name of the actual device/chip.
diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
index 11829c0fa138050df4a80be72da20d422e6f9c54..509d69e6ca4adc27e7d26d05367f9d75d96e656d 100644
--- a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
+++ b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
@@ -82,11 +82,11 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);
  * @start_addr_lo:	DMA ring buffer start address, lower part
  * @start_addr_hi:	DMA ring buffer start address, higher part
  * @size:		DMA ring buffer size register
-			Bits [0-7]:	DMA packet size, 188 bytes
-			Bits [16-23]:	packets count in block, 128 packets
-			Bits [24-31]:	blocks count, 8 blocks
+ *			* Bits [0-7]:	DMA packet size, 188 bytes
+ *			* Bits [16-23]:	packets count in block, 128 packets
+ *			* Bits [24-31]:	blocks count, 8 blocks
  * @timeout:		DMA timeout in units of 8ns
-			For example, value of 375000000 equals to 3 sec
+ *			For example, value of 375000000 equals to 3 sec
  * @curr_addr_lo:	Current ring buffer head address, lower part
  * @curr_addr_hi:	Current ring buffer head address, higher part
  * @stat_pkt_received:	Statistic register, not tested
diff --git a/drivers/media/pci/solo6x10/solo6x10-enc.c b/drivers/media/pci/solo6x10/solo6x10-enc.c
index d28211bb967420c4470ad45896b55cbb9ea1a516..58d6b5131dd045a78c9a4d960d1df6423c7c291c 100644
--- a/drivers/media/pci/solo6x10/solo6x10-enc.c
+++ b/drivers/media/pci/solo6x10/solo6x10-enc.c
@@ -175,7 +175,7 @@ int solo_osd_print(struct solo_enc_dev *solo_enc)
 	return 0;
 }
 
-/**
+/*
  * Set channel Quality Profile (0-3).
  */
 void solo_s_jpeg_qp(struct solo_dev *solo_dev, unsigned int ch,
diff --git a/drivers/media/pci/sta2x11/sta2x11_vip.c b/drivers/media/pci/sta2x11/sta2x11_vip.c
index eb5a9eae7c8ef7046fc08333bff244621959d732..dd199bfc1d457ad36dda7bccbf59ce66862f82e4 100644
--- a/drivers/media/pci/sta2x11/sta2x11_vip.c
+++ b/drivers/media/pci/sta2x11/sta2x11_vip.c
@@ -404,6 +404,7 @@ static const struct v4l2_file_operations vip_fops = {
  * vidioc_querycap - return capabilities of device
  * @file: descriptor of device
  * @cap: contains return values
+ * @priv: unused
  *
  * the capabilities of the device are returned
  *
@@ -429,6 +430,7 @@ static int vidioc_querycap(struct file *file, void *priv,
  * vidioc_s_std - set video standard
  * @file: descriptor of device
  * @std: contains standard to be set
+ * @priv: unused
  *
  * the video standard is set
  *
@@ -466,6 +468,7 @@ static int vidioc_s_std(struct file *file, void *priv, v4l2_std_id std)
 /**
  * vidioc_g_std - get video standard
  * @file: descriptor of device
+ * @priv: unused
  * @std: contains return values
  *
  * the current video standard is returned
@@ -483,6 +486,7 @@ static int vidioc_g_std(struct file *file, void *priv, v4l2_std_id *std)
 /**
  * vidioc_querystd - get possible video standards
  * @file: descriptor of device
+ * @priv: unused
  * @std: contains return values
  *
  * all possible video standards are returned
@@ -512,6 +516,7 @@ static int vidioc_enum_input(struct file *file, void *priv,
 /**
  * vidioc_s_input - set input line
  * @file: descriptor of device
+ * @priv: unused
  * @i: new input line number
  *
  * the current active input line is set
@@ -538,6 +543,7 @@ static int vidioc_s_input(struct file *file, void *priv, unsigned int i)
 /**
  * vidioc_g_input - return input line
  * @file: descriptor of device
+ * @priv: unused
  * @i: returned input line number
  *
  * the current active input line is returned
@@ -554,6 +560,8 @@ static int vidioc_g_input(struct file *file, void *priv, unsigned int *i)
 
 /**
  * vidioc_enum_fmt_vid_cap - return video capture format
+ * @file: descriptor of device
+ * @priv: unused
  * @f: returned format information
  *
  * returns name and format of video capture
@@ -577,6 +585,7 @@ static int vidioc_enum_fmt_vid_cap(struct file *file, void *priv,
 /**
  * vidioc_try_fmt_vid_cap - set video capture format
  * @file: descriptor of device
+ * @priv: unused
  * @f: new format
  *
  * new video format is set which includes width and
@@ -639,6 +648,7 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv,
 /**
  * vidioc_s_fmt_vid_cap - set current video format parameters
  * @file: descriptor of device
+ * @priv: unused
  * @f: returned format information
  *
  * set new capture format
@@ -706,6 +716,7 @@ static int vidioc_s_fmt_vid_cap(struct file *file, void *priv,
 /**
  * vidioc_g_fmt_vid_cap - get current video format parameters
  * @file: descriptor of device
+ * @priv: unused
  * @f: contains format information
  *
  * returns current video format parameters
diff --git a/drivers/media/pci/tw68/tw68-risc.c b/drivers/media/pci/tw68/tw68-risc.c
index 7439db212a69d2f401eacd5586505f59b85657b2..82ff9c9494f3d99a371495a60010ed17874679ed 100644
--- a/drivers/media/pci/tw68/tw68-risc.c
+++ b/drivers/media/pci/tw68/tw68-risc.c
@@ -29,14 +29,15 @@
 #include "tw68.h"
 
 /**
- *  @rp		pointer to current risc program position
- *  @sglist	pointer to "scatter-gather list" of buffer pointers
- *  @offset	offset to target memory buffer
- *  @sync_line	0 -> no sync, 1 -> odd sync, 2 -> even sync
- *  @bpl	number of bytes per scan line
- *  @padding	number of bytes of padding to add
- *  @lines	number of lines in field
- *  @jump	insert a jump at the start
+ * tw68_risc_field
+ *  @rp:	pointer to current risc program position
+ *  @sglist:	pointer to "scatter-gather list" of buffer pointers
+ *  @offset:	offset to target memory buffer
+ *  @sync_line:	0 -> no sync, 1 -> odd sync, 2 -> even sync
+ *  @bpl:	number of bytes per scan line
+ *  @padding:	number of bytes of padding to add
+ *  @lines:	number of lines in field
+ *  @jump:	insert a jump at the start
  */
 static __le32 *tw68_risc_field(__le32 *rp, struct scatterlist *sglist,
 			    unsigned int offset, u32 sync_line,
@@ -120,18 +121,18 @@ static __le32 *tw68_risc_field(__le32 *rp, struct scatterlist *sglist,
  *	memory for the dma controller "program" and then fills in that
  *	memory with the appropriate "instructions".
  *
- *	@pci_dev	structure with info about the pci
+ *	@pci:		structure with info about the pci
  *			slot which our device is in.
- *	@risc		structure with info about the memory
+ *	@buf:		structure with info about the memory
  *			used for our controller program.
- *	@sglist		scatter-gather list entry
- *	@top_offset	offset within the risc program area for the
+ *	@sglist:	scatter-gather list entry
+ *	@top_offset:	offset within the risc program area for the
  *			first odd frame line
- *	@bottom_offset	offset within the risc program area for the
+ *	@bottom_offset:	offset within the risc program area for the
  *			first even frame line
- *	@bpl		number of data bytes per scan line
- *	@padding	number of extra bytes to add at end of line
- *	@lines		number of scan lines
+ *	@bpl:		number of data bytes per scan line
+ *	@padding:	number of extra bytes to add at end of line
+ *	@lines:		number of scan lines
  */
 int tw68_risc_buffer(struct pci_dev *pci,
 			struct tw68_buf *buf,
diff --git a/drivers/media/platform/davinci/vpif.c b/drivers/media/platform/davinci/vpif.c
index 07e89a4985a6064a982b7d39cb6667613c05984a..16352e2263d2d2efcf165aabded6d10e3f216e02 100644
--- a/drivers/media/platform/davinci/vpif.c
+++ b/drivers/media/platform/davinci/vpif.c
@@ -47,8 +47,9 @@ EXPORT_SYMBOL_GPL(vpif_lock);
 void __iomem *vpif_base;
 EXPORT_SYMBOL_GPL(vpif_base);
 
-/**
+/*
  * vpif_ch_params: video standard configuration parameters for vpif
+ *
  * The table must include all presets from supported subdevices.
  */
 const struct vpif_channel_config_params vpif_ch_params[] = {
diff --git a/drivers/media/platform/davinci/vpif_capture.c b/drivers/media/platform/davinci/vpif_capture.c
index a89367ab1e068a593396c808c892ca0c8bf5d70d..fca4dc829f73eeb0eb9a1c191fc6daae940587a9 100644
--- a/drivers/media/platform/davinci/vpif_capture.c
+++ b/drivers/media/platform/davinci/vpif_capture.c
@@ -109,7 +109,7 @@ static int vpif_buffer_prepare(struct vb2_buffer *vb)
  * @vq: vb2_queue ptr
  * @nbuffers: ptr to number of buffers requested by application
  * @nplanes:: contains number of distinct video planes needed to hold a frame
- * @sizes[]: contains the size (in bytes) of each plane.
+ * @sizes: contains the size (in bytes) of each plane.
  * @alloc_devs: ptr to allocation context
  *
  * This callback function is called when reqbuf() is called to adjust
@@ -167,7 +167,7 @@ static void vpif_buffer_queue(struct vb2_buffer *vb)
 
 /**
  * vpif_start_streaming : Starts the DMA engine for streaming
- * @vb: ptr to vb2_buffer
+ * @vq: ptr to vb2_buffer
  * @count: number of buffers
  */
 static int vpif_start_streaming(struct vb2_queue *vq, unsigned int count)
@@ -629,7 +629,7 @@ static void vpif_calculate_offsets(struct channel_obj *ch)
 
 /**
  * vpif_get_default_field() - Get default field type based on interface
- * @vpif_params - ptr to vpif params
+ * @iface: ptr to vpif interface
  */
 static inline enum v4l2_field vpif_get_default_field(
 				struct vpif_interface *iface)
@@ -640,8 +640,8 @@ static inline enum v4l2_field vpif_get_default_field(
 
 /**
  * vpif_config_addr() - function to configure buffer address in vpif
- * @ch - channel ptr
- * @muxmode - channel mux mode
+ * @ch: channel ptr
+ * @muxmode: channel mux mode
  */
 static void vpif_config_addr(struct channel_obj *ch, int muxmode)
 {
@@ -661,9 +661,9 @@ static void vpif_config_addr(struct channel_obj *ch, int muxmode)
 
 /**
  * vpif_input_to_subdev() - Maps input to sub device
- * @vpif_cfg - global config ptr
- * @chan_cfg - channel config ptr
- * @input_index - Given input index from application
+ * @vpif_cfg: global config ptr
+ * @chan_cfg: channel config ptr
+ * @input_index: Given input index from application
  *
  * lookup the sub device information for a given input index.
  * we report all the inputs to application. inputs table also
@@ -699,9 +699,9 @@ static int vpif_input_to_subdev(
 
 /**
  * vpif_set_input() - Select an input
- * @vpif_cfg - global config ptr
- * @ch - channel
- * @_index - Given input index from application
+ * @vpif_cfg: global config ptr
+ * @ch: channel
+ * @index: Given input index from application
  *
  * Select the given input.
  */
@@ -792,7 +792,7 @@ static int vpif_querystd(struct file *file, void *priv, v4l2_std_id *std_id)
  * vpif_g_std() - get STD handler
  * @file: file ptr
  * @priv: file handle
- * @std_id: ptr to std id
+ * @std: ptr to std id
  */
 static int vpif_g_std(struct file *file, void *priv, v4l2_std_id *std)
 {
@@ -933,7 +933,7 @@ static int vpif_s_input(struct file *file, void *priv, unsigned int index)
  * vpif_enum_fmt_vid_cap() - ENUM_FMT handler
  * @file: file ptr
  * @priv: file handle
- * @index: input index
+ * @fmt: ptr to V4L2 format descriptor
  */
 static int vpif_enum_fmt_vid_cap(struct file *file, void  *priv,
 					struct v4l2_fmtdesc *fmt)
@@ -1745,6 +1745,7 @@ static int vpif_remove(struct platform_device *device)
 #ifdef CONFIG_PM_SLEEP
 /**
  * vpif_suspend: vpif device suspend
+ * @dev: pointer to &struct device
  */
 static int vpif_suspend(struct device *dev)
 {
diff --git a/drivers/media/platform/davinci/vpif_display.c b/drivers/media/platform/davinci/vpif_display.c
index ff2f75a328c98451e8c238bba2ae228319b962ce..7be636237acf8ec3fbb116f260ced51dbdd586ac 100644
--- a/drivers/media/platform/davinci/vpif_display.c
+++ b/drivers/media/platform/davinci/vpif_display.c
@@ -102,7 +102,7 @@ static int vpif_buffer_prepare(struct vb2_buffer *vb)
  * @vq: vb2_queue ptr
  * @nbuffers: ptr to number of buffers requested by application
  * @nplanes:: contains number of distinct video planes needed to hold a frame
- * @sizes[]: contains the size (in bytes) of each plane.
+ * @sizes: contains the size (in bytes) of each plane.
  * @alloc_devs: ptr to allocation context
  *
  * This callback function is called when reqbuf() is called to adjust
@@ -158,7 +158,7 @@ static void vpif_buffer_queue(struct vb2_buffer *vb)
 
 /**
  * vpif_start_streaming : Starts the DMA engine for streaming
- * @vb: ptr to vb2_buffer
+ * @vq: ptr to vb2_buffer
  * @count: number of buffers
  */
 static int vpif_start_streaming(struct vb2_queue *vq, unsigned int count)
@@ -766,9 +766,9 @@ static int vpif_enum_output(struct file *file, void *fh,
 
 /**
  * vpif_output_to_subdev() - Maps output to sub device
- * @vpif_cfg - global config ptr
- * @chan_cfg - channel config ptr
- * @index - Given output index from application
+ * @vpif_cfg: global config ptr
+ * @chan_cfg: channel config ptr
+ * @index: Given output index from application
  *
  * lookup the sub device information for a given output index.
  * we report all the output to application. output table also
@@ -802,9 +802,9 @@ vpif_output_to_subdev(struct vpif_display_config *vpif_cfg,
 
 /**
  * vpif_set_output() - Select an output
- * @vpif_cfg - global config ptr
- * @ch - channel
- * @index - Given output index from application
+ * @vpif_cfg: global config ptr
+ * @ch: channel
+ * @index: Given output index from application
  *
  * Select the given output.
  */
diff --git a/drivers/media/platform/exynos4-is/fimc-capture.c b/drivers/media/platform/exynos4-is/fimc-capture.c
index 948fe01f6c96cc1745e96bccee0bde7283fc0abc..ed9302caa00484c55e4e75474acd204fa77df404 100644
--- a/drivers/media/platform/exynos4-is/fimc-capture.c
+++ b/drivers/media/platform/exynos4-is/fimc-capture.c
@@ -146,6 +146,7 @@ static int fimc_stop_capture(struct fimc_dev *fimc, bool suspend)
 
 /**
  * fimc_capture_config_update - apply the camera interface configuration
+ * @ctx: FIMC capture context
  *
  * To be called from within the interrupt handler with fimc.slock
  * spinlock held. It updates the camera pixel crop, rotation and
@@ -858,6 +859,7 @@ static int fimc_pipeline_try_format(struct fimc_ctx *ctx,
  * fimc_get_sensor_frame_desc - query the sensor for media bus frame parameters
  * @sensor: pointer to the sensor subdev
  * @plane_fmt: provides plane sizes corresponding to the frame layout entries
+ * @num_planes: number of planes
  * @try: true to set the frame parameters, false to query only
  *
  * This function is used by this driver only for compressed/blob data formats.
@@ -1101,6 +1103,7 @@ static int fimc_cap_g_input(struct file *file, void *priv, unsigned int *i)
 /**
  * fimc_pipeline_validate - check for formats inconsistencies
  *                          between source and sink pad of each link
+ * @fimc:	the FIMC device this context applies to
  *
  * Return 0 if all formats match or -EPIPE otherwise.
  */
diff --git a/drivers/media/platform/exynos4-is/media-dev.c b/drivers/media/platform/exynos4-is/media-dev.c
index c15596b56dc97af7bfb4ef5079c5161a2434b69f..0ef583cfc424f61671d0374909aacdaaf4c67fe0 100644
--- a/drivers/media/platform/exynos4-is/media-dev.c
+++ b/drivers/media/platform/exynos4-is/media-dev.c
@@ -60,6 +60,7 @@ static void __setup_sensor_notification(struct fimc_md *fmd,
 
 /**
  * fimc_pipeline_prepare - update pipeline information with subdevice pointers
+ * @p: fimc pipeline
  * @me: media entity terminating the pipeline
  *
  * Caller holds the graph mutex.
@@ -151,8 +152,8 @@ static int __subdev_set_power(struct v4l2_subdev *sd, int on)
 
 /**
  * fimc_pipeline_s_power - change power state of all pipeline subdevs
- * @fimc: fimc device terminating the pipeline
- * @state: true to power on, false to power off
+ * @p: fimc device terminating the pipeline
+ * @on: true to power on, false to power off
  *
  * Needs to be called with the graph mutex held.
  */
@@ -219,6 +220,7 @@ static int __fimc_pipeline_enable(struct exynos_media_pipeline *ep,
 /**
  * __fimc_pipeline_open - update the pipeline information, enable power
  *                        of all pipeline subdevs and the sensor clock
+ * @ep: fimc device terminating the pipeline
  * @me: media entity to start graph walk with
  * @prepare: true to walk the current pipeline and acquire all subdevs
  *
@@ -252,7 +254,7 @@ static int __fimc_pipeline_open(struct exynos_media_pipeline *ep,
 
 /**
  * __fimc_pipeline_close - disable the sensor clock and pipeline power
- * @fimc: fimc device terminating the pipeline
+ * @ep: fimc device terminating the pipeline
  *
  * Disable power of all subdevs and turn the external sensor clock off.
  */
@@ -281,7 +283,7 @@ static int __fimc_pipeline_close(struct exynos_media_pipeline *ep)
 
 /**
  * __fimc_pipeline_s_stream - call s_stream() on pipeline subdevs
- * @pipeline: video pipeline structure
+ * @ep: video pipeline structure
  * @on: passed as the s_stream() callback argument
  */
 static int __fimc_pipeline_s_stream(struct exynos_media_pipeline *ep, bool on)
@@ -902,6 +904,7 @@ static int __fimc_md_create_fimc_is_links(struct fimc_md *fmd)
 
 /**
  * fimc_md_create_links - create default links between registered entities
+ * @fmd: fimc media device
  *
  * Parallel interface sensor entities are connected directly to FIMC capture
  * entities. The sensors using MIPI CSIS bus are connected through immutable
diff --git a/drivers/media/platform/exynos4-is/mipi-csis.c b/drivers/media/platform/exynos4-is/mipi-csis.c
index 560aadabcb111c3de8aac551c1e4809f741de245..cba46a65633836d5632c61107e7e8dbecdf1b39b 100644
--- a/drivers/media/platform/exynos4-is/mipi-csis.c
+++ b/drivers/media/platform/exynos4-is/mipi-csis.c
@@ -189,7 +189,7 @@ struct csis_drvdata {
  * @irq: requested s5p-mipi-csis irq number
  * @interrupt_mask: interrupt mask of the all used interrupts
  * @flags: the state variable for power and streaming control
- * @clock_frequency: device bus clock frequency
+ * @clk_frequency: device bus clock frequency
  * @hs_settle: HS-RX settle time
  * @num_lanes: number of MIPI-CSI data lanes used
  * @max_num_lanes: maximum number of MIPI-CSI data lanes supported
diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c
index b7731b18ecae1741ebee3be1bf9188659bfeb398..aa3ce41898bc82c194c32e4055d5facdfa789043 100644
--- a/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c
+++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c
@@ -59,6 +59,7 @@ struct h264_fb {
  * @read_idx  : read index
  * @write_idx : write index
  * @count     : buffer count in list
+ * @reserved  : for 8 bytes alignment
  */
 struct h264_ring_fb_list {
 	struct h264_fb fb_list[H264_MAX_FB_NUM];
diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c
index b9fad6a488799ebc7fad8b12b6990b9c33d7c60b..3e84a761db3a9188500610a2a83a7c80d61580da 100644
--- a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c
+++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c
@@ -155,7 +155,6 @@ struct vdec_vp8_vpu_inst {
  * @reg_base		   : HW register base address
  * @frm_cnt		   : decode frame count
  * @ctx			   : V4L2 context
- * @dev			   : platform device
  * @vpu			   : VPU instance for decoder
  * @vsi			   : VPU share information
  */
diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
index 4eb3be37ba143980291f33e0de40835a352f42ad..6cf31b366aad2d94df0c80bd0b0b300ba854fa48 100644
--- a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
+++ b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
@@ -34,7 +34,7 @@ static const char h264_filler_marker[] = {0x0, 0x0, 0x0, 0x1, 0xc};
 #define H264_FILLER_MARKER_SIZE ARRAY_SIZE(h264_filler_marker)
 #define VENC_PIC_BITSTREAM_BYTE_CNT 0x0098
 
-/**
+/*
  * enum venc_h264_vpu_work_buf - h264 encoder buffer index
  */
 enum venc_h264_vpu_work_buf {
@@ -50,7 +50,7 @@ enum venc_h264_vpu_work_buf {
 	VENC_H264_VPU_WORK_BUF_MAX,
 };
 
-/**
+/*
  * enum venc_h264_bs_mode - for bs_mode argument in h264_enc_vpu_encode
  */
 enum venc_h264_bs_mode {
diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c
index acb639c4abd2b65c3b59e244c671f6981b9ef790..957420dd60de245260e39d5aef04a8efa45c882c 100644
--- a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c
+++ b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c
@@ -34,7 +34,7 @@
 /* This ac_tag is vp8 frame tag. */
 #define MAX_AC_TAG_SIZE 10
 
-/**
+/*
  * enum venc_vp8_vpu_work_buf - vp8 encoder buffer index
  */
 enum venc_vp8_vpu_work_buf {
diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.c b/drivers/media/platform/mtk-vpu/mtk_vpu.c
index 853d598937f69d4667ce5a28d078e53365c2b134..1ff6a93262b7a674b718a44a17e1d686c21314bb 100644
--- a/drivers/media/platform/mtk-vpu/mtk_vpu.c
+++ b/drivers/media/platform/mtk-vpu/mtk_vpu.c
@@ -181,6 +181,7 @@ struct share_obj {
  * @extmem:		VPU extended memory information
  * @reg:		VPU TCM and configuration registers
  * @run:		VPU initialization status
+ * @wdt:		VPU watchdog workqueue
  * @ipi_desc:		VPU IPI descriptor
  * @recv_buf:		VPU DTCM share buffer for receiving. The
  *			receive buffer is only accessed in interrupt context.
@@ -194,7 +195,7 @@ struct share_obj {
  *			suppose a client is using VPU to decode VP8.
  *			If the other client wants to encode VP8,
  *			it has to wait until VP8 decode completes.
- * @wdt_refcnt		WDT reference count to make sure the watchdog can be
+ * @wdt_refcnt:		WDT reference count to make sure the watchdog can be
  *			disabled if no other client is using VPU service
  * @ack_wq:		The wait queue for each codec and mdp. When sleeping
  *			processes wake up, they will check the condition
diff --git a/drivers/media/platform/pxa_camera.c b/drivers/media/platform/pxa_camera.c
index 9d3f0cb1d95ae25519045fb501dc955b67123d65..295f34ad10800b1019a172e90dc805589138f234 100644
--- a/drivers/media/platform/pxa_camera.c
+++ b/drivers/media/platform/pxa_camera.c
@@ -235,6 +235,7 @@ enum pxa_mbus_layout {
  *			stored in memory in the following way:
  * @packing:		Type of sample-packing, that has to be used
  * @order:		Sample order when storing in memory
+ * @layout:		Planes layout in memory
  * @bits_per_sample:	How many bits the bridge has to sample
  */
 struct pxa_mbus_pixelfmt {
@@ -852,10 +853,10 @@ static void pxa_camera_dma_irq_v(void *data)
 /**
  * pxa_init_dma_channel - init dma descriptors
  * @pcdev: pxa camera device
- * @vb: videobuffer2 buffer
- * @dma: dma video buffer
+ * @buf: pxa camera buffer
  * @channel: dma channel (0 => 'Y', 1 => 'U', 2 => 'V')
- * @cibr: camera Receive Buffer Register
+ * @sg: dma scatter list
+ * @sglen: dma scatter list length
  *
  * Prepares the pxa dma descriptors to transfer one camera channel.
  *
@@ -1010,6 +1011,8 @@ static void pxa_camera_wakeup(struct pxa_camera_dev *pcdev,
 /**
  * pxa_camera_check_link_miss - check missed DMA linking
  * @pcdev: camera device
+ * @last_submitted: an opaque DMA cookie for last submitted
+ * @last_issued: an opaque DMA cookie for last issued
  *
  * The DMA chaining is done with DMA running. This means a tiny temporal window
  * remains, where a buffer is queued on the chain, while the chain is already
diff --git a/drivers/media/platform/rcar_fdp1.c b/drivers/media/platform/rcar_fdp1.c
index 3245bc45f4a0636581599b01529ec34fae445c44..b13dec3081e55266486156bd8d2d8d398955b8d0 100644
--- a/drivers/media/platform/rcar_fdp1.c
+++ b/drivers/media/platform/rcar_fdp1.c
@@ -1132,7 +1132,7 @@ static int fdp1_device_process(struct fdp1_ctx *ctx)
  * mem2mem callbacks
  */
 
-/**
+/*
  * job_ready() - check whether an instance is ready to be scheduled to run
  */
 static int fdp1_m2m_job_ready(void *priv)
diff --git a/drivers/media/platform/rcar_jpu.c b/drivers/media/platform/rcar_jpu.c
index 070bac36d766891dabeb01e1f1bf064eb989de58..f6092ae459129478aebb628f39685d9d35669532 100644
--- a/drivers/media/platform/rcar_jpu.c
+++ b/drivers/media/platform/rcar_jpu.c
@@ -257,7 +257,7 @@ struct jpu_fmt {
 };
 
 /**
- * jpu_q_data - parameters of one queue
+ * struct jpu_q_data - parameters of one queue
  * @fmtinfo: driver-specific format of this queue
  * @format: multiplanar format of this queue
  * @sequence: sequence number
@@ -269,7 +269,7 @@ struct jpu_q_data {
 };
 
 /**
- * jpu_ctx - the device context data
+ * struct jpu_ctx - the device context data
  * @jpu: JPEG IP device for this context
  * @encoder: compression (encode) operation or decompression (decode)
  * @compr_quality: destination image quality in compression (encode) mode
diff --git a/drivers/media/platform/s3c-camif/camif-core.c b/drivers/media/platform/s3c-camif/camif-core.c
index c4ab63986c8f08df7d33e15fdf2765a7e7d8d099..79bc0ef6bb413d8cd15aaaea3662444d2b047635 100644
--- a/drivers/media/platform/s3c-camif/camif-core.c
+++ b/drivers/media/platform/s3c-camif/camif-core.c
@@ -103,6 +103,7 @@ static const struct camif_fmt camif_formats[] = {
 
 /**
  * s3c_camif_find_format() - lookup camif color format by fourcc or an index
+ * @vp: video path (DMA) description (codec/preview)
  * @pixelformat: fourcc to match, ignored if null
  * @index: index to the camif_formats array, ignored if negative
  */
diff --git a/drivers/media/platform/sh_veu.c b/drivers/media/platform/sh_veu.c
index 15a562af13c774711c8478cf682491e916b8784b..dedc1b024f6f1ef692f6c41f0aec892521b51035 100644
--- a/drivers/media/platform/sh_veu.c
+++ b/drivers/media/platform/sh_veu.c
@@ -267,7 +267,7 @@ static void sh_veu_process(struct sh_veu_dev *veu,
 	sh_veu_reg_write(veu, VEU_EIER, 1); /* enable interrupt in VEU */
 }
 
-/**
+/*
  * sh_veu_device_run() - prepares and starts the device
  *
  * This will be called by the framework when it decides to schedule a particular
diff --git a/drivers/media/platform/soc_camera/soc_scale_crop.c b/drivers/media/platform/soc_camera/soc_scale_crop.c
index 0116097c0c0faf8ce7db5a42032213a92ba615a2..270ec613c27ca04ea68041ab13009f6b3c4b58ac 100644
--- a/drivers/media/platform/soc_camera/soc_scale_crop.c
+++ b/drivers/media/platform/soc_camera/soc_scale_crop.c
@@ -306,16 +306,17 @@ static int client_set_fmt(struct soc_camera_device *icd,
 }
 
 /**
- * @icd		- soc-camera device
- * @rect	- camera cropping window
- * @subrect	- part of rect, sent to the user
- * @mf		- in- / output camera output window
- * @width	- on input: max host input width
- *		  on output: user width, mapped back to input
- * @height	- on input: max host input height
- *		  on output: user height, mapped back to input
- * @host_can_scale - host can scale this pixel format
- * @shift	- shift, used for scaling
+ * soc_camera_client_scale
+ * @icd:		soc-camera device
+ * @rect:		camera cropping window
+ * @subrect:		part of rect, sent to the user
+ * @mf:			in- / output camera output window
+ * @width:		on input: max host input width;
+ *			on output: user width, mapped back to input
+ * @height:		on input: max host input height;
+ *			on output: user height, mapped back to input
+ * @host_can_scale:	host can scale this pixel format
+ * @shift:		shift, used for scaling
  */
 int soc_camera_client_scale(struct soc_camera_device *icd,
 			struct v4l2_rect *rect, struct v4l2_rect *subrect,
diff --git a/drivers/media/platform/sti/hva/hva-h264.c b/drivers/media/platform/sti/hva/hva-h264.c
index a7e5eed17adae6487307b1670f79be64535c61cd..17f1eb0ba957faea55d4df7909cb0b1a84384a29 100644
--- a/drivers/media/platform/sti/hva/hva-h264.c
+++ b/drivers/media/platform/sti/hva/hva-h264.c
@@ -134,7 +134,7 @@ enum hva_h264_sei_payload_type {
 	SEI_FRAME_PACKING_ARRANGEMENT = 45
 };
 
-/**
+/*
  * stereo Video Info struct
  */
 struct hva_h264_stereo_video_sei {
@@ -146,7 +146,9 @@ struct hva_h264_stereo_video_sei {
 	u8 right_view_self_contained_flag;
 };
 
-/**
+/*
+ * struct hva_h264_td
+ *
  * @frame_width: width in pixels of the buffer containing the input frame
  * @frame_height: height in pixels of the buffer containing the input frame
  * @frame_num: the parameter to be written in the slice header
@@ -352,7 +354,9 @@ struct hva_h264_td {
 	u32 addr_brc_in_out_parameter;
 };
 
-/**
+/*
+ * struct hva_h264_slice_po
+ *
  * @ slice_size: slice size
  * @ slice_start_time: start time
  * @ slice_stop_time: stop time
@@ -365,7 +369,9 @@ struct hva_h264_slice_po {
 	u32 slice_num;
 };
 
-/**
+/*
+ * struct hva_h264_po
+ *
  * @ bitstream_size: bitstream size
  * @ dct_bitstream_size: dtc bitstream size
  * @ stuffing_bits: number of stuffing bits inserted by the encoder
@@ -391,7 +397,9 @@ struct hva_h264_task {
 	struct hva_h264_po po;
 };
 
-/**
+/*
+ * struct hva_h264_ctx
+ *
  * @seq_info:  sequence information buffer
  * @ref_frame: reference frame buffer
  * @rec_frame: reconstructed frame buffer
diff --git a/drivers/media/platform/ti-vpe/vpe.c b/drivers/media/platform/ti-vpe/vpe.c
index 45bd10544189e9a3c2144c11b1887e6dd091a2a8..e395aa85c8ad66341f9e17b9d488120cfe43feed 100644
--- a/drivers/media/platform/ti-vpe/vpe.c
+++ b/drivers/media/platform/ti-vpe/vpe.c
@@ -926,7 +926,7 @@ static struct vpe_ctx *file2ctx(struct file *file)
  * mem2mem callbacks
  */
 
-/**
+/*
  * job_ready() - check whether an instance is ready to be scheduled to run
  */
 static int job_ready(void *priv)
diff --git a/drivers/media/platform/vim2m.c b/drivers/media/platform/vim2m.c
index 7bf9fa2f8534bf456e0f1992f014247cc119fbc1..065483e62db4d7ede0893c61a2fc3d099e9750ad 100644
--- a/drivers/media/platform/vim2m.c
+++ b/drivers/media/platform/vim2m.c
@@ -343,7 +343,7 @@ static void schedule_irq(struct vim2m_dev *dev, int msec_timeout)
  * mem2mem callbacks
  */
 
-/**
+/*
  * job_ready() - check whether an instance is ready to be scheduled to run
  */
 static int job_ready(void *priv)
diff --git a/drivers/media/platform/vsp1/vsp1_dl.c b/drivers/media/platform/vsp1/vsp1_dl.c
index 8b5cbb6b7a704651117960d2ff7bd95a24ca4e7d..4257451f1bd8c107d89778fd68d8b435ccb28fb6 100644
--- a/drivers/media/platform/vsp1/vsp1_dl.c
+++ b/drivers/media/platform/vsp1/vsp1_dl.c
@@ -70,6 +70,7 @@ struct vsp1_dl_body {
  * @dma: DMA address for the header
  * @body0: first display list body
  * @fragments: list of extra display list bodies
+ * @has_chain: if true, indicates that there's a partition chain
  * @chain: entry in the display list partition chain
  */
 struct vsp1_dl_list {
diff --git a/drivers/media/radio/radio-si476x.c b/drivers/media/radio/radio-si476x.c
index 271f725b17e85c77017f08ac9181510369f16872..540ac887a63cdd562213bf377fbf05574fb27143 100644
--- a/drivers/media/radio/radio-si476x.c
+++ b/drivers/media/radio/radio-si476x.c
@@ -158,7 +158,7 @@ enum si476x_ctrl_idx {
 };
 static struct v4l2_ctrl_config si476x_ctrls[] = {
 
-	/**
+	/*
 	 * SI476X during its station seeking(or tuning) process uses several
 	 * parameters to detrmine if "the station" is valid:
 	 *
@@ -197,7 +197,7 @@ static struct v4l2_ctrl_config si476x_ctrls[] = {
 		.step	= 2,
 	},
 
-	/**
+	/*
 	 * #V4L2_CID_SI476X_HARMONICS_COUNT -- number of harmonics
 	 * built-in power-line noise supression filter is to reject
 	 * during AM-mode operation.
@@ -213,7 +213,7 @@ static struct v4l2_ctrl_config si476x_ctrls[] = {
 		.step	= 1,
 	},
 
-	/**
+	/*
 	 * #V4L2_CID_SI476X_DIVERSITY_MODE -- configuration which
 	 * two tuners working in diversity mode are to work in.
 	 *
@@ -237,7 +237,7 @@ static struct v4l2_ctrl_config si476x_ctrls[] = {
 		.max	= ARRAY_SIZE(phase_diversity_modes) - 1,
 	},
 
-	/**
+	/*
 	 * #V4L2_CID_SI476X_INTERCHIP_LINK -- inter-chip link in
 	 * diversity mode indicator. Allows user to determine if two
 	 * chips working in diversity mode have established a link
@@ -296,11 +296,15 @@ struct si476x_radio_ops {
 /**
  * struct si476x_radio - radio device
  *
- * @core: Pointer to underlying core device
+ * @v4l2dev: Pointer to V4L2 device created by V4L2 subsystem
  * @videodev: Pointer to video device created by V4L2 subsystem
+ * @ctrl_handler: V4L2 controls handler
+ * @core: Pointer to underlying core device
  * @ops: Vtable of functions. See struct si476x_radio_ops for details
- * @kref: Reference counter
- * @core_lock: An r/w semaphore to brebvent the deletion of underlying
+ * @debugfs: pointer to &strucd dentry for debugfs
+ * @audmode: audio mode, as defined for the rxsubchans field
+ *	     at videodev2.h
+ *
  * core structure is the radio device is being used
  */
 struct si476x_radio {
diff --git a/drivers/media/radio/radio-wl1273.c b/drivers/media/radio/radio-wl1273.c
index 903fcd5e99c07e3928c0ddf658cf146cc0a568e6..3cbdc085c65df309dc5e43ec97e95806af21aa99 100644
--- a/drivers/media/radio/radio-wl1273.c
+++ b/drivers/media/radio/radio-wl1273.c
@@ -1330,7 +1330,7 @@ static int wl1273_fm_vidioc_s_input(struct file *file, void *priv,
 
 /**
  * wl1273_fm_set_tx_power() -	Set the transmission power value.
- * @core:			A pointer to the device struct.
+ * @radio:			A pointer to the device struct.
  * @power:			The new power value.
  */
 static int wl1273_fm_set_tx_power(struct wl1273_device *radio, u16 power)
diff --git a/drivers/media/rc/img-ir/img-ir-hw.c b/drivers/media/rc/img-ir/img-ir-hw.c
index f54bc5d23893da0b5ce0e1e66592f1d0ab30746b..ec4ded84cd170b93d231eeff298c6a9d0a5b56c9 100644
--- a/drivers/media/rc/img-ir/img-ir-hw.c
+++ b/drivers/media/rc/img-ir/img-ir-hw.c
@@ -339,7 +339,7 @@ static void img_ir_decoder_preprocess(struct img_ir_decoder *decoder)
 /**
  * img_ir_decoder_convert() - Generate internal timings in decoder.
  * @decoder:	Decoder to be converted to internal timings.
- * @timings:	Timing register values.
+ * @reg_timings: Timing register values.
  * @clock_hz:	IR clock rate in Hz.
  *
  * Fills out the repeat timings and timing register values for a specific clock
diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c
index b25b35b3f6da530b1869ae4844a0e689f22ed186..eb943e862515dc55800ef080ebf75860737e7033 100644
--- a/drivers/media/rc/imon.c
+++ b/drivers/media/rc/imon.c
@@ -492,7 +492,7 @@ static void free_imon_context(struct imon_context *ictx)
 	dev_dbg(dev, "%s: iMON context freed\n", __func__);
 }
 
-/**
+/*
  * Called when the Display device (e.g. /dev/lcd0)
  * is opened by the application.
  */
@@ -542,7 +542,7 @@ static int display_open(struct inode *inode, struct file *file)
 	return retval;
 }
 
-/**
+/*
  * Called when the display device (e.g. /dev/lcd0)
  * is closed by the application.
  */
@@ -575,7 +575,7 @@ static int display_close(struct inode *inode, struct file *file)
 	return retval;
 }
 
-/**
+/*
  * Sends a packet to the device -- this function must be called with
  * ictx->lock held, or its unlock/lock sequence while waiting for tx
  * to complete can/will lead to a deadlock.
@@ -664,7 +664,7 @@ static int send_packet(struct imon_context *ictx)
 	return retval;
 }
 
-/**
+/*
  * Sends an associate packet to the iMON 2.4G.
  *
  * This might not be such a good idea, since it has an id collision with
@@ -694,7 +694,7 @@ static int send_associate_24g(struct imon_context *ictx)
 	return retval;
 }
 
-/**
+/*
  * Sends packets to setup and show clock on iMON display
  *
  * Arguments: year - last 2 digits of year, month - 1..12,
@@ -781,7 +781,7 @@ static int send_set_imon_clock(struct imon_context *ictx,
 	return retval;
 }
 
-/**
+/*
  * These are the sysfs functions to handle the association on the iMON 2.4G LT.
  */
 static ssize_t show_associate_remote(struct device *d,
@@ -823,7 +823,7 @@ static ssize_t store_associate_remote(struct device *d,
 	return count;
 }
 
-/**
+/*
  * sysfs functions to control internal imon clock
  */
 static ssize_t show_imon_clock(struct device *d,
@@ -923,7 +923,7 @@ static const struct attribute_group imon_rf_attr_group = {
 	.attrs = imon_rf_sysfs_entries
 };
 
-/**
+/*
  * Writes data to the VFD.  The iMON VFD is 2x16 characters
  * and requires data in 5 consecutive USB interrupt packets,
  * each packet but the last carrying 7 bytes.
@@ -1008,7 +1008,7 @@ static ssize_t vfd_write(struct file *file, const char __user *buf,
 	return (!retval) ? n_bytes : retval;
 }
 
-/**
+/*
  * Writes data to the LCD.  The iMON OEM LCD screen expects 8-byte
  * packets. We accept data as 16 hexadecimal digits, followed by a
  * newline (to make it easy to drive the device from a command-line
@@ -1066,7 +1066,7 @@ static ssize_t lcd_write(struct file *file, const char __user *buf,
 	return (!retval) ? n_bytes : retval;
 }
 
-/**
+/*
  * Callback function for USB core API: transmit data
  */
 static void usb_tx_callback(struct urb *urb)
@@ -1087,7 +1087,7 @@ static void usb_tx_callback(struct urb *urb)
 	complete(&ictx->tx.finished);
 }
 
-/**
+/*
  * report touchscreen input
  */
 static void imon_touch_display_timeout(struct timer_list *t)
@@ -1103,7 +1103,7 @@ static void imon_touch_display_timeout(struct timer_list *t)
 	input_sync(ictx->touch);
 }
 
-/**
+/*
  * iMON IR receivers support two different signal sets -- those used by
  * the iMON remotes, and those used by the Windows MCE remotes (which is
  * really just RC-6), but only one or the other at a time, as the signals
@@ -1191,7 +1191,7 @@ static inline int tv2int(const struct timeval *a, const struct timeval *b)
 	return sec;
 }
 
-/**
+/*
  * The directional pad behaves a bit differently, depending on whether this is
  * one of the older ffdc devices or a newer device. Newer devices appear to
  * have a higher resolution matrix for more precise mouse movement, but it
@@ -1543,7 +1543,7 @@ static void imon_pad_to_keys(struct imon_context *ictx, unsigned char *buf)
 	}
 }
 
-/**
+/*
  * figure out if these is a press or a release. We don't actually
  * care about repeats, as those will be auto-generated within the IR
  * subsystem for repeating scancodes.
@@ -1592,10 +1592,10 @@ static int imon_parse_press_type(struct imon_context *ictx,
 	return press_type;
 }
 
-/**
+/*
  * Process the incoming packet
  */
-/**
+/*
  * Convert bit count to time duration (in us) and submit
  * the value to lirc_dev.
  */
@@ -1608,7 +1608,7 @@ static void submit_data(struct imon_context *context)
 	ir_raw_event_store_with_filter(context->rdev, &ev);
 }
 
-/**
+/*
  * Process the incoming packet
  */
 static void imon_incoming_ir_raw(struct imon_context *context,
@@ -1831,7 +1831,7 @@ static void imon_incoming_scancode(struct imon_context *ictx,
 	}
 }
 
-/**
+/*
  * Callback function for USB core API: receive data
  */
 static void usb_rx_callback_intf0(struct urb *urb)
@@ -2485,7 +2485,7 @@ static void imon_init_display(struct imon_context *ictx,
 
 }
 
-/**
+/*
  * Callback function for USB core API: Probe
  */
 static int imon_probe(struct usb_interface *interface,
@@ -2583,7 +2583,7 @@ static int imon_probe(struct usb_interface *interface,
 	return ret;
 }
 
-/**
+/*
  * Callback function for USB core API: disconnect
  */
 static void imon_disconnect(struct usb_interface *interface)
diff --git a/drivers/media/rc/ir-jvc-decoder.c b/drivers/media/rc/ir-jvc-decoder.c
index e2bd68c42edfa8ab870ebaf294ac06eef8a4e1a0..22c8aee3df4fee44ec294da322e0afc66c57651c 100644
--- a/drivers/media/rc/ir-jvc-decoder.c
+++ b/drivers/media/rc/ir-jvc-decoder.c
@@ -39,7 +39,7 @@ enum jvc_state {
 /**
  * ir_jvc_decode() - Decode one JVC pulse or space
  * @dev:	the struct rc_dev descriptor of the device
- * @duration:   the struct ir_raw_event descriptor of the pulse/space
+ * @ev:   the struct ir_raw_event descriptor of the pulse/space
  *
  * This function returns -EINVAL if the pulse violates the state machine
  */
diff --git a/drivers/media/rc/ir-lirc-codec.c b/drivers/media/rc/ir-lirc-codec.c
index 8f2f37412fc5e24e90ebe763e16628ca8f7f6483..4fd4521693d95b417643c4efa5af3a82e86c4428 100644
--- a/drivers/media/rc/ir-lirc-codec.c
+++ b/drivers/media/rc/ir-lirc-codec.c
@@ -25,8 +25,8 @@
 /**
  * ir_lirc_decode() - Send raw IR data to lirc_dev to be relayed to the
  *		      lircd userspace daemon for decoding.
- * @input_dev:	the struct rc_dev descriptor of the device
- * @duration:	the struct ir_raw_event descriptor of the pulse/space
+ * @dev:	the struct rc_dev descriptor of the device
+ * @ev:		the struct ir_raw_event descriptor of the pulse/space
  *
  * This function returns -EINVAL if the lirc interfaces aren't wired up.
  */
diff --git a/drivers/media/rc/ir-nec-decoder.c b/drivers/media/rc/ir-nec-decoder.c
index a95d09acc22a56a81f63260a8fecb7ecb216fe6c..6880c190dcd258041e506cd3275282930d22d287 100644
--- a/drivers/media/rc/ir-nec-decoder.c
+++ b/drivers/media/rc/ir-nec-decoder.c
@@ -41,7 +41,7 @@ enum nec_state {
 /**
  * ir_nec_decode() - Decode one NEC pulse or space
  * @dev:	the struct rc_dev descriptor of the device
- * @duration:	the struct ir_raw_event descriptor of the pulse/space
+ * @ev:		the struct ir_raw_event descriptor of the pulse/space
  *
  * This function returns -EINVAL if the pulse violates the state machine
  */
@@ -183,7 +183,6 @@ static int ir_nec_decode(struct rc_dev *dev, struct ir_raw_event ev)
  * ir_nec_scancode_to_raw() - encode an NEC scancode ready for modulation.
  * @protocol:	specific protocol to use
  * @scancode:	a single NEC scancode.
- * @raw:	raw data to be modulated.
  */
 static u32 ir_nec_scancode_to_raw(enum rc_proto protocol, u32 scancode)
 {
diff --git a/drivers/media/rc/ir-sanyo-decoder.c b/drivers/media/rc/ir-sanyo-decoder.c
index 758c60956850fc3ac1c0f25aea4645d9518a21fc..d94e07b02f3b0b475f1c2910be408cdd3ea14e68 100644
--- a/drivers/media/rc/ir-sanyo-decoder.c
+++ b/drivers/media/rc/ir-sanyo-decoder.c
@@ -48,7 +48,7 @@ enum sanyo_state {
 /**
  * ir_sanyo_decode() - Decode one SANYO pulse or space
  * @dev:	the struct rc_dev descriptor of the device
- * @duration:	the struct ir_raw_event descriptor of the pulse/space
+ * @ev:		the struct ir_raw_event descriptor of the pulse/space
  *
  * This function returns -EINVAL if the pulse violates the state machine
  */
diff --git a/drivers/media/rc/ir-sharp-decoder.c b/drivers/media/rc/ir-sharp-decoder.c
index 129b558acc9214abda6ce249eb3d8e2095591196..7140dd6160eee2c4de9e7b4448c961117f9e9a0d 100644
--- a/drivers/media/rc/ir-sharp-decoder.c
+++ b/drivers/media/rc/ir-sharp-decoder.c
@@ -39,7 +39,7 @@ enum sharp_state {
 /**
  * ir_sharp_decode() - Decode one Sharp pulse or space
  * @dev:	the struct rc_dev descriptor of the device
- * @duration:	the struct ir_raw_event descriptor of the pulse/space
+ * @ev:		the struct ir_raw_event descriptor of the pulse/space
  *
  * This function returns -EINVAL if the pulse violates the state machine
  */
diff --git a/drivers/media/rc/ir-xmp-decoder.c b/drivers/media/rc/ir-xmp-decoder.c
index 6f464be1c8d7af2db47a4684dda6bc10a7ed56f7..712bc6d76e9283944adbabe06f54f9ea536948ed 100644
--- a/drivers/media/rc/ir-xmp-decoder.c
+++ b/drivers/media/rc/ir-xmp-decoder.c
@@ -35,7 +35,7 @@ enum xmp_state {
 /**
  * ir_xmp_decode() - Decode one XMP pulse or space
  * @dev:	the struct rc_dev descriptor of the device
- * @duration:	the struct ir_raw_event descriptor of the pulse/space
+ * @ev:		the struct ir_raw_event descriptor of the pulse/space
  *
  * This function returns -EINVAL if the pulse violates the state machine
  */
diff --git a/drivers/media/rc/rc-ir-raw.c b/drivers/media/rc/rc-ir-raw.c
index f6e5ba4fbb49f5b46b0c06cc9da09cb16265eaca..d78483a504c9f4921d70765a6ccf000d9eefc979 100644
--- a/drivers/media/rc/rc-ir-raw.c
+++ b/drivers/media/rc/rc-ir-raw.c
@@ -128,7 +128,7 @@ EXPORT_SYMBOL_GPL(ir_raw_event_store_edge);
 /**
  * ir_raw_event_store_with_filter() - pass next pulse/space to decoders with some processing
  * @dev:	the struct rc_dev device descriptor
- * @type:	the type of the event that has occurred
+ * @ev:		the event that has occurred
  *
  * This routine (which may be called from an interrupt context) works
  * in similar manner to ir_raw_event_store_edge.
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index 17950e29d4e3ec72a18e4ad2bee2286c99ab99e3..c144b77eac987b5e5431b7e49cb27941002d6b04 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -39,41 +39,41 @@ static const struct {
 	[RC_PROTO_UNKNOWN] = { .name = "unknown", .repeat_period = 250 },
 	[RC_PROTO_OTHER] = { .name = "other", .repeat_period = 250 },
 	[RC_PROTO_RC5] = { .name = "rc-5",
-		.scancode_bits = 0x1f7f, .repeat_period = 164 },
+		.scancode_bits = 0x1f7f, .repeat_period = 250 },
 	[RC_PROTO_RC5X_20] = { .name = "rc-5x-20",
-		.scancode_bits = 0x1f7f3f, .repeat_period = 164 },
+		.scancode_bits = 0x1f7f3f, .repeat_period = 250 },
 	[RC_PROTO_RC5_SZ] = { .name = "rc-5-sz",
-		.scancode_bits = 0x2fff, .repeat_period = 164 },
+		.scancode_bits = 0x2fff, .repeat_period = 250 },
 	[RC_PROTO_JVC] = { .name = "jvc",
 		.scancode_bits = 0xffff, .repeat_period = 250 },
 	[RC_PROTO_SONY12] = { .name = "sony-12",
-		.scancode_bits = 0x1f007f, .repeat_period = 100 },
+		.scancode_bits = 0x1f007f, .repeat_period = 250 },
 	[RC_PROTO_SONY15] = { .name = "sony-15",
-		.scancode_bits = 0xff007f, .repeat_period = 100 },
+		.scancode_bits = 0xff007f, .repeat_period = 250 },
 	[RC_PROTO_SONY20] = { .name = "sony-20",
-		.scancode_bits = 0x1fff7f, .repeat_period = 100 },
+		.scancode_bits = 0x1fff7f, .repeat_period = 250 },
 	[RC_PROTO_NEC] = { .name = "nec",
-		.scancode_bits = 0xffff, .repeat_period = 160 },
+		.scancode_bits = 0xffff, .repeat_period = 250 },
 	[RC_PROTO_NECX] = { .name = "nec-x",
-		.scancode_bits = 0xffffff, .repeat_period = 160 },
+		.scancode_bits = 0xffffff, .repeat_period = 250 },
 	[RC_PROTO_NEC32] = { .name = "nec-32",
-		.scancode_bits = 0xffffffff, .repeat_period = 160 },
+		.scancode_bits = 0xffffffff, .repeat_period = 250 },
 	[RC_PROTO_SANYO] = { .name = "sanyo",
 		.scancode_bits = 0x1fffff, .repeat_period = 250 },
 	[RC_PROTO_MCIR2_KBD] = { .name = "mcir2-kbd",
-		.scancode_bits = 0xffff, .repeat_period = 150 },
+		.scancode_bits = 0xffff, .repeat_period = 250 },
 	[RC_PROTO_MCIR2_MSE] = { .name = "mcir2-mse",
-		.scancode_bits = 0x1fffff, .repeat_period = 150 },
+		.scancode_bits = 0x1fffff, .repeat_period = 250 },
 	[RC_PROTO_RC6_0] = { .name = "rc-6-0",
-		.scancode_bits = 0xffff, .repeat_period = 164 },
+		.scancode_bits = 0xffff, .repeat_period = 250 },
 	[RC_PROTO_RC6_6A_20] = { .name = "rc-6-6a-20",
-		.scancode_bits = 0xfffff, .repeat_period = 164 },
+		.scancode_bits = 0xfffff, .repeat_period = 250 },
 	[RC_PROTO_RC6_6A_24] = { .name = "rc-6-6a-24",
-		.scancode_bits = 0xffffff, .repeat_period = 164 },
+		.scancode_bits = 0xffffff, .repeat_period = 250 },
 	[RC_PROTO_RC6_6A_32] = { .name = "rc-6-6a-32",
-		.scancode_bits = 0xffffffff, .repeat_period = 164 },
+		.scancode_bits = 0xffffffff, .repeat_period = 250 },
 	[RC_PROTO_RC6_MCE] = { .name = "rc-6-mce",
-		.scancode_bits = 0xffff7fff, .repeat_period = 164 },
+		.scancode_bits = 0xffff7fff, .repeat_period = 250 },
 	[RC_PROTO_SHARP] = { .name = "sharp",
 		.scancode_bits = 0x1fff, .repeat_period = 250 },
 	[RC_PROTO_XMP] = { .name = "xmp", .repeat_period = 250 },
@@ -170,10 +170,11 @@ static struct rc_map_list empty_map = {
  * @name:	name to assign to the table
  * @rc_proto:	ir type to assign to the new table
  * @size:	initial size of the table
- * @return:	zero on success or a negative error code
  *
  * This routine will initialize the rc_map and will allocate
  * memory to hold at least the specified number of elements.
+ *
+ * return:	zero on success or a negative error code
  */
 static int ir_create_table(struct rc_map *rc_map,
 			   const char *name, u64 rc_proto, size_t size)
@@ -216,10 +217,11 @@ static void ir_free_table(struct rc_map *rc_map)
  * ir_resize_table() - resizes a scancode table if necessary
  * @rc_map:	the rc_map to resize
  * @gfp_flags:	gfp flags to use when allocating memory
- * @return:	zero on success or a negative error code
  *
  * This routine will shrink the rc_map if it has lots of
  * unused entries and grow it if it is full.
+ *
+ * return:	zero on success or a negative error code
  */
 static int ir_resize_table(struct rc_map *rc_map, gfp_t gfp_flags)
 {
@@ -265,11 +267,13 @@ static int ir_resize_table(struct rc_map *rc_map, gfp_t gfp_flags)
  * @dev:	the struct rc_dev device descriptor
  * @rc_map:	scancode table to be adjusted
  * @index:	index of the mapping that needs to be updated
- * @keycode:	the desired keycode
- * @return:	previous keycode assigned to the mapping
+ * @new_keycode: the desired keycode
  *
  * This routine is used to update scancode->keycode mapping at given
  * position.
+ *
+ * return:	previous keycode assigned to the mapping
+ *
  */
 static unsigned int ir_update_mapping(struct rc_dev *dev,
 				      struct rc_map *rc_map,
@@ -320,12 +324,13 @@ static unsigned int ir_update_mapping(struct rc_dev *dev,
  * @scancode:	the desired scancode
  * @resize:	controls whether we allowed to resize the table to
  *		accommodate not yet present scancodes
- * @return:	index of the mapping containing scancode in question
- *		or -1U in case of failure.
  *
  * This routine is used to locate given scancode in rc_map.
  * If scancode is not yet present the routine will allocate a new slot
  * for it.
+ *
+ * return:	index of the mapping containing scancode in question
+ *		or -1U in case of failure.
  */
 static unsigned int ir_establish_scancode(struct rc_dev *dev,
 					  struct rc_map *rc_map,
@@ -375,11 +380,12 @@ static unsigned int ir_establish_scancode(struct rc_dev *dev,
 /**
  * ir_setkeycode() - set a keycode in the scancode->keycode table
  * @idev:	the struct input_dev device descriptor
- * @scancode:	the desired scancode
- * @keycode:	result
- * @return:	-EINVAL if the keycode could not be inserted, otherwise zero.
+ * @ke:		Input keymap entry
+ * @old_keycode: result
  *
  * This routine is used to handle evdev EVIOCSKEY ioctl.
+ *
+ * return:	-EINVAL if the keycode could not be inserted, otherwise zero.
  */
 static int ir_setkeycode(struct input_dev *idev,
 			 const struct input_keymap_entry *ke,
@@ -422,11 +428,11 @@ static int ir_setkeycode(struct input_dev *idev,
 /**
  * ir_setkeytable() - sets several entries in the scancode->keycode table
  * @dev:	the struct rc_dev device descriptor
- * @to:		the struct rc_map to copy entries to
  * @from:	the struct rc_map to copy entries from
- * @return:	-ENOMEM if all keycodes could not be inserted, otherwise zero.
  *
  * This routine is used to handle table initialization.
+ *
+ * return:	-ENOMEM if all keycodes could not be inserted, otherwise zero.
  */
 static int ir_setkeytable(struct rc_dev *dev,
 			  const struct rc_map *from)
@@ -474,10 +480,11 @@ static int rc_map_cmp(const void *key, const void *elt)
  * ir_lookup_by_scancode() - locate mapping by scancode
  * @rc_map:	the struct rc_map to search
  * @scancode:	scancode to look for in the table
- * @return:	index in the table, -1U if not found
  *
  * This routine performs binary search in RC keykeymap table for
  * given scancode.
+ *
+ * return:	index in the table, -1U if not found
  */
 static unsigned int ir_lookup_by_scancode(const struct rc_map *rc_map,
 					  unsigned int scancode)
@@ -495,11 +502,11 @@ static unsigned int ir_lookup_by_scancode(const struct rc_map *rc_map,
 /**
  * ir_getkeycode() - get a keycode from the scancode->keycode table
  * @idev:	the struct input_dev device descriptor
- * @scancode:	the desired scancode
- * @keycode:	used to return the keycode, if found, or KEY_RESERVED
- * @return:	always returns zero.
+ * @ke:		Input keymap entry
  *
  * This routine is used to handle evdev EVIOCGKEY ioctl.
+ *
+ * return:	always returns zero.
  */
 static int ir_getkeycode(struct input_dev *idev,
 			 struct input_keymap_entry *ke)
@@ -556,11 +563,12 @@ static int ir_getkeycode(struct input_dev *idev,
  * rc_g_keycode_from_table() - gets the keycode that corresponds to a scancode
  * @dev:	the struct rc_dev descriptor of the device
  * @scancode:	the scancode to look for
- * @return:	the corresponding keycode, or KEY_RESERVED
  *
  * This routine is used by drivers which need to convert a scancode to a
  * keycode. Normally it should not be used since drivers should have no
  * interest in keycodes.
+ *
+ * return:	the corresponding keycode, or KEY_RESERVED
  */
 u32 rc_g_keycode_from_table(struct rc_dev *dev, u32 scancode)
 {
@@ -625,7 +633,8 @@ EXPORT_SYMBOL_GPL(rc_keyup);
 
 /**
  * ir_timer_keyup() - generates a keyup event after a timeout
- * @cookie:	a pointer to the struct rc_dev for the device
+ *
+ * @t:		a pointer to the struct timer_list
  *
  * This routine will generate a keyup event some time after a keydown event
  * is generated when no further activity has been detected.
@@ -780,7 +789,8 @@ EXPORT_SYMBOL_GPL(rc_keydown_notimeout);
  *			  provides sensible defaults
  * @dev:	the struct rc_dev descriptor of the device
  * @filter:	the scancode and mask
- * @return:	0 or -EINVAL if the filter is not valid
+ *
+ * return:	0 or -EINVAL if the filter is not valid
  */
 static int rc_validate_filter(struct rc_dev *dev,
 			      struct rc_scancode_filter *filter)
diff --git a/drivers/media/rc/sir_ir.c b/drivers/media/rc/sir_ir.c
index 76120664b700ad6f8f5044e9f87cf1a92ad455ac..9ee2c9196b4d81fd64c90937eccaac7a69252383 100644
--- a/drivers/media/rc/sir_ir.c
+++ b/drivers/media/rc/sir_ir.c
@@ -57,7 +57,7 @@ static void add_read_queue(int flag, unsigned long val);
 static irqreturn_t sir_interrupt(int irq, void *dev_id);
 static void send_space(unsigned long len);
 static void send_pulse(unsigned long len);
-static void init_hardware(void);
+static int init_hardware(void);
 static void drop_hardware(void);
 /* Initialisation */
 
@@ -263,11 +263,36 @@ static void send_pulse(unsigned long len)
 	}
 }
 
-static void init_hardware(void)
+static int init_hardware(void)
 {
+	u8 scratch, scratch2, scratch3;
 	unsigned long flags;
 
 	spin_lock_irqsave(&hardware_lock, flags);
+
+	/*
+	 * This is a simple port existence test, borrowed from the autoconfig
+	 * function in drivers/tty/serial/8250/8250_port.c
+	 */
+	scratch = sinp(UART_IER);
+	soutp(UART_IER, 0);
+#ifdef __i386__
+	outb(0xff, 0x080);
+#endif
+	scratch2 = sinp(UART_IER) & 0x0f;
+	soutp(UART_IER, 0x0f);
+#ifdef __i386__
+	outb(0x00, 0x080);
+#endif
+	scratch3 = sinp(UART_IER) & 0x0f;
+	soutp(UART_IER, scratch);
+	if (scratch2 != 0 || scratch3 != 0x0f) {
+		/* we fail, there's nothing here */
+		spin_unlock_irqrestore(&hardware_lock, flags);
+		pr_err("port existence test failed, cannot continue\n");
+		return -ENODEV;
+	}
+
 	/* reset UART */
 	outb(0, io + UART_MCR);
 	outb(0, io + UART_IER);
@@ -285,6 +310,8 @@ static void init_hardware(void)
 	/* turn on UART */
 	outb(UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2, io + UART_MCR);
 	spin_unlock_irqrestore(&hardware_lock, flags);
+
+	return 0;
 }
 
 static void drop_hardware(void)
@@ -334,14 +361,19 @@ static int sir_ir_probe(struct platform_device *dev)
 		pr_err("IRQ %d already in use.\n", irq);
 		return retval;
 	}
+
+	retval = init_hardware();
+	if (retval) {
+		del_timer_sync(&timerlist);
+		return retval;
+	}
+
 	pr_info("I/O port 0x%.4x, IRQ %d.\n", io, irq);
 
 	retval = devm_rc_register_device(&sir_ir_dev->dev, rcdev);
 	if (retval < 0)
 		return retval;
 
-	init_hardware();
-
 	return 0;
 }
 
diff --git a/drivers/media/rc/st_rc.c b/drivers/media/rc/st_rc.c
index a8e39c635f3400025e6ac69ff2358206b511482a..d2efd7b2c3bcc71ecac638168903ec697868a6d3 100644
--- a/drivers/media/rc/st_rc.c
+++ b/drivers/media/rc/st_rc.c
@@ -49,7 +49,7 @@ struct st_rc_device {
 #define IRB_RX_NOISE_SUPPR      0x5c	/* noise suppression  */
 #define IRB_RX_POLARITY_INV     0x68	/* polarity inverter  */
 
-/**
+/*
  * IRQ set: Enable full FIFO                 1  -> bit  3;
  *          Enable overrun IRQ               1  -> bit  2;
  *          Enable last symbol IRQ           1  -> bit  1:
@@ -72,7 +72,7 @@ static void st_rc_send_lirc_timeout(struct rc_dev *rdev)
 	ir_raw_event_store(rdev, &ev);
 }
 
-/**
+/*
  * RX graphical example to better understand the difference between ST IR block
  * output and standard definition used by LIRC (and most of the world!)
  *
@@ -317,7 +317,7 @@ static int st_rc_probe(struct platform_device *pdev)
 	device_init_wakeup(dev, true);
 	dev_pm_set_wake_irq(dev, rc_dev->irq);
 
-	/**
+	/*
 	 * for LIRC_MODE_MODE2 or LIRC_MODE_PULSE or LIRC_MODE_RAW
 	 * lircd expects a long space first before a signal train to sync.
 	 */
diff --git a/drivers/media/rc/streamzap.c b/drivers/media/rc/streamzap.c
index 4eebfcfc10f34bc7f49869a6bfee265b5842b15a..c9a70fda88a88bcd02b45137581896857c612bb9 100644
--- a/drivers/media/rc/streamzap.c
+++ b/drivers/media/rc/streamzap.c
@@ -191,7 +191,7 @@ static void sz_push_half_space(struct streamzap_ir *sz,
 	sz_push_full_space(sz, value & SZ_SPACE_MASK);
 }
 
-/**
+/*
  * streamzap_callback - usb IRQ handler callback
  *
  * This procedure is invoked on reception of data from
@@ -321,7 +321,7 @@ static struct rc_dev *streamzap_init_rc_dev(struct streamzap_ir *sz)
 	return NULL;
 }
 
-/**
+/*
  *	streamzap_probe
  *
  *	Called by usb-core to associated with a candidate device
@@ -450,7 +450,7 @@ static int streamzap_probe(struct usb_interface *intf,
 	return retval;
 }
 
-/**
+/*
  * streamzap_disconnect
  *
  * Called by the usb core when the device is removed from the system.
diff --git a/drivers/media/tuners/mt2063.c b/drivers/media/tuners/mt2063.c
index 8b39d8dc97a075972c5f1824e2a286a6c675f4fd..5c87c5c6a455691a1366b623ceedd21b9e6b5120 100644
--- a/drivers/media/tuners/mt2063.c
+++ b/drivers/media/tuners/mt2063.c
@@ -1397,9 +1397,9 @@ static u32 MT2063_Round_fLO(u32 f_LO, u32 f_LO_Step, u32 f_ref)
  *                        risk of overflow.  It accurately calculates
  *                        f_ref * num / denom to within 1 HZ with fixed math.
  *
- * @num :	Fractional portion of the multiplier
+ * @f_ref:	SRO frequency.
+ * @num:	Fractional portion of the multiplier
  * @denom:	denominator portion of the ratio
- * @f_Ref:	SRO frequency.
  *
  * This calculation handles f_ref as two separate 14-bit fields.
  * Therefore, a maximum value of 2^28-1 may safely be used for f_ref.
@@ -1464,8 +1464,6 @@ static u32 MT2063_CalcLO1Mult(u32 *Div,
  * @f_LO:	desired LO frequency.
  * @f_LO_Step:	Minimum step size for the LO (in Hz).
  * @f_Ref:	SRO frequency.
- * @f_Avoid:	Range of PLL frequencies to avoid near
- *		integer multiples of f_Ref (in Hz).
  *
  * Returns: Recalculated LO frequency.
  */
diff --git a/drivers/media/usb/dvb-usb/cinergyT2-fe.c b/drivers/media/usb/dvb-usb/cinergyT2-fe.c
index f9772ad0a2a5c114ebbae1c70f381b84f6525ea5..5a2f81311fb7767926f160b8ed3d167455ae52f6 100644
--- a/drivers/media/usb/dvb-usb/cinergyT2-fe.c
+++ b/drivers/media/usb/dvb-usb/cinergyT2-fe.c
@@ -26,7 +26,7 @@
 #include "cinergyT2.h"
 
 
-/**
+/*
  *  convert linux-dvb frontend parameter set into TPS.
  *  See ETSI ETS-300744, section 4.6.2, table 9 for details.
  *
diff --git a/drivers/media/usb/dvb-usb/dib0700_devices.c b/drivers/media/usb/dvb-usb/dib0700_devices.c
index 92098c1b78e51c69b4cba8179d56f24f2a8410fe..366b055299157e617020ee00d9c356cf0758732f 100644
--- a/drivers/media/usb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/usb/dvb-usb/dib0700_devices.c
@@ -1677,10 +1677,10 @@ static int dib8096_set_param_override(struct dvb_frontend *fe)
 		return -EINVAL;
 	}
 
-	/** Update PLL if needed ratio **/
+	/* Update PLL if needed ratio */
 	state->dib8000_ops.update_pll(fe, &dib8090_pll_config_12mhz, fe->dtv_property_cache.bandwidth_hz / 1000, 0);
 
-	/** Get optimize PLL ratio to remove spurious **/
+	/* Get optimize PLL ratio to remove spurious */
 	pll_ratio = dib8090_compute_pll_parameters(fe);
 	if (pll_ratio == 17)
 		timf = 21387946;
@@ -1691,7 +1691,7 @@ static int dib8096_set_param_override(struct dvb_frontend *fe)
 	else
 		timf = 18179756;
 
-	/** Update ratio **/
+	/* Update ratio */
 	state->dib8000_ops.update_pll(fe, &dib8090_pll_config_12mhz, fe->dtv_property_cache.bandwidth_hz / 1000, pll_ratio);
 
 	state->dib8000_ops.ctrl_timf(fe, DEMOD_TIMF_SET, timf);
@@ -3357,7 +3357,7 @@ static int novatd_sleep_override(struct dvb_frontend* fe)
 	return state->sleep(fe);
 }
 
-/**
+/*
  * novatd_frontend_attach - Nova-TD specific attach
  *
  * Nova-TD has GPIO0, 1 and 2 for LEDs. So do not fiddle with them except for
diff --git a/drivers/media/usb/dvb-usb/dibusb-common.c b/drivers/media/usb/dvb-usb/dibusb-common.c
index 8207e6900656bef61de4f0df10b37b5cc6110f98..bcacb0f220282d9c3537e8847b178d7f0ec71727 100644
--- a/drivers/media/usb/dvb-usb/dibusb-common.c
+++ b/drivers/media/usb/dvb-usb/dibusb-common.c
@@ -223,8 +223,20 @@ EXPORT_SYMBOL(dibusb_i2c_algo);
 
 int dibusb_read_eeprom_byte(struct dvb_usb_device *d, u8 offs, u8 *val)
 {
-	u8 wbuf[1] = { offs };
-	return dibusb_i2c_msg(d, 0x50, wbuf, 1, val, 1);
+	u8 *buf;
+	int rc;
+
+	buf = kmalloc(2, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf[0] = offs;
+
+	rc = dibusb_i2c_msg(d, 0x50, &buf[0], 1, &buf[1], 1);
+	*val = buf[1];
+	kfree(buf);
+
+	return rc;
 }
 EXPORT_SYMBOL(dibusb_read_eeprom_byte);
 
diff --git a/drivers/media/usb/dvb-usb/friio-fe.c b/drivers/media/usb/dvb-usb/friio-fe.c
index 41261317bd5cc95b97a26896aa67c5669a7552f1..b6046e0e07f6845c150d1894d3ad1c34f04b6b40 100644
--- a/drivers/media/usb/dvb-usb/friio-fe.c
+++ b/drivers/media/usb/dvb-usb/friio-fe.c
@@ -297,7 +297,7 @@ static int jdvbt90502_set_frontend(struct dvb_frontend *fe)
 }
 
 
-/**
+/*
  * (reg, val) commad list to initialize this module.
  *  captured on a Windows box.
  */
diff --git a/drivers/media/usb/dvb-usb/friio.c b/drivers/media/usb/dvb-usb/friio.c
index 62abe6c43a324588af7f269d36a55e601ae6c962..16875945e662d0f303930e9993706564bb75485a 100644
--- a/drivers/media/usb/dvb-usb/friio.c
+++ b/drivers/media/usb/dvb-usb/friio.c
@@ -21,7 +21,7 @@ MODULE_PARM_DESC(debug,
 
 DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);
 
-/**
+/*
  * Indirect I2C access to the PLL via FE.
  * whole I2C protocol data to the PLL is sent via the FE's I2C register.
  * This is done by a control msg to the FE with the I2C data accompanied, and
diff --git a/drivers/media/usb/gspca/ov519.c b/drivers/media/usb/gspca/ov519.c
index f1537daf4e2ea4f3c6cb32c4cf675621f37a4d9c..1b30434b72ef2932f4cd1c57c527ade6b06ebdac 100644
--- a/drivers/media/usb/gspca/ov519.c
+++ b/drivers/media/usb/gspca/ov519.c
@@ -1,4 +1,4 @@
-/**
+/*
  * OV519 driver
  *
  * Copyright (C) 2008-2011 Jean-François Moine <moinejf@free.fr>
diff --git a/drivers/media/usb/pwc/pwc-dec23.c b/drivers/media/usb/pwc/pwc-dec23.c
index 3792fedff9515e85734200d8ffb22aa1ed15bb93..1283b3bd9800cd63bb33ce2f460d11c616a0fd33 100644
--- a/drivers/media/usb/pwc/pwc-dec23.c
+++ b/drivers/media/usb/pwc/pwc-dec23.c
@@ -649,11 +649,10 @@ static void DecompressBand23(struct pwc_dec23_private *pdec,
 }
 
 /**
- *
  * Uncompress a pwc23 buffer.
- *
- * src: raw data
- * dst: image output
+ * @pdev: pointer to pwc device's internal struct
+ * @src: raw data
+ * @dst: image output
  */
 void pwc_dec23_decompress(struct pwc_device *pdev,
 			  const void *src,
diff --git a/drivers/media/usb/siano/smsusb.c b/drivers/media/usb/siano/smsusb.c
index 8c1f926567ec04425f36fd9fd68d80cf37df98a4..d07349cf94898550972e069cf30d143a261aaa97 100644
--- a/drivers/media/usb/siano/smsusb.c
+++ b/drivers/media/usb/siano/smsusb.c
@@ -74,7 +74,7 @@ struct smsusb_device_t {
 static int smsusb_submit_urb(struct smsusb_device_t *dev,
 			     struct smsusb_urb_t *surb);
 
-/**
+/*
  * Completing URB's callback handler - bottom half (proccess context)
  * submits the URB prepared on smsusb_onresponse()
  */
@@ -86,7 +86,7 @@ static void do_submit_urb(struct work_struct *work)
 	smsusb_submit_urb(dev, surb);
 }
 
-/**
+/*
  * Completing URB's callback handler - top half (interrupt context)
  * adds completing sms urb to the global surbs list and activtes the worker
  * thread the surb
diff --git a/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c b/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
index b842f367249f3361bd46ae73dd1aacfc7b500100..a142b9dc0feb2bb99be8cc728dc369710a5825a4 100644
--- a/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
+++ b/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
@@ -76,7 +76,7 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);
 #define TTUSB_REV_2_2	0x22
 #define TTUSB_BUDGET_NAME "ttusb_stc_fw"
 
-/**
+/*
  *  since we're casting (struct ttusb*) <-> (struct dvb_demux*) around
  *  the dvb_demux field must be the first in struct!!
  */
@@ -713,7 +713,7 @@ static void ttusb_process_frame(struct ttusb *ttusb, u8 * data, int len)
 					}
 				}
 
-			/**
+			/*
 			 * if length is valid and we reached the end:
 			 * goto next muxpack
 			 */
@@ -729,7 +729,7 @@ static void ttusb_process_frame(struct ttusb *ttusb, u8 * data, int len)
 					/* maximum bytes, until we know the length */
 					ttusb->muxpack_len = 2;
 
-				/**
+				/*
 				 * no muxpacks left?
 				 * return to search-sync state
 				 */
diff --git a/drivers/media/usb/usbtv/usbtv-core.c b/drivers/media/usb/usbtv/usbtv-core.c
index b55b79b8e921c9cd9717ccbad251f9b8837f6765..127f8a0c098bd417801587cecc4ea208ecd5725c 100644
--- a/drivers/media/usb/usbtv/usbtv-core.c
+++ b/drivers/media/usb/usbtv/usbtv-core.c
@@ -144,6 +144,7 @@ static void usbtv_disconnect(struct usb_interface *intf)
 
 static const struct usb_device_id usbtv_id_table[] = {
 	{ USB_DEVICE(0x1b71, 0x3002) },
+	{ USB_DEVICE(0x1f71, 0x3301) },
 	{}
 };
 MODULE_DEVICE_TABLE(usb, usbtv_id_table);
diff --git a/drivers/media/v4l2-core/tuner-core.c b/drivers/media/v4l2-core/tuner-core.c
index 8db45dfc271b2823d97da42ecbf44c02d6322152..82852f23a3b6af07085754960cd7d059eda2f5f9 100644
--- a/drivers/media/v4l2-core/tuner-core.c
+++ b/drivers/media/v4l2-core/tuner-core.c
@@ -239,7 +239,7 @@ static const struct analog_demod_ops tuner_analog_ops = {
  * @type:		type of the tuner (e. g. tuner number)
  * @new_mode_mask:	Indicates if tuner supports TV and/or Radio
  * @new_config:		an optional parameter used by a few tuners to adjust
-			internal parameters, like LNA mode
+ *			internal parameters, like LNA mode
  * @tuner_callback:	an optional function to be called when switching
  *			to analog mode
  *
@@ -750,6 +750,7 @@ static int tuner_remove(struct i2c_client *client)
 /**
  * check_mode - Verify if tuner supports the requested mode
  * @t: a pointer to the module's internal struct_tuner
+ * @mode: mode of the tuner, as defined by &enum v4l2_tuner_type.
  *
  * This function checks if the tuner is capable of tuning analog TV,
  * digital TV or radio, depending on what the caller wants. If the
@@ -757,6 +758,7 @@ static int tuner_remove(struct i2c_client *client)
  * returns 0.
  * This function is needed for boards that have a separate tuner for
  * radio (like devices with tea5767).
+ *
  * NOTE: mt20xx uses V4L2_TUNER_DIGITAL_TV and calls set_tv_freq to
  *       select a TV frequency. So, t_mode = T_ANALOG_TV could actually
  *	 be used to represent a Digital TV too.
diff --git a/drivers/media/v4l2-core/v4l2-async.c b/drivers/media/v4l2-core/v4l2-async.c
index a7c3464976f2436133cbe729a2a1ef507c72c926..e5acfab470a5ee6bd3dc9bea4c333abe44c7a6f5 100644
--- a/drivers/media/v4l2-core/v4l2-async.c
+++ b/drivers/media/v4l2-core/v4l2-async.c
@@ -558,8 +558,7 @@ int v4l2_async_register_subdev(struct v4l2_subdev *sd)
 		if (!asd)
 			continue;
 
-		ret = v4l2_async_match_notify(notifier, notifier->v4l2_dev, sd,
-					      asd);
+		ret = v4l2_async_match_notify(notifier, v4l2_dev, sd, asd);
 		if (ret)
 			goto err_unbind;
 
diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c
index 5c8c49d240d14cf959a83de8d9f365873c907e95..930f9c53a64e4247b74459f1e1a465b2dee13db8 100644
--- a/drivers/media/v4l2-core/v4l2-dv-timings.c
+++ b/drivers/media/v4l2-core/v4l2-dv-timings.c
@@ -245,11 +245,11 @@ EXPORT_SYMBOL_GPL(v4l2_find_dv_timings_cea861_vic);
 
 /**
  * v4l2_match_dv_timings - check if two timings match
- * @t1 - compare this v4l2_dv_timings struct...
- * @t2 - with this struct.
- * @pclock_delta - the allowed pixelclock deviation.
- * @match_reduced_fps - if true, then fail if V4L2_DV_FL_REDUCED_FPS does not
- * match.
+ * @t1: compare this v4l2_dv_timings struct...
+ * @t2: with this struct.
+ * @pclock_delta: the allowed pixelclock deviation.
+ * @match_reduced_fps: if true, then fail if V4L2_DV_FL_REDUCED_FPS does not
+ * 	match.
  *
  * Compare t1 with t2 with a given margin of error for the pixelclock.
  */
diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c
index 681b192420d922f7677d32802672c9af2c810e86..fb72c7ac04d48298148a08e4d7c23c35c9451478 100644
--- a/drivers/media/v4l2-core/v4l2-fwnode.c
+++ b/drivers/media/v4l2-core/v4l2-fwnode.c
@@ -458,11 +458,6 @@ static int __v4l2_async_notifier_parse_fwnode_endpoints(
 		if (!is_available)
 			continue;
 
-		if (WARN_ON(notifier->num_subdevs >= notifier->max_subdevs)) {
-			ret = -EINVAL;
-			break;
-		}
-
 		if (has_port) {
 			struct fwnode_endpoint ep;
 
@@ -474,6 +469,11 @@ static int __v4l2_async_notifier_parse_fwnode_endpoints(
 				continue;
 		}
 
+		if (WARN_ON(notifier->num_subdevs >= notifier->max_subdevs)) {
+			ret = -EINVAL;
+			break;
+		}
+
 		ret = v4l2_async_notifier_fwnode_parse_endpoint(
 			dev, notifier, fwnode, asd_struct_size, parse_endpoint);
 		if (ret < 0)
diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index f62e68aa04c42712060a9898a55ae51511c2cd55..bc580fbe18fac9d9567f491f963b6ca4d9725d36 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -183,6 +183,7 @@ EXPORT_SYMBOL(v4l2_m2m_get_curr_priv);
 
 /**
  * v4l2_m2m_try_run() - select next job to perform and run it if possible
+ * @m2m_dev: per-device context
  *
  * Get next transaction (if present) from the waiting jobs list and run it.
  */
@@ -281,6 +282,7 @@ EXPORT_SYMBOL_GPL(v4l2_m2m_try_schedule);
 
 /**
  * v4l2_m2m_cancel_job() - cancel pending jobs for the context
+ * @m2m_ctx: m2m context with jobs to be canceled
  *
  * In case of streamoff or release called on any context,
  * 1] If the context is currently running, then abort job will be called
diff --git a/drivers/media/v4l2-core/videobuf-core.c b/drivers/media/v4l2-core/videobuf-core.c
index 1dbf6f7785bba98c881be37bf355dbc43a71dd05..e87fb13b22dc7f4b7bd986596bdfc3f08091b14c 100644
--- a/drivers/media/v4l2-core/videobuf-core.c
+++ b/drivers/media/v4l2-core/videobuf-core.c
@@ -222,7 +222,7 @@ int videobuf_queue_is_busy(struct videobuf_queue *q)
 }
 EXPORT_SYMBOL_GPL(videobuf_queue_is_busy);
 
-/**
+/*
  * __videobuf_free() - free all the buffers and their control structures
  *
  * This function can only be called if streaming/reading is off, i.e. no buffers
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index cb115ba6a1d280e339247f14759cfe0c0a6fab6a..a8589d96ef723f04fc6a6a6fc63d5b794aa0cd70 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -188,7 +188,7 @@ module_param(debug, int, 0644);
 static void __vb2_queue_cancel(struct vb2_queue *q);
 static void __enqueue_in_driver(struct vb2_buffer *vb);
 
-/**
+/*
  * __vb2_buf_mem_alloc() - allocate video memory for the given buffer
  */
 static int __vb2_buf_mem_alloc(struct vb2_buffer *vb)
@@ -229,7 +229,7 @@ static int __vb2_buf_mem_alloc(struct vb2_buffer *vb)
 	return ret;
 }
 
-/**
+/*
  * __vb2_buf_mem_free() - free memory of the given buffer
  */
 static void __vb2_buf_mem_free(struct vb2_buffer *vb)
@@ -243,7 +243,7 @@ static void __vb2_buf_mem_free(struct vb2_buffer *vb)
 	}
 }
 
-/**
+/*
  * __vb2_buf_userptr_put() - release userspace memory associated with
  * a USERPTR buffer
  */
@@ -258,7 +258,7 @@ static void __vb2_buf_userptr_put(struct vb2_buffer *vb)
 	}
 }
 
-/**
+/*
  * __vb2_plane_dmabuf_put() - release memory associated with
  * a DMABUF shared plane
  */
@@ -277,7 +277,7 @@ static void __vb2_plane_dmabuf_put(struct vb2_buffer *vb, struct vb2_plane *p)
 	p->dbuf_mapped = 0;
 }
 
-/**
+/*
  * __vb2_buf_dmabuf_put() - release memory associated with
  * a DMABUF shared buffer
  */
@@ -289,7 +289,7 @@ static void __vb2_buf_dmabuf_put(struct vb2_buffer *vb)
 		__vb2_plane_dmabuf_put(vb, &vb->planes[plane]);
 }
 
-/**
+/*
  * __setup_offsets() - setup unique offsets ("cookies") for every plane in
  * the buffer.
  */
@@ -317,7 +317,7 @@ static void __setup_offsets(struct vb2_buffer *vb)
 	}
 }
 
-/**
+/*
  * __vb2_queue_alloc() - allocate videobuf buffer structures and (for MMAP type)
  * video buffer memory for all buffers/planes on the queue and initializes the
  * queue
@@ -386,7 +386,7 @@ static int __vb2_queue_alloc(struct vb2_queue *q, enum vb2_memory memory,
 	return buffer;
 }
 
-/**
+/*
  * __vb2_free_mem() - release all video buffer memory for a given queue
  */
 static void __vb2_free_mem(struct vb2_queue *q, unsigned int buffers)
@@ -410,7 +410,7 @@ static void __vb2_free_mem(struct vb2_queue *q, unsigned int buffers)
 	}
 }
 
-/**
+/*
  * __vb2_queue_free() - free buffers at the end of the queue - video memory and
  * related information, if no buffers are left return the queue to an
  * uninitialized state. Might be called even if the queue has already been freed.
@@ -544,7 +544,7 @@ bool vb2_buffer_in_use(struct vb2_queue *q, struct vb2_buffer *vb)
 }
 EXPORT_SYMBOL(vb2_buffer_in_use);
 
-/**
+/*
  * __buffers_in_use() - return true if any buffers on the queue are in use and
  * the queue cannot be freed (by the means of REQBUFS(0)) call
  */
@@ -564,7 +564,7 @@ void vb2_core_querybuf(struct vb2_queue *q, unsigned int index, void *pb)
 }
 EXPORT_SYMBOL_GPL(vb2_core_querybuf);
 
-/**
+/*
  * __verify_userptr_ops() - verify that all memory operations required for
  * USERPTR queue type have been provided
  */
@@ -577,7 +577,7 @@ static int __verify_userptr_ops(struct vb2_queue *q)
 	return 0;
 }
 
-/**
+/*
  * __verify_mmap_ops() - verify that all memory operations required for
  * MMAP queue type have been provided
  */
@@ -590,7 +590,7 @@ static int __verify_mmap_ops(struct vb2_queue *q)
 	return 0;
 }
 
-/**
+/*
  * __verify_dmabuf_ops() - verify that all memory operations required for
  * DMABUF queue type have been provided
  */
@@ -953,7 +953,7 @@ void vb2_discard_done(struct vb2_queue *q)
 }
 EXPORT_SYMBOL_GPL(vb2_discard_done);
 
-/**
+/*
  * __prepare_mmap() - prepare an MMAP buffer
  */
 static int __prepare_mmap(struct vb2_buffer *vb, const void *pb)
@@ -966,7 +966,7 @@ static int __prepare_mmap(struct vb2_buffer *vb, const void *pb)
 	return ret ? ret : call_vb_qop(vb, buf_prepare, vb);
 }
 
-/**
+/*
  * __prepare_userptr() - prepare a USERPTR buffer
  */
 static int __prepare_userptr(struct vb2_buffer *vb, const void *pb)
@@ -1082,7 +1082,7 @@ static int __prepare_userptr(struct vb2_buffer *vb, const void *pb)
 	return ret;
 }
 
-/**
+/*
  * __prepare_dmabuf() - prepare a DMABUF buffer
  */
 static int __prepare_dmabuf(struct vb2_buffer *vb, const void *pb)
@@ -1215,7 +1215,7 @@ static int __prepare_dmabuf(struct vb2_buffer *vb, const void *pb)
 	return ret;
 }
 
-/**
+/*
  * __enqueue_in_driver() - enqueue a vb2_buffer in driver for processing
  */
 static void __enqueue_in_driver(struct vb2_buffer *vb)
@@ -1298,7 +1298,7 @@ int vb2_core_prepare_buf(struct vb2_queue *q, unsigned int index, void *pb)
 }
 EXPORT_SYMBOL_GPL(vb2_core_prepare_buf);
 
-/**
+/*
  * vb2_start_streaming() - Attempt to start streaming.
  * @q:		videobuf2 queue
  *
@@ -1427,7 +1427,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb)
 }
 EXPORT_SYMBOL_GPL(vb2_core_qbuf);
 
-/**
+/*
  * __vb2_wait_for_done_vb() - wait for a buffer to become available
  * for dequeuing
  *
@@ -1502,7 +1502,7 @@ static int __vb2_wait_for_done_vb(struct vb2_queue *q, int nonblocking)
 	return 0;
 }
 
-/**
+/*
  * __vb2_get_done_vb() - get a buffer ready for dequeuing
  *
  * Will sleep if required for nonblocking == false.
@@ -1553,7 +1553,7 @@ int vb2_wait_for_all_buffers(struct vb2_queue *q)
 }
 EXPORT_SYMBOL_GPL(vb2_wait_for_all_buffers);
 
-/**
+/*
  * __vb2_dqbuf() - bring back the buffer to the DEQUEUED state
  */
 static void __vb2_dqbuf(struct vb2_buffer *vb)
@@ -1625,7 +1625,7 @@ int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
 }
 EXPORT_SYMBOL_GPL(vb2_core_dqbuf);
 
-/**
+/*
  * __vb2_queue_cancel() - cancel and stop (pause) streaming
  *
  * Removes all queued buffers from driver's queue and all buffers queued by
@@ -1773,7 +1773,7 @@ int vb2_core_streamoff(struct vb2_queue *q, unsigned int type)
 }
 EXPORT_SYMBOL_GPL(vb2_core_streamoff);
 
-/**
+/*
  * __find_plane_by_offset() - find plane associated with the given offset off
  */
 static int __find_plane_by_offset(struct vb2_queue *q, unsigned long off,
@@ -2104,7 +2104,7 @@ unsigned int vb2_core_poll(struct vb2_queue *q, struct file *file,
 }
 EXPORT_SYMBOL_GPL(vb2_core_poll);
 
-/**
+/*
  * struct vb2_fileio_buf - buffer context used by file io emulator
  *
  * vb2 provides a compatibility layer and emulator of file io (read and
@@ -2118,7 +2118,7 @@ struct vb2_fileio_buf {
 	unsigned int queued:1;
 };
 
-/**
+/*
  * struct vb2_fileio_data - queue context used by file io emulator
  *
  * @cur_index:	the index of the buffer currently being read from or
@@ -2155,7 +2155,7 @@ struct vb2_fileio_data {
 	unsigned write_immediately:1;
 };
 
-/**
+/*
  * __vb2_init_fileio() - initialize file io emulator
  * @q:		videobuf2 queue
  * @read:	mode selector (1 means read, 0 means write)
@@ -2274,7 +2274,7 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read)
 	return ret;
 }
 
-/**
+/*
  * __vb2_cleanup_fileio() - free resourced used by file io emulator
  * @q:		videobuf2 queue
  */
@@ -2293,7 +2293,7 @@ static int __vb2_cleanup_fileio(struct vb2_queue *q)
 	return 0;
 }
 
-/**
+/*
  * __vb2_perform_fileio() - perform a single file io (read or write) operation
  * @q:		videobuf2 queue
  * @data:	pointed to target userspace buffer
diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c
index 4bb8424114ce6d12e34d28b1620a1fafeeafa5ca..89e51989332bb0916e98e355767ce5518324bd7a 100644
--- a/drivers/media/v4l2-core/videobuf2-memops.c
+++ b/drivers/media/v4l2-core/videobuf2-memops.c
@@ -120,7 +120,7 @@ static void vb2_common_vm_close(struct vm_area_struct *vma)
 	h->put(h->arg);
 }
 
-/**
+/*
  * vb2_common_vm_ops - common vm_ops used for tracking refcount of mmaped
  * video buffers
  */
diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c
index 0c0669976bdc1c1424340edbc7c5d68c3c2ec8b6..4075314a698933ab9f0f25c60064a7c2d2cea98a 100644
--- a/drivers/media/v4l2-core/videobuf2-v4l2.c
+++ b/drivers/media/v4l2-core/videobuf2-v4l2.c
@@ -49,7 +49,7 @@ module_param(debug, int, 0644);
 #define V4L2_BUFFER_OUT_FLAGS	(V4L2_BUF_FLAG_PFRAME | V4L2_BUF_FLAG_BFRAME | \
 				 V4L2_BUF_FLAG_KEYFRAME | V4L2_BUF_FLAG_TIMECODE)
 
-/**
+/*
  * __verify_planes_array() - verify that the planes array passed in struct
  * v4l2_buffer from userspace can be safely used
  */
@@ -78,7 +78,7 @@ static int __verify_planes_array_core(struct vb2_buffer *vb, const void *pb)
 	return __verify_planes_array(vb, pb);
 }
 
-/**
+/*
  * __verify_length() - Verify that the bytesused value for each plane fits in
  * the plane length and that the data offset doesn't exceed the bytesused value.
  */
@@ -181,7 +181,7 @@ static int vb2_queue_or_prepare_buf(struct vb2_queue *q, struct v4l2_buffer *b,
 	return __verify_planes_array(q->bufs[b->index], b);
 }
 
-/**
+/*
  * __fill_v4l2_buffer() - fill in a struct v4l2_buffer with information to be
  * returned to userspace
  */
@@ -286,7 +286,7 @@ static void __fill_v4l2_buffer(struct vb2_buffer *vb, void *pb)
 		q->last_buffer_dequeued = true;
 }
 
-/**
+/*
  * __fill_vb2_buffer() - fill a vb2_buffer with information provided in a
  * v4l2_buffer by the userspace. It also verifies that struct
  * v4l2_buffer has a valid number of planes.
@@ -446,7 +446,7 @@ static const struct vb2_buf_ops v4l2_buf_ops = {
 	.copy_timestamp		= __copy_timestamp,
 };
 
-/**
+/*
  * vb2_querybuf() - query video buffer information
  * @q:		videobuf queue
  * @b:		buffer struct passed from userspace to vidioc_querybuf handler
diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c
index 09cf3699e354415046af18c21eae3d98a445a53e..a307832d7e45fd90e046c7118d7fe505036d3cf6 100644
--- a/drivers/mfd/arizona-irq.c
+++ b/drivers/mfd/arizona-irq.c
@@ -184,6 +184,7 @@ static struct irq_chip arizona_irq_chip = {
 };
 
 static struct lock_class_key arizona_irq_lock_class;
+static struct lock_class_key arizona_irq_request_class;
 
 static int arizona_irq_map(struct irq_domain *h, unsigned int virq,
 			      irq_hw_number_t hw)
@@ -191,7 +192,8 @@ static int arizona_irq_map(struct irq_domain *h, unsigned int virq,
 	struct arizona *data = h->host_data;
 
 	irq_set_chip_data(virq, data);
-	irq_set_lockdep_class(virq, &arizona_irq_lock_class);
+	irq_set_lockdep_class(virq, &arizona_irq_lock_class,
+		&arizona_irq_request_class);
 	irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq);
 	irq_set_nested_thread(virq, 1);
 	irq_set_noprobe(virq);
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index c9714072e22465d4b23d8101038f782b084b2dca..59c82cdcf48d8a508613dbc7b1c98654285de28f 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
 	u8 *ptr;
 	u8 *rx_buf;
 	u8 sum;
+	u8 rx_byte;
 	int ret = 0, final_ret;
 
 	len = cros_ec_prepare_tx(ec_dev, ec_msg);
@@ -421,25 +422,22 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
 	if (!ret) {
 		/* Verify that EC can process command */
 		for (i = 0; i < len; i++) {
-			switch (rx_buf[i]) {
-			case EC_SPI_PAST_END:
-			case EC_SPI_RX_BAD_DATA:
-			case EC_SPI_NOT_READY:
-				ret = -EAGAIN;
-				ec_msg->result = EC_RES_IN_PROGRESS;
-			default:
+			rx_byte = rx_buf[i];
+			if (rx_byte == EC_SPI_PAST_END  ||
+			    rx_byte == EC_SPI_RX_BAD_DATA ||
+			    rx_byte == EC_SPI_NOT_READY) {
+				ret = -EREMOTEIO;
 				break;
 			}
-			if (ret)
-				break;
 		}
-		if (!ret)
-			ret = cros_ec_spi_receive_packet(ec_dev,
-					ec_msg->insize + sizeof(*response));
-	} else {
-		dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
 	}
 
+	if (!ret)
+		ret = cros_ec_spi_receive_packet(ec_dev,
+				ec_msg->insize + sizeof(*response));
+	else
+		dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+
 	final_ret = terminate_request(ec_dev);
 
 	spi_bus_unlock(ec_spi->spi->master);
@@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
 	int i, len;
 	u8 *ptr;
 	u8 *rx_buf;
+	u8 rx_byte;
 	int sum;
 	int ret = 0, final_ret;
 
@@ -544,25 +543,22 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
 	if (!ret) {
 		/* Verify that EC can process command */
 		for (i = 0; i < len; i++) {
-			switch (rx_buf[i]) {
-			case EC_SPI_PAST_END:
-			case EC_SPI_RX_BAD_DATA:
-			case EC_SPI_NOT_READY:
-				ret = -EAGAIN;
-				ec_msg->result = EC_RES_IN_PROGRESS;
-			default:
+			rx_byte = rx_buf[i];
+			if (rx_byte == EC_SPI_PAST_END  ||
+			    rx_byte == EC_SPI_RX_BAD_DATA ||
+			    rx_byte == EC_SPI_NOT_READY) {
+				ret = -EREMOTEIO;
 				break;
 			}
-			if (ret)
-				break;
 		}
-		if (!ret)
-			ret = cros_ec_spi_receive_response(ec_dev,
-					ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
-	} else {
-		dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
 	}
 
+	if (!ret)
+		ret = cros_ec_spi_receive_response(ec_dev,
+				ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
+	else
+		dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+
 	final_ret = terminate_request(ec_dev);
 
 	spi_bus_unlock(ec_spi->spi->master);
@@ -667,6 +663,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
 			   sizeof(struct ec_response_get_protocol_info);
 	ec_dev->dout_size = sizeof(struct ec_host_request);
 
+	ec_spi->last_transfer_ns = ktime_get_ns();
 
 	err = cros_ec_register(ec_dev);
 	if (err) {
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 590fb9aad77d9a690d6948d61715f04af0a43cd9..c3ed885c155cf3c85676133644a11666dfa3db01 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -1543,6 +1543,9 @@ static void rtsx_pci_shutdown(struct pci_dev *pcidev)
 	rtsx_pci_power_off(pcr, HOST_ENTER_S1);
 
 	pci_disable_device(pcidev);
+	free_irq(pcr->irq, (void *)pcr);
+	if (pcr->msi_en)
+		pci_disable_msi(pcr->pci);
 }
 
 #else /* CONFIG_PM */
diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c
index da16bf45fab43ee9a946beef340f4cd2a224156e..dc94ffc6321a84dd25ce08d0f1a9374d40d4cead 100644
--- a/drivers/mfd/twl4030-audio.c
+++ b/drivers/mfd/twl4030-audio.c
@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void)
 EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
 
 static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
-			      struct device_node *node)
+			      struct device_node *parent)
 {
+	struct device_node *node;
+
 	if (pdata && pdata->codec)
 		return true;
 
-	if (of_find_node_by_name(node, "codec"))
+	node = of_get_child_by_name(parent, "codec");
+	if (node) {
+		of_node_put(node);
 		return true;
+	}
 
 	return false;
 }
diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c
index d66502d36ba0b3202d1c15c08540fa8aade42a32..dd19f17a1b637543965dd94e64d0d44b9178f64c 100644
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = {
 };
 
 
-static bool twl6040_has_vibra(struct device_node *node)
+static bool twl6040_has_vibra(struct device_node *parent)
 {
-#ifdef CONFIG_OF
-	if (of_find_node_by_name(node, "vibra"))
+	struct device_node *node;
+
+	node = of_get_child_by_name(parent, "vibra");
+	if (node) {
+		of_node_put(node);
 		return true;
-#endif
+	}
+
 	return false;
 }
 
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 305a7a464d091614978e37b26a0573f66a27e8b8..4d63ac8a82e0022f10f424b4bab4dc85820d0981 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -562,7 +562,7 @@ static ssize_t at24_eeprom_write_i2c(struct at24_data *at24, const char *buf,
 static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 {
 	struct at24_data *at24 = priv;
-	struct i2c_client *client;
+	struct device *dev = &at24->client[0]->dev;
 	char *buf = val;
 	int ret;
 
@@ -572,11 +572,9 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 	if (off + count > at24->chip.byte_len)
 		return -EINVAL;
 
-	client = at24_translate_offset(at24, &off);
-
-	ret = pm_runtime_get_sync(&client->dev);
+	ret = pm_runtime_get_sync(dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(&client->dev);
+		pm_runtime_put_noidle(dev);
 		return ret;
 	}
 
@@ -592,7 +590,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 		status = at24->read_func(at24, buf, off, count);
 		if (status < 0) {
 			mutex_unlock(&at24->lock);
-			pm_runtime_put(&client->dev);
+			pm_runtime_put(dev);
 			return status;
 		}
 		buf += status;
@@ -602,7 +600,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 
 	mutex_unlock(&at24->lock);
 
-	pm_runtime_put(&client->dev);
+	pm_runtime_put(dev);
 
 	return 0;
 }
@@ -610,7 +608,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 static int at24_write(void *priv, unsigned int off, void *val, size_t count)
 {
 	struct at24_data *at24 = priv;
-	struct i2c_client *client;
+	struct device *dev = &at24->client[0]->dev;
 	char *buf = val;
 	int ret;
 
@@ -620,11 +618,9 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
 	if (off + count > at24->chip.byte_len)
 		return -EINVAL;
 
-	client = at24_translate_offset(at24, &off);
-
-	ret = pm_runtime_get_sync(&client->dev);
+	ret = pm_runtime_get_sync(dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(&client->dev);
+		pm_runtime_put_noidle(dev);
 		return ret;
 	}
 
@@ -640,7 +636,7 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
 		status = at24->write_func(at24, buf, off, count);
 		if (status < 0) {
 			mutex_unlock(&at24->lock);
-			pm_runtime_put(&client->dev);
+			pm_runtime_put(dev);
 			return status;
 		}
 		buf += status;
@@ -650,7 +646,7 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
 
 	mutex_unlock(&at24->lock);
 
-	pm_runtime_put(&client->dev);
+	pm_runtime_put(dev);
 
 	return 0;
 }
@@ -880,7 +876,7 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	at24->nvmem_config.reg_read = at24_read;
 	at24->nvmem_config.reg_write = at24_write;
 	at24->nvmem_config.priv = at24;
-	at24->nvmem_config.stride = 4;
+	at24->nvmem_config.stride = 1;
 	at24->nvmem_config.word_size = 1;
 	at24->nvmem_config.size = chip.byte_len;
 
diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
index eda38cbe85307edd67863122e24451d269d66866..41f2a9f6851d9e74a58fd06030cb2f00517ea8d0 100644
--- a/drivers/misc/pti.c
+++ b/drivers/misc/pti.c
@@ -32,7 +32,7 @@
 #include <linux/pci.h>
 #include <linux/mutex.h>
 #include <linux/miscdevice.h>
-#include <linux/pti.h>
+#include <linux/intel-pti.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h
index f06cd91964ce970b0c623dd662c335eb0fc4dadb..79a5b985ccf5ee8fe5ba06b5aec717f36799794e 100644
--- a/drivers/mmc/core/card.h
+++ b/drivers/mmc/core/card.h
@@ -75,9 +75,11 @@ struct mmc_fixup {
 #define EXT_CSD_REV_ANY (-1u)
 
 #define CID_MANFID_SANDISK      0x2
+#define CID_MANFID_ATP          0x9
 #define CID_MANFID_TOSHIBA      0x11
 #define CID_MANFID_MICRON       0x13
 #define CID_MANFID_SAMSUNG      0x15
+#define CID_MANFID_APACER       0x27
 #define CID_MANFID_KINGSTON     0x70
 #define CID_MANFID_HYNIX	0x90
 
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index d209fb466979015d3d668beab293e60b9657c0fc..208a762b87ef2876914d641b38fc61858429a22e 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1290,7 +1290,7 @@ int mmc_hs400_to_hs200(struct mmc_card *card)
 
 static void mmc_select_driver_type(struct mmc_card *card)
 {
-	int card_drv_type, drive_strength, drv_type;
+	int card_drv_type, drive_strength, drv_type = 0;
 	int fixed_drv_type = card->host->fixed_drv_type;
 
 	card_drv_type = card->ext_csd.raw_driver_strength |
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index f664e9cbc9f8b66aa9f4f3c8fdd55882d99c8c79..75d317623852dc9f55586e41a176311a48144e1d 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -52,6 +52,14 @@ static const struct mmc_fixup mmc_blk_fixups[] = {
 	MMC_FIXUP("MMC32G", CID_MANFID_TOSHIBA, CID_OEMID_ANY, add_quirk_mmc,
 		  MMC_QUIRK_BLK_NO_CMD23),
 
+	/*
+	 * Some SD cards lockup while using CMD23 multiblock transfers.
+	 */
+	MMC_FIXUP("AF SD", CID_MANFID_ATP, CID_OEMID_ANY, add_quirk_sd,
+		  MMC_QUIRK_BLK_NO_CMD23),
+	MMC_FIXUP("APUSD", CID_MANFID_APACER, 0x5048, add_quirk_sd,
+		  MMC_QUIRK_BLK_NO_CMD23),
+
 	/*
 	 * Some MMC cards need longer data read timeout than indicated in CSD.
 	 */
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index fcf7235d5742ae443bdc629d293983db5b60312d..157e1d9e7725a050f64c32ebed135b46ae0ee58d 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -24,6 +24,7 @@
 #include <linux/kernel.h>
 #include <linux/clk.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/mmc/host.h>
@@ -667,3 +668,5 @@ int renesas_sdhi_remove(struct platform_device *pdev)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(renesas_sdhi_remove);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index f7f157a62a4a7dfa7155b5fe9933ead9f870ad1b..555c7f133eb8a5246ff3534d1899927463e99dc2 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -1424,7 +1424,9 @@ static const struct file_operations s3cmci_fops_state = {
 struct s3cmci_reg {
 	unsigned short	addr;
 	unsigned char	*name;
-} debug_regs[] = {
+};
+
+static const struct s3cmci_reg debug_regs[] = {
 	DBG_REG(CON),
 	DBG_REG(PRE),
 	DBG_REG(CMDARG),
@@ -1446,7 +1448,7 @@ struct s3cmci_reg {
 static int s3cmci_regs_show(struct seq_file *seq, void *v)
 {
 	struct s3cmci_host *host = seq->private;
-	struct s3cmci_reg *rptr = debug_regs;
+	const struct s3cmci_reg *rptr = debug_regs;
 
 	for (; rptr->name; rptr++)
 		seq_printf(seq, "SDI%s\t=0x%08x\n", rptr->name,
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index f80e911b8843819db8dcd1956c76ce2bf60b5ab8..73b6055774474e322b07cda4144c48b5b235a55c 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs,
 	if (!ops->oobbuf)
 		ops->ooblen = 0;
 
-	if (offs < 0 || offs + ops->len >= mtd->size)
+	if (offs < 0 || offs + ops->len > mtd->size)
 		return -EINVAL;
 
 	if (ops->ooblen) {
diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c
index e0eb51d8c0129937b35157ccdc107e5ef54c038a..dd56a671ea4285af0f5079bc652ecf4a32410272 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -1763,7 +1763,7 @@ static int brcmnand_read(struct mtd_info *mtd, struct nand_chip *chip,
 			err = brcmstb_nand_verify_erased_page(mtd, chip, buf,
 							      addr);
 			/* erased page bitflips corrected */
-			if (err > 0)
+			if (err >= 0)
 				return err;
 		}
 
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
index 484f7fbc3f7d2d11cd66fc3416e64ab38d47f852..a8bde6665c24f7e20e6103959ceee16c5d3ec5c8 100644
--- a/drivers/mtd/nand/gpio.c
+++ b/drivers/mtd/nand/gpio.c
@@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev)
 		goto out_ce;
 	}
 
-	gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
-	if (IS_ERR(gpiomtd->nwp)) {
-		ret = PTR_ERR(gpiomtd->nwp);
+	gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
+	if (IS_ERR(gpiomtd->ale)) {
+		ret = PTR_ERR(gpiomtd->ale);
 		goto out_ce;
 	}
 
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 50f8d4a1b9832326070045d0c294d22393001fbd..d4d824ef64e9fb395af3bc549daae72b96731e16 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 		return ret;
 	}
 
-	/* handle the block mark swapping */
-	block_mark_swapping(this, payload_virt, auxiliary_virt);
-
 	/* Loop over status bytes, accumulating ECC status. */
 	status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
 
@@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 		max_bitflips = max_t(unsigned int, max_bitflips, *status);
 	}
 
+	/* handle the block mark swapping */
+	block_mark_swapping(this, buf, auxiliary_virt);
+
 	if (oob_required) {
 		/*
 		 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob()
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 90b9a9ccbe60e3fad44855705bf2dc4623cd5347..9285f60e57836ae056349751b75b9437596bd131 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -963,6 +963,7 @@ static void prepare_start_command(struct pxa3xx_nand_info *info, int command)
 
 	switch (command) {
 	case NAND_CMD_READ0:
+	case NAND_CMD_READOOB:
 	case NAND_CMD_PAGEPROG:
 		info->use_ecc = 1;
 		break;
diff --git a/drivers/mux/core.c b/drivers/mux/core.c
index 2260063b0ea83be7f24e5d780af8bd808c23b427..6e5cf9d9cd9927f4264e3a01394e2b84d80d7d3b 100644
--- a/drivers/mux/core.c
+++ b/drivers/mux/core.c
@@ -413,6 +413,7 @@ static int of_dev_node_match(struct device *dev, const void *data)
 	return dev->of_node == data;
 }
 
+/* Note this function returns a reference to the mux_chip dev. */
 static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np)
 {
 	struct device *dev;
@@ -466,6 +467,7 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
 	    (!args.args_count && (mux_chip->controllers > 1))) {
 		dev_err(dev, "%pOF: wrong #mux-control-cells for %pOF\n",
 			np, args.np);
+		put_device(&mux_chip->dev);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -476,10 +478,10 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
 	if (controller >= mux_chip->controllers) {
 		dev_err(dev, "%pOF: bad mux controller %u specified in %pOF\n",
 			np, controller, args.np);
+		put_device(&mux_chip->dev);
 		return ERR_PTR(-EINVAL);
 	}
 
-	get_device(&mux_chip->dev);
 	return &mux_chip->mux[controller];
 }
 EXPORT_SYMBOL_GPL(mux_control_get);
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index a13a4896a8bddad19ae48f8c58bbaf2f3c8dce84..760d2c07e3a2c1edd15a2f70d20d1272adb3fd8a 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -184,12 +184,12 @@
  * Below is some version info we got:
  *    SOC   Version   IP-Version  Glitch- [TR]WRN_INT IRQ Err Memory err RTR re-
  *                                Filter? connected?  Passive detection  ception in MB
- *   MX25  FlexCAN2  03.00.00.00     no        no         ?       no        no
+ *   MX25  FlexCAN2  03.00.00.00     no        no        no       no        no
  *   MX28  FlexCAN2  03.00.04.00    yes       yes        no       no        no
- *   MX35  FlexCAN2  03.00.00.00     no        no         ?       no        no
+ *   MX35  FlexCAN2  03.00.00.00     no        no        no       no        no
  *   MX53  FlexCAN2  03.00.00.00    yes        no        no       no        no
  *   MX6s  FlexCAN3  10.00.12.00    yes       yes        no       no       yes
- *   VF610 FlexCAN3  ?               no       yes         ?      yes       yes?
+ *   VF610 FlexCAN3  ?               no       yes        no      yes       yes?
  *
  * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected.
  */
@@ -297,7 +297,8 @@ static const struct flexcan_devtype_data fsl_imx6q_devtype_data = {
 
 static const struct flexcan_devtype_data fsl_vf610_devtype_data = {
 	.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
-		FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP,
+		FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP |
+		FLEXCAN_QUIRK_BROKEN_PERR_STATE,
 };
 
 static const struct can_bittiming_const flexcan_bittiming_const = {
@@ -525,7 +526,7 @@ static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		data = be32_to_cpup((__be32 *)&cf->data[0]);
 		flexcan_write(data, &priv->tx_mb->data[0]);
 	}
-	if (cf->can_dlc > 3) {
+	if (cf->can_dlc > 4) {
 		data = be32_to_cpup((__be32 *)&cf->data[4]);
 		flexcan_write(data, &priv->tx_mb->data[1]);
 	}
diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c
index 85268be0c913df5f5dc595405ade48efbd8add0a..55513411a82e68e11d6b1ca30e90ea4337a0f2ee 100644
--- a/drivers/net/can/peak_canfd/peak_canfd.c
+++ b/drivers/net/can/peak_canfd/peak_canfd.c
@@ -258,21 +258,18 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv,
 	/* if this frame is an echo, */
 	if ((rx_msg_flags & PUCAN_MSG_LOOPED_BACK) &&
 	    !(rx_msg_flags & PUCAN_MSG_SELF_RECEIVE)) {
-		int n;
 		unsigned long flags;
 
 		spin_lock_irqsave(&priv->echo_lock, flags);
-		n = can_get_echo_skb(priv->ndev, msg->client);
+		can_get_echo_skb(priv->ndev, msg->client);
 		spin_unlock_irqrestore(&priv->echo_lock, flags);
 
 		/* count bytes of the echo instead of skb */
 		stats->tx_bytes += cf_len;
 		stats->tx_packets++;
 
-		if (n) {
-			/* restart tx queue only if a slot is free */
-			netif_wake_queue(priv->ndev);
-		}
+		/* restart tx queue (a slot is free) */
+		netif_wake_queue(priv->ndev);
 
 		return 0;
 	}
diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c
index b4efd711f824ccd1c832af8817e09bf2e00b2b5c..788c3464a3b0e95aaa101591750b9de493a34a18 100644
--- a/drivers/net/can/peak_canfd/peak_pciefd_main.c
+++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c
@@ -825,7 +825,10 @@ static int peak_pciefd_probe(struct pci_dev *pdev,
 err_disable_pci:
 	pci_disable_device(pdev);
 
-	return err;
+	/* pci_xxx_config_word() return positive PCIBIOS_xxx error codes while
+	 * the probe() function must return a negative errno in case of failure
+	 * (err is unchanged if negative) */
+	return pcibios_err_to_errno(err);
 }
 
 /* free the board structure object, as well as its resources: */
diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c
index 131026fbc2d77cbc3ccb5903daa10f8920f8ae17..5adc95c922eef2d9f968a2dea3bac7c2dd3bfda2 100644
--- a/drivers/net/can/sja1000/peak_pci.c
+++ b/drivers/net/can/sja1000/peak_pci.c
@@ -717,7 +717,10 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 failure_disable_pci:
 	pci_disable_device(pdev);
 
-	return err;
+	/* pci_xxx_config_word() return positive PCIBIOS_xxx error codes while
+	 * the probe() function must return a negative errno in case of failure
+	 * (err is unchanged if negative) */
+	return pcibios_err_to_errno(err);
 }
 
 static void peak_pci_remove(struct pci_dev *pdev)
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 4d4941469cfc06bfff3aeafaa0c3562b63702730..db6ea936dc3fc3fca00c939b2db6a938cf5011dc 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -637,6 +637,9 @@ static int ti_hecc_rx_poll(struct napi_struct *napi, int quota)
 		mbx_mask = hecc_read(priv, HECC_CANMIM);
 		mbx_mask |= HECC_TX_MBOX_MASK;
 		hecc_write(priv, HECC_CANMIM, mbx_mask);
+	} else {
+		/* repoll is done only if whole budget is used */
+		num_pkts = quota;
 	}
 
 	return num_pkts;
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index b3d02759c226bbde64ea3bdcdd3314ca646b743b..12ff0020ecd60904b65b89a633cd2a60bf880ed2 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -288,6 +288,8 @@ static void ems_usb_read_interrupt_callback(struct urb *urb)
 
 	case -ECONNRESET: /* unlink */
 	case -ENOENT:
+	case -EPIPE:
+	case -EPROTO:
 	case -ESHUTDOWN:
 		return;
 
@@ -393,6 +395,7 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg)
 
 		if (dev->can.state == CAN_STATE_ERROR_WARNING ||
 		    dev->can.state == CAN_STATE_ERROR_PASSIVE) {
+			cf->can_id |= CAN_ERR_CRTL;
 			cf->data[1] = (txerr > rxerr) ?
 			    CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE;
 		}
diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index 9fdb0f0bfa06a00a2ade5e74daef15798ab7d4a1..c6dcf93675c00585bd960f579988ea1c2d348d50 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -393,6 +393,8 @@ static void esd_usb2_read_bulk_callback(struct urb *urb)
 		break;
 
 	case -ENOENT:
+	case -EPIPE:
+	case -EPROTO:
 	case -ESHUTDOWN:
 		return;
 
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 68ac3e88a8cecbe5b4a58da8491756ad5c26039a..8bf80ad9dc44cf4622811bc1fab31404c50cfb95 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev)
 		dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
 			rc);
 
-	return rc;
+	return (rc > 0) ? 0 : rc;
 }
 
 static void gs_usb_xmit_callback(struct urb *urb)
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 9b18d96ef52633ab34bb5ff39f4f643023dc308a..63587b8e6825add0dadc75b6e446981935d18adb 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -609,8 +609,8 @@ static int kvaser_usb_wait_msg(const struct kvaser_usb *dev, u8 id,
 			}
 
 			if (pos + tmp->len > actual_len) {
-				dev_err(dev->udev->dev.parent,
-					"Format error\n");
+				dev_err_ratelimited(dev->udev->dev.parent,
+						    "Format error\n");
 				break;
 			}
 
@@ -813,6 +813,7 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv,
 	if (err) {
 		netdev_err(netdev, "Error transmitting URB\n");
 		usb_unanchor_urb(urb);
+		kfree(buf);
 		usb_free_urb(urb);
 		return err;
 	}
@@ -1325,6 +1326,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb)
 	case 0:
 		break;
 	case -ENOENT:
+	case -EPIPE:
+	case -EPROTO:
 	case -ESHUTDOWN:
 		return;
 	default:
@@ -1333,7 +1336,7 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb)
 		goto resubmit_urb;
 	}
 
-	while (pos <= urb->actual_length - MSG_HEADER_LEN) {
+	while (pos <= (int)(urb->actual_length - MSG_HEADER_LEN)) {
 		msg = urb->transfer_buffer + pos;
 
 		/* The Kvaser firmware can only read and write messages that
@@ -1352,7 +1355,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb)
 		}
 
 		if (pos + msg->len > urb->actual_length) {
-			dev_err(dev->udev->dev.parent, "Format error\n");
+			dev_err_ratelimited(dev->udev->dev.parent,
+					    "Format error\n");
 			break;
 		}
 
@@ -1768,6 +1772,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
 		spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
 
 		usb_unanchor_urb(urb);
+		kfree(buf);
 
 		stats->tx_dropped++;
 
diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c
index 7f0272558befe9ecdeaf110d2b95f8754cea8939..8d8c2086424d09b2c93a377be756dead26dc7979 100644
--- a/drivers/net/can/usb/mcba_usb.c
+++ b/drivers/net/can/usb/mcba_usb.c
@@ -592,6 +592,8 @@ static void mcba_usb_read_bulk_callback(struct urb *urb)
 		break;
 
 	case -ENOENT:
+	case -EPIPE:
+	case -EPROTO:
 	case -ESHUTDOWN:
 		return;
 
@@ -862,7 +864,7 @@ static int mcba_usb_probe(struct usb_interface *intf,
 		goto cleanup_unregister_candev;
 	}
 
-	dev_info(&intf->dev, "Microchip CAN BUS analizer connected\n");
+	dev_info(&intf->dev, "Microchip CAN BUS Analyzer connected\n");
 
 	return 0;
 
diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index d000cb62d6ae8c68233e123bc63686d3ab71dd67..27861c417c9404c9c3df841daac95015978f3a69 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -524,6 +524,8 @@ static void usb_8dev_read_bulk_callback(struct urb *urb)
 		break;
 
 	case -ENOENT:
+	case -EPIPE:
+	case -EPROTO:
 	case -ESHUTDOWN:
 		return;
 
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index 8404e8852a0f96df80d7cb3d1059e64890405949..b4c4a2c764378e17131234f6b9481d76be2db786 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -194,7 +194,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
 		tbp = peer_tb;
 	}
 
-	if (tbp[IFLA_IFNAME]) {
+	if (ifmp && tbp[IFLA_IFNAME]) {
 		nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
 		name_assign_type = NET_NAME_USER;
 	} else {
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index f5a8dd96fd75f273f6a31647087bd0c5e0b4fd47..4498ab897d94b9ea86f5fb83fd5eda98bd88e263 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1500,10 +1500,13 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds,
 {
 	struct b53_device *dev = ds->priv;
 
-	/* Older models support a different tag format that we do not
-	 * support in net/dsa/tag_brcm.c yet.
+	/* Older models (5325, 5365) support a different tag format that we do
+	 * not support in net/dsa/tag_brcm.c yet. 539x and 531x5 require managed
+	 * mode to be turned on which means we need to specifically manage ARL
+	 * misses on multicast addresses (TBD).
 	 */
-	if (is5325(dev) || is5365(dev) || !b53_can_enable_brcm_tags(ds, port))
+	if (is5325(dev) || is5365(dev) || is539x(dev) || is531x5(dev) ||
+	    !b53_can_enable_brcm_tags(ds, port))
 		return DSA_TAG_PROTO_NONE;
 
 	/* Broadcom BCM58xx chips have a flow accelerator on Port 8
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index ea01f24f15e77f4b9765d3bed76ce71527e39dad..b62d47210db8d1e1af522b4ddf1ed073e0977991 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -14,7 +14,6 @@
 #include <linux/netdevice.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
-#include <linux/of.h>
 #include <linux/phy.h>
 #include <linux/phy_fixed.h>
 #include <linux/mii.h>
diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c
index b721a2009b5030f440bed9eab8ed4f7003ae25a4..23b45da784cb601a7abf84b212717aee7dc64403 100644
--- a/drivers/net/dsa/bcm_sf2_cfp.c
+++ b/drivers/net/dsa/bcm_sf2_cfp.c
@@ -625,7 +625,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
 	bcm_sf2_cfp_slice_ipv6(priv, v6_spec->ip6src, v6_spec->psrc,
 				slice_num, false);
 	bcm_sf2_cfp_slice_ipv6(priv, v6_m_spec->ip6src, v6_m_spec->psrc,
-				slice_num, true);
+				SLICE_NUM_MASK, true);
 
 	/* Insert into TCAM now because we need to insert a second rule */
 	bcm_sf2_cfp_rule_addr_set(priv, rule_index[0]);
@@ -699,7 +699,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
 	/* Insert into Action and policer RAMs now, set chain ID to
 	 * the one we are chained to
 	 */
-	ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port_num,
+	ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[1], port_num,
 				      queue_num, true);
 	if (ret)
 		goto out_err;
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 8171055fde7a0238fb2fbc691a482c211d4d8d5b..66d33e97cbc5426b71395f878bc4e648dc3f5182 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -339,7 +339,7 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
 	u16 mask;
 
 	mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &mask);
-	mask |= GENMASK(chip->g1_irq.nirqs, 0);
+	mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
 	mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
 
 	free_irq(chip->irq, chip);
@@ -395,7 +395,7 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
 	return 0;
 
 out_disable:
-	mask |= GENMASK(chip->g1_irq.nirqs, 0);
+	mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
 	mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
 
 out_mapping:
@@ -2177,6 +2177,19 @@ static const struct of_device_id mv88e6xxx_mdio_external_match[] = {
 	{ },
 };
 
+static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip)
+
+{
+	struct mv88e6xxx_mdio_bus *mdio_bus;
+	struct mii_bus *bus;
+
+	list_for_each_entry(mdio_bus, &chip->mdios, list) {
+		bus = mdio_bus->bus;
+
+		mdiobus_unregister(bus);
+	}
+}
+
 static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip,
 				    struct device_node *np)
 {
@@ -2201,27 +2214,16 @@ static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip,
 		match = of_match_node(mv88e6xxx_mdio_external_match, child);
 		if (match) {
 			err = mv88e6xxx_mdio_register(chip, child, true);
-			if (err)
+			if (err) {
+				mv88e6xxx_mdios_unregister(chip);
 				return err;
+			}
 		}
 	}
 
 	return 0;
 }
 
-static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip)
-
-{
-	struct mv88e6xxx_mdio_bus *mdio_bus;
-	struct mii_bus *bus;
-
-	list_for_each_entry(mdio_bus, &chip->mdios, list) {
-		bus = mdio_bus->bus;
-
-		mdiobus_unregister(bus);
-	}
-}
-
 static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index a7801f6668a5d0e394fd01078bb10c50998cdf56..6315774d72b3304d5ccd1729cb150c183bbac19e 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -338,6 +338,7 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 		cmode = MV88E6XXX_PORT_STS_CMODE_2500BASEX;
 		break;
 	case PHY_INTERFACE_MODE_XGMII:
+	case PHY_INTERFACE_MODE_XAUI:
 		cmode = MV88E6XXX_PORT_STS_CMODE_XAUI;
 		break;
 	case PHY_INTERFACE_MODE_RXAUI:
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index f4e13a7014bde0a1cd4f60b4b60a5a1b0459d09e..36c8950dbd2d80699f396f217f0f438479f68355 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -602,7 +602,7 @@ struct vortex_private {
 	struct sk_buff* rx_skbuff[RX_RING_SIZE];
 	struct sk_buff* tx_skbuff[TX_RING_SIZE];
 	unsigned int cur_rx, cur_tx;		/* The next free ring entry */
-	unsigned int dirty_rx, dirty_tx;	/* The ring entries to be free()ed. */
+	unsigned int dirty_tx;	/* The ring entries to be free()ed. */
 	struct vortex_extra_stats xstats;	/* NIC-specific extra stats */
 	struct sk_buff *tx_skb;				/* Packet being eaten by bus master ctrl.  */
 	dma_addr_t tx_skb_dma;				/* Allocated DMA address for bus master ctrl DMA.   */
@@ -618,7 +618,6 @@ struct vortex_private {
 
 	/* The remainder are related to chip state, mostly media selection. */
 	struct timer_list timer;			/* Media selection timer. */
-	struct timer_list rx_oom_timer;		/* Rx skb allocation retry timer */
 	int options;						/* User-settable misc. driver options. */
 	unsigned int media_override:4, 		/* Passed-in media type. */
 		default_media:4,				/* Read from the EEPROM/Wn3_Config. */
@@ -760,7 +759,6 @@ static void mdio_sync(struct vortex_private *vp, int bits);
 static int mdio_read(struct net_device *dev, int phy_id, int location);
 static void mdio_write(struct net_device *vp, int phy_id, int location, int value);
 static void vortex_timer(struct timer_list *t);
-static void rx_oom_timer(struct timer_list *t);
 static netdev_tx_t vortex_start_xmit(struct sk_buff *skb,
 				     struct net_device *dev);
 static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb,
@@ -1601,7 +1599,6 @@ vortex_up(struct net_device *dev)
 
 	timer_setup(&vp->timer, vortex_timer, 0);
 	mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait));
-	timer_setup(&vp->rx_oom_timer, rx_oom_timer, 0);
 
 	if (vortex_debug > 1)
 		pr_debug("%s: Initial media type %s.\n",
@@ -1676,7 +1673,7 @@ vortex_up(struct net_device *dev)
 	window_write16(vp, 0x0040, 4, Wn4_NetDiag);
 
 	if (vp->full_bus_master_rx) { /* Boomerang bus master. */
-		vp->cur_rx = vp->dirty_rx = 0;
+		vp->cur_rx = 0;
 		/* Initialize the RxEarly register as recommended. */
 		iowrite16(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD);
 		iowrite32(0x0020, ioaddr + PktStatus);
@@ -1729,6 +1726,7 @@ vortex_open(struct net_device *dev)
 	struct vortex_private *vp = netdev_priv(dev);
 	int i;
 	int retval;
+	dma_addr_t dma;
 
 	/* Use the now-standard shared IRQ implementation. */
 	if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ?
@@ -1753,7 +1751,11 @@ vortex_open(struct net_device *dev)
 				break;			/* Bad news!  */
 
 			skb_reserve(skb, NET_IP_ALIGN);	/* Align IP on 16 byte boundaries */
-			vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
+			dma = pci_map_single(VORTEX_PCI(vp), skb->data,
+					     PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+			if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma))
+				break;
+			vp->rx_ring[i].addr = cpu_to_le32(dma);
 		}
 		if (i != RX_RING_SIZE) {
 			pr_emerg("%s: no memory for rx ring\n", dev->name);
@@ -2067,6 +2069,12 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		int len = (skb->len + 3) & ~3;
 		vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len,
 						PCI_DMA_TODEVICE);
+		if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) {
+			dev_kfree_skb_any(skb);
+			dev->stats.tx_dropped++;
+			return NETDEV_TX_OK;
+		}
+
 		spin_lock_irq(&vp->window_lock);
 		window_set(vp, 7);
 		iowrite32(vp->tx_skb_dma, ioaddr + Wn7_MasterAddr);
@@ -2593,7 +2601,7 @@ boomerang_rx(struct net_device *dev)
 	int entry = vp->cur_rx % RX_RING_SIZE;
 	void __iomem *ioaddr = vp->ioaddr;
 	int rx_status;
-	int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx;
+	int rx_work_limit = RX_RING_SIZE;
 
 	if (vortex_debug > 5)
 		pr_debug("boomerang_rx(): status %4.4x\n", ioread16(ioaddr+EL3_STATUS));
@@ -2614,7 +2622,8 @@ boomerang_rx(struct net_device *dev)
 		} else {
 			/* The packet length: up to 4.5K!. */
 			int pkt_len = rx_status & 0x1fff;
-			struct sk_buff *skb;
+			struct sk_buff *skb, *newskb;
+			dma_addr_t newdma;
 			dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr);
 
 			if (vortex_debug > 4)
@@ -2633,9 +2642,27 @@ boomerang_rx(struct net_device *dev)
 				pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
 				vp->rx_copy++;
 			} else {
+				/* Pre-allocate the replacement skb.  If it or its
+				 * mapping fails then recycle the buffer thats already
+				 * in place
+				 */
+				newskb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ);
+				if (!newskb) {
+					dev->stats.rx_dropped++;
+					goto clear_complete;
+				}
+				newdma = pci_map_single(VORTEX_PCI(vp), newskb->data,
+							PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+				if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) {
+					dev->stats.rx_dropped++;
+					consume_skb(newskb);
+					goto clear_complete;
+				}
+
 				/* Pass up the skbuff already on the Rx ring. */
 				skb = vp->rx_skbuff[entry];
-				vp->rx_skbuff[entry] = NULL;
+				vp->rx_skbuff[entry] = newskb;
+				vp->rx_ring[entry].addr = cpu_to_le32(newdma);
 				skb_put(skb, pkt_len);
 				pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
 				vp->rx_nocopy++;
@@ -2653,55 +2680,15 @@ boomerang_rx(struct net_device *dev)
 			netif_rx(skb);
 			dev->stats.rx_packets++;
 		}
-		entry = (++vp->cur_rx) % RX_RING_SIZE;
-	}
-	/* Refill the Rx ring buffers. */
-	for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) {
-		struct sk_buff *skb;
-		entry = vp->dirty_rx % RX_RING_SIZE;
-		if (vp->rx_skbuff[entry] == NULL) {
-			skb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ);
-			if (skb == NULL) {
-				static unsigned long last_jif;
-				if (time_after(jiffies, last_jif + 10 * HZ)) {
-					pr_warn("%s: memory shortage\n",
-						dev->name);
-					last_jif = jiffies;
-				}
-				if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE)
-					mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1));
-				break;			/* Bad news!  */
-			}
 
-			vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
-			vp->rx_skbuff[entry] = skb;
-		}
+clear_complete:
 		vp->rx_ring[entry].status = 0;	/* Clear complete bit. */
 		iowrite16(UpUnstall, ioaddr + EL3_CMD);
+		entry = (++vp->cur_rx) % RX_RING_SIZE;
 	}
 	return 0;
 }
 
-/*
- * If we've hit a total OOM refilling the Rx ring we poll once a second
- * for some memory.  Otherwise there is no way to restart the rx process.
- */
-static void
-rx_oom_timer(struct timer_list *t)
-{
-	struct vortex_private *vp = from_timer(vp, t, rx_oom_timer);
-	struct net_device *dev = vp->mii.dev;
-
-	spin_lock_irq(&vp->lock);
-	if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE)	/* This test is redundant, but makes me feel good */
-		boomerang_rx(dev);
-	if (vortex_debug > 1) {
-		pr_debug("%s: rx_oom_timer %s\n", dev->name,
-			((vp->cur_rx - vp->dirty_rx) != RX_RING_SIZE) ? "succeeded" : "retrying");
-	}
-	spin_unlock_irq(&vp->lock);
-}
-
 static void
 vortex_down(struct net_device *dev, int final_down)
 {
@@ -2711,7 +2698,6 @@ vortex_down(struct net_device *dev, int final_down)
 	netdev_reset_queue(dev);
 	netif_stop_queue(dev);
 
-	del_timer_sync(&vp->rx_oom_timer);
 	del_timer_sync(&vp->timer);
 
 	/* Turn off statistics ASAP.  We update dev->stats below. */
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 97c5a89a9cf7a4f1a65dbef70d889efd7d661c20..fbe21a817bd8a2c9ca01a2be6e8c6948b704ef87 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -75,6 +75,9 @@ static struct workqueue_struct *ena_wq;
 MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
 
 static int ena_rss_init_default(struct ena_adapter *adapter);
+static void check_for_admin_com_state(struct ena_adapter *adapter);
+static void ena_destroy_device(struct ena_adapter *adapter);
+static int ena_restore_device(struct ena_adapter *adapter);
 
 static void ena_tx_timeout(struct net_device *dev)
 {
@@ -1565,7 +1568,7 @@ static int ena_rss_configure(struct ena_adapter *adapter)
 
 static int ena_up_complete(struct ena_adapter *adapter)
 {
-	int rc, i;
+	int rc;
 
 	rc = ena_rss_configure(adapter);
 	if (rc)
@@ -1584,17 +1587,6 @@ static int ena_up_complete(struct ena_adapter *adapter)
 
 	ena_napi_enable_all(adapter);
 
-	/* Enable completion queues interrupt */
-	for (i = 0; i < adapter->num_queues; i++)
-		ena_unmask_interrupt(&adapter->tx_ring[i],
-				     &adapter->rx_ring[i]);
-
-	/* schedule napi in case we had pending packets
-	 * from the last time we disable napi
-	 */
-	for (i = 0; i < adapter->num_queues; i++)
-		napi_schedule(&adapter->ena_napi[i].napi);
-
 	return 0;
 }
 
@@ -1731,7 +1723,7 @@ static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
 
 static int ena_up(struct ena_adapter *adapter)
 {
-	int rc;
+	int rc, i;
 
 	netdev_dbg(adapter->netdev, "%s\n", __func__);
 
@@ -1774,6 +1766,17 @@ static int ena_up(struct ena_adapter *adapter)
 
 	set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
 
+	/* Enable completion queues interrupt */
+	for (i = 0; i < adapter->num_queues; i++)
+		ena_unmask_interrupt(&adapter->tx_ring[i],
+				     &adapter->rx_ring[i]);
+
+	/* schedule napi in case we had pending packets
+	 * from the last time we disable napi
+	 */
+	for (i = 0; i < adapter->num_queues; i++)
+		napi_schedule(&adapter->ena_napi[i].napi);
+
 	return rc;
 
 err_up:
@@ -1884,6 +1887,17 @@ static int ena_close(struct net_device *netdev)
 	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
 		ena_down(adapter);
 
+	/* Check for device status and issue reset if needed*/
+	check_for_admin_com_state(adapter);
+	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+		netif_err(adapter, ifdown, adapter->netdev,
+			  "Destroy failure, restarting device\n");
+		ena_dump_stats_to_dmesg(adapter);
+		/* rtnl lock already obtained in dev_ioctl() layer */
+		ena_destroy_device(adapter);
+		ena_restore_device(adapter);
+	}
+
 	return 0;
 }
 
@@ -2544,11 +2558,12 @@ static void ena_destroy_device(struct ena_adapter *adapter)
 
 	ena_com_set_admin_running_state(ena_dev, false);
 
-	ena_close(netdev);
+	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+		ena_down(adapter);
 
 	/* Before releasing the ENA resources, a device reset is required.
 	 * (to prevent the device from accessing them).
-	 * In case the reset flag is set and the device is up, ena_close
+	 * In case the reset flag is set and the device is up, ena_down()
 	 * already perform the reset, so it can be skipped.
 	 */
 	if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
index 57e796870595bb9a305a7579154b0dbd1cbeec60..105fdb958cefb1d28e2f57a46da522e32536c3c7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
@@ -50,7 +50,7 @@
 #define AQ_CFG_PCI_FUNC_MSIX_IRQS   9U
 #define AQ_CFG_PCI_FUNC_PORTS       2U
 
-#define AQ_CFG_SERVICE_TIMER_INTERVAL    (2 * HZ)
+#define AQ_CFG_SERVICE_TIMER_INTERVAL    (1 * HZ)
 #define AQ_CFG_POLLING_TIMER_INTERVAL   ((unsigned int)(2 * HZ))
 
 #define AQ_CFG_SKB_FRAGS_MAX   32U
@@ -80,6 +80,7 @@
 #define AQ_CFG_DRV_VERSION	__stringify(NIC_MAJOR_DRIVER_VERSION)"."\
 				__stringify(NIC_MINOR_DRIVER_VERSION)"."\
 				__stringify(NIC_BUILD_DRIVER_VERSION)"."\
-				__stringify(NIC_REVISION_DRIVER_VERSION)
+				__stringify(NIC_REVISION_DRIVER_VERSION) \
+				AQ_CFG_DRV_VERSION_SUFFIX
 
 #endif /* AQ_CFG_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index 70efb7467bf3a1c6f298e9d5697b38307e91dab7..f2d8063a2cefd8f7581f0e2182b81b1ce773a92a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -66,14 +66,14 @@ static const char aq_ethtool_stat_names[][ETH_GSTRING_LEN] = {
 	"OutUCast",
 	"OutMCast",
 	"OutBCast",
-	"InUCastOctects",
-	"OutUCastOctects",
-	"InMCastOctects",
-	"OutMCastOctects",
-	"InBCastOctects",
-	"OutBCastOctects",
-	"InOctects",
-	"OutOctects",
+	"InUCastOctets",
+	"OutUCastOctets",
+	"InMCastOctets",
+	"OutMCastOctets",
+	"InBCastOctets",
+	"OutBCastOctets",
+	"InOctets",
+	"OutOctets",
 	"InPacketsDma",
 	"OutPacketsDma",
 	"InOctetsDma",
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index 0207927dc8a6ab4ac76c46fb17669b7e50e7ae1e..b3825de6cdfb03b7f176e4b5a4cee00a20306982 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -46,6 +46,28 @@ struct aq_hw_link_status_s {
 	unsigned int mbps;
 };
 
+struct aq_stats_s {
+	u64 uprc;
+	u64 mprc;
+	u64 bprc;
+	u64 erpt;
+	u64 uptc;
+	u64 mptc;
+	u64 bptc;
+	u64 erpr;
+	u64 mbtc;
+	u64 bbtc;
+	u64 mbrc;
+	u64 bbrc;
+	u64 ubrc;
+	u64 ubtc;
+	u64 dpc;
+	u64 dma_pkt_rc;
+	u64 dma_pkt_tc;
+	u64 dma_oct_rc;
+	u64 dma_oct_tc;
+};
+
 #define AQ_HW_IRQ_INVALID 0U
 #define AQ_HW_IRQ_LEGACY  1U
 #define AQ_HW_IRQ_MSI     2U
@@ -85,7 +107,9 @@ struct aq_hw_ops {
 	void (*destroy)(struct aq_hw_s *self);
 
 	int (*get_hw_caps)(struct aq_hw_s *self,
-			   struct aq_hw_caps_s *aq_hw_caps);
+			   struct aq_hw_caps_s *aq_hw_caps,
+			   unsigned short device,
+			   unsigned short subsystem_device);
 
 	int (*hw_ring_tx_xmit)(struct aq_hw_s *self, struct aq_ring_s *aq_ring,
 			       unsigned int frags);
@@ -164,8 +188,7 @@ struct aq_hw_ops {
 
 	int (*hw_update_stats)(struct aq_hw_s *self);
 
-	int (*hw_get_hw_stats)(struct aq_hw_s *self, u64 *data,
-			       unsigned int *p_count);
+	struct aq_stats_s *(*hw_get_hw_stats)(struct aq_hw_s *self);
 
 	int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 78dfb2ab78cefa0586168a0d043348c3488f0f20..75a894a9251c2114e7d30d40ab795f89ded413fe 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -37,6 +37,8 @@ static unsigned int aq_itr_rx;
 module_param_named(aq_itr_rx, aq_itr_rx, uint, 0644);
 MODULE_PARM_DESC(aq_itr_rx, "RX interrupt throttle rate");
 
+static void aq_nic_update_ndev_stats(struct aq_nic_s *self);
+
 static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues)
 {
 	struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
@@ -166,11 +168,8 @@ static int aq_nic_update_link_status(struct aq_nic_s *self)
 static void aq_nic_service_timer_cb(struct timer_list *t)
 {
 	struct aq_nic_s *self = from_timer(self, t, service_timer);
-	struct net_device *ndev = aq_nic_get_ndev(self);
+	int ctimer = AQ_CFG_SERVICE_TIMER_INTERVAL;
 	int err = 0;
-	unsigned int i = 0U;
-	struct aq_ring_stats_rx_s stats_rx;
-	struct aq_ring_stats_tx_s stats_tx;
 
 	if (aq_utils_obj_test(&self->header.flags, AQ_NIC_FLAGS_IS_NOT_READY))
 		goto err_exit;
@@ -182,23 +181,14 @@ static void aq_nic_service_timer_cb(struct timer_list *t)
 	if (self->aq_hw_ops.hw_update_stats)
 		self->aq_hw_ops.hw_update_stats(self->aq_hw);
 
-	memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s));
-	memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s));
-	for (i = AQ_DIMOF(self->aq_vec); i--;) {
-		if (self->aq_vec[i])
-			aq_vec_add_stats(self->aq_vec[i], &stats_rx, &stats_tx);
-	}
+	aq_nic_update_ndev_stats(self);
 
-	ndev->stats.rx_packets = stats_rx.packets;
-	ndev->stats.rx_bytes = stats_rx.bytes;
-	ndev->stats.rx_errors = stats_rx.errors;
-	ndev->stats.tx_packets = stats_tx.packets;
-	ndev->stats.tx_bytes = stats_tx.bytes;
-	ndev->stats.tx_errors = stats_tx.errors;
+	/* If no link - use faster timer rate to detect link up asap */
+	if (!netif_carrier_ok(self->ndev))
+		ctimer = max(ctimer / 2, 1);
 
 err_exit:
-	mod_timer(&self->service_timer,
-		  jiffies + AQ_CFG_SERVICE_TIMER_INTERVAL);
+	mod_timer(&self->service_timer, jiffies + ctimer);
 }
 
 static void aq_nic_polling_timer_cb(struct timer_list *t)
@@ -222,7 +212,7 @@ static struct net_device *aq_nic_ndev_alloc(void)
 
 struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
 				   const struct ethtool_ops *et_ops,
-				   struct device *dev,
+				   struct pci_dev *pdev,
 				   struct aq_pci_func_s *aq_pci_func,
 				   unsigned int port,
 				   const struct aq_hw_ops *aq_hw_ops)
@@ -242,7 +232,7 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
 	ndev->netdev_ops = ndev_ops;
 	ndev->ethtool_ops = et_ops;
 
-	SET_NETDEV_DEV(ndev, dev);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
 
 	ndev->if_port = port;
 	self->ndev = ndev;
@@ -254,7 +244,8 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
 
 	self->aq_hw = self->aq_hw_ops.create(aq_pci_func, self->port,
 						&self->aq_hw_ops);
-	err = self->aq_hw_ops.get_hw_caps(self->aq_hw, &self->aq_hw_caps);
+	err = self->aq_hw_ops.get_hw_caps(self->aq_hw, &self->aq_hw_caps,
+					  pdev->device, pdev->subsystem_device);
 	if (err < 0)
 		goto err_exit;
 
@@ -749,16 +740,40 @@ int aq_nic_get_regs_count(struct aq_nic_s *self)
 
 void aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
 {
-	struct aq_vec_s *aq_vec = NULL;
 	unsigned int i = 0U;
 	unsigned int count = 0U;
-	int err = 0;
+	struct aq_vec_s *aq_vec = NULL;
+	struct aq_stats_s *stats = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw);
 
-	err = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw, data, &count);
-	if (err < 0)
+	if (!stats)
 		goto err_exit;
 
-	data += count;
+	data[i] = stats->uprc + stats->mprc + stats->bprc;
+	data[++i] = stats->uprc;
+	data[++i] = stats->mprc;
+	data[++i] = stats->bprc;
+	data[++i] = stats->erpt;
+	data[++i] = stats->uptc + stats->mptc + stats->bptc;
+	data[++i] = stats->uptc;
+	data[++i] = stats->mptc;
+	data[++i] = stats->bptc;
+	data[++i] = stats->ubrc;
+	data[++i] = stats->ubtc;
+	data[++i] = stats->mbrc;
+	data[++i] = stats->mbtc;
+	data[++i] = stats->bbrc;
+	data[++i] = stats->bbtc;
+	data[++i] = stats->ubrc + stats->mbrc + stats->bbrc;
+	data[++i] = stats->ubtc + stats->mbtc + stats->bbtc;
+	data[++i] = stats->dma_pkt_rc;
+	data[++i] = stats->dma_pkt_tc;
+	data[++i] = stats->dma_oct_rc;
+	data[++i] = stats->dma_oct_tc;
+	data[++i] = stats->dpc;
+
+	i++;
+
+	data += i;
 	count = 0U;
 
 	for (i = 0U, aq_vec = self->aq_vec[0];
@@ -768,7 +783,20 @@ void aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
 	}
 
 err_exit:;
-	(void)err;
+}
+
+static void aq_nic_update_ndev_stats(struct aq_nic_s *self)
+{
+	struct net_device *ndev = self->ndev;
+	struct aq_stats_s *stats = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw);
+
+	ndev->stats.rx_packets = stats->uprc + stats->mprc + stats->bprc;
+	ndev->stats.rx_bytes = stats->ubrc + stats->mbrc + stats->bbrc;
+	ndev->stats.rx_errors = stats->erpr;
+	ndev->stats.tx_packets = stats->uptc + stats->mptc + stats->bptc;
+	ndev->stats.tx_bytes = stats->ubtc + stats->mbtc + stats->bbtc;
+	ndev->stats.tx_errors = stats->erpt;
+	ndev->stats.multicast = stats->mprc;
 }
 
 void aq_nic_get_link_ksettings(struct aq_nic_s *self,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 4309983acdd6f7502fa05869f79336fd459dc2fe..3c9f8db03d5f2a576c83064b18e930d5af6b7e1c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -71,7 +71,7 @@ struct aq_nic_cfg_s {
 
 struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
 				   const struct ethtool_ops *et_ops,
-				   struct device *dev,
+				   struct pci_dev *pdev,
 				   struct aq_pci_func_s *aq_pci_func,
 				   unsigned int port,
 				   const struct aq_hw_ops *aq_hw_ops);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index cadaa646c89f4b741382b4beee72c6ec3e3bfc18..58c29d04b186e634686ca667bc5afe9bd86e63a3 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -51,7 +51,8 @@ struct aq_pci_func_s *aq_pci_func_alloc(struct aq_hw_ops *aq_hw_ops,
 	pci_set_drvdata(pdev, self);
 	self->pdev = pdev;
 
-	err = aq_hw_ops->get_hw_caps(NULL, &self->aq_hw_caps);
+	err = aq_hw_ops->get_hw_caps(NULL, &self->aq_hw_caps, pdev->device,
+				     pdev->subsystem_device);
 	if (err < 0)
 		goto err_exit;
 
@@ -59,7 +60,7 @@ struct aq_pci_func_s *aq_pci_func_alloc(struct aq_hw_ops *aq_hw_ops,
 
 	for (port = 0; port < self->ports; ++port) {
 		struct aq_nic_s *aq_nic = aq_nic_alloc_cold(ndev_ops, eth_ops,
-							    &pdev->dev, self,
+							    pdev, self,
 							    port, aq_hw_ops);
 
 		if (!aq_nic) {
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 07b3c49a16a4266b4fb312bb79198f9ba0c60f04..f18dce14c93cfa89f5c091db5f5b06c6e882aa68 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -18,9 +18,20 @@
 #include "hw_atl_a0_internal.h"
 
 static int hw_atl_a0_get_hw_caps(struct aq_hw_s *self,
-				 struct aq_hw_caps_s *aq_hw_caps)
+				 struct aq_hw_caps_s *aq_hw_caps,
+				 unsigned short device,
+				 unsigned short subsystem_device)
 {
 	memcpy(aq_hw_caps, &hw_atl_a0_hw_caps_, sizeof(*aq_hw_caps));
+
+	if (device == HW_ATL_DEVICE_ID_D108 && subsystem_device == 0x0001)
+		aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_10G;
+
+	if (device == HW_ATL_DEVICE_ID_D109 && subsystem_device == 0x0001) {
+		aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_10G;
+		aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_5G;
+	}
+
 	return 0;
 }
 
@@ -333,6 +344,10 @@ static int hw_atl_a0_hw_init(struct aq_hw_s *self,
 	hw_atl_a0_hw_rss_set(self, &aq_nic_cfg->aq_rss);
 	hw_atl_a0_hw_rss_hash_set(self, &aq_nic_cfg->aq_rss);
 
+	/* Reset link status and read out initial hardware counters */
+	self->aq_link_status.mbps = 0;
+	hw_atl_utils_update_stats(self);
+
 	err = aq_hw_err_from_flags(self);
 	if (err < 0)
 		goto err_exit;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index ec68c20efcbdb6079b9dba4b8200ad8f1f450233..e4a22ce7bf09d50f21ffed6425783d7be7c88da5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -16,11 +16,23 @@
 #include "hw_atl_utils.h"
 #include "hw_atl_llh.h"
 #include "hw_atl_b0_internal.h"
+#include "hw_atl_llh_internal.h"
 
 static int hw_atl_b0_get_hw_caps(struct aq_hw_s *self,
-				 struct aq_hw_caps_s *aq_hw_caps)
+				 struct aq_hw_caps_s *aq_hw_caps,
+				 unsigned short device,
+				 unsigned short subsystem_device)
 {
 	memcpy(aq_hw_caps, &hw_atl_b0_hw_caps_, sizeof(*aq_hw_caps));
+
+	if (device == HW_ATL_DEVICE_ID_D108 && subsystem_device == 0x0001)
+		aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_10G;
+
+	if (device == HW_ATL_DEVICE_ID_D109 && subsystem_device == 0x0001) {
+		aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_10G;
+		aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_5G;
+	}
+
 	return 0;
 }
 
@@ -357,6 +369,7 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self,
 	};
 
 	int err = 0;
+	u32 val;
 
 	self->aq_nic_cfg = aq_nic_cfg;
 
@@ -374,6 +387,20 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self,
 	hw_atl_b0_hw_rss_set(self, &aq_nic_cfg->aq_rss);
 	hw_atl_b0_hw_rss_hash_set(self, &aq_nic_cfg->aq_rss);
 
+	/* Force limit MRRS on RDM/TDM to 2K */
+	val = aq_hw_read_reg(self, pci_reg_control6_adr);
+	aq_hw_write_reg(self, pci_reg_control6_adr, (val & ~0x707) | 0x404);
+
+	/* TX DMA total request limit. B0 hardware is not capable to
+	 * handle more than (8K-MRRS) incoming DMA data.
+	 * Value 24 in 256byte units
+	 */
+	aq_hw_write_reg(self, tx_dma_total_req_limit_adr, 24);
+
+	/* Reset link status and read out initial hardware counters */
+	self->aq_link_status.mbps = 0;
+	hw_atl_utils_update_stats(self);
+
 	err = aq_hw_err_from_flags(self);
 	if (err < 0)
 		goto err_exit;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
index 5527fc0e5942d6a8f4e14071a9e40a12e02ecf27..93450ec930e89f71c83253ac094e9928206df75f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
@@ -2343,6 +2343,9 @@
 #define tx_dma_desc_base_addrmsw_adr(descriptor) \
 			(0x00007c04u + (descriptor) * 0x40)
 
+/* tx dma total request limit */
+#define tx_dma_total_req_limit_adr 0x00007b20u
+
 /* tx interrupt moderation control register definitions
  * Preprocessor definitions for TX Interrupt Moderation Control Register
  * Base Address: 0x00008980
@@ -2369,6 +2372,9 @@
 /* default value of bitfield reg_res_dsbl */
 #define pci_reg_res_dsbl_default 0x1
 
+/* PCI core control register */
+#define pci_reg_control6_adr 0x1014u
+
 /* global microprocessor scratch pad definitions */
 #define glb_cpu_scratch_scp_adr(scratch_scp) (0x00000300u + (scratch_scp) * 0x4)
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 1fe016fc4bc704361ca68ee39f3e443715505e8c..f2ce12ed4218ee8d39a462b6ffc1fada96358506 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -503,73 +503,43 @@ int hw_atl_utils_update_stats(struct aq_hw_s *self)
 	struct hw_atl_s *hw_self = PHAL_ATLANTIC;
 	struct hw_aq_atl_utils_mbox mbox;
 
-	if (!self->aq_link_status.mbps)
-		return 0;
-
 	hw_atl_utils_mpi_read_stats(self, &mbox);
 
 #define AQ_SDELTA(_N_) (hw_self->curr_stats._N_ += \
 			mbox.stats._N_ - hw_self->last_stats._N_)
-
-	AQ_SDELTA(uprc);
-	AQ_SDELTA(mprc);
-	AQ_SDELTA(bprc);
-	AQ_SDELTA(erpt);
-
-	AQ_SDELTA(uptc);
-	AQ_SDELTA(mptc);
-	AQ_SDELTA(bptc);
-	AQ_SDELTA(erpr);
-
-	AQ_SDELTA(ubrc);
-	AQ_SDELTA(ubtc);
-	AQ_SDELTA(mbrc);
-	AQ_SDELTA(mbtc);
-	AQ_SDELTA(bbrc);
-	AQ_SDELTA(bbtc);
-	AQ_SDELTA(dpc);
-
+	if (self->aq_link_status.mbps) {
+		AQ_SDELTA(uprc);
+		AQ_SDELTA(mprc);
+		AQ_SDELTA(bprc);
+		AQ_SDELTA(erpt);
+
+		AQ_SDELTA(uptc);
+		AQ_SDELTA(mptc);
+		AQ_SDELTA(bptc);
+		AQ_SDELTA(erpr);
+
+		AQ_SDELTA(ubrc);
+		AQ_SDELTA(ubtc);
+		AQ_SDELTA(mbrc);
+		AQ_SDELTA(mbtc);
+		AQ_SDELTA(bbrc);
+		AQ_SDELTA(bbtc);
+		AQ_SDELTA(dpc);
+	}
 #undef AQ_SDELTA
+	hw_self->curr_stats.dma_pkt_rc = stats_rx_dma_good_pkt_counterlsw_get(self);
+	hw_self->curr_stats.dma_pkt_tc = stats_tx_dma_good_pkt_counterlsw_get(self);
+	hw_self->curr_stats.dma_oct_rc = stats_rx_dma_good_octet_counterlsw_get(self);
+	hw_self->curr_stats.dma_oct_tc = stats_tx_dma_good_octet_counterlsw_get(self);
 
 	memcpy(&hw_self->last_stats, &mbox.stats, sizeof(mbox.stats));
 
 	return 0;
 }
 
-int hw_atl_utils_get_hw_stats(struct aq_hw_s *self,
-			      u64 *data, unsigned int *p_count)
+struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self)
 {
-	struct hw_atl_s *hw_self = PHAL_ATLANTIC;
-	struct hw_atl_stats_s *stats = &hw_self->curr_stats;
-	int i = 0;
-
-	data[i] = stats->uprc + stats->mprc + stats->bprc;
-	data[++i] = stats->uprc;
-	data[++i] = stats->mprc;
-	data[++i] = stats->bprc;
-	data[++i] = stats->erpt;
-	data[++i] = stats->uptc + stats->mptc + stats->bptc;
-	data[++i] = stats->uptc;
-	data[++i] = stats->mptc;
-	data[++i] = stats->bptc;
-	data[++i] = stats->ubrc;
-	data[++i] = stats->ubtc;
-	data[++i] = stats->mbrc;
-	data[++i] = stats->mbtc;
-	data[++i] = stats->bbrc;
-	data[++i] = stats->bbtc;
-	data[++i] = stats->ubrc + stats->mbrc + stats->bbrc;
-	data[++i] = stats->ubtc + stats->mbtc + stats->bbtc;
-	data[++i] = stats_rx_dma_good_pkt_counterlsw_get(self);
-	data[++i] = stats_tx_dma_good_pkt_counterlsw_get(self);
-	data[++i] = stats_rx_dma_good_octet_counterlsw_get(self);
-	data[++i] = stats_tx_dma_good_octet_counterlsw_get(self);
-	data[++i] = stats->dpc;
-
-	if (p_count)
-		*p_count = ++i;
-
-	return 0;
+	return &PHAL_ATLANTIC->curr_stats;
 }
 
 static const u32 hw_atl_utils_hw_mac_regs[] = {
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index c99cc690e425bb72907df675e04a196819cfec02..21aeca6908d3b6dac5ec5ced1285c89cdba5acf2 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -129,7 +129,7 @@ struct __packed hw_aq_atl_utils_mbox {
 struct __packed hw_atl_s {
 	struct aq_hw_s base;
 	struct hw_atl_stats_s last_stats;
-	struct hw_atl_stats_s curr_stats;
+	struct aq_stats_s curr_stats;
 	u64 speed;
 	unsigned int chip_features;
 	u32 fw_ver_actual;
@@ -207,8 +207,6 @@ int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version);
 
 int hw_atl_utils_update_stats(struct aq_hw_s *self);
 
-int hw_atl_utils_get_hw_stats(struct aq_hw_s *self,
-			      u64 *data,
-			      unsigned int *p_count);
+struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self);
 
 #endif /* HW_ATL_UTILS_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h b/drivers/net/ethernet/aquantia/atlantic/ver.h
index 0de858d215c22d036d6c120bafe996e12ada1287..9009f2651e706b66e50b998b95a12283cb3b0e9b 100644
--- a/drivers/net/ethernet/aquantia/atlantic/ver.h
+++ b/drivers/net/ethernet/aquantia/atlantic/ver.h
@@ -11,8 +11,10 @@
 #define VER_H
 
 #define NIC_MAJOR_DRIVER_VERSION           1
-#define NIC_MINOR_DRIVER_VERSION           5
-#define NIC_BUILD_DRIVER_VERSION           345
+#define NIC_MINOR_DRIVER_VERSION           6
+#define NIC_BUILD_DRIVER_VERSION           13
 #define NIC_REVISION_DRIVER_VERSION        0
 
+#define AQ_CFG_DRV_VERSION_SUFFIX "-kern"
+
 #endif /* VER_H */
diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h
index 3c63b16d485f4bb3a7587e9e6d36dd0e121668d2..d9efbc8d783b84b128379e0ce58a43b005a8ab58 100644
--- a/drivers/net/ethernet/arc/emac.h
+++ b/drivers/net/ethernet/arc/emac.h
@@ -159,6 +159,8 @@ struct arc_emac_priv {
 	unsigned int link;
 	unsigned int duplex;
 	unsigned int speed;
+
+	unsigned int rx_missed_errors;
 };
 
 /**
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index 3241af1ce7182824c09ee3ad774f122565f6c940..bd277b0dc615118a58b81dfba5b040e26fa667ba 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -26,6 +26,8 @@
 
 #include "emac.h"
 
+static void arc_emac_restart(struct net_device *ndev);
+
 /**
  * arc_emac_tx_avail - Return the number of available slots in the tx ring.
  * @priv: Pointer to ARC EMAC private data structure.
@@ -210,39 +212,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
 			continue;
 		}
 
-		pktlen = info & LEN_MASK;
-		stats->rx_packets++;
-		stats->rx_bytes += pktlen;
-		skb = rx_buff->skb;
-		skb_put(skb, pktlen);
-		skb->dev = ndev;
-		skb->protocol = eth_type_trans(skb, ndev);
-
-		dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
-				 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
-
-		/* Prepare the BD for next cycle */
-		rx_buff->skb = netdev_alloc_skb_ip_align(ndev,
-							 EMAC_BUFFER_SIZE);
-		if (unlikely(!rx_buff->skb)) {
+		/* Prepare the BD for next cycle. netif_receive_skb()
+		 * only if new skb was allocated and mapped to avoid holes
+		 * in the RX fifo.
+		 */
+		skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
+		if (unlikely(!skb)) {
+			if (net_ratelimit())
+				netdev_err(ndev, "cannot allocate skb\n");
+			/* Return ownership to EMAC */
+			rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
 			stats->rx_errors++;
-			/* Because receive_skb is below, increment rx_dropped */
 			stats->rx_dropped++;
 			continue;
 		}
 
-		/* receive_skb only if new skb was allocated to avoid holes */
-		netif_receive_skb(skb);
-
-		addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data,
+		addr = dma_map_single(&ndev->dev, (void *)skb->data,
 				      EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
 		if (dma_mapping_error(&ndev->dev, addr)) {
 			if (net_ratelimit())
-				netdev_err(ndev, "cannot dma map\n");
-			dev_kfree_skb(rx_buff->skb);
+				netdev_err(ndev, "cannot map dma buffer\n");
+			dev_kfree_skb(skb);
+			/* Return ownership to EMAC */
+			rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
 			stats->rx_errors++;
+			stats->rx_dropped++;
 			continue;
 		}
+
+		/* unmap previosly mapped skb */
+		dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
+				 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
+
+		pktlen = info & LEN_MASK;
+		stats->rx_packets++;
+		stats->rx_bytes += pktlen;
+		skb_put(rx_buff->skb, pktlen);
+		rx_buff->skb->dev = ndev;
+		rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev);
+
+		netif_receive_skb(rx_buff->skb);
+
+		rx_buff->skb = skb;
 		dma_unmap_addr_set(rx_buff, addr, addr);
 		dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE);
 
@@ -258,6 +269,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
 	return work_done;
 }
 
+/**
+ * arc_emac_rx_miss_handle - handle R_MISS register
+ * @ndev:	Pointer to the net_device structure.
+ */
+static void arc_emac_rx_miss_handle(struct net_device *ndev)
+{
+	struct arc_emac_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	unsigned int miss;
+
+	miss = arc_reg_get(priv, R_MISS);
+	if (miss) {
+		stats->rx_errors += miss;
+		stats->rx_missed_errors += miss;
+		priv->rx_missed_errors += miss;
+	}
+}
+
+/**
+ * arc_emac_rx_stall_check - check RX stall
+ * @ndev:	Pointer to the net_device structure.
+ * @budget:	How many BDs requested to process on 1 call.
+ * @work_done:	How many BDs processed
+ *
+ * Under certain conditions EMAC stop reception of incoming packets and
+ * continuously increment R_MISS register instead of saving data into
+ * provided buffer. This function detect that condition and restart
+ * EMAC.
+ */
+static void arc_emac_rx_stall_check(struct net_device *ndev,
+				    int budget, unsigned int work_done)
+{
+	struct arc_emac_priv *priv = netdev_priv(ndev);
+	struct arc_emac_bd *rxbd;
+
+	if (work_done)
+		priv->rx_missed_errors = 0;
+
+	if (priv->rx_missed_errors && budget) {
+		rxbd = &priv->rxbd[priv->last_rx_bd];
+		if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
+			arc_emac_restart(ndev);
+			priv->rx_missed_errors = 0;
+		}
+	}
+}
+
 /**
  * arc_emac_poll - NAPI poll handler.
  * @napi:	Pointer to napi_struct structure.
@@ -272,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
 	unsigned int work_done;
 
 	arc_emac_tx_clean(ndev);
+	arc_emac_rx_miss_handle(ndev);
 
 	work_done = arc_emac_rx(ndev, budget);
 	if (work_done < budget) {
@@ -279,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
 		arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK);
 	}
 
+	arc_emac_rx_stall_check(ndev, budget, work_done);
+
 	return work_done;
 }
 
@@ -320,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
 		if (status & MSER_MASK) {
 			stats->rx_missed_errors += 0x100;
 			stats->rx_errors += 0x100;
+			priv->rx_missed_errors += 0x100;
+			napi_schedule(&priv->napi);
 		}
 
 		if (status & RXCR_MASK) {
@@ -732,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 }
 
 
+/**
+ * arc_emac_restart - Restart EMAC
+ * @ndev:	Pointer to net_device structure.
+ *
+ * This function do hardware reset of EMAC in order to restore
+ * network packets reception.
+ */
+static void arc_emac_restart(struct net_device *ndev)
+{
+	struct arc_emac_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	int i;
+
+	if (net_ratelimit())
+		netdev_warn(ndev, "restarting stalled EMAC\n");
+
+	netif_stop_queue(ndev);
+
+	/* Disable interrupts */
+	arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
+
+	/* Disable EMAC */
+	arc_reg_clr(priv, R_CTRL, EN_MASK);
+
+	/* Return the sk_buff to system */
+	arc_free_tx_queue(ndev);
+
+	/* Clean Tx BD's */
+	priv->txbd_curr = 0;
+	priv->txbd_dirty = 0;
+	memset(priv->txbd, 0, TX_RING_SZ);
+
+	for (i = 0; i < RX_BD_NUM; i++) {
+		struct arc_emac_bd *rxbd = &priv->rxbd[i];
+		unsigned int info = le32_to_cpu(rxbd->info);
+
+		if (!(info & FOR_EMAC)) {
+			stats->rx_errors++;
+			stats->rx_dropped++;
+		}
+		/* Return ownership to EMAC */
+		rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
+	}
+	priv->last_rx_bd = 0;
+
+	/* Make sure info is visible to EMAC before enable */
+	wmb();
+
+	/* Enable interrupts */
+	arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
+
+	/* Enable EMAC */
+	arc_reg_or(priv, R_CTRL, EN_MASK);
+
+	netif_start_queue(ndev);
+}
+
 static const struct net_device_ops arc_emac_netdev_ops = {
 	.ndo_open		= arc_emac_open,
 	.ndo_stop		= arc_emac_stop,
diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c
index e278e3d96ee010235ec4d21696a996b68cb3ef18..16f9bee992fedfab2069a2324c38fd4a5f142c93 100644
--- a/drivers/net/ethernet/arc/emac_rockchip.c
+++ b/drivers/net/ethernet/arc/emac_rockchip.c
@@ -199,9 +199,11 @@ static int emac_rockchip_probe(struct platform_device *pdev)
 
 	/* RMII interface needs always a rate of 50MHz */
 	err = clk_set_rate(priv->refclk, 50000000);
-	if (err)
+	if (err) {
 		dev_err(dev,
 			"failed to change reference clock rate (%d)\n", err);
+		goto out_regulator_disable;
+	}
 
 	if (priv->soc_data->need_div_macclk) {
 		priv->macclk = devm_clk_get(dev, "macclk");
@@ -220,19 +222,24 @@ static int emac_rockchip_probe(struct platform_device *pdev)
 
 		/* RMII TX/RX needs always a rate of 25MHz */
 		err = clk_set_rate(priv->macclk, 25000000);
-		if (err)
+		if (err) {
 			dev_err(dev,
 				"failed to change mac clock rate (%d)\n", err);
+			goto out_clk_disable_macclk;
+		}
 	}
 
 	err = arc_emac_probe(ndev, interface);
 	if (err) {
 		dev_err(dev, "failed to probe arc emac (%d)\n", err);
-		goto out_regulator_disable;
+		goto out_clk_disable_macclk;
 	}
 
 	return 0;
 
+out_clk_disable_macclk:
+	if (priv->soc_data->need_div_macclk)
+		clk_disable_unprepare(priv->macclk);
 out_regulator_disable:
 	if (priv->regulator)
 		regulator_disable(priv->regulator);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 4c739d5355d2279a5f1c4cd23e8ccf3288788e8d..8ae269ec17a119b419afc92eeb66dde76d43d473 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3030,7 +3030,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
 
 	del_timer_sync(&bp->timer);
 
-	if (IS_PF(bp)) {
+	if (IS_PF(bp) && !BP_NOMCP(bp)) {
 		/* Set ALWAYS_ALIVE bit in shmem */
 		bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
 		bnx2x_drv_pulse(bp);
@@ -3116,7 +3116,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
 	bp->cnic_loaded = false;
 
 	/* Clear driver version indication in shmem */
-	if (IS_PF(bp))
+	if (IS_PF(bp) && !BP_NOMCP(bp))
 		bnx2x_update_mng_version(bp);
 
 	/* Check if there are pending parity attentions. If there are - set
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 91e2a7560b48d572d26e8566c9a3b0667083d45d..ddd5d3ebd20111f667536aac61141b10e5bb2e7c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -9578,6 +9578,15 @@ static int bnx2x_init_shmem(struct bnx2x *bp)
 
 	do {
 		bp->common.shmem_base = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR);
+
+		/* If we read all 0xFFs, means we are in PCI error state and
+		 * should bail out to avoid crashes on adapter's FW reads.
+		 */
+		if (bp->common.shmem_base == 0xFFFFFFFF) {
+			bp->flags |= NO_MCP_FLAG;
+			return -ENODEV;
+		}
+
 		if (bp->common.shmem_base) {
 			val = SHMEM_RD(bp, validity_map[BP_PORT(bp)]);
 			if (val & SHR_MEM_VALIDITY_MB)
@@ -14320,7 +14329,10 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
 		BNX2X_ERR("IO slot reset --> driver unload\n");
 
 		/* MCP should have been reset; Need to wait for validity */
-		bnx2x_init_shmem(bp);
+		if (bnx2x_init_shmem(bp)) {
+			rtnl_unlock();
+			return PCI_ERS_RESULT_DISCONNECT;
+		}
 
 		if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) {
 			u32 v;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index c5c38d4b7d1ccd04044f972777c5d3844e755a02..61ca4eb7c6fa983165fc0308b22f2e29fbf30ef6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1883,7 +1883,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 			 * here forever if we consistently cannot allocate
 			 * buffers.
 			 */
-			else if (rc == -ENOMEM)
+			else if (rc == -ENOMEM && budget)
 				rx_pkts++;
 			else if (rc == -EBUSY)	/* partial completion */
 				break;
@@ -1969,7 +1969,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget)
 				cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);
 
 			rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
-			if (likely(rc == -EIO))
+			if (likely(rc == -EIO) && budget)
 				rx_pkts++;
 			else if (rc == -EBUSY)	/* partial completion */
 				break;
@@ -3368,6 +3368,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 	u16 cp_ring_id, len = 0;
 	struct hwrm_err_output *resp = bp->hwrm_cmd_resp_addr;
 	u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN;
+	struct hwrm_short_input short_input = {0};
 
 	req->seq_id = cpu_to_le16(bp->hwrm_cmd_seq++);
 	memset(resp, 0, PAGE_SIZE);
@@ -3376,7 +3377,6 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 
 	if (bp->flags & BNXT_FLAG_SHORT_CMD) {
 		void *short_cmd_req = bp->hwrm_short_cmd_req_addr;
-		struct hwrm_short_input short_input = {0};
 
 		memcpy(short_cmd_req, req, msg_len);
 		memset(short_cmd_req + msg_len, 0, BNXT_HWRM_MAX_REQ_LEN -
@@ -8263,8 +8263,9 @@ static void bnxt_shutdown(struct pci_dev *pdev)
 	if (netif_running(dev))
 		dev_close(dev);
 
+	bnxt_ulp_shutdown(bp);
+
 	if (system_state == SYSTEM_POWER_OFF) {
-		bnxt_ulp_shutdown(bp);
 		bnxt_clear_int_mode(bp);
 		pci_wake_from_d3(pdev, bp->wol);
 		pci_set_power_state(pdev, PCI_D3hot);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 5ee18660bc33a2572320ac3210029e883841b044..c9617675f934b615b05f13e4e74c8d9f7aeeaabb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -70,7 +70,7 @@ static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id)
 		netdev_err(bp->dev, "vf ndo called though sriov is disabled\n");
 		return -EINVAL;
 	}
-	if (vf_id >= bp->pf.max_vfs) {
+	if (vf_id >= bp->pf.active_vfs) {
 		netdev_err(bp->dev, "Invalid VF id %d\n", vf_id);
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index d5031f436f8341ac98d7b1074f22bec9e107245d..d8fee26cd45eaf9691a323c5f5d6bfd8379eb3b8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -56,7 +56,6 @@ static int bnxt_tc_parse_redir(struct bnxt *bp,
 {
 	int ifindex = tcf_mirred_ifindex(tc_act);
 	struct net_device *dev;
-	u16 dst_fid;
 
 	dev = __dev_get_by_index(dev_net(bp->dev), ifindex);
 	if (!dev) {
@@ -64,15 +63,7 @@ static int bnxt_tc_parse_redir(struct bnxt *bp,
 		return -EINVAL;
 	}
 
-	/* find the FID from dev */
-	dst_fid = bnxt_flow_get_dst_fid(bp, dev);
-	if (dst_fid == BNXT_FID_INVALID) {
-		netdev_info(bp->dev, "can't get fid for ifindex=%d", ifindex);
-		return -EINVAL;
-	}
-
 	actions->flags |= BNXT_TC_ACTION_FLAG_FWD;
-	actions->dst_fid = dst_fid;
 	actions->dst_dev = dev;
 	return 0;
 }
@@ -160,13 +151,17 @@ static int bnxt_tc_parse_actions(struct bnxt *bp,
 	if (rc)
 		return rc;
 
-	/* Tunnel encap/decap action must be accompanied by a redirect action */
-	if ((actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP ||
-	     actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) &&
-	    !(actions->flags & BNXT_TC_ACTION_FLAG_FWD)) {
-		netdev_info(bp->dev,
-			    "error: no redir action along with encap/decap");
-		return -EINVAL;
+	if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
+		if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
+			/* dst_fid is PF's fid */
+			actions->dst_fid = bp->pf.fw_fid;
+		} else {
+			/* find the FID from dst_dev */
+			actions->dst_fid =
+				bnxt_flow_get_dst_fid(bp, actions->dst_dev);
+			if (actions->dst_fid == BNXT_FID_INVALID)
+				return -EINVAL;
+		}
 	}
 
 	return rc;
@@ -426,7 +421,7 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
 	}
 
 	/* If all IP and L4 fields are wildcarded then this is an L2 flow */
-	if (is_wildcard(&l3_mask, sizeof(l3_mask)) &&
+	if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
 	    is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
 	} else {
@@ -532,10 +527,8 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
 	}
 
 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
-		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR |
-			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR;
+		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR;
 		ether_addr_copy(req.dst_macaddr, l2_info->dmac);
-		ether_addr_copy(req.src_macaddr, l2_info->smac);
 	}
 	if (l2_info->num_vlans) {
 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
@@ -901,10 +894,10 @@ static void bnxt_tc_put_decap_handle(struct bnxt *bp,
 
 static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
 				       struct ip_tunnel_key *tun_key,
-				       struct bnxt_tc_l2_key *l2_info,
-				       struct net_device *real_dst_dev)
+				       struct bnxt_tc_l2_key *l2_info)
 {
 #ifdef CONFIG_INET
+	struct net_device *real_dst_dev = bp->dev;
 	struct flowi4 flow = { {0} };
 	struct net_device *dst_dev;
 	struct neighbour *nbr;
@@ -1008,14 +1001,13 @@ static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
 	 */
 	tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src;
 	tun_key.tp_dst = flow->tun_key.tp_dst;
-	rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info, bp->dev);
+	rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info);
 	if (rc)
 		goto put_decap;
 
-	decap_key->ttl = tun_key.ttl;
 	decap_l2_info = &decap_node->l2_info;
+	/* decap smac is wildcarded */
 	ether_addr_copy(decap_l2_info->dmac, l2_info.smac);
-	ether_addr_copy(decap_l2_info->smac, l2_info.dmac);
 	if (l2_info.num_vlans) {
 		decap_l2_info->num_vlans = l2_info.num_vlans;
 		decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid;
@@ -1095,8 +1087,7 @@ static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
 	if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
 		goto done;
 
-	rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info,
-					 flow->actions.dst_dev);
+	rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info);
 	if (rc)
 		goto put_encap;
 
@@ -1169,6 +1160,15 @@ static int __bnxt_tc_del_flow(struct bnxt *bp,
 	return 0;
 }
 
+static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow,
+				u16 src_fid)
+{
+	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
+		flow->src_fid = bp->pf.fw_fid;
+	else
+		flow->src_fid = src_fid;
+}
+
 /* Add a new flow or replace an existing flow.
  * Notes on locking:
  * There are essentially two critical sections here.
@@ -1204,7 +1204,8 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
 	rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow);
 	if (rc)
 		goto free_node;
-	flow->src_fid = src_fid;
+
+	bnxt_tc_set_src_fid(bp, flow, src_fid);
 
 	if (!bnxt_tc_can_offload(bp, flow)) {
 		rc = -ENOSPC;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index de51c2177d03b3cf9e4653226bd8901b1d29834e..8995cfefbfcf1aa1a46f1cbd2f1bdb6722dca7bb 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -4,11 +4,13 @@
  * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
  * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com)
  * Copyright (C) 2004 Sun Microsystems Inc.
- * Copyright (C) 2005-2014 Broadcom Corporation.
+ * Copyright (C) 2005-2016 Broadcom Corporation.
+ * Copyright (C) 2016-2017 Broadcom Limited.
  *
  * Firmware is:
  *	Derived from proprietary unpublished source code,
- *	Copyright (C) 2000-2003 Broadcom Corporation.
+ *	Copyright (C) 2000-2016 Broadcom Corporation.
+ *	Copyright (C) 2016-2017 Broadcom Ltd.
  *
  *	Permission is hereby granted for the distribution of this firmware
  *	data in hexadecimal or equivalent format, provided this copyright
@@ -10052,6 +10054,16 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy)
 
 	tw32(GRC_MODE, tp->grc_mode | val);
 
+	/* On one of the AMD platform, MRRS is restricted to 4000 because of
+	 * south bridge limitation. As a workaround, Driver is setting MRRS
+	 * to 2048 instead of default 4096.
+	 */
+	if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL &&
+	    tp->pdev->subsystem_device == TG3PCI_SUBDEVICE_ID_DELL_5762) {
+		val = tr32(TG3PCI_DEV_STATUS_CTRL) & ~MAX_READ_REQ_MASK;
+		tw32(TG3PCI_DEV_STATUS_CTRL, val | MAX_READ_REQ_SIZE_2048);
+	}
+
 	/* Setup the timer prescalar register.  Clock is always 66Mhz. */
 	val = tr32(GRC_MISC_CFG);
 	val &= ~0xff;
@@ -14225,7 +14237,10 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
 	/* Reset PHY, otherwise the read DMA engine will be in a mode that
 	 * breaks all requests to 256 bytes.
 	 */
-	if (tg3_asic_rev(tp) == ASIC_REV_57766)
+	if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
+	    tg3_asic_rev(tp) == ASIC_REV_5717 ||
+	    tg3_asic_rev(tp) == ASIC_REV_5719 ||
+	    tg3_asic_rev(tp) == ASIC_REV_5720)
 		reset_phy = true;
 
 	err = tg3_restart_hw(tp, reset_phy);
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index c2d02d02d1e6f4558365fe53f152e13e045e0277..1f0271fa7c74026edbc68c7880a44287eded7066 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -5,7 +5,8 @@
  * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
  * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com)
  * Copyright (C) 2004 Sun Microsystems Inc.
- * Copyright (C) 2007-2014 Broadcom Corporation.
+ * Copyright (C) 2007-2016 Broadcom Corporation.
+ * Copyright (C) 2016-2017 Broadcom Limited.
  */
 
 #ifndef _T3_H
@@ -96,6 +97,7 @@
 #define TG3PCI_SUBDEVICE_ID_DELL_JAGUAR		0x0106
 #define TG3PCI_SUBDEVICE_ID_DELL_MERLOT		0x0109
 #define TG3PCI_SUBDEVICE_ID_DELL_SLIM_MERLOT	0x010a
+#define TG3PCI_SUBDEVICE_ID_DELL_5762		0x07f0
 #define TG3PCI_SUBVENDOR_ID_COMPAQ		PCI_VENDOR_ID_COMPAQ
 #define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE	0x007c
 #define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE_2	0x009a
@@ -281,6 +283,9 @@
 #define TG3PCI_STD_RING_PROD_IDX	0x00000098 /* 64-bit */
 #define TG3PCI_RCV_RET_RING_CON_IDX	0x000000a0 /* 64-bit */
 /* 0xa8 --> 0xb8 unused */
+#define TG3PCI_DEV_STATUS_CTRL		0x000000b4
+#define  MAX_READ_REQ_SIZE_2048		 0x00004000
+#define  MAX_READ_REQ_MASK		 0x00007000
 #define TG3PCI_DUAL_MAC_CTRL		0x000000b8
 #define  DUAL_MAC_CTRL_CH_MASK		 0x00000003
 #define  DUAL_MAC_CTRL_ID		 0x00000004
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 6aa0eee88ea529963850828fc1ab46eb36d75095..a5eecd895a8253d753bea0fb273da0bf49005d13 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1113,7 +1113,7 @@ static int liquidio_watchdog(void *param)
 				dev_err(&oct->pci_dev->dev,
 					"ERROR: Octeon core %d crashed or got stuck!  See oct-fwdump for details.\n",
 					core);
-					err_msg_was_printed[core] = true;
+				err_msg_was_printed[core] = true;
 			}
 		}
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 8b2c31e2a2b0281d6ca8c70bbf3a520bdd15eb31..a3d12dbde95b6d71634c8502c6eb3509be057049 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -1355,6 +1355,8 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
 
 	/* Offload checksum calculation to HW */
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		if (ip.v4->version == 4)
+			hdr->csum_l3 = 1; /* Enable IP csum calculation */
 		hdr->l3_offset = skb_network_offset(skb);
 		hdr->l4_offset = skb_transport_offset(skb);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 6f9fa6e3c42a0cc1988e82ae976cce17d45ca0e5..d8424ed16c33777e81c78bab929bda4b5c5aeb29 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -344,7 +344,6 @@ struct adapter_params {
 
 	unsigned int sf_size;             /* serial flash size in bytes */
 	unsigned int sf_nsec;             /* # of flash sectors */
-	unsigned int sf_fw_start;         /* start of FW image in flash */
 
 	unsigned int fw_vers;		  /* firmware version */
 	unsigned int bs_vers;		  /* bootstrap version */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index f63210f155796c9809415d7da54f59086ebdbf7c..375ef86a84da1b08c0a3ff1dd2a6a5380600acb4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -2844,8 +2844,6 @@ enum {
 	SF_RD_DATA_FAST = 0xb,        /* read flash */
 	SF_RD_ID        = 0x9f,       /* read ID */
 	SF_ERASE_SECTOR = 0xd8,       /* erase sector */
-
-	FW_MAX_SIZE = 16 * SF_SEC_SIZE,
 };
 
 /**
@@ -3558,8 +3556,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
 	const __be32 *p = (const __be32 *)fw_data;
 	const struct fw_hdr *hdr = (const struct fw_hdr *)fw_data;
 	unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec;
-	unsigned int fw_img_start = adap->params.sf_fw_start;
-	unsigned int fw_start_sec = fw_img_start / sf_sec_size;
+	unsigned int fw_start_sec = FLASH_FW_START_SEC;
+	unsigned int fw_size = FLASH_FW_MAX_SIZE;
+	unsigned int fw_start = FLASH_FW_START;
 
 	if (!size) {
 		dev_err(adap->pdev_dev, "FW image has no data\n");
@@ -3575,9 +3574,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
 			"FW image size differs from size in FW header\n");
 		return -EINVAL;
 	}
-	if (size > FW_MAX_SIZE) {
+	if (size > fw_size) {
 		dev_err(adap->pdev_dev, "FW image too large, max is %u bytes\n",
-			FW_MAX_SIZE);
+			fw_size);
 		return -EFBIG;
 	}
 	if (!t4_fw_matches_chip(adap, hdr))
@@ -3604,11 +3603,11 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
 	 */
 	memcpy(first_page, fw_data, SF_PAGE_SIZE);
 	((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff);
-	ret = t4_write_flash(adap, fw_img_start, SF_PAGE_SIZE, first_page);
+	ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page);
 	if (ret)
 		goto out;
 
-	addr = fw_img_start;
+	addr = fw_start;
 	for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
 		addr += SF_PAGE_SIZE;
 		fw_data += SF_PAGE_SIZE;
@@ -3618,7 +3617,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
 	}
 
 	ret = t4_write_flash(adap,
-			     fw_img_start + offsetof(struct fw_hdr, fw_ver),
+			     fw_start + offsetof(struct fw_hdr, fw_ver),
 			     sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver);
 out:
 	if (ret)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 6105738552134809fc0d0abf8caadfa76f5e1f8a..a74300a4459c78d7797cff90962a1a7a394dccb5 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -818,6 +818,12 @@ static void fec_enet_bd_init(struct net_device *dev)
 		for (i = 0; i < txq->bd.ring_size; i++) {
 			/* Initialize the BD for every fragment in the page. */
 			bdp->cbd_sc = cpu_to_fec16(0);
+			if (bdp->cbd_bufaddr &&
+			    !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+				dma_unmap_single(&fep->pdev->dev,
+						 fec32_to_cpu(bdp->cbd_bufaddr),
+						 fec16_to_cpu(bdp->cbd_datlen),
+						 DMA_TO_DEVICE);
 			if (txq->tx_skbuff[i]) {
 				dev_kfree_skb_any(txq->tx_skbuff[i]);
 				txq->tx_skbuff[i] = NULL;
@@ -3463,6 +3469,10 @@ fec_probe(struct platform_device *pdev)
 			goto failed_regulator;
 		}
 	} else {
+		if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) {
+			ret = -EPROBE_DEFER;
+			goto failed_regulator;
+		}
 		fep->reg_phy = NULL;
 	}
 
@@ -3546,8 +3556,9 @@ fec_probe(struct platform_device *pdev)
 failed_clk:
 	if (of_phy_is_fixed_link(np))
 		of_phy_deregister_fixed_link(np);
-failed_phy:
 	of_node_put(phy_node);
+failed_phy:
+	dev_id--;
 failed_ioremap:
 	free_netdev(ndev);
 
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 5be52d89b182ec4d1c46cc692a0e446d6925c618..7f837006bb6adf04a844d08a2a88d0957f704443 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1378,9 +1378,11 @@ static int gfar_probe(struct platform_device *ofdev)
 
 	gfar_init_addr_hash_table(priv);
 
-	/* Insert receive time stamps into padding alignment bytes */
+	/* Insert receive time stamps into padding alignment bytes, and
+	 * plus 2 bytes padding to ensure the cpu alignment.
+	 */
 	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)
-		priv->padding = 8;
+		priv->padding = 8 + DEFAULT_PADDING;
 
 	if (dev->features & NETIF_F_IP_CSUM ||
 	    priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)
@@ -1790,6 +1792,7 @@ static int init_phy(struct net_device *dev)
 		GFAR_SUPPORTED_GBIT : 0;
 	phy_interface_t interface;
 	struct phy_device *phydev;
+	struct ethtool_eee edata;
 
 	priv->oldlink = 0;
 	priv->oldspeed = 0;
@@ -1814,6 +1817,10 @@ static int init_phy(struct net_device *dev)
 	/* Add support for flow control, but don't advertise it by default */
 	phydev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
 
+	/* disable EEE autoneg, EEE not supported by eTSEC */
+	memset(&edata, 0, sizeof(struct ethtool_eee));
+	phy_ethtool_set_eee(phydev, &edata);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c
index 544114281ea754fc9c2221b8aa2f2b8d3ef842f6..9f8d4f8e57e30fa09ca1e8af3aa0d6a017633c1d 100644
--- a/drivers/net/ethernet/freescale/gianfar_ptp.c
+++ b/drivers/net/ethernet/freescale/gianfar_ptp.c
@@ -319,11 +319,10 @@ static int ptp_gianfar_adjtime(struct ptp_clock_info *ptp, s64 delta)
 	now = tmr_cnt_read(etsects);
 	now += delta;
 	tmr_cnt_write(etsects, now);
+	set_fipers(etsects);
 
 	spin_unlock_irqrestore(&etsects->lock, flags);
 
-	set_fipers(etsects);
-
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h
index d7bdea79e9fa755f8a9c6ad63b0ed1eb1e3c63f5..8fd2458060a088d6475622e4ae68cd40df23c116 100644
--- a/drivers/net/ethernet/intel/e1000/e1000.h
+++ b/drivers/net/ethernet/intel/e1000/e1000.h
@@ -331,7 +331,8 @@ struct e1000_adapter {
 enum e1000_state_t {
 	__E1000_TESTING,
 	__E1000_RESETTING,
-	__E1000_DOWN
+	__E1000_DOWN,
+	__E1000_DISABLED
 };
 
 #undef pr_fmt
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 1982f7917a8d5d68776b2e052ac1df809f11c065..3dd4aeb2706d393cd8fbf4998a7582d33b9bafcd 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -945,7 +945,7 @@ static int e1000_init_hw_struct(struct e1000_adapter *adapter,
 static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct net_device *netdev;
-	struct e1000_adapter *adapter;
+	struct e1000_adapter *adapter = NULL;
 	struct e1000_hw *hw;
 
 	static int cards_found;
@@ -955,6 +955,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	u16 tmp = 0;
 	u16 eeprom_apme_mask = E1000_EEPROM_APME;
 	int bars, need_ioport;
+	bool disable_dev = false;
 
 	/* do not allocate ioport bars when not needed */
 	need_ioport = e1000_is_need_ioport(pdev);
@@ -1259,11 +1260,13 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	iounmap(hw->ce4100_gbe_mdio_base_virt);
 	iounmap(hw->hw_addr);
 err_ioremap:
+	disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags);
 	free_netdev(netdev);
 err_alloc_etherdev:
 	pci_release_selected_regions(pdev, bars);
 err_pci_reg:
-	pci_disable_device(pdev);
+	if (!adapter || disable_dev)
+		pci_disable_device(pdev);
 	return err;
 }
 
@@ -1281,6 +1284,7 @@ static void e1000_remove(struct pci_dev *pdev)
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
+	bool disable_dev;
 
 	e1000_down_and_stop(adapter);
 	e1000_release_manageability(adapter);
@@ -1299,9 +1303,11 @@ static void e1000_remove(struct pci_dev *pdev)
 		iounmap(hw->flash_address);
 	pci_release_selected_regions(pdev, adapter->bars);
 
+	disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags);
 	free_netdev(netdev);
 
-	pci_disable_device(pdev);
+	if (disable_dev)
+		pci_disable_device(pdev);
 }
 
 /**
@@ -5156,7 +5162,8 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake)
 	if (netif_running(netdev))
 		e1000_free_irq(adapter);
 
-	pci_disable_device(pdev);
+	if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags))
+		pci_disable_device(pdev);
 
 	return 0;
 }
@@ -5200,6 +5207,10 @@ static int e1000_resume(struct pci_dev *pdev)
 		pr_err("Cannot enable PCI device from suspend\n");
 		return err;
 	}
+
+	/* flush memory to make sure state is correct */
+	smp_mb__before_atomic();
+	clear_bit(__E1000_DISABLED, &adapter->flags);
 	pci_set_master(pdev);
 
 	pci_enable_wake(pdev, PCI_D3hot, 0);
@@ -5274,7 +5285,9 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
 
 	if (netif_running(netdev))
 		e1000_down(adapter);
-	pci_disable_device(pdev);
+
+	if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags))
+		pci_disable_device(pdev);
 
 	/* Request a slot slot reset. */
 	return PCI_ERS_RESULT_NEED_RESET;
@@ -5302,6 +5315,10 @@ static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev)
 		pr_err("Cannot re-enable PCI device after reset.\n");
 		return PCI_ERS_RESULT_DISCONNECT;
 	}
+
+	/* flush memory to make sure state is correct */
+	smp_mb__before_atomic();
+	clear_bit(__E1000_DISABLED, &adapter->flags);
 	pci_set_master(pdev);
 
 	pci_enable_wake(pdev, PCI_D3hot, 0);
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index d6d4ed7acf03117232778a82a757ee44d65f765a..31277d3bb7dc1241032695d2d9424779654f4f5f 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1367,6 +1367,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
  *  Checks to see of the link status of the hardware has changed.  If a
  *  change in link status has been detected, then we read the PHY registers
  *  to get the current speed/duplex if link exists.
+ *
+ *  Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
+ *  up).
  **/
 static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 {
@@ -1382,7 +1385,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 	 * Change or Rx Sequence Error interrupt.
 	 */
 	if (!mac->get_link_status)
-		return 0;
+		return 1;
 
 	/* First we want to see if the MII Status Register reports
 	 * link.  If so, then we want to get the current speed/duplex
@@ -1613,10 +1616,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 	 * different link partner.
 	 */
 	ret_val = e1000e_config_fc_after_link_up(hw);
-	if (ret_val)
+	if (ret_val) {
 		e_dbg("Error configuring flow control\n");
+		return ret_val;
+	}
 
-	return ret_val;
+	return 1;
 }
 
 static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 321d8be80871ce2fc4a5ab39c3189dad2370c988..42dcaefc4c1942777edf6044953ccd1f798cac2a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1573,11 +1573,18 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
 	else
 		netdev_info(netdev, "set new mac address %pM\n", addr->sa_data);
 
+	/* Copy the address first, so that we avoid a possible race with
+	 * .set_rx_mode(). If we copy after changing the address in the filter
+	 * list, we might open ourselves to a narrow race window where
+	 * .set_rx_mode could delete our dev_addr filter and prevent traffic
+	 * from passing.
+	 */
+	ether_addr_copy(netdev->dev_addr, addr->sa_data);
+
 	spin_lock_bh(&vsi->mac_filter_hash_lock);
 	i40e_del_mac_filter(vsi, netdev->dev_addr);
 	i40e_add_mac_filter(vsi, addr->sa_data);
 	spin_unlock_bh(&vsi->mac_filter_hash_lock);
-	ether_addr_copy(netdev->dev_addr, addr->sa_data);
 	if (vsi->type == I40E_VSI_MAIN) {
 		i40e_status ret;
 
@@ -1923,6 +1930,14 @@ static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr)
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
 
+	/* Under some circumstances, we might receive a request to delete
+	 * our own device address from our uc list. Because we store the
+	 * device address in the VSI's MAC/VLAN filter list, we need to ignore
+	 * such requests and not delete our device address from this list.
+	 */
+	if (ether_addr_equal(addr, netdev->dev_addr))
+		return 0;
+
 	i40e_del_mac_filter(vsi, addr);
 
 	return 0;
@@ -6038,8 +6053,8 @@ static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi)
 	/* Set Bit 7 to be valid */
 	mode = I40E_AQ_SET_SWITCH_BIT7_VALID;
 
-	/* Set L4type to both TCP and UDP support */
-	mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH;
+	/* Set L4type for TCP support */
+	mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP;
 
 	/* Set cloud filter mode */
 	mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL;
@@ -6969,18 +6984,18 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
 	     is_valid_ether_addr(filter->src_mac)) ||
 	    (is_multicast_ether_addr(filter->dst_mac) &&
 	     is_multicast_ether_addr(filter->src_mac)))
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
-	/* Make sure port is specified, otherwise bail out, for channel
-	 * specific cloud filter needs 'L4 port' to be non-zero
+	/* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP
+	 * ports are not supported via big buffer now.
 	 */
-	if (!filter->dst_port)
-		return -EINVAL;
+	if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP)
+		return -EOPNOTSUPP;
 
 	/* adding filter using src_port/src_ip is not supported at this stage */
 	if (filter->src_port || filter->src_ipv4 ||
 	    !ipv6_addr_any(&filter->ip.v6.src_ip6))
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	/* copy element needed to add cloud filter from filter */
 	i40e_set_cld_element(filter, &cld_filter.element);
@@ -6991,7 +7006,7 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
 	    is_multicast_ether_addr(filter->src_mac)) {
 		/* MAC + IP : unsupported mode */
 		if (filter->dst_ipv4)
-			return -EINVAL;
+			return -EOPNOTSUPP;
 
 		/* since we validated that L4 port must be valid before
 		 * we get here, start with respective "flags" value
@@ -7356,7 +7371,7 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
 
 	if (tc < 0) {
 		dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
-		return -EINVAL;
+		return -EOPNOTSUPP;
 	}
 
 	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 4566d66ffc7c95a72c4135075a19d5ac8bcd2689..5bc2748ac46860df3a14553d870197d0d67ddda4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3047,10 +3047,30 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
 	/* Walk through fragments adding latest fragment, testing it, and
 	 * then removing stale fragments from the sum.
 	 */
-	stale = &skb_shinfo(skb)->frags[0];
-	for (;;) {
+	for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
+		int stale_size = skb_frag_size(stale);
+
 		sum += skb_frag_size(frag++);
 
+		/* The stale fragment may present us with a smaller
+		 * descriptor than the actual fragment size. To account
+		 * for that we need to remove all the data on the front and
+		 * figure out what the remainder would be in the last
+		 * descriptor associated with the fragment.
+		 */
+		if (stale_size > I40E_MAX_DATA_PER_TXD) {
+			int align_pad = -(stale->page_offset) &
+					(I40E_MAX_READ_REQ_SIZE - 1);
+
+			sum -= align_pad;
+			stale_size -= align_pad;
+
+			do {
+				sum -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+				stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+			} while (stale_size > I40E_MAX_DATA_PER_TXD);
+		}
+
 		/* if sum is negative we failed to make sufficient progress */
 		if (sum < 0)
 			return true;
@@ -3058,7 +3078,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
 		if (!nr_frags--)
 			break;
 
-		sum -= skb_frag_size(stale++);
+		sum -= stale_size;
 	}
 
 	return false;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 50864f99446d3a9f22c83c984eaec4f25caa6015..1ba29bb85b6706c15b5d80d760a7128afd31e0fe 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -2012,10 +2012,30 @@ bool __i40evf_chk_linearize(struct sk_buff *skb)
 	/* Walk through fragments adding latest fragment, testing it, and
 	 * then removing stale fragments from the sum.
 	 */
-	stale = &skb_shinfo(skb)->frags[0];
-	for (;;) {
+	for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
+		int stale_size = skb_frag_size(stale);
+
 		sum += skb_frag_size(frag++);
 
+		/* The stale fragment may present us with a smaller
+		 * descriptor than the actual fragment size. To account
+		 * for that we need to remove all the data on the front and
+		 * figure out what the remainder would be in the last
+		 * descriptor associated with the fragment.
+		 */
+		if (stale_size > I40E_MAX_DATA_PER_TXD) {
+			int align_pad = -(stale->page_offset) &
+					(I40E_MAX_READ_REQ_SIZE - 1);
+
+			sum -= align_pad;
+			stale_size -= align_pad;
+
+			do {
+				sum -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+				stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+			} while (stale_size > I40E_MAX_DATA_PER_TXD);
+		}
+
 		/* if sum is negative we failed to make sufficient progress */
 		if (sum < 0)
 			return true;
@@ -2023,7 +2043,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb)
 		if (!nr_frags--)
 			break;
 
-		sum -= skb_frag_size(stale++);
+		sum -= stale_size;
 	}
 
 	return false;
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index c9798210fa0f6cef39aec36ed5c64fe6e805d7dc..0495487f7b42e7e80d416a2212fad2d8ca786f71 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -344,7 +344,8 @@ static int orion_mdio_probe(struct platform_device *pdev)
 			dev->regs + MVMDIO_ERR_INT_MASK);
 
 	} else if (dev->err_interrupt == -EPROBE_DEFER) {
-		return -EPROBE_DEFER;
+		ret = -EPROBE_DEFER;
+		goto out_mdio;
 	}
 
 	if (pdev->dev.of_node)
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index bc93b69cfd1edcf62d11cd24d41a9ca74b8f0dcc..a539263cd79ce4be8fcc0cbfe6bfdd196336cd38 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp)
 	val &= ~MVNETA_GMAC0_PORT_ENABLE;
 	mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
 
+	pp->link = 0;
+	pp->duplex = -1;
+	pp->speed = 0;
+
 	udelay(200);
 }
 
@@ -1958,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
 
 		if (!mvneta_rxq_desc_is_first_last(rx_status) ||
 		    (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
+			mvneta_rx_error(pp, rx_desc);
 err_drop_frame:
 			dev->stats.rx_errors++;
-			mvneta_rx_error(pp, rx_desc);
 			/* leave the descriptor untouched */
 			continue;
 		}
@@ -3011,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
 {
 	int queue;
 
-	for (queue = 0; queue < txq_number; queue++)
+	for (queue = 0; queue < rxq_number; queue++)
 		mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
 }
 
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index d83a78be98a2cb90f5cea6b07eb257e11ad9ebed..634b2f41cc9e43ef66d2cf2c33e393221b92af23 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -85,7 +85,7 @@
 
 /* RSS Registers */
 #define MVPP22_RSS_INDEX			0x1500
-#define     MVPP22_RSS_INDEX_TABLE_ENTRY(idx)	((idx) << 8)
+#define     MVPP22_RSS_INDEX_TABLE_ENTRY(idx)	(idx)
 #define     MVPP22_RSS_INDEX_TABLE(idx)		((idx) << 8)
 #define     MVPP22_RSS_INDEX_QUEUE(idx)		((idx) << 16)
 #define MVPP22_RSS_TABLE_ENTRY			0x1508
@@ -5598,7 +5598,7 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev,
 	u32 txq_dma;
 
 	/* Allocate memory for TX descriptors */
-	aggr_txq->descs = dma_alloc_coherent(&pdev->dev,
+	aggr_txq->descs = dma_zalloc_coherent(&pdev->dev,
 				MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
 				&aggr_txq->descs_dma, GFP_KERNEL);
 	if (!aggr_txq->descs)
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 6e423f098a60d33f96678ef5c08b1b52cbe3287f..31efc47c847eaf555d3c54b7a754a83c5e4f8d9b 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -4081,7 +4081,6 @@ static void skge_remove(struct pci_dev *pdev)
 	if (hw->ports > 1) {
 		skge_write32(hw, B0_IMSK, 0);
 		skge_read32(hw, B0_IMSK);
-		free_irq(pdev->irq, hw);
 	}
 	spin_unlock_irq(&hw->hw_lock);
 
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 54adfd96785846f9e60a2ded11ab96bc0c196c7e..fc67e35b253e4e59c12227c3e24da9c0f5bae311 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
 	/* set GE2 TUNE */
 	regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
 
-	/* GE1, Force 1000M/FD, FC ON */
-	mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0));
-
-	/* GE2, Force 1000M/FD, FC ON */
-	mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1));
+	/* Set linkdown as the default for each GMAC. Its own MCR would be set
+	 * up with the more appropriate value when mtk_phy_link_adjust call is
+	 * being invoked.
+	 */
+	for (i = 0; i < MTK_MAC_COUNT; i++)
+		mtk_w32(eth, 0, MTK_MAC_MCR(i));
 
 	/* Indicates CDM to parse the MTK special tag from CPU
 	 * which also is working out for untag packets.
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index e0eb695318e64ebcaf58d6edb5f9a57be6f9ddf6..1fa4849a6f560f2c3e15dddc13c03bb59031a5b7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -188,7 +188,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 	struct net_device *dev = mdev->pndev[port];
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct net_device_stats *stats = &dev->stats;
-	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_cmd_mailbox *mailbox, *mailbox_priority;
 	u64 in_mod = reset << 8 | port;
 	int err;
 	int i, counter_index;
@@ -198,6 +198,13 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
 	if (IS_ERR(mailbox))
 		return PTR_ERR(mailbox);
+
+	mailbox_priority = mlx4_alloc_cmd_mailbox(mdev->dev);
+	if (IS_ERR(mailbox_priority)) {
+		mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+		return PTR_ERR(mailbox_priority);
+	}
+
 	err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0,
 			   MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
 			   MLX4_CMD_NATIVE);
@@ -206,6 +213,28 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 
 	mlx4_en_stats = mailbox->buf;
 
+	memset(&tmp_counter_stats, 0, sizeof(tmp_counter_stats));
+	counter_index = mlx4_get_default_counter_index(mdev->dev, port);
+	err = mlx4_get_counter_stats(mdev->dev, counter_index,
+				     &tmp_counter_stats, reset);
+
+	/* 0xffs indicates invalid value */
+	memset(mailbox_priority->buf, 0xff,
+	       sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
+
+	if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) {
+		memset(mailbox_priority->buf, 0,
+		       sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
+		err = mlx4_cmd_box(mdev->dev, 0, mailbox_priority->dma,
+				   in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL,
+				   0, MLX4_CMD_DUMP_ETH_STATS,
+				   MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+		if (err)
+			goto out;
+	}
+
+	flowstats = mailbox_priority->buf;
+
 	spin_lock_bh(&priv->stats_lock);
 
 	mlx4_en_fold_software_stats(dev);
@@ -345,31 +374,6 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 	priv->pkstats.tx_prio[8][0] = be64_to_cpu(mlx4_en_stats->TTOT_novlan);
 	priv->pkstats.tx_prio[8][1] = be64_to_cpu(mlx4_en_stats->TOCT_novlan);
 
-	spin_unlock_bh(&priv->stats_lock);
-
-	memset(&tmp_counter_stats, 0, sizeof(tmp_counter_stats));
-	counter_index = mlx4_get_default_counter_index(mdev->dev, port);
-	err = mlx4_get_counter_stats(mdev->dev, counter_index,
-				     &tmp_counter_stats, reset);
-
-	/* 0xffs indicates invalid value */
-	memset(mailbox->buf, 0xff, sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
-
-	if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) {
-		memset(mailbox->buf, 0,
-		       sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
-		err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma,
-				   in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL,
-				   0, MLX4_CMD_DUMP_ETH_STATS,
-				   MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
-		if (err)
-			goto out;
-	}
-
-	flowstats = mailbox->buf;
-
-	spin_lock_bh(&priv->stats_lock);
-
 	if (tmp_counter_stats.counter_mode == 0) {
 		priv->pf_stats.rx_bytes   = be64_to_cpu(tmp_counter_stats.rx_bytes);
 		priv->pf_stats.tx_bytes   = be64_to_cpu(tmp_counter_stats.tx_bytes);
@@ -410,6 +414,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 
 out:
 	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+	mlx4_free_cmd_mailbox(mdev->dev, mailbox_priority);
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
index 88699b18194618b0f6fd71af120bf2848560787a..946d9db7c8c2028c61b03b7f3d2f5d7243280ab5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
@@ -185,7 +185,7 @@ void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf)
 		if (priv->mdev->dev->caps.flags &
 					MLX4_DEV_CAP_FLAG_UC_LOOPBACK) {
 			buf[3] = mlx4_en_test_registers(priv);
-			if (priv->port_up)
+			if (priv->port_up && dev->mtu >= MLX4_SELFTEST_LB_MIN_MTU)
 				buf[4] = mlx4_en_test_loopback(priv);
 		}
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 1856e279a7e0a40b9365da2016a1b52e9156479b..2b72677eccd48f5a45aa6f0e44cc45258cf24762 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -153,6 +153,9 @@
 #define SMALL_PACKET_SIZE      (256 - NET_IP_ALIGN)
 #define HEADER_COPY_SIZE       (128 - NET_IP_ALIGN)
 #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN)
+#define PREAMBLE_LEN           8
+#define MLX4_SELFTEST_LB_MIN_MTU (MLX4_LOOPBACK_TEST_PAYLOAD + NET_IP_ALIGN + \
+				  ETH_HLEN + PREAMBLE_LEN)
 
 #define MLX4_EN_MIN_MTU		46
 /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 04304dd894c6c3119eb24302fe599766dadd3708..606a0e0beeae6961ae4e8c7a357d737834be614e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -611,7 +611,6 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
 						MLX4_MAX_PORTS;
 				else
 					res_alloc->guaranteed[t] = 0;
-				res_alloc->res_free -= res_alloc->guaranteed[t];
 				break;
 			default:
 				break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 1fffdebbc9e8994c70a19f4982f26d1de98be5f2..e9a1fbcc4adfa6e692902b551d0c535bfe019a9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
 	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
 	case MLX5_CMD_OP_QUERY_Q_COUNTER:
-	case MLX5_CMD_OP_SET_RATE_LIMIT:
+	case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
 	case MLX5_CMD_OP_QUERY_RATE_LIMIT:
 	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
 	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
@@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
 	MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
 	MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
-	MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
+	MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
 	MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
 	MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
 	MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c0872b3284cb405583642d71a0e2e540d4804b6f..543060c305a073c0457cc31ae7318f425a0e7c49 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -82,6 +82,9 @@
 	max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
 #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)       MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
 #define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
+#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
+	(cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
+	MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
 
 #define MLX5_MPWRQ_LOG_WQE_SZ			18
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
@@ -590,6 +593,7 @@ struct mlx5e_channel {
 	struct mlx5_core_dev      *mdev;
 	struct hwtstamp_config    *tstamp;
 	int                        ix;
+	int                        cpu;
 };
 
 struct mlx5e_channels {
@@ -935,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
 				 u8 cq_period_mode);
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
 				 u8 cq_period_mode);
-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
-			      struct mlx5e_params *params, u8 rq_type);
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+			       struct mlx5e_params *params,
+			       u8 rq_type);
 
 static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index c6d90b6dd80efa9a1ee82958adf5ef4dd3b4522d..9bcf38f4123b504637c080413078c23304d9e49e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
 				    struct ieee_ets *ets)
 {
+	bool have_ets_tc = false;
 	int bw_sum = 0;
 	int i;
 
@@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
 	}
 
 	/* Validate Bandwidth Sum */
-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
-		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+			have_ets_tc = true;
 			bw_sum += ets->tc_tx_bw[i];
+		}
+	}
 
-	if (bw_sum != 0 && bw_sum != 100) {
+	if (have_ets_tc && bw_sum != 100) {
 		netdev_err(netdev,
 			   "Failed to validate ETS: BW sum is illegal\n");
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 23425f02840581f6be591bc48cf8cccc8cc26443..8f05efa5c829bccb67ddd8b24dc2997adfe4a6c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
 	new_channels.params = priv->channels.params;
 	MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
 
-	mlx5e_set_rq_type_params(priv->mdev, &new_channels.params,
-				 new_channels.params.rq_wq_type);
+	new_channels.params.mpwqe_log_stride_sz =
+		MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
+	new_channels.params.mpwqe_log_num_strides =
+		MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;
 
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
 		priv->channels.params = new_channels.params;
@@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
 		return err;
 
 	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+	mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
+		  MLX5E_GET_PFLAG(&priv->channels.params,
+				  MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d2b057a3e512c1144d741ccffd5bf47b5f138a01..d9d8227f195f0e151ba948e0622ea90a411817c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
 	struct mlx5e_cq_param      icosq_cq;
 };
 
-static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
-{
-	return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
-}
-
 static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 {
 	return MLX5_CAP_GEN(mdev, striding_rq) &&
@@ -83,8 +78,8 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 		MLX5_CAP_ETH(mdev, reg_umr_sq);
 }
 
-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
-			      struct mlx5e_params *params, u8 rq_type)
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+			       struct mlx5e_params *params, u8 rq_type)
 {
 	params->rq_wq_type = rq_type;
 	params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -93,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
 		params->log_rq_size = is_kdump_kernel() ?
 			MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
 			MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
-		params->mpwqe_log_stride_sz =
-			MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
-			MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
-			MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
+		params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
+			MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 		params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
 			params->mpwqe_log_stride_sz;
 		break;
@@ -120,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
 		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
-static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
+				struct mlx5e_params *params)
 {
 	u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
 		    !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
 		    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
 		    MLX5_WQ_TYPE_LINKED_LIST;
-	mlx5e_set_rq_type_params(mdev, params, rq_type);
+	mlx5e_init_rq_type_params(mdev, params, rq_type);
 }
 
 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -444,17 +438,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 	int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
 	int mtt_sz = mlx5e_get_wqe_mtt_sz();
 	int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
-	int node = mlx5e_get_node(c->priv, c->ix);
 	int i;
 
 	rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
-					GFP_KERNEL, node);
+				      GFP_KERNEL, cpu_to_node(c->cpu));
 	if (!rq->mpwqe.info)
 		goto err_out;
 
 	/* We allocate more than mtt_sz as we will align the pointer */
-	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
-					GFP_KERNEL, node);
+	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+					cpu_to_node(c->cpu));
 	if (unlikely(!rq->mpwqe.mtt_no_align))
 		goto err_free_wqe_info;
 
@@ -562,7 +555,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	int err;
 	int i;
 
-	rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+	rqp->wq.db_numa_node = cpu_to_node(c->cpu);
 
 	err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
 				&rq->wq_ctrl);
@@ -629,8 +622,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
 		rq->wqe.frag_info =
 			kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
-				     GFP_KERNEL,
-				     mlx5e_get_node(c->priv, c->ix));
+				     GFP_KERNEL, cpu_to_node(c->cpu));
 		if (!rq->wqe.frag_info) {
 			err = -ENOMEM;
 			goto err_rq_wq_destroy;
@@ -1000,13 +992,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 
-	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
 	if (err)
 		return err;
 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-	err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+	err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
 	if (err)
 		goto err_sq_wq_destroy;
 
@@ -1053,13 +1045,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
 	sq->channel   = c;
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 
-	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
 	if (err)
 		return err;
 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-	err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
+	err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
 	if (err)
 		goto err_sq_wq_destroy;
 
@@ -1126,13 +1118,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	if (MLX5_IPSEC_DEV(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
 
-	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
 	if (err)
 		return err;
 	sq->wq.db    = &sq->wq.db[MLX5_SND_DBR];
 
-	err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+	err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
 	if (err)
 		goto err_sq_wq_destroy;
 
@@ -1504,8 +1496,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
 	struct mlx5_core_dev *mdev = c->priv->mdev;
 	int err;
 
-	param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
-	param->wq.db_numa_node  = mlx5e_get_node(c->priv, c->ix);
+	param->wq.buf_numa_node = cpu_to_node(c->cpu);
+	param->wq.db_numa_node  = cpu_to_node(c->cpu);
 	param->eq_ix   = c->ix;
 
 	err = mlx5e_alloc_cq_common(mdev, param, cq);
@@ -1604,6 +1596,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
 	mlx5e_free_cq(cq);
 }
 
+static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
+{
+	return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
+}
+
 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
 			     struct mlx5e_params *params,
 			     struct mlx5e_channel_param *cparam)
@@ -1752,12 +1749,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 {
 	struct mlx5e_cq_moder icocq_moder = {0, 0};
 	struct net_device *netdev = priv->netdev;
+	int cpu = mlx5e_get_cpu(priv, ix);
 	struct mlx5e_channel *c;
 	unsigned int irq;
 	int err;
 	int eqn;
 
-	c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
+	c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
 	if (!c)
 		return -ENOMEM;
 
@@ -1765,6 +1763,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	c->mdev     = priv->mdev;
 	c->tstamp   = &priv->tstamp;
 	c->ix       = ix;
+	c->cpu      = cpu;
 	c->pdev     = &priv->mdev->pdev->dev;
 	c->netdev   = priv->netdev;
 	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@@ -1853,8 +1852,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_activate_txqsq(&c->sq[tc]);
 	mlx5e_activate_rq(&c->rq);
-	netif_set_xps_queue(c->netdev,
-		mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
+	netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
 }
 
 static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -3679,6 +3677,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 						     struct sk_buff *skb,
 						     netdev_features_t features)
 {
+	unsigned int offset = 0;
 	struct udphdr *udph;
 	u8 proto;
 	u16 port;
@@ -3688,7 +3687,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 		proto = ip_hdr(skb)->protocol;
 		break;
 	case htons(ETH_P_IPV6):
-		proto = ipv6_hdr(skb)->nexthdr;
+		proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
 		break;
 	default:
 		goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 60771865c99c9bf4402d042a760887c4497e0036..e7e7cef2bde402be23b191873a5790ed23fd7843 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			break;
 		case MLX5_EVENT_TYPE_CQ_ERROR:
 			cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
-			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n",
+			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
 				       cqn, eqe->data.cq_err.syndrome);
 			mlx5_cq_event(dev, cqn, eqe->type);
 			break;
@@ -775,7 +775,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	return err;
 }
 
-int mlx5_stop_eqs(struct mlx5_core_dev *dev)
+void mlx5_stop_eqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = &dev->priv.eq_table;
 	int err;
@@ -784,22 +784,26 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, pg)) {
 		err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
 		if (err)
-			return err;
+			mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
+				      err);
 	}
 #endif
 
 	err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
 	if (err)
-		return err;
+		mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
+			      err);
 
-	mlx5_destroy_unmap_eq(dev, &table->async_eq);
+	err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+	if (err)
+		mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
+			      err);
 	mlx5_cmd_use_polling(dev);
 
 	err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
 	if (err)
-		mlx5_cmd_use_events(dev);
-
-	return err;
+		mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
+			      err);
 }
 
 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
index 3c11d6e2160abeef5a893b7b81274a7ce315368c..14962969c5ba8c4462662eeb30ef10cbe1c27fa6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
 	u8 actual_size;
 	int err;
 
+	if (!size)
+		return -EINVAL;
+
 	if (!fdev->mdev)
 		return -ENOTCONN;
 
@@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
 	u8 actual_size;
 	int err;
 
+	if (!size)
+		return -EINVAL;
+
 	if (!fdev->mdev)
 		return -ENOTCONN;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index c70fd663a63301e7e89ef9ee00d37c7075fe1a0b..dfaad9ecb2b8f155c5cdf30451c572b2d10f1d37 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node);
 static void del_sw_flow_table(struct fs_node *node);
 static void del_sw_flow_group(struct fs_node *node);
 static void del_sw_fte(struct fs_node *node);
+static void del_sw_prio(struct fs_node *node);
+static void del_sw_ns(struct fs_node *node);
 /* Delete rule (destination) is special case that 
  * requires to lock the FTE for all the deletion process.
  */
@@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
 	return NULL;
 }
 
+static void del_sw_ns(struct fs_node *node)
+{
+	kfree(node);
+}
+
+static void del_sw_prio(struct fs_node *node)
+{
+	kfree(node);
+}
+
 static void del_hw_flow_table(struct fs_node *node)
 {
 	struct mlx5_flow_table *ft;
@@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
 		return ERR_PTR(-ENOMEM);
 
 	fs_prio->node.type = FS_TYPE_PRIO;
-	tree_init_node(&fs_prio->node, NULL, NULL);
+	tree_init_node(&fs_prio->node, NULL, del_sw_prio);
 	tree_add_node(&fs_prio->node, &ns->node);
 	fs_prio->num_levels = num_levels;
 	fs_prio->prio = prio;
@@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
 		return ERR_PTR(-ENOMEM);
 
 	fs_init_namespace(ns);
-	tree_init_node(&ns->node, NULL, NULL);
+	tree_init_node(&ns->node, NULL, del_sw_ns);
 	tree_add_node(&ns->node, &prio->node);
 	list_add_tail(&ns->node.list, &prio->node.children);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 1a0e797ad001ad672c954c228350ff9bcdea125b..21d29f7936f6c5d1e26c6e0d3f10644fd0f096c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev)
 	u32 fw;
 	int i;
 
-	/* If the syndrom is 0, the device is OK and no need to print buffer */
+	/* If the syndrome is 0, the device is OK and no need to print buffer */
 	if (!ioread8(&h->synd))
 		return;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index d2a66dc4adc6d2933cfbc60c28cd49c67716a010..8812d7208e8f3522500b3f3e971b4a7341b22c8f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
 				   struct mlx5e_params *params)
 {
 	/* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
-	mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
+	mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
 
 	/* RQ size in ipoib by default is 512 */
 	params->log_rq_size = is_kdump_kernel() ?
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index f26f97fe46666ff7a3f2ce6c496cb8936ee5cbb3..582b2f18010a317600f0238163557077d4c48a39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -137,6 +137,17 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
 
+static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+				       bool reset, void *out, int out_size)
+{
+	u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
+
+	MLX5_SET(query_cong_statistics_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+	MLX5_SET(query_cong_statistics_in, in, clear, reset);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
+
 static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
 {
 	return dev->priv.lag;
@@ -633,3 +644,48 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
 	/* If bonded, we do not add an IB device for PF1. */
 	return false;
 }
+
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+				 u64 *values,
+				 int num_counters,
+				 size_t *offsets)
+{
+	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
+	struct mlx5_lag *ldev;
+	int num_ports;
+	int ret, i, j;
+	void *out;
+
+	out = kvzalloc(outlen, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	memset(values, 0, sizeof(*values) * num_counters);
+
+	mutex_lock(&lag_mutex);
+	ldev = mlx5_lag_dev_get(dev);
+	if (ldev && mlx5_lag_is_bonded(ldev)) {
+		num_ports = MLX5_MAX_PORTS;
+		mdev[0] = ldev->pf[0].dev;
+		mdev[1] = ldev->pf[1].dev;
+	} else {
+		num_ports = 1;
+		mdev[0] = dev;
+	}
+
+	for (i = 0; i < num_ports; ++i) {
+		ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
+		if (ret)
+			goto unlock;
+
+		for (j = 0; j < num_counters; ++j)
+			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
+	}
+
+unlock:
+	mutex_unlock(&lag_mutex);
+	kvfree(out);
+	return ret;
+}
+EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 5f323442cc5ac009d5006438d93183e96b85d0d9..8a89c7e8cd631f2e14cb7cbac99a8983964b7eda 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 {
 	struct mlx5_priv *priv = &dev->priv;
 	struct mlx5_eq_table *table = &priv->eq_table;
-	struct irq_affinity irqdesc = {
-		.pre_vectors = MLX5_EQ_VEC_COMP_BASE,
-	};
 	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
 	int nvec;
 
@@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 	if (!priv->irq_info)
 		goto err_free_msix;
 
-	nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
+	nvec = pci_alloc_irq_vectors(dev->pdev,
 			MLX5_EQ_VEC_COMP_BASE + 1, nvec,
-			PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
-			&irqdesc);
+			PCI_IRQ_MSIX);
 	if (nvec < 0)
 		return nvec;
 
@@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
 	return (u64)timer_l | (u64)timer_h1 << 32;
 }
 
+static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+	struct mlx5_priv *priv  = &mdev->priv;
+	int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+	if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
+		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+		return -ENOMEM;
+	}
+
+	cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+			priv->irq_info[i].mask);
+
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    irq_set_affinity_hint(irq, priv->irq_info[i].mask))
+		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+	return 0;
+}
+
+static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+	struct mlx5_priv *priv  = &mdev->priv;
+	int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+	irq_set_affinity_hint(irq, NULL);
+	free_cpumask_var(priv->irq_info[i].mask);
+}
+
+static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
+		err = mlx5_irq_set_affinity_hint(mdev, i);
+		if (err)
+			goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	for (i--; i >= 0; i--)
+		mlx5_irq_clear_affinity_hint(mdev, i);
+
+	return err;
+}
+
+static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
+{
+	int i;
+
+	for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
+		mlx5_irq_clear_affinity_hint(mdev, i);
+}
+
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
 		    unsigned int *irqn)
 {
@@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_stop_eqs;
 	}
 
+	err = mlx5_irq_set_affinity_hints(dev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
+		goto err_affinity_hints;
+	}
+
 	err = mlx5_init_fs(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1154,6 +1213,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	mlx5_cleanup_fs(dev);
 
 err_fs:
+	mlx5_irq_clear_affinity_hints(dev);
+
+err_affinity_hints:
 	free_comp_eqs(dev);
 
 err_stop_eqs:
@@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 
 	mlx5_sriov_detach(dev);
 	mlx5_cleanup_fs(dev);
+	mlx5_irq_clear_affinity_hints(dev);
 	free_comp_eqs(dev);
 	mlx5_stop_eqs(dev);
 	mlx5_put_uars_page(dev, priv->uar);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index db9e665ab10474f934131b5c2a8fa2173fa5feb6..889130edb71525ecd1f46e88a11b2d3fa0ef843f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 err_cmd:
 	memset(din, 0, sizeof(din));
 	memset(dout, 0, sizeof(dout));
-	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
-	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+	MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
 	mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index e651e4c02867740d35c07bfcf485860f26ad6409..d3c33e9eea7292412974802c4c38ded8898ed55c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -125,16 +125,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
 	return ret_entry;
 }
 
-static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
+static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
 				   u32 rate, u16 index)
 {
-	u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
 
-	MLX5_SET(set_rate_limit_in, in, opcode,
-		 MLX5_CMD_OP_SET_RATE_LIMIT);
-	MLX5_SET(set_rate_limit_in, in, rate_limit_index, index);
-	MLX5_SET(set_rate_limit_in, in, rate_limit, rate);
+	MLX5_SET(set_pp_rate_limit_in, in, opcode,
+		 MLX5_CMD_OP_SET_PP_RATE_LIMIT);
+	MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
+	MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
@@ -173,7 +173,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
 		entry->refcount++;
 	} else {
 		/* new rate limit */
-		err = mlx5_set_rate_limit_cmd(dev, rate, entry->index);
+		err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
 		if (err) {
 			mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
 				      rate, err);
@@ -209,7 +209,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate)
 	entry->refcount--;
 	if (!entry->refcount) {
 		/* need to remove rate */
-		mlx5_set_rate_limit_cmd(dev, 0, entry->index);
+		mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
 		entry->rate = 0;
 	}
 
@@ -262,8 +262,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
 	/* Clear all configured rates */
 	for (i = 0; i < table->max_size; i++)
 		if (table->rl_entry[i].rate)
-			mlx5_set_rate_limit_cmd(dev, 0,
-						table->rl_entry[i].index);
+			mlx5_set_pp_rate_limit_cmd(dev, 0,
+						   table->rl_entry[i].index);
 
 	kfree(dev->priv.rl_table.rl_entry);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index 07a9ba6cfc70a11f7b4c05c73c1b32a704b7e6ba..2f74953e4561511e23d8fe3219db89104e3dd9e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
 	struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
 	struct mlx5e_vxlan *vxlan;
 
-	spin_lock(&vxlan_db->lock);
+	spin_lock_bh(&vxlan_db->lock);
 	vxlan = radix_tree_lookup(&vxlan_db->tree, port);
-	spin_unlock(&vxlan_db->lock);
+	spin_unlock_bh(&vxlan_db->lock);
 
 	return vxlan;
 }
@@ -88,8 +88,12 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
 	struct mlx5e_vxlan *vxlan;
 	int err;
 
-	if (mlx5e_vxlan_lookup_port(priv, port))
+	mutex_lock(&priv->state_lock);
+	vxlan = mlx5e_vxlan_lookup_port(priv, port);
+	if (vxlan) {
+		atomic_inc(&vxlan->refcount);
 		goto free_work;
+	}
 
 	if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
 		goto free_work;
@@ -99,10 +103,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
 		goto err_delete_port;
 
 	vxlan->udp_port = port;
+	atomic_set(&vxlan->refcount, 1);
 
-	spin_lock_irq(&vxlan_db->lock);
+	spin_lock_bh(&vxlan_db->lock);
 	err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
-	spin_unlock_irq(&vxlan_db->lock);
+	spin_unlock_bh(&vxlan_db->lock);
 	if (err)
 		goto err_free;
 
@@ -113,35 +118,39 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
 err_delete_port:
 	mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
 free_work:
+	mutex_unlock(&priv->state_lock);
 	kfree(vxlan_work);
 }
 
-static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port)
+static void mlx5e_vxlan_del_port(struct work_struct *work)
 {
+	struct mlx5e_vxlan_work *vxlan_work =
+		container_of(work, struct mlx5e_vxlan_work, work);
+	struct mlx5e_priv *priv         = vxlan_work->priv;
 	struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
+	u16 port = vxlan_work->port;
 	struct mlx5e_vxlan *vxlan;
+	bool remove = false;
 
-	spin_lock_irq(&vxlan_db->lock);
-	vxlan = radix_tree_delete(&vxlan_db->tree, port);
-	spin_unlock_irq(&vxlan_db->lock);
-
+	mutex_lock(&priv->state_lock);
+	spin_lock_bh(&vxlan_db->lock);
+	vxlan = radix_tree_lookup(&vxlan_db->tree, port);
 	if (!vxlan)
-		return;
-
-	mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port);
-
-	kfree(vxlan);
-}
+		goto out_unlock;
 
-static void mlx5e_vxlan_del_port(struct work_struct *work)
-{
-	struct mlx5e_vxlan_work *vxlan_work =
-		container_of(work, struct mlx5e_vxlan_work, work);
-	struct mlx5e_priv *priv = vxlan_work->priv;
-	u16 port = vxlan_work->port;
+	if (atomic_dec_and_test(&vxlan->refcount)) {
+		radix_tree_delete(&vxlan_db->tree, port);
+		remove = true;
+	}
 
-	__mlx5e_vxlan_core_del_port(priv, port);
+out_unlock:
+	spin_unlock_bh(&vxlan_db->lock);
 
+	if (remove) {
+		mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+		kfree(vxlan);
+	}
+	mutex_unlock(&priv->state_lock);
 	kfree(vxlan_work);
 }
 
@@ -171,12 +180,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv)
 	struct mlx5e_vxlan *vxlan;
 	unsigned int port = 0;
 
-	spin_lock_irq(&vxlan_db->lock);
+	/* Lockless since we are the only radix-tree consumers, wq is disabled */
 	while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
 		port = vxlan->udp_port;
-		spin_unlock_irq(&vxlan_db->lock);
-		__mlx5e_vxlan_core_del_port(priv, (u16)port);
-		spin_lock_irq(&vxlan_db->lock);
+		radix_tree_delete(&vxlan_db->tree, port);
+		mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+		kfree(vxlan);
 	}
-	spin_unlock_irq(&vxlan_db->lock);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
index 5def12c048e38992e7edd9f870233e369ef4580e..5ef6ae7d568abcd1410bc403b526628a634799cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
@@ -36,6 +36,7 @@
 #include "en.h"
 
 struct mlx5e_vxlan {
+	atomic_t refcount;
 	u16 udp_port;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 23f7d828cf676244766e582674a2946ea467bb6b..6ef20e5cc77dd314a60c9b1330fb421f81ceda7a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1643,7 +1643,12 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
 		return 0;
 	}
 
-	wmb(); /* reset needs to be written before we read control register */
+	/* Reset needs to be written before we read control register, and
+	 * we must wait for the HW to become responsive once again
+	 */
+	wmb();
+	msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS);
+
 	end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
 	do {
 		u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index a6441208e9d96e0b96de0cde155f50fa437f56ae..fb082ad21b00e43435003e186eae92ff3d197a66 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -59,6 +59,7 @@
 #define MLXSW_PCI_SW_RESET			0xF0010
 #define MLXSW_PCI_SW_RESET_RST_BIT		BIT(0)
 #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS	5000
+#define MLXSW_PCI_SW_RESET_WAIT_MSECS		100
 #define MLXSW_PCI_FW_READY			0xA1844
 #define MLXSW_PCI_FW_READY_MASK			0xFFFF
 #define MLXSW_PCI_FW_READY_MAGIC		0x5E
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 2d0897b7d86035286666e38ad4e41ab63fb4746b..c3837ca7a705bd4d6ed48c1877d0f0f0de984c85 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4300,6 +4300,7 @@ static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port,
 
 static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+	u16 vid = 1;
 	int err;
 
 	err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true);
@@ -4312,8 +4313,19 @@ static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port)
 				     true, false);
 	if (err)
 		goto err_port_vlan_set;
+
+	for (; vid <= VLAN_N_VID - 1; vid++) {
+		err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port,
+						     vid, false);
+		if (err)
+			goto err_vid_learning_set;
+	}
+
 	return 0;
 
+err_vid_learning_set:
+	for (vid--; vid >= 1; vid--)
+		mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
 err_port_vlan_set:
 	mlxsw_sp_port_stp_set(mlxsw_sp_port, false);
 err_port_stp_set:
@@ -4323,6 +4335,12 @@ static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port)
 
 static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+	u16 vid;
+
+	for (vid = VLAN_N_VID - 1; vid >= 1; vid--)
+		mlxsw_sp_port_vid_learning_set(mlxsw_sp_port,
+					       vid, true);
+
 	mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1,
 			       false, false);
 	mlxsw_sp_port_stp_set(mlxsw_sp_port, false);
@@ -4358,7 +4376,10 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 		}
 		if (!info->linking)
 			break;
-		if (netdev_has_any_upper_dev(upper_dev)) {
+		if (netdev_has_any_upper_dev(upper_dev) &&
+		    (!netif_is_bridge_master(upper_dev) ||
+		     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
+							  upper_dev))) {
 			NL_SET_ERR_MSG(extack,
 				       "spectrum: Enslaving a port to a device that already has an upper device is not supported");
 			return -EINVAL;
@@ -4486,6 +4507,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
 					      u16 vid)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct netdev_notifier_changeupper_info *info = ptr;
 	struct netlink_ext_ack *extack;
 	struct net_device *upper_dev;
@@ -4502,7 +4524,10 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
 		}
 		if (!info->linking)
 			break;
-		if (netdev_has_any_upper_dev(upper_dev)) {
+		if (netdev_has_any_upper_dev(upper_dev) &&
+		    (!netif_is_bridge_master(upper_dev) ||
+		     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
+							  upper_dev))) {
 			NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported");
 			return -EINVAL;
 		}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 432ab9b12b7f59ded586a663f0a686741740463d..05ce1befd9b378b41584c149abd6a0d53c4be246 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -365,6 +365,8 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
 void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
 				struct net_device *brport_dev,
 				struct net_device *br_dev);
+bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
+					 const struct net_device *br_dev);
 
 /* spectrum.c */
 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index c33beac5def0615ca24a7ade5f8fdd6b2651647c..b5397da94d7f5b178da560145ee8def18825df6e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -46,7 +46,8 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
 				  int tclass_num, u32 min, u32 max,
 				  u32 probability, bool is_ecn)
 {
-	char cwtp_cmd[max_t(u8, MLXSW_REG_CWTP_LEN, MLXSW_REG_CWTPM_LEN)];
+	char cwtpm_cmd[MLXSW_REG_CWTPM_LEN];
+	char cwtp_cmd[MLXSW_REG_CWTP_LEN];
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	int err;
 
@@ -60,10 +61,10 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (err)
 		return err;
 
-	mlxsw_reg_cwtpm_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num,
+	mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num,
 			     MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn);
 
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtp_cmd);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd);
 }
 
 static int
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 72ef4f8025f00ff8810c2955b25b7f3baec49be1..434b3922b34f06689e3e85c0dc70810902c51275 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2436,25 +2436,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
 }
 
-static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
-				    const struct mlxsw_sp_rif *rif)
-{
-	char rauht_pl[MLXSW_REG_RAUHT_LEN];
-
-	mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
-			     rif->rif_index, rif->addr);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
-}
-
 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_rif *rif)
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
 
-	mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
-				 rif_list_node)
+				 rif_list_node) {
+		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
+	}
 }
 
 enum mlxsw_sp_nexthop_type {
@@ -3237,7 +3228,7 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
 {
 	if (!removing)
 		nh->should_offload = 1;
-	else if (nh->offloaded)
+	else
 		nh->should_offload = 0;
 	nh->update = 1;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 7b8548e25ae73da7e835c7a8da1eaaece8cbbcfb..593ad31be7490d6af8abf3f5c3929de925c0ca77 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -152,6 +152,12 @@ mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge,
 	return NULL;
 }
 
+bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
+					 const struct net_device *br_dev)
+{
+	return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+}
+
 static struct mlxsw_sp_bridge_device *
 mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
 			      struct net_device *br_dev)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index e379b78e86efa7c02dca2bc95afc1f79afc7800a..13190aa09faf748c16e1f00f7aee9097442aef85 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -82,10 +82,33 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
 	return nfp_net_ebpf_capable(nn) ? "BPF" : "";
 }
 
+static int
+nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
+{
+	int err;
+
+	nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL);
+	if (!nn->app_priv)
+		return -ENOMEM;
+
+	err = nfp_app_nic_vnic_alloc(app, nn, id);
+	if (err)
+		goto err_free_priv;
+
+	return 0;
+err_free_priv:
+	kfree(nn->app_priv);
+	return err;
+}
+
 static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 {
+	struct nfp_bpf_vnic *bv = nn->app_priv;
+
 	if (nn->dp.bpf_offload_xdp)
 		nfp_bpf_xdp_offload(app, nn, NULL);
+	WARN_ON(bv->tc_prog);
+	kfree(bv);
 }
 
 static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
@@ -93,6 +116,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
 {
 	struct tc_cls_bpf_offload *cls_bpf = type_data;
 	struct nfp_net *nn = cb_priv;
+	struct bpf_prog *oldprog;
+	struct nfp_bpf_vnic *bv;
+	int err;
 
 	if (type != TC_SETUP_CLSBPF ||
 	    !tc_can_offload(nn->dp.netdev) ||
@@ -100,8 +126,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
 	    cls_bpf->common.protocol != htons(ETH_P_ALL) ||
 	    cls_bpf->common.chain_index)
 		return -EOPNOTSUPP;
-	if (nn->dp.bpf_offload_xdp)
-		return -EBUSY;
 
 	/* Only support TC direct action */
 	if (!cls_bpf->exts_integrated ||
@@ -110,16 +134,25 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
 		return -EOPNOTSUPP;
 	}
 
-	switch (cls_bpf->command) {
-	case TC_CLSBPF_REPLACE:
-		return nfp_net_bpf_offload(nn, cls_bpf->prog, true);
-	case TC_CLSBPF_ADD:
-		return nfp_net_bpf_offload(nn, cls_bpf->prog, false);
-	case TC_CLSBPF_DESTROY:
-		return nfp_net_bpf_offload(nn, NULL, true);
-	default:
+	if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
 		return -EOPNOTSUPP;
+
+	bv = nn->app_priv;
+	oldprog = cls_bpf->oldprog;
+
+	/* Don't remove if oldprog doesn't match driver's state */
+	if (bv->tc_prog != oldprog) {
+		oldprog = NULL;
+		if (!cls_bpf->prog)
+			return 0;
 	}
+
+	err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog);
+	if (err)
+		return err;
+
+	bv->tc_prog = cls_bpf->prog;
+	return 0;
 }
 
 static int nfp_bpf_setup_tc_block(struct net_device *netdev,
@@ -167,7 +200,7 @@ const struct nfp_app_type app_bpf = {
 
 	.extra_cap	= nfp_bpf_extra_cap,
 
-	.vnic_alloc	= nfp_app_nic_vnic_alloc,
+	.vnic_alloc	= nfp_bpf_vnic_alloc,
 	.vnic_free	= nfp_bpf_vnic_free,
 
 	.setup_tc	= nfp_bpf_setup_tc,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 082a15f6dfb5b9ba806316bd4f272006c93749fb..57b6043177a3891c49096ab85906ee539b3d5ecb 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -172,6 +172,14 @@ struct nfp_prog {
 	struct list_head insns;
 };
 
+/**
+ * struct nfp_bpf_vnic - per-vNIC BPF priv structure
+ * @tc_prog:	currently loaded cls_bpf program
+ */
+struct nfp_bpf_vnic {
+	struct bpf_prog *tc_prog;
+};
+
 int nfp_bpf_jit(struct nfp_prog *prog);
 
 extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 1a603fdd9e802d1344e37bcfa719404b69312e43..99b0487b6d820b53befa8a5904a17d5a9176081a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -568,6 +568,7 @@ nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset,
 		return err;
 	}
 	nn_writeb(nn, ctrl_offset, entry->entry);
+	nfp_net_irq_unmask(nn, entry->entry);
 
 	return 0;
 }
@@ -582,6 +583,7 @@ static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
 				 unsigned int vector_idx)
 {
 	nn_writeb(nn, ctrl_offset, 0xff);
+	nn_pci_flush(nn);
 	free_irq(nn->irq_entries[vector_idx].vector, nn);
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 924a05e05da027523e7845728c6ddc4d41accc0b..78b36c67c232f661d5c2633034c5ada015bf1aeb 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -84,16 +84,13 @@ nfp_repr_phy_port_get_stats64(struct nfp_port *port,
 {
 	u8 __iomem *mem = port->eth_stats;
 
-	/* TX and RX stats are flipped as we are returning the stats as seen
-	 * at the switch port corresponding to the phys port.
-	 */
-	stats->tx_packets = readq(mem + NFP_MAC_STATS_RX_FRAMES_RECEIVED_OK);
-	stats->tx_bytes = readq(mem + NFP_MAC_STATS_RX_IN_OCTETS);
-	stats->tx_dropped = readq(mem + NFP_MAC_STATS_RX_IN_ERRORS);
+	stats->tx_packets = readq(mem + NFP_MAC_STATS_TX_FRAMES_TRANSMITTED_OK);
+	stats->tx_bytes = readq(mem + NFP_MAC_STATS_TX_OUT_OCTETS);
+	stats->tx_dropped = readq(mem + NFP_MAC_STATS_TX_OUT_ERRORS);
 
-	stats->rx_packets = readq(mem + NFP_MAC_STATS_TX_FRAMES_TRANSMITTED_OK);
-	stats->rx_bytes = readq(mem + NFP_MAC_STATS_TX_OUT_OCTETS);
-	stats->rx_dropped = readq(mem + NFP_MAC_STATS_TX_OUT_ERRORS);
+	stats->rx_packets = readq(mem + NFP_MAC_STATS_RX_FRAMES_RECEIVED_OK);
+	stats->rx_bytes = readq(mem + NFP_MAC_STATS_RX_IN_OCTETS);
+	stats->rx_dropped = readq(mem + NFP_MAC_STATS_RX_IN_ERRORS);
 }
 
 static void
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
index 18461fcb981501efd7015634999cb787041c01a7..53dbf1e163a85ea5bdfc571788c18ce0d6d0f7b3 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-phy.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
@@ -47,6 +47,7 @@
 #define MDIO_CLK_25_28                                               7
 
 #define MDIO_WAIT_TIMES                                           1000
+#define MDIO_STATUS_DELAY_TIME                                       1
 
 static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
 {
@@ -65,7 +66,7 @@ static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
 
 	if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
 			       !(reg & (MDIO_START | MDIO_BUSY)),
-			       100, MDIO_WAIT_TIMES * 100))
+			       MDIO_STATUS_DELAY_TIME, MDIO_WAIT_TIMES * 100))
 		return -EIO;
 
 	return (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK;
@@ -88,8 +89,8 @@ static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
 	writel(reg, adpt->base + EMAC_MDIO_CTRL);
 
 	if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
-			       !(reg & (MDIO_START | MDIO_BUSY)), 100,
-			       MDIO_WAIT_TIMES * 100))
+			       !(reg & (MDIO_START | MDIO_BUSY)),
+			       MDIO_STATUS_DELAY_TIME, MDIO_WAIT_TIMES * 100))
 		return -EIO;
 
 	return 0;
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 70c92b649b299a1a16195e144e3da77cff25855c..38c924bdd32e46f3586eac77d5a63c361993fd09 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -253,18 +253,18 @@ static int emac_open(struct net_device *netdev)
 		return ret;
 	}
 
-	ret = emac_mac_up(adpt);
+	ret = adpt->phy.open(adpt);
 	if (ret) {
 		emac_mac_rx_tx_rings_free_all(adpt);
 		free_irq(irq->irq, irq);
 		return ret;
 	}
 
-	ret = adpt->phy.open(adpt);
+	ret = emac_mac_up(adpt);
 	if (ret) {
-		emac_mac_down(adpt);
 		emac_mac_rx_tx_rings_free_all(adpt);
 		free_irq(irq->irq, irq);
+		adpt->phy.close(adpt);
 		return ret;
 	}
 
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 71bee1af71effaea4359f765df82c4654ca96567..df21e900f874036bfaf349fa334acf6c339acf3c 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -195,6 +195,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 err1:
 	rmnet_unregister_real_device(real_dev, port);
 err0:
+	kfree(ep);
 	return err;
 }
 
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 29842ccc91a9d35ff49a0e5ac1cb61ec61fb3b49..08e4afc0ab39b42ff8585a466daf64d9f25fb703 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -126,12 +126,12 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
 
 	if (skb_headroom(skb) < required_headroom) {
 		if (pskb_expand_head(skb, required_headroom, 0, GFP_KERNEL))
-			return RMNET_MAP_CONSUMED;
+			goto fail;
 	}
 
 	map_header = rmnet_map_add_map_header(skb, additional_header_len, 0);
 	if (!map_header)
-		return RMNET_MAP_CONSUMED;
+		goto fail;
 
 	if (port->egress_data_format & RMNET_EGRESS_FORMAT_MUXING) {
 		if (mux_id == 0xff)
@@ -143,6 +143,10 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
 	skb->protocol = htons(ETH_P_MAP);
 
 	return RMNET_MAP_SUCCESS;
+
+fail:
+	kfree_skb(skb);
+	return RMNET_MAP_CONSUMED;
 }
 
 static void
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 2b962d349f5f415b0b38148a5d575a5720850fbe..009780df664b4a09ea3e88855b0e707ff0b24b10 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -2308,32 +2308,9 @@ static int __maybe_unused ravb_resume(struct device *dev)
 	struct ravb_private *priv = netdev_priv(ndev);
 	int ret = 0;
 
-	if (priv->wol_enabled) {
-		/* Reduce the usecount of the clock to zero and then
-		 * restore it to its original value. This is done to force
-		 * the clock to be re-enabled which is a workaround
-		 * for renesas-cpg-mssr driver which do not enable clocks
-		 * when resuming from PSCI suspend/resume.
-		 *
-		 * Without this workaround the driver fails to communicate
-		 * with the hardware if WoL was enabled when the system
-		 * entered PSCI suspend. This is due to that if WoL is enabled
-		 * we explicitly keep the clock from being turned off when
-		 * suspending, but in PSCI sleep power is cut so the clock
-		 * is disabled anyhow, the clock driver is not aware of this
-		 * so the clock is not turned back on when resuming.
-		 *
-		 * TODO: once the renesas-cpg-mssr suspend/resume is working
-		 *       this clock dance should be removed.
-		 */
-		clk_disable(priv->clk);
-		clk_disable(priv->clk);
-		clk_enable(priv->clk);
-		clk_enable(priv->clk);
-
-		/* Set reset mode to rearm the WoL logic */
+	/* If WoL is enabled set reset mode to rearm the WoL logic */
+	if (priv->wol_enabled)
 		ravb_write(ndev, CCC_OPC_RESET, CCC);
-	}
 
 	/* All register have been reset to default values.
 	 * Restore all registers which where setup at probe time and
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 7e060aa9fbed4057c2a00f389f4e411923acb6f0..b9e2846589f8678e72683c230d601d7f517de5da 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -147,7 +147,7 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
 	[FWNLCR0]	= 0x0090,
 	[FWALCR0]	= 0x0094,
 	[TXNLCR1]	= 0x00a0,
-	[TXALCR1]	= 0x00a0,
+	[TXALCR1]	= 0x00a4,
 	[RXNLCR1]	= 0x00a8,
 	[RXALCR1]	= 0x00ac,
 	[FWNLCR1]	= 0x00b0,
@@ -399,7 +399,7 @@ static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = {
 	[FWNLCR0]	= 0x0090,
 	[FWALCR0]	= 0x0094,
 	[TXNLCR1]	= 0x00a0,
-	[TXALCR1]	= 0x00a0,
+	[TXALCR1]	= 0x00a4,
 	[RXNLCR1]	= 0x00a8,
 	[RXALCR1]	= 0x00ac,
 	[FWNLCR1]	= 0x00b0,
@@ -1149,7 +1149,8 @@ static int sh_eth_tx_free(struct net_device *ndev, bool sent_only)
 			   entry, le32_to_cpu(txdesc->status));
 		/* Free the original skb. */
 		if (mdp->tx_skbuff[entry]) {
-			dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr),
+			dma_unmap_single(&mdp->pdev->dev,
+					 le32_to_cpu(txdesc->addr),
 					 le32_to_cpu(txdesc->len) >> 16,
 					 DMA_TO_DEVICE);
 			dev_kfree_skb_irq(mdp->tx_skbuff[entry]);
@@ -1179,14 +1180,14 @@ static void sh_eth_ring_free(struct net_device *ndev)
 			if (mdp->rx_skbuff[i]) {
 				struct sh_eth_rxdesc *rxdesc = &mdp->rx_ring[i];
 
-				dma_unmap_single(&ndev->dev,
+				dma_unmap_single(&mdp->pdev->dev,
 						 le32_to_cpu(rxdesc->addr),
 						 ALIGN(mdp->rx_buf_sz, 32),
 						 DMA_FROM_DEVICE);
 			}
 		}
 		ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring;
-		dma_free_coherent(NULL, ringsize, mdp->rx_ring,
+		dma_free_coherent(&mdp->pdev->dev, ringsize, mdp->rx_ring,
 				  mdp->rx_desc_dma);
 		mdp->rx_ring = NULL;
 	}
@@ -1203,7 +1204,7 @@ static void sh_eth_ring_free(struct net_device *ndev)
 		sh_eth_tx_free(ndev, false);
 
 		ringsize = sizeof(struct sh_eth_txdesc) * mdp->num_tx_ring;
-		dma_free_coherent(NULL, ringsize, mdp->tx_ring,
+		dma_free_coherent(&mdp->pdev->dev, ringsize, mdp->tx_ring,
 				  mdp->tx_desc_dma);
 		mdp->tx_ring = NULL;
 	}
@@ -1245,9 +1246,9 @@ static void sh_eth_ring_format(struct net_device *ndev)
 
 		/* The size of the buffer is a multiple of 32 bytes. */
 		buf_len = ALIGN(mdp->rx_buf_sz, 32);
-		dma_addr = dma_map_single(&ndev->dev, skb->data, buf_len,
+		dma_addr = dma_map_single(&mdp->pdev->dev, skb->data, buf_len,
 					  DMA_FROM_DEVICE);
-		if (dma_mapping_error(&ndev->dev, dma_addr)) {
+		if (dma_mapping_error(&mdp->pdev->dev, dma_addr)) {
 			kfree_skb(skb);
 			break;
 		}
@@ -1323,8 +1324,8 @@ static int sh_eth_ring_init(struct net_device *ndev)
 
 	/* Allocate all Rx descriptors. */
 	rx_ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring;
-	mdp->rx_ring = dma_alloc_coherent(NULL, rx_ringsize, &mdp->rx_desc_dma,
-					  GFP_KERNEL);
+	mdp->rx_ring = dma_alloc_coherent(&mdp->pdev->dev, rx_ringsize,
+					  &mdp->rx_desc_dma, GFP_KERNEL);
 	if (!mdp->rx_ring)
 		goto ring_free;
 
@@ -1332,8 +1333,8 @@ static int sh_eth_ring_init(struct net_device *ndev)
 
 	/* Allocate all Tx descriptors. */
 	tx_ringsize = sizeof(struct sh_eth_txdesc) * mdp->num_tx_ring;
-	mdp->tx_ring = dma_alloc_coherent(NULL, tx_ringsize, &mdp->tx_desc_dma,
-					  GFP_KERNEL);
+	mdp->tx_ring = dma_alloc_coherent(&mdp->pdev->dev, tx_ringsize,
+					  &mdp->tx_desc_dma, GFP_KERNEL);
 	if (!mdp->tx_ring)
 		goto ring_free;
 	return 0;
@@ -1527,7 +1528,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
 			mdp->rx_skbuff[entry] = NULL;
 			if (mdp->cd->rpadir)
 				skb_reserve(skb, NET_IP_ALIGN);
-			dma_unmap_single(&ndev->dev, dma_addr,
+			dma_unmap_single(&mdp->pdev->dev, dma_addr,
 					 ALIGN(mdp->rx_buf_sz, 32),
 					 DMA_FROM_DEVICE);
 			skb_put(skb, pkt_len);
@@ -1555,9 +1556,9 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
 			if (skb == NULL)
 				break;	/* Better luck next round. */
 			sh_eth_set_receive_align(skb);
-			dma_addr = dma_map_single(&ndev->dev, skb->data,
+			dma_addr = dma_map_single(&mdp->pdev->dev, skb->data,
 						  buf_len, DMA_FROM_DEVICE);
-			if (dma_mapping_error(&ndev->dev, dma_addr)) {
+			if (dma_mapping_error(&mdp->pdev->dev, dma_addr)) {
 				kfree_skb(skb);
 				break;
 			}
@@ -1891,6 +1892,16 @@ static int sh_eth_phy_init(struct net_device *ndev)
 		return PTR_ERR(phydev);
 	}
 
+	/* mask with MAC supported features */
+	if (mdp->cd->register_type != SH_ETH_REG_GIGABIT) {
+		int err = phy_set_max_speed(phydev, SPEED_100);
+		if (err) {
+			netdev_err(ndev, "failed to limit PHY to 100 Mbit/s\n");
+			phy_disconnect(phydev);
+			return err;
+		}
+	}
+
 	phy_attached_info(phydev);
 
 	return 0;
@@ -2441,9 +2452,9 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	/* soft swap. */
 	if (!mdp->cd->hw_swap)
 		sh_eth_soft_swap(PTR_ALIGN(skb->data, 4), skb->len + 2);
-	dma_addr = dma_map_single(&ndev->dev, skb->data, skb->len,
+	dma_addr = dma_map_single(&mdp->pdev->dev, skb->data, skb->len,
 				  DMA_TO_DEVICE);
-	if (dma_mapping_error(&ndev->dev, dma_addr)) {
+	if (dma_mapping_error(&mdp->pdev->dev, dma_addr)) {
 		kfree_skb(skb);
 		return NETDEV_TX_OK;
 	}
@@ -3214,18 +3225,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 	/* ioremap the TSU registers */
 	if (mdp->cd->tsu) {
 		struct resource *rtsu;
+
 		rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-		mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu);
-		if (IS_ERR(mdp->tsu_addr)) {
-			ret = PTR_ERR(mdp->tsu_addr);
+		if (!rtsu) {
+			dev_err(&pdev->dev, "no TSU resource\n");
+			ret = -ENODEV;
+			goto out_release;
+		}
+		/* We can only request the  TSU region  for the first port
+		 * of the two  sharing this TSU for the probe to succeed...
+		 */
+		if (devno % 2 == 0 &&
+		    !devm_request_mem_region(&pdev->dev, rtsu->start,
+					     resource_size(rtsu),
+					     dev_name(&pdev->dev))) {
+			dev_err(&pdev->dev, "can't request TSU resource.\n");
+			ret = -EBUSY;
+			goto out_release;
+		}
+		mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start,
+					     resource_size(rtsu));
+		if (!mdp->tsu_addr) {
+			dev_err(&pdev->dev, "TSU region ioremap() failed.\n");
+			ret = -ENOMEM;
 			goto out_release;
 		}
 		mdp->port = devno % 2;
 		ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
 	}
 
-	/* initialize first or needed device */
-	if (!devno || pd->needs_init) {
+	/* Need to init only the first port of the two sharing a TSU */
+	if (devno % 2 == 0) {
 		if (mdp->cd->chip_reset)
 			mdp->cd->chip_reset(ndev);
 
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 0ea7e16f2e6e2c6d8106308e73327390e62074ce..9937a2450e573f8028046233cf82de2f51f16704 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -77,6 +77,7 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 	}
 
 	if (buffer->flags & EFX_TX_BUF_SKB) {
+		EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
 		(*pkts_compl)++;
 		(*bytes_compl) += buffer->skb->len;
 		dev_consume_skb_any((struct sk_buff *)buffer->skb);
@@ -426,12 +427,14 @@ static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
 static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
 {
 	struct efx_tx_buffer *buffer;
+	unsigned int bytes_compl = 0;
+	unsigned int pkts_compl = 0;
 
 	/* Work backwards until we hit the original insert pointer value */
 	while (tx_queue->insert_count != tx_queue->write_count) {
 		--tx_queue->insert_count;
 		buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
-		efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
+		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
 	}
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index e1e5ac0537606f2192d553c85795428b18fd615d..ce2ea2d491acac195eefe3f01f8ec9df08d3e77c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -409,7 +409,7 @@ struct stmmac_desc_ops {
 	/* get timestamp value */
 	 u64(*get_timestamp) (void *desc, u32 ats);
 	/* get rx timestamp status */
-	int (*get_rx_timestamp_status) (void *desc, u32 ats);
+	int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
 	/* Display ring */
 	void (*display_ring)(void *head, unsigned int size, bool rx);
 	/* set MSS via context descriptor */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index e5ff734d4f9b2ff9b56799cc803fdafe4f80ea1c..9eb7f65d8000d28190da780587dba562aed72152 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -808,8 +808,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 			 val, reg);
 
 	if (gmac->variant->soc_has_internal_phy) {
-		if (of_property_read_bool(priv->plat->phy_node,
-					  "allwinner,leds-active-low"))
+		if (of_property_read_bool(node, "allwinner,leds-active-low"))
 			reg |= H3_EPHY_LED_POL;
 		else
 			reg &= ~H3_EPHY_LED_POL;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 4b286e27c4ca5cdbbb7c457e31bef1b2e9e7bd94..7e089bf906b4f316034403f9a44fbfd191ee09eb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -258,7 +258,8 @@ static int dwmac4_rx_check_timestamp(void *desc)
 	return ret;
 }
 
-static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
+static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc,
+						 u32 ats)
 {
 	struct dma_desc *p = (struct dma_desc *)desc;
 	int ret = -EINVAL;
@@ -270,7 +271,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
 
 			/* Check if timestamp is OK from context descriptor */
 			do {
-				ret = dwmac4_rx_check_timestamp(desc);
+				ret = dwmac4_rx_check_timestamp(next_desc);
 				if (ret < 0)
 					goto exit;
 				i++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 7546b3664113a3d776fe19094df71b2adfb99e98..2a828a31281423082995bc332ec51a3f20989804 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -400,7 +400,8 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
 	return ns;
 }
 
-static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats)
+static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
+					    u32 ats)
 {
 	if (ats) {
 		struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index f817f8f365696d3388e73f85710d30dde43a7d41..db4cee57bb2465eb98fe38cb947624e779da4673 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -265,7 +265,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
 	return ns;
 }
 
-static int ndesc_get_rx_timestamp_status(void *desc, u32 ats)
+static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
 {
 	struct dma_desc *p = (struct dma_desc *)desc;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 721b616552611aa74ea077e744ec9a0c4836a48f..08c19ebd530674972ceb9ebcb41cd7af4b3fb58d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -34,6 +34,7 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
 {
 	u32 value = readl(ioaddr + PTP_TCR);
 	unsigned long data;
+	u32 reg_value;
 
 	/* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second
 	 *	formula = (1/ptp_clock) * 1000000000
@@ -50,10 +51,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
 
 	data &= PTP_SSIR_SSINC_MASK;
 
+	reg_value = data;
 	if (gmac4)
-		data = data << GMAC4_PTP_SSIR_SSINC_SHIFT;
+		reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT;
 
-	writel(data, ioaddr + PTP_SSIR);
+	writel(reg_value, ioaddr + PTP_SSIR);
 
 	return data;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f63c2ddced3c9a1e90f4425b28be95d202379a71..c0af0bc4e714891080e966c7d6ddd5e47e8ab5a5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -364,9 +364,15 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t)
 bool stmmac_eee_init(struct stmmac_priv *priv)
 {
 	struct net_device *ndev = priv->dev;
+	int interface = priv->plat->interface;
 	unsigned long flags;
 	bool ret = false;
 
+	if ((interface != PHY_INTERFACE_MODE_MII) &&
+	    (interface != PHY_INTERFACE_MODE_GMII) &&
+	    !phy_interface_mode_is_rgmii(interface))
+		goto out;
+
 	/* Using PCS we cannot dial with the phy registers at this stage
 	 * so we do not support extra feature like EEE.
 	 */
@@ -482,7 +488,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
 		desc = np;
 
 	/* Check if timestamp is available */
-	if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) {
+	if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
 		ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
 		netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
 		shhwtstamp = skb_hwtstamps(skb);
@@ -2588,6 +2594,7 @@ static int stmmac_open(struct net_device *dev)
 
 	priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
 	priv->rx_copybreak = STMMAC_RX_COPYBREAK;
+	priv->mss = 0;
 
 	ret = alloc_dma_desc_resources(priv);
 	if (ret < 0) {
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b718a02a6bb6055d7841619b42cb400d1eea0984..0a48b3073d3d3614483e9d8ae64f073a5b3d2ead 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -825,6 +825,13 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 
+	if (skb_dst(skb)) {
+		int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) -
+			  GENEVE_BASE_HLEN - info->options_len - 14;
+
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+	}
+
 	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
 	if (geneve->collect_md) {
 		tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
@@ -864,6 +871,13 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	if (IS_ERR(dst))
 		return PTR_ERR(dst);
 
+	if (skb_dst(skb)) {
+		int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) -
+			  GENEVE_BASE_HLEN - info->options_len - 14;
+
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+	}
+
 	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
 	if (geneve->collect_md) {
 		prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index 8483f03d5a4103d6c4da7d5d586a99e013f06107..1ab97d99b9bae9f9dde6227dacff606256e66c72 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -1379,8 +1379,8 @@ static int rr_close(struct net_device *dev)
 			    rrpriv->info_dma);
 	rrpriv->info = NULL;
 
-	free_irq(pdev->irq, dev);
 	spin_unlock_irqrestore(&rrpriv->lock, flags);
+	free_irq(pdev->irq, dev);
 
 	return 0;
 }
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 11c1e7950fe58002b1b2b52e6af395dbfc7b6863..77cc4fbaeace4836419b2232913f8d78351e0148 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -393,6 +393,7 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb)
 		.flowi4_oif = dev->ifindex,
 		.flowi4_tos = RT_TOS(ip4h->tos),
 		.flowi4_flags = FLOWI_FLAG_ANYSRC,
+		.flowi4_mark = skb->mark,
 		.daddr = ip4h->daddr,
 		.saddr = ip4h->saddr,
 	};
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index a178c5efd33e5486d47608dd7e4d7218b8eda4a1..a0f2be81d52e4a4fd24585d8957b181a3cf9a103 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1444,9 +1444,14 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	return 0;
 
 unregister_netdev:
+	/* macvlan_uninit would free the macvlan port */
 	unregister_netdevice(dev);
+	return err;
 destroy_macvlan_port:
-	if (create)
+	/* the macvlan port may be freed by macvlan_uninit when fail to register.
+	 * so we destroy the macvlan port only when it's valid.
+	 */
+	if (create && macvlan_port_get_rtnl(dev))
 		macvlan_port_destroy(port->dev);
 	return err;
 }
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 5f93e6add56394f28f68da72ddb8fc724db14200..e911e4990b20e181aaf3711d1069f086c2a7a4d3 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -239,14 +239,10 @@ static int at803x_resume(struct phy_device *phydev)
 {
 	int value;
 
-	mutex_lock(&phydev->lock);
-
 	value = phy_read(phydev, MII_BMCR);
 	value &= ~(BMCR_PDOWN | BMCR_ISOLATE);
 	phy_write(phydev, MII_BMCR, value);
 
-	mutex_unlock(&phydev->lock);
-
 	return 0;
 }
 
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 4d02b27df0445e7c1ad6f0774e44a5b7013bb05e..82104edca393b9b6662a18ef8ea0bdd8d3bb057d 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -637,6 +637,10 @@ static int m88e1510_config_aneg(struct phy_device *phydev)
 	if (err < 0)
 		goto error;
 
+	/* Do not touch the fiber page if we're in copper->sgmii mode */
+	if (phydev->interface == PHY_INTERFACE_MODE_SGMII)
+		return 0;
+
 	/* Then the fiber link */
 	err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE);
 	if (err < 0)
@@ -875,6 +879,8 @@ static int m88e1510_config_init(struct phy_device *phydev)
 
 	/* SGMII-to-Copper mode initialization */
 	if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+		u32 pause;
+
 		/* Select page 18 */
 		err = marvell_set_page(phydev, 18);
 		if (err < 0)
@@ -898,6 +904,16 @@ static int m88e1510_config_init(struct phy_device *phydev)
 		err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
 		if (err < 0)
 			return err;
+
+		/* There appears to be a bug in the 88e1512 when used in
+		 * SGMII to copper mode, where the AN advertisment register
+		 * clears the pause bits each time a negotiation occurs.
+		 * This means we can never be truely sure what was advertised,
+		 * so disable Pause support.
+		 */
+		pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+		phydev->supported &= ~pause;
+		phydev->advertising &= ~pause;
 	}
 
 	return m88e1121_config_init(phydev);
@@ -2069,7 +2085,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
 		.config_init = &m88e1145_config_init,
-		.config_aneg = &marvell_config_aneg,
+		.config_aneg = &m88e1101_config_aneg,
 		.read_status = &genphy_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c
index 135296508a7ed70661f7e8f9aa9c0eb7fadf3133..6425ce04d3f95d6ca7a7b2ec7a22b7f860358a1e 100644
--- a/drivers/net/phy/mdio-sun4i.c
+++ b/drivers/net/phy/mdio-sun4i.c
@@ -118,8 +118,10 @@ static int sun4i_mdio_probe(struct platform_device *pdev)
 
 	data->regulator = devm_regulator_get(&pdev->dev, "phy");
 	if (IS_ERR(data->regulator)) {
-		if (PTR_ERR(data->regulator) == -EPROBE_DEFER)
-			return -EPROBE_DEFER;
+		if (PTR_ERR(data->regulator) == -EPROBE_DEFER) {
+			ret = -EPROBE_DEFER;
+			goto err_out_free_mdiobus;
+		}
 
 		dev_info(&pdev->dev, "no regulator found\n");
 		data->regulator = NULL;
diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c
index bfd3090fb055bac4c40924205036119da5b6ce61..07c6048200c6164ac77a649468063e2fedd404c6 100644
--- a/drivers/net/phy/mdio-xgene.c
+++ b/drivers/net/phy/mdio-xgene.c
@@ -194,8 +194,11 @@ static int xgene_mdio_reset(struct xgene_mdio_pdata *pdata)
 	}
 
 	ret = xgene_enet_ecc_init(pdata);
-	if (ret)
+	if (ret) {
+		if (pdata->dev->of_node)
+			clk_disable_unprepare(pdata->clk);
 		return ret;
+	}
 	xgene_gmac_reset(pdata);
 
 	return 0;
@@ -388,8 +391,10 @@ static int xgene_mdio_probe(struct platform_device *pdev)
 		return ret;
 
 	mdio_bus = mdiobus_alloc();
-	if (!mdio_bus)
-		return -ENOMEM;
+	if (!mdio_bus) {
+		ret = -ENOMEM;
+		goto out_clk;
+	}
 
 	mdio_bus->name = "APM X-Gene MDIO bus";
 
@@ -418,7 +423,7 @@ static int xgene_mdio_probe(struct platform_device *pdev)
 		mdio_bus->phy_mask = ~0;
 		ret = mdiobus_register(mdio_bus);
 		if (ret)
-			goto out;
+			goto out_mdiobus;
 
 		acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1,
 				    acpi_register_phy, NULL, mdio_bus, NULL);
@@ -426,16 +431,20 @@ static int xgene_mdio_probe(struct platform_device *pdev)
 	}
 
 	if (ret)
-		goto out;
+		goto out_mdiobus;
 
 	pdata->mdio_bus = mdio_bus;
 	xgene_mdio_status = true;
 
 	return 0;
 
-out:
+out_mdiobus:
 	mdiobus_free(mdio_bus);
 
+out_clk:
+	if (dev->of_node)
+		clk_disable_unprepare(pdata->clk);
+
 	return ret;
 }
 
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 2df7b62c1a36811e97087ae641a89d06641cef4e..54d00a1d2bef094877c80bccad220de7f4d97eba 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -270,6 +270,7 @@ static void of_mdiobus_link_mdiodev(struct mii_bus *bus,
 
 		if (addr == mdiodev->addr) {
 			dev->of_node = child;
+			dev->fwnode = of_fwnode_handle(child);
 			return;
 		}
 	}
diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c
index 1ea69b7585d9bcb8098ecddfc33d0a3a46704843..842eb871a6e38df0c22cc43b7ee02f0f137f3a05 100644
--- a/drivers/net/phy/meson-gxl.c
+++ b/drivers/net/phy/meson-gxl.c
@@ -22,6 +22,7 @@
 #include <linux/ethtool.h>
 #include <linux/phy.h>
 #include <linux/netdevice.h>
+#include <linux/bitfield.h>
 
 static int meson_gxl_config_init(struct phy_device *phydev)
 {
@@ -50,6 +51,77 @@ static int meson_gxl_config_init(struct phy_device *phydev)
 	return 0;
 }
 
+/* This function is provided to cope with the possible failures of this phy
+ * during aneg process. When aneg fails, the PHY reports that aneg is done
+ * but the value found in MII_LPA is wrong:
+ *  - Early failures: MII_LPA is just 0x0001. if MII_EXPANSION reports that
+ *    the link partner (LP) supports aneg but the LP never acked our base
+ *    code word, it is likely that we never sent it to begin with.
+ *  - Late failures: MII_LPA is filled with a value which seems to make sense
+ *    but it actually is not what the LP is advertising. It seems that we
+ *    can detect this using a magic bit in the WOL bank (reg 12 - bit 12).
+ *    If this particular bit is not set when aneg is reported being done,
+ *    it means MII_LPA is likely to be wrong.
+ *
+ * In both case, forcing a restart of the aneg process solve the problem.
+ * When this failure happens, the first retry is usually successful but,
+ * in some cases, it may take up to 6 retries to get a decent result
+ */
+static int meson_gxl_read_status(struct phy_device *phydev)
+{
+	int ret, wol, lpa, exp;
+
+	if (phydev->autoneg == AUTONEG_ENABLE) {
+		ret = genphy_aneg_done(phydev);
+		if (ret < 0)
+			return ret;
+		else if (!ret)
+			goto read_status_continue;
+
+		/* Need to access WOL bank, make sure the access is open */
+		ret = phy_write(phydev, 0x14, 0x0000);
+		if (ret)
+			return ret;
+		ret = phy_write(phydev, 0x14, 0x0400);
+		if (ret)
+			return ret;
+		ret = phy_write(phydev, 0x14, 0x0000);
+		if (ret)
+			return ret;
+		ret = phy_write(phydev, 0x14, 0x0400);
+		if (ret)
+			return ret;
+
+		/* Request LPI_STATUS WOL register */
+		ret = phy_write(phydev, 0x14, 0x8D80);
+		if (ret)
+			return ret;
+
+		/* Read LPI_STATUS value */
+		wol = phy_read(phydev, 0x15);
+		if (wol < 0)
+			return wol;
+
+		lpa = phy_read(phydev, MII_LPA);
+		if (lpa < 0)
+			return lpa;
+
+		exp = phy_read(phydev, MII_EXPANSION);
+		if (exp < 0)
+			return exp;
+
+		if (!(wol & BIT(12)) ||
+		    ((exp & EXPANSION_NWAY) && !(lpa & LPA_LPACK))) {
+			/* Looks like aneg failed after all */
+			phydev_dbg(phydev, "LPA corruption - aneg restart\n");
+			return genphy_restart_aneg(phydev);
+		}
+	}
+
+read_status_continue:
+	return genphy_read_status(phydev);
+}
+
 static struct phy_driver meson_gxl_phy[] = {
 	{
 		.phy_id		= 0x01814400,
@@ -60,7 +132,7 @@ static struct phy_driver meson_gxl_phy[] = {
 		.config_init	= meson_gxl_config_init,
 		.config_aneg	= genphy_config_aneg,
 		.aneg_done      = genphy_aneg_done,
-		.read_status	= genphy_read_status,
+		.read_status	= meson_gxl_read_status,
 		.suspend        = genphy_suspend,
 		.resume         = genphy_resume,
 	},
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index fdb43dd9b5cd424f4dde02f1257070ffe4b50fb1..422ff6333c52da8c4a212123bdaf443945613e02 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -496,16 +496,18 @@ static int ksz9031_of_load_skew_values(struct phy_device *phydev,
 	return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval);
 }
 
+/* Center KSZ9031RNX FLP timing at 16ms. */
 static int ksz9031_center_flp_timing(struct phy_device *phydev)
 {
 	int result;
 
-	/* Center KSZ9031RNX FLP timing at 16ms. */
 	result = ksz9031_extended_write(phydev, OP_DATA, 0,
 					MII_KSZ9031RN_FLP_BURST_TX_HI, 0x0006);
+	if (result)
+		return result;
+
 	result = ksz9031_extended_write(phydev, OP_DATA, 0,
 					MII_KSZ9031RN_FLP_BURST_TX_LO, 0x1A80);
-
 	if (result)
 		return result;
 
@@ -622,6 +624,7 @@ static int ksz9031_read_status(struct phy_device *phydev)
 		phydev->link = 0;
 		if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
 			phydev->drv->config_intr(phydev);
+		return genphy_config_aneg(phydev);
 	}
 
 	return 0;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 2b1e67bc1e736ceb33f7afa8462f5a4858b522df..ed10d1fc8f59188b95e090f085ced18421ad23c2 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -828,7 +828,6 @@ EXPORT_SYMBOL(phy_stop);
  */
 void phy_start(struct phy_device *phydev)
 {
-	bool do_resume = false;
 	int err = 0;
 
 	mutex_lock(&phydev->lock);
@@ -841,6 +840,9 @@ void phy_start(struct phy_device *phydev)
 		phydev->state = PHY_UP;
 		break;
 	case PHY_HALTED:
+		/* if phy was suspended, bring the physical link up again */
+		phy_resume(phydev);
+
 		/* make sure interrupts are re-enabled for the PHY */
 		if (phydev->irq != PHY_POLL) {
 			err = phy_enable_interrupts(phydev);
@@ -849,17 +851,12 @@ void phy_start(struct phy_device *phydev)
 		}
 
 		phydev->state = PHY_RESUMING;
-		do_resume = true;
 		break;
 	default:
 		break;
 	}
 	mutex_unlock(&phydev->lock);
 
-	/* if phy was suspended, bring the physical link up again */
-	if (do_resume)
-		phy_resume(phydev);
-
 	phy_trigger_machine(phydev, true);
 }
 EXPORT_SYMBOL(phy_start);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 67f25ac29025c53903cc724fac62efdd94828510..b15b31ca26182719cca6f764e19335483e11930e 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -135,7 +135,9 @@ static int mdio_bus_phy_resume(struct device *dev)
 	if (!mdio_bus_phy_may_suspend(phydev))
 		goto no_resume;
 
+	mutex_lock(&phydev->lock);
 	ret = phy_resume(phydev);
+	mutex_unlock(&phydev->lock);
 	if (ret < 0)
 		return ret;
 
@@ -1026,7 +1028,9 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	if (err)
 		goto error;
 
+	mutex_lock(&phydev->lock);
 	phy_resume(phydev);
+	mutex_unlock(&phydev->lock);
 	phy_led_triggers_register(phydev);
 
 	return err;
@@ -1157,6 +1161,8 @@ int phy_resume(struct phy_device *phydev)
 	struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
 	int ret = 0;
 
+	WARN_ON(!mutex_is_locked(&phydev->lock));
+
 	if (phydev->drv && phydrv->resume)
 		ret = phydrv->resume(phydev);
 
@@ -1639,13 +1645,9 @@ int genphy_resume(struct phy_device *phydev)
 {
 	int value;
 
-	mutex_lock(&phydev->lock);
-
 	value = phy_read(phydev, MII_BMCR);
 	phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
 
-	mutex_unlock(&phydev->lock);
-
 	return 0;
 }
 EXPORT_SYMBOL(genphy_resume);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index e3bbc70372d3ba73517514c1eda6a861d35d9524..249ce5cbea2201902563ceb6b9339809e8ccb99e 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -526,6 +526,7 @@ struct phylink *phylink_create(struct net_device *ndev, struct device_node *np,
 	pl->link_config.pause = MLO_PAUSE_AN;
 	pl->link_config.speed = SPEED_UNKNOWN;
 	pl->link_config.duplex = DUPLEX_UNKNOWN;
+	pl->link_config.an_enabled = true;
 	pl->ops = ops;
 	__set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
 
@@ -773,6 +774,7 @@ void phylink_stop(struct phylink *pl)
 		sfp_upstream_stop(pl->sfp_bus);
 
 	set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
+	queue_work(system_power_efficient_wq, &pl->resolve);
 	flush_work(&pl->resolve);
 }
 EXPORT_SYMBOL_GPL(phylink_stop);
@@ -950,6 +952,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
 	mutex_lock(&pl->state_mutex);
 	/* Configure the MAC to match the new settings */
 	linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising);
+	pl->link_config.interface = config.interface;
 	pl->link_config.speed = our_kset.base.speed;
 	pl->link_config.duplex = our_kset.base.duplex;
 	pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE;
@@ -1293,6 +1296,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd)
 		switch (cmd) {
 		case SIOCGMIIPHY:
 			mii->phy_id = pl->phydev->mdio.addr;
+			/* fall through */
 
 		case SIOCGMIIREG:
 			ret = phylink_phy_read(pl, mii->phy_id, mii->reg_num);
@@ -1315,6 +1319,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd)
 		switch (cmd) {
 		case SIOCGMIIPHY:
 			mii->phy_id = 0;
+			/* fall through */
 
 		case SIOCGMIIREG:
 			ret = phylink_mii_read(pl, mii->phy_id, mii->reg_num);
@@ -1426,9 +1431,8 @@ static void phylink_sfp_link_down(void *upstream)
 	WARN_ON(!lockdep_rtnl_is_held());
 
 	set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state);
+	queue_work(system_power_efficient_wq, &pl->resolve);
 	flush_work(&pl->resolve);
-
-	netif_carrier_off(pl->netdev);
 }
 
 static void phylink_sfp_link_up(void *upstream)
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 8a1b1f4c1b7c675278902ec78c31a632df44351f..ab64a142b832c20328f73c691fb566b78c2c8399 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -356,7 +356,8 @@ EXPORT_SYMBOL_GPL(sfp_register_upstream);
 void sfp_unregister_upstream(struct sfp_bus *bus)
 {
 	rtnl_lock();
-	sfp_unregister_bus(bus);
+	if (bus->sfp)
+		sfp_unregister_bus(bus);
 	bus->upstream = NULL;
 	bus->netdev = NULL;
 	rtnl_unlock();
@@ -459,7 +460,8 @@ EXPORT_SYMBOL_GPL(sfp_register_socket);
 void sfp_unregister_socket(struct sfp_bus *bus)
 {
 	rtnl_lock();
-	sfp_unregister_bus(bus);
+	if (bus->netdev)
+		sfp_unregister_bus(bus);
 	bus->sfp_dev = NULL;
 	bus->sfp = NULL;
 	bus->socket_ops = NULL;
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index e381811e5f1143f35432e6624e80c00b13f0b56e..9dfc1c4c954f3230c7f6419ac2c59ad85b26c1c4 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -351,12 +351,13 @@ static void sfp_sm_link_check_los(struct sfp *sfp)
 {
 	unsigned int los = sfp->state & SFP_F_LOS;
 
-	/* FIXME: what if neither SFP_OPTIONS_LOS_INVERTED nor
-	 * SFP_OPTIONS_LOS_NORMAL are set?  For now, we assume
-	 * the same as SFP_OPTIONS_LOS_NORMAL set.
+	/* If neither SFP_OPTIONS_LOS_INVERTED nor SFP_OPTIONS_LOS_NORMAL
+	 * are set, we assume that no LOS signal is available.
 	 */
-	if (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED)
+	if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED))
 		los ^= SFP_F_LOS;
+	else if (!(sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL)))
+		los = 0;
 
 	if (los)
 		sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0);
@@ -364,6 +365,22 @@ static void sfp_sm_link_check_los(struct sfp *sfp)
 		sfp_sm_link_up(sfp);
 }
 
+static bool sfp_los_event_active(struct sfp *sfp, unsigned int event)
+{
+	return (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) &&
+		event == SFP_E_LOS_LOW) ||
+	       (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL) &&
+		event == SFP_E_LOS_HIGH);
+}
+
+static bool sfp_los_event_inactive(struct sfp *sfp, unsigned int event)
+{
+	return (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) &&
+		event == SFP_E_LOS_HIGH) ||
+	       (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL) &&
+		event == SFP_E_LOS_LOW);
+}
+
 static void sfp_sm_fault(struct sfp *sfp, bool warn)
 {
 	if (sfp->sm_retries && !--sfp->sm_retries) {
@@ -470,6 +487,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
 		return -EINVAL;
 	}
 
+	/* If the module requires address swap mode, warn about it */
+	if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)
+		dev_warn(sfp->dev,
+			 "module address swap to access page 0xA2 is not supported.\n");
+
 	return sfp_module_insert(sfp->sfp_bus, &sfp->id);
 }
 
@@ -581,9 +603,7 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
 	case SFP_S_WAIT_LOS:
 		if (event == SFP_E_TX_FAULT)
 			sfp_sm_fault(sfp, true);
-		else if (event ==
-			 (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED ?
-			  SFP_E_LOS_HIGH : SFP_E_LOS_LOW))
+		else if (sfp_los_event_inactive(sfp, event))
 			sfp_sm_link_up(sfp);
 		break;
 
@@ -591,9 +611,7 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
 		if (event == SFP_E_TX_FAULT) {
 			sfp_sm_link_down(sfp);
 			sfp_sm_fault(sfp, true);
-		} else if (event ==
-			   (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED ?
-			    SFP_E_LOS_LOW : SFP_E_LOS_HIGH)) {
+		} else if (sfp_los_event_active(sfp, event)) {
 			sfp_sm_link_down(sfp);
 			sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0);
 		}
@@ -639,7 +657,8 @@ static int sfp_module_info(struct sfp *sfp, struct ethtool_modinfo *modinfo)
 {
 	/* locking... and check module is present */
 
-	if (sfp->id.ext.sff8472_compliance) {
+	if (sfp->id.ext.sff8472_compliance &&
+	    !(sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)) {
 		modinfo->type = ETH_MODULE_SFF_8472;
 		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
 	} else {
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index e9489b88407ce1677385fe480592958b57d02c8d..0a886fda01291efb5a6beb0a2b5eb2123c1f05ab 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -829,8 +829,11 @@ static ssize_t tap_do_read(struct tap_queue *q,
 	DEFINE_WAIT(wait);
 	ssize_t ret = 0;
 
-	if (!iov_iter_count(to))
+	if (!iov_iter_count(to)) {
+		if (skb)
+			kfree_skb(skb);
 		return 0;
+	}
 
 	if (skb)
 		goto put;
@@ -1154,11 +1157,14 @@ static int tap_recvmsg(struct socket *sock, struct msghdr *m,
 		       size_t total_len, int flags)
 {
 	struct tap_queue *q = container_of(sock, struct tap_queue, sock);
+	struct sk_buff *skb = m->msg_control;
 	int ret;
-	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
+	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) {
+		if (skb)
+			kfree_skb(skb);
 		return -EINVAL;
-	ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT,
-			  m->msg_control);
+	}
+	ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb);
 	if (ret > total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 95749006d687b971a49894c903fcc611bc25c375..4f4a842a1c9cb8ac3397b329854a0fc7bd2f6aa3 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1952,8 +1952,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 
 	tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
-	if (!iov_iter_count(to))
+	if (!iov_iter_count(to)) {
+		if (skb)
+			kfree_skb(skb);
 		return 0;
+	}
 
 	if (!skb) {
 		/* Read frames from ring */
@@ -2069,22 +2072,24 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 {
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
 	struct tun_struct *tun = tun_get(tfile);
+	struct sk_buff *skb = m->msg_control;
 	int ret;
 
-	if (!tun)
-		return -EBADFD;
+	if (!tun) {
+		ret = -EBADFD;
+		goto out_free_skb;
+	}
 
 	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
 		ret = -EINVAL;
-		goto out;
+		goto out_put_tun;
 	}
 	if (flags & MSG_ERRQUEUE) {
 		ret = sock_recv_errqueue(sock->sk, m, total_len,
 					 SOL_PACKET, TUN_TX_TIMESTAMP);
 		goto out;
 	}
-	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT,
-			  m->msg_control);
+	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
 	if (ret > (ssize_t)total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
@@ -2092,6 +2097,13 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 out:
 	tun_put(tun);
 	return ret;
+
+out_put_tun:
+	tun_put(tun);
+out_free_skb:
+	if (skb)
+		kfree_skb(skb);
+	return ret;
 }
 
 static int tun_peek_len(struct socket *sock)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index c750cf7c042b004ecfbbce64aefb3d0f1d512c82..728819feab44db75f6b4c369767c3ed6c1966cbf 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -261,9 +261,11 @@ static void qmi_wwan_netdev_setup(struct net_device *net)
 		net->hard_header_len = 0;
 		net->addr_len        = 0;
 		net->flags           = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+		set_bit(EVENT_NO_IP_ALIGN, &dev->flags);
 		netdev_dbg(net, "mode: raw IP\n");
 	} else if (!net->header_ops) { /* don't bother if already set */
 		ether_setup(net);
+		clear_bit(EVENT_NO_IP_ALIGN, &dev->flags);
 		netdev_dbg(net, "mode: Ethernet\n");
 	}
 
@@ -1098,6 +1100,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x05c6, 0x9084, 4)},
 	{QMI_FIXED_INTF(0x05c6, 0x920d, 0)},
 	{QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
+	{QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)},	/* YUGA CLM920-NC5 */
 	{QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
 	{QMI_FIXED_INTF(0x12d1, 0x140c, 1)},	/* Huawei E173 */
 	{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)},	/* Huawei E1820 */
@@ -1202,12 +1205,14 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x1199, 0x9079, 10)},	/* Sierra Wireless EM74xx */
 	{QMI_FIXED_INTF(0x1199, 0x907b, 8)},	/* Sierra Wireless EM74xx */
 	{QMI_FIXED_INTF(0x1199, 0x907b, 10)},	/* Sierra Wireless EM74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9091, 8)},	/* Sierra Wireless EM7565 */
 	{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)},	/* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
 	{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)},	/* Alcatel L800MA */
 	{QMI_FIXED_INTF(0x2357, 0x0201, 4)},	/* TP-LINK HSUPA Modem MA180 */
 	{QMI_FIXED_INTF(0x2357, 0x9000, 4)},	/* TP-LINK MA260 */
 	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)},	/* Telit LE922A */
 	{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)},	/* Telit ME910 */
+	{QMI_FIXED_INTF(0x1bc7, 0x1101, 3)},	/* Telit ME910 dual modem */
 	{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},	/* Telit LE920 */
 	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)},	/* Telit LE920, LE920A4 */
 	{QMI_FIXED_INTF(0x1c9e, 0x9801, 3)},	/* Telewell TW-3G HSPA+ */
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 80348b6a864668d0b7535906084bf64f967a1448..d56fe32bf48dea8c617c011d5bd6ddc8d9d5270f 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -484,7 +484,10 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
 		return -ENOLINK;
 	}
 
-	skb = __netdev_alloc_skb_ip_align(dev->net, size, flags);
+	if (test_bit(EVENT_NO_IP_ALIGN, &dev->flags))
+		skb = __netdev_alloc_skb(dev->net, size, flags);
+	else
+		skb = __netdev_alloc_skb_ip_align(dev->net, size, flags);
 	if (!skb) {
 		netif_dbg(dev, rx_err, dev->net, "no rx skb\n");
 		usbnet_defer_kevent (dev, EVENT_RX_MEMORY);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 19a985ef9104ba129086d53f7661ba880d2cb5d3..559b215c016967f2b6525e7f3bc4fca2a2ee9e90 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -756,7 +756,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		int num_skb_frags;
 
 		buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
-		if (unlikely(!ctx)) {
+		if (unlikely(!buf)) {
 			pr_debug("%s: rx error: %d buffers out of %d missing\n",
 				 dev->name, num_buf,
 				 virtio16_to_cpu(vi->vdev,
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 19b9cc51079e75346af766c91786d66eaa92c3f2..31f4b7911ef84c85789011332e37c5314099d82c 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2155,6 +2155,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		ndst = &rt->dst;
+		if (skb_dst(skb)) {
+			int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
+
+			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
+						       skb, mtu);
+		}
+
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
 		err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
@@ -2190,6 +2197,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 				goto out_unlock;
 		}
 
+		if (skb_dst(skb)) {
+			int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
+
+			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
+						       skb, mtu);
+		}
+
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);
 		skb_scrub_packet(skb, xnet);
@@ -3103,6 +3117,11 @@ static void vxlan_config_apply(struct net_device *dev,
 
 		max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
 					   VXLAN_HEADROOM);
+		if (max_mtu < ETH_MIN_MTU)
+			max_mtu = ETH_MIN_MTU;
+
+		if (!changelink && !conf->mtu)
+			dev->mtu = max_mtu;
 	}
 
 	if (dev->mtu > max_mtu)
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index f7d228b5ba933f744474be119802f41e461c5715..987f1252a3cf888d76432ee4f55b1bd215358209 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -384,6 +384,18 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed)
 		}
 	}
 
+	if (changed & IEEE80211_CONF_CHANGE_PS) {
+		list_for_each_entry(tmp, &wcn->vif_list, list) {
+			vif = wcn36xx_priv_to_vif(tmp);
+			if (hw->conf.flags & IEEE80211_CONF_PS) {
+				if (vif->bss_conf.ps) /* ps allowed ? */
+					wcn36xx_pmc_enter_bmps_state(wcn, vif);
+			} else {
+				wcn36xx_pmc_exit_bmps_state(wcn, vif);
+			}
+		}
+	}
+
 	mutex_unlock(&wcn->conf_mutex);
 
 	return 0;
@@ -747,17 +759,6 @@ static void wcn36xx_bss_info_changed(struct ieee80211_hw *hw,
 		vif_priv->dtim_period = bss_conf->dtim_period;
 	}
 
-	if (changed & BSS_CHANGED_PS) {
-		wcn36xx_dbg(WCN36XX_DBG_MAC,
-			    "mac bss PS set %d\n",
-			    bss_conf->ps);
-		if (bss_conf->ps) {
-			wcn36xx_pmc_enter_bmps_state(wcn, vif);
-		} else {
-			wcn36xx_pmc_exit_bmps_state(wcn, vif);
-		}
-	}
-
 	if (changed & BSS_CHANGED_BSSID) {
 		wcn36xx_dbg(WCN36XX_DBG_MAC, "mac bss changed_bssid %pM\n",
 			    bss_conf->bssid);
diff --git a/drivers/net/wireless/ath/wcn36xx/pmc.c b/drivers/net/wireless/ath/wcn36xx/pmc.c
index 589fe5f7097160a4bcee5d1b1be979ce389e297c..1976b80c235fe5e4c5c591a761c1dc8d1d7b941b 100644
--- a/drivers/net/wireless/ath/wcn36xx/pmc.c
+++ b/drivers/net/wireless/ath/wcn36xx/pmc.c
@@ -45,8 +45,10 @@ int wcn36xx_pmc_exit_bmps_state(struct wcn36xx *wcn,
 	struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
 
 	if (WCN36XX_BMPS != vif_priv->pw_state) {
-		wcn36xx_err("Not in BMPS mode, no need to exit from BMPS mode!\n");
-		return -EINVAL;
+		/* Unbalanced call or last BMPS enter failed */
+		wcn36xx_dbg(WCN36XX_DBG_PMC,
+			    "Not in BMPS mode, no need to exit\n");
+		return -EALREADY;
 	}
 	wcn36xx_smd_exit_bmps(wcn, vif);
 	vif_priv->pw_state = WCN36XX_FULL_POWER;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 310c4e2746aab1da08d62e58812219b5271507e9..cdf9e41615925c6978dc4620e032d65b055ee28d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -2070,7 +2070,7 @@ static int brcmf_sdio_txpkt_hdalign(struct brcmf_sdio *bus, struct sk_buff *pkt)
 	return head_pad;
 }
 
-/**
+/*
  * struct brcmf_skbuff_cb reserves first two bytes in sk_buff::cb for
  * bus layer usage.
  */
@@ -4121,8 +4121,8 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 	sdio_release_host(sdiodev->func[1]);
 fail:
 	brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err);
-	device_release_driver(dev);
 	device_release_driver(&sdiodev->func[2]->dev);
+	device_release_driver(dev);
 }
 
 struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
index 87b4434224a1f0a121455ef0a2d0877c4e14e42b..dfa111bb411e5b1421ae2060f37d600432d70bd9 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
@@ -68,6 +68,9 @@
  * @IWL_MVM_DQA_CMD_QUEUE: a queue reserved for sending HCMDs to the FW
  * @IWL_MVM_DQA_AUX_QUEUE: a queue reserved for aux frames
  * @IWL_MVM_DQA_P2P_DEVICE_QUEUE: a queue reserved for P2P device frames
+ * @IWL_MVM_DQA_INJECT_MONITOR_QUEUE: a queue reserved for injection using
+ *	monitor mode. Note this queue is the same as the queue for P2P device
+ *	but we can't have active monitor mode along with P2P device anyway.
  * @IWL_MVM_DQA_GCAST_QUEUE: a queue reserved for P2P GO/SoftAP GCAST frames
  * @IWL_MVM_DQA_BSS_CLIENT_QUEUE: a queue reserved for BSS activity, to ensure
  *	that we are never left without the possibility to connect to an AP.
@@ -87,6 +90,7 @@ enum iwl_mvm_dqa_txq {
 	IWL_MVM_DQA_CMD_QUEUE = 0,
 	IWL_MVM_DQA_AUX_QUEUE = 1,
 	IWL_MVM_DQA_P2P_DEVICE_QUEUE = 2,
+	IWL_MVM_DQA_INJECT_MONITOR_QUEUE = 2,
 	IWL_MVM_DQA_GCAST_QUEUE = 3,
 	IWL_MVM_DQA_BSS_CLIENT_QUEUE = 4,
 	IWL_MVM_DQA_MIN_MGMT_QUEUE = 5,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
index 9c889a32fe2424941d9bceb89b8cd1f593e4f3ab..223fb77a3aa9d64456244dd4c5156b8885cec6fd 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
@@ -209,8 +209,6 @@ static inline void iwl_fw_dbg_stop_recording(struct iwl_fw_runtime *fwrt)
 
 static inline void iwl_fw_dump_conf_clear(struct iwl_fw_runtime *fwrt)
 {
-	iwl_fw_dbg_stop_recording(fwrt);
-
 	fwrt->dump.conf = FW_DBG_INVALID;
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index ca0b5536a8a68e1a4dcc8b2d2de61e5dfb7af255..921cab9e2d737bf7fac47028339852dd60c201c6 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -117,6 +117,7 @@
 #define FH_RSCSR_FRAME_INVALID		0x55550000
 #define FH_RSCSR_FRAME_ALIGN		0x40
 #define FH_RSCSR_RPA_EN			BIT(25)
+#define FH_RSCSR_RADA_EN		BIT(26)
 #define FH_RSCSR_RXQ_POS		16
 #define FH_RSCSR_RXQ_MASK		0x3F0000
 
@@ -128,7 +129,8 @@ struct iwl_rx_packet {
 	 * 31:    flag flush RB request
 	 * 30:    flag ignore TC (terminal counter) request
 	 * 29:    flag fast IRQ request
-	 * 28-26: Reserved
+	 * 28-27: Reserved
+	 * 26:    RADA enabled
 	 * 25:    Offload enabled
 	 * 24:    RPF enabled
 	 * 23:    RSS enabled
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index a2bf530eeae49e38430d9f05ab33d865ed9fd560..2f22e14e00fe881bc9868a22c25ba41286a9ea51 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -787,7 +787,7 @@ static int iwl_mvm_mac_ctxt_cmd_listener(struct iwl_mvm *mvm,
 					 u32 action)
 {
 	struct iwl_mac_ctx_cmd cmd = {};
-	u32 tfd_queue_msk = 0;
+	u32 tfd_queue_msk = BIT(mvm->snif_queue);
 	int ret;
 
 	WARN_ON(vif->type != NL80211_IFTYPE_MONITOR);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 4575595ab022600ff7d33da2d789c54f84a0e951..55ab5349dd40d8b886373df6cb0715e977650309 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -972,6 +972,7 @@ struct iwl_mvm {
 
 	/* Tx queues */
 	u16 aux_queue;
+	u16 snif_queue;
 	u16 probe_queue;
 	u16 p2p_dev_queue;
 
@@ -1060,6 +1061,7 @@ struct iwl_mvm {
  * @IWL_MVM_STATUS_ROC_AUX_RUNNING: AUX remain-on-channel is running
  * @IWL_MVM_STATUS_D3_RECONFIG: D3 reconfiguration is being done
  * @IWL_MVM_STATUS_FIRMWARE_RUNNING: firmware is running
+ * @IWL_MVM_STATUS_NEED_FLUSH_P2P: need to flush P2P bcast STA
  */
 enum iwl_mvm_status {
 	IWL_MVM_STATUS_HW_RFKILL,
@@ -1071,6 +1073,7 @@ enum iwl_mvm_status {
 	IWL_MVM_STATUS_ROC_AUX_RUNNING,
 	IWL_MVM_STATUS_D3_RECONFIG,
 	IWL_MVM_STATUS_FIRMWARE_RUNNING,
+	IWL_MVM_STATUS_NEED_FLUSH_P2P,
 };
 
 /* Keep track of completed init configuration */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 7078b7e458be84d59e691e88bf791bce9ca9154d..45470b6b351a9a31734b3a77f04fcbd99a8526eb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -624,6 +624,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
 	mvm->fw_restart = iwlwifi_mod_params.fw_restart ? -1 : 0;
 
 	mvm->aux_queue = IWL_MVM_DQA_AUX_QUEUE;
+	mvm->snif_queue = IWL_MVM_DQA_INJECT_MONITOR_QUEUE;
 	mvm->probe_queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
 	mvm->p2p_dev_queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE;
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 76dc58381e1c9e443847d2f890b19eceb6b0140e..3b8d44361380de8d2a0a9635a915fe8151388ecf 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -213,6 +213,7 @@ static void iwl_mvm_get_signal_strength(struct iwl_mvm *mvm,
 					struct ieee80211_rx_status *rx_status)
 {
 	int energy_a, energy_b, max_energy;
+	u32 rate_flags = le32_to_cpu(desc->rate_n_flags);
 
 	energy_a = desc->energy_a;
 	energy_a = energy_a ? -energy_a : S8_MIN;
@@ -224,7 +225,8 @@ static void iwl_mvm_get_signal_strength(struct iwl_mvm *mvm,
 			energy_a, energy_b, max_energy);
 
 	rx_status->signal = max_energy;
-	rx_status->chains = 0; /* TODO: phy info */
+	rx_status->chains =
+		(rate_flags & RATE_MCS_ANT_AB_MSK) >> RATE_MCS_ANT_POS;
 	rx_status->chain_signal[0] = energy_a;
 	rx_status->chain_signal[1] = energy_b;
 	rx_status->chain_signal[2] = S8_MIN;
@@ -232,8 +234,8 @@ static void iwl_mvm_get_signal_strength(struct iwl_mvm *mvm,
 
 static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_hdr *hdr,
 			     struct ieee80211_rx_status *stats,
-			     struct iwl_rx_mpdu_desc *desc, int queue,
-			     u8 *crypt_len)
+			     struct iwl_rx_mpdu_desc *desc, u32 pkt_flags,
+			     int queue, u8 *crypt_len)
 {
 	u16 status = le16_to_cpu(desc->status);
 
@@ -253,6 +255,8 @@ static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_hdr *hdr,
 			return -1;
 
 		stats->flag |= RX_FLAG_DECRYPTED;
+		if (pkt_flags & FH_RSCSR_RADA_EN)
+			stats->flag |= RX_FLAG_MIC_STRIPPED;
 		*crypt_len = IEEE80211_CCMP_HDR_LEN;
 		return 0;
 	case IWL_RX_MPDU_STATUS_SEC_TKIP:
@@ -270,6 +274,10 @@ static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_hdr *hdr,
 		if ((status & IWL_RX_MPDU_STATUS_SEC_MASK) ==
 				IWL_RX_MPDU_STATUS_SEC_WEP)
 			*crypt_len = IEEE80211_WEP_IV_LEN;
+
+		if (pkt_flags & FH_RSCSR_RADA_EN)
+			stats->flag |= RX_FLAG_ICV_STRIPPED;
+
 		return 0;
 	case IWL_RX_MPDU_STATUS_SEC_EXT_ENC:
 		if (!(status & IWL_RX_MPDU_STATUS_MIC_OK))
@@ -848,7 +856,9 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
 
 	rx_status = IEEE80211_SKB_RXCB(skb);
 
-	if (iwl_mvm_rx_crypto(mvm, hdr, rx_status, desc, queue, &crypt_len)) {
+	if (iwl_mvm_rx_crypto(mvm, hdr, rx_status, desc,
+			      le32_to_cpu(pkt->len_n_flags), queue,
+			      &crypt_len)) {
 		kfree_skb(skb);
 		return;
 	}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index c19f98489d4e9565e0ee33c0dfd22a757a435db4..1add5615fc3ad9d0a9801920448e79f71e64eda5 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -1709,29 +1709,29 @@ void iwl_mvm_dealloc_int_sta(struct iwl_mvm *mvm, struct iwl_mvm_int_sta *sta)
 	sta->sta_id = IWL_MVM_INVALID_STA;
 }
 
-static void iwl_mvm_enable_aux_queue(struct iwl_mvm *mvm)
+static void iwl_mvm_enable_aux_snif_queue(struct iwl_mvm *mvm, u16 *queue,
+					  u8 sta_id, u8 fifo)
 {
 	unsigned int wdg_timeout = iwlmvm_mod_params.tfd_q_hang_detect ?
 					mvm->cfg->base_params->wd_timeout :
 					IWL_WATCHDOG_DISABLED;
 
 	if (iwl_mvm_has_new_tx_api(mvm)) {
-		int queue = iwl_mvm_tvqm_enable_txq(mvm, mvm->aux_queue,
-						    mvm->aux_sta.sta_id,
-						    IWL_MAX_TID_COUNT,
-						    wdg_timeout);
-		mvm->aux_queue = queue;
+		int tvqm_queue =
+			iwl_mvm_tvqm_enable_txq(mvm, *queue, sta_id,
+						IWL_MAX_TID_COUNT,
+						wdg_timeout);
+		*queue = tvqm_queue;
 	} else {
 		struct iwl_trans_txq_scd_cfg cfg = {
-			.fifo = IWL_MVM_TX_FIFO_MCAST,
-			.sta_id = mvm->aux_sta.sta_id,
+			.fifo = fifo,
+			.sta_id = sta_id,
 			.tid = IWL_MAX_TID_COUNT,
 			.aggregate = false,
 			.frame_limit = IWL_FRAME_LIMIT,
 		};
 
-		iwl_mvm_enable_txq(mvm, mvm->aux_queue, mvm->aux_queue, 0, &cfg,
-				   wdg_timeout);
+		iwl_mvm_enable_txq(mvm, *queue, *queue, 0, &cfg, wdg_timeout);
 	}
 }
 
@@ -1750,7 +1750,9 @@ int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm)
 
 	/* Map Aux queue to fifo - needs to happen before adding Aux station */
 	if (!iwl_mvm_has_new_tx_api(mvm))
-		iwl_mvm_enable_aux_queue(mvm);
+		iwl_mvm_enable_aux_snif_queue(mvm, &mvm->aux_queue,
+					      mvm->aux_sta.sta_id,
+					      IWL_MVM_TX_FIFO_MCAST);
 
 	ret = iwl_mvm_add_int_sta_common(mvm, &mvm->aux_sta, NULL,
 					 MAC_INDEX_AUX, 0);
@@ -1764,7 +1766,9 @@ int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm)
 	 * to firmware so enable queue here - after the station was added
 	 */
 	if (iwl_mvm_has_new_tx_api(mvm))
-		iwl_mvm_enable_aux_queue(mvm);
+		iwl_mvm_enable_aux_snif_queue(mvm, &mvm->aux_queue,
+					      mvm->aux_sta.sta_id,
+					      IWL_MVM_TX_FIFO_MCAST);
 
 	return 0;
 }
@@ -1772,10 +1776,31 @@ int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm)
 int iwl_mvm_add_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+	int ret;
 
 	lockdep_assert_held(&mvm->mutex);
-	return iwl_mvm_add_int_sta_common(mvm, &mvm->snif_sta, vif->addr,
+
+	/* Map snif queue to fifo - must happen before adding snif station */
+	if (!iwl_mvm_has_new_tx_api(mvm))
+		iwl_mvm_enable_aux_snif_queue(mvm, &mvm->snif_queue,
+					      mvm->snif_sta.sta_id,
+					      IWL_MVM_TX_FIFO_BE);
+
+	ret = iwl_mvm_add_int_sta_common(mvm, &mvm->snif_sta, vif->addr,
 					 mvmvif->id, 0);
+	if (ret)
+		return ret;
+
+	/*
+	 * For 22000 firmware and on we cannot add queue to a station unknown
+	 * to firmware so enable queue here - after the station was added
+	 */
+	if (iwl_mvm_has_new_tx_api(mvm))
+		iwl_mvm_enable_aux_snif_queue(mvm, &mvm->snif_queue,
+					      mvm->snif_sta.sta_id,
+					      IWL_MVM_TX_FIFO_BE);
+
+	return 0;
 }
 
 int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
@@ -1784,6 +1809,8 @@ int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 
 	lockdep_assert_held(&mvm->mutex);
 
+	iwl_mvm_disable_txq(mvm, mvm->snif_queue, mvm->snif_queue,
+			    IWL_MAX_TID_COUNT, 0);
 	ret = iwl_mvm_rm_sta_common(mvm, mvm->snif_sta.sta_id);
 	if (ret)
 		IWL_WARN(mvm, "Failed sending remove station\n");
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index 4d0314912e94794b7728af17707618cd72ad2908..e25cda9fbf6c34d951b7441b40574bfb9c0a67b7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -132,6 +132,24 @@ void iwl_mvm_roc_done_wk(struct work_struct *wk)
 	 * executed, and a new time event means a new command.
 	 */
 	iwl_mvm_flush_sta(mvm, &mvm->aux_sta, true, CMD_ASYNC);
+
+	/* Do the same for the P2P device queue (STA) */
+	if (test_and_clear_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status)) {
+		struct iwl_mvm_vif *mvmvif;
+
+		/*
+		 * NB: access to this pointer would be racy, but the flush bit
+		 * can only be set when we had a P2P-Device VIF, and we have a
+		 * flush of this work in iwl_mvm_prepare_mac_removal() so it's
+		 * not really racy.
+		 */
+
+		if (!WARN_ON(!mvm->p2p_device_vif)) {
+			mvmvif = iwl_mvm_vif_from_mac80211(mvm->p2p_device_vif);
+			iwl_mvm_flush_sta(mvm, &mvmvif->bcast_sta, true,
+					  CMD_ASYNC);
+		}
+	}
 }
 
 static void iwl_mvm_roc_finished(struct iwl_mvm *mvm)
@@ -855,10 +873,12 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm)
 
 	mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif);
 
-	if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE)
+	if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE) {
 		iwl_mvm_remove_time_event(mvm, mvmvif, te_data);
-	else
+		set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
+	} else {
 		iwl_mvm_remove_aux_roc_te(mvm, mvmvif, te_data);
+	}
 
 	iwl_mvm_roc_finished(mvm);
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 593b7f97b29c103f8faf28dd905dd36aed34e763..333bcb75b8afcd17eecd76c1a581c683edef4457 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -657,7 +657,8 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
 			if (ap_sta_id != IWL_MVM_INVALID_STA)
 				sta_id = ap_sta_id;
 		} else if (info.control.vif->type == NL80211_IFTYPE_MONITOR) {
-			queue = mvm->aux_queue;
+			queue = mvm->snif_queue;
+			sta_id = mvm->snif_sta.sta_id;
 		}
 	}
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index d46115e2d69e1d6de1fcc3126e3f1af8a59da99c..03ffd84786ca4f1b832fe78177ac0ef090d4dddd 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -1134,9 +1134,18 @@ unsigned int iwl_mvm_get_wd_timeout(struct iwl_mvm *mvm,
 	unsigned int default_timeout =
 		cmd_q ? IWL_DEF_WD_TIMEOUT : mvm->cfg->base_params->wd_timeout;
 
-	if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_TXQ_TIMERS))
+	if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_TXQ_TIMERS)) {
+		/*
+		 * We can't know when the station is asleep or awake, so we
+		 * must disable the queue hang detection.
+		 */
+		if (fw_has_capa(&mvm->fw->ucode_capa,
+				IWL_UCODE_TLV_CAPA_STA_PM_NOTIF) &&
+		    vif && vif->type == NL80211_IFTYPE_AP)
+			return IWL_WATCHDOG_DISABLED;
 		return iwlmvm_mod_params.tfd_q_hang_detect ?
 			default_timeout : IWL_WATCHDOG_DISABLED;
+	}
 
 	trigger = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_TXQ_TIMERS);
 	txq_timer = (void *)trigger->data;
@@ -1163,6 +1172,8 @@ unsigned int iwl_mvm_get_wd_timeout(struct iwl_mvm *mvm,
 		return le32_to_cpu(txq_timer->p2p_go);
 	case NL80211_IFTYPE_P2P_DEVICE:
 		return le32_to_cpu(txq_timer->p2p_device);
+	case NL80211_IFTYPE_MONITOR:
+		return default_timeout;
 	default:
 		WARN_ON(1);
 		return mvm->cfg->base_params->wd_timeout;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index f21fe59faccff835efe0db590c437f419a89ccb4..ccd7c33c4c2823e3374934a7e25da8cac82996a1 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -553,6 +553,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x271B, 0x0214, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x271C, 0x0214, iwl9260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x2720, 0x0034, iwl9560_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x2720, 0x0038, iwl9560_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x2720, 0x003C, iwl9560_2ac_cfg)},
@@ -664,6 +665,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x2720, 0x0310, iwla000_2ac_cfg_hr_cdb)},
 	{IWL_PCI_DEVICE(0x40C0, 0x0000, iwla000_2ax_cfg_hr)},
 	{IWL_PCI_DEVICE(0x40C0, 0x0A10, iwla000_2ax_cfg_hr)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x0000, iwla000_2ax_cfg_hr)},
 
 #endif /* CONFIG_IWLMVM */
 
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index d749abeca3ae99866c33ffce095d09cfce8c8ff0..403e65c309d0ac8e22fa84f163c636b18bc5872c 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -670,11 +670,15 @@ static inline u8 iwl_pcie_get_cmd_index(struct iwl_txq *q, u32 index)
 	return index & (q->n_window - 1);
 }
 
-static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie,
+static inline void *iwl_pcie_get_tfd(struct iwl_trans *trans,
 				     struct iwl_txq *txq, int idx)
 {
-	return txq->tfds + trans_pcie->tfd_size * iwl_pcie_get_cmd_index(txq,
-									 idx);
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+	if (trans->cfg->use_tfh)
+		idx = iwl_pcie_get_cmd_index(txq, idx);
+
+	return txq->tfds + trans_pcie->tfd_size * idx;
 }
 
 static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index c59f4581e97271cc35d07bed98876d300384fa9c..ac05fd1e74c4c80308caf5b630ffd29a6968f888 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -49,6 +49,7 @@
  *
  *****************************************************************************/
 #include "iwl-trans.h"
+#include "iwl-prph.h"
 #include "iwl-context-info.h"
 #include "internal.h"
 
@@ -156,6 +157,11 @@ void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans, bool low_power)
 
 	trans_pcie->is_down = true;
 
+	/* Stop dbgc before stopping device */
+	iwl_write_prph(trans, DBGC_IN_SAMPLE, 0);
+	udelay(100);
+	iwl_write_prph(trans, DBGC_OUT_CTRL, 0);
+
 	/* tell the device to stop sending interrupts */
 	iwl_disable_interrupts(trans);
 
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index b7a51603465b20752616639cd3f663a6844dcdd8..4541c86881d604e16093eef51cb7ba48afb3685b 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -166,6 +166,7 @@ static void iwl_trans_pcie_dump_regs(struct iwl_trans *trans)
 		print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32,
 			       4, buf, i, 0);
 	}
+	goto out;
 
 err_read:
 	print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
@@ -1226,6 +1227,15 @@ static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
 
 	trans_pcie->is_down = true;
 
+	/* Stop dbgc before stopping device */
+	if (trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) {
+		iwl_set_bits_prph(trans, MON_BUFF_SAMPLE_CTL, 0x100);
+	} else {
+		iwl_write_prph(trans, DBGC_IN_SAMPLE, 0);
+		udelay(100);
+		iwl_write_prph(trans, DBGC_OUT_CTRL, 0);
+	}
+
 	/* tell the device to stop sending interrupts */
 	iwl_disable_interrupts(trans);
 
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 16b345f54ff0003db66f70f547338558bc7c4703..6d0a907d5ba58f8666d06e65744d1c179edf3013 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -171,8 +171,6 @@ static void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans,
 
 static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-
 	/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
 	 * idx is bounded by n_window
 	 */
@@ -181,7 +179,7 @@ static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
 	lockdep_assert_held(&txq->lock);
 
 	iwl_pcie_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
-				iwl_pcie_get_tfd(trans_pcie, txq, idx));
+				iwl_pcie_get_tfd(trans, txq, idx));
 
 	/* free SKB */
 	if (txq->entries) {
@@ -364,11 +362,9 @@ struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans,
 					    struct sk_buff *skb,
 					    struct iwl_cmd_meta *out_meta)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
-	struct iwl_tfh_tfd *tfd =
-		iwl_pcie_get_tfd(trans_pcie, txq, idx);
+	struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, idx);
 	dma_addr_t tb_phys;
 	bool amsdu;
 	int i, len, tb1_len, tb2_len, hdr_len;
@@ -565,8 +561,7 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
 	u8 group_id = iwl_cmd_groupid(cmd->id);
 	const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
 	u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
-	struct iwl_tfh_tfd *tfd =
-		iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
+	struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
 
 	memset(tfd, 0, sizeof(*tfd));
 
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index fed6d842a5e1dc444fe58ad203cb8fb5e3ab1c6f..3f85713c41dcc9291130f25873ef97bd702335f1 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -373,7 +373,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	int i, num_tbs;
-	void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index);
+	void *tfd = iwl_pcie_get_tfd(trans, txq, index);
 
 	/* Sanity check on number of chunks */
 	num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
@@ -2018,7 +2018,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
 	}
 
 	trace_iwlwifi_dev_tx(trans->dev, skb,
-			     iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
+			     iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
 			     trans_pcie->tfd_size,
 			     &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
 			     hdr_len);
@@ -2092,7 +2092,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
 		IEEE80211_CCMP_HDR_LEN : 0;
 
 	trace_iwlwifi_dev_tx(trans->dev, skb,
-			     iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
+			     iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
 			     trans_pcie->tfd_size,
 			     &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0);
 
@@ -2425,7 +2425,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 	memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
 	       IWL_FIRST_TB_SIZE);
 
-	tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
+	tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
 	/* Set up entry for this TFD in Tx byte-count array */
 	iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
 					 iwl_pcie_tfd_get_num_tbs(trans, tfd));
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 10b075a46b266218c53d1e5674c1789e1e0f3d80..e8189c07b41f6b450f135ef703ec4e01568e311d 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -684,6 +684,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
 	hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN);
 	hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
 					 IEEE80211_STYPE_NULLFUNC |
+					 IEEE80211_FCTL_TODS |
 					 (ps ? IEEE80211_FCTL_PM : 0));
 	hdr->duration_id = cpu_to_le16(0);
 	memcpy(hdr->addr1, vp->bssid, ETH_ALEN);
@@ -3215,7 +3216,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info)
 		if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info)))
 			continue;
 
-		skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+		skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
 		if (!skb) {
 			res = -ENOMEM;
 			goto out_err;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index d6dff347f8962e04208306c0ffea90956470cddd..78ebe494fef02b8d31505262f8c551e27aee7dc5 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -186,7 +186,7 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Obtain the queue to be used to transmit this packet */
 	index = skb_get_queue_mapping(skb);
 	if (index >= num_queues) {
-		pr_warn_ratelimited("Invalid queue %hu for packet on interface %s\n.",
+		pr_warn_ratelimited("Invalid queue %hu for packet on interface %s\n",
 				    index, vif->dev->name);
 		index %= num_queues;
 	}
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index c5a34671abdaf78a1257b869af817ecd148a6e0c..9bd7ddeeb6a5c7b29569e51ad2f469d9fe493a76 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1326,6 +1326,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 
 	netif_carrier_off(netdev);
 
+	xenbus_switch_state(dev, XenbusStateInitialising);
 	return netdev;
 
  exit:
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index e949e3302af4743472ac26c68e9f4d54a27bb11a..c586bcdb5190b1c9f6447cb9a380568a03840988 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
 	return ret;
 }
 
-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
-			struct log_entry *ent)
+static int btt_log_group_read(struct arena_info *arena, u32 lane,
+			struct log_group *log)
 {
 	return arena_read_bytes(arena,
-			arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
-			2 * LOG_ENT_SIZE, 0);
+			arena->logoff + (lane * LOG_GRP_SIZE), log,
+			LOG_GRP_SIZE, 0);
 }
 
 static struct dentry *debugfs_root;
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
 	debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
 	debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
 	debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+	debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
+	debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
 }
 
 static void btt_debugfs_init(struct btt *btt)
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt)
 	}
 }
 
+static u32 log_seq(struct log_group *log, int log_idx)
+{
+	return le32_to_cpu(log->ent[log_idx].seq);
+}
+
 /*
  * This function accepts two log entries, and uses the
  * sequence number to find the 'older' entry.
@@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt)
  *
  * TODO The logic feels a bit kludge-y. make it better..
  */
-static int btt_log_get_old(struct log_entry *ent)
+static int btt_log_get_old(struct arena_info *a, struct log_group *log)
 {
+	int idx0 = a->log_index[0];
+	int idx1 = a->log_index[1];
 	int old;
 
 	/*
@@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent)
 	 * the next time, the following logic works out to put this
 	 * (next) entry into [1]
 	 */
-	if (ent[0].seq == 0) {
-		ent[0].seq = cpu_to_le32(1);
+	if (log_seq(log, idx0) == 0) {
+		log->ent[idx0].seq = cpu_to_le32(1);
 		return 0;
 	}
 
-	if (ent[0].seq == ent[1].seq)
+	if (log_seq(log, idx0) == log_seq(log, idx1))
 		return -EINVAL;
-	if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+	if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
 		return -EINVAL;
 
-	if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
-		if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+	if (log_seq(log, idx0) < log_seq(log, idx1)) {
+		if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
 			old = 0;
 		else
 			old = 1;
 	} else {
-		if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+		if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
 			old = 1;
 		else
 			old = 0;
@@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
 {
 	int ret;
 	int old_ent, ret_ent;
-	struct log_entry log[2];
+	struct log_group log;
 
-	ret = btt_log_read_pair(arena, lane, log);
+	ret = btt_log_group_read(arena, lane, &log);
 	if (ret)
 		return -EIO;
 
-	old_ent = btt_log_get_old(log);
+	old_ent = btt_log_get_old(arena, &log);
 	if (old_ent < 0 || old_ent > 1) {
 		dev_err(to_dev(arena),
 				"log corruption (%d): lane %d seq [%d, %d]\n",
-			old_ent, lane, log[0].seq, log[1].seq);
+				old_ent, lane, log.ent[arena->log_index[0]].seq,
+				log.ent[arena->log_index[1]].seq);
 		/* TODO set error state? */
 		return -EIO;
 	}
@@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
 	ret_ent = (old_flag ? old_ent : (1 - old_ent));
 
 	if (ent != NULL)
-		memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+		memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
 
 	return ret_ent;
 }
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
 			u32 sub, struct log_entry *ent, unsigned long flags)
 {
 	int ret;
-	/*
-	 * Ignore the padding in log_entry for calculating log_half.
-	 * The entry is 'committed' when we write the sequence number,
-	 * and we want to ensure that that is the last thing written.
-	 * We don't bother writing the padding as that would be extra
-	 * media wear and write amplification
-	 */
-	unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
-	u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+	u32 group_slot = arena->log_index[sub];
+	unsigned int log_half = LOG_ENT_SIZE / 2;
 	void *src = ent;
+	u64 ns_off;
 
+	ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
+		(group_slot * LOG_ENT_SIZE);
 	/* split the 16B write into atomic, durable halves */
 	ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
 	if (ret)
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena)
 {
 	size_t logsize = arena->info2off - arena->logoff;
 	size_t chunk_size = SZ_4K, offset = 0;
-	struct log_entry log;
+	struct log_entry ent;
 	void *zerobuf;
 	int ret;
 	u32 i;
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena)
 	}
 
 	for (i = 0; i < arena->nfree; i++) {
-		log.lba = cpu_to_le32(i);
-		log.old_map = cpu_to_le32(arena->external_nlba + i);
-		log.new_map = cpu_to_le32(arena->external_nlba + i);
-		log.seq = cpu_to_le32(LOG_SEQ_INIT);
-		ret = __btt_log_write(arena, i, 0, &log, 0);
+		ent.lba = cpu_to_le32(i);
+		ent.old_map = cpu_to_le32(arena->external_nlba + i);
+		ent.new_map = cpu_to_le32(arena->external_nlba + i);
+		ent.seq = cpu_to_le32(LOG_SEQ_INIT);
+		ret = __btt_log_write(arena, i, 0, &ent, 0);
 		if (ret)
 			goto free;
 	}
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena)
 	return 0;
 }
 
+static bool ent_is_padding(struct log_entry *ent)
+{
+	return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
+		&& (ent->seq == 0);
+}
+
+/*
+ * Detecting valid log indices: We read a log group (see the comments in btt.h
+ * for a description of a 'log_group' and its 'slots'), and iterate over its
+ * four slots. We expect that a padding slot will be all-zeroes, and use this
+ * to detect a padding slot vs. an actual entry.
+ *
+ * If a log_group is in the initial state, i.e. hasn't been used since the
+ * creation of this BTT layout, it will have three of the four slots with
+ * zeroes. We skip over these log_groups for the detection of log_index. If
+ * all log_groups are in the initial state (i.e. the BTT has never been
+ * written to), it is safe to assume the 'new format' of log entries in slots
+ * (0, 1).
+ */
+static int log_set_indices(struct arena_info *arena)
+{
+	bool idx_set = false, initial_state = true;
+	int ret, log_index[2] = {-1, -1};
+	u32 i, j, next_idx = 0;
+	struct log_group log;
+	u32 pad_count = 0;
+
+	for (i = 0; i < arena->nfree; i++) {
+		ret = btt_log_group_read(arena, i, &log);
+		if (ret < 0)
+			return ret;
+
+		for (j = 0; j < 4; j++) {
+			if (!idx_set) {
+				if (ent_is_padding(&log.ent[j])) {
+					pad_count++;
+					continue;
+				} else {
+					/* Skip if index has been recorded */
+					if ((next_idx == 1) &&
+						(j == log_index[0]))
+						continue;
+					/* valid entry, record index */
+					log_index[next_idx] = j;
+					next_idx++;
+				}
+				if (next_idx == 2) {
+					/* two valid entries found */
+					idx_set = true;
+				} else if (next_idx > 2) {
+					/* too many valid indices */
+					return -ENXIO;
+				}
+			} else {
+				/*
+				 * once the indices have been set, just verify
+				 * that all subsequent log groups are either in
+				 * their initial state or follow the same
+				 * indices.
+				 */
+				if (j == log_index[0]) {
+					/* entry must be 'valid' */
+					if (ent_is_padding(&log.ent[j]))
+						return -ENXIO;
+				} else if (j == log_index[1]) {
+					;
+					/*
+					 * log_index[1] can be padding if the
+					 * lane never got used and it is still
+					 * in the initial state (three 'padding'
+					 * entries)
+					 */
+				} else {
+					/* entry must be invalid (padding) */
+					if (!ent_is_padding(&log.ent[j]))
+						return -ENXIO;
+				}
+			}
+		}
+		/*
+		 * If any of the log_groups have more than one valid,
+		 * non-padding entry, then the we are no longer in the
+		 * initial_state
+		 */
+		if (pad_count < 3)
+			initial_state = false;
+		pad_count = 0;
+	}
+
+	if (!initial_state && !idx_set)
+		return -ENXIO;
+
+	/*
+	 * If all the entries in the log were in the initial state,
+	 * assume new padding scheme
+	 */
+	if (initial_state)
+		log_index[1] = 1;
+
+	/*
+	 * Only allow the known permutations of log/padding indices,
+	 * i.e. (0, 1), and (0, 2)
+	 */
+	if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
+		; /* known index possibilities */
+	else {
+		dev_err(to_dev(arena), "Found an unknown padding scheme\n");
+		return -ENXIO;
+	}
+
+	arena->log_index[0] = log_index[0];
+	arena->log_index[1] = log_index[1];
+	dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
+	dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
+	return 0;
+}
+
 static int btt_rtt_init(struct arena_info *arena)
 {
 	arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
 	available -= 2 * BTT_PG_SIZE;
 
 	/* The log takes a fixed amount of space based on nfree */
-	logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
-				BTT_PG_SIZE);
+	logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
 	available -= logsize;
 
 	/* Calculate optimal split between map and data area */
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
 	arena->mapoff = arena->dataoff + datasize;
 	arena->logoff = arena->mapoff + mapsize;
 	arena->info2off = arena->logoff + logsize;
+
+	/* Default log indices are (0,1) */
+	arena->log_index[0] = 0;
+	arena->log_index[1] = 1;
 	return arena;
 }
 
@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt)
 		arena->external_lba_start = cur_nlba;
 		parse_arena_meta(arena, super, cur_off);
 
+		ret = log_set_indices(arena);
+		if (ret) {
+			dev_err(to_dev(arena),
+				"Unable to deduce log/padding indices\n");
+			goto out;
+		}
+
 		mutex_init(&arena->err_lock);
 		ret = btt_freelist_init(arena);
 		if (ret)
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index 578c2057524d396fbf7c2eee88804b58c1f17cfe..db3cb6d4d0d495df8978494ff6619e2923478d32 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -27,6 +27,7 @@
 #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
 #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
 #define MAP_ENT_NORMAL 0xC0000000
+#define LOG_GRP_SIZE sizeof(struct log_group)
 #define LOG_ENT_SIZE sizeof(struct log_entry)
 #define ARENA_MIN_SIZE (1UL << 24)	/* 16 MB */
 #define ARENA_MAX_SIZE (1ULL << 39)	/* 512 GB */
@@ -50,12 +51,52 @@ enum btt_init_state {
 	INIT_READY
 };
 
+/*
+ * A log group represents one log 'lane', and consists of four log entries.
+ * Two of the four entries are valid entries, and the remaining two are
+ * padding. Due to an old bug in the padding location, we need to perform a
+ * test to determine the padding scheme being used, and use that scheme
+ * thereafter.
+ *
+ * In kernels prior to 4.15, 'log group' would have actual log entries at
+ * indices (0, 2) and padding at indices (1, 3), where as the correct/updated
+ * format has log entries at indices (0, 1) and padding at indices (2, 3).
+ *
+ * Old (pre 4.15) format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------+-----------------+
+ *
+ * New format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq | lba/old/new/seq |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * |       pad       |       pad       |
+ * +-----------------+-----------------+
+ *
+ * We detect during start-up which format is in use, and set
+ * arena->log_index[(0, 1)] with the detected format.
+ */
+
 struct log_entry {
 	__le32 lba;
 	__le32 old_map;
 	__le32 new_map;
 	__le32 seq;
-	__le64 padding[2];
+};
+
+struct log_group {
+	struct log_entry ent[4];
 };
 
 struct btt_sb {
@@ -125,6 +166,8 @@ struct aligned_lock {
  * @list:		List head for list of arenas
  * @debugfs_dir:	Debugfs dentry
  * @flags:		Arena flags - may signify error states.
+ * @err_lock:		Mutex for synchronizing error clearing.
+ * @log_index:		Indices of the valid log entries in a log_group
  *
  * arena_info is a per-arena handle. Once an arena is narrowed down for an
  * IO, this struct is passed around for the duration of the IO.
@@ -157,6 +200,7 @@ struct arena_info {
 	/* Arena flags */
 	u32 flags;
 	struct mutex err_lock;
+	int log_index[2];
 };
 
 /**
@@ -176,6 +220,7 @@ struct arena_info {
  * @init_lock:		Mutex used for the BTT initialization
  * @init_state:		Flag describing the initialization state for the BTT
  * @num_arenas:		Number of arenas in the BTT instance
+ * @phys_bb:		Pointer to the namespace's badblocks structure
  */
 struct btt {
 	struct gendisk *btt_disk;
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 65cc171c721de8774baf05f9650f3bbacf511eec..2adada1a58551776186d6f6928a437d462734a48 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
 	u64 checksum, offset;
-	unsigned long align;
 	enum nd_pfn_mode mode;
 	struct nd_namespace_io *nsio;
+	unsigned long align, start_pad;
 	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 	struct nd_namespace_common *ndns = nd_pfn->ndns;
 	const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
@@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 
 	align = le32_to_cpu(pfn_sb->align);
 	offset = le64_to_cpu(pfn_sb->dataoff);
+	start_pad = le32_to_cpu(pfn_sb->start_pad);
 	if (align == 0)
 		align = 1UL << ilog2(offset);
 	mode = le32_to_cpu(pfn_sb->mode);
@@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 		return -EBUSY;
 	}
 
-	if ((align && !IS_ALIGNED(offset, align))
+	if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
 			|| !IS_ALIGNED(offset, PAGE_SIZE)) {
 		dev_err(&nd_pfn->dev,
 				"bad offset: %#llx dax disabled align: %#lx\n",
@@ -582,6 +583,12 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
 	return altmap;
 }
 
+static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
+{
+	return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
+			ALIGN_DOWN(phys, nd_pfn->align));
+}
+
 static int nd_pfn_init(struct nd_pfn *nd_pfn)
 {
 	u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
@@ -637,13 +644,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 	start = nsio->res.start;
 	size = PHYS_SECTION_ALIGN_UP(start + size) - start;
 	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
-				IORES_DESC_NONE) == REGION_MIXED) {
+				IORES_DESC_NONE) == REGION_MIXED
+			|| !IS_ALIGNED(start + resource_size(&nsio->res),
+				nd_pfn->align)) {
 		size = resource_size(&nsio->res);
-		end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
+		end_trunc = start + size - phys_pmem_align_down(nd_pfn,
+				start + size);
 	}
 
 	if (start_pad + end_trunc)
-		dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
+		dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
 				dev_name(&ndns->dev), start_pad + end_trunc);
 
 	/*
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f837d666cbd499c8e33a1514f55344a1796005a1..839650e0926af1aaaf21bafbdc1baa79cf907d76 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
 	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
 			NVME_DSM_MAX_RANGES);
 
-	queue->limits.discard_alignment = size;
+	queue->limits.discard_alignment = 0;
 	queue->limits.discard_granularity = size;
 
 	blk_queue_max_discard_sectors(queue, UINT_MAX);
@@ -1335,6 +1335,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
 		struct nvme_ns *ns, struct nvme_id_ns *id)
 {
 	sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
+	unsigned short bs = 1 << ns->lba_shift;
 	unsigned stream_alignment = 0;
 
 	if (ns->ctrl->nr_streams && ns->sws && ns->sgs)
@@ -1343,7 +1344,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	blk_mq_freeze_queue(disk->queue);
 	blk_integrity_unregister(disk);
 
-	blk_queue_logical_block_size(disk->queue, 1 << ns->lba_shift);
+	blk_queue_logical_block_size(disk->queue, bs);
+	blk_queue_physical_block_size(disk->queue, bs);
+	blk_queue_io_min(disk->queue, bs);
+
 	if (ns->ms && !ns->ext &&
 	    (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
 		nvme_init_integrity(disk, ns->ms, ns->pi_type);
@@ -1705,7 +1709,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
 		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
 		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
 	}
-	if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
+	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
+	    is_power_of_2(ctrl->max_hw_sectors))
 		blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
 	blk_queue_virt_boundary(q, ctrl->page_size - 1);
 	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
@@ -2869,7 +2874,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
 	nvme_set_queue_limits(ctrl, ns->queue);
-	nvme_setup_streams_ns(ctrl, ns);
 
 	id = nvme_identify_ns(ctrl, nsid);
 	if (!id)
@@ -2880,6 +2884,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	if (nvme_init_ns_head(ns, nsid, id, &new))
 		goto out_free_id;
+	nvme_setup_streams_ns(ctrl, ns);
 	
 #ifdef CONFIG_NVME_MULTIPATH
 	/*
@@ -2965,8 +2970,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		return;
 
 	if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
-		if (blk_get_integrity(ns->disk))
-			blk_integrity_unregister(ns->disk);
 		nvme_mpath_remove_disk_links(ns);
 		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
 					&nvme_ns_id_attr_group);
@@ -2974,6 +2977,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 			nvme_nvm_unregister_sysfs(ns);
 		del_gendisk(ns->disk);
 		blk_cleanup_queue(ns->queue);
+		if (blk_get_integrity(ns->disk))
+			blk_integrity_unregister(ns->disk);
 	}
 
 	mutex_lock(&ns->ctrl->subsys->lock);
@@ -2986,6 +2991,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 	mutex_unlock(&ns->ctrl->namespaces_mutex);
 
 	synchronize_srcu(&ns->head->srcu);
+	nvme_mpath_check_last_path(ns);
 	nvme_put_ns(ns);
 }
 
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 76b4fe6816a03533dbb6bbfbf30016110a26c648..894c2ccb3891e0b83e1c839f0b08f4cba5d179ae 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -74,6 +74,7 @@ static struct nvmf_host *nvmf_host_default(void)
 		return NULL;
 
 	kref_init(&host->ref);
+	uuid_gen(&host->id);
 	snprintf(host->nqn, NVMF_NQN_SIZE,
 		"nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);
 
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0a8af4daef8903f8ba983d345f1044498c57a975..794e66e4aa20115f4dc3a6b5fc12f706b2040bf4 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3221,7 +3221,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 		/* initiate nvme ctrl ref counting teardown */
 		nvme_uninit_ctrl(&ctrl->ctrl);
-		nvme_put_ctrl(&ctrl->ctrl);
 
 		/* Remove core ctrl ref. */
 		nvme_put_ctrl(&ctrl->ctrl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ea1aa5283e8ed9215537594a33a243d47b363e25..a00eabd0642738bbdbaf0b11d7f8e1747c996e3c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -417,6 +417,15 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 		rcu_assign_pointer(head->current_path, NULL);
 }
 struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
+
+static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+{
+	struct nvme_ns_head *head = ns->head;
+
+	if (head->disk && list_empty(&head->list))
+		kblockd_schedule_work(&head->requeue_work);
+}
+
 #else
 static inline void nvme_failover_req(struct request *req)
 {
@@ -448,6 +457,9 @@ static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
 static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
 }
+static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+{
+}
 #endif /* CONFIG_NVME_MULTIPATH */
 
 #ifdef CONFIG_NVM
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f5800c3c9082a6f038c129327bccd22f5eb861fe..d53550e612bc1350febf84c891e2a28c24a6ae34 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -448,12 +448,31 @@ static void **nvme_pci_iod_list(struct request *req)
 	return (void **)(iod->sg + blk_rq_nr_phys_segments(req));
 }
 
+static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	unsigned int avg_seg_size;
+
+	avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req),
+			blk_rq_nr_phys_segments(req));
+
+	if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
+		return false;
+	if (!iod->nvmeq->qid)
+		return false;
+	if (!sgl_threshold || avg_seg_size < sgl_threshold)
+		return false;
+	return true;
+}
+
 static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
 	int nseg = blk_rq_nr_phys_segments(rq);
 	unsigned int size = blk_rq_payload_bytes(rq);
 
+	iod->use_sgl = nvme_pci_use_sgls(dev, rq);
+
 	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
 		size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
 				iod->use_sgl);
@@ -604,8 +623,6 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
 	dma_addr_t prp_dma;
 	int nprps, i;
 
-	iod->use_sgl = false;
-
 	length -= (page_size - offset);
 	if (length <= 0) {
 		iod->first_dma = 0;
@@ -715,8 +732,6 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
 	int entries = iod->nents, i = 0;
 	dma_addr_t sgl_dma;
 
-	iod->use_sgl = true;
-
 	/* setting the transfer type as SGL */
 	cmd->flags = NVME_CMD_SGL_METABUF;
 
@@ -770,23 +785,6 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
 	return BLK_STS_OK;
 }
 
-static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
-{
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	unsigned int avg_seg_size;
-
-	avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req),
-			blk_rq_nr_phys_segments(req));
-
-	if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
-		return false;
-	if (!iod->nvmeq->qid)
-		return false;
-	if (!sgl_threshold || avg_seg_size < sgl_threshold)
-		return false;
-	return true;
-}
-
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
@@ -806,7 +804,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 				DMA_ATTR_NO_WARN))
 		goto out;
 
-	if (nvme_pci_use_sgls(dev, req))
+	if (iod->use_sgl)
 		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
 	else
 		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 37af56596be6ce8a0339ce2a3151f8dd98f8f854..2a0bba7f50cf43bb76e9d1f3073e24ba1edd9e1c 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -974,12 +974,18 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_start_queues(&ctrl->ctrl);
 
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+		/* state change failure should never happen */
+		WARN_ON_ONCE(1);
+		return;
+	}
+
 	nvme_rdma_reconnect_or_remove(ctrl);
 }
 
 static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
 {
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING))
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
 		return;
 
 	queue_work(nvme_wq, &ctrl->err_work);
@@ -1753,6 +1759,12 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_rdma_shutdown_ctrl(ctrl, false);
 
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+		/* state change failure should never happen */
+		WARN_ON_ONCE(1);
+		return;
+	}
+
 	ret = nvme_rdma_configure_admin_queue(ctrl, false);
 	if (ret)
 		goto out_fail;
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 7b75d9de55ab0d33939bf314a81ee01e26a6d1b1..6a018a0bd6ce851306dd82e5c21e680c626f99d5 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -1085,7 +1085,7 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
 		const char *buf, size_t count)
 {
 	struct fcloop_nport *nport = NULL, *tmpport;
-	struct fcloop_tport *tport;
+	struct fcloop_tport *tport = NULL;
 	u64 nodename, portname;
 	unsigned long flags;
 	int ret;
diff --git a/drivers/nvmem/meson-mx-efuse.c b/drivers/nvmem/meson-mx-efuse.c
index a346b4923550829eca8c7a8176c4ad39dbd54f7c..41d3a3c1104ec7f810c7f9c33a2cab414c475299 100644
--- a/drivers/nvmem/meson-mx-efuse.c
+++ b/drivers/nvmem/meson-mx-efuse.c
@@ -156,8 +156,8 @@ static int meson_mx_efuse_read(void *context, unsigned int offset,
 				 MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE,
 				 MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE);
 
-	for (i = offset; i < offset + bytes; i += efuse->config.word_size) {
-		addr = i / efuse->config.word_size;
+	for (i = 0; i < bytes; i += efuse->config.word_size) {
+		addr = (offset + i) / efuse->config.word_size;
 
 		err = meson_mx_efuse_read_addr(efuse, addr, &tmp);
 		if (err)
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index c454941b34ec8aa7d09e83e2f72328110728349a..ab988d88704da0d2d583a9280dba31d671a5fa14 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -695,7 +695,7 @@ int __of_changeset_apply_entries(struct of_changeset *ocs, int *ret_revert)
 /*
  * Returns 0 on success, a negative error value in case of an error.
  *
- * If multiple changset entry notification errors occur then only the
+ * If multiple changeset entry notification errors occur then only the
  * final notification error is reported.
  */
 int __of_changeset_apply_notify(struct of_changeset *ocs)
@@ -795,7 +795,7 @@ int __of_changeset_revert_entries(struct of_changeset *ocs, int *ret_apply)
 }
 
 /*
- * If multiple changset entry notification errors occur then only the
+ * If multiple changeset entry notification errors occur then only the
  * final notification error is reported.
  */
 int __of_changeset_revert_notify(struct of_changeset *ocs)
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 98258583abb0b40529056767c91401296e0013d4..a327be1d264b8cea389feeb91423bb24a6a0d688 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -81,6 +81,7 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio,
 	 * can be looked up later */
 	of_node_get(child);
 	phy->mdio.dev.of_node = child;
+	phy->mdio.dev.fwnode = of_fwnode_handle(child);
 
 	/* All data is now stored in the phy struct;
 	 * register it */
@@ -111,6 +112,7 @@ static int of_mdiobus_register_device(struct mii_bus *mdio,
 	 */
 	of_node_get(child);
 	mdiodev->dev.of_node = child;
+	mdiodev->dev.fwnode = of_fwnode_handle(child);
 
 	/* All data is now stored in the mdiodev struct; register it. */
 	rc = mdio_device_register(mdiodev);
@@ -206,6 +208,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 	mdio->phy_mask = ~0;
 
 	mdio->dev.of_node = np;
+	mdio->dev.fwnode = of_fwnode_handle(np);
 
 	/* Get bus level PHY reset GPIO details */
 	mdio->reset_delay_us = DEFAULT_GPIO_RESET_DELAY;
@@ -228,7 +231,12 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 			rc = of_mdiobus_register_phy(mdio, child, addr);
 		else
 			rc = of_mdiobus_register_device(mdio, child, addr);
-		if (rc)
+
+		if (rc == -ENODEV)
+			dev_err(&mdio->dev,
+				"MDIO device at address %d is missing.\n",
+				addr);
+		else if (rc)
 			goto unregister;
 	}
 
@@ -252,7 +260,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 
 			if (of_mdiobus_child_is_phy(child)) {
 				rc = of_mdiobus_register_phy(mdio, child, addr);
-				if (rc)
+				if (rc && rc != -ENODEV)
 					goto unregister;
 			}
 		}
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index c150abb9049d776d48c4ef5d38b821dcea09cef7..3981b7da4fa90e6d9d0b62c8d170ceb9589b7e3c 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -522,7 +522,7 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs,
 	struct device_node *node, *overlay_node;
 	struct fragment *fragment;
 	struct fragment *fragments;
-	int cnt, ret;
+	int cnt, id, ret;
 
 	/*
 	 * Warn for some issues.  Can not return -EINVAL for these until
@@ -543,9 +543,9 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs,
 
 	of_changeset_init(&ovcs->cset);
 
-	ovcs->id = idr_alloc(&ovcs_idr, ovcs, 1, 0, GFP_KERNEL);
-	if (ovcs->id <= 0)
-		return ovcs->id;
+	id = idr_alloc(&ovcs_idr, ovcs, 1, 0, GFP_KERNEL);
+	if (id <= 0)
+		return id;
 
 	cnt = 0;
 
@@ -572,18 +572,20 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs,
 
 	cnt = 0;
 	for_each_child_of_node(tree, node) {
+		overlay_node = of_get_child_by_name(node, "__overlay__");
+		if (!overlay_node)
+			continue;
+
 		fragment = &fragments[cnt];
-		fragment->overlay = of_get_child_by_name(node, "__overlay__");
-		if (fragment->overlay) {
-			fragment->target = find_target_node(node);
-			if (!fragment->target) {
-				of_node_put(fragment->overlay);
-				ret = -EINVAL;
-				goto err_free_fragments;
-			} else {
-				cnt++;
-			}
+		fragment->overlay = overlay_node;
+		fragment->target = find_target_node(node);
+		if (!fragment->target) {
+			of_node_put(fragment->overlay);
+			ret = -EINVAL;
+			goto err_free_fragments;
 		}
+
+		cnt++;
 	}
 
 	/*
@@ -611,6 +613,7 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs,
 		goto err_free_fragments;
 	}
 
+	ovcs->id = id;
 	ovcs->count = cnt;
 	ovcs->fragments = fragments;
 
@@ -619,7 +622,7 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs,
 err_free_fragments:
 	kfree(fragments);
 err_free_idr:
-	idr_remove(&ovcs_idr, ovcs->id);
+	idr_remove(&ovcs_idr, id);
 
 	pr_err("%s() failed, ret = %d\n", __func__, ret);
 
@@ -630,9 +633,8 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
 {
 	int i;
 
-	if (!ovcs->cset.entries.next)
-		return;
-	of_changeset_destroy(&ovcs->cset);
+	if (ovcs->cset.entries.next)
+		of_changeset_destroy(&ovcs->cset);
 
 	if (ovcs->id)
 		idr_remove(&ovcs_idr, ovcs->id);
@@ -660,14 +662,14 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
  * A non-zero return value will not have created the changeset if error is from:
  *   - parameter checks
  *   - building the changeset
- *   - overlay changset pre-apply notifier
+ *   - overlay changeset pre-apply notifier
  *
  * If an error is returned by an overlay changeset pre-apply notifier
  * then no further overlay changeset pre-apply notifier will be called.
  *
  * A non-zero return value will have created the changeset if error is from:
  *   - overlay changeset entry notifier
- *   - overlay changset post-apply notifier
+ *   - overlay changeset post-apply notifier
  *
  * If an error is returned by an overlay changeset post-apply notifier
  * then no further overlay changeset post-apply notifier will be called.
@@ -706,12 +708,11 @@ int of_overlay_apply(struct device_node *tree, int *ovcs_id)
 	}
 
 	of_overlay_mutex_lock();
+	mutex_lock(&of_mutex);
 
 	ret = of_resolve_phandles(tree);
 	if (ret)
-		goto err_overlay_unlock;
-
-	mutex_lock(&of_mutex);
+		goto err_free_overlay_changeset;
 
 	ret = init_overlay_changeset(ovcs, tree);
 	if (ret)
@@ -736,14 +737,13 @@ int of_overlay_apply(struct device_node *tree, int *ovcs_id)
 			devicetree_state_flags |= DTSF_APPLY_FAIL;
 		}
 		goto err_free_overlay_changeset;
-	} else {
-		ret = __of_changeset_apply_notify(&ovcs->cset);
-		if (ret)
-			pr_err("overlay changeset entry notify error %d\n",
-			       ret);
-		/* fall through */
 	}
 
+	ret = __of_changeset_apply_notify(&ovcs->cset);
+	if (ret)
+		pr_err("overlay changeset entry notify error %d\n", ret);
+	/* notify failure is not fatal, continue */
+
 	list_add_tail(&ovcs->ovcs_list, &ovcs_list);
 	*ovcs_id = ovcs->id;
 
@@ -755,18 +755,14 @@ int of_overlay_apply(struct device_node *tree, int *ovcs_id)
 			ret = ret_tmp;
 	}
 
-	mutex_unlock(&of_mutex);
-	of_overlay_mutex_unlock();
-
-	goto out;
-
-err_overlay_unlock:
-	of_overlay_mutex_unlock();
+	goto out_unlock;
 
 err_free_overlay_changeset:
 	free_overlay_changeset(ovcs);
 
+out_unlock:
 	mutex_unlock(&of_mutex);
+	of_overlay_mutex_unlock();
 
 out:
 	pr_debug("%s() err=%d\n", __func__, ret);
@@ -871,7 +867,7 @@ static int overlay_removal_is_ok(struct overlay_changeset *remove_ovcs)
  *
  * A non-zero return value will not revert the changeset if error is from:
  *   - parameter checks
- *   - overlay changset pre-remove notifier
+ *   - overlay changeset pre-remove notifier
  *   - overlay changeset entry revert
  *
  * If an error is returned by an overlay changeset pre-remove notifier
@@ -882,7 +878,7 @@ static int overlay_removal_is_ok(struct overlay_changeset *remove_ovcs)
  *
  * A non-zero return value will revert the changeset if error is from:
  *   - overlay changeset entry notifier
- *   - overlay changset post-remove notifier
+ *   - overlay changeset post-remove notifier
  *
  * If an error is returned by an overlay changeset post-remove notifier
  * then no further overlay changeset post-remove notifier will be called.
@@ -931,15 +927,13 @@ int of_overlay_remove(int *ovcs_id)
 		if (ret_apply)
 			devicetree_state_flags |= DTSF_REVERT_FAIL;
 		goto out_unlock;
-	} else {
-		ret = __of_changeset_revert_notify(&ovcs->cset);
-		if (ret) {
-			pr_err("overlay changeset entry notify error %d\n",
-			       ret);
-			/* fall through - changeset was reverted */
-		}
 	}
 
+	ret = __of_changeset_revert_notify(&ovcs->cset);
+	if (ret)
+		pr_err("overlay changeset entry notify error %d\n", ret);
+	/* notify failure is not fatal, continue */
+
 	*ovcs_id = 0;
 
 	ret_tmp = overlay_notify(ovcs, OF_OVERLAY_POST_REMOVE);
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index e568b1e82501b832f4f92cb30810749d914855da..0f8052f1355c0c09ac99a43335ab67927cdac52b 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -2165,7 +2165,6 @@ static int __init overlay_data_add(int onum)
 	ret = of_overlay_apply(info->np_overlay, &info->overlay_id);
 	if (ret < 0) {
 		pr_err("of_overlay_apply() (ret=%d), %d\n", ret, onum);
-		of_overlay_mutex_unlock();
 		goto out_free_np_overlay;
 	}
 
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index 0b3fb99d9b8992531412fae57d809984cdf853ab..7390fb8ca9d156c485f85aa1c0cc475988765b23 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -303,7 +303,7 @@ static void dino_mask_irq(struct irq_data *d)
 	struct dino_device *dino_dev = irq_data_get_irq_chip_data(d);
 	int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
 
-	DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq);
+	DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq);
 
 	/* Clear the matching bit in the IMR register */
 	dino_dev->imr &= ~(DINO_MASK_IRQ(local_irq));
@@ -316,7 +316,7 @@ static void dino_unmask_irq(struct irq_data *d)
 	int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
 	u32 tmp;
 
-	DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq);
+	DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq);
 
 	/*
 	** clear pending IRQ bits
@@ -396,7 +396,7 @@ static irqreturn_t dino_isr(int irq, void *intr_dev)
 	if (mask) {
 		if (--ilr_loop > 0)
 			goto ilr_again;
-		printk(KERN_ERR "Dino 0x%p: stuck interrupt %d\n", 
+		printk(KERN_ERR "Dino 0x%px: stuck interrupt %d\n",
 		       dino_dev->hba.base_addr, mask);
 		return IRQ_NONE;
 	}
@@ -553,7 +553,7 @@ dino_fixup_bus(struct pci_bus *bus)
         struct pci_dev *dev;
         struct dino_device *dino_dev = DINO_DEV(parisc_walk_tree(bus->bridge));
 
-	DBG(KERN_WARNING "%s(0x%p) bus %d platform_data 0x%p\n",
+	DBG(KERN_WARNING "%s(0x%px) bus %d platform_data 0x%px\n",
 	    __func__, bus, bus->busn_res.start,
 	    bus->bridge->platform_data);
 
@@ -854,7 +854,7 @@ static int __init dino_common_init(struct parisc_device *dev,
 	res->flags = IORESOURCE_IO; /* do not mark it busy ! */
 	if (request_resource(&ioport_resource, res) < 0) {
 		printk(KERN_ERR "%s: request I/O Port region failed "
-		       "0x%lx/%lx (hpa 0x%p)\n",
+		       "0x%lx/%lx (hpa 0x%px)\n",
 		       name, (unsigned long)res->start, (unsigned long)res->end,
 		       dino_dev->hba.base_addr);
 		return 1;
diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c
index 4dd9b1308128a4eb34aecd2824ce245f6a069fd1..99a80da6fd2e2c15f246076114106295c79a4d74 100644
--- a/drivers/parisc/eisa_eeprom.c
+++ b/drivers/parisc/eisa_eeprom.c
@@ -106,7 +106,7 @@ static int __init eisa_eeprom_init(void)
 		return retval;
 	}
 
-	printk(KERN_INFO "EISA EEPROM at 0x%p\n", eisa_eeprom_addr);
+	printk(KERN_INFO "EISA EEPROM at 0x%px\n", eisa_eeprom_addr);
 	return 0;
 }
 
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index a25fed52f7e94de4bd3dd5cb8b0922e1df8e81bf..41b740aed3a346e4bbc610959281649447f83bd4 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask)
 	iounmap(base_addr);
 }
 
+
+/*
+ * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
+ * seems rushed, so that many built-in components simply don't work.
+ * The following quirks disable the serial AUX port and the built-in ATI RV100
+ * Radeon 7000 graphics card which both don't have any external connectors and
+ * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as
+ * such makes those machines the only PARISC machines on which we can't use
+ * ttyS0 as boot console.
+ */
+static void quirk_diva_ati_card(struct pci_dev *dev)
+{
+	if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+	    dev->subsystem_device != 0x1292)
+		return;
+
+	dev_info(&dev->dev, "Hiding Diva built-in ATI card");
+	dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
+	quirk_diva_ati_card);
+
+static void quirk_diva_aux_disable(struct pci_dev *dev)
+{
+	if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+	    dev->subsystem_device != 0x1291)
+		return;
+
+	dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
+	dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
+	quirk_diva_aux_disable);
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 04dac6a42c9f287519379757a0cefd3c08e14e11..6b8d060d07de7d8ba2fc66dd3a0b0087470f4b82 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -985,9 +985,7 @@ static u32 hv_compose_msi_req_v1(
 	int_pkt->wslot.slot = slot;
 	int_pkt->int_desc.vector = vector;
 	int_pkt->int_desc.vector_count = 1;
-	int_pkt->int_desc.delivery_mode =
-		(apic->irq_delivery_mode == dest_LowestPrio) ?
-			dest_LowestPrio : dest_Fixed;
+	int_pkt->int_desc.delivery_mode = dest_Fixed;
 
 	/*
 	 * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in
@@ -1008,9 +1006,7 @@ static u32 hv_compose_msi_req_v2(
 	int_pkt->wslot.slot = slot;
 	int_pkt->int_desc.vector = vector;
 	int_pkt->int_desc.vector_count = 1;
-	int_pkt->int_desc.delivery_mode =
-		(apic->irq_delivery_mode == dest_LowestPrio) ?
-			dest_LowestPrio : dest_Fixed;
+	int_pkt->int_desc.delivery_mode = dest_Fixed;
 
 	/*
 	 * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
index 12796eccb2befd9170b1ba8119dc600ef356c7e8..52ab3cb0a0bfe065d8209201cbf03a7ef15cdfa4 100644
--- a/drivers/pci/host/pcie-rcar.c
+++ b/drivers/pci/host/pcie-rcar.c
@@ -1128,12 +1128,12 @@ static int rcar_pcie_probe(struct platform_device *pdev)
 	err = rcar_pcie_get_resources(pcie);
 	if (err < 0) {
 		dev_err(dev, "failed to request resources: %d\n", err);
-		goto err_free_bridge;
+		goto err_free_resource_list;
 	}
 
 	err = rcar_pcie_parse_map_dma_ranges(pcie, dev->of_node);
 	if (err)
-		goto err_free_bridge;
+		goto err_free_resource_list;
 
 	pm_runtime_enable(dev);
 	err = pm_runtime_get_sync(dev);
@@ -1176,9 +1176,9 @@ static int rcar_pcie_probe(struct platform_device *pdev)
 err_pm_disable:
 	pm_runtime_disable(dev);
 
-err_free_bridge:
-	pci_free_host_bridge(bridge);
+err_free_resource_list:
 	pci_free_resource_list(&pcie->resources);
+	pci_free_host_bridge(bridge);
 
 	return err;
 }
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 7f47bb72bf301cd62cdcaa48e2e560e2df06e3a3..14fd865a512096393149fd63a3707305648f276f 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -999,7 +999,7 @@ static int pci_pm_thaw_noirq(struct device *dev)
 	 * the subsequent "thaw" callbacks for the device.
 	 */
 	if (dev_pm_smart_suspend_and_suspended(dev)) {
-		dev->power.direct_complete = true;
+		dev_pm_skip_next_resume_phases(dev);
 		return 0;
 	}
 
@@ -1012,7 +1012,12 @@ static int pci_pm_thaw_noirq(struct device *dev)
 	if (pci_has_legacy_pm_support(pci_dev))
 		return pci_legacy_resume_early(dev);
 
-	pci_update_current_state(pci_dev, PCI_D0);
+	/*
+	 * pci_restore_state() requires the device to be in D0 (because of MSI
+	 * restoration among other things), so force it into D0 in case the
+	 * driver's "freeze" callbacks put it into a low-power state directly.
+	 */
+	pci_set_power_state(pci_dev, PCI_D0);
 	pci_restore_state(pci_dev);
 
 	if (drv && drv->pm && drv->pm->thaw_noirq)
diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c
index accaaaccb662fd24b1b3cf5f8e22154f5f46f9eb..6601ad0dfb3ad23c77865138b3db7bd0c4f55ae8 100644
--- a/drivers/phy/motorola/phy-cpcap-usb.c
+++ b/drivers/phy/motorola/phy-cpcap-usb.c
@@ -310,7 +310,7 @@ static int cpcap_usb_init_irq(struct platform_device *pdev,
 	int irq, error;
 
 	irq = platform_get_irq_byname(pdev, name);
-	if (!irq)
+	if (irq < 0)
 		return -ENODEV;
 
 	error = devm_request_threaded_irq(ddata->dev, irq, NULL,
diff --git a/drivers/phy/renesas/Kconfig b/drivers/phy/renesas/Kconfig
index cb09245e9b4c730528a7019b44f89c5abd20c7d9..c845facacb063e9a0bd2b244869d3c22c3409fbf 100644
--- a/drivers/phy/renesas/Kconfig
+++ b/drivers/phy/renesas/Kconfig
@@ -12,7 +12,9 @@ config PHY_RCAR_GEN3_USB2
 	tristate "Renesas R-Car generation 3 USB 2.0 PHY driver"
 	depends on ARCH_RENESAS
 	depends on EXTCON
+	depends on USB_SUPPORT
 	select GENERIC_PHY
+	select USB_COMMON
 	help
 	  Support for USB 2.0 PHY found on Renesas R-Car generation 3 SoCs.
 
diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c
index ee85fa0ca4b05bac340121cd798291708091e59e..7492c8978217f45e04f591898bfb0ddbaac2a217 100644
--- a/drivers/phy/rockchip/phy-rockchip-typec.c
+++ b/drivers/phy/rockchip/phy-rockchip-typec.c
@@ -1137,6 +1137,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev)
 		if (IS_ERR(phy)) {
 			dev_err(dev, "failed to create phy: %s\n",
 				child_np->name);
+			pm_runtime_disable(dev);
 			return PTR_ERR(phy);
 		}
 
@@ -1146,6 +1147,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev)
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
 	if (IS_ERR(phy_provider)) {
 		dev_err(dev, "Failed to register phy provider\n");
+		pm_runtime_disable(dev);
 		return PTR_ERR(phy_provider);
 	}
 
diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
index 4307bf0013e186cd859b779aaaf4c61ddd662a6c..63e916d4d0696156b244fb2c7e0e5f6de606c8e3 100644
--- a/drivers/phy/tegra/xusb.c
+++ b/drivers/phy/tegra/xusb.c
@@ -75,14 +75,14 @@ MODULE_DEVICE_TABLE(of, tegra_xusb_padctl_of_match);
 static struct device_node *
 tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name)
 {
-	/*
-	 * of_find_node_by_name() drops a reference, so make sure to grab one.
-	 */
-	struct device_node *np = of_node_get(padctl->dev->of_node);
+	struct device_node *pads, *np;
+
+	pads = of_get_child_by_name(padctl->dev->of_node, "pads");
+	if (!pads)
+		return NULL;
 
-	np = of_find_node_by_name(np, "pads");
-	if (np)
-		np = of_find_node_by_name(np, name);
+	np = of_get_child_by_name(pads, name);
+	of_node_put(pads);
 
 	return np;
 }
@@ -90,16 +90,16 @@ tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name)
 static struct device_node *
 tegra_xusb_pad_find_phy_node(struct tegra_xusb_pad *pad, unsigned int index)
 {
-	/*
-	 * of_find_node_by_name() drops a reference, so make sure to grab one.
-	 */
-	struct device_node *np = of_node_get(pad->dev.of_node);
+	struct device_node *np, *lanes;
 
-	np = of_find_node_by_name(np, "lanes");
-	if (!np)
+	lanes = of_get_child_by_name(pad->dev.of_node, "lanes");
+	if (!lanes)
 		return NULL;
 
-	return of_find_node_by_name(np, pad->soc->lanes[index].name);
+	np = of_get_child_by_name(lanes, pad->soc->lanes[index].name);
+	of_node_put(lanes);
+
+	return np;
 }
 
 static int
@@ -195,7 +195,7 @@ int tegra_xusb_pad_register(struct tegra_xusb_pad *pad,
 	unsigned int i;
 	int err;
 
-	children = of_find_node_by_name(pad->dev.of_node, "lanes");
+	children = of_get_child_by_name(pad->dev.of_node, "lanes");
 	if (!children)
 		return -ENODEV;
 
@@ -444,21 +444,21 @@ static struct device_node *
 tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type,
 			  unsigned int index)
 {
-	/*
-	 * of_find_node_by_name() drops a reference, so make sure to grab one.
-	 */
-	struct device_node *np = of_node_get(padctl->dev->of_node);
+	struct device_node *ports, *np;
+	char *name;
 
-	np = of_find_node_by_name(np, "ports");
-	if (np) {
-		char *name;
+	ports = of_get_child_by_name(padctl->dev->of_node, "ports");
+	if (!ports)
+		return NULL;
 
-		name = kasprintf(GFP_KERNEL, "%s-%u", type, index);
-		if (!name)
-			return ERR_PTR(-ENOMEM);
-		np = of_find_node_by_name(np, name);
-		kfree(name);
+	name = kasprintf(GFP_KERNEL, "%s-%u", type, index);
+	if (!name) {
+		of_node_put(ports);
+		return ERR_PTR(-ENOMEM);
 	}
+	np = of_get_child_by_name(ports, name);
+	kfree(name);
+	of_node_put(ports);
 
 	return np;
 }
@@ -847,7 +847,7 @@ static void tegra_xusb_remove_ports(struct tegra_xusb_padctl *padctl)
 
 static int tegra_xusb_padctl_probe(struct platform_device *pdev)
 {
-	struct device_node *np = of_node_get(pdev->dev.of_node);
+	struct device_node *np = pdev->dev.of_node;
 	const struct tegra_xusb_padctl_soc *soc;
 	struct tegra_xusb_padctl *padctl;
 	const struct of_device_id *match;
@@ -855,7 +855,7 @@ static int tegra_xusb_padctl_probe(struct platform_device *pdev)
 	int err;
 
 	/* for backwards compatibility with old device trees */
-	np = of_find_node_by_name(np, "pads");
+	np = of_get_child_by_name(np, "pads");
 	if (!np) {
 		dev_warn(&pdev->dev, "deprecated DT, using legacy driver\n");
 		return tegra_xusb_padctl_legacy_probe(pdev);
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index bdedb6325c72a5fa0966377d53678722d18f07eb..4471fd94e1fe1f48b953360ad76e638e88f1a7ff 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
 			clear_bit(i, chip->irq.valid_mask);
 	}
 
+	/*
+	 * The same set of machines in chv_no_valid_mask[] have incorrectly
+	 * configured GPIOs that generate spurious interrupts so we use
+	 * this same list to apply another quirk for them.
+	 *
+	 * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953.
+	 */
+	if (!need_valid_mask) {
+		/*
+		 * Mask all interrupts the community is able to generate
+		 * but leave the ones that can only generate GPEs unmasked.
+		 */
+		chv_writel(GENMASK(31, pctrl->community->nirqs),
+			   pctrl->regs + CHV_INTMASK);
+	}
+
 	/* Clear all interrupts */
 	chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
 
diff --git a/drivers/pinctrl/intel/pinctrl-denverton.c b/drivers/pinctrl/intel/pinctrl-denverton.c
index 4500880240f25745f410538fb1a53d42a5210cce..6572550cfe784104179f1fb946ee41e2a312b2ca 100644
--- a/drivers/pinctrl/intel/pinctrl-denverton.c
+++ b/drivers/pinctrl/intel/pinctrl-denverton.c
@@ -207,7 +207,7 @@ static const unsigned int dnv_uart0_pins[] = { 60, 61, 64, 65 };
 static const unsigned int dnv_uart0_modes[] = { 2, 3, 1, 1 };
 static const unsigned int dnv_uart1_pins[] = { 94, 95, 96, 97 };
 static const unsigned int dnv_uart2_pins[] = { 60, 61, 62, 63 };
-static const unsigned int dnv_uart2_modes[] = { 1, 1, 2, 2 };
+static const unsigned int dnv_uart2_modes[] = { 1, 2, 2, 2 };
 static const unsigned int dnv_emmc_pins[] = {
 	142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152,
 };
diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
index d45af31b86b41ef6c85591a040fe2dd46c60de3e..bdb8d174efefb7ee897c60bab250a126a53af729 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
@@ -408,12 +408,21 @@ static int armada_37xx_gpio_direction_output(struct gpio_chip *chip,
 {
 	struct armada_37xx_pinctrl *info = gpiochip_get_data(chip);
 	unsigned int reg = OUTPUT_EN;
-	unsigned int mask;
+	unsigned int mask, val, ret;
 
 	armada_37xx_update_reg(&reg, offset);
 	mask = BIT(offset);
 
-	return regmap_update_bits(info->regmap, reg, mask, mask);
+	ret = regmap_update_bits(info->regmap, reg, mask, mask);
+
+	if (ret)
+		return ret;
+
+	reg = OUTPUT_VAL;
+	val = value ? mask : 0;
+	regmap_update_bits(info->regmap, reg, mask, val);
+
+	return 0;
 }
 
 static int armada_37xx_gpio_get(struct gpio_chip *chip, unsigned int offset)
diff --git a/drivers/pinctrl/pinctrl-gemini.c b/drivers/pinctrl/pinctrl-gemini.c
index e9b83e291edf43e02e757625ffb4f8d251f6227e..c11b8f14d841e91268eb404b22bb017b83e05e8a 100644
--- a/drivers/pinctrl/pinctrl-gemini.c
+++ b/drivers/pinctrl/pinctrl-gemini.c
@@ -2322,7 +2322,7 @@ static const struct gemini_pin_conf *gemini_get_pin_conf(struct gemini_pmx *pmx,
 	int i;
 
 	for (i = 0; i < pmx->nconfs; i++) {
-		retconf = &gemini_confs_3516[i];
+		retconf = &pmx->confs[i];
 		if (retconf->pin == pin)
 			return retconf;
 	}
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index e6cd8de793e2ae66c791389ff686b42ad8084d84..3501491e5bfc8a5547c3ac820e01ad338dbb097b 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -222,6 +222,9 @@ static enum pin_config_param pcs_bias[] = {
  */
 static struct lock_class_key pcs_lock_class;
 
+/* Class for the IRQ request mutex */
+static struct lock_class_key pcs_request_class;
+
 /*
  * REVISIT: Reads and writes could eventually use regmap or something
  * generic. But at least on omaps, some mux registers are performance
@@ -1486,7 +1489,7 @@ static int pcs_irqdomain_map(struct irq_domain *d, unsigned int irq,
 	irq_set_chip_data(irq, pcs_soc);
 	irq_set_chip_and_handler(irq, &pcs->chip,
 				 handle_level_irq);
-	irq_set_lockdep_class(irq, &pcs_lock_class);
+	irq_set_lockdep_class(irq, &pcs_lock_class, &pcs_request_class);
 	irq_set_noprobe(irq);
 
 	return 0;
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index a276c61be217b41b1ce35d33875d8a532033eff0..e62ab087bfd8afd788236d11137405d4abfca3f5 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -290,7 +290,7 @@ static int stm32_gpio_domain_translate(struct irq_domain *d,
 }
 
 static int stm32_gpio_domain_activate(struct irq_domain *d,
-				      struct irq_data *irq_data, bool early)
+				      struct irq_data *irq_data, bool reserve)
 {
 	struct stm32_gpio_bank *bank = d->host_data;
 	struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent);
diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-a64.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-a64.c
index 4f2a726bbaeb234c1caf7454db0058da903305b9..f5f77432ce6f830677d09887a16b34e0af01b2a4 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun50i-a64.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-a64.c
@@ -428,7 +428,7 @@ static const struct sunxi_desc_pin a64_pins[] = {
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out"),
 		  SUNXI_FUNCTION(0x2, "mmc0"),		/* D3 */
-		  SUNXI_FUNCTION(0x4, "uart0")),	/* RX */
+		  SUNXI_FUNCTION(0x3, "uart0")),	/* RX */
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(F, 5),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out"),
diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-h5.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-h5.c
index 97b48336f84a3fdd6a30e98a19e954d625d3cfee..a78d7b922ef47529ce0708201ccd7b60a876959f 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun50i-h5.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-h5.c
@@ -535,14 +535,16 @@ static const struct sunxi_pinctrl_desc sun50i_h5_pinctrl_data_broken = {
 	.pins = sun50i_h5_pins,
 	.npins = ARRAY_SIZE(sun50i_h5_pins),
 	.irq_banks = 2,
-	.irq_read_needs_mux = true
+	.irq_read_needs_mux = true,
+	.disable_strict_mode = true,
 };
 
 static const struct sunxi_pinctrl_desc sun50i_h5_pinctrl_data = {
 	.pins = sun50i_h5_pins,
 	.npins = ARRAY_SIZE(sun50i_h5_pins),
 	.irq_banks = 3,
-	.irq_read_needs_mux = true
+	.irq_read_needs_mux = true,
+	.disable_strict_mode = true,
 };
 
 static int sun50i_h5_pinctrl_probe(struct platform_device *pdev)
diff --git a/drivers/pinctrl/sunxi/pinctrl-sun9i-a80.c b/drivers/pinctrl/sunxi/pinctrl-sun9i-a80.c
index 472ef0d91b9929c4628084a020374b68d3de3c86..5553c0eb0f41c420e422686bdacb6ea3c1c2c14a 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun9i-a80.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun9i-a80.c
@@ -145,19 +145,19 @@ static const struct sunxi_desc_pin sun9i_a80_pins[] = {
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out"),
 		  SUNXI_FUNCTION(0x3, "mcsi"),		/* MCLK */
-		  SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 14)),	/* PB_EINT14 */
+		  SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 14)),	/* PB_EINT14 */
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 15),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out"),
 		  SUNXI_FUNCTION(0x3, "mcsi"),		/* SCK */
 		  SUNXI_FUNCTION(0x4, "i2c4"),		/* SCK */
-		  SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 15)),	/* PB_EINT15 */
+		  SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 15)),	/* PB_EINT15 */
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 16),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out"),
 		  SUNXI_FUNCTION(0x3, "mcsi"),		/* SDA */
 		  SUNXI_FUNCTION(0x4, "i2c4"),		/* SDA */
-		  SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 16)),	/* PB_EINT16 */
+		  SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 16)),	/* PB_EINT16 */
 
 	/* Hole */
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 0),
diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c
index f3796164329efb9ea00aed180a8c1116b813c979..d4aeac3477f55086b69aafa0098ad2f3d617d508 100644
--- a/drivers/platform/x86/asus-wireless.c
+++ b/drivers/platform/x86/asus-wireless.c
@@ -118,6 +118,7 @@ static void asus_wireless_notify(struct acpi_device *adev, u32 event)
 		return;
 	}
 	input_report_key(data->idev, KEY_RFKILL, 1);
+	input_sync(data->idev);
 	input_report_key(data->idev, KEY_RFKILL, 0);
 	input_sync(data->idev);
 }
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index bf897b1832b188c24a92f33fdf74b33b25aeac43..cd4725e7e0b56d8981bc770ca80b562c874370f8 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -37,6 +37,7 @@
 
 struct quirk_entry {
 	u8 touchpad_led;
+	u8 kbd_led_levels_off_1;
 
 	int needs_kbd_timeouts;
 	/*
@@ -67,6 +68,10 @@ static struct quirk_entry quirk_dell_xps13_9333 = {
 	.kbd_timeouts = { 0, 5, 15, 60, 5 * 60, 15 * 60, -1 },
 };
 
+static struct quirk_entry quirk_dell_latitude_e6410 = {
+	.kbd_led_levels_off_1 = 1,
+};
+
 static struct platform_driver platform_driver = {
 	.driver = {
 		.name = "dell-laptop",
@@ -269,6 +274,15 @@ static const struct dmi_system_id dell_quirks[] __initconst = {
 		},
 		.driver_data = &quirk_dell_xps13_9333,
 	},
+	{
+		.callback = dmi_matched,
+		.ident = "Dell Latitude E6410",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6410"),
+		},
+		.driver_data = &quirk_dell_latitude_e6410,
+	},
 	{ }
 };
 
@@ -1149,6 +1163,9 @@ static int kbd_get_info(struct kbd_info *info)
 	units = (buffer->output[2] >> 8) & 0xFF;
 	info->levels = (buffer->output[2] >> 16) & 0xFF;
 
+	if (quirks && quirks->kbd_led_levels_off_1 && info->levels)
+		info->levels--;
+
 	if (units & BIT(0))
 		info->seconds = (buffer->output[3] >> 0) & 0xFF;
 	if (units & BIT(1))
diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index 39d2f451848332d8346201b9cc1cd08bb5e10427..fb25b20df316f39140a3af0f974e9f1fa30aabff 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -639,6 +639,8 @@ static int dell_wmi_events_set_enabled(bool enable)
 	int ret;
 
 	buffer = kzalloc(sizeof(struct calling_interface_buffer), GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
 	buffer->cmd_class = CLASS_INFO;
 	buffer->cmd_select = SELECT_APP_REGISTRATION;
 	buffer->input[0] = 0x10000;
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 791449a2370f4f10af698dbaf27bb3749d6d263a..daa68acbc9003b34615fc43b810c5fb77dd87ecf 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -1458,5 +1458,5 @@ static void __exit acpi_wmi_exit(void)
 	class_unregister(&wmi_bus_class);
 }
 
-subsys_initcall(acpi_wmi_init);
+subsys_initcall_sync(acpi_wmi_init);
 module_exit(acpi_wmi_exit);
diff --git a/drivers/s390/Makefile b/drivers/s390/Makefile
index e5225ad9c5b12fdd9d723704d66eb9ec2ff5b734..2fdab400c1fe0f918f2d8f6517f36d32b9ff90e0 100644
--- a/drivers/s390/Makefile
+++ b/drivers/s390/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the S/390 specific device drivers
 #
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 31f014b57bfc6600a86aefbdc3d512a6b3fa1b9b..bc27d716aa6b2d404863a22a4242c2ccbd9fc6e6 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 comment "S/390 block device drivers"
 	depends on S390 && BLOCK
 
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 66e008f7adb6c890a43199168dd118a814489797..d4e8dff673ccf321c6fefddcd9298eb51e189b3a 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -759,7 +759,7 @@ static void dasd_profile_end_add_data(struct dasd_profile_info *data,
 	/* in case of an overflow, reset the whole profile */
 	if (data->dasd_io_reqs == UINT_MAX) {
 			memset(data, 0, sizeof(*data));
-			getnstimeofday(&data->starttod);
+			ktime_get_real_ts64(&data->starttod);
 	}
 	data->dasd_io_reqs++;
 	data->dasd_io_sects += sectors;
@@ -894,7 +894,7 @@ void dasd_profile_reset(struct dasd_profile *profile)
 		return;
 	}
 	memset(data, 0, sizeof(*data));
-	getnstimeofday(&data->starttod);
+	ktime_get_real_ts64(&data->starttod);
 	spin_unlock_bh(&profile->lock);
 }
 
@@ -911,7 +911,7 @@ int dasd_profile_on(struct dasd_profile *profile)
 		kfree(data);
 		return 0;
 	}
-	getnstimeofday(&data->starttod);
+	ktime_get_real_ts64(&data->starttod);
 	profile->data = data;
 	spin_unlock_bh(&profile->lock);
 	return 0;
@@ -995,8 +995,8 @@ static void dasd_stats_array(struct seq_file *m, unsigned int *array)
 static void dasd_stats_seq_print(struct seq_file *m,
 				 struct dasd_profile_info *data)
 {
-	seq_printf(m, "start_time %ld.%09ld\n",
-		   data->starttod.tv_sec, data->starttod.tv_nsec);
+	seq_printf(m, "start_time %lld.%09ld\n",
+		   (s64)data->starttod.tv_sec, data->starttod.tv_nsec);
 	seq_printf(m, "total_requests %u\n", data->dasd_io_reqs);
 	seq_printf(m, "total_sectors %u\n", data->dasd_io_sects);
 	seq_printf(m, "total_pav %u\n", data->dasd_io_alias);
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index c94b606e0df8831e27a2c66a38ec19f43ad9a664..ee14d8e45c971863c6aef252ecbadba64d365826 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -2803,6 +2803,16 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr)
 		erp = dasd_3990_erp_handle_match_erp(cqr, erp);
 	}
 
+
+	/*
+	 * For path verification work we need to stick with the path that was
+	 * originally chosen so that the per path configuration data is
+	 * assigned correctly.
+	 */
+	if (test_bit(DASD_CQR_VERIFY_PATH, &erp->flags) && cqr->lpm) {
+		erp->lpm = cqr->lpm;
+	}
+
 	if (device->features & DASD_FEATURE_ERPLOG) {
 		/* print current erp_chain */
 		dev_err(&device->cdev->dev,
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 1a41ef49633875a08ba2021ffa729a39881ec102..a2edf2a7ace968f7e72beeed372a8a723e1c2d85 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -531,10 +531,12 @@ static int prefix_LRE(struct ccw1 *ccw, struct PFX_eckd_data *pfxdata,
 	pfxdata->validity.define_extent = 1;
 
 	/* private uid is kept up to date, conf_data may be outdated */
-	if (startpriv->uid.type != UA_BASE_DEVICE) {
+	if (startpriv->uid.type == UA_BASE_PAV_ALIAS)
 		pfxdata->validity.verify_base = 1;
-		if (startpriv->uid.type == UA_HYPER_PAV_ALIAS)
-			pfxdata->validity.hyper_pav = 1;
+
+	if (startpriv->uid.type == UA_HYPER_PAV_ALIAS) {
+		pfxdata->validity.verify_base = 1;
+		pfxdata->validity.hyper_pav = 1;
 	}
 
 	rc = define_extent(NULL, dedata, trk, totrk, cmd, basedev, blksize);
@@ -3415,10 +3417,12 @@ static int prepare_itcw(struct itcw *itcw,
 	pfxdata.validity.define_extent = 1;
 
 	/* private uid is kept up to date, conf_data may be outdated */
-	if (startpriv->uid.type != UA_BASE_DEVICE) {
+	if (startpriv->uid.type == UA_BASE_PAV_ALIAS)
+		pfxdata.validity.verify_base = 1;
+
+	if (startpriv->uid.type == UA_HYPER_PAV_ALIAS) {
 		pfxdata.validity.verify_base = 1;
-		if (startpriv->uid.type == UA_HYPER_PAV_ALIAS)
-			pfxdata.validity.hyper_pav = 1;
+		pfxdata.validity.hyper_pav = 1;
 	}
 
 	switch (cmd) {
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index b095a23bcc0c133c44f51b366aca53f790891461..96709b1a7bf8d8af0f4e0db7748cd5ac8e5a8650 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -441,7 +441,7 @@ struct dasd_profile_info {
 	unsigned int dasd_io_nr_req[32]; /* hist. of # of requests in chanq */
 
 	/* new data */
-	struct timespec starttod;	   /* time of start or last reset */
+	struct timespec64 starttod;	   /* time of start or last reset */
 	unsigned int dasd_io_alias;	   /* requests using an alias */
 	unsigned int dasd_io_tpm;	   /* requests using transport mode */
 	unsigned int dasd_read_reqs;	   /* total number of read  requests */
diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index 97c4c9fdd53da316934f14bf261bc1885bd98bb6..ab0b243a947d6f1a8725721ee3f12ae0da538dfd 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 comment "S/390 character device drivers"
 	depends on S390
 
diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index 05ac6ba15a53285f41e01ab415b776366ddb5e54..614b44e70a2818cbec5bfc14b61c09f16d5fa145 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -17,6 +17,8 @@ CFLAGS_REMOVE_sclp_early_core.o	+= $(CC_FLAGS_MARCH)
 CFLAGS_sclp_early_core.o		+= -march=z900
 endif
 
+CFLAGS_sclp_early_core.o		+= -D__NO_FORTIFY
+
 obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \
 	 sclp_cmd.o sclp_config.o sclp_cpi_sys.o sclp_ocf.o sclp_ctl.o \
 	 sclp_early.o sclp_early_core.o
diff --git a/drivers/s390/char/defkeymap.map b/drivers/s390/char/defkeymap.map
index 353b3f2688241d1077a29f2aef365113375cd810..f4c095612a025b016f86b44da52b66ba82907754 100644
--- a/drivers/s390/char/defkeymap.map
+++ b/drivers/s390/char/defkeymap.map
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 # Default keymap for 3270 (ebcdic codepage 037).
 keymaps 0-1,4-5
 
diff --git a/drivers/s390/cio/blacklist.h b/drivers/s390/cio/blacklist.h
index 95e25c1df9221a53bd2e77c594c968d63b1c295a..140e3e4ee2fd0ced47055eb0de2200dfd4e64069 100644
--- a/drivers/s390/cio/blacklist.h
+++ b/drivers/s390/cio/blacklist.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef S390_BLACKLIST_H
 #define S390_BLACKLIST_H
 
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 59b4a3370cd5d454cadc9e145bcb3a2dc9a321df..95b0efe28afb57650e8b15224f8997d04667d555 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -431,8 +431,8 @@ static void process_buffer_error(struct qdio_q *q, int count)
 	q->qdio_error = QDIO_ERROR_SLSB_STATE;
 
 	/* special handling for no target buffer empty */
-	if ((!q->is_input_q &&
-	    (q->sbal[q->first_to_check]->element[15].sflags) == 0x10)) {
+	if (queue_type(q) == QDIO_IQDIO_QFMT && !q->is_input_q &&
+	    q->sbal[q->first_to_check]->element[15].sflags == 0x10) {
 		qperf_inc(q, target_full);
 		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "OUTFULL FTC:%02x",
 			      q->first_to_check);
@@ -536,7 +536,8 @@ static int get_inbound_buffer_frontier(struct qdio_q *q)
 	case SLSB_P_INPUT_ERROR:
 		process_buffer_error(q, count);
 		q->first_to_check = add_buf(q->first_to_check, count);
-		atomic_sub(count, &q->nr_buf_used);
+		if (atomic_sub_return(count, &q->nr_buf_used) == 0)
+			qperf_inc(q, inbound_queue_full);
 		if (q->irq_ptr->perf_stat_enabled)
 			account_sbals_error(q, count);
 		break;
diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig
index b2837b1c70b750c229c9c42265dd232cedfa73e8..a782a207ad31c31f996833ed3d0ef3aa8ff04238 100644
--- a/drivers/s390/net/Kconfig
+++ b/drivers/s390/net/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 menu "S/390 network device drivers"
 	depends on NETDEVICES && S390
 
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 9cd569ef43ecfbaaf10680b3ac091267c1d1577f..badf42acbf95b8104d6167dfc811640b1ec311df 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -565,9 +565,9 @@ enum qeth_cq {
 };
 
 struct qeth_ipato {
-	int enabled;
-	int invert4;
-	int invert6;
+	bool enabled;
+	bool invert4;
+	bool invert6;
 	struct list_head entries;
 };
 
@@ -987,6 +987,9 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *,
 int qeth_set_features(struct net_device *, netdev_features_t);
 void qeth_recover_features(struct net_device *dev);
 netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
+netdev_features_t qeth_features_check(struct sk_buff *skb,
+				      struct net_device *dev,
+				      netdev_features_t features);
 int qeth_vm_request_mac(struct qeth_card *card);
 int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len);
 
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 98a7f84540ab2c51e483b7a4d9368170640fef77..3614df68830f8f6a4abd756c52ca4e1e72e8e1d1 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -20,6 +20,11 @@
 #include <linux/mii.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
+#include <linux/if_vlan.h>
+#include <linux/netdevice.h>
+#include <linux/netdev_features.h>
+#include <linux/skbuff.h>
+
 #include <net/iucv/af_iucv.h>
 #include <net/dsfield.h>
 
@@ -1475,9 +1480,9 @@ static int qeth_setup_card(struct qeth_card *card)
 	qeth_set_intial_options(card);
 	/* IP address takeover */
 	INIT_LIST_HEAD(&card->ipato.entries);
-	card->ipato.enabled = 0;
-	card->ipato.invert4 = 0;
-	card->ipato.invert6 = 0;
+	card->ipato.enabled = false;
+	card->ipato.invert4 = false;
+	card->ipato.invert6 = false;
 	/* init QDIO stuff */
 	qeth_init_qdio_info(card);
 	INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work);
@@ -5381,6 +5386,13 @@ int qeth_poll(struct napi_struct *napi, int budget)
 }
 EXPORT_SYMBOL_GPL(qeth_poll);
 
+static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd)
+{
+	if (!cmd->hdr.return_code)
+		cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
+	return cmd->hdr.return_code;
+}
+
 int qeth_setassparms_cb(struct qeth_card *card,
 			struct qeth_reply *reply, unsigned long data)
 {
@@ -6237,7 +6249,7 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
 				(struct qeth_checksum_cmd *)reply->param;
 
 	QETH_CARD_TEXT(card, 4, "chkdoccb");
-	if (cmd->hdr.return_code)
+	if (qeth_setassparms_inspect_rc(cmd))
 		return 0;
 
 	memset(chksum_cb, 0, sizeof(*chksum_cb));
@@ -6439,6 +6451,32 @@ netdev_features_t qeth_fix_features(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(qeth_fix_features);
 
+netdev_features_t qeth_features_check(struct sk_buff *skb,
+				      struct net_device *dev,
+				      netdev_features_t features)
+{
+	/* GSO segmentation builds skbs with
+	 *	a (small) linear part for the headers, and
+	 *	page frags for the data.
+	 * Compared to a linear skb, the header-only part consumes an
+	 * additional buffer element. This reduces buffer utilization, and
+	 * hurts throughput. So compress small segments into one element.
+	 */
+	if (netif_needs_gso(skb, features)) {
+		/* match skb_segment(): */
+		unsigned int doffset = skb->data - skb_mac_header(skb);
+		unsigned int hsize = skb_shinfo(skb)->gso_size;
+		unsigned int hroom = skb_headroom(skb);
+
+		/* linearize only if resulting skb allocations are order-0: */
+		if (SKB_DATA_ALIGN(hroom + doffset + hsize) <= SKB_MAX_HEAD(0))
+			features &= ~NETIF_F_SG;
+	}
+
+	return vlan_features_check(skb, features);
+}
+EXPORT_SYMBOL_GPL(qeth_features_check);
+
 static int __init qeth_core_init(void)
 {
 	int rc;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 93d7e345d18043e18fe75c2b87bd4ff1bddfce32..5863ea170ff26447630ed22acd5174db23bf0ee2 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -961,6 +961,7 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
 	.ndo_stop		= qeth_l2_stop,
 	.ndo_get_stats		= qeth_get_stats,
 	.ndo_start_xmit		= qeth_l2_hard_start_xmit,
+	.ndo_features_check	= qeth_features_check,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_rx_mode	= qeth_l2_set_rx_mode,
 	.ndo_do_ioctl		= qeth_do_ioctl,
@@ -1011,6 +1012,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 	if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
 		card->dev->hw_features = NETIF_F_SG;
 		card->dev->vlan_features = NETIF_F_SG;
+		card->dev->features |= NETIF_F_SG;
 		/* OSA 3S and earlier has no RX/TX support */
 		if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
 			card->dev->hw_features |= NETIF_F_IP_CSUM;
@@ -1029,8 +1031,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 
 	card->info.broadcast_capable = 1;
 	qeth_l2_request_initial_mac(card);
-	card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
-				  PAGE_SIZE;
 	SET_NETDEV_DEV(card->dev, &card->gdev->dev);
 	netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
 	netif_carrier_off(card->dev);
diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h
index 194ae9b577ccaeda712fece16868115c13fc38bb..e5833837b799eceb069e708fe88394731eecdda4 100644
--- a/drivers/s390/net/qeth_l3.h
+++ b/drivers/s390/net/qeth_l3.h
@@ -82,7 +82,7 @@ void qeth_l3_del_vipa(struct qeth_card *, enum qeth_prot_versions, const u8 *);
 int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *);
 void qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions,
 			const u8 *);
-int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *, struct qeth_ipaddr *);
+void qeth_l3_update_ipato(struct qeth_card *card);
 struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions);
 int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *);
 int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 0f8c12738b067d94184862777b3f2d7e2594a846..ef0961e186869dd6b8ab06c0ebd901524bc41ffc 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -164,8 +164,8 @@ static void qeth_l3_convert_addr_to_bits(u8 *addr, u8 *bits, int len)
 	}
 }
 
-int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
-						struct qeth_ipaddr *addr)
+static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
+					     struct qeth_ipaddr *addr)
 {
 	struct qeth_ipato_entry *ipatoe;
 	u8 addr_bits[128] = {0, };
@@ -174,6 +174,8 @@ int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
 
 	if (!card->ipato.enabled)
 		return 0;
+	if (addr->type != QETH_IP_TYPE_NORMAL)
+		return 0;
 
 	qeth_l3_convert_addr_to_bits((u8 *) &addr->u, addr_bits,
 				  (addr->proto == QETH_PROT_IPV4)? 4:16);
@@ -290,8 +292,7 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
 		memcpy(addr, tmp_addr, sizeof(struct qeth_ipaddr));
 		addr->ref_counter = 1;
 
-		if (addr->type == QETH_IP_TYPE_NORMAL  &&
-				qeth_l3_is_addr_covered_by_ipato(card, addr)) {
+		if (qeth_l3_is_addr_covered_by_ipato(card, addr)) {
 			QETH_CARD_TEXT(card, 2, "tkovaddr");
 			addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
 		}
@@ -605,6 +606,27 @@ int qeth_l3_setrouting_v6(struct qeth_card *card)
 /*
  * IP address takeover related functions
  */
+
+/**
+ * qeth_l3_update_ipato() - Update 'takeover' property, for all NORMAL IPs.
+ *
+ * Caller must hold ip_lock.
+ */
+void qeth_l3_update_ipato(struct qeth_card *card)
+{
+	struct qeth_ipaddr *addr;
+	unsigned int i;
+
+	hash_for_each(card->ip_htable, i, addr, hnode) {
+		if (addr->type != QETH_IP_TYPE_NORMAL)
+			continue;
+		if (qeth_l3_is_addr_covered_by_ipato(card, addr))
+			addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
+		else
+			addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG;
+	}
+}
+
 static void qeth_l3_clear_ipato_list(struct qeth_card *card)
 {
 	struct qeth_ipato_entry *ipatoe, *tmp;
@@ -616,6 +638,7 @@ static void qeth_l3_clear_ipato_list(struct qeth_card *card)
 		kfree(ipatoe);
 	}
 
+	qeth_l3_update_ipato(card);
 	spin_unlock_bh(&card->ip_lock);
 }
 
@@ -640,8 +663,10 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card,
 		}
 	}
 
-	if (!rc)
+	if (!rc) {
 		list_add_tail(&new->entry, &card->ipato.entries);
+		qeth_l3_update_ipato(card);
+	}
 
 	spin_unlock_bh(&card->ip_lock);
 
@@ -664,6 +689,7 @@ void qeth_l3_del_ipato_entry(struct qeth_card *card,
 			    (proto == QETH_PROT_IPV4)? 4:16) &&
 		    (ipatoe->mask_bits == mask_bits)) {
 			list_del(&ipatoe->entry);
+			qeth_l3_update_ipato(card);
 			kfree(ipatoe);
 		}
 	}
@@ -1377,6 +1403,7 @@ qeth_l3_add_mc_to_hash(struct qeth_card *card, struct in_device *in4_dev)
 
 		tmp->u.a4.addr = be32_to_cpu(im4->multiaddr);
 		memcpy(tmp->mac, buf, sizeof(tmp->mac));
+		tmp->is_multicast = 1;
 
 		ipm = qeth_l3_ip_from_hash(card, tmp);
 		if (ipm) {
@@ -2918,6 +2945,7 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = {
 	.ndo_stop		= qeth_l3_stop,
 	.ndo_get_stats		= qeth_get_stats,
 	.ndo_start_xmit		= qeth_l3_hard_start_xmit,
+	.ndo_features_check	= qeth_features_check,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_rx_mode	= qeth_l3_set_multicast_list,
 	.ndo_do_ioctl		= qeth_do_ioctl,
@@ -2958,6 +2986,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 				card->dev->vlan_features = NETIF_F_SG |
 					NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
 					NETIF_F_TSO;
+				card->dev->features |= NETIF_F_SG;
 			}
 		}
 	} else if (card->info.type == QETH_CARD_TYPE_IQD) {
@@ -2985,8 +3014,8 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 				NETIF_F_HW_VLAN_CTAG_RX |
 				NETIF_F_HW_VLAN_CTAG_FILTER;
 	netif_keep_dst(card->dev);
-	card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
-				  PAGE_SIZE;
+	netif_set_gso_max_size(card->dev, (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
+					  PAGE_SIZE);
 
 	SET_NETDEV_DEV(card->dev, &card->gdev->dev);
 	netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index bd12fdf678bec32d7f40ff38e5c66a4c1e26f402..6ea2b528a64efbabee5782da7bf8c5d1bce3ff4e 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -370,8 +370,8 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct qeth_card *card = dev_get_drvdata(dev);
-	struct qeth_ipaddr *addr;
-	int i, rc = 0;
+	bool enable;
+	int rc = 0;
 
 	if (!card)
 		return -EINVAL;
@@ -384,25 +384,18 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev,
 	}
 
 	if (sysfs_streq(buf, "toggle")) {
-		card->ipato.enabled = (card->ipato.enabled)? 0 : 1;
-	} else if (sysfs_streq(buf, "1")) {
-		card->ipato.enabled = 1;
-		hash_for_each(card->ip_htable, i, addr, hnode) {
-				if ((addr->type == QETH_IP_TYPE_NORMAL) &&
-				qeth_l3_is_addr_covered_by_ipato(card, addr))
-					addr->set_flags |=
-					QETH_IPA_SETIP_TAKEOVER_FLAG;
-			}
-	} else if (sysfs_streq(buf, "0")) {
-		card->ipato.enabled = 0;
-		hash_for_each(card->ip_htable, i, addr, hnode) {
-			if (addr->set_flags &
-			QETH_IPA_SETIP_TAKEOVER_FLAG)
-				addr->set_flags &=
-				~QETH_IPA_SETIP_TAKEOVER_FLAG;
-			}
-	} else
+		enable = !card->ipato.enabled;
+	} else if (kstrtobool(buf, &enable)) {
 		rc = -EINVAL;
+		goto out;
+	}
+
+	if (card->ipato.enabled != enable) {
+		card->ipato.enabled = enable;
+		spin_lock_bh(&card->ip_lock);
+		qeth_l3_update_ipato(card);
+		spin_unlock_bh(&card->ip_lock);
+	}
 out:
 	mutex_unlock(&card->conf_mutex);
 	return rc ? rc : count;
@@ -428,20 +421,27 @@ static ssize_t qeth_l3_dev_ipato_invert4_store(struct device *dev,
 				const char *buf, size_t count)
 {
 	struct qeth_card *card = dev_get_drvdata(dev);
+	bool invert;
 	int rc = 0;
 
 	if (!card)
 		return -EINVAL;
 
 	mutex_lock(&card->conf_mutex);
-	if (sysfs_streq(buf, "toggle"))
-		card->ipato.invert4 = (card->ipato.invert4)? 0 : 1;
-	else if (sysfs_streq(buf, "1"))
-		card->ipato.invert4 = 1;
-	else if (sysfs_streq(buf, "0"))
-		card->ipato.invert4 = 0;
-	else
+	if (sysfs_streq(buf, "toggle")) {
+		invert = !card->ipato.invert4;
+	} else if (kstrtobool(buf, &invert)) {
 		rc = -EINVAL;
+		goto out;
+	}
+
+	if (card->ipato.invert4 != invert) {
+		card->ipato.invert4 = invert;
+		spin_lock_bh(&card->ip_lock);
+		qeth_l3_update_ipato(card);
+		spin_unlock_bh(&card->ip_lock);
+	}
+out:
 	mutex_unlock(&card->conf_mutex);
 	return rc ? rc : count;
 }
@@ -607,20 +607,27 @@ static ssize_t qeth_l3_dev_ipato_invert6_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct qeth_card *card = dev_get_drvdata(dev);
+	bool invert;
 	int rc = 0;
 
 	if (!card)
 		return -EINVAL;
 
 	mutex_lock(&card->conf_mutex);
-	if (sysfs_streq(buf, "toggle"))
-		card->ipato.invert6 = (card->ipato.invert6)? 0 : 1;
-	else if (sysfs_streq(buf, "1"))
-		card->ipato.invert6 = 1;
-	else if (sysfs_streq(buf, "0"))
-		card->ipato.invert6 = 0;
-	else
+	if (sysfs_streq(buf, "toggle")) {
+		invert = !card->ipato.invert6;
+	} else if (kstrtobool(buf, &invert)) {
 		rc = -EINVAL;
+		goto out;
+	}
+
+	if (card->ipato.invert6 != invert) {
+		card->ipato.invert6 = invert;
+		spin_lock_bh(&card->ip_lock);
+		qeth_l3_update_ipato(card);
+		spin_unlock_bh(&card->ip_lock);
+	}
+out:
 	mutex_unlock(&card->conf_mutex);
 	return rc ? rc : count;
 }
diff --git a/drivers/s390/scsi/Makefile b/drivers/s390/scsi/Makefile
index 9259039e886dfae36e53503cc722309ab7fcb244..9dda431ec8f3ff036a94e0bad7311721cf86e922 100644
--- a/drivers/s390/scsi/Makefile
+++ b/drivers/s390/scsi/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the S/390 specific device drivers
 #
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 403a639574e5ea10c5c8500141204ebc513bd7c1..d52265416da2af0da11cca770304f33ab203ad20 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1673,6 +1673,7 @@ struct aac_dev
 	struct aac_hba_map_info	hba_map[AAC_MAX_BUSES][AAC_MAX_TARGETS];
 	u8			adapter_shutdown;
 	u32			handle_pci_error;
+	bool			init_reset;
 };
 
 #define aac_adapter_interrupt(dev) \
@@ -1724,6 +1725,7 @@ struct aac_dev
 #define FIB_CONTEXT_FLAG_NATIVE_HBA		(0x00000010)
 #define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF	(0x00000020)
 #define FIB_CONTEXT_FLAG_SCSI_CMD	(0x00000040)
+#define FIB_CONTEXT_FLAG_EH_RESET	(0x00000080)
 
 /*
  *	Define the command values
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 525a652dab48e9923af67ce6e884154855df308c..80a8cb26cdea43c8252bc5afe68b2b72540ff045 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -467,35 +467,6 @@ int aac_queue_get(struct aac_dev * dev, u32 * index, u32 qid, struct hw_fib * hw
 	return 0;
 }
 
-#ifdef CONFIG_EEH
-static inline int aac_check_eeh_failure(struct aac_dev *dev)
-{
-	/* Check for an EEH failure for the given
-	 * device node. Function eeh_dev_check_failure()
-	 * returns 0 if there has not been an EEH error
-	 * otherwise returns a non-zero value.
-	 *
-	 * Need to be called before any PCI operation,
-	 * i.e.,before aac_adapter_check_health()
-	 */
-	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev->pdev);
-
-	if (eeh_dev_check_failure(edev)) {
-		/* The EEH mechanisms will handle this
-		 * error and reset the device if
-		 * necessary.
-		 */
-		return 1;
-	}
-	return 0;
-}
-#else
-static inline int aac_check_eeh_failure(struct aac_dev *dev)
-{
-	return 0;
-}
-#endif
-
 /*
  *	Define the highest level of host to adapter communication routines.
  *	These routines will support host to adapter FS commuication. These
@@ -701,7 +672,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
 					return -ETIMEDOUT;
 				}
 
-				if (aac_check_eeh_failure(dev))
+				if (unlikely(pci_channel_offline(dev->pdev)))
 					return -EFAULT;
 
 				if ((blink = aac_adapter_check_health(dev)) > 0) {
@@ -801,7 +772,7 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback,
 
 		spin_unlock_irqrestore(&fibptr->event_lock, flags);
 
-		if (aac_check_eeh_failure(dev))
+		if (unlikely(pci_channel_offline(dev->pdev)))
 			return -EFAULT;
 
 		fibptr->flags |= FIB_CONTEXT_FLAG_WAIT;
@@ -1583,6 +1554,7 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced, u8 reset_type)
 	 * will ensure that i/o is queisced and the card is flushed in that
 	 * case.
 	 */
+	aac_free_irq(aac);
 	aac_fib_map_free(aac);
 	dma_free_coherent(&aac->pdev->dev, aac->comm_size, aac->comm_addr,
 			  aac->comm_phys);
@@ -1590,7 +1562,6 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced, u8 reset_type)
 	aac->comm_phys = 0;
 	kfree(aac->queues);
 	aac->queues = NULL;
-	aac_free_irq(aac);
 	kfree(aac->fsa_dev);
 	aac->fsa_dev = NULL;
 
@@ -2511,8 +2482,8 @@ int aac_command_thread(void *data)
 			/* Synchronize our watches */
 			if (((NSEC_PER_SEC - (NSEC_PER_SEC / HZ)) > now.tv_nsec)
 			 && (now.tv_nsec > (NSEC_PER_SEC / HZ)))
-				difference = (((NSEC_PER_SEC - now.tv_nsec) * HZ)
-				  + NSEC_PER_SEC / 2) / NSEC_PER_SEC;
+				difference = HZ + HZ / 2 -
+					     now.tv_nsec / (NSEC_PER_SEC / HZ);
 			else {
 				if (now.tv_nsec > NSEC_PER_SEC / 2)
 					++now.tv_sec;
@@ -2536,6 +2507,10 @@ int aac_command_thread(void *data)
 		if (kthread_should_stop())
 			break;
 
+		/*
+		 * we probably want usleep_range() here instead of the
+		 * jiffies computation
+		 */
 		schedule_timeout(difference);
 
 		if (kthread_should_stop())
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index c9252b138c1fe0e21d217b0fb305cc45afc1545a..d55332de08f91ad8e54e1296867569a8fa109a34 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -1037,7 +1037,7 @@ static int aac_eh_bus_reset(struct scsi_cmnd* cmd)
 			info = &aac->hba_map[bus][cid];
 			if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
 			    info->devtype != AAC_DEVTYPE_NATIVE_RAW) {
-				fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
+				fib->flags |= FIB_CONTEXT_FLAG_EH_RESET;
 				cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
 			}
 		}
@@ -1680,6 +1680,9 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	aac->cardtype = index;
 	INIT_LIST_HEAD(&aac->entry);
 
+	if (aac_reset_devices || reset_devices)
+		aac->init_reset = true;
+
 	aac->fibs = kzalloc(sizeof(struct fib) * (shost->can_queue + AAC_NUM_MGT_FIB), GFP_KERNEL);
 	if (!aac->fibs)
 		goto out_free_host;
diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c
index 93ef7c37e568e0e2ca4a38d8a08dd1b987482626..6201666941717042e3c7a2a1cd5f459b41b3360c 100644
--- a/drivers/scsi/aacraid/rx.c
+++ b/drivers/scsi/aacraid/rx.c
@@ -561,11 +561,16 @@ int _aac_rx_init(struct aac_dev *dev)
 	dev->a_ops.adapter_sync_cmd = rx_sync_cmd;
 	dev->a_ops.adapter_enable_int = aac_rx_disable_interrupt;
 	dev->OIMR = status = rx_readb (dev, MUnit.OIMR);
-	if ((((status & 0x0c) != 0x0c) || aac_reset_devices || reset_devices) &&
-	  !aac_rx_restart_adapter(dev, 0, IOP_HWSOFT_RESET))
-		/* Make sure the Hardware FIFO is empty */
-		while ((++restart < 512) &&
-		  (rx_readl(dev, MUnit.OutboundQueue) != 0xFFFFFFFFL));
+
+	if (((status & 0x0c) != 0x0c) || dev->init_reset) {
+		dev->init_reset = false;
+		if (!aac_rx_restart_adapter(dev, 0, IOP_HWSOFT_RESET)) {
+			/* Make sure the Hardware FIFO is empty */
+			while ((++restart < 512) &&
+			       (rx_readl(dev, MUnit.OutboundQueue) != 0xFFFFFFFFL));
+		}
+	}
+
 	/*
 	 *	Check to see if the board panic'd while booting.
 	 */
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 0c9361c87ec8de8b853f6ccaa6132663a4b982bd..fde6b6aa86e38a1af487d94b1117f3ffc340c5a5 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -868,9 +868,13 @@ int aac_src_init(struct aac_dev *dev)
 	/* Failure to reset here is an option ... */
 	dev->a_ops.adapter_sync_cmd = src_sync_cmd;
 	dev->a_ops.adapter_enable_int = aac_src_disable_interrupt;
-	if ((aac_reset_devices || reset_devices) &&
-		!aac_src_restart_adapter(dev, 0, IOP_HWSOFT_RESET))
-		++restart;
+
+	if (dev->init_reset) {
+		dev->init_reset = false;
+		if (!aac_src_restart_adapter(dev, 0, IOP_HWSOFT_RESET))
+			++restart;
+	}
+
 	/*
 	 *	Check to see if the board panic'd while booting.
 	 */
@@ -1014,9 +1018,13 @@ int aac_srcv_init(struct aac_dev *dev)
 	/* Failure to reset here is an option ... */
 	dev->a_ops.adapter_sync_cmd = src_sync_cmd;
 	dev->a_ops.adapter_enable_int = aac_src_disable_interrupt;
-	if ((aac_reset_devices || reset_devices) &&
-		!aac_src_restart_adapter(dev, 0, IOP_HWSOFT_RESET))
-		++restart;
+
+	if (dev->init_reset) {
+		dev->init_reset = false;
+		if (!aac_src_restart_adapter(dev, 0, IOP_HWSOFT_RESET))
+			++restart;
+	}
+
 	/*
 	 *	Check to see if flash update is running.
 	 *	Wait for the adapter to be up and running. Wait up to 5 minutes
diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c
index 72ca2a2e08e259b70be45e8880354bf8291ef9c4..b2fa195adc7a3a6e405a6c23132060952916138f 100644
--- a/drivers/scsi/bfa/bfad_bsg.c
+++ b/drivers/scsi/bfa/bfad_bsg.c
@@ -3135,7 +3135,8 @@ bfad_im_bsg_vendor_request(struct bsg_job *job)
 	struct fc_bsg_request *bsg_request = job->request;
 	struct fc_bsg_reply *bsg_reply = job->reply;
 	uint32_t vendor_cmd = bsg_request->rqst_data.h_vendor.vendor_cmd[0];
-	struct bfad_im_port_s *im_port = shost_priv(fc_bsg_to_shost(job));
+	struct Scsi_Host *shost = fc_bsg_to_shost(job);
+	struct bfad_im_port_s *im_port = bfad_get_im_port(shost);
 	struct bfad_s *bfad = im_port->bfad;
 	void *payload_kbuf;
 	int rc = -EINVAL;
@@ -3350,7 +3351,8 @@ int
 bfad_im_bsg_els_ct_request(struct bsg_job *job)
 {
 	struct bfa_bsg_data *bsg_data;
-	struct bfad_im_port_s *im_port = shost_priv(fc_bsg_to_shost(job));
+	struct Scsi_Host *shost = fc_bsg_to_shost(job);
+	struct bfad_im_port_s *im_port = bfad_get_im_port(shost);
 	struct bfad_s *bfad = im_port->bfad;
 	bfa_bsg_fcpt_t *bsg_fcpt;
 	struct bfad_fcxp    *drv_fcxp;
diff --git a/drivers/scsi/bfa/bfad_im.c b/drivers/scsi/bfa/bfad_im.c
index 24e657a4ec80df3caf82eff0b284288acc875a28..c05d6e91e4bde9cde7e9146ad48bae69727d476f 100644
--- a/drivers/scsi/bfa/bfad_im.c
+++ b/drivers/scsi/bfa/bfad_im.c
@@ -546,6 +546,7 @@ int
 bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port,
 			struct device *dev)
 {
+	struct bfad_im_port_pointer *im_portp;
 	int error = 1;
 
 	mutex_lock(&bfad_mutex);
@@ -564,7 +565,8 @@ bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port,
 		goto out_free_idr;
 	}
 
-	im_port->shost->hostdata[0] = (unsigned long)im_port;
+	im_portp = shost_priv(im_port->shost);
+	im_portp->p = im_port;
 	im_port->shost->unique_id = im_port->idr_id;
 	im_port->shost->this_id = -1;
 	im_port->shost->max_id = MAX_FCP_TARGET;
@@ -748,7 +750,7 @@ bfad_scsi_host_alloc(struct bfad_im_port_s *im_port, struct bfad_s *bfad)
 
 	sht->sg_tablesize = bfad->cfg_data.io_max_sge;
 
-	return scsi_host_alloc(sht, sizeof(unsigned long));
+	return scsi_host_alloc(sht, sizeof(struct bfad_im_port_pointer));
 }
 
 void
diff --git a/drivers/scsi/bfa/bfad_im.h b/drivers/scsi/bfa/bfad_im.h
index c81ec2a77ef5034d3fdd3c70f1466002e012be10..06ce4ba2b7bc9e6e562a20d98de2a6a99fff93d1 100644
--- a/drivers/scsi/bfa/bfad_im.h
+++ b/drivers/scsi/bfa/bfad_im.h
@@ -69,6 +69,16 @@ struct bfad_im_port_s {
 	struct fc_vport *fc_vport;
 };
 
+struct bfad_im_port_pointer {
+	struct bfad_im_port_s *p;
+};
+
+static inline struct bfad_im_port_s *bfad_get_im_port(struct Scsi_Host *host)
+{
+	struct bfad_im_port_pointer *im_portp = shost_priv(host);
+	return im_portp->p;
+}
+
 enum bfad_itnim_state {
 	ITNIM_STATE_NONE,
 	ITNIM_STATE_ONLINE,
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index 5da46052e179c200a8e476f38551e470f6dd9173..21be672679fb5026120049b1609569bf8d25259a 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -904,10 +904,14 @@ static void fc_lport_recv_els_req(struct fc_lport *lport,
 		case ELS_FLOGI:
 			if (!lport->point_to_multipoint)
 				fc_lport_recv_flogi_req(lport, fp);
+			else
+				fc_rport_recv_req(lport, fp);
 			break;
 		case ELS_LOGO:
 			if (fc_frame_sid(fp) == FC_FID_FLOGI)
 				fc_lport_recv_logo_req(lport, fp);
+			else
+				fc_rport_recv_req(lport, fp);
 			break;
 		case ELS_RSCN:
 			lport->tt.disc_recv_req(lport, fp);
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index ca1566237ae7744703a9e3ae3533138c4e81af53..3183d63de4dab7f5b000f2b6da856eea70c6b900 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -2145,7 +2145,7 @@ void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
 		struct sas_rphy *rphy)
 {
 	struct domain_device *dev;
-	unsigned int reslen = 0;
+	unsigned int rcvlen = 0;
 	int ret = -EINVAL;
 
 	/* no rphy means no smp target support (ie aic94xx host) */
@@ -2179,12 +2179,12 @@ void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
 
 	ret = smp_execute_task_sg(dev, job->request_payload.sg_list,
 			job->reply_payload.sg_list);
-	if (ret > 0) {
-		/* positive number is the untransferred residual */
-		reslen = ret;
+	if (ret >= 0) {
+		/* bsg_job_done() requires the length received  */
+		rcvlen = job->reply_payload.payload_len - ret;
 		ret = 0;
 	}
 
 out:
-	bsg_job_done(job, ret, reslen);
+	bsg_job_done(job, ret, rcvlen);
 }
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 56faeb049b4ac50ec04783349fcea9b79c20583f..87c08ff37dddff46fed7841247a41f585ebc8f1d 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -753,12 +753,12 @@ lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp)
 	drqe.address_hi = putPaddrHigh(rqb_entry->dbuf.phys);
 	rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe);
 	if (rc < 0) {
-		(rqbp->rqb_free_buffer)(phba, rqb_entry);
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"6409 Cannot post to RQ %d: %x %x\n",
 				rqb_entry->hrq->queue_id,
 				rqb_entry->hrq->host_index,
 				rqb_entry->hrq->hba_index);
+		(rqbp->rqb_free_buffer)(phba, rqb_entry);
 	} else {
 		list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list);
 		rqbp->buffer_count++;
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index a4f28b7e4c65df81ef583eab878a3aa9fc45e0e4..e18877177f1b52d9c43ad3b991b858c80a6cc079 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
 		return req;
 
 	for_each_bio(bio) {
-		ret = blk_rq_append_bio(req, bio);
+		struct bio *bounce_bio = bio;
+
+		ret = blk_rq_append_bio(req, &bounce_bio);
 		if (ret)
 			return ERR_PTR(ret);
 	}
diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c
index 01f08c03f2c185dc3f7e1d423c3459da95fb6cbb..c3765d29fd3ff8167a647563b43be57e885dd6a9 100644
--- a/drivers/scsi/scsi_debugfs.c
+++ b/drivers/scsi/scsi_debugfs.c
@@ -8,9 +8,11 @@ void scsi_show_rq(struct seq_file *m, struct request *rq)
 {
 	struct scsi_cmnd *cmd = container_of(scsi_req(rq), typeof(*cmd), req);
 	int msecs = jiffies_to_msecs(jiffies - cmd->jiffies_at_alloc);
-	char buf[80];
+	const u8 *const cdb = READ_ONCE(cmd->cmnd);
+	char buf[80] = "(?)";
 
-	__scsi_format_command(buf, sizeof(buf), cmd->cmnd, cmd->cmd_len);
+	if (cdb)
+		__scsi_format_command(buf, sizeof(buf), cdb, cmd->cmd_len);
 	seq_printf(m, ", .cmd=%s, .retries=%d, allocated %d.%03d s ago", buf,
 		   cmd->retries, msecs / 1000, msecs % 1000);
 }
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 78d4aa8df675a1671df5daf12dfc7d9000f3cbe5..dfb8da83fa504c979e9ba0639b32a4cae2c8969c 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -34,7 +34,6 @@ struct scsi_dev_info_list_table {
 };
 
 
-static const char spaces[] = "                "; /* 16 of them */
 static blist_flags_t scsi_default_dev_flags;
 static LIST_HEAD(scsi_dev_info_list);
 static char scsi_dev_flags[256];
@@ -298,20 +297,13 @@ static void scsi_strcpy_devinfo(char *name, char *to, size_t to_length,
 	size_t from_length;
 
 	from_length = strlen(from);
-	strncpy(to, from, min(to_length, from_length));
-	if (from_length < to_length) {
-		if (compatible) {
-			/*
-			 * NUL terminate the string if it is short.
-			 */
-			to[from_length] = '\0';
-		} else {
-			/*
-			 * space pad the string if it is short.
-			 */
-			strncpy(&to[from_length], spaces,
-				to_length - from_length);
-		}
+	/* This zero-pads the destination */
+	strncpy(to, from, to_length);
+	if (from_length < to_length && !compatible) {
+		/*
+		 * space pad the string if it is short.
+		 */
+		memset(&to[from_length], ' ', to_length - from_length);
 	}
 	if (from_length > to_length)
 		 printk(KERN_WARNING "%s: %s string '%s' is too long\n",
@@ -382,10 +374,8 @@ int scsi_dev_info_list_add_keyed(int compatible, char *vendor, char *model,
 			    model, compatible);
 
 	if (strflags)
-		devinfo->flags = simple_strtoul(strflags, NULL, 0);
-	else
-		devinfo->flags = flags;
-
+		flags = (__force blist_flags_t)simple_strtoul(strflags, NULL, 0);
+	devinfo->flags = flags;
 	devinfo->compatible = compatible;
 
 	if (compatible)
@@ -458,7 +448,8 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
 			/*
 			 * vendor strings must be an exact match
 			 */
-			if (vmax != strlen(devinfo->vendor) ||
+			if (vmax != strnlen(devinfo->vendor,
+					    sizeof(devinfo->vendor)) ||
 			    memcmp(devinfo->vendor, vskip, vmax))
 				continue;
 
@@ -466,7 +457,7 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
 			 * @model specifies the full string, and
 			 * must be larger or equal to devinfo->model
 			 */
-			mlen = strlen(devinfo->model);
+			mlen = strnlen(devinfo->model, sizeof(devinfo->model));
 			if (mmax < mlen || memcmp(devinfo->model, mskip, mlen))
 				continue;
 			return devinfo;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 1cbc497e00bd95ffff6ee2020f06531f8059846c..d9ca1dfab154c83f79ba1dab210f1a3521d1ea5d 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1967,6 +1967,8 @@ static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx)
 out_put_device:
 	put_device(&sdev->sdev_gendev);
 out:
+	if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev))
+		blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
 	return false;
 }
 
@@ -2148,11 +2150,13 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 		q->limits.cluster = 0;
 
 	/*
-	 * set a reasonable default alignment on word boundaries: the
-	 * host and device may alter it using
-	 * blk_queue_update_dma_alignment() later.
+	 * Set a reasonable default alignment:  The larger of 32-byte (dword),
+	 * which is a common minimum for HBAs, and the minimum DMA alignment,
+	 * which is set by the platform.
+	 *
+	 * Devices that require a bigger alignment can increase it later.
 	 */
-	blk_queue_dma_alignment(q, 0x03);
+	blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment()) - 1);
 }
 EXPORT_SYMBOL_GPL(__scsi_init_queue);
 
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index be5e919db0e8cd9e713727a91bc46923673ea556..0880d975eed3a56c58d27172bfd18c1a59da5d4b 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -770,7 +770,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
  *     SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
 static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
-		int *bflags, int async)
+		blist_flags_t *bflags, int async)
 {
 	int ret;
 
@@ -1049,14 +1049,15 @@ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq,
  *   - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
 static int scsi_probe_and_add_lun(struct scsi_target *starget,
-				  u64 lun, int *bflagsp,
+				  u64 lun, blist_flags_t *bflagsp,
 				  struct scsi_device **sdevp,
 				  enum scsi_scan_mode rescan,
 				  void *hostdata)
 {
 	struct scsi_device *sdev;
 	unsigned char *result;
-	int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
+	blist_flags_t bflags;
+	int res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
 	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
 
 	/*
@@ -1201,7 +1202,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
  *     Modifies sdevscan->lun.
  **/
 static void scsi_sequential_lun_scan(struct scsi_target *starget,
-				     int bflags, int scsi_level,
+				     blist_flags_t bflags, int scsi_level,
 				     enum scsi_scan_mode rescan)
 {
 	uint max_dev_lun;
@@ -1292,7 +1293,7 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget,
  *     0: scan completed (or no memory, so further scanning is futile)
  *     1: could not scan with REPORT LUN
  **/
-static int scsi_report_lun_scan(struct scsi_target *starget, int bflags,
+static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags,
 				enum scsi_scan_mode rescan)
 {
 	unsigned char scsi_cmd[MAX_COMMAND_SIZE];
@@ -1538,7 +1539,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
 		unsigned int id, u64 lun, enum scsi_scan_mode rescan)
 {
 	struct Scsi_Host *shost = dev_to_shost(parent);
-	int bflags = 0;
+	blist_flags_t bflags = 0;
 	int res;
 	struct scsi_target *starget;
 
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 50e7d7e4a86179b9a47d18569bb759be0b674b88..26ce17178401b645bd9548e49dd11fecb3babf8c 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -967,7 +967,8 @@ sdev_show_wwid(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL);
 
-#define BLIST_FLAG_NAME(name) [ilog2(BLIST_##name)] = #name
+#define BLIST_FLAG_NAME(name)					\
+	[ilog2((__force unsigned int)BLIST_##name)] = #name
 static const char *const sdev_bflags_name[] = {
 #include "scsi_devinfo_tbl.c"
 };
@@ -984,7 +985,7 @@ sdev_show_blacklist(struct device *dev, struct device_attribute *attr,
 	for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) {
 		const char *name = NULL;
 
-		if (!(sdev->sdev_bflags & BIT(i)))
+		if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i)))
 			continue;
 		if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i])
 			name = sdev_bflags_name[i];
@@ -1414,7 +1415,10 @@ static void __scsi_remove_target(struct scsi_target *starget)
 		 * check.
 		 */
 		if (sdev->channel != starget->channel ||
-		    sdev->id != starget->id ||
+		    sdev->id != starget->id)
+			continue;
+		if (sdev->sdev_state == SDEV_DEL ||
+		    sdev->sdev_state == SDEV_CANCEL ||
 		    !get_device(&sdev->sdev_gendev))
 			continue;
 		spin_unlock_irqrestore(shost->host_lock, flags);
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index d0219e36080c3b79109ac405eb0cd726545585fc..10ebb213ddb33e2920e2fe83e60cc712a50c3002 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -50,14 +50,14 @@
 
 /* Our blacklist flags */
 enum {
-	SPI_BLIST_NOIUS = 0x1,
+	SPI_BLIST_NOIUS = (__force blist_flags_t)0x1,
 };
 
 /* blacklist table, modelled on scsi_devinfo.c */
 static struct {
 	char *vendor;
 	char *model;
-	unsigned flags;
+	blist_flags_t flags;
 } spi_static_device_list[] __initdata = {
 	{"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS },
 	{"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS },
@@ -221,9 +221,11 @@ static int spi_device_configure(struct transport_container *tc,
 {
 	struct scsi_device *sdev = to_scsi_device(dev);
 	struct scsi_target *starget = sdev->sdev_target;
-	unsigned bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
-						      &sdev->inquiry[16],
-						      SCSI_DEVINFO_SPI);
+	blist_flags_t bflags;
+
+	bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
+					     &sdev->inquiry[16],
+					     SCSI_DEVINFO_SPI);
 
 	/* Populate the target capability fields with the values
 	 * gleaned from the device inquiry */
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 24fe685227169d1be705e9ddc1d8861cb962d2bf..a028ab3322a9a4ed3b37530c19268dc90d9098f4 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1312,6 +1312,7 @@ static int sd_init_command(struct scsi_cmnd *cmd)
 static void sd_uninit_command(struct scsi_cmnd *SCpnt)
 {
 	struct request *rq = SCpnt->request;
+	u8 *cmnd;
 
 	if (SCpnt->flags & SCMD_ZONE_WRITE_LOCK)
 		sd_zbc_write_unlock_zone(SCpnt);
@@ -1320,9 +1321,10 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
 		__free_page(rq->special_vec.bv_page);
 
 	if (SCpnt->cmnd != scsi_req(rq)->cmd) {
-		mempool_free(SCpnt->cmnd, sd_cdb_pool);
+		cmnd = SCpnt->cmnd;
 		SCpnt->cmnd = NULL;
 		SCpnt->cmd_len = 0;
+		mempool_free(cmnd, sd_cdb_pool);
 	}
 }
 
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 1b06cf0375dcdbd6f1780ab7535bbaa4c5742916..3b3d1d050cacaa3d83dc615e29691ccc2c5de87f 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -953,10 +953,11 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
 		case TEST_UNIT_READY:
 			break;
 		default:
-			set_host_byte(scmnd, DID_TARGET_FAILURE);
+			set_host_byte(scmnd, DID_ERROR);
 		}
 		break;
 	case SRB_STATUS_INVALID_LUN:
+		set_host_byte(scmnd, DID_NO_CONNECT);
 		do_work = true;
 		process_err_fn = storvsc_remove_lun;
 		break;
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 011c3369082c6f19772e227989cd7eb6b335179a..a355d989b414f9059abe8e04bad9bbf6d2459c34 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -6559,12 +6559,15 @@ static int ufshcd_config_vreg(struct device *dev,
 		struct ufs_vreg *vreg, bool on)
 {
 	int ret = 0;
-	struct regulator *reg = vreg->reg;
-	const char *name = vreg->name;
+	struct regulator *reg;
+	const char *name;
 	int min_uV, uA_load;
 
 	BUG_ON(!vreg);
 
+	reg = vreg->reg;
+	name = vreg->name;
+
 	if (regulator_count_voltages(reg) > 0) {
 		min_uV = on ? vreg->min_uV : 0;
 		ret = regulator_set_voltage(reg, min_uV, vreg->max_uV);
diff --git a/drivers/soc/amlogic/meson-gx-socinfo.c b/drivers/soc/amlogic/meson-gx-socinfo.c
index 89f4cf507be6819d468f84e438cf0c237ee8f257..f2d8c3c53ea44a1d3463f1d603d2ea17e7cb207d 100644
--- a/drivers/soc/amlogic/meson-gx-socinfo.c
+++ b/drivers/soc/amlogic/meson-gx-socinfo.c
@@ -20,8 +20,8 @@
 #define AO_SEC_SOCINFO_OFFSET	AO_SEC_SD_CFG8
 
 #define SOCINFO_MAJOR	GENMASK(31, 24)
-#define SOCINFO_MINOR	GENMASK(23, 16)
-#define SOCINFO_PACK	GENMASK(15, 8)
+#define SOCINFO_PACK	GENMASK(23, 16)
+#define SOCINFO_MINOR	GENMASK(15, 8)
 #define SOCINFO_MISC	GENMASK(7, 0)
 
 static const struct meson_gx_soc_id {
diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c
index 77fe55ce790c61a8835c4e2338a36be43dafcbac..d65345312527ce450b539964aa0465e1e6787b44 100644
--- a/drivers/spi/spi-armada-3700.c
+++ b/drivers/spi/spi-armada-3700.c
@@ -79,6 +79,7 @@
 #define A3700_SPI_BYTE_LEN		BIT(5)
 #define A3700_SPI_CLK_PRESCALE		BIT(0)
 #define A3700_SPI_CLK_PRESCALE_MASK	(0x1f)
+#define A3700_SPI_CLK_EVEN_OFFS		(0x10)
 
 #define A3700_SPI_WFIFO_THRS_BIT	28
 #define A3700_SPI_RFIFO_THRS_BIT	24
@@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a3700_spi *a3700_spi,
 
 	prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz);
 
+	/* For prescaler values over 15, we can only set it by steps of 2.
+	 * Starting from A3700_SPI_CLK_EVEN_OFFS, we set values from 0 up to
+	 * 30. We only use this range from 16 to 30.
+	 */
+	if (prescale > 15)
+		prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
+
 	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
 	val = val & ~A3700_SPI_CLK_PRESCALE_MASK;
 
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index f95da364c2832b0142158e12c97648aab81b7165..66947097102370d0f54ba2559abddab1ac812a5d 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -1661,12 +1661,12 @@ static int atmel_spi_remove(struct platform_device *pdev)
 	pm_runtime_get_sync(&pdev->dev);
 
 	/* reset the hardware and block queue progress */
-	spin_lock_irq(&as->lock);
 	if (as->use_dma) {
 		atmel_spi_stop_dma(master);
 		atmel_spi_release_dma(master);
 	}
 
+	spin_lock_irq(&as->lock);
 	spi_writel(as, CR, SPI_BIT(SWRST));
 	spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
 	spi_readl(as, SR);
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 2ce875764ca646a2bdfb803cae33465ab8fa1786..0835a8d88fb8f85ab5ae44a4aa74d94121d19d87 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -377,8 +377,8 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
 	/* Sets SPCMD */
 	rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0);
 
-	/* Enables SPI function in master mode */
-	rspi_write8(rspi, SPCR_SPE | SPCR_MSTR, RSPI_SPCR);
+	/* Sets RSPI mode */
+	rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR);
 
 	return 0;
 }
diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index c5cd635c28f388bec2cfd47b9a6c6c9dcec9e046..41410031f8e99e6a1d54b8f94990df0133356ced 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -525,7 +525,7 @@ static int sun4i_spi_probe(struct platform_device *pdev)
 
 static int sun4i_spi_remove(struct platform_device *pdev)
 {
-	pm_runtime_disable(&pdev->dev);
+	pm_runtime_force_suspend(&pdev->dev);
 
 	return 0;
 }
diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c
index bc7100b93dfcf0c24213f479f9a5fffc41666315..e0b9fe1d0e37d98a7243ca35a56b1d62e024e8b3 100644
--- a/drivers/spi/spi-xilinx.c
+++ b/drivers/spi/spi-xilinx.c
@@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
 	while (remaining_words) {
 		int n_words, tx_words, rx_words;
 		u32 sr;
+		int stalled;
 
 		n_words = min(remaining_words, xspi->buffer_size);
 
@@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
 
 		/* Read out all the data from the Rx FIFO */
 		rx_words = n_words;
+		stalled = 10;
 		while (rx_words) {
+			if (rx_words == n_words && !(stalled--) &&
+			    !(sr & XSPI_SR_TX_EMPTY_MASK) &&
+			    (sr & XSPI_SR_RX_EMPTY_MASK)) {
+				dev_err(&spi->dev,
+					"Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
+				xspi_init_hw(xspi);
+				return -EIO;
+			}
+
 			if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
 				xilinx_spi_rx(xspi);
 				rx_words--;
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 0f695df14c9d8f0a5c0a95ea8c28dc075c2aa36c..372ce9913e6dea373d4cdd06ca51f2419c7a7373 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -765,10 +765,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		break;
 	case ASHMEM_SET_SIZE:
 		ret = -EINVAL;
+		mutex_lock(&ashmem_mutex);
 		if (!asma->file) {
 			ret = 0;
 			asma->size = (size_t)arg;
 		}
+		mutex_unlock(&ashmem_mutex);
 		break;
 	case ASHMEM_GET_SIZE:
 		ret = asma->size;
diff --git a/drivers/staging/android/ion/Kconfig b/drivers/staging/android/ion/Kconfig
index a517b2d29f1bb6efce2f22a5520a009c1375bcfb..8f6494158d3d018b847ad989f6fa1c2648848094 100644
--- a/drivers/staging/android/ion/Kconfig
+++ b/drivers/staging/android/ion/Kconfig
@@ -37,7 +37,7 @@ config ION_CHUNK_HEAP
 
 config ION_CMA_HEAP
 	bool "Ion CMA heap support"
-	depends on ION && CMA
+	depends on ION && DMA_CMA
 	help
 	  Choose this option to enable CMA heaps with Ion. This heap is backed
 	  by the Contiguous Memory Allocator (CMA). If your system has these
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index a7d9b0e9857225abf7a6c88885d15e76afc6627b..f480885e346b69cb6ec881abefca0ee3aaf9fd15 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -346,7 +346,7 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
 	mutex_lock(&buffer->lock);
 	list_for_each_entry(a, &buffer->attachments, list) {
 		dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents,
-				    DMA_BIDIRECTIONAL);
+				    direction);
 	}
 	mutex_unlock(&buffer->lock);
 
@@ -368,7 +368,7 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
 	mutex_lock(&buffer->lock);
 	list_for_each_entry(a, &buffer->attachments, list) {
 		dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents,
-				       DMA_BIDIRECTIONAL);
+				       direction);
 	}
 	mutex_unlock(&buffer->lock);
 
diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c
index dd5545d9990a0f61ac2a1ba6249ec851602d1b3b..86196ffd2faf9a7733cd931fa48e24e5ace3fd22 100644
--- a/drivers/staging/android/ion/ion_cma_heap.c
+++ b/drivers/staging/android/ion/ion_cma_heap.c
@@ -39,9 +39,15 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
 	struct ion_cma_heap *cma_heap = to_cma_heap(heap);
 	struct sg_table *table;
 	struct page *pages;
+	unsigned long size = PAGE_ALIGN(len);
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	unsigned long align = get_order(size);
 	int ret;
 
-	pages = cma_alloc(cma_heap->cma, len, 0, GFP_KERNEL);
+	if (align > CONFIG_CMA_ALIGNMENT)
+		align = CONFIG_CMA_ALIGNMENT;
+
+	pages = cma_alloc(cma_heap->cma, nr_pages, align, GFP_KERNEL);
 	if (!pages)
 		return -ENOMEM;
 
@@ -53,7 +59,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
 	if (ret)
 		goto free_mem;
 
-	sg_set_page(table->sgl, pages, len, 0);
+	sg_set_page(table->sgl, pages, size, 0);
 
 	buffer->priv_virt = pages;
 	buffer->sg_table = table;
@@ -62,7 +68,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
 free_mem:
 	kfree(table);
 err:
-	cma_release(cma_heap->cma, pages, buffer->size);
+	cma_release(cma_heap->cma, pages, nr_pages);
 	return -ENOMEM;
 }
 
@@ -70,9 +76,10 @@ static void ion_cma_free(struct ion_buffer *buffer)
 {
 	struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap);
 	struct page *pages = buffer->priv_virt;
+	unsigned long nr_pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT;
 
 	/* release memory */
-	cma_release(cma_heap->cma, pages, buffer->size);
+	cma_release(cma_heap->cma, pages, nr_pages);
 	/* release sg table */
 	sg_free_table(buffer->sg_table);
 	kfree(buffer->sg_table);
diff --git a/drivers/staging/ccree/ssi_hash.c b/drivers/staging/ccree/ssi_hash.c
index d79090ed7f9c71ee5bcbed31e27a05a5da47bb83..2035835b62dcd797e41209acdf079f0daf9f4f8f 100644
--- a/drivers/staging/ccree/ssi_hash.c
+++ b/drivers/staging/ccree/ssi_hash.c
@@ -1769,7 +1769,7 @@ static int ssi_ahash_import(struct ahash_request *req, const void *in)
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
 	struct ahash_req_ctx *state = ahash_request_ctx(req);
 	u32 tmp;
-	int rc;
+	int rc = 0;
 
 	memcpy(&tmp, in, sizeof(u32));
 	if (tmp != CC_EXPORT_MAGIC) {
@@ -1778,9 +1778,12 @@ static int ssi_ahash_import(struct ahash_request *req, const void *in)
 	}
 	in += sizeof(u32);
 
-	rc = ssi_hash_init(state, ctx);
-	if (rc)
-		goto out;
+	/* call init() to allocate bufs if the user hasn't */
+	if (!state->digest_buff) {
+		rc = ssi_hash_init(state, ctx);
+		if (rc)
+			goto out;
+	}
 
 	dma_sync_single_for_cpu(dev, state->digest_buff_dma_addr,
 				ctx->inter_digestsize, DMA_BIDIRECTIONAL);
diff --git a/drivers/staging/comedi/drivers/ni_atmio.c b/drivers/staging/comedi/drivers/ni_atmio.c
index 2d62a8c5733241738f68bd87a53247f2059dbdf8..ae6ed96d7874c4ce34d7ce3574b9eded74dbb625 100644
--- a/drivers/staging/comedi/drivers/ni_atmio.c
+++ b/drivers/staging/comedi/drivers/ni_atmio.c
@@ -361,3 +361,8 @@ static struct comedi_driver ni_atmio_driver = {
 	.detach		= ni_atmio_detach,
 };
 module_comedi_driver(ni_atmio_driver);
+
+MODULE_AUTHOR("Comedi http://www.comedi.org");
+MODULE_DESCRIPTION("Comedi low-level driver");
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
index 986c2a40d9780ecbbc2226d9be6fa74876e85681..8267119ccc8e73108bc8e4f2edb777861192f353 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
@@ -487,21 +487,18 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
 			      ksocknal_nid2peerlist(id.nid));
 	}
 
-	route2 = NULL;
 	list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) {
-		if (route2->ksnr_ipaddr == ipaddr)
-			break;
-
-		route2 = NULL;
-	}
-	if (!route2) {
-		ksocknal_add_route_locked(peer, route);
-		route->ksnr_share_count++;
-	} else {
-		ksocknal_route_decref(route);
-		route2->ksnr_share_count++;
+		if (route2->ksnr_ipaddr == ipaddr) {
+			/* Route already exists, use the old one */
+			ksocknal_route_decref(route);
+			route2->ksnr_share_count++;
+			goto out;
+		}
 	}
-
+	/* Route doesn't already exist, add the new one */
+	ksocknal_add_route_locked(peer, route);
+	route->ksnr_share_count++;
+out:
 	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 
 	return 0;
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index 539a26444f3181279df5803a26c1e2e59c165067..7d49d4865298a304bff9dc11d9a1684621aead10 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -71,16 +71,12 @@ lnet_sock_ioctl(int cmd, unsigned long arg)
 	}
 
 	sock_filp = sock_alloc_file(sock, 0, NULL);
-	if (IS_ERR(sock_filp)) {
-		sock_release(sock);
-		rc = PTR_ERR(sock_filp);
-		goto out;
-	}
+	if (IS_ERR(sock_filp))
+		return PTR_ERR(sock_filp);
 
 	rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
 
 	fput(sock_filp);
-out:
 	return rc;
 }
 
diff --git a/drivers/staging/media/atomisp/include/linux/atomisp.h b/drivers/staging/media/atomisp/include/linux/atomisp.h
index b5533197226d4179f7d7c757167d60a4912fbcd3..15fa5679bae7d168d91a24e548a6873748c84720 100644
--- a/drivers/staging/media/atomisp/include/linux/atomisp.h
+++ b/drivers/staging/media/atomisp/include/linux/atomisp.h
@@ -208,14 +208,14 @@ struct atomisp_dis_vector {
 };
 
 
-/** DVS 2.0 Coefficient types. This structure contains 4 pointers to
+/* DVS 2.0 Coefficient types. This structure contains 4 pointers to
  *  arrays that contain the coeffients for each type.
  */
 struct atomisp_dvs2_coef_types {
-	short __user *odd_real; /**< real part of the odd coefficients*/
-	short __user *odd_imag; /**< imaginary part of the odd coefficients*/
-	short __user *even_real;/**< real part of the even coefficients*/
-	short __user *even_imag;/**< imaginary part of the even coefficients*/
+	short __user *odd_real; /** real part of the odd coefficients*/
+	short __user *odd_imag; /** imaginary part of the odd coefficients*/
+	short __user *even_real;/** real part of the even coefficients*/
+	short __user *even_imag;/** imaginary part of the even coefficients*/
 };
 
 /*
@@ -223,10 +223,10 @@ struct atomisp_dvs2_coef_types {
  * arrays that contain the statistics for each type.
  */
 struct atomisp_dvs2_stat_types {
-	int __user *odd_real; /**< real part of the odd statistics*/
-	int __user *odd_imag; /**< imaginary part of the odd statistics*/
-	int __user *even_real;/**< real part of the even statistics*/
-	int __user *even_imag;/**< imaginary part of the even statistics*/
+	int __user *odd_real; /** real part of the odd statistics*/
+	int __user *odd_imag; /** imaginary part of the odd statistics*/
+	int __user *even_real;/** real part of the even statistics*/
+	int __user *even_imag;/** imaginary part of the even statistics*/
 };
 
 struct atomisp_dis_coefficients {
@@ -390,16 +390,16 @@ struct atomisp_metadata_config {
  * Generic resolution structure.
  */
 struct atomisp_resolution {
-	uint32_t width;  /**< Width */
-	uint32_t height; /**< Height */
+	uint32_t width;  /** Width */
+	uint32_t height; /** Height */
 };
 
 /*
  * This specifies the coordinates (x,y)
  */
 struct atomisp_zoom_point {
-	int32_t x; /**< x coordinate */
-	int32_t y; /**< y coordinate */
+	int32_t x; /** x coordinate */
+	int32_t y; /** y coordinate */
 };
 
 /*
@@ -411,9 +411,9 @@ struct atomisp_zoom_region {
 };
 
 struct atomisp_dz_config {
-	uint32_t dx; /**< Horizontal zoom factor */
-	uint32_t dy; /**< Vertical zoom factor */
-	struct atomisp_zoom_region zoom_region; /**< region for zoom */
+	uint32_t dx; /** Horizontal zoom factor */
+	uint32_t dy; /** Vertical zoom factor */
+	struct atomisp_zoom_region zoom_region; /** region for zoom */
 };
 
 struct atomisp_parm {
@@ -758,7 +758,7 @@ enum atomisp_acc_arg_type {
 	ATOMISP_ACC_ARG_FRAME	     /* Frame argument */
 };
 
-/** ISP memories, isp2400 */
+/* ISP memories, isp2400 */
 enum atomisp_acc_memory {
 	ATOMISP_ACC_MEMORY_PMEM0 = 0,
 	ATOMISP_ACC_MEMORY_DMEM0,
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c
index 8a18c528cad4c73f6d387ae04a2e850059a600a9..debf0e3853ffa6bc213d9e6b8d3e058d71336914 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c
@@ -5187,7 +5187,7 @@ int get_frame_info_nop(struct atomisp_sub_device *asd,
 	return 0;
 }
 
-/**
+/*
  * Resets CSS parameters that depend on input resolution.
  *
  * Update params like CSS RAW binning, 2ppc mode and pp_input
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c
index 6e87aa5aab4c62c184ac1bece52649e4cf1e0b73..b7f9da014641c14f91d24b84750c76cb4b843d01 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c
@@ -4051,7 +4051,7 @@ int atomisp_css_get_formats_config(struct atomisp_sub_device *asd,
 int atomisp_css_get_zoom_factor(struct atomisp_sub_device *asd,
 					unsigned int *zoom)
 {
-	struct ia_css_dz_config dz_config;  /**< Digital Zoom */
+	struct ia_css_dz_config dz_config;  /** Digital Zoom */
 	struct ia_css_isp_config isp_config;
 	struct atomisp_device *isp = asd->isp;
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.h b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.h
index 685da0f48bab36fc2ca2fd52ae87d1831f5180d8..95669eedaad1121ddba9885e425cf9d3c65c781f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_ioctl32.h
@@ -28,17 +28,17 @@ struct atomisp_histogram32 {
 };
 
 struct atomisp_dvs2_stat_types32 {
-	compat_uptr_t odd_real; /**< real part of the odd statistics*/
-	compat_uptr_t odd_imag; /**< imaginary part of the odd statistics*/
-	compat_uptr_t even_real;/**< real part of the even statistics*/
-	compat_uptr_t even_imag;/**< imaginary part of the even statistics*/
+	compat_uptr_t odd_real; /** real part of the odd statistics*/
+	compat_uptr_t odd_imag; /** imaginary part of the odd statistics*/
+	compat_uptr_t even_real;/** real part of the even statistics*/
+	compat_uptr_t even_imag;/** imaginary part of the even statistics*/
 };
 
 struct atomisp_dvs2_coef_types32 {
-	compat_uptr_t odd_real; /**< real part of the odd coefficients*/
-	compat_uptr_t odd_imag; /**< imaginary part of the odd coefficients*/
-	compat_uptr_t even_real;/**< real part of the even coefficients*/
-	compat_uptr_t even_imag;/**< imaginary part of the even coefficients*/
+	compat_uptr_t odd_real; /** real part of the odd coefficients*/
+	compat_uptr_t odd_imag; /** imaginary part of the odd coefficients*/
+	compat_uptr_t even_real;/** real part of the even coefficients*/
+	compat_uptr_t even_imag;/** imaginary part of the even coefficients*/
 };
 
 struct atomisp_dvs2_statistics32 {
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h
index f3d61827ae8c034b034f08f69c886d9e5d8231a2..c3eba675da065d8833495b310f22f5dcc6805ca9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h
@@ -223,7 +223,7 @@ struct atomisp_subdev_params {
 
 	bool dis_proj_data_valid;
 
-	struct ia_css_dz_config   dz_config;  /**< Digital Zoom */
+	struct ia_css_dz_config   dz_config;  /** Digital Zoom */
 	struct ia_css_capture_config   capture_config;
 
 	struct atomisp_css_isp_config config;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/base/circbuf/src/circbuf.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/base/circbuf/src/circbuf.c
index 19bae1610fb66099dd56c323c8a9eaed794cb7f3..050d60f0894f7641e3307a6e73997ad824dfcf72 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/base/circbuf/src/circbuf.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/base/circbuf/src/circbuf.c
@@ -21,7 +21,7 @@
  * Forward declarations.
  *
  **********************************************************************/
-/**
+/*
  * @brief Read the oldest element from the circular buffer.
  * Read the oldest element WITHOUT checking whehter the
  * circular buffer is empty or not. The oldest element is
@@ -34,7 +34,7 @@
 static inline ia_css_circbuf_elem_t
 ia_css_circbuf_read(ia_css_circbuf_t *cb);
 
-/**
+/*
  * @brief Shift a chunk of elements in the circular buffer.
  * A chunk of elements (i.e. the ones from the "start" position
  * to the "chunk_src" position) are shifted in the circular buffer,
@@ -48,7 +48,7 @@ static inline void ia_css_circbuf_shift_chunk(ia_css_circbuf_t *cb,
 						   uint32_t chunk_src,
 						   uint32_t chunk_dest);
 
-/**
+/*
  * @brief Get the "val" field in the element.
  *
  * @param elem The pointer to the element.
@@ -63,7 +63,7 @@ ia_css_circbuf_elem_get_val(ia_css_circbuf_elem_t *elem);
  * Non-inline functions.
  *
  **********************************************************************/
-/**
+/*
  * @brief Create the circular buffer.
  * Refer to "ia_css_circbuf.h" for details.
  */
@@ -88,7 +88,7 @@ ia_css_circbuf_create(ia_css_circbuf_t *cb,
 	cb->elems = elems;
 }
 
-/**
+/*
  * @brief Destroy the circular buffer.
  * Refer to "ia_css_circbuf.h" for details.
  */
@@ -99,7 +99,7 @@ void ia_css_circbuf_destroy(ia_css_circbuf_t *cb)
 	cb->elems = NULL;
 }
 
-/**
+/*
  * @brief Pop a value out of the circular buffer.
  * Refer to "ia_css_circbuf.h" for details.
  */
@@ -116,7 +116,7 @@ uint32_t ia_css_circbuf_pop(ia_css_circbuf_t *cb)
 	return ret;
 }
 
-/**
+/*
  * @brief Extract a value out of the circular buffer.
  * Refer to "ia_css_circbuf.h" for details.
  */
@@ -166,7 +166,7 @@ uint32_t ia_css_circbuf_extract(ia_css_circbuf_t *cb, int offset)
 	return val;
 }
 
-/**
+/*
  * @brief Peek an element from the circular buffer.
  * Refer to "ia_css_circbuf.h" for details.
  */
@@ -180,7 +180,7 @@ uint32_t ia_css_circbuf_peek(ia_css_circbuf_t *cb, int offset)
 	return cb->elems[pos].val;
 }
 
-/**
+/*
  * @brief Get the value of an element from the circular buffer.
  * Refer to "ia_css_circbuf.h" for details.
  */
@@ -194,7 +194,7 @@ uint32_t ia_css_circbuf_peek_from_start(ia_css_circbuf_t *cb, int offset)
 	return cb->elems[pos].val;
 }
 
-/** @brief increase size of a circular buffer.
+/* @brief increase size of a circular buffer.
  * Use 'CAUTION' before using this function. This was added to
  * support / fix issue with increasing size for tagger only
  * Please refer to "ia_css_circbuf.h" for details.
@@ -252,7 +252,7 @@ bool ia_css_circbuf_increase_size(
  * Inline functions.
  *
  ****************************************************************/
-/**
+/*
  * @brief Get the "val" field in the element.
  * Refer to "Forward declarations" for details.
  */
@@ -262,7 +262,7 @@ ia_css_circbuf_elem_get_val(ia_css_circbuf_elem_t *elem)
 	return elem->val;
 }
 
-/**
+/*
  * @brief Read the oldest element from the circular buffer.
  * Refer to "Forward declarations" for details.
  */
@@ -282,7 +282,7 @@ ia_css_circbuf_read(ia_css_circbuf_t *cb)
 	return elem;
 }
 
-/**
+/*
  * @brief Shift a chunk of elements in the circular buffer.
  * Refer to "Forward declarations" for details.
  */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/pipe/interface/ia_css_pipe_binarydesc.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/pipe/interface/ia_css_pipe_binarydesc.h
index 616789d9b3f644086b1bf142f4ae2bc68d4416dc..a6d650a9a1f4232042d6d0594dec6c7bf1cc2c7f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/pipe/interface/ia_css_pipe_binarydesc.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/pipe/interface/ia_css_pipe_binarydesc.h
@@ -19,7 +19,7 @@
 #include <ia_css_frame_public.h>	/* ia_css_frame_info */
 #include <ia_css_binary.h>		/* ia_css_binary_descr */
 
-/** @brief Get a binary descriptor for copy.
+/* @brief Get a binary descriptor for copy.
  *
  * @param[in] pipe
  * @param[out] copy_desc
@@ -36,7 +36,7 @@ extern void ia_css_pipe_get_copy_binarydesc(
 	struct ia_css_frame_info *out_info,
 	struct ia_css_frame_info *vf_info);
 
-/** @brief Get a binary descriptor for vfpp.
+/* @brief Get a binary descriptor for vfpp.
  *
  * @param[in] pipe
  * @param[out] vfpp_descr
@@ -51,7 +51,7 @@ extern void ia_css_pipe_get_vfpp_binarydesc(
 		struct ia_css_frame_info *in_info,
 		struct ia_css_frame_info *out_info);
 
-/** @brief Get numerator and denominator of bayer downscaling factor.
+/* @brief Get numerator and denominator of bayer downscaling factor.
  *
  * @param[in] bds_factor: The bayer downscaling factor.
  *		(= The bds_factor member in the sh_css_bds_factor structure.)
@@ -67,7 +67,7 @@ extern enum ia_css_err sh_css_bds_factor_get_numerator_denominator(
 	unsigned int *bds_factor_numerator,
 	unsigned int *bds_factor_denominator);
 
-/** @brief Get a binary descriptor for preview stage.
+/* @brief Get a binary descriptor for preview stage.
  *
  * @param[in] pipe
  * @param[out] preview_descr
@@ -86,7 +86,7 @@ extern enum ia_css_err ia_css_pipe_get_preview_binarydesc(
 	struct ia_css_frame_info *out_info,
 	struct ia_css_frame_info *vf_info);
 
-/** @brief Get a binary descriptor for video stage.
+/* @brief Get a binary descriptor for video stage.
  *
  * @param[in/out] pipe
  * @param[out] video_descr
@@ -105,7 +105,7 @@ extern enum ia_css_err ia_css_pipe_get_video_binarydesc(
 	struct ia_css_frame_info *vf_info,
 	int stream_config_left_padding);
 
-/** @brief Get a binary descriptor for yuv scaler stage.
+/* @brief Get a binary descriptor for yuv scaler stage.
  *
  * @param[in/out] pipe
  * @param[out] yuv_scaler_descr
@@ -124,7 +124,7 @@ void ia_css_pipe_get_yuvscaler_binarydesc(
 	struct ia_css_frame_info *internal_out_info,
 	struct ia_css_frame_info *vf_info);
 
-/** @brief Get a binary descriptor for capture pp stage.
+/* @brief Get a binary descriptor for capture pp stage.
  *
  * @param[in/out] pipe
  * @param[out] capture_pp_descr
@@ -140,7 +140,7 @@ extern void ia_css_pipe_get_capturepp_binarydesc(
 	struct ia_css_frame_info *out_info,
 	struct ia_css_frame_info *vf_info);
 
-/** @brief Get a binary descriptor for primary capture.
+/* @brief Get a binary descriptor for primary capture.
  *
  * @param[in] pipe
  * @param[out] prim_descr
@@ -158,7 +158,7 @@ extern void ia_css_pipe_get_primary_binarydesc(
 	struct ia_css_frame_info *vf_info,
 	unsigned int stage_idx);
 
-/** @brief Get a binary descriptor for pre gdc stage.
+/* @brief Get a binary descriptor for pre gdc stage.
  *
  * @param[in] pipe
  * @param[out] pre_gdc_descr
@@ -173,7 +173,7 @@ extern void ia_css_pipe_get_pre_gdc_binarydesc(
 	struct ia_css_frame_info *in_info,
 	struct ia_css_frame_info *out_info);
 
-/** @brief Get a binary descriptor for gdc stage.
+/* @brief Get a binary descriptor for gdc stage.
  *
  * @param[in] pipe
  * @param[out] gdc_descr
@@ -188,7 +188,7 @@ extern void ia_css_pipe_get_gdc_binarydesc(
 	struct ia_css_frame_info *in_info,
 	struct ia_css_frame_info *out_info);
 
-/** @brief Get a binary descriptor for post gdc.
+/* @brief Get a binary descriptor for post gdc.
  *
  * @param[in] pipe
  * @param[out] post_gdc_descr
@@ -205,7 +205,7 @@ extern void ia_css_pipe_get_post_gdc_binarydesc(
 	struct ia_css_frame_info *out_info,
 	struct ia_css_frame_info *vf_info);
 
-/** @brief Get a binary descriptor for de.
+/* @brief Get a binary descriptor for de.
  *
  * @param[in] pipe
  * @param[out] pre_de_descr
@@ -220,7 +220,7 @@ extern void ia_css_pipe_get_pre_de_binarydesc(
 	struct ia_css_frame_info *in_info,
 	struct ia_css_frame_info *out_info);
 
-/** @brief Get a binary descriptor for pre anr stage.
+/* @brief Get a binary descriptor for pre anr stage.
  *
  * @param[in] pipe
  * @param[out] pre_anr_descr
@@ -235,7 +235,7 @@ extern void ia_css_pipe_get_pre_anr_binarydesc(
 	struct ia_css_frame_info *in_info,
 	struct ia_css_frame_info *out_info);
 
-/** @brief Get a binary descriptor for ANR stage.
+/* @brief Get a binary descriptor for ANR stage.
  *
  * @param[in] pipe
  * @param[out] anr_descr
@@ -250,7 +250,7 @@ extern void ia_css_pipe_get_anr_binarydesc(
 	struct ia_css_frame_info *in_info,
 	struct ia_css_frame_info *out_info);
 
-/** @brief Get a binary descriptor for post anr stage.
+/* @brief Get a binary descriptor for post anr stage.
  *
  * @param[in] pipe
  * @param[out] post_anr_descr
@@ -267,7 +267,7 @@ extern void ia_css_pipe_get_post_anr_binarydesc(
 	struct ia_css_frame_info *out_info,
 	struct ia_css_frame_info *vf_info);
 
-/** @brief Get a binary descriptor for ldc stage.
+/* @brief Get a binary descriptor for ldc stage.
  *
  * @param[in/out] pipe
  * @param[out] capture_pp_descr
@@ -282,7 +282,7 @@ extern void ia_css_pipe_get_ldc_binarydesc(
 	struct ia_css_frame_info *in_info,
 	struct ia_css_frame_info *out_info);
 
-/** @brief Calculates the required BDS factor
+/* @brief Calculates the required BDS factor
  *
  * @param[in] input_res
  * @param[in] output_res
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/pipe/interface/ia_css_pipe_util.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/pipe/interface/ia_css_pipe_util.h
index ba8858759b300d03e281da379253ff2edb2815a6..155b6fb4722bafb5d49d5905ea137f46be47ef29 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/pipe/interface/ia_css_pipe_util.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/pipe/interface/ia_css_pipe_util.h
@@ -18,7 +18,7 @@
 #include <ia_css_types.h>
 #include <ia_css_frame_public.h>
 
-/** @brief Get Input format bits per pixel based on stream configuration of this
+/* @brief Get Input format bits per pixel based on stream configuration of this
  * pipe.
  *
  * @param[in] pipe
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h
index f8b2e458f87669646cd5d75c91454fded7fbc9f7..a8c27676a38b353a67853dceecea81c737601390 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h
@@ -22,7 +22,7 @@
 #include <ia_css_stream_public.h>
 #include <ia_css_stream_format.h>
 
-/** @brief convert "errno" error code to "ia_css_err" error code
+/* @brief convert "errno" error code to "ia_css_err" error code
  *
  * @param[in]	"errno" error code
  * @return	"ia_css_err" error code
@@ -31,7 +31,7 @@
 enum ia_css_err ia_css_convert_errno(
 	int in_err);
 
-/** @brief check vf frame info.
+/* @brief check vf frame info.
  *
  * @param[in] info
  * @return	IA_CSS_SUCCESS or error code upon error.
@@ -40,7 +40,7 @@ enum ia_css_err ia_css_convert_errno(
 extern enum ia_css_err ia_css_util_check_vf_info(
 	const struct ia_css_frame_info * const info);
 
-/** @brief check input configuration.
+/* @brief check input configuration.
  *
  * @param[in] stream_config
  * @param[in] must_be_raw
@@ -52,7 +52,7 @@ extern enum ia_css_err ia_css_util_check_input(
 	bool must_be_raw,
 	bool must_be_yuv);
 
-/** @brief check vf and out frame info.
+/* @brief check vf and out frame info.
  *
  * @param[in] out_info
  * @param[in] vf_info
@@ -63,7 +63,7 @@ extern enum ia_css_err ia_css_util_check_vf_out_info(
 	const struct ia_css_frame_info * const out_info,
 	const struct ia_css_frame_info * const vf_info);
 
-/** @brief check width and height
+/* @brief check width and height
  *
  * @param[in] width
  * @param[in] height
@@ -75,7 +75,7 @@ extern enum ia_css_err ia_css_util_check_res(
 	unsigned int height);
 
 #ifdef ISP2401
-/** @brief compare resolutions (less or equal)
+/* @brief compare resolutions (less or equal)
  *
  * @param[in] a resolution
  * @param[in] b resolution
@@ -108,7 +108,7 @@ extern bool ia_css_util_resolution_is_even(
 		const struct ia_css_resolution resolution);
 
 #endif
-/** @brief check width and height
+/* @brief check width and height
  *
  * @param[in] stream_format
  * @param[in] two_ppc
@@ -119,7 +119,7 @@ extern unsigned int ia_css_util_input_format_bpp(
 	enum ia_css_stream_format stream_format,
 	bool two_ppc);
 
-/** @brief check if input format it raw
+/* @brief check if input format it raw
  *
  * @param[in] stream_format
  * @return true if the input format is raw or false otherwise
@@ -128,7 +128,7 @@ extern unsigned int ia_css_util_input_format_bpp(
 extern bool ia_css_util_is_input_format_raw(
 	enum ia_css_stream_format stream_format);
 
-/** @brief check if input format it yuv
+/* @brief check if input format it yuv
  *
  * @param[in] stream_format
  * @return true if the input format is yuv or false otherwise
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/csi_rx_private.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/csi_rx_private.h
index 6720ab55d6f5bc3e84fc62ae00cd859d738c1c82..9c0cb4a63862e0f08b7f0db7bf8e6de815e6ff3f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/csi_rx_private.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/csi_rx_private.h
@@ -277,6 +277,6 @@ static inline void csi_rx_be_ctrl_reg_store(
 
 	ia_css_device_store_uint32(CSI_RX_BE_CTRL_BASE[ID] + reg*sizeof(hrt_data), value);
 }
-/** end of DLI */
+/* end of DLI */
 
 #endif /* __CSI_RX_PRIVATE_H_INCLUDED__ */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/ibuf_ctrl_private.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/ibuf_ctrl_private.h
index 470c92d287fe550150c4027486ee00f5f1dc7e2d..4d07c2fe14696d344d7dc60091bb810531e83c92 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/ibuf_ctrl_private.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/ibuf_ctrl_private.h
@@ -192,7 +192,7 @@ STORAGE_CLASS_IBUF_CTRL_C void ibuf_ctrl_dump_state(
 		ia_css_print("IBUF controller ID %d Process ID %d isp_sync_state 0x%x\n", ID, i, state->proc_state[i].isp_sync_state);
 	}
 }
-/** end of NCI */
+/* end of NCI */
 
 /*****************************************************
  *
@@ -227,7 +227,7 @@ STORAGE_CLASS_IBUF_CTRL_C void ibuf_ctrl_reg_store(
 
 	ia_css_device_store_uint32(IBUF_CTRL_BASE[ID] + reg*sizeof(hrt_data), value);
 }
-/** end of DLI */
+/* end of DLI */
 
 
 #endif /* __IBUF_CTRL_PRIVATE_H_INCLUDED__ */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/isys_irq.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/isys_irq.c
index 14d1d3b627a9dc6d8b10dfe109d43d6d31ec4c54..842ae340ae138b4d03cfe36e3e307db22d55f657 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/isys_irq.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/isys_irq.c
@@ -26,7 +26,7 @@
 #include "isys_irq_private.h"
 #endif
 
-/** Public interface */
+/* Public interface */
 STORAGE_CLASS_ISYS2401_IRQ_C void isys_irqc_status_enable(
 	const isys_irq_ID_t	isys_irqc_id)
 {
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/isys_irq_private.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/isys_irq_private.h
index c17ce131c9e1c530fa405ba9e3ff6c128d7509f7..e69f39893bd2eab56fc61c9ffb5d7a7be311c714 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/isys_irq_private.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/isys_irq_private.h
@@ -59,7 +59,7 @@ STORAGE_CLASS_ISYS2401_IRQ_C void isys_irqc_state_dump(
 		state->status, state->edge, state->mask, state->enable, state->level_no);
 }
 
-/** end of NCI */
+/* end of NCI */
 
 /* -------------------------------------------------------+
  |              Device level interface (DLI)              |
@@ -101,7 +101,7 @@ STORAGE_CLASS_ISYS2401_IRQ_C hrt_data isys_irqc_reg_load(
 	return value;
 }
 
-/** end of DLI */
+/* end of DLI */
 
 #endif /* defined(USE_INPUT_SYSTEM_VERSION_2401) */
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/isys_stream2mmio_private.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/isys_stream2mmio_private.h
index 1603a09b621ad7c9e6d4532971e82d3e1f36182f..f946105ddf43acbeb4e97c09a5d1e898369ad9a3 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/isys_stream2mmio_private.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/isys_stream2mmio_private.h
@@ -122,7 +122,7 @@ STORAGE_CLASS_STREAM2MMIO_C void stream2mmio_dump_state(
 		stream2mmio_print_sid_state(&(state->sid_state[i]));
 	}
 }
-/** end of NCI */
+/* end of NCI */
 
 /*****************************************************
  *
@@ -163,6 +163,6 @@ STORAGE_CLASS_STREAM2MMIO_C void stream2mmio_reg_store(
 	ia_css_device_store_uint32(STREAM2MMIO_CTRL_BASE[ID] +
 		reg * sizeof(hrt_data), value);
 }
-/** end of DLI */
+/* end of DLI */
 
 #endif /* __ISYS_STREAM2MMIO_PRIVATE_H_INCLUDED__ */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/pixelgen_private.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/pixelgen_private.h
index 3f34b508f0bfe9178f401a6fd29622f83a4522fe..c5bf540eadf1120084237819688009428155ff1e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/pixelgen_private.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/host/pixelgen_private.h
@@ -160,5 +160,5 @@ STORAGE_CLASS_PIXELGEN_C void pixelgen_ctrl_reg_store(
 
 	ia_css_device_store_uint32(PIXELGEN_CTRL_BASE[ID] + reg*sizeof(hrt_data), value);
 }
-/** end of DLI */
+/* end of DLI */
 #endif /* __PIXELGEN_PRIVATE_H_INCLUDED__ */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/isys_dma_global.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/isys_dma_global.h
index e7a734a9fc43cc42d13f35d135c100693a19e532..1be5c6956d65b27407635795b99b8d576d5dbe7b 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/isys_dma_global.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/isys_dma_global.h
@@ -46,7 +46,7 @@ struct isys2401_dma_port_cfg_s {
 	uint32_t cropping;
 	uint32_t width;
  };
-/** end of DMA Port */
+/* end of DMA Port */
 
 /************************************************
  *
@@ -79,7 +79,7 @@ struct isys2401_dma_cfg_s {
 	isys2401_dma_extension	extension;
 	uint32_t		height;
 };
-/** end of DMA Device */
+/* end of DMA Device */
 
 /* isys2401_dma_channel limits per DMA ID */
 extern const isys2401_dma_channel N_ISYS2401_DMA_CHANNEL_PROCS[N_ISYS2401_DMA_ID];
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/pixelgen_global.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/pixelgen_global.h
index 216813e42a0aa5bdc9b92435462f9dccb3f69c3d..0bf2feb8bbfb420f890f4eadf69c7ab9f188ef79 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/pixelgen_global.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/pixelgen_global.h
@@ -86,6 +86,6 @@ struct pixelgen_prbs_cfg_s {
 	sync_generator_cfg_t	sync_gen_cfg;
 };
 
-/** end of Pixel-generator: TPG. ("pixelgen_global.h") */
+/* end of Pixel-generator: TPG. ("pixelgen_global.h") */
 #endif /* __PIXELGEN_GLOBAL_H_INCLUDED__ */
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h
index 9f7ecac4627361871ac27524aca54ed7ecf1e58e..d2e3a2deea2ecf373b4a05a2584bec76855e059a 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h
@@ -331,7 +331,7 @@ typedef enum {
 	IBUF_CTRL2_ID,		/* map ISYS2401_IBUF_CNTRL_C */
 	N_IBUF_CTRL_ID
 } ibuf_ctrl_ID_t;
-/** end of Input-buffer Controller */
+/* end of Input-buffer Controller */
 
 /*
  * Stream2MMIO.
@@ -364,7 +364,7 @@ typedef enum {
 	STREAM2MMIO_SID7_ID,
 	N_STREAM2MMIO_SID_ID
 } stream2mmio_sid_ID_t;
-/** end of Stream2MMIO */
+/* end of Stream2MMIO */
 
 /**
  * Input System 2401: CSI-MIPI recevier.
@@ -390,7 +390,7 @@ typedef enum {
 	CSI_RX_DLANE3_ID,		/* map to DLANE3 in CSI RX */
 	N_CSI_RX_DLANE_ID
 } csi_rx_fe_dlane_ID_t;
-/** end of CSI-MIPI receiver */
+/* end of CSI-MIPI receiver */
 
 typedef enum {
 	ISYS2401_DMA0_ID = 0,
@@ -406,7 +406,7 @@ typedef enum {
 	PIXELGEN2_ID,
 	N_PIXELGEN_ID
 } pixelgen_ID_t;
-/** end of pixel-generator. ("system_global.h") */
+/* end of pixel-generator. ("system_global.h") */
 
 typedef enum {
 	INPUT_SYSTEM_CSI_PORT0_ID = 0,
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_api_version.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_api_version.h
index 1f6a55ff5db88c77f827fd238aaeca47ec7576d5..efcd6e1679e8f5d4a0868505aa54d3208dd461ca 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_api_version.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_api_version.h
@@ -31,7 +31,7 @@ more details.
 #ifndef __CSS_API_VERSION_H
 #define __CSS_API_VERSION_H
 
-/** @file
+/* @file
  * CSS API version file. This file contains the version number of the CSS-API.
  *
  * This file is generated from a set of input files describing the CSS-API
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c
index 5a4eabf79ee238db51b16e51d88a0e259fab91e9..bcfd443f52022100158da33ed55c1ecc0d37dc0e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c
@@ -21,7 +21,7 @@
 #endif /* __INLINE_GP_TIMER__ */
 #include "system_local.h"
 
-/** FIXME: not sure if reg_load(), reg_store() should be API.
+/* FIXME: not sure if reg_load(), reg_store() should be API.
  */
 static uint32_t
 gp_timer_reg_load(uint32_t reg);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/csi_rx_public.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/csi_rx_public.h
index 3b5df85fc51082fc37428fc9de6ad2becb2df8d4..426d022d3a2604ad3c32712d0f5114d696f19de5 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/csi_rx_public.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/csi_rx_public.h
@@ -73,7 +73,7 @@ extern void csi_rx_be_ctrl_get_state(
 extern void csi_rx_be_ctrl_dump_state(
 		const csi_rx_backend_ID_t ID,
 		csi_rx_be_ctrl_state_t *state);
-/** end of NCI */
+/* end of NCI */
 
 /*****************************************************
  *
@@ -130,6 +130,6 @@ extern void csi_rx_be_ctrl_reg_store(
 	const csi_rx_backend_ID_t ID,
 	const hrt_address reg,
 	const hrt_data value);
-/** end of DLI */
+/* end of DLI */
 #endif /* USE_INPUT_SYSTEM_VERSION_2401 */
 #endif /* __CSI_RX_PUBLIC_H_INCLUDED__ */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/ibuf_ctrl_public.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/ibuf_ctrl_public.h
index 1ac0e64e539cb30154c62d5785c16ac2a108d477..98ee9947fb8ef513c41b13809c110c98a0b8a531 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/ibuf_ctrl_public.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/ibuf_ctrl_public.h
@@ -54,7 +54,7 @@ STORAGE_CLASS_IBUF_CTRL_H void ibuf_ctrl_get_proc_state(
 STORAGE_CLASS_IBUF_CTRL_H void ibuf_ctrl_dump_state(
 		const ibuf_ctrl_ID_t ID,
 		ibuf_ctrl_state_t *state);
-/** end of NCI */
+/* end of NCI */
 
 /*****************************************************
  *
@@ -87,7 +87,7 @@ STORAGE_CLASS_IBUF_CTRL_H void ibuf_ctrl_reg_store(
 	const ibuf_ctrl_ID_t ID,
 	const hrt_address reg,
 	const hrt_data value);
-/** end of DLI */
+/* end of DLI */
 
 #endif /* USE_INPUT_SYSTEM_VERSION_2401 */
 #endif /* __IBUF_CTRL_PUBLIC_H_INCLUDED__ */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/isp_op1w.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/isp_op1w.h
index a025ad562bd2a15f8dda642a4a70edb6f768c0a3..0d978e5911c0596666804181af9ec5d9b04145ef 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/isp_op1w.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/isp_op1w.h
@@ -49,7 +49,7 @@
 
 /* Arithmetic */
 
-/** @brief bitwise AND
+/* @brief bitwise AND
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -63,7 +63,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_and(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief bitwise OR
+/* @brief bitwise OR
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -77,7 +77,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_or(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief bitwise XOR
+/* @brief bitwise XOR
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -91,7 +91,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_xor(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief bitwise inverse
+/* @brief bitwise inverse
  *
  * @param[in] _a	first argument
  *
@@ -105,7 +105,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_inv(
 
 /* Additive */
 
-/** @brief addition
+/* @brief addition
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -120,7 +120,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_add(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief subtraction
+/* @brief subtraction
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -135,7 +135,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_sub(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief saturated addition
+/* @brief saturated addition
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -150,7 +150,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_addsat(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief saturated subtraction
+/* @brief saturated subtraction
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -166,7 +166,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_subsat(
     const tvector1w     _b);
 
 #ifdef ISP2401
-/** @brief Unsigned saturated subtraction
+/* @brief Unsigned saturated subtraction
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -182,7 +182,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w_unsigned OP_1w_subsat_u(
     const tvector1w_unsigned _b);
 
 #endif
-/** @brief subtraction with shift right and rounding
+/* @brief subtraction with shift right and rounding
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -202,7 +202,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_subasr1(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief Subtraction with shift right and rounding
+/* @brief Subtraction with shift right and rounding
  *
  * @param[in] _a	first operand
  * @param[in] _b	second operand
@@ -217,7 +217,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_subhalfrnd(
     const tvector1w	_a,
     const tvector1w	_b);
 
-/** @brief Subtraction with shift right and no rounding
+/* @brief Subtraction with shift right and no rounding
  *
  * @param[in] _a	first operand
  * @param[in] _b	second operand
@@ -233,7 +233,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_subhalf(
     const tvector1w	_b);
 
 
-/** @brief saturated absolute value
+/* @brief saturated absolute value
  *
  * @param[in] _a	input
  *
@@ -247,7 +247,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_subhalf(
 STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_abs(
     const tvector1w     _a);
 
-/** @brief saturated absolute difference
+/* @brief saturated absolute difference
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -264,7 +264,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_subabssat(
 
 /* Multiplicative */
 
-/** @brief doubling multiply
+/* @brief doubling multiply
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -281,7 +281,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector2w OP_1w_muld(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief integer multiply
+/* @brief integer multiply
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -298,7 +298,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_mul(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief fractional saturating multiply
+/* @brief fractional saturating multiply
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -316,7 +316,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_qmul(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief fractional saturating multiply with rounding
+/* @brief fractional saturating multiply with rounding
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -337,7 +337,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_qrmul(
 
 /* Comparative */
 
-/** @brief equal
+/* @brief equal
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -351,7 +351,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tflags OP_1w_eq(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief not equal
+/* @brief not equal
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -365,7 +365,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tflags OP_1w_ne(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief less or equal
+/* @brief less or equal
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -379,7 +379,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tflags OP_1w_le(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief less then
+/* @brief less then
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -393,7 +393,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tflags OP_1w_lt(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief greater or equal
+/* @brief greater or equal
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -407,7 +407,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tflags OP_1w_ge(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief greater than
+/* @brief greater than
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -423,7 +423,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tflags OP_1w_gt(
 
 /* Shift */
 
-/** @brief aritmetic shift right
+/* @brief aritmetic shift right
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -441,7 +441,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_asr(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief aritmetic shift right with rounding
+/* @brief aritmetic shift right with rounding
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -460,7 +460,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_asrrnd(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief saturating arithmetic shift left
+/* @brief saturating arithmetic shift left
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -480,7 +480,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_asl(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief saturating aritmetic shift left
+/* @brief saturating aritmetic shift left
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -493,7 +493,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_aslsat(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief logical shift left
+/* @brief logical shift left
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -510,7 +510,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_lsl(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief logical shift right
+/* @brief logical shift right
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -528,7 +528,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_lsr(
     const tvector1w     _b);
 
 #ifdef ISP2401
-/** @brief bidirectional saturating arithmetic shift
+/* @brief bidirectional saturating arithmetic shift
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -546,7 +546,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_ashift_sat(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief bidirectional non-saturating arithmetic shift
+/* @brief bidirectional non-saturating arithmetic shift
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -565,7 +565,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_ashift(
     const tvector1w     _b);
 
 
-/** @brief bidirectional logical shift
+/* @brief bidirectional logical shift
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -588,7 +588,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_lshift(
 #endif
 /* Cast */
 
-/** @brief Cast from int to 1w
+/* @brief Cast from int to 1w
  *
  * @param[in] _a	input
  *
@@ -601,7 +601,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_lshift(
 STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_int_cast_to_1w(
     const int           _a);
 
-/** @brief Cast from 1w to int
+/* @brief Cast from 1w to int
  *
  * @param[in] _a	input
  *
@@ -614,7 +614,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_int_cast_to_1w(
 STORAGE_CLASS_ISP_OP1W_FUNC_H int OP_1w_cast_to_int(
     const tvector1w      _a);
 
-/** @brief Cast from 1w to 2w
+/* @brief Cast from 1w to 2w
  *
  * @param[in] _a	input
  *
@@ -627,7 +627,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H int OP_1w_cast_to_int(
 STORAGE_CLASS_ISP_OP1W_FUNC_H tvector2w OP_1w_cast_to_2w(
     const tvector1w     _a);
 
-/** @brief Cast from 2w to 1w
+/* @brief Cast from 2w to 1w
  *
  * @param[in] _a	input
  *
@@ -641,7 +641,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_2w_cast_to_1w(
     const tvector2w    _a);
 
 
-/** @brief Cast from 2w to 1w with saturation
+/* @brief Cast from 2w to 1w with saturation
  *
  * @param[in] _a	input
  *
@@ -657,7 +657,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_2w_sat_cast_to_1w(
 
 /* clipping */
 
-/** @brief Clip asymmetrical
+/* @brief Clip asymmetrical
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -673,7 +673,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_clip_asym(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief Clip zero
+/* @brief Clip zero
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -691,7 +691,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_clipz(
 
 /* division */
 
-/** @brief Truncated division
+/* @brief Truncated division
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -708,7 +708,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_div(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief Fractional saturating divide
+/* @brief Fractional saturating divide
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -726,7 +726,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_qdiv(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief Modulo
+/* @brief Modulo
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -741,7 +741,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_mod(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief Unsigned integer Square root
+/* @brief Unsigned integer Square root
  *
  * @param[in] _a	input
  *
@@ -754,7 +754,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w_unsigned OP_1w_sqrt_u(
 
 /* Miscellaneous */
 
-/** @brief Multiplexer
+/* @brief Multiplexer
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -770,7 +770,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_mux(
     const tvector1w     _b,
     const tflags           _c);
 
-/** @brief Average without rounding
+/* @brief Average without rounding
  *
  * @param[in] _a	first operand
  * @param[in] _b	second operand
@@ -786,7 +786,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w  OP_1w_avg(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief Average with rounding
+/* @brief Average with rounding
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -802,7 +802,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_avgrnd(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief Minimum
+/* @brief Minimum
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -816,7 +816,7 @@ STORAGE_CLASS_ISP_OP1W_FUNC_H tvector1w OP_1w_min(
     const tvector1w     _a,
     const tvector1w     _b);
 
-/** @brief Maximum
+/* @brief Maximum
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/isp_op2w.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/isp_op2w.h
index cf7e7314842dec46940ec2b354c1513789df3498..7575d260b83748b51aab51433bef85984763ccc1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/isp_op2w.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/isp_op2w.h
@@ -48,7 +48,7 @@
 
 /* Arithmetic */
 
-/** @brief bitwise AND
+/* @brief bitwise AND
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -62,7 +62,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_and(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief bitwise OR
+/* @brief bitwise OR
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -76,7 +76,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_or(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief bitwise XOR
+/* @brief bitwise XOR
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -90,7 +90,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_xor(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief bitwise inverse
+/* @brief bitwise inverse
  *
  * @param[in] _a	first argument
  *
@@ -104,7 +104,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_inv(
 
 /* Additive */
 
-/** @brief addition
+/* @brief addition
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -119,7 +119,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_add(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief subtraction
+/* @brief subtraction
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -134,7 +134,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_sub(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief saturated addition
+/* @brief saturated addition
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -149,7 +149,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_addsat(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief saturated subtraction
+/* @brief saturated subtraction
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -164,7 +164,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_subsat(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief subtraction with shift right and rounding
+/* @brief subtraction with shift right and rounding
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -184,7 +184,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_subasr1(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief Subtraction with shift right and rounding
+/* @brief Subtraction with shift right and rounding
  *
  * @param[in] _a	first operand
  * @param[in] _b	second operand
@@ -199,7 +199,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_subhalfrnd(
     const tvector2w	_a,
     const tvector2w	_b);
 
-/** @brief Subtraction with shift right and no rounding
+/* @brief Subtraction with shift right and no rounding
  *
  * @param[in] _a	first operand
  * @param[in] _b	second operand
@@ -214,7 +214,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_subhalf(
     const tvector2w	_a,
     const tvector2w	_b);
 
-/** @brief saturated absolute value
+/* @brief saturated absolute value
  *
  * @param[in] _a	input
  *
@@ -228,7 +228,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_subhalf(
 STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_abs(
     const tvector2w     _a);
 
-/** @brief saturated absolute difference
+/* @brief saturated absolute difference
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -245,7 +245,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_subabssat(
 
 /* Multiplicative */
 
-/** @brief integer multiply
+/* @brief integer multiply
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -262,7 +262,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_mul(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief fractional saturating multiply
+/* @brief fractional saturating multiply
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -279,7 +279,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_qmul(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief fractional saturating multiply with rounding
+/* @brief fractional saturating multiply with rounding
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -301,7 +301,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_qrmul(
 
 /* Comparative */
 
-/** @brief equal
+/* @brief equal
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -315,7 +315,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tflags OP_2w_eq(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief not equal
+/* @brief not equal
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -329,7 +329,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tflags OP_2w_ne(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief less or equal
+/* @brief less or equal
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -343,7 +343,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tflags OP_2w_le(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief less then
+/* @brief less then
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -357,7 +357,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tflags OP_2w_lt(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief greater or equal
+/* @brief greater or equal
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -371,7 +371,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tflags OP_2w_ge(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief greater than
+/* @brief greater than
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -387,7 +387,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tflags OP_2w_gt(
 
 /* Shift */
 
-/** @brief aritmetic shift right
+/* @brief aritmetic shift right
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -404,7 +404,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_asr(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief aritmetic shift right with rounding
+/* @brief aritmetic shift right with rounding
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -423,7 +423,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_asrrnd(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief saturating aritmetic shift left
+/* @brief saturating aritmetic shift left
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -443,7 +443,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_asl(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief saturating aritmetic shift left
+/* @brief saturating aritmetic shift left
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -456,7 +456,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_aslsat(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief logical shift left
+/* @brief logical shift left
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -473,7 +473,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_lsl(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief logical shift right
+/* @brief logical shift right
  *
  * @param[in] _a	input
  * @param[in] _b	shift amount
@@ -492,7 +492,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_lsr(
 
 /* clipping */
 
-/** @brief Clip asymmetrical
+/* @brief Clip asymmetrical
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -507,7 +507,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_clip_asym(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief Clip zero
+/* @brief Clip zero
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -524,7 +524,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_clipz(
 
 /* division */
 
-/** @brief Truncated division
+/* @brief Truncated division
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -541,7 +541,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_div(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief Saturating truncated division
+/* @brief Saturating truncated division
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -559,7 +559,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector1w OP_2w_divh(
     const tvector2w     _a,
     const tvector1w     _b);
 
-/** @brief Modulo
+/* @brief Modulo
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -572,7 +572,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_mod(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief Unsigned Integer Square root
+/* @brief Unsigned Integer Square root
  *
  * @param[in] _a	input
  *
@@ -585,7 +585,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector1w_unsigned OP_2w_sqrt_u(
 
 /* Miscellaneous */
 
-/** @brief Multiplexer
+/* @brief Multiplexer
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -601,7 +601,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_mux(
     const tvector2w     _b,
     const tflags           _c);
 
-/** @brief Average without rounding
+/* @brief Average without rounding
  *
  * @param[in] _a	first operand
  * @param[in] _b	second operand
@@ -617,7 +617,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w  OP_2w_avg(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief Average with rounding
+/* @brief Average with rounding
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -633,7 +633,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_avgrnd(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief Minimum
+/* @brief Minimum
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
@@ -647,7 +647,7 @@ STORAGE_CLASS_ISP_OP2W_FUNC_H tvector2w OP_2w_min(
     const tvector2w     _a,
     const tvector2w     _b);
 
-/** @brief Maximum
+/* @brief Maximum
  *
  * @param[in] _a	first argument
  * @param[in] _b	second argument
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/isys_stream2mmio_public.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/isys_stream2mmio_public.h
index 5624cfcfa015354fd1a7792032f6ba0e2c7fd819..6c53ca9df96c3ef4733b99a09d75872f3515eee5 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/isys_stream2mmio_public.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/isys_stream2mmio_public.h
@@ -43,7 +43,7 @@ STORAGE_CLASS_STREAM2MMIO_H void stream2mmio_get_sid_state(
 		const stream2mmio_ID_t ID,
 		const stream2mmio_sid_ID_t sid_id,
 		stream2mmio_sid_state_t *state);
-/** end of NCI */
+/* end of NCI */
 
 /*****************************************************
  *
@@ -96,6 +96,6 @@ STORAGE_CLASS_STREAM2MMIO_H void stream2mmio_reg_store(
 		const stream2mmio_ID_t ID,
 		const hrt_address reg,
 		const hrt_data value);
-/** end of DLI */
+/* end of DLI */
 
 #endif /* __ISYS_STREAM2MMIO_PUBLIC_H_INCLUDED__ */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/pixelgen_public.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/pixelgen_public.h
index c0f3f3ea32d73c26aa7c3566629d5ab40b8a62c9..f597e07d7c4f9a61253db4954353bbcc67dd4c58 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/pixelgen_public.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/pixelgen_public.h
@@ -41,7 +41,7 @@ STORAGE_CLASS_PIXELGEN_H void pixelgen_ctrl_get_state(
 STORAGE_CLASS_PIXELGEN_H void pixelgen_ctrl_dump_state(
 		const pixelgen_ID_t ID,
 		pixelgen_ctrl_state_t *state);
-/** end of NCI */
+/* end of NCI */
 
 /*****************************************************
  *
@@ -73,7 +73,7 @@ STORAGE_CLASS_PIXELGEN_H void pixelgen_ctrl_reg_store(
 	const pixelgen_ID_t ID,
 	const hrt_address reg,
 	const hrt_data value);
-/** end of DLI */
+/* end of DLI */
 
 #endif /* USE_INPUT_SYSTEM_VERSION_2401 */
 #endif /* __PIXELGEN_PUBLIC_H_INCLUDED__ */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/ref_vector_func.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/ref_vector_func.h
index a202d6dce1060bb10e896eae1cafdf154e4de795..c1638c06407d240d7af6286ee449ca7fca2df1fd 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/ref_vector_func.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/ref_vector_func.h
@@ -27,7 +27,7 @@
 
 #include "ref_vector_func_types.h"
 
-/** @brief Doubling multiply accumulate with saturation
+/* @brief Doubling multiply accumulate with saturation
  *
  * @param[in] acc accumulator
  * @param[in] a multiply input
@@ -44,7 +44,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector2w OP_1w_maccd_sat(
 	tvector1w a,
 	tvector1w b );
 
-/** @brief Doubling multiply accumulate
+/* @brief Doubling multiply accumulate
  *
  * @param[in] acc accumulator
  * @param[in] a multiply input
@@ -61,7 +61,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector2w OP_1w_maccd(
 	tvector1w a,
 	tvector1w b );
 
-/** @brief Re-aligning multiply
+/* @brief Re-aligning multiply
  *
  * @param[in] a multiply input
  * @param[in] b multiply input
@@ -78,7 +78,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_mul_realigning(
 	tvector1w b,
 	tscalar1w shift );
 
-/** @brief Leading bit index
+/* @brief Leading bit index
  *
  * @param[in] a 	input
  *
@@ -92,7 +92,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_mul_realigning(
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_lod(
 		tvector1w a);
 
-/** @brief Config Unit Input Processing
+/* @brief Config Unit Input Processing
  *
  * @param[in] a 	    input
  * @param[in] input_scale   input scaling factor
@@ -111,7 +111,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_input_scaling_offset_clamping(
 	tscalar1w_5bit_signed input_scale,
 	tscalar1w_5bit_signed input_offset);
 
-/** @brief Config Unit Output Processing
+/* @brief Config Unit Output Processing
  *
  * @param[in] a 	     output
  * @param[in] output_scale   output scaling factor
@@ -127,7 +127,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_output_scaling_clamping(
 	tvector1w a,
 	tscalar1w_5bit_signed output_scale);
 
-/** @brief Config Unit Piecewiselinear estimation
+/* @brief Config Unit Piecewiselinear estimation
  *
  * @param[in] a 	          input
  * @param[in] config_points   config parameter structure
@@ -143,7 +143,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_piecewise_estimation(
 	tvector1w a,
 	ref_config_points config_points);
 
-/** @brief Fast Config Unit
+/* @brief Fast Config Unit
  *
  * @param[in] x 		input
  * @param[in] init_vectors	LUT data structure
@@ -161,7 +161,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H  tvector1w OP_1w_XCU(
 	xcu_ref_init_vectors init_vectors);
 
 
-/** @brief LXCU
+/* @brief LXCU
  *
  * @param[in] x 		input
  * @param[in] init_vectors 	LUT data structure
@@ -180,7 +180,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_LXCU(
 	tvector1w x,
 	xcu_ref_init_vectors init_vectors);
 
-/** @brief Coring
+/* @brief Coring
  *
  * @param[in] coring_vec   Amount of coring based on brightness level
  * @param[in] filt_input   Vector of input pixels on which Coring is applied
@@ -196,7 +196,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w coring(
 	tvector1w filt_input,
 	tscalar1w m_CnrCoring0 );
 
-/** @brief Normalised FIR with coefficients [3,4,1]
+/* @brief Normalised FIR with coefficients [3,4,1]
  *
  * @param[in] m	1x3 matrix with pixels
  *
@@ -209,7 +209,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w coring(
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_5dB_m90_nrm (
 	const s_1w_1x3_matrix		m);
 
-/** @brief Normalised FIR with coefficients [1,4,3]
+/* @brief Normalised FIR with coefficients [1,4,3]
  *
  * @param[in] m	1x3 matrix with pixels
  *
@@ -222,7 +222,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_5dB_m90_nrm (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_5dB_p90_nrm (
 	const s_1w_1x3_matrix		m);
 
-/** @brief Normalised FIR with coefficients [1,2,1]
+/* @brief Normalised FIR with coefficients [1,2,1]
  *
  * @param[in] m	1x3 matrix with pixels
  *
@@ -234,7 +234,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_5dB_p90_nrm (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm (
 	const s_1w_1x3_matrix		m);
 
-/** @brief Normalised FIR with coefficients [13,16,3]
+/* @brief Normalised FIR with coefficients [13,16,3]
  *
  * @param[in] m	1x3 matrix with pixels
  *
@@ -246,7 +246,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph0 (
 	const s_1w_1x3_matrix		m);
 
-/** @brief Normalised FIR with coefficients [9,16,7]
+/* @brief Normalised FIR with coefficients [9,16,7]
  *
  * @param[in] m	1x3 matrix with pixels
  *
@@ -258,7 +258,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph0 (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph1 (
 	const s_1w_1x3_matrix		m);
 
-/** @brief Normalised FIR with coefficients [5,16,11]
+/* @brief Normalised FIR with coefficients [5,16,11]
  *
  * @param[in] m	1x3 matrix with pixels
  *
@@ -270,7 +270,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph1 (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph2 (
 	const s_1w_1x3_matrix		m);
 
-/** @brief Normalised FIR with coefficients [1,16,15]
+/* @brief Normalised FIR with coefficients [1,16,15]
  *
  * @param[in] m	1x3 matrix with pixels
  *
@@ -282,7 +282,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph2 (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph3 (
 	const s_1w_1x3_matrix		m);
 
-/** @brief Normalised FIR with programable phase shift
+/* @brief Normalised FIR with programable phase shift
  *
  * @param[in] m	1x3 matrix with pixels
  * @param[in] coeff	phase shift
@@ -295,7 +295,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph3 (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_calc_coeff (
 	const s_1w_1x3_matrix		m, tscalar1w_3bit coeff);
 
-/** @brief 3 tap FIR with coefficients [1,1,1]
+/* @brief 3 tap FIR with coefficients [1,1,1]
  *
  * @param[in] m	1x3 matrix with pixels
  *
@@ -308,7 +308,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_9dB_nrm (
 	const s_1w_1x3_matrix		m);
 
 #ifdef ISP2401
-/** @brief      symmetric 3 tap FIR acts as LPF or BSF
+/* @brief      symmetric 3 tap FIR acts as LPF or BSF
  *
  * @param[in] m 1x3 matrix with pixels
  * @param[in] k filter coefficient shift
@@ -336,7 +336,7 @@ sym_fir1x3m_lpf_bsf(s_1w_1x3_matrix m,
 		    tscalar_bool bsf_flag);
 #endif
 
-/** @brief Normalised 2D FIR with coefficients  [1;2;1] * [1,2,1]
+/* @brief Normalised 2D FIR with coefficients  [1;2;1] * [1,2,1]
  *
  * @param[in] m	3x3 matrix with pixels
  *
@@ -353,7 +353,7 @@ sym_fir1x3m_lpf_bsf(s_1w_1x3_matrix m,
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir3x3m_6dB_nrm (
 	const s_1w_3x3_matrix		m);
 
-/** @brief Normalised 2D FIR with coefficients  [1;1;1] * [1,1,1]
+/* @brief Normalised 2D FIR with coefficients  [1;1;1] * [1,1,1]
  *
  * @param[in] m	3x3 matrix with pixels
  *
@@ -371,7 +371,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir3x3m_6dB_nrm (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir3x3m_9dB_nrm (
 	const s_1w_3x3_matrix		m);
 
-/** @brief Normalised dual output 2D FIR with coefficients  [1;2;1] * [1,2,1]
+/* @brief Normalised dual output 2D FIR with coefficients  [1;2;1] * [1,2,1]
  *
  * @param[in] m	4x3 matrix with pixels
  *
@@ -391,7 +391,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir3x3m_9dB_nrm (
  STORAGE_CLASS_REF_VECTOR_FUNC_H s_1w_2x1_matrix fir3x3m_6dB_out2x1_nrm (
 	const s_1w_4x3_matrix		m);
 
-/** @brief Normalised dual output 2D FIR with coefficients [1;1;1] * [1,1,1]
+/* @brief Normalised dual output 2D FIR with coefficients [1;1;1] * [1,1,1]
  *
  * @param[in] m	4x3 matrix with pixels
  *
@@ -411,7 +411,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir3x3m_9dB_nrm (
 STORAGE_CLASS_REF_VECTOR_FUNC_H s_1w_2x1_matrix fir3x3m_9dB_out2x1_nrm (
 	const s_1w_4x3_matrix		m);
 
-/** @brief Normalised 2D FIR 5x5
+/* @brief Normalised 2D FIR 5x5
  *
  * @param[in] m	5x5 matrix with pixels
  *
@@ -429,7 +429,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H s_1w_2x1_matrix fir3x3m_9dB_out2x1_nrm (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir5x5m_15dB_nrm (
 	const s_1w_5x5_matrix	m);
 
-/** @brief Normalised FIR 1x5
+/* @brief Normalised FIR 1x5
  *
  * @param[in] m	1x5 matrix with pixels
  *
@@ -447,7 +447,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir5x5m_15dB_nrm (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x5m_12dB_nrm (
 	const s_1w_1x5_matrix m);
 
-/** @brief Normalised 2D FIR 5x5
+/* @brief Normalised 2D FIR 5x5
  *
  * @param[in] m	5x5 matrix with pixels
  *
@@ -465,7 +465,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x5m_12dB_nrm (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir5x5m_12dB_nrm (
 	const s_1w_5x5_matrix m);
 
-/** @brief Approximate averaging FIR 1x5
+/* @brief Approximate averaging FIR 1x5
  *
  * @param[in] m	1x5 matrix with pixels
  *
@@ -479,7 +479,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir5x5m_12dB_nrm (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x5m_box (
 	s_1w_1x5_matrix m);
 
-/** @brief Approximate averaging FIR 1x9
+/* @brief Approximate averaging FIR 1x9
  *
  * @param[in] m	1x9 matrix with pixels
  *
@@ -493,7 +493,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x5m_box (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x9m_box (
 	s_1w_1x9_matrix m);
 
-/** @brief Approximate averaging FIR 1x11
+/* @brief Approximate averaging FIR 1x11
  *
  * @param[in] m	1x11 matrix with pixels
  *
@@ -507,7 +507,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x9m_box (
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x11m_box (
 	s_1w_1x11_matrix m);
 
-/** @brief Symmetric 7 tap filter with normalization
+/* @brief Symmetric 7 tap filter with normalization
  *
  *  @param[in] in 1x7 matrix with pixels
  *  @param[in] coeff 1x4 matrix with coefficients
@@ -528,7 +528,7 @@ fir1x7m_sym_nrm(s_1w_1x7_matrix in,
 		s_1w_1x4_matrix coeff,
 		tvector1w out_shift);
 
-/** @brief Symmetric 7 tap filter with normalization at input side
+/* @brief Symmetric 7 tap filter with normalization at input side
  *
  *  @param[in] in 1x7 matrix with pixels
  *  @param[in] coeff 1x4 matrix with coefficients
@@ -549,7 +549,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 fir1x7m_sym_innrm_approx(s_1w_1x7_matrix in,
 			 s_1w_1x4_matrix coeff);
 
-/** @brief Symmetric 7 tap filter with normalization at output side
+/* @brief Symmetric 7 tap filter with normalization at output side
  *
  *  @param[in] in 1x7 matrix with pixels
  *  @param[in] coeff 1x4 matrix with coefficients
@@ -571,7 +571,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 fir1x7m_sym_outnrm_approx(s_1w_1x7_matrix in,
 			 s_1w_1x4_matrix coeff);
 
-/** @brief 4 tap filter with normalization
+/* @brief 4 tap filter with normalization
  *
  *  @param[in] in 1x4 matrix with pixels
  *  @param[in] coeff 1x4 matrix with coefficients
@@ -588,7 +588,7 @@ fir1x4m_nrm(s_1w_1x4_matrix in,
 		s_1w_1x4_matrix coeff,
 		tvector1w out_shift);
 
-/** @brief 4 tap filter with normalization for half pixel interpolation
+/* @brief 4 tap filter with normalization for half pixel interpolation
  *
  *  @param[in] in 1x4 matrix with pixels
  *
@@ -604,7 +604,7 @@ fir1x4m_nrm(s_1w_1x4_matrix in,
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 fir1x4m_bicubic_bezier_half(s_1w_1x4_matrix in);
 
-/** @brief 4 tap filter with normalization for quarter pixel interpolation
+/* @brief 4 tap filter with normalization for quarter pixel interpolation
  *
  *  @param[in] in 1x4 matrix with pixels
  *  @param[in] coeff 1x4 matrix with coefficients
@@ -626,7 +626,7 @@ fir1x4m_bicubic_bezier_quarter(s_1w_1x4_matrix in,
 			s_1w_1x4_matrix coeff);
 
 
-/** @brief Symmetric 3 tap filter with normalization
+/* @brief Symmetric 3 tap filter with normalization
  *
  *  @param[in] in 1x3 matrix with pixels
  *  @param[in] coeff 1x2 matrix with coefficients
@@ -646,7 +646,7 @@ fir1x3m_sym_nrm(s_1w_1x3_matrix in,
 		s_1w_1x2_matrix coeff,
 		tvector1w out_shift);
 
-/** @brief Symmetric 3 tap filter with normalization
+/* @brief Symmetric 3 tap filter with normalization
  *
  *  @param[in] in 1x3 matrix with pixels
  *  @param[in] coeff 1x2 matrix with coefficients
@@ -666,7 +666,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 fir1x3m_sym_nrm_approx(s_1w_1x3_matrix in,
 		       s_1w_1x2_matrix coeff);
 
-/** @brief Mean of 1x3 matrix
+/* @brief Mean of 1x3 matrix
  *
  *  @param[in] m 1x3 matrix with pixels
  *
@@ -678,7 +678,7 @@ fir1x3m_sym_nrm_approx(s_1w_1x3_matrix in,
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x3m(
 	s_1w_1x3_matrix m);
 
-/** @brief Mean of 3x3 matrix
+/* @brief Mean of 3x3 matrix
  *
  *  @param[in] m 3x3 matrix with pixels
  *
@@ -690,7 +690,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x3m(
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean3x3m(
 	s_1w_3x3_matrix m);
 
-/** @brief Mean of 1x4 matrix
+/* @brief Mean of 1x4 matrix
  *
  *  @param[in] m 1x4 matrix with pixels
  *
@@ -701,7 +701,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean3x3m(
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x4m(
 	s_1w_1x4_matrix m);
 
-/** @brief Mean of 4x4 matrix
+/* @brief Mean of 4x4 matrix
  *
  *  @param[in] m 4x4 matrix with pixels
  *
@@ -712,7 +712,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x4m(
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean4x4m(
 	s_1w_4x4_matrix m);
 
-/** @brief Mean of 2x3 matrix
+/* @brief Mean of 2x3 matrix
  *
  *  @param[in] m 2x3 matrix with pixels
  *
@@ -724,7 +724,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean4x4m(
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean2x3m(
 	s_1w_2x3_matrix m);
 
-/** @brief Mean of 1x5 matrix
+/* @brief Mean of 1x5 matrix
  *
  *  @param[in] m 1x5 matrix with pixels
  *
@@ -735,7 +735,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean2x3m(
 */
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x5m(s_1w_1x5_matrix m);
 
-/** @brief Mean of 1x6 matrix
+/* @brief Mean of 1x6 matrix
  *
  *  @param[in] m 1x6 matrix with pixels
  *
@@ -747,7 +747,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x5m(s_1w_1x5_matrix m);
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x6m(
 	s_1w_1x6_matrix m);
 
-/** @brief Mean of 5x5 matrix
+/* @brief Mean of 5x5 matrix
  *
  *  @param[in] m 5x5 matrix with pixels
  *
@@ -759,7 +759,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x6m(
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean5x5m(
 	s_1w_5x5_matrix m);
 
-/** @brief Mean of 6x6 matrix
+/* @brief Mean of 6x6 matrix
  *
  *  @param[in] m 6x6 matrix with pixels
  *
@@ -771,7 +771,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean5x5m(
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean6x6m(
 	s_1w_6x6_matrix m);
 
-/** @brief Minimum of 4x4 matrix
+/* @brief Minimum of 4x4 matrix
  *
  *  @param[in] m 4x4 matrix with pixels
  *
@@ -783,7 +783,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean6x6m(
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w min4x4m(
 	s_1w_4x4_matrix m);
 
-/** @brief Maximum of 4x4 matrix
+/* @brief Maximum of 4x4 matrix
  *
  *  @param[in] m 4x4 matrix with pixels
  *
@@ -795,7 +795,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w min4x4m(
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w max4x4m(
 	s_1w_4x4_matrix m);
 
-/** @brief SAD between two 3x3 matrices
+/* @brief SAD between two 3x3 matrices
  *
  *  @param[in] a 3x3 matrix with pixels
  *
@@ -813,7 +813,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad3x3m_precise(
 	s_1w_3x3_matrix a,
 	s_1w_3x3_matrix b);
 
-/** @brief SAD between two 3x3 matrices
+/* @brief SAD between two 3x3 matrices
  *
  *  @param[in] a 3x3 matrix with pixels
  *
@@ -833,7 +833,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad3x3m(
 	s_1w_3x3_matrix a,
 	s_1w_3x3_matrix b);
 
-/** @brief SAD between two 5x5 matrices
+/* @brief SAD between two 5x5 matrices
  *
  *  @param[in] a 5x5 matrix with pixels
  *
@@ -847,7 +847,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad5x5m(
 	s_1w_5x5_matrix a,
 	s_1w_5x5_matrix b);
 
-/** @brief Absolute gradient between two sets of 1x5 matrices
+/* @brief Absolute gradient between two sets of 1x5 matrices
  *
  *  @param[in] m0 first set of 1x5 matrix with pixels
  *  @param[in] m1 second set of 1x5 matrix with pixels
@@ -860,7 +860,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad5x5m(
 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 absgrad1x5m(s_1w_1x5_matrix m0, s_1w_1x5_matrix m1);
 
-/** @brief Bi-linear Interpolation optimized(approximate)
+/* @brief Bi-linear Interpolation optimized(approximate)
  *
  * @param[in] a input0
  * @param[in] b input1
@@ -882,7 +882,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_bilinear_interpol_approx_c(
 	tvector1w b,
 	tscalar1w_weight c);
 
-/** @brief Bi-linear Interpolation optimized(approximate)
+/* @brief Bi-linear Interpolation optimized(approximate)
  *
  * @param[in] a input0
  * @param[in] b input1
@@ -904,7 +904,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_bilinear_interpol_approx(
 	tvector1w b,
 	tvector1w_weight c);
 
-/** @brief Bi-linear Interpolation
+/* @brief Bi-linear Interpolation
  *
  * @param[in] a input0
  * @param[in] b input1
@@ -925,7 +925,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_bilinear_interpol(
 	tvector1w b,
 	tscalar1w_weight c);
 
-/** @brief Generic Block Matching Algorithm
+/* @brief Generic Block Matching Algorithm
  * @param[in] search_window pointer to input search window of 16x16 pixels
  * @param[in] ref_block pointer to input reference block of 8x8 pixels, where N<=M
  * @param[in] output pointer to output sads
@@ -954,9 +954,9 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H int generic_block_matching_algorithm(
 	tscalar1w_4bit_bma_shift shift);
 
 #ifndef ISP2401
-/** @brief OP_1w_asp_bma_16_1_32way
+/* @brief OP_1w_asp_bma_16_1_32way
 #else
-/** @brief OP_1w_asp_bma_16_1_32way_nomask
+/* @brief OP_1w_asp_bma_16_1_32way_nomask
 #endif
  *
  * @param[in] search_area input search window of 16x16 pixels
@@ -984,9 +984,9 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_1 OP_1w_asp_bma_16_1_32way_nomask(
 	tscalar1w_4bit_bma_shift shift);
 
 #ifndef ISP2401
-/** @brief OP_1w_asp_bma_16_2_32way
+/* @brief OP_1w_asp_bma_16_2_32way
 #else
-/** @brief OP_1w_asp_bma_16_2_32way_nomask
+/* @brief OP_1w_asp_bma_16_2_32way_nomask
 #endif
  *
  * @param[in] search_area input search window of 16x16 pixels
@@ -1011,9 +1011,9 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_2 OP_1w_asp_bma_16_2_32way_nomask(
 	ref_block_8x8 input_block,
 	tscalar1w_4bit_bma_shift shift);
 #ifndef ISP2401
-/** @brief OP_1w_asp_bma_14_1_32way
+/* @brief OP_1w_asp_bma_14_1_32way
 #else
-/** @brief OP_1w_asp_bma_14_1_32way_nomask
+/* @brief OP_1w_asp_bma_14_1_32way_nomask
 #endif
  *
  * @param[in] search_area input search block of 16x16 pixels with search window of 14x14 pixels
@@ -1041,9 +1041,9 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_1 OP_1w_asp_bma_14_1_32way_nomask(
 	tscalar1w_4bit_bma_shift shift);
 
 #ifndef ISP2401
-/** @brief OP_1w_asp_bma_14_2_32way
+/* @brief OP_1w_asp_bma_14_2_32way
 #else
-/** @brief OP_1w_asp_bma_14_2_32way_nomask
+/* @brief OP_1w_asp_bma_14_2_32way_nomask
 #endif
  *
  * @param[in] search_area input search block of 16x16 pixels with search window of 14x14 pixels
@@ -1069,7 +1069,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_2 OP_1w_asp_bma_14_2_32way_nomask(
 	tscalar1w_4bit_bma_shift shift);
 
 #ifdef ISP2401
-/** @brief multiplex addition and passing
+/* @brief multiplex addition and passing
  *
  *  @param[in] _a first pixel
  *  @param[in] _b second pixel
@@ -1087,7 +1087,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_cond_add(
 
 #endif
 #ifdef HAS_bfa_unit
-/** @brief OP_1w_single_bfa_7x7
+/* @brief OP_1w_single_bfa_7x7
  *
  * @param[in] weights - spatial and range weight lut
  * @param[in] threshold - threshold plane, for range weight scaling
@@ -1115,7 +1115,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H bfa_7x7_output OP_1w_single_bfa_7x7(
 	tvector1w central_pix,
 	s_1w_7x7_matrix src_plane);
 
-/** @brief OP_1w_joint_bfa_7x7
+/* @brief OP_1w_joint_bfa_7x7
  *
  * @param[in] weights - spatial and range weight lut
  * @param[in] threshold0 - 1st threshold plane, for range weight scaling
@@ -1149,7 +1149,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H bfa_7x7_output OP_1w_joint_bfa_7x7(
 	tvector1w central_pix1,
 	s_1w_7x7_matrix src1_plane);
 
-/** @brief bbb_bfa_gen_spatial_weight_lut
+/* @brief bbb_bfa_gen_spatial_weight_lut
  *
  * @param[in] in - 7x7 matrix of spatial weights
  * @param[in] out - generated LUT
@@ -1163,7 +1163,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H void bbb_bfa_gen_spatial_weight_lut(
 	s_1w_7x7_matrix in,
 	tvector1w out[BFA_MAX_KWAY]);
 
-/** @brief bbb_bfa_gen_range_weight_lut
+/* @brief bbb_bfa_gen_range_weight_lut
  *
  * @param[in] in - input range weight,
  * @param[in] out - generated LUT
@@ -1184,7 +1184,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H void bbb_bfa_gen_range_weight_lut(
 #endif
 
 #ifdef ISP2401
-/** @brief OP_1w_imax32
+/* @brief OP_1w_imax32
  *
  * @param[in] src - structure that holds an array of 32 elements.
  *
@@ -1195,7 +1195,7 @@ STORAGE_CLASS_REF_VECTOR_FUNC_H void bbb_bfa_gen_range_weight_lut(
 STORAGE_CLASS_REF_VECTOR_FUNC_H int OP_1w_imax32(
 	imax32_ref_in_vector src);
 
-/** @brief OP_1w_imaxidx32
+/* @brief OP_1w_imaxidx32
  *
  * @param[in] src - structure that holds a vector of elements.
  *
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/math_support.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/math_support.h
index e85e5c889c1551ff402ff8458bbc00f237557633..6436dae0007e1c89b4b1a5a78e9032d900a60e1e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/math_support.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/math_support.h
@@ -168,7 +168,7 @@ static inline unsigned int round_half_down_mul(unsigned int a, unsigned int b)
 }
 #endif
 
-/** @brief Next Power of Two
+/* @brief Next Power of Two
  *
  *  @param[in] unsigned number
  *
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/string_support.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/string_support.h
index d80437c58bdecfa78368ae7e09d668c63e3b6ac6..f4d9674cdab64f47b8749260b97e27a6e495037c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/string_support.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/string_support.h
@@ -23,7 +23,7 @@
  */
 
 
-/** @brief Copy from src_buf to dest_buf.
+/* @brief Copy from src_buf to dest_buf.
  *
  * @param[out] dest_buf. Destination buffer to copy to
  * @param[in]  dest_size. The size of the destination buffer in bytes
@@ -53,7 +53,7 @@ static inline int memcpy_s(
 	return 0;
 }
 
-/** @brief Get the length of the string, excluding the null terminator
+/* @brief Get the length of the string, excluding the null terminator
  *
  * @param[in]  src_str. The source string
  * @param[in]  max_len. Look only for max_len bytes in the string
@@ -78,7 +78,7 @@ static size_t strnlen_s(
 	return ix;
 }
 
-/** @brief Copy string from src_str to dest_str
+/* @brief Copy string from src_str to dest_str
  *
  * @param[out] dest_str. Destination buffer to copy to
  * @param[in]  dest_size. The size of the destination buffer in bytes
@@ -120,7 +120,7 @@ static inline int strncpy_s(
 	return 0;
 }
 
-/** @brief Copy string from src_str to dest_str
+/* @brief Copy string from src_str to dest_str
  *
  * @param[out] dest_str. Destination buffer to copy to
  * @param[in]  dest_size. The size of the destination buffer in bytes
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_shared/host/tag.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_shared/host/tag.c
index 9aa8c168a8036df07f16182125afa298e6ed43f6..2cf1d58941bfc5f806c1f0920c9587a5f4f1ec59 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_shared/host/tag.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_shared/host/tag.c
@@ -17,7 +17,7 @@
 #include <assert_support.h>
 #include "tag_local.h"
 
-/**
+/*
  * @brief	Creates the tag description from the given parameters.
  * @param[in]	num_captures
  * @param[in]	skip
@@ -39,7 +39,7 @@ sh_css_create_tag_descr(int num_captures,
 	tag_descr->exp_id	= exp_id;
 }
 
-/**
+/*
  * @brief	Encodes the members of tag description into a 32-bit value.
  * @param[in]	tag		Pointer to the tag description
  * @return	(unsigned int)	Encoded 32-bit tag-info
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css.h
index 2458b3767c90c77a3d065be1a03ee651b082f813..e44df6916d900b377cabd16470eac28ccbfbb13c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css.h
@@ -16,7 +16,7 @@
 #ifndef _IA_CSS_H_
 #define _IA_CSS_H_
 
-/** @file
+/* @file
  * This file is the starting point of the CSS-API. It includes all CSS-API
  * header files.
  */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_3a.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_3a.h
index a80a7dbaf712e333fc0543eeb468bf416723850e..080198796ad0d4848456e74fd8dc736915cea957 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_3a.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_3a.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_3A_H
 #define __IA_CSS_3A_H
 
-/** @file
+/* @file
  * This file contains types used for 3A statistics
  */
 
@@ -31,7 +31,7 @@ enum ia_css_3a_tables {
 	IA_CSS_NUM_3A_TABLES
 };
 
-/** Structure that holds 3A statistics in the ISP internal
+/* Structure that holds 3A statistics in the ISP internal
  * format. Use ia_css_get_3a_statistics() to translate
  * this to the format used on the host (3A library).
  * */
@@ -48,13 +48,13 @@ struct ia_css_isp_3a_statistics {
 	struct {
 		ia_css_ptr rgby_tbl;
 	} data_hmem;
-	uint32_t exp_id;     /**< exposure id, to match statistics to a frame,
+	uint32_t exp_id;     /** exposure id, to match statistics to a frame,
 			          see ia_css_event_public.h for more detail. */
-	uint32_t isp_config_id;/**< Unique ID to track which config was actually applied to a particular frame */
-	ia_css_ptr data_ptr; /**< pointer to base of all data */
-	uint32_t   size;     /**< total size of all data */
+	uint32_t isp_config_id;/** Unique ID to track which config was actually applied to a particular frame */
+	ia_css_ptr data_ptr; /** pointer to base of all data */
+	uint32_t   size;     /** total size of all data */
 	uint32_t   dmem_size;
-	uint32_t   vmem_size; /**< both lo and hi have this size */
+	uint32_t   vmem_size; /** both lo and hi have this size */
 	uint32_t   hmem_size;
 };
 #define SIZE_OF_DMEM_STRUCT						\
@@ -77,7 +77,7 @@ struct ia_css_isp_3a_statistics {
 	 SIZE_OF_IA_CSS_PTR +						\
 	 4 * sizeof(uint32_t))
 
-/** Map with host-side pointers to ISP-format statistics.
+/* Map with host-side pointers to ISP-format statistics.
  * These pointers can either be copies of ISP data or memory mapped
  * ISP pointers.
  * All of the data behind these pointers is allocated contiguously, the
@@ -85,17 +85,17 @@ struct ia_css_isp_3a_statistics {
  * point into this one block of data.
  */
 struct ia_css_isp_3a_statistics_map {
-	void                    *data_ptr; /**< Pointer to start of memory */
+	void                    *data_ptr; /** Pointer to start of memory */
 	struct ia_css_3a_output *dmem_stats;
 	uint16_t                *vmem_stats_hi;
 	uint16_t                *vmem_stats_lo;
 	struct ia_css_bh_table  *hmem_stats;
-	uint32_t                 size; /**< total size in bytes of data_ptr */
-	uint32_t                 data_allocated; /**< indicate whether data_ptr
+	uint32_t                 size; /** total size in bytes of data_ptr */
+	uint32_t                 data_allocated; /** indicate whether data_ptr
 						    was allocated or not. */
 };
 
-/** @brief Copy and translate 3A statistics from an ISP buffer to a host buffer
+/* @brief Copy and translate 3A statistics from an ISP buffer to a host buffer
  * @param[out]	host_stats Host buffer.
  * @param[in]	isp_stats ISP buffer.
  * @return	error value if temporary memory cannot be allocated
@@ -109,7 +109,7 @@ enum ia_css_err
 ia_css_get_3a_statistics(struct ia_css_3a_statistics           *host_stats,
 			 const struct ia_css_isp_3a_statistics *isp_stats);
 
-/** @brief Translate 3A statistics from ISP format to host format.
+/* @brief Translate 3A statistics from ISP format to host format.
  * @param[out]	host_stats host-format statistics
  * @param[in]	isp_stats  ISP-format statistics
  * @return	None
@@ -125,35 +125,35 @@ ia_css_translate_3a_statistics(
 
 /* Convenience functions for alloc/free of certain datatypes */
 
-/** @brief Allocate memory for the 3a statistics on the ISP
+/* @brief Allocate memory for the 3a statistics on the ISP
  * @param[in]	grid The grid.
  * @return		Pointer to the allocated 3a statistics buffer on the ISP
 */
 struct ia_css_isp_3a_statistics *
 ia_css_isp_3a_statistics_allocate(const struct ia_css_3a_grid_info *grid);
 
-/** @brief Free the 3a statistics memory on the isp
+/* @brief Free the 3a statistics memory on the isp
  * @param[in]	me Pointer to the 3a statistics buffer on the ISP.
  * @return		None
 */
 void
 ia_css_isp_3a_statistics_free(struct ia_css_isp_3a_statistics *me);
 
-/** @brief Allocate memory for the 3a statistics on the host
+/* @brief Allocate memory for the 3a statistics on the host
  * @param[in]	grid The grid.
  * @return		Pointer to the allocated 3a statistics buffer on the host
 */
 struct ia_css_3a_statistics *
 ia_css_3a_statistics_allocate(const struct ia_css_3a_grid_info *grid);
 
-/** @brief Free the 3a statistics memory on the host
+/* @brief Free the 3a statistics memory on the host
  * @param[in]	me Pointer to the 3a statistics buffer on the host.
  * @return		None
  */
 void
 ia_css_3a_statistics_free(struct ia_css_3a_statistics *me);
 
-/** @brief Allocate a 3a statistics map structure
+/* @brief Allocate a 3a statistics map structure
  * @param[in]	isp_stats pointer to ISP 3a statistis struct
  * @param[in]	data_ptr  host-side pointer to ISP 3a statistics.
  * @return		Pointer to the allocated 3a statistics map
@@ -174,7 +174,7 @@ ia_css_isp_3a_statistics_map_allocate(
 	const struct ia_css_isp_3a_statistics *isp_stats,
 	void *data_ptr);
 
-/** @brief Free the 3a statistics map
+/* @brief Free the 3a statistics map
  * @param[in]	me Pointer to the 3a statistics map
  * @return		None
  *
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_acc_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_acc_types.h
index a2a1873aca83e09cddb7257b681cd198738d63db..138bc3bb4627e5056f1fc1e828142f3ab1f430f3 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_acc_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_acc_types.h
@@ -15,7 +15,7 @@
 #ifndef _IA_CSS_ACC_TYPES_H
 #define _IA_CSS_ACC_TYPES_H
 
-/** @file
+/* @file
  * This file contains types used for acceleration
  */
 
@@ -40,16 +40,16 @@
  * in the kernel and HAL.
 */
 
-/** Type of acceleration.
+/* Type of acceleration.
  */
 enum ia_css_acc_type {
-	IA_CSS_ACC_NONE,	/**< Normal binary */
-	IA_CSS_ACC_OUTPUT,	/**< Accelerator stage on output frame */
-	IA_CSS_ACC_VIEWFINDER,	/**< Accelerator stage on viewfinder frame */
-	IA_CSS_ACC_STANDALONE,	/**< Stand-alone acceleration */
+	IA_CSS_ACC_NONE,	/** Normal binary */
+	IA_CSS_ACC_OUTPUT,	/** Accelerator stage on output frame */
+	IA_CSS_ACC_VIEWFINDER,	/** Accelerator stage on viewfinder frame */
+	IA_CSS_ACC_STANDALONE,	/** Stand-alone acceleration */
 };
 
-/** Cells types
+/* Cells types
  */
 enum ia_css_cell_type {
 	IA_CSS_SP0 = 0,
@@ -58,45 +58,45 @@ enum ia_css_cell_type {
 	MAX_NUM_OF_CELLS
 };
 
-/** Firmware types.
+/* Firmware types.
  */
 enum ia_css_fw_type {
-	ia_css_sp_firmware,		/**< Firmware for the SP */
-	ia_css_isp_firmware,	/**< Firmware for the ISP */
-	ia_css_bootloader_firmware, /**< Firmware for the BootLoader */
-	ia_css_acc_firmware		/**< Firmware for accelrations */
+	ia_css_sp_firmware,		/** Firmware for the SP */
+	ia_css_isp_firmware,	/** Firmware for the ISP */
+	ia_css_bootloader_firmware, /** Firmware for the BootLoader */
+	ia_css_acc_firmware		/** Firmware for accelrations */
 };
 
 struct ia_css_blob_descr;
 
-/** Blob descriptor.
+/* Blob descriptor.
  * This structure describes an SP or ISP blob.
  * It describes the test, data and bss sections as well as position in a
  * firmware file.
  * For convenience, it contains dynamic data after loading.
  */
 struct ia_css_blob_info {
-	/**< Static blob data */
-	uint32_t offset;		/**< Blob offset in fw file */
-	struct ia_css_isp_param_memory_offsets memory_offsets;  /**< offset wrt hdr in bytes */
-	uint32_t prog_name_offset;  /**< offset wrt hdr in bytes */
-	uint32_t size;			/**< Size of blob */
-	uint32_t padding_size;	/**< total cummulative of bytes added due to section alignment */
-	uint32_t icache_source;	/**< Position of icache in blob */
-	uint32_t icache_size;	/**< Size of icache section */
-	uint32_t icache_padding;/**< bytes added due to icache section alignment */
-	uint32_t text_source;	/**< Position of text in blob */
-	uint32_t text_size;		/**< Size of text section */
-	uint32_t text_padding;	/**< bytes added due to text section alignment */
-	uint32_t data_source;	/**< Position of data in blob */
-	uint32_t data_target;	/**< Start of data in SP dmem */
-	uint32_t data_size;		/**< Size of text section */
-	uint32_t data_padding;	/**< bytes added due to data section alignment */
-	uint32_t bss_target;	/**< Start position of bss in SP dmem */
-	uint32_t bss_size;		/**< Size of bss section */
-	/**< Dynamic data filled by loader */
-	CSS_ALIGN(const void  *code, 8);		/**< Code section absolute pointer within fw, code = icache + text */
-	CSS_ALIGN(const void  *data, 8);		/**< Data section absolute pointer within fw, data = data + bss */
+	/** Static blob data */
+	uint32_t offset;		/** Blob offset in fw file */
+	struct ia_css_isp_param_memory_offsets memory_offsets;  /** offset wrt hdr in bytes */
+	uint32_t prog_name_offset;  /** offset wrt hdr in bytes */
+	uint32_t size;			/** Size of blob */
+	uint32_t padding_size;	/** total cummulative of bytes added due to section alignment */
+	uint32_t icache_source;	/** Position of icache in blob */
+	uint32_t icache_size;	/** Size of icache section */
+	uint32_t icache_padding;/** bytes added due to icache section alignment */
+	uint32_t text_source;	/** Position of text in blob */
+	uint32_t text_size;		/** Size of text section */
+	uint32_t text_padding;	/** bytes added due to text section alignment */
+	uint32_t data_source;	/** Position of data in blob */
+	uint32_t data_target;	/** Start of data in SP dmem */
+	uint32_t data_size;		/** Size of text section */
+	uint32_t data_padding;	/** bytes added due to data section alignment */
+	uint32_t bss_target;	/** Start position of bss in SP dmem */
+	uint32_t bss_size;		/** Size of bss section */
+	/** Dynamic data filled by loader */
+	CSS_ALIGN(const void  *code, 8);		/** Code section absolute pointer within fw, code = icache + text */
+	CSS_ALIGN(const void  *data, 8);		/** Data section absolute pointer within fw, data = data + bss */
 };
 
 struct ia_css_binary_input_info {
@@ -140,9 +140,9 @@ struct ia_css_binary_s3a_info {
 	uint32_t		fixed_s3a_deci_log;
 };
 
-/** DPC related binary info */
+/* DPC related binary info */
 struct ia_css_binary_dpc_info {
-	uint32_t		bnr_lite; /**< bnr lite enable flag */
+	uint32_t		bnr_lite; /** bnr lite enable flag */
 };
 
 struct ia_css_binary_iterator_info {
@@ -193,7 +193,7 @@ struct ia_css_binary_block_info {
 	uint32_t	output_block_height;
 };
 
-/** Structure describing an ISP binary.
+/* Structure describing an ISP binary.
  * It describes the capabilities of a binary, like the maximum resolution,
  * support features, dma channels, uds features, etc.
  * This part is to be used by the SP.
@@ -210,7 +210,7 @@ struct ia_css_binary_info {
 	struct ia_css_binary_dvs_info		dvs;
 	struct ia_css_binary_vf_dec_info	vf_dec;
 	struct ia_css_binary_s3a_info		s3a;
-	struct ia_css_binary_dpc_info		dpc_bnr; /**< DPC related binary info */
+	struct ia_css_binary_dpc_info		dpc_bnr; /** DPC related binary info */
 	struct ia_css_binary_iterator_info	iterator;
 	struct ia_css_binary_address_info	addresses;
 	struct ia_css_binary_uds_info		uds;
@@ -269,7 +269,7 @@ struct ia_css_binary_info {
 	} dma;
 };
 
-/** Structure describing an ISP binary.
+/* Structure describing an ISP binary.
  * It describes the capabilities of a binary, like the maximum resolution,
  * support features, dma channels, uds features, etc.
  */
@@ -281,8 +281,8 @@ struct ia_css_binary_xinfo {
 	enum ia_css_acc_type	     type;
 	CSS_ALIGN(int32_t	     num_output_formats, 8);
 	enum ia_css_frame_format     output_formats[IA_CSS_FRAME_FORMAT_NUM];
-	CSS_ALIGN(int32_t	     num_vf_formats, 8); /**< number of supported vf formats */
-	enum ia_css_frame_format     vf_formats[IA_CSS_FRAME_FORMAT_NUM]; /**< types of supported vf formats */
+	CSS_ALIGN(int32_t	     num_vf_formats, 8); /** number of supported vf formats */
+	enum ia_css_frame_format     vf_formats[IA_CSS_FRAME_FORMAT_NUM]; /** types of supported vf formats */
 	uint8_t			     num_output_pins;
 	ia_css_ptr		     xmem_addr;
 	CSS_ALIGN(const struct ia_css_blob_descr *blob, 8);
@@ -291,55 +291,55 @@ struct ia_css_binary_xinfo {
 	CSS_ALIGN(struct ia_css_binary_xinfo *next, 8);
 };
 
-/** Structure describing the Bootloader (an ISP binary).
+/* Structure describing the Bootloader (an ISP binary).
  * It contains several address, either in ddr, isp_dmem or
  * the entry function in icache.
  */
 struct ia_css_bl_info {
-	uint32_t num_dma_cmds;	/**< Number of cmds sent by CSS */
-	uint32_t dma_cmd_list;	/**< Dma command list sent by CSS */
-	uint32_t sw_state;	/**< Polled from css */
+	uint32_t num_dma_cmds;	/** Number of cmds sent by CSS */
+	uint32_t dma_cmd_list;	/** Dma command list sent by CSS */
+	uint32_t sw_state;	/** Polled from css */
 	/* Entry functions */
-	uint32_t bl_entry;	/**< The SP entry function */
+	uint32_t bl_entry;	/** The SP entry function */
 };
 
-/** Structure describing the SP binary.
+/* Structure describing the SP binary.
  * It contains several address, either in ddr, sp_dmem or
  * the entry function in pmem.
  */
 struct ia_css_sp_info {
-	uint32_t init_dmem_data; /**< data sect config, stored to dmem */
-	uint32_t per_frame_data; /**< Per frame data, stored to dmem */
-	uint32_t group;		/**< Per pipeline data, loaded by dma */
-	uint32_t output;		/**< SP output data, loaded by dmem */
-	uint32_t host_sp_queue;	/**< Host <-> SP queues */
-	uint32_t host_sp_com;/**< Host <-> SP commands */
-	uint32_t isp_started;	/**< Polled from sensor thread, csim only */
-	uint32_t sw_state;	/**< Polled from css */
-	uint32_t host_sp_queues_initialized; /**< Polled from the SP */
-	uint32_t sleep_mode;  /**< different mode to halt SP */
-	uint32_t invalidate_tlb;		/**< inform SP to invalidate mmu TLB */
+	uint32_t init_dmem_data; /** data sect config, stored to dmem */
+	uint32_t per_frame_data; /** Per frame data, stored to dmem */
+	uint32_t group;		/** Per pipeline data, loaded by dma */
+	uint32_t output;		/** SP output data, loaded by dmem */
+	uint32_t host_sp_queue;	/** Host <-> SP queues */
+	uint32_t host_sp_com;/** Host <-> SP commands */
+	uint32_t isp_started;	/** Polled from sensor thread, csim only */
+	uint32_t sw_state;	/** Polled from css */
+	uint32_t host_sp_queues_initialized; /** Polled from the SP */
+	uint32_t sleep_mode;  /** different mode to halt SP */
+	uint32_t invalidate_tlb;		/** inform SP to invalidate mmu TLB */
 #ifndef ISP2401
-	uint32_t stop_copy_preview;       /**< suspend copy and preview pipe when capture */
+	uint32_t stop_copy_preview;       /** suspend copy and preview pipe when capture */
 #endif
-	uint32_t debug_buffer_ddr_address;	/**< inform SP the address
+	uint32_t debug_buffer_ddr_address;	/** inform SP the address
 	of DDR debug queue */
-	uint32_t perf_counter_input_system_error; /**< input system perf
+	uint32_t perf_counter_input_system_error; /** input system perf
 	counter array */
 #ifdef HAS_WATCHDOG_SP_THREAD_DEBUG
-	uint32_t debug_wait; /**< thread/pipe post mortem debug */
-	uint32_t debug_stage; /**< thread/pipe post mortem debug */
-	uint32_t debug_stripe; /**< thread/pipe post mortem debug */
+	uint32_t debug_wait; /** thread/pipe post mortem debug */
+	uint32_t debug_stage; /** thread/pipe post mortem debug */
+	uint32_t debug_stripe; /** thread/pipe post mortem debug */
 #endif
-	uint32_t threads_stack; /**< sp thread's stack pointers */
-	uint32_t threads_stack_size; /**< sp thread's stack sizes */
-	uint32_t curr_binary_id;        /**< current binary id */
-	uint32_t raw_copy_line_count;   /**< raw copy line counter */
-	uint32_t ddr_parameter_address; /**< acc param ddrptr, sp dmem */
-	uint32_t ddr_parameter_size;    /**< acc param size, sp dmem */
+	uint32_t threads_stack; /** sp thread's stack pointers */
+	uint32_t threads_stack_size; /** sp thread's stack sizes */
+	uint32_t curr_binary_id;        /** current binary id */
+	uint32_t raw_copy_line_count;   /** raw copy line counter */
+	uint32_t ddr_parameter_address; /** acc param ddrptr, sp dmem */
+	uint32_t ddr_parameter_size;    /** acc param size, sp dmem */
 	/* Entry functions */
-	uint32_t sp_entry;	/**< The SP entry function */
-	uint32_t tagger_frames_addr;   /**< Base address of tagger state */
+	uint32_t sp_entry;	/** The SP entry function */
+	uint32_t tagger_frames_addr;   /** Base address of tagger state */
 };
 
 /* The following #if is there because this header file is also included
@@ -348,37 +348,37 @@ struct ia_css_sp_info {
    More permanent solution will be to refactor this include.
 */
 #if !defined(__ISP)
-/** Accelerator firmware information.
+/* Accelerator firmware information.
  */
 struct ia_css_acc_info {
-	uint32_t per_frame_data; /**< Dummy for now */
+	uint32_t per_frame_data; /** Dummy for now */
 };
 
-/** Firmware information.
+/* Firmware information.
  */
 union ia_css_fw_union {
-	struct ia_css_binary_xinfo	isp; /**< ISP info */
-	struct ia_css_sp_info		sp;  /**< SP info */
-	struct ia_css_bl_info           bl;  /**< Bootloader info */
-	struct ia_css_acc_info		acc; /**< Accelerator info */
+	struct ia_css_binary_xinfo	isp; /** ISP info */
+	struct ia_css_sp_info		sp;  /** SP info */
+	struct ia_css_bl_info           bl;  /** Bootloader info */
+	struct ia_css_acc_info		acc; /** Accelerator info */
 };
 
-/** Firmware information.
+/* Firmware information.
  */
 struct ia_css_fw_info {
-	size_t			 header_size; /**< size of fw header */
+	size_t			 header_size; /** size of fw header */
 	CSS_ALIGN(uint32_t type, 8);
-	union ia_css_fw_union	 info; /**< Binary info */
-	struct ia_css_blob_info  blob; /**< Blob info */
+	union ia_css_fw_union	 info; /** Binary info */
+	struct ia_css_blob_info  blob; /** Blob info */
 	/* Dynamic part */
 	struct ia_css_fw_info   *next;
-	CSS_ALIGN(uint32_t       loaded, 8);	/**< Firmware has been loaded */
-	CSS_ALIGN(const uint8_t *isp_code, 8);  /**< ISP pointer to code */
-	/**< Firmware handle between user space and kernel */
+	CSS_ALIGN(uint32_t       loaded, 8);	/** Firmware has been loaded */
+	CSS_ALIGN(const uint8_t *isp_code, 8);  /** ISP pointer to code */
+	/** Firmware handle between user space and kernel */
 	CSS_ALIGN(uint32_t	handle, 8);
-	/**< Sections to copy from/to ISP */
+	/** Sections to copy from/to ISP */
 	struct ia_css_isp_param_css_segments mem_initializers;
-	/**< Initializer for local ISP memories */
+	/** Initializer for local ISP memories */
 };
 
 struct ia_css_blob_descr {
@@ -390,39 +390,39 @@ struct ia_css_blob_descr {
 
 struct ia_css_acc_fw;
 
-/** Structure describing the SP binary of a stand-alone accelerator.
+/* Structure describing the SP binary of a stand-alone accelerator.
  */
 struct ia_css_acc_sp {
-	void (*init)(struct ia_css_acc_fw *);	/**< init for crun */
-	uint32_t sp_prog_name_offset;		/**< program name offset wrt hdr in bytes */
-	uint32_t sp_blob_offset;		/**< blob offset wrt hdr in bytes */
-	void	 *entry;			/**< Address of sp entry point */
-	uint32_t *css_abort;			/**< SP dmem abort flag */
-	void	 *isp_code;			/**< SP dmem address holding xmem
+	void (*init)(struct ia_css_acc_fw *);	/** init for crun */
+	uint32_t sp_prog_name_offset;		/** program name offset wrt hdr in bytes */
+	uint32_t sp_blob_offset;		/** blob offset wrt hdr in bytes */
+	void	 *entry;			/** Address of sp entry point */
+	uint32_t *css_abort;			/** SP dmem abort flag */
+	void	 *isp_code;			/** SP dmem address holding xmem
 						     address of isp code */
-	struct ia_css_fw_info fw;		/**< SP fw descriptor */
-	const uint8_t *code;			/**< ISP pointer of allocated SP code */
+	struct ia_css_fw_info fw;		/** SP fw descriptor */
+	const uint8_t *code;			/** ISP pointer of allocated SP code */
 };
 
-/** Acceleration firmware descriptor.
+/* Acceleration firmware descriptor.
   * This descriptor descibes either SP code (stand-alone), or
   * ISP code (a separate pipeline stage).
   */
 struct ia_css_acc_fw_hdr {
-	enum ia_css_acc_type type;	/**< Type of accelerator */
-	uint32_t	isp_prog_name_offset; /**< program name offset wrt
+	enum ia_css_acc_type type;	/** Type of accelerator */
+	uint32_t	isp_prog_name_offset; /** program name offset wrt
 						   header in bytes */
-	uint32_t	isp_blob_offset;      /**< blob offset wrt header
+	uint32_t	isp_blob_offset;      /** blob offset wrt header
 						   in bytes */
-	uint32_t	isp_size;	      /**< Size of isp blob */
-	const uint8_t  *isp_code;	      /**< ISP pointer to code */
-	struct ia_css_acc_sp  sp;  /**< Standalone sp code */
-	/**< Firmware handle between user space and kernel */
+	uint32_t	isp_size;	      /** Size of isp blob */
+	const uint8_t  *isp_code;	      /** ISP pointer to code */
+	struct ia_css_acc_sp  sp;  /** Standalone sp code */
+	/** Firmware handle between user space and kernel */
 	uint32_t	handle;
-	struct ia_css_data parameters; /**< Current SP parameters */
+	struct ia_css_data parameters; /** Current SP parameters */
 };
 
-/** Firmware structure.
+/* Firmware structure.
   * This contains the header and actual blobs.
   * For standalone, it contains SP and ISP blob.
   * For a pipeline stage accelerator, it contains ISP code only.
@@ -430,7 +430,7 @@ struct ia_css_acc_fw_hdr {
   * header and computed using the access macros below.
   */
 struct ia_css_acc_fw {
-	struct ia_css_acc_fw_hdr header; /**< firmware header */
+	struct ia_css_acc_fw_hdr header; /** firmware header */
 	/*
 	int8_t   isp_progname[];	  **< ISP program name
 	int8_t   sp_progname[];	  **< SP program name, stand-alone only
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_buffer.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_buffer.h
index b2ecf3618c1524dce6b902876f997b28b7772321..a0058eac7d5a12e67ce2c0d7a908acbfebd4a539 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_buffer.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_buffer.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_BUFFER_H
 #define __IA_CSS_BUFFER_H
 
-/** @file
+/* @file
  * This file contains datastructures and types for buffers used in CSS
  */
 
@@ -23,7 +23,7 @@
 #include "ia_css_types.h"
 #include "ia_css_timer.h"
 
-/** Enumeration of buffer types. Buffers can be queued and de-queued
+/* Enumeration of buffer types. Buffers can be queued and de-queued
  *  to hand them over between IA and ISP.
  */
 enum ia_css_buffer_type {
@@ -48,28 +48,28 @@ enum ia_css_buffer_type {
 
 /* Driver API is not SP/ISP visible, 64 bit types not supported on hivecc */
 #if !defined(__ISP)
-/** Buffer structure. This is a container structure that enables content
+/* Buffer structure. This is a container structure that enables content
  *  independent buffer queues and access functions.
  */
 struct ia_css_buffer {
-	enum ia_css_buffer_type type; /**< Buffer type. */
+	enum ia_css_buffer_type type; /** Buffer type. */
 	unsigned int exp_id;
-	/**< exposure id for this buffer; 0 = not available
+	/** exposure id for this buffer; 0 = not available
 	     see ia_css_event_public.h for more detail. */
 	union {
-		struct ia_css_isp_3a_statistics  *stats_3a;    /**< 3A statistics & optionally RGBY statistics. */
-		struct ia_css_isp_dvs_statistics *stats_dvs;   /**< DVS statistics. */
-		struct ia_css_isp_skc_dvs_statistics *stats_skc_dvs;  /**< SKC DVS statistics. */
-		struct ia_css_frame              *frame;       /**< Frame buffer. */
-		struct ia_css_acc_param          *custom_data; /**< Custom buffer. */
-		struct ia_css_metadata           *metadata;    /**< Sensor metadata. */
-	} data; /**< Buffer data pointer. */
-	uint64_t driver_cookie; /**< cookie for the driver */
-	struct ia_css_time_meas timing_data; /**< timing data (readings from the timer) */
-	struct ia_css_clock_tick isys_eof_clock_tick; /**< ISYS's end of frame timer tick*/
+		struct ia_css_isp_3a_statistics  *stats_3a;    /** 3A statistics & optionally RGBY statistics. */
+		struct ia_css_isp_dvs_statistics *stats_dvs;   /** DVS statistics. */
+		struct ia_css_isp_skc_dvs_statistics *stats_skc_dvs;  /** SKC DVS statistics. */
+		struct ia_css_frame              *frame;       /** Frame buffer. */
+		struct ia_css_acc_param          *custom_data; /** Custom buffer. */
+		struct ia_css_metadata           *metadata;    /** Sensor metadata. */
+	} data; /** Buffer data pointer. */
+	uint64_t driver_cookie; /** cookie for the driver */
+	struct ia_css_time_meas timing_data; /** timing data (readings from the timer) */
+	struct ia_css_clock_tick isys_eof_clock_tick; /** ISYS's end of frame timer tick*/
 };
 
-/** @brief Dequeue param buffers from sp2host_queue
+/* @brief Dequeue param buffers from sp2host_queue
  *
  * @return                                       None
  *
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_control.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_control.h
index a15d3e3683419b30f494406b0e8a2f2f37a7ad1d..021a313fab857079fba9f714da58f4621a263859 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_control.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_control.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_CONTROL_H
 #define __IA_CSS_CONTROL_H
 
-/** @file
+/* @file
  * This file contains functionality for starting and controlling CSS
  */
 
@@ -24,7 +24,7 @@
 #include <ia_css_firmware.h>
 #include <ia_css_irq.h>
 
-/** @brief Initialize the CSS API.
+/* @brief Initialize the CSS API.
  * @param[in]	env		Environment, provides functions to access the
  *				environment in which the CSS code runs. This is
  *				used for host side memory access and message
@@ -51,7 +51,7 @@ enum ia_css_err ia_css_init(
 	uint32_t                 l1_base,
 	enum ia_css_irq_type     irq_type);
 
-/** @brief Un-initialize the CSS API.
+/* @brief Un-initialize the CSS API.
  * @return	None
  *
  * This function deallocates all memory that has been allocated by the CSS API
@@ -66,7 +66,7 @@ enum ia_css_err ia_css_init(
 void
 ia_css_uninit(void);
 
-/** @brief Suspend CSS API for power down
+/* @brief Suspend CSS API for power down
  * @return	success or faulure code
  *
  * suspend shuts down the system by:
@@ -80,7 +80,7 @@ ia_css_uninit(void);
 enum ia_css_err
 ia_css_suspend(void);
 
-/** @brief Resume CSS API from power down
+/* @brief Resume CSS API from power down
  * @return	success or failure code
  *
  * After a power cycle, this function will bring the CSS API back into
@@ -91,7 +91,7 @@ ia_css_suspend(void);
 enum ia_css_err
 ia_css_resume(void);
 
-/** @brief Enable use of a separate queue for ISYS events.
+/* @brief Enable use of a separate queue for ISYS events.
  *
  * @param[in]	enable: enable or disable use of separate ISYS event queues.
  * @return		error if called when SP is running.
@@ -105,7 +105,7 @@ ia_css_resume(void);
 enum ia_css_err
 ia_css_enable_isys_event_queue(bool enable);
 
-/** @brief Test whether the ISP has started.
+/* @brief Test whether the ISP has started.
  *
  * @return	Boolean flag true if the ISP has started or false otherwise.
  *
@@ -114,7 +114,7 @@ ia_css_enable_isys_event_queue(bool enable);
 bool
 ia_css_isp_has_started(void);
 
-/** @brief Test whether the SP has initialized.
+/* @brief Test whether the SP has initialized.
  *
  * @return	Boolean flag true if the SP has initialized or false otherwise.
  *
@@ -123,7 +123,7 @@ ia_css_isp_has_started(void);
 bool
 ia_css_sp_has_initialized(void);
 
-/** @brief Test whether the SP has terminated.
+/* @brief Test whether the SP has terminated.
  *
  * @return	Boolean flag true if the SP has terminated or false otherwise.
  *
@@ -132,7 +132,7 @@ ia_css_sp_has_initialized(void);
 bool
 ia_css_sp_has_terminated(void);
 
-/** @brief start SP hardware
+/* @brief start SP hardware
  *
  * @return			IA_CSS_SUCCESS or error code upon error.
  *
@@ -144,7 +144,7 @@ enum ia_css_err
 ia_css_start_sp(void);
 
 
-/** @brief stop SP hardware
+/* @brief stop SP hardware
  *
  * @return			IA_CSS_SUCCESS or error code upon error.
  *
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_device_access.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_device_access.h
index 59459f7a9876ea6b7ad1f357891c0456ca72ea88..84a960b7abbc046169a92a44bac4e499bd8285d3 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_device_access.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_device_access.h
@@ -15,7 +15,7 @@
 #ifndef _IA_CSS_DEVICE_ACCESS_H
 #define _IA_CSS_DEVICE_ACCESS_H
 
-/** @file
+/* @file
  * File containing internal functions for the CSS-API to access the CSS device.
  */
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_dvs.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_dvs.h
index 147bf81959d3b1a5e113ac01497c778b49f62253..1f01534964e3e851035f417404bf7619d319517a 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_dvs.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_dvs.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_DVS_H
 #define __IA_CSS_DVS_H
 
-/** @file
+/* @file
  * This file contains types for DVS statistics
  */
 
@@ -31,7 +31,7 @@ enum dvs_statistics_type {
 };
 
 
-/** Structure that holds DVS statistics in the ISP internal
+/* Structure that holds DVS statistics in the ISP internal
  * format. Use ia_css_get_dvs_statistics() to translate
  * this to the format used on the host (DVS engine).
  * */
@@ -40,12 +40,12 @@ struct ia_css_isp_dvs_statistics {
 	ia_css_ptr ver_proj;
 	uint32_t   hor_size;
 	uint32_t   ver_size;
-	uint32_t   exp_id;   /**< see ia_css_event_public.h for more detail */
+	uint32_t   exp_id;   /** see ia_css_event_public.h for more detail */
 	ia_css_ptr data_ptr; /* base pointer containing all memory */
 	uint32_t   size;     /* size of allocated memory in data_ptr */
 };
 
-/** Structure that holds SKC DVS statistics in the ISP internal
+/* Structure that holds SKC DVS statistics in the ISP internal
  * format. Use ia_css_dvs_statistics_get() to translate this to
  * the format used on the host.
  * */
@@ -82,7 +82,7 @@ union ia_css_dvs_statistics_host {
 	struct ia_css_skc_dvs_statistics *p_skc_dvs_statistics_host;
 };
 
-/** @brief Copy DVS statistics from an ISP buffer to a host buffer.
+/* @brief Copy DVS statistics from an ISP buffer to a host buffer.
  * @param[in]	host_stats Host buffer
  * @param[in]	isp_stats ISP buffer
  * @return	error value if temporary memory cannot be allocated
@@ -100,7 +100,7 @@ enum ia_css_err
 ia_css_get_dvs_statistics(struct ia_css_dvs_statistics *host_stats,
 			  const struct ia_css_isp_dvs_statistics *isp_stats);
 
-/** @brief Translate DVS statistics from ISP format to host format
+/* @brief Translate DVS statistics from ISP format to host format
  * @param[in]	host_stats Host buffer
  * @param[in]	isp_stats ISP buffer
  * @return	None
@@ -116,7 +116,7 @@ ia_css_translate_dvs_statistics(
 		struct ia_css_dvs_statistics *host_stats,
 		const struct ia_css_isp_dvs_statistics_map *isp_stats);
 
-/** @brief Copy DVS 2.0 statistics from an ISP buffer to a host buffer.
+/* @brief Copy DVS 2.0 statistics from an ISP buffer to a host buffer.
  * @param[in]	host_stats Host buffer
  * @param[in]	isp_stats ISP buffer
  * @return	error value if temporary memory cannot be allocated
@@ -134,7 +134,7 @@ enum ia_css_err
 ia_css_get_dvs2_statistics(struct ia_css_dvs2_statistics *host_stats,
 			   const struct ia_css_isp_dvs_statistics *isp_stats);
 
-/** @brief Translate DVS2 statistics from ISP format to host format
+/* @brief Translate DVS2 statistics from ISP format to host format
  * @param[in]	host_stats Host buffer
  * @param[in]	isp_stats ISP buffer
  * @return		None
@@ -150,7 +150,7 @@ ia_css_translate_dvs2_statistics(
 		struct ia_css_dvs2_statistics	   *host_stats,
 		const struct ia_css_isp_dvs_statistics_map *isp_stats);
 
-/** @brief Copy DVS statistics from an ISP buffer to a host buffer.
+/* @brief Copy DVS statistics from an ISP buffer to a host buffer.
  * @param[in] type - DVS statistics type
  * @param[in] host_stats Host buffer
  * @param[in] isp_stats ISP buffer
@@ -161,105 +161,105 @@ ia_css_dvs_statistics_get(enum dvs_statistics_type type,
 			  union ia_css_dvs_statistics_host  *host_stats,
 			  const union ia_css_dvs_statistics_isp *isp_stats);
 
-/** @brief Allocate the DVS statistics memory on the ISP
+/* @brief Allocate the DVS statistics memory on the ISP
  * @param[in]	grid The grid.
  * @return	Pointer to the allocated DVS statistics buffer on the ISP
 */
 struct ia_css_isp_dvs_statistics *
 ia_css_isp_dvs_statistics_allocate(const struct ia_css_dvs_grid_info *grid);
 
-/** @brief Free the DVS statistics memory on the ISP
+/* @brief Free the DVS statistics memory on the ISP
  * @param[in]	me Pointer to the DVS statistics buffer on the ISP.
  * @return	None
 */
 void
 ia_css_isp_dvs_statistics_free(struct ia_css_isp_dvs_statistics *me);
 
-/** @brief Allocate the DVS 2.0 statistics memory
+/* @brief Allocate the DVS 2.0 statistics memory
  * @param[in]	grid The grid.
  * @return	Pointer to the allocated DVS statistics buffer on the ISP
 */
 struct ia_css_isp_dvs_statistics *
 ia_css_isp_dvs2_statistics_allocate(const struct ia_css_dvs_grid_info *grid);
 
-/** @brief Free the DVS 2.0 statistics memory
+/* @brief Free the DVS 2.0 statistics memory
  * @param[in]	me Pointer to the DVS statistics buffer on the ISP.
  * @return	None
 */
 void
 ia_css_isp_dvs2_statistics_free(struct ia_css_isp_dvs_statistics *me);
 
-/** @brief Allocate the DVS statistics memory on the host
+/* @brief Allocate the DVS statistics memory on the host
  * @param[in]	grid The grid.
  * @return	Pointer to the allocated DVS statistics buffer on the host
 */
 struct ia_css_dvs_statistics *
 ia_css_dvs_statistics_allocate(const struct ia_css_dvs_grid_info *grid);
 
-/** @brief Free the DVS statistics memory on the host
+/* @brief Free the DVS statistics memory on the host
  * @param[in]	me Pointer to the DVS statistics buffer on the host.
  * @return	None
 */
 void
 ia_css_dvs_statistics_free(struct ia_css_dvs_statistics *me);
 
-/** @brief Allocate the DVS coefficients memory
+/* @brief Allocate the DVS coefficients memory
  * @param[in]	grid The grid.
  * @return	Pointer to the allocated DVS coefficients buffer
 */
 struct ia_css_dvs_coefficients *
 ia_css_dvs_coefficients_allocate(const struct ia_css_dvs_grid_info *grid);
 
-/** @brief Free the DVS coefficients memory
+/* @brief Free the DVS coefficients memory
  * @param[in]	me Pointer to the DVS coefficients buffer.
  * @return	None
  */
 void
 ia_css_dvs_coefficients_free(struct ia_css_dvs_coefficients *me);
 
-/** @brief Allocate the DVS 2.0 statistics memory on the host
+/* @brief Allocate the DVS 2.0 statistics memory on the host
  * @param[in]	grid The grid.
  * @return	Pointer to the allocated DVS 2.0 statistics buffer on the host
  */
 struct ia_css_dvs2_statistics *
 ia_css_dvs2_statistics_allocate(const struct ia_css_dvs_grid_info *grid);
 
-/** @brief Free the DVS 2.0 statistics memory
+/* @brief Free the DVS 2.0 statistics memory
  * @param[in]	me Pointer to the DVS 2.0 statistics buffer on the host.
  * @return	None
 */
 void
 ia_css_dvs2_statistics_free(struct ia_css_dvs2_statistics *me);
 
-/** @brief Allocate the DVS 2.0 coefficients memory
+/* @brief Allocate the DVS 2.0 coefficients memory
  * @param[in]	grid The grid.
  * @return	Pointer to the allocated DVS 2.0 coefficients buffer
 */
 struct ia_css_dvs2_coefficients *
 ia_css_dvs2_coefficients_allocate(const struct ia_css_dvs_grid_info *grid);
 
-/** @brief Free the DVS 2.0 coefficients memory
+/* @brief Free the DVS 2.0 coefficients memory
  * @param[in]	me Pointer to the DVS 2.0 coefficients buffer.
  * @return	None
 */
 void
 ia_css_dvs2_coefficients_free(struct ia_css_dvs2_coefficients *me);
 
-/** @brief Allocate the DVS 2.0 6-axis config memory
+/* @brief Allocate the DVS 2.0 6-axis config memory
  * @param[in]	stream The stream.
  * @return	Pointer to the allocated DVS 6axis configuration buffer
 */
 struct ia_css_dvs_6axis_config *
 ia_css_dvs2_6axis_config_allocate(const struct ia_css_stream *stream);
 
-/** @brief Free the DVS 2.0 6-axis config memory
+/* @brief Free the DVS 2.0 6-axis config memory
  * @param[in]	dvs_6axis_config Pointer to the DVS 6axis configuration buffer
  * @return	None
  */
 void
 ia_css_dvs2_6axis_config_free(struct ia_css_dvs_6axis_config *dvs_6axis_config);
 
-/** @brief Allocate a dvs statistics map structure
+/* @brief Allocate a dvs statistics map structure
  * @param[in]	isp_stats pointer to ISP dvs statistis struct
  * @param[in]	data_ptr  host-side pointer to ISP dvs statistics.
  * @return	Pointer to the allocated dvs statistics map
@@ -280,7 +280,7 @@ ia_css_isp_dvs_statistics_map_allocate(
 	const struct ia_css_isp_dvs_statistics *isp_stats,
 	void *data_ptr);
 
-/** @brief Free the dvs statistics map
+/* @brief Free the dvs statistics map
  * @param[in]	me Pointer to the dvs statistics map
  * @return	None
  *
@@ -291,7 +291,7 @@ ia_css_isp_dvs_statistics_map_allocate(
 void
 ia_css_isp_dvs_statistics_map_free(struct ia_css_isp_dvs_statistics_map *me);
 
-/** @brief Allocate memory for the SKC DVS statistics on the ISP
+/* @brief Allocate memory for the SKC DVS statistics on the ISP
  * @return		Pointer to the allocated ACC DVS statistics buffer on the ISP
 */
 struct ia_css_isp_skc_dvs_statistics *ia_css_skc_dvs_statistics_allocate(void);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_env.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_env.h
index 1ae9daf0be767cf50e34037e05766261561a0ba8..8b0218ee658de09fd67e47e984c283090f07b2af 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_env.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_env.h
@@ -20,12 +20,12 @@
 #include "ia_css_types.h"
 #include "ia_css_acc_types.h"
 
-/** @file
+/* @file
  * This file contains prototypes for functions that need to be provided to the
  * CSS-API host-code by the environment in which the CSS-API code runs.
  */
 
-/** Memory allocation attributes, for use in ia_css_css_mem_env. */
+/* Memory allocation attributes, for use in ia_css_css_mem_env. */
 enum ia_css_mem_attr {
 	IA_CSS_MEM_ATTR_CACHED = 1 << 0,
 	IA_CSS_MEM_ATTR_ZEROED = 1 << 1,
@@ -33,62 +33,62 @@ enum ia_css_mem_attr {
 	IA_CSS_MEM_ATTR_CONTIGUOUS = 1 << 3,
 };
 
-/** Environment with function pointers for local IA memory allocation.
+/* Environment with function pointers for local IA memory allocation.
  *  This provides the CSS code with environment specific functionality
  *  for memory allocation of small local buffers such as local data structures.
  *  This is never expected to allocate more than one page of memory (4K bytes).
  */
 struct ia_css_cpu_mem_env {
 	void (*flush)(struct ia_css_acc_fw *fw);
-	/**< Flush function to flush the cache for given accelerator. */
+	/** Flush function to flush the cache for given accelerator. */
 };
 
-/** Environment with function pointers to access the CSS hardware. This includes
+/* Environment with function pointers to access the CSS hardware. This includes
  *  registers and local memories.
  */
 struct ia_css_hw_access_env {
 	void (*store_8)(hrt_address addr, uint8_t data);
-	/**< Store an 8 bit value into an address in the CSS HW address space.
+	/** Store an 8 bit value into an address in the CSS HW address space.
 	     The address must be an 8 bit aligned address. */
 	void (*store_16)(hrt_address addr, uint16_t data);
-	/**< Store a 16 bit value into an address in the CSS HW address space.
+	/** Store a 16 bit value into an address in the CSS HW address space.
 	     The address must be a 16 bit aligned address. */
 	void (*store_32)(hrt_address addr, uint32_t data);
-	/**< Store a 32 bit value into an address in the CSS HW address space.
+	/** Store a 32 bit value into an address in the CSS HW address space.
 	     The address must be a 32 bit aligned address. */
 	uint8_t (*load_8)(hrt_address addr);
-	/**< Load an 8 bit value from an address in the CSS HW address
+	/** Load an 8 bit value from an address in the CSS HW address
 	     space. The address must be an 8 bit aligned address. */
 	uint16_t (*load_16)(hrt_address addr);
-	/**< Load a 16 bit value from an address in the CSS HW address
+	/** Load a 16 bit value from an address in the CSS HW address
 	     space. The address must be a 16 bit aligned address. */
 	uint32_t (*load_32)(hrt_address addr);
-	/**< Load a 32 bit value from an address in the CSS HW address
+	/** Load a 32 bit value from an address in the CSS HW address
 	     space. The address must be a 32 bit aligned address. */
 	void (*store)(hrt_address addr, const void *data, uint32_t bytes);
-	/**< Store a number of bytes into a byte-aligned address in the CSS HW address space. */
+	/** Store a number of bytes into a byte-aligned address in the CSS HW address space. */
 	void (*load)(hrt_address addr, void *data, uint32_t bytes);
-	/**< Load a number of bytes from a byte-aligned address in the CSS HW address space. */
+	/** Load a number of bytes from a byte-aligned address in the CSS HW address space. */
 };
 
-/** Environment with function pointers to print error and debug messages.
+/* Environment with function pointers to print error and debug messages.
  */
 struct ia_css_print_env {
 	int (*debug_print)(const char *fmt, va_list args);
-	/**< Print a debug message. */
+	/** Print a debug message. */
 	int (*error_print)(const char *fmt, va_list args);
-	/**< Print an error message.*/
+	/** Print an error message.*/
 };
 
-/** Environment structure. This includes function pointers to access several
+/* Environment structure. This includes function pointers to access several
  *  features provided by the environment in which the CSS API is used.
  *  This is used to run the camera IP in multiple platforms such as Linux,
  *  Windows and several simulation environments.
  */
 struct ia_css_env {
-	struct ia_css_cpu_mem_env   cpu_mem_env;   /**< local flush. */
-	struct ia_css_hw_access_env hw_access_env; /**< CSS HW access functions */
-	struct ia_css_print_env     print_env;     /**< Message printing env. */
+	struct ia_css_cpu_mem_env   cpu_mem_env;   /** local flush. */
+	struct ia_css_hw_access_env hw_access_env; /** CSS HW access functions */
+	struct ia_css_print_env     print_env;     /** Message printing env. */
 };
 
 #endif /* __IA_CSS_ENV_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_err.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_err.h
index 572e4e55c69e29cb405746d0894e4de4cd8046f4..cf895815ea31e0f598bb8480a332b57378ebfae5 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_err.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_err.h
@@ -15,12 +15,12 @@
 #ifndef __IA_CSS_ERR_H
 #define __IA_CSS_ERR_H
 
-/** @file
+/* @file
  * This file contains possible return values for most
  * functions in the CSS-API.
  */
 
-/** Errors, these values are used as the return value for most
+/* Errors, these values are used as the return value for most
  *  functions in this API.
  */
 enum ia_css_err {
@@ -41,22 +41,22 @@ enum ia_css_err {
 	IA_CSS_ERR_NOT_SUPPORTED
 };
 
-/** FW warnings. This enum contains a value for each warning that
+/* FW warnings. This enum contains a value for each warning that
  * the SP FW could indicate potential performance issue
  */
 enum ia_css_fw_warning {
 	IA_CSS_FW_WARNING_NONE,
-	IA_CSS_FW_WARNING_ISYS_QUEUE_FULL, /** < CSS system delayed because of insufficient space in the ISys queue.
+	IA_CSS_FW_WARNING_ISYS_QUEUE_FULL, /* < CSS system delayed because of insufficient space in the ISys queue.
 		This warning can be avoided by de-queing ISYS buffers more timely. */
-	IA_CSS_FW_WARNING_PSYS_QUEUE_FULL, /** < CSS system delayed because of insufficient space in the PSys queue.
+	IA_CSS_FW_WARNING_PSYS_QUEUE_FULL, /* < CSS system delayed because of insufficient space in the PSys queue.
 		This warning can be avoided by de-queing PSYS buffers more timely. */
-	IA_CSS_FW_WARNING_CIRCBUF_ALL_LOCKED, /** < CSS system delayed because of insufficient available buffers.
+	IA_CSS_FW_WARNING_CIRCBUF_ALL_LOCKED, /* < CSS system delayed because of insufficient available buffers.
 		This warning can be avoided by unlocking locked frame-buffers more timely. */
-	IA_CSS_FW_WARNING_EXP_ID_LOCKED, /** < Exposure ID skipped because the frame associated to it was still locked.
+	IA_CSS_FW_WARNING_EXP_ID_LOCKED, /* < Exposure ID skipped because the frame associated to it was still locked.
 		This warning can be avoided by unlocking locked frame-buffers more timely. */
-	IA_CSS_FW_WARNING_TAG_EXP_ID_FAILED, /** < Exposure ID cannot be found on the circular buffer.
+	IA_CSS_FW_WARNING_TAG_EXP_ID_FAILED, /* < Exposure ID cannot be found on the circular buffer.
 		This warning can be avoided by unlocking locked frame-buffers more timely. */
-	IA_CSS_FW_WARNING_FRAME_PARAM_MISMATCH, /** < Frame and param pair mismatched in tagger.
+	IA_CSS_FW_WARNING_FRAME_PARAM_MISMATCH, /* < Frame and param pair mismatched in tagger.
 		This warning can be avoided by providing a param set for each frame. */
 };
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_event_public.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_event_public.h
index aaf349772abef324a7a296f065b0ece97346cc3d..036a2f03d3bd416850e253ade9951fe1f6cec9b4 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_event_public.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_event_public.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_EVENT_PUBLIC_H
 #define __IA_CSS_EVENT_PUBLIC_H
 
-/** @file
+/* @file
  * This file contains CSS-API events functionality
  */
 
@@ -24,7 +24,7 @@
 #include <ia_css_types.h>	/* ia_css_pipe */
 #include <ia_css_timer.h>	/* ia_css_timer */
 
-/** The event type, distinguishes the kind of events that
+/* The event type, distinguishes the kind of events that
  * can are generated by the CSS system.
  *
  * !!!IMPORTANT!!! KEEP THE FOLLOWING IN SYNC:
@@ -35,43 +35,43 @@
  */
 enum ia_css_event_type {
 	IA_CSS_EVENT_TYPE_OUTPUT_FRAME_DONE		= 1 << 0,
-	/**< Output frame ready. */
+	/** Output frame ready. */
 	IA_CSS_EVENT_TYPE_SECOND_OUTPUT_FRAME_DONE	= 1 << 1,
-	/**< Second output frame ready. */
+	/** Second output frame ready. */
 	IA_CSS_EVENT_TYPE_VF_OUTPUT_FRAME_DONE		= 1 << 2,
-	/**< Viewfinder Output frame ready. */
+	/** Viewfinder Output frame ready. */
 	IA_CSS_EVENT_TYPE_SECOND_VF_OUTPUT_FRAME_DONE	= 1 << 3,
-	/**< Second viewfinder Output frame ready. */
+	/** Second viewfinder Output frame ready. */
 	IA_CSS_EVENT_TYPE_3A_STATISTICS_DONE		= 1 << 4,
-	/**< Indication that 3A statistics are available. */
+	/** Indication that 3A statistics are available. */
 	IA_CSS_EVENT_TYPE_DIS_STATISTICS_DONE		= 1 << 5,
-	/**< Indication that DIS statistics are available. */
+	/** Indication that DIS statistics are available. */
 	IA_CSS_EVENT_TYPE_PIPELINE_DONE			= 1 << 6,
-	/**< Pipeline Done event, sent after last pipeline stage. */
+	/** Pipeline Done event, sent after last pipeline stage. */
 	IA_CSS_EVENT_TYPE_FRAME_TAGGED			= 1 << 7,
-	/**< Frame tagged. */
+	/** Frame tagged. */
 	IA_CSS_EVENT_TYPE_INPUT_FRAME_DONE		= 1 << 8,
-	/**< Input frame ready. */
+	/** Input frame ready. */
 	IA_CSS_EVENT_TYPE_METADATA_DONE			= 1 << 9,
-	/**< Metadata ready. */
+	/** Metadata ready. */
 	IA_CSS_EVENT_TYPE_LACE_STATISTICS_DONE		= 1 << 10,
-	/**< Indication that LACE statistics are available. */
+	/** Indication that LACE statistics are available. */
 	IA_CSS_EVENT_TYPE_ACC_STAGE_COMPLETE		= 1 << 11,
-	/**< Extension stage complete. */
+	/** Extension stage complete. */
 	IA_CSS_EVENT_TYPE_TIMER				= 1 << 12,
-	/**< Timer event for measuring the SP side latencies. It contains the
+	/** Timer event for measuring the SP side latencies. It contains the
              32-bit timer value from the SP */
 	IA_CSS_EVENT_TYPE_PORT_EOF			= 1 << 13,
-	/**< End Of Frame event, sent when in buffered sensor mode. */
+	/** End Of Frame event, sent when in buffered sensor mode. */
 	IA_CSS_EVENT_TYPE_FW_WARNING			= 1 << 14,
-	/**< Performance warning encounter by FW */
+	/** Performance warning encounter by FW */
 	IA_CSS_EVENT_TYPE_FW_ASSERT			= 1 << 15,
-	/**< Assertion hit by FW */
+	/** Assertion hit by FW */
 };
 
 #define IA_CSS_EVENT_TYPE_NONE 0
 
-/** IA_CSS_EVENT_TYPE_ALL is a mask for all pipe related events.
+/* IA_CSS_EVENT_TYPE_ALL is a mask for all pipe related events.
  * The other events (such as PORT_EOF) cannot be enabled/disabled
  * and are hence excluded from this macro.
  */
@@ -89,7 +89,7 @@ enum ia_css_event_type {
 	 IA_CSS_EVENT_TYPE_LACE_STATISTICS_DONE		| \
 	 IA_CSS_EVENT_TYPE_ACC_STAGE_COMPLETE)
 
-/** The event struct, container for the event type and its related values.
+/* The event struct, container for the event type and its related values.
  * Depending on the event type, either pipe or port will be filled.
  * Pipeline related events (like buffer/frame events) will return a valid and filled pipe handle.
  * For non pipeline related events (but i.e. stream specific, like EOF event), the port will be
@@ -97,14 +97,14 @@ enum ia_css_event_type {
  */
 struct ia_css_event {
 	struct ia_css_pipe    *pipe;
-	/**< Pipe handle on which event happened, NULL for non pipe related
+	/** Pipe handle on which event happened, NULL for non pipe related
 	     events. */
 	enum ia_css_event_type type;
-	/**< Type of Event, always valid/filled. */
+	/** Type of Event, always valid/filled. */
 	uint8_t                port;
-	/**< Port number for EOF event (not valid for other events). */
+	/** Port number for EOF event (not valid for other events). */
 	uint8_t                exp_id;
-	/**< Exposure id for EOF/FRAME_TAGGED/FW_WARNING event (not valid for other events)
+	/** Exposure id for EOF/FRAME_TAGGED/FW_WARNING event (not valid for other events)
 	     The exposure ID is unique only within a logical stream and it is
 	     only generated on systems that have an input system (such as 2400
 	     and 2401).
@@ -120,26 +120,26 @@ struct ia_css_event {
 	     in the exposure IDs. Therefor applications should not use this
 	     to detect frame drops. */
 	uint32_t               fw_handle;
-	/**< Firmware Handle for ACC_STAGE_COMPLETE event (not valid for other
+	/** Firmware Handle for ACC_STAGE_COMPLETE event (not valid for other
 	     events). */
 	enum ia_css_fw_warning fw_warning;
-	/**< Firmware warning code, only for WARNING events. */
+	/** Firmware warning code, only for WARNING events. */
 	uint8_t                fw_assert_module_id;
-	/**< Firmware module id, only for ASSERT events, should be logged by driver. */
+	/** Firmware module id, only for ASSERT events, should be logged by driver. */
 	uint16_t               fw_assert_line_no;
-	/**< Firmware line number, only for ASSERT events, should be logged by driver. */
+	/** Firmware line number, only for ASSERT events, should be logged by driver. */
 	clock_value_t	       timer_data;
-	/**< For storing the full 32-bit of the timer value. Valid only for TIMER
+	/** For storing the full 32-bit of the timer value. Valid only for TIMER
 	     event */
 	uint8_t                timer_code;
-	/**< For storing the code of the TIMER event. Valid only for
+	/** For storing the code of the TIMER event. Valid only for
 	     TIMER event */
 	uint8_t                timer_subcode;
-	/**< For storing the subcode of the TIMER event. Valid only
+	/** For storing the subcode of the TIMER event. Valid only
 	     for TIMER event */
 };
 
-/** @brief Dequeue a PSYS event from the CSS system.
+/* @brief Dequeue a PSYS event from the CSS system.
  *
  * @param[out]	event   Pointer to the event struct which will be filled by
  *                      this function if an event is available.
@@ -156,7 +156,7 @@ struct ia_css_event {
 enum ia_css_err
 ia_css_dequeue_psys_event(struct ia_css_event *event);
 
-/** @brief Dequeue an event from the CSS system.
+/* @brief Dequeue an event from the CSS system.
  *
  * @param[out]	event   Pointer to the event struct which will be filled by
  *                      this function if an event is available.
@@ -171,7 +171,7 @@ ia_css_dequeue_psys_event(struct ia_css_event *event);
 enum ia_css_err
 ia_css_dequeue_event(struct ia_css_event *event);
 
-/** @brief Dequeue an ISYS event from the CSS system.
+/* @brief Dequeue an ISYS event from the CSS system.
  *
  * @param[out]	event   Pointer to the event struct which will be filled by
  *                      this function if an event is available.
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_firmware.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_firmware.h
index 06d375a09be27b83705adc9feae8b1eaac263e79..d7d7f0a995e5c0370f3ab89c1c8381279f3abbc5 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_firmware.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_firmware.h
@@ -15,21 +15,21 @@
 #ifndef __IA_CSS_FIRMWARE_H
 #define __IA_CSS_FIRMWARE_H
 
-/** @file
+/* @file
  * This file contains firmware loading/unloading support functionality
  */
 
 #include "ia_css_err.h"
 #include "ia_css_env.h"
 
-/** CSS firmware package structure.
+/* CSS firmware package structure.
  */
 struct ia_css_fw {
-	void	    *data;  /**< pointer to the firmware data */
-	unsigned int bytes; /**< length in bytes of firmware data */
+	void	    *data;  /** pointer to the firmware data */
+	unsigned int bytes; /** length in bytes of firmware data */
 };
 
-/** @brief Loads the firmware
+/* @brief Loads the firmware
  * @param[in]	env		Environment, provides functions to access the
  *				environment in which the CSS code runs. This is
  *				used for host side memory access and message
@@ -51,7 +51,7 @@ enum ia_css_err
 ia_css_load_firmware(const struct ia_css_env *env,
 	    const struct ia_css_fw  *fw);
 
-/** @brief Unloads the firmware
+/* @brief Unloads the firmware
  * @return	None
  *
  * This function unloads the firmware loaded by ia_css_load_firmware.
@@ -61,7 +61,7 @@ ia_css_load_firmware(const struct ia_css_env *env,
 void
 ia_css_unload_firmware(void);
 
-/** @brief Checks firmware version
+/* @brief Checks firmware version
  * @param[in]	fw	Firmware package containing the firmware for all
  *			predefined ISP binaries.
  * @return		Returns true when the firmware version matches with the CSS
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_frac.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_frac.h
index da9c60144c6dcaf6fe075ee626e7c49ed60ec301..e5ffc579aef1e09969dcc14fd4e1e5b220d7c044 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_frac.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_frac.h
@@ -15,7 +15,7 @@
 #ifndef _IA_CSS_FRAC_H
 #define _IA_CSS_FRAC_H
 
-/** @file
+/* @file
  * This file contains typedefs used for fractional numbers
  */
 
@@ -25,13 +25,13 @@
  * NOTE: the 16 bit fixed point types actually occupy 32 bits
  * to save on extension operations in the ISP code.
  */
-/** Unsigned fixed point value, 0 integer bits, 16 fractional bits */
+/* Unsigned fixed point value, 0 integer bits, 16 fractional bits */
 typedef uint32_t ia_css_u0_16;
-/** Unsigned fixed point value, 5 integer bits, 11 fractional bits */
+/* Unsigned fixed point value, 5 integer bits, 11 fractional bits */
 typedef uint32_t ia_css_u5_11;
-/** Unsigned fixed point value, 8 integer bits, 8 fractional bits */
+/* Unsigned fixed point value, 8 integer bits, 8 fractional bits */
 typedef uint32_t ia_css_u8_8;
-/** Signed fixed point value, 0 integer bits, 15 fractional bits */
+/* Signed fixed point value, 0 integer bits, 15 fractional bits */
 typedef int32_t ia_css_s0_15;
 
 #endif /* _IA_CSS_FRAC_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_frame_format.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_frame_format.h
index d534fbd913803ab37812a4b09f98396d56ab00e3..2f177edc36ac5e7ec9b4311671ae51b643682ac0 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_frame_format.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_frame_format.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_FRAME_FORMAT_H
 #define __IA_CSS_FRAME_FORMAT_H
 
-/** @file
+/* @file
  * This file contains information about formats supported in the ISP
  */
 
-/** Frame formats, some of these come from fourcc.org, others are
+/* Frame formats, some of these come from fourcc.org, others are
    better explained by video4linux2. The NV11 seems to be described only
    on MSDN pages, but even those seem to be gone now.
    Frames can come in many forms, the main categories are RAW, RGB and YUV
@@ -48,45 +48,45 @@
 	- css/bxt_sandbox/isysapi/interface/ia_css_isysapi_fw_types.h
 */
 enum ia_css_frame_format {
-	IA_CSS_FRAME_FORMAT_NV11 = 0,   /**< 12 bit YUV 411, Y, UV plane */
-	IA_CSS_FRAME_FORMAT_NV12,       /**< 12 bit YUV 420, Y, UV plane */
-	IA_CSS_FRAME_FORMAT_NV12_16,    /**< 16 bit YUV 420, Y, UV plane */
-	IA_CSS_FRAME_FORMAT_NV12_TILEY, /**< 12 bit YUV 420, Intel proprietary tiled format, TileY */
-	IA_CSS_FRAME_FORMAT_NV16,       /**< 16 bit YUV 422, Y, UV plane */
-	IA_CSS_FRAME_FORMAT_NV21,       /**< 12 bit YUV 420, Y, VU plane */
-	IA_CSS_FRAME_FORMAT_NV61,       /**< 16 bit YUV 422, Y, VU plane */
-	IA_CSS_FRAME_FORMAT_YV12,       /**< 12 bit YUV 420, Y, V, U plane */
-	IA_CSS_FRAME_FORMAT_YV16,       /**< 16 bit YUV 422, Y, V, U plane */
-	IA_CSS_FRAME_FORMAT_YUV420,     /**< 12 bit YUV 420, Y, U, V plane */
-	IA_CSS_FRAME_FORMAT_YUV420_16,  /**< yuv420, 16 bits per subpixel */
-	IA_CSS_FRAME_FORMAT_YUV422,     /**< 16 bit YUV 422, Y, U, V plane */
-	IA_CSS_FRAME_FORMAT_YUV422_16,  /**< yuv422, 16 bits per subpixel */
-	IA_CSS_FRAME_FORMAT_UYVY,       /**< 16 bit YUV 422, UYVY interleaved */
-	IA_CSS_FRAME_FORMAT_YUYV,       /**< 16 bit YUV 422, YUYV interleaved */
-	IA_CSS_FRAME_FORMAT_YUV444,     /**< 24 bit YUV 444, Y, U, V plane */
-	IA_CSS_FRAME_FORMAT_YUV_LINE,   /**< Internal format, 2 y lines followed
+	IA_CSS_FRAME_FORMAT_NV11 = 0,   /** 12 bit YUV 411, Y, UV plane */
+	IA_CSS_FRAME_FORMAT_NV12,       /** 12 bit YUV 420, Y, UV plane */
+	IA_CSS_FRAME_FORMAT_NV12_16,    /** 16 bit YUV 420, Y, UV plane */
+	IA_CSS_FRAME_FORMAT_NV12_TILEY, /** 12 bit YUV 420, Intel proprietary tiled format, TileY */
+	IA_CSS_FRAME_FORMAT_NV16,       /** 16 bit YUV 422, Y, UV plane */
+	IA_CSS_FRAME_FORMAT_NV21,       /** 12 bit YUV 420, Y, VU plane */
+	IA_CSS_FRAME_FORMAT_NV61,       /** 16 bit YUV 422, Y, VU plane */
+	IA_CSS_FRAME_FORMAT_YV12,       /** 12 bit YUV 420, Y, V, U plane */
+	IA_CSS_FRAME_FORMAT_YV16,       /** 16 bit YUV 422, Y, V, U plane */
+	IA_CSS_FRAME_FORMAT_YUV420,     /** 12 bit YUV 420, Y, U, V plane */
+	IA_CSS_FRAME_FORMAT_YUV420_16,  /** yuv420, 16 bits per subpixel */
+	IA_CSS_FRAME_FORMAT_YUV422,     /** 16 bit YUV 422, Y, U, V plane */
+	IA_CSS_FRAME_FORMAT_YUV422_16,  /** yuv422, 16 bits per subpixel */
+	IA_CSS_FRAME_FORMAT_UYVY,       /** 16 bit YUV 422, UYVY interleaved */
+	IA_CSS_FRAME_FORMAT_YUYV,       /** 16 bit YUV 422, YUYV interleaved */
+	IA_CSS_FRAME_FORMAT_YUV444,     /** 24 bit YUV 444, Y, U, V plane */
+	IA_CSS_FRAME_FORMAT_YUV_LINE,   /** Internal format, 2 y lines followed
 					     by a uvinterleaved line */
-	IA_CSS_FRAME_FORMAT_RAW,	/**< RAW, 1 plane */
-	IA_CSS_FRAME_FORMAT_RGB565,     /**< 16 bit RGB, 1 plane. Each 3 sub
+	IA_CSS_FRAME_FORMAT_RAW,	/** RAW, 1 plane */
+	IA_CSS_FRAME_FORMAT_RGB565,     /** 16 bit RGB, 1 plane. Each 3 sub
 					     pixels are packed into one 16 bit
 					     value, 5 bits for R, 6 bits for G
 					     and 5 bits for B. */
-	IA_CSS_FRAME_FORMAT_PLANAR_RGB888, /**< 24 bit RGB, 3 planes */
-	IA_CSS_FRAME_FORMAT_RGBA888,	/**< 32 bit RGBA, 1 plane, A=Alpha
+	IA_CSS_FRAME_FORMAT_PLANAR_RGB888, /** 24 bit RGB, 3 planes */
+	IA_CSS_FRAME_FORMAT_RGBA888,	/** 32 bit RGBA, 1 plane, A=Alpha
 					     (alpha is unused) */
-	IA_CSS_FRAME_FORMAT_QPLANE6, /**< Internal, for advanced ISP */
-	IA_CSS_FRAME_FORMAT_BINARY_8,	/**< byte stream, used for jpeg. For
+	IA_CSS_FRAME_FORMAT_QPLANE6, /** Internal, for advanced ISP */
+	IA_CSS_FRAME_FORMAT_BINARY_8,	/** byte stream, used for jpeg. For
 					     frames of this type, we set the
 					     height to 1 and the width to the
 					     number of allocated bytes. */
-	IA_CSS_FRAME_FORMAT_MIPI,	/**< MIPI frame, 1 plane */
-	IA_CSS_FRAME_FORMAT_RAW_PACKED, /**< RAW, 1 plane, packed */
-	IA_CSS_FRAME_FORMAT_CSI_MIPI_YUV420_8,	      /**< 8 bit per Y/U/V.
+	IA_CSS_FRAME_FORMAT_MIPI,	/** MIPI frame, 1 plane */
+	IA_CSS_FRAME_FORMAT_RAW_PACKED, /** RAW, 1 plane, packed */
+	IA_CSS_FRAME_FORMAT_CSI_MIPI_YUV420_8,	      /** 8 bit per Y/U/V.
 							   Y odd line; UYVY
 							   interleaved even line */
-	IA_CSS_FRAME_FORMAT_CSI_MIPI_LEGACY_YUV420_8, /**< Legacy YUV420. UY odd
+	IA_CSS_FRAME_FORMAT_CSI_MIPI_LEGACY_YUV420_8, /** Legacy YUV420. UY odd
 							   line; VY even line */
-	IA_CSS_FRAME_FORMAT_CSI_MIPI_YUV420_10       /**< 10 bit per Y/U/V. Y odd
+	IA_CSS_FRAME_FORMAT_CSI_MIPI_YUV420_10       /** 10 bit per Y/U/V. Y odd
 							   line; UYVY interleaved
 							   even line */
 };
@@ -95,7 +95,7 @@ enum ia_css_frame_format {
 /*       because of issues this would cause with the Clockwork code checking tool.               */
 #define IA_CSS_FRAME_FORMAT_NUM (IA_CSS_FRAME_FORMAT_CSI_MIPI_YUV420_10 + 1)
 
-/** Number of valid output frame formats for ISP **/
+/* Number of valid output frame formats for ISP **/
 #define IA_CSS_FRAME_OUT_FORMAT_NUM	(IA_CSS_FRAME_FORMAT_RGBA888 + 1)
 
 #endif /* __IA_CSS_FRAME_FORMAT_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_frame_public.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_frame_public.h
index 92f2389176b2e0f825cdc94cb954340fb8ead303..ba7a076c3afa7a251878821526ce9bd53ef35025 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_frame_public.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_frame_public.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_FRAME_PUBLIC_H
 #define __IA_CSS_FRAME_PUBLIC_H
 
-/** @file
+/* @file
  * This file contains structs to describe various frame-formats supported by the ISP.
  */
 
@@ -25,73 +25,73 @@
 #include "ia_css_frame_format.h"
 #include "ia_css_buffer.h"
 
-/** For RAW input, the bayer order needs to be specified separately. There
+/* For RAW input, the bayer order needs to be specified separately. There
  *  are 4 possible orders. The name is constructed by taking the first two
  *  colors on the first line and the first two colors from the second line.
  */
 enum ia_css_bayer_order {
-	IA_CSS_BAYER_ORDER_GRBG, /**< GRGRGRGRGR .. BGBGBGBGBG */
-	IA_CSS_BAYER_ORDER_RGGB, /**< RGRGRGRGRG .. GBGBGBGBGB */
-	IA_CSS_BAYER_ORDER_BGGR, /**< BGBGBGBGBG .. GRGRGRGRGR */
-	IA_CSS_BAYER_ORDER_GBRG, /**< GBGBGBGBGB .. RGRGRGRGRG */
+	IA_CSS_BAYER_ORDER_GRBG, /** GRGRGRGRGR .. BGBGBGBGBG */
+	IA_CSS_BAYER_ORDER_RGGB, /** RGRGRGRGRG .. GBGBGBGBGB */
+	IA_CSS_BAYER_ORDER_BGGR, /** BGBGBGBGBG .. GRGRGRGRGR */
+	IA_CSS_BAYER_ORDER_GBRG, /** GBGBGBGBGB .. RGRGRGRGRG */
 };
 #define IA_CSS_BAYER_ORDER_NUM (IA_CSS_BAYER_ORDER_GBRG + 1)
 
-/** Frame plane structure. This describes one plane in an image
+/* Frame plane structure. This describes one plane in an image
  *  frame buffer.
  */
 struct ia_css_frame_plane {
-	unsigned int height; /**< height of a plane in lines */
-	unsigned int width;  /**< width of a line, in DMA elements, note that
+	unsigned int height; /** height of a plane in lines */
+	unsigned int width;  /** width of a line, in DMA elements, note that
 				  for RGB565 the three subpixels are stored in
 				  one element. For all other formats this is
 				  the number of subpixels per line. */
-	unsigned int stride; /**< stride of a line in bytes */
-	unsigned int offset; /**< offset in bytes to start of frame data.
+	unsigned int stride; /** stride of a line in bytes */
+	unsigned int offset; /** offset in bytes to start of frame data.
 				  offset is wrt data field in ia_css_frame */
 };
 
-/** Binary "plane". This is used to story binary streams such as jpeg
+/* Binary "plane". This is used to story binary streams such as jpeg
  *  images. This is not actually a real plane.
  */
 struct ia_css_frame_binary_plane {
-	unsigned int		  size; /**< number of bytes in the stream */
-	struct ia_css_frame_plane data; /**< plane */
+	unsigned int		  size; /** number of bytes in the stream */
+	struct ia_css_frame_plane data; /** plane */
 };
 
-/** Container for planar YUV frames. This contains 3 planes.
+/* Container for planar YUV frames. This contains 3 planes.
  */
 struct ia_css_frame_yuv_planes {
-	struct ia_css_frame_plane y; /**< Y plane */
-	struct ia_css_frame_plane u; /**< U plane */
-	struct ia_css_frame_plane v; /**< V plane */
+	struct ia_css_frame_plane y; /** Y plane */
+	struct ia_css_frame_plane u; /** U plane */
+	struct ia_css_frame_plane v; /** V plane */
 };
 
-/** Container for semi-planar YUV frames.
+/* Container for semi-planar YUV frames.
   */
 struct ia_css_frame_nv_planes {
-	struct ia_css_frame_plane y;  /**< Y plane */
-	struct ia_css_frame_plane uv; /**< UV plane */
+	struct ia_css_frame_plane y;  /** Y plane */
+	struct ia_css_frame_plane uv; /** UV plane */
 };
 
-/** Container for planar RGB frames. Each color has its own plane.
+/* Container for planar RGB frames. Each color has its own plane.
  */
 struct ia_css_frame_rgb_planes {
-	struct ia_css_frame_plane r; /**< Red plane */
-	struct ia_css_frame_plane g; /**< Green plane */
-	struct ia_css_frame_plane b; /**< Blue plane */
+	struct ia_css_frame_plane r; /** Red plane */
+	struct ia_css_frame_plane g; /** Green plane */
+	struct ia_css_frame_plane b; /** Blue plane */
 };
 
-/** Container for 6-plane frames. These frames are used internally
+/* Container for 6-plane frames. These frames are used internally
  *  in the advanced ISP only.
  */
 struct ia_css_frame_plane6_planes {
-	struct ia_css_frame_plane r;	  /**< Red plane */
-	struct ia_css_frame_plane r_at_b; /**< Red at blue plane */
-	struct ia_css_frame_plane gr;	  /**< Red-green plane */
-	struct ia_css_frame_plane gb;	  /**< Blue-green plane */
-	struct ia_css_frame_plane b;	  /**< Blue plane */
-	struct ia_css_frame_plane b_at_r; /**< Blue at red plane */
+	struct ia_css_frame_plane r;	  /** Red plane */
+	struct ia_css_frame_plane r_at_b; /** Red at blue plane */
+	struct ia_css_frame_plane gr;	  /** Red-green plane */
+	struct ia_css_frame_plane gb;	  /** Blue-green plane */
+	struct ia_css_frame_plane b;	  /** Blue plane */
+	struct ia_css_frame_plane b_at_r; /** Blue at red plane */
 };
 
 /* Crop info struct - stores the lines to be cropped in isp */
@@ -103,15 +103,15 @@ struct ia_css_crop_info {
 	unsigned int start_line;
 };
 
-/** Frame info struct. This describes the contents of an image frame buffer.
+/* Frame info struct. This describes the contents of an image frame buffer.
   */
 struct ia_css_frame_info {
-	struct ia_css_resolution res; /**< Frame resolution (valid data) */
-	unsigned int padded_width; /**< stride of line in memory (in pixels) */
-	enum ia_css_frame_format format; /**< format of the frame data */
-	unsigned int raw_bit_depth; /**< number of valid bits per pixel,
+	struct ia_css_resolution res; /** Frame resolution (valid data) */
+	unsigned int padded_width; /** stride of line in memory (in pixels) */
+	enum ia_css_frame_format format; /** format of the frame data */
+	unsigned int raw_bit_depth; /** number of valid bits per pixel,
 					 only valid for RAW bayer frames */
-	enum ia_css_bayer_order raw_bayer_order; /**< bayer order, only valid
+	enum ia_css_bayer_order raw_bayer_order; /** bayer order, only valid
 						      for RAW bayer frames */
 	/* the params below are computed based on bayer_order
 	 * we can remove the raw_bayer_order if it is redundant
@@ -136,9 +136,9 @@ struct ia_css_frame_info {
  *  Specifies the DVS loop delay in "frame periods"
  */
 enum ia_css_frame_delay {
-	IA_CSS_FRAME_DELAY_0, /**< Frame delay = 0 */
-	IA_CSS_FRAME_DELAY_1, /**< Frame delay = 1 */
-	IA_CSS_FRAME_DELAY_2  /**< Frame delay = 2 */
+	IA_CSS_FRAME_DELAY_0, /** Frame delay = 0 */
+	IA_CSS_FRAME_DELAY_1, /** Frame delay = 1 */
+	IA_CSS_FRAME_DELAY_2  /** Frame delay = 2 */
 };
 
 enum ia_css_frame_flash_state {
@@ -147,13 +147,13 @@ enum ia_css_frame_flash_state {
 	IA_CSS_FRAME_FLASH_STATE_FULL
 };
 
-/** Frame structure. This structure describes an image buffer or frame.
+/* Frame structure. This structure describes an image buffer or frame.
  *  This is the main structure used for all input and output images.
  */
 struct ia_css_frame {
-	struct ia_css_frame_info info; /**< info struct describing the frame */
-	ia_css_ptr   data;	       /**< pointer to start of image data */
-	unsigned int data_bytes;       /**< size of image data in bytes */
+	struct ia_css_frame_info info; /** info struct describing the frame */
+	ia_css_ptr   data;	       /** pointer to start of image data */
+	unsigned int data_bytes;       /** size of image data in bytes */
 	/* LA: move this to ia_css_buffer */
 	/*
 	 * -1 if data address is static during life time of pipeline
@@ -171,10 +171,10 @@ struct ia_css_frame {
 	enum ia_css_buffer_type buf_type;
 	enum ia_css_frame_flash_state flash_state;
 	unsigned int exp_id;
-	/**< exposure id, see ia_css_event_public.h for more detail */
-	uint32_t isp_config_id; /**< Unique ID to track which config was actually applied to a particular frame */
-	bool valid; /**< First video output frame is not valid */
-	bool contiguous; /**< memory is allocated physically contiguously */
+	/** exposure id, see ia_css_event_public.h for more detail */
+	uint32_t isp_config_id; /** Unique ID to track which config was actually applied to a particular frame */
+	bool valid; /** First video output frame is not valid */
+	bool contiguous; /** memory is allocated physically contiguously */
 	union {
 		unsigned int	_initialisation_dummy;
 		struct ia_css_frame_plane raw;
@@ -185,7 +185,7 @@ struct ia_css_frame {
 		struct ia_css_frame_nv_planes nv;
 		struct ia_css_frame_plane6_planes plane6;
 		struct ia_css_frame_binary_plane binary;
-	} planes; /**< frame planes, select the right one based on
+	} planes; /** frame planes, select the right one based on
 		       info.format */
 };
 
@@ -204,7 +204,7 @@ struct ia_css_frame {
 	{ 0 }					/* planes */ \
 }
 
-/** @brief Fill a frame with zeros
+/* @brief Fill a frame with zeros
  *
  * @param	frame		The frame.
  * @return	None
@@ -213,7 +213,7 @@ struct ia_css_frame {
  */
 void ia_css_frame_zero(struct ia_css_frame *frame);
 
-/** @brief Allocate a CSS frame structure
+/* @brief Allocate a CSS frame structure
  *
  * @param	frame		The allocated frame.
  * @param	width		The width (in pixels) of the frame.
@@ -234,7 +234,7 @@ ia_css_frame_allocate(struct ia_css_frame **frame,
 		      unsigned int stride,
 		      unsigned int raw_bit_depth);
 
-/** @brief Allocate a CSS frame structure using a frame info structure.
+/* @brief Allocate a CSS frame structure using a frame info structure.
  *
  * @param	frame	The allocated frame.
  * @param[in]	info	The frame info structure.
@@ -247,7 +247,7 @@ ia_css_frame_allocate(struct ia_css_frame **frame,
 enum ia_css_err
 ia_css_frame_allocate_from_info(struct ia_css_frame **frame,
 				const struct ia_css_frame_info *info);
-/** @brief Free a CSS frame structure.
+/* @brief Free a CSS frame structure.
  *
  * @param[in]	frame	Pointer to the frame.
  * @return	None
@@ -258,7 +258,7 @@ ia_css_frame_allocate_from_info(struct ia_css_frame **frame,
 void
 ia_css_frame_free(struct ia_css_frame *frame);
 
-/** @brief Allocate a contiguous CSS frame structure
+/* @brief Allocate a contiguous CSS frame structure
  *
  * @param	frame		The allocated frame.
  * @param	width		The width (in pixels) of the frame.
@@ -280,7 +280,7 @@ ia_css_frame_allocate_contiguous(struct ia_css_frame **frame,
 				 unsigned int stride,
 				 unsigned int raw_bit_depth);
 
-/** @brief Allocate a contiguous CSS frame from a frame info structure.
+/* @brief Allocate a contiguous CSS frame from a frame info structure.
  *
  * @param	frame	The allocated frame.
  * @param[in]	info	The frame info structure.
@@ -296,7 +296,7 @@ enum ia_css_err
 ia_css_frame_allocate_contiguous_from_info(struct ia_css_frame **frame,
 					  const struct ia_css_frame_info *info);
 
-/** @brief Allocate a CSS frame structure using a frame info structure.
+/* @brief Allocate a CSS frame structure using a frame info structure.
  *
  * @param	frame	The allocated frame.
  * @param[in]	info	The frame info structure.
@@ -309,7 +309,7 @@ enum ia_css_err
 ia_css_frame_create_from_info(struct ia_css_frame **frame,
 	const struct ia_css_frame_info *info);
 
-/** @brief Set a mapped data buffer to a CSS frame
+/* @brief Set a mapped data buffer to a CSS frame
  *
  * @param[in]	frame       Valid CSS frame pointer
  * @param[in]	mapped_data  Mapped data buffer to be assigned to the CSS frame
@@ -327,7 +327,7 @@ ia_css_frame_set_data(struct ia_css_frame *frame,
 	const ia_css_ptr   mapped_data,
 	size_t data_size_bytes);
 
-/** @brief Map an existing frame data pointer to a CSS frame.
+/* @brief Map an existing frame data pointer to a CSS frame.
  *
  * @param	frame		Pointer to the frame to be initialized
  * @param[in]	info		The frame info.
@@ -350,7 +350,7 @@ ia_css_frame_map(struct ia_css_frame **frame,
 		 uint16_t attribute,
 		 void *context);
 
-/** @brief Unmap a CSS frame structure.
+/* @brief Unmap a CSS frame structure.
  *
  * @param[in]	frame	Pointer to the CSS frame.
  * @return	None
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h
index 8a17c3346caa0eb097d54f80a52c1bf4d21c6ba1..f415570a3da98a3e4080cb46b8c11270ec3a762c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_INPUT_PORT_H
 #define __IA_CSS_INPUT_PORT_H
 
-/** @file
+/* @file
  * This file contains information about the possible input ports for CSS
  */
 
-/** Enumeration of the physical input ports on the CSS hardware.
+/* Enumeration of the physical input ports on the CSS hardware.
  *  There are 3 MIPI CSI-2 ports.
  */
 enum ia_css_csi2_port {
@@ -28,39 +28,39 @@ enum ia_css_csi2_port {
 	IA_CSS_CSI2_PORT2  /* Implicitly map to MIPI_PORT2_ID */
 };
 
-/** Backward compatible for CSS API 2.0 only
+/* Backward compatible for CSS API 2.0 only
  *  TO BE REMOVED when all drivers move to CSS	API 2.1
  */
 #define	IA_CSS_CSI2_PORT_4LANE IA_CSS_CSI2_PORT0
 #define	IA_CSS_CSI2_PORT_1LANE IA_CSS_CSI2_PORT1
 #define	IA_CSS_CSI2_PORT_2LANE IA_CSS_CSI2_PORT2
 
-/** The CSI2 interface supports 2 types of compression or can
+/* The CSI2 interface supports 2 types of compression or can
  *  be run without compression.
  */
 enum ia_css_csi2_compression_type {
-	IA_CSS_CSI2_COMPRESSION_TYPE_NONE, /**< No compression */
-	IA_CSS_CSI2_COMPRESSION_TYPE_1,    /**< Compression scheme 1 */
-	IA_CSS_CSI2_COMPRESSION_TYPE_2     /**< Compression scheme 2 */
+	IA_CSS_CSI2_COMPRESSION_TYPE_NONE, /** No compression */
+	IA_CSS_CSI2_COMPRESSION_TYPE_1,    /** Compression scheme 1 */
+	IA_CSS_CSI2_COMPRESSION_TYPE_2     /** Compression scheme 2 */
 };
 
 struct ia_css_csi2_compression {
 	enum ia_css_csi2_compression_type type;
-	/**< Compression used */
+	/** Compression used */
 	unsigned int                      compressed_bits_per_pixel;
-	/**< Compressed bits per pixel (only when compression is enabled) */
+	/** Compressed bits per pixel (only when compression is enabled) */
 	unsigned int                      uncompressed_bits_per_pixel;
-	/**< Uncompressed bits per pixel (only when compression is enabled) */
+	/** Uncompressed bits per pixel (only when compression is enabled) */
 };
 
-/** Input port structure.
+/* Input port structure.
  */
 struct ia_css_input_port {
-	enum ia_css_csi2_port port; /**< Physical CSI-2 port */
-	unsigned int num_lanes; /**< Number of lanes used (4-lane port only) */
-	unsigned int timeout;   /**< Timeout value */
-	unsigned int rxcount;   /**< Register value, should include all lanes */
-	struct ia_css_csi2_compression compression; /**< Compression used */
+	enum ia_css_csi2_port port; /** Physical CSI-2 port */
+	unsigned int num_lanes; /** Number of lanes used (4-lane port only) */
+	unsigned int timeout;   /** Timeout value */
+	unsigned int rxcount;   /** Register value, should include all lanes */
+	struct ia_css_csi2_compression compression; /** Compression used */
 };
 
 #endif /* __IA_CSS_INPUT_PORT_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h
index 416ca4d28732ea7c3dadd598653200a8c40104d7..10ef61178bb2bae95a1e65d873c973f75da2b7b2 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_IRQ_H
 #define __IA_CSS_IRQ_H
 
-/** @file
+/* @file
  * This file contains information for Interrupts/IRQs from CSS
  */
 
@@ -23,14 +23,14 @@
 #include "ia_css_pipe_public.h"
 #include "ia_css_input_port.h"
 
-/** Interrupt types, these enumerate all supported interrupt types.
+/* Interrupt types, these enumerate all supported interrupt types.
  */
 enum ia_css_irq_type {
-	IA_CSS_IRQ_TYPE_EDGE,  /**< Edge (level) sensitive interrupt */
-	IA_CSS_IRQ_TYPE_PULSE  /**< Pulse-shaped interrupt */
+	IA_CSS_IRQ_TYPE_EDGE,  /** Edge (level) sensitive interrupt */
+	IA_CSS_IRQ_TYPE_PULSE  /** Pulse-shaped interrupt */
 };
 
-/** Interrupt request type.
+/* Interrupt request type.
  *  When the CSS hardware generates an interrupt, a function in this API
  *  needs to be called to retrieve information about the interrupt.
  *  This interrupt type is part of this information and indicates what
@@ -46,55 +46,55 @@ enum ia_css_irq_type {
  */
 enum ia_css_irq_info {
 	IA_CSS_IRQ_INFO_CSS_RECEIVER_ERROR            = 1 << 0,
-	/**< the css receiver has encountered an error */
+	/** the css receiver has encountered an error */
 	IA_CSS_IRQ_INFO_CSS_RECEIVER_FIFO_OVERFLOW    = 1 << 1,
-	/**< the FIFO in the csi receiver has overflown */
+	/** the FIFO in the csi receiver has overflown */
 	IA_CSS_IRQ_INFO_CSS_RECEIVER_SOF              = 1 << 2,
-	/**< the css receiver received the start of frame */
+	/** the css receiver received the start of frame */
 	IA_CSS_IRQ_INFO_CSS_RECEIVER_EOF              = 1 << 3,
-	/**< the css receiver received the end of frame */
+	/** the css receiver received the end of frame */
 	IA_CSS_IRQ_INFO_CSS_RECEIVER_SOL              = 1 << 4,
-	/**< the css receiver received the start of line */
+	/** the css receiver received the start of line */
 	IA_CSS_IRQ_INFO_PSYS_EVENTS_READY             = 1 << 5,
-	/**< One or more events are available in the PSYS event queue */
+	/** One or more events are available in the PSYS event queue */
 	IA_CSS_IRQ_INFO_EVENTS_READY = IA_CSS_IRQ_INFO_PSYS_EVENTS_READY,
-	/**< deprecated{obsolete version of IA_CSS_IRQ_INFO_PSYS_EVENTS_READY,
+	/** deprecated{obsolete version of IA_CSS_IRQ_INFO_PSYS_EVENTS_READY,
 	 * same functionality.} */
 	IA_CSS_IRQ_INFO_CSS_RECEIVER_EOL              = 1 << 6,
-	/**< the css receiver received the end of line */
+	/** the css receiver received the end of line */
 	IA_CSS_IRQ_INFO_CSS_RECEIVER_SIDEBAND_CHANGED = 1 << 7,
-	/**< the css receiver received a change in side band signals */
+	/** the css receiver received a change in side band signals */
 	IA_CSS_IRQ_INFO_CSS_RECEIVER_GEN_SHORT_0      = 1 << 8,
-	/**< generic short packets (0) */
+	/** generic short packets (0) */
 	IA_CSS_IRQ_INFO_CSS_RECEIVER_GEN_SHORT_1      = 1 << 9,
-	/**< generic short packets (1) */
+	/** generic short packets (1) */
 	IA_CSS_IRQ_INFO_IF_PRIM_ERROR                 = 1 << 10,
-	/**< the primary input formatter (A) has encountered an error */
+	/** the primary input formatter (A) has encountered an error */
 	IA_CSS_IRQ_INFO_IF_PRIM_B_ERROR               = 1 << 11,
-	/**< the primary input formatter (B) has encountered an error */
+	/** the primary input formatter (B) has encountered an error */
 	IA_CSS_IRQ_INFO_IF_SEC_ERROR                  = 1 << 12,
-	/**< the secondary input formatter has encountered an error */
+	/** the secondary input formatter has encountered an error */
 	IA_CSS_IRQ_INFO_STREAM_TO_MEM_ERROR           = 1 << 13,
-	/**< the stream-to-memory device has encountered an error */
+	/** the stream-to-memory device has encountered an error */
 	IA_CSS_IRQ_INFO_SW_0                          = 1 << 14,
-	/**< software interrupt 0 */
+	/** software interrupt 0 */
 	IA_CSS_IRQ_INFO_SW_1                          = 1 << 15,
-	/**< software interrupt 1 */
+	/** software interrupt 1 */
 	IA_CSS_IRQ_INFO_SW_2                          = 1 << 16,
-	/**< software interrupt 2 */
+	/** software interrupt 2 */
 	IA_CSS_IRQ_INFO_ISP_BINARY_STATISTICS_READY   = 1 << 17,
-	/**< ISP binary statistics are ready */
+	/** ISP binary statistics are ready */
 	IA_CSS_IRQ_INFO_INPUT_SYSTEM_ERROR            = 1 << 18,
-	/**< the input system in in error */
+	/** the input system in in error */
 	IA_CSS_IRQ_INFO_IF_ERROR                      = 1 << 19,
-	/**< the input formatter in in error */
+	/** the input formatter in in error */
 	IA_CSS_IRQ_INFO_DMA_ERROR                     = 1 << 20,
-	/**< the dma in in error */
+	/** the dma in in error */
 	IA_CSS_IRQ_INFO_ISYS_EVENTS_READY             = 1 << 21,
-	/**< end-of-frame events are ready in the isys_event queue */
+	/** end-of-frame events are ready in the isys_event queue */
 };
 
-/** CSS receiver error types. Whenever the CSS receiver has encountered
+/* CSS receiver error types. Whenever the CSS receiver has encountered
  *  an error, this enumeration is used to indicate which errors have occurred.
  *
  *  Note that multiple error flags can be enabled at once and that this is in
@@ -105,39 +105,39 @@ enum ia_css_irq_info {
  * different receiver types, or possibly none in case of tests systems.
  */
 enum ia_css_rx_irq_info {
-	IA_CSS_RX_IRQ_INFO_BUFFER_OVERRUN   = 1U << 0, /**< buffer overrun */
-	IA_CSS_RX_IRQ_INFO_ENTER_SLEEP_MODE = 1U << 1, /**< entering sleep mode */
-	IA_CSS_RX_IRQ_INFO_EXIT_SLEEP_MODE  = 1U << 2, /**< exited sleep mode */
-	IA_CSS_RX_IRQ_INFO_ECC_CORRECTED    = 1U << 3, /**< ECC corrected */
+	IA_CSS_RX_IRQ_INFO_BUFFER_OVERRUN   = 1U << 0, /** buffer overrun */
+	IA_CSS_RX_IRQ_INFO_ENTER_SLEEP_MODE = 1U << 1, /** entering sleep mode */
+	IA_CSS_RX_IRQ_INFO_EXIT_SLEEP_MODE  = 1U << 2, /** exited sleep mode */
+	IA_CSS_RX_IRQ_INFO_ECC_CORRECTED    = 1U << 3, /** ECC corrected */
 	IA_CSS_RX_IRQ_INFO_ERR_SOT          = 1U << 4,
-						/**< Start of transmission */
-	IA_CSS_RX_IRQ_INFO_ERR_SOT_SYNC     = 1U << 5, /**< SOT sync (??) */
-	IA_CSS_RX_IRQ_INFO_ERR_CONTROL      = 1U << 6, /**< Control (??) */
-	IA_CSS_RX_IRQ_INFO_ERR_ECC_DOUBLE   = 1U << 7, /**< Double ECC */
-	IA_CSS_RX_IRQ_INFO_ERR_CRC          = 1U << 8, /**< CRC error */
-	IA_CSS_RX_IRQ_INFO_ERR_UNKNOWN_ID   = 1U << 9, /**< Unknown ID */
-	IA_CSS_RX_IRQ_INFO_ERR_FRAME_SYNC   = 1U << 10,/**< Frame sync error */
-	IA_CSS_RX_IRQ_INFO_ERR_FRAME_DATA   = 1U << 11,/**< Frame data error */
-	IA_CSS_RX_IRQ_INFO_ERR_DATA_TIMEOUT = 1U << 12,/**< Timeout occurred */
-	IA_CSS_RX_IRQ_INFO_ERR_UNKNOWN_ESC  = 1U << 13,/**< Unknown escape seq. */
-	IA_CSS_RX_IRQ_INFO_ERR_LINE_SYNC    = 1U << 14,/**< Line Sync error */
+						/** Start of transmission */
+	IA_CSS_RX_IRQ_INFO_ERR_SOT_SYNC     = 1U << 5, /** SOT sync (??) */
+	IA_CSS_RX_IRQ_INFO_ERR_CONTROL      = 1U << 6, /** Control (??) */
+	IA_CSS_RX_IRQ_INFO_ERR_ECC_DOUBLE   = 1U << 7, /** Double ECC */
+	IA_CSS_RX_IRQ_INFO_ERR_CRC          = 1U << 8, /** CRC error */
+	IA_CSS_RX_IRQ_INFO_ERR_UNKNOWN_ID   = 1U << 9, /** Unknown ID */
+	IA_CSS_RX_IRQ_INFO_ERR_FRAME_SYNC   = 1U << 10,/** Frame sync error */
+	IA_CSS_RX_IRQ_INFO_ERR_FRAME_DATA   = 1U << 11,/** Frame data error */
+	IA_CSS_RX_IRQ_INFO_ERR_DATA_TIMEOUT = 1U << 12,/** Timeout occurred */
+	IA_CSS_RX_IRQ_INFO_ERR_UNKNOWN_ESC  = 1U << 13,/** Unknown escape seq. */
+	IA_CSS_RX_IRQ_INFO_ERR_LINE_SYNC    = 1U << 14,/** Line Sync error */
 	IA_CSS_RX_IRQ_INFO_INIT_TIMEOUT     = 1U << 15,
 };
 
-/** Interrupt info structure. This structure contains information about an
+/* Interrupt info structure. This structure contains information about an
  *  interrupt. This needs to be used after an interrupt is received on the IA
  *  to perform the correct action.
  */
 struct ia_css_irq {
-	enum ia_css_irq_info type; /**< Interrupt type. */
-	unsigned int sw_irq_0_val; /**< In case of SW interrupt 0, value. */
-	unsigned int sw_irq_1_val; /**< In case of SW interrupt 1, value. */
-	unsigned int sw_irq_2_val; /**< In case of SW interrupt 2, value. */
+	enum ia_css_irq_info type; /** Interrupt type. */
+	unsigned int sw_irq_0_val; /** In case of SW interrupt 0, value. */
+	unsigned int sw_irq_1_val; /** In case of SW interrupt 1, value. */
+	unsigned int sw_irq_2_val; /** In case of SW interrupt 2, value. */
 	struct ia_css_pipe *pipe;
-	/**< The image pipe that generated the interrupt. */
+	/** The image pipe that generated the interrupt. */
 };
 
-/** @brief Obtain interrupt information.
+/* @brief Obtain interrupt information.
  *
  * @param[out] info	Pointer to the interrupt info. The interrupt
  *			information wil be written to this info.
@@ -154,7 +154,7 @@ struct ia_css_irq {
 enum ia_css_err
 ia_css_irq_translate(unsigned int *info);
 
-/** @brief Get CSI receiver error info.
+/* @brief Get CSI receiver error info.
  *
  * @param[out] irq_bits	Pointer to the interrupt bits. The interrupt
  *			bits will be written this info.
@@ -172,7 +172,7 @@ ia_css_irq_translate(unsigned int *info);
 void
 ia_css_rx_get_irq_info(unsigned int *irq_bits);
 
-/** @brief Get CSI receiver error info.
+/* @brief Get CSI receiver error info.
  *
  * @param[in]  port     Input port identifier.
  * @param[out] irq_bits	Pointer to the interrupt bits. The interrupt
@@ -188,7 +188,7 @@ ia_css_rx_get_irq_info(unsigned int *irq_bits);
 void
 ia_css_rx_port_get_irq_info(enum ia_css_csi2_port port, unsigned int *irq_bits);
 
-/** @brief Clear CSI receiver error info.
+/* @brief Clear CSI receiver error info.
  *
  * @param[in] irq_bits	The bits that should be cleared from the CSI receiver
  *			interrupt bits register.
@@ -205,7 +205,7 @@ ia_css_rx_port_get_irq_info(enum ia_css_csi2_port port, unsigned int *irq_bits);
 void
 ia_css_rx_clear_irq_info(unsigned int irq_bits);
 
-/** @brief Clear CSI receiver error info.
+/* @brief Clear CSI receiver error info.
  *
  * @param[in] port      Input port identifier.
  * @param[in] irq_bits	The bits that should be cleared from the CSI receiver
@@ -220,7 +220,7 @@ ia_css_rx_clear_irq_info(unsigned int irq_bits);
 void
 ia_css_rx_port_clear_irq_info(enum ia_css_csi2_port port, unsigned int irq_bits);
 
-/** @brief Enable or disable specific interrupts.
+/* @brief Enable or disable specific interrupts.
  *
  * @param[in] type	The interrupt type that will be enabled/disabled.
  * @param[in] enable	enable or disable.
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h
index c40c5a19bfe190a5580f10872489ccf2d8191485..8b674c98224c2b73111c664c071fd1f442557100 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_METADATA_H
 #define __IA_CSS_METADATA_H
 
-/** @file
+/* @file
  * This file contains structure for processing sensor metadata.
  */
 
@@ -23,32 +23,32 @@
 #include "ia_css_types.h"
 #include "ia_css_stream_format.h"
 
-/** Metadata configuration. This data structure contains necessary info
+/* Metadata configuration. This data structure contains necessary info
  *  to process sensor metadata.
  */
 struct ia_css_metadata_config {
-	enum ia_css_stream_format data_type; /**< Data type of CSI-2 embedded
+	enum ia_css_stream_format data_type; /** Data type of CSI-2 embedded
 			data. The default value is IA_CSS_STREAM_FORMAT_EMBEDDED. For
 			certain sensors, user can choose non-default data type for embedded
 			data. */
-	struct ia_css_resolution  resolution; /**< Resolution */
+	struct ia_css_resolution  resolution; /** Resolution */
 };
 
 struct ia_css_metadata_info {
-	struct ia_css_resolution resolution; /**< Resolution */
-	uint32_t                 stride;     /**< Stride in bytes */
-	uint32_t                 size;       /**< Total size in bytes */
+	struct ia_css_resolution resolution; /** Resolution */
+	uint32_t                 stride;     /** Stride in bytes */
+	uint32_t                 size;       /** Total size in bytes */
 };
 
 struct ia_css_metadata {
-	struct ia_css_metadata_info info;    /**< Layout info */
-	ia_css_ptr	            address; /**< CSS virtual address */
+	struct ia_css_metadata_info info;    /** Layout info */
+	ia_css_ptr	            address; /** CSS virtual address */
 	uint32_t	            exp_id;
-	/**< Exposure ID, see ia_css_event_public.h for more detail */
+	/** Exposure ID, see ia_css_event_public.h for more detail */
 };
 #define SIZE_OF_IA_CSS_METADATA_STRUCT sizeof(struct ia_css_metadata)
 
-/** @brief Allocate a metadata buffer.
+/* @brief Allocate a metadata buffer.
  * @param[in]   metadata_info Metadata info struct, contains details on metadata buffers.
  * @return      Pointer of metadata buffer or NULL (if error)
  *
@@ -58,7 +58,7 @@ struct ia_css_metadata {
 struct ia_css_metadata *
 ia_css_metadata_allocate(const struct ia_css_metadata_info *metadata_info);
 
-/** @brief Free a metadata buffer.
+/* @brief Free a metadata buffer.
  *
  * @param[in]	metadata	Pointer of metadata buffer.
  * @return	None
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h
index fd2c01b60b28127b55928a664d24a2ebe53b53ca..f9c9cd76be97c7c52318701bbf413b8a9ff7c2f9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_MIPI_H
 #define __IA_CSS_MIPI_H
 
-/** @file
+/* @file
  * This file contains MIPI support functionality
  */
 
@@ -24,10 +24,10 @@
 #include "ia_css_stream_format.h"
 #include "ia_css_input_port.h"
 
-/** Backward compatible for CSS API 2.0 only
+/* Backward compatible for CSS API 2.0 only
  * TO BE REMOVED when all drivers move to CSS API 2.1.
  */
-/** @brief Specify a CSS MIPI frame buffer.
+/* @brief Specify a CSS MIPI frame buffer.
  *
  * @param[in]	size_mem_words	The frame size in memory words (32B).
  * @param[in]	contiguous	Allocate memory physically contiguously or not.
@@ -42,7 +42,7 @@ ia_css_mipi_frame_specify(const unsigned int	size_mem_words,
 				const bool contiguous);
 
 #if !defined(HAS_NO_INPUT_SYSTEM)
-/** @brief Register size of a CSS MIPI frame for check during capturing.
+/* @brief Register size of a CSS MIPI frame for check during capturing.
  *
  * @param[in]	port	CSI-2 port this check is registered.
  * @param[in]	size_mem_words	The frame size in memory words (32B).
@@ -59,7 +59,7 @@ ia_css_mipi_frame_enable_check_on_size(const enum ia_css_csi2_port port,
 				const unsigned int	size_mem_words);
 #endif
 
-/** @brief Calculate the size of a mipi frame.
+/* @brief Calculate the size of a mipi frame.
  *
  * @param[in]	width		The width (in pixels) of the frame.
  * @param[in]	height		The height (in lines) of the frame.
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mmu.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mmu.h
index 48f8855d61f6eb99422cb0aa35919c8562a01ebe..13c21056bfbf77fca1e71a8ceb37f41a79a6027c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mmu.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mmu.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_MMU_H
 #define __IA_CSS_MMU_H
 
-/** @file
+/* @file
  * This file contains one support function for invalidating the CSS MMU cache
  */
 
-/** @brief Invalidate the MMU internal cache.
+/* @brief Invalidate the MMU internal cache.
  * @return	None
  *
  * This function triggers an invalidation of the translate-look-aside
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_morph.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_morph.h
index 969840da52b2199af47b73b5fa8d8ac10c003a0a..de409638d009bc502f1ca522442dbcb48dcd1c0d 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_morph.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_morph.h
@@ -15,13 +15,13 @@
 #ifndef __IA_CSS_MORPH_H
 #define __IA_CSS_MORPH_H
 
-/** @file
+/* @file
  * This file contains supporting for morphing table
  */
 
 #include <ia_css_types.h>
 
-/** @brief Morphing table
+/* @brief Morphing table
  * @param[in]	width Width of the morphing table.
  * @param[in]	height Height of the morphing table.
  * @return		Pointer to the morphing table
@@ -29,7 +29,7 @@
 struct ia_css_morph_table *
 ia_css_morph_table_allocate(unsigned int width, unsigned int height);
 
-/** @brief Free the morph table
+/* @brief Free the morph table
  * @param[in]	me Pointer to the morph table.
  * @return		None
 */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_pipe_public.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_pipe_public.h
index 733e0ef3afe8bdc0c430ec9a33bf73afd992a8ab..df0aad9a6ab91a1656360bddbcca0ce0d0bfd17c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_pipe_public.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_pipe_public.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_PIPE_PUBLIC_H
 #define __IA_CSS_PIPE_PUBLIC_H
 
-/** @file
+/* @file
  * This file contains the public interface for CSS pipes.
  */
 
@@ -34,7 +34,7 @@ enum {
 	IA_CSS_PIPE_MAX_OUTPUT_STAGE,
 };
 
-/** Enumeration of pipe modes. This mode can be used to create
+/* Enumeration of pipe modes. This mode can be used to create
  *  an image pipe for this mode. These pipes can be combined
  *  to configure and run streams on the ISP.
  *
@@ -42,12 +42,12 @@ enum {
  *  create a continuous capture stream.
  */
 enum ia_css_pipe_mode {
-	IA_CSS_PIPE_MODE_PREVIEW,	/**< Preview pipe */
-	IA_CSS_PIPE_MODE_VIDEO,		/**< Video pipe */
-	IA_CSS_PIPE_MODE_CAPTURE,	/**< Still capture pipe */
-	IA_CSS_PIPE_MODE_ACC,		/**< Accelerated pipe */
-	IA_CSS_PIPE_MODE_COPY,		/**< Copy pipe, only used for embedded/image data copying */
-	IA_CSS_PIPE_MODE_YUVPP,		/**< YUV post processing pipe, used for all use cases with YUV input,
+	IA_CSS_PIPE_MODE_PREVIEW,	/** Preview pipe */
+	IA_CSS_PIPE_MODE_VIDEO,		/** Video pipe */
+	IA_CSS_PIPE_MODE_CAPTURE,	/** Still capture pipe */
+	IA_CSS_PIPE_MODE_ACC,		/** Accelerated pipe */
+	IA_CSS_PIPE_MODE_COPY,		/** Copy pipe, only used for embedded/image data copying */
+	IA_CSS_PIPE_MODE_YUVPP,		/** YUV post processing pipe, used for all use cases with YUV input,
 									for SoC sensor and external ISP */
 };
 /* Temporary define  */
@@ -58,10 +58,10 @@ enum ia_css_pipe_mode {
  * the order should match with definition in sh_css_defs.h
  */
 enum ia_css_pipe_version {
-	IA_CSS_PIPE_VERSION_1 = 1,		/**< ISP1.0 pipe */
-	IA_CSS_PIPE_VERSION_2_2 = 2,		/**< ISP2.2 pipe */
-	IA_CSS_PIPE_VERSION_2_6_1 = 3,		/**< ISP2.6.1 pipe */
-	IA_CSS_PIPE_VERSION_2_7 = 4		/**< ISP2.7 pipe */
+	IA_CSS_PIPE_VERSION_1 = 1,		/** ISP1.0 pipe */
+	IA_CSS_PIPE_VERSION_2_2 = 2,		/** ISP2.2 pipe */
+	IA_CSS_PIPE_VERSION_2_6_1 = 3,		/** ISP2.6.1 pipe */
+	IA_CSS_PIPE_VERSION_2_7 = 4		/** ISP2.7 pipe */
 };
 
 /**
@@ -71,79 +71,79 @@ enum ia_css_pipe_version {
  */
 struct ia_css_pipe_config {
 	enum ia_css_pipe_mode mode;
-	/**< mode, indicates which mode the pipe should use. */
+	/** mode, indicates which mode the pipe should use. */
 	enum ia_css_pipe_version isp_pipe_version;
-	/**< pipe version, indicates which imaging pipeline the pipe should use. */
+	/** pipe version, indicates which imaging pipeline the pipe should use. */
 	struct ia_css_resolution input_effective_res;
-	/**< input effective resolution */
+	/** input effective resolution */
 	struct ia_css_resolution bayer_ds_out_res;
-	/**< bayer down scaling */
+	/** bayer down scaling */
 	struct ia_css_resolution capt_pp_in_res;
 #ifndef ISP2401
-	/**< bayer down scaling */
+	/** bayer down scaling */
 #else
-	/**< capture post processing input resolution */
+	/** capture post processing input resolution */
 #endif
 	struct ia_css_resolution vf_pp_in_res;
 #ifndef ISP2401
-	/**< bayer down scaling */
+	/** bayer down scaling */
 #else
-	/**< view finder post processing input resolution */
+	/** view finder post processing input resolution */
 	struct ia_css_resolution output_system_in_res;
-	/**< For IPU3 only: use output_system_in_res to specify what input resolution
+	/** For IPU3 only: use output_system_in_res to specify what input resolution
 	     will OSYS receive, this resolution is equal to the output resolution of GDC
 	     if not determined CSS will set output_system_in_res with main osys output pin resolution
 	     All other IPUs may ignore this property */
 #endif
 	struct ia_css_resolution dvs_crop_out_res;
-	/**< dvs crop, video only, not in use yet. Use dvs_envelope below. */
+	/** dvs crop, video only, not in use yet. Use dvs_envelope below. */
 	struct ia_css_frame_info output_info[IA_CSS_PIPE_MAX_OUTPUT_STAGE];
-	/**< output of YUV scaling */
+	/** output of YUV scaling */
 	struct ia_css_frame_info vf_output_info[IA_CSS_PIPE_MAX_OUTPUT_STAGE];
-	/**< output of VF YUV scaling */
+	/** output of VF YUV scaling */
 	struct ia_css_fw_info *acc_extension;
-	/**< Pipeline extension accelerator */
+	/** Pipeline extension accelerator */
 	struct ia_css_fw_info **acc_stages;
-	/**< Standalone accelerator stages */
+	/** Standalone accelerator stages */
 	uint32_t num_acc_stages;
-	/**< Number of standalone accelerator stages */
+	/** Number of standalone accelerator stages */
 	struct ia_css_capture_config default_capture_config;
-	/**< Default capture config for initial capture pipe configuration. */
-	struct ia_css_resolution dvs_envelope; /**< temporary */
+	/** Default capture config for initial capture pipe configuration. */
+	struct ia_css_resolution dvs_envelope; /** temporary */
 	enum ia_css_frame_delay dvs_frame_delay;
-	/**< indicates the DVS loop delay in frame periods */
+	/** indicates the DVS loop delay in frame periods */
 	int acc_num_execs;
-	/**< For acceleration pipes only: determine how many times the pipe
+	/** For acceleration pipes only: determine how many times the pipe
 	     should be run. Setting this to -1 means it will run until
 	     stopped. */
 	bool enable_dz;
-	/**< Disabling digital zoom for a pipeline, if this is set to false,
+	/** Disabling digital zoom for a pipeline, if this is set to false,
 	     then setting a zoom factor will have no effect.
 	     In some use cases this provides better performance. */
 	bool enable_dpc;
-	/**< Disabling "Defect Pixel Correction" for a pipeline, if this is set
+	/** Disabling "Defect Pixel Correction" for a pipeline, if this is set
 	     to false. In some use cases this provides better performance. */
 	bool enable_vfpp_bci;
-	/**< Enabling BCI mode will cause yuv_scale binary to be picked up
+	/** Enabling BCI mode will cause yuv_scale binary to be picked up
 	     instead of vf_pp. This only applies to viewfinder post
 	     processing stages. */
 #ifdef ISP2401
 	bool enable_luma_only;
-	/**< Enabling of monochrome mode for a pipeline. If enabled only luma processing
+	/** Enabling of monochrome mode for a pipeline. If enabled only luma processing
 	     will be done. */
 	bool enable_tnr;
-	/**< Enabling of TNR (temporal noise reduction). This is only applicable to video
+	/** Enabling of TNR (temporal noise reduction). This is only applicable to video
 	     pipes. Non video-pipes should always set this parameter to false. */
 #endif
 	struct ia_css_isp_config *p_isp_config;
-	/**< Pointer to ISP configuration */
+	/** Pointer to ISP configuration */
 	struct ia_css_resolution gdc_in_buffer_res;
-	/**< GDC in buffer resolution. */
+	/** GDC in buffer resolution. */
 	struct ia_css_point gdc_in_buffer_offset;
-	/**< GDC in buffer offset - indicates the pixel coordinates of the first valid pixel inside the buffer */
+	/** GDC in buffer offset - indicates the pixel coordinates of the first valid pixel inside the buffer */
 #ifdef ISP2401
 	struct ia_css_coordinate internal_frame_origin_bqs_on_sctbl;
-	/**< Origin of internal frame positioned on shading table at shading correction in ISP.
+	/** Origin of internal frame positioned on shading table at shading correction in ISP.
 	     NOTE: Shading table is larger than or equal to internal frame.
 		   Shading table has shading gains and internal frame has bayer data.
 		   The origin of internal frame is used in shading correction in ISP
@@ -228,20 +228,20 @@ struct ia_css_pipe_config {
 
 #endif
 
-/** Pipe info, this struct describes properties of a pipe after it's stream has
+/* Pipe info, this struct describes properties of a pipe after it's stream has
  * been created.
  * ~~~** DO NOT ADD NEW FIELD **~~~ This structure will be deprecated.
  *           - On the Behalf of CSS-API Committee.
  */
 struct ia_css_pipe_info {
 	struct ia_css_frame_info output_info[IA_CSS_PIPE_MAX_OUTPUT_STAGE];
-	/**< Info about output resolution. This contains the stride which
+	/** Info about output resolution. This contains the stride which
 	     should be used for memory allocation. */
 	struct ia_css_frame_info vf_output_info[IA_CSS_PIPE_MAX_OUTPUT_STAGE];
-	/**< Info about viewfinder output resolution (optional). This contains
+	/** Info about viewfinder output resolution (optional). This contains
 	     the stride that should be used for memory allocation. */
 	struct ia_css_frame_info raw_output_info;
-	/**< Raw output resolution. This indicates the resolution of the
+	/** Raw output resolution. This indicates the resolution of the
 	     RAW bayer output for pipes that support this. Currently, only the
 	     still capture pipes support this feature. When this resolution is
 	     smaller than the input resolution, cropping will be performed by
@@ -252,17 +252,17 @@ struct ia_css_pipe_info {
 	     the input resolution - 8x8. */
 #ifdef ISP2401
 	struct ia_css_resolution output_system_in_res_info;
-	/**< For IPU3 only. Info about output system in resolution which is considered
+	/** For IPU3 only. Info about output system in resolution which is considered
 	     as gdc out resolution. */
 #endif
 	struct ia_css_shading_info shading_info;
-	/**< After an image pipe is created, this field will contain the info
+	/** After an image pipe is created, this field will contain the info
 	     for the shading correction. */
 	struct ia_css_grid_info  grid_info;
-	/**< After an image pipe is created, this field will contain the grid
+	/** After an image pipe is created, this field will contain the grid
 	     info for 3A and DVS. */
 	int num_invalid_frames;
-	/**< The very first frames in a started stream do not contain valid data.
+	/** The very first frames in a started stream do not contain valid data.
 	     In this field, the CSS-firmware communicates to the host-driver how
 	     many initial frames will contain invalid data; this allows the
 	     host-driver to discard those initial invalid frames and start it's
@@ -299,7 +299,7 @@ struct ia_css_pipe_info {
 
 #endif
 
-/** @brief Load default pipe configuration
+/* @brief Load default pipe configuration
  * @param[out]	pipe_config The pipe configuration.
  * @return	None
  *
@@ -334,7 +334,7 @@ struct ia_css_pipe_info {
  */
 void ia_css_pipe_config_defaults(struct ia_css_pipe_config *pipe_config);
 
-/** @brief Create a pipe
+/* @brief Create a pipe
  * @param[in]	config The pipe configuration.
  * @param[out]	pipe The pipe.
  * @return	IA_CSS_SUCCESS or the error code.
@@ -346,7 +346,7 @@ enum ia_css_err
 ia_css_pipe_create(const struct ia_css_pipe_config *config,
 		   struct ia_css_pipe **pipe);
 
-/** @brief Destroy a pipe
+/* @brief Destroy a pipe
  * @param[in]	pipe The pipe.
  * @return	IA_CSS_SUCCESS or the error code.
  *
@@ -355,7 +355,7 @@ ia_css_pipe_create(const struct ia_css_pipe_config *config,
 enum ia_css_err
 ia_css_pipe_destroy(struct ia_css_pipe *pipe);
 
-/** @brief Provides information about a pipe
+/* @brief Provides information about a pipe
  * @param[in]	pipe The pipe.
  * @param[out]	pipe_info The pipe information.
  * @return	IA_CSS_SUCCESS or IA_CSS_ERR_INVALID_ARGUMENTS.
@@ -366,7 +366,7 @@ enum ia_css_err
 ia_css_pipe_get_info(const struct ia_css_pipe *pipe,
 		     struct ia_css_pipe_info *pipe_info);
 
-/** @brief Configure a pipe with filter coefficients.
+/* @brief Configure a pipe with filter coefficients.
  * @param[in]	pipe	The pipe.
  * @param[in]	config	The pointer to ISP configuration.
  * @return		IA_CSS_SUCCESS or error code upon error.
@@ -378,7 +378,7 @@ enum ia_css_err
 ia_css_pipe_set_isp_config(struct ia_css_pipe *pipe,
 						   struct ia_css_isp_config *config);
 
-/** @brief Controls when the Event generator raises an IRQ to the Host.
+/* @brief Controls when the Event generator raises an IRQ to the Host.
  *
  * @param[in]	pipe	The pipe.
  * @param[in]	or_mask	Binary or of enum ia_css_event_irq_mask_type. Each pipe
@@ -455,7 +455,7 @@ ia_css_pipe_set_irq_mask(struct ia_css_pipe *pipe,
 			 unsigned int or_mask,
 			 unsigned int and_mask);
 
-/** @brief Reads the current event IRQ mask from the CSS.
+/* @brief Reads the current event IRQ mask from the CSS.
  *
  * @param[in]	pipe The pipe.
  * @param[out]	or_mask	Current or_mask. The bits in this mask are a binary or
@@ -476,7 +476,7 @@ ia_css_event_get_irq_mask(const struct ia_css_pipe *pipe,
 			  unsigned int *or_mask,
 			  unsigned int *and_mask);
 
-/** @brief Queue a buffer for an image pipe.
+/* @brief Queue a buffer for an image pipe.
  *
  * @param[in] pipe	The pipe that will own the buffer.
  * @param[in] buffer	Pointer to the buffer.
@@ -498,7 +498,7 @@ enum ia_css_err
 ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 			   const struct ia_css_buffer *buffer);
 
-/** @brief Dequeue a buffer from an image pipe.
+/* @brief Dequeue a buffer from an image pipe.
  *
  * @param[in]    pipe	 The pipeline that the buffer queue belongs to.
  * @param[in,out] buffer The buffer is used to lookup the type which determines
@@ -519,7 +519,7 @@ ia_css_pipe_dequeue_buffer(struct ia_css_pipe *pipe,
 			   struct ia_css_buffer *buffer);
 
 
-/** @brief  Set the state (Enable or Disable) of the Extension stage in the
+/* @brief  Set the state (Enable or Disable) of the Extension stage in the
  *          given pipe.
  * @param[in] pipe         Pipe handle.
  * @param[in] fw_handle    Extension firmware Handle (ia_css_fw_info.handle)
@@ -546,7 +546,7 @@ ia_css_pipe_set_qos_ext_state (struct ia_css_pipe *pipe,
                            uint32_t fw_handle,
                            bool  enable);
 
-/** @brief  Get the state (Enable or Disable) of the Extension stage in the
+/* @brief  Get the state (Enable or Disable) of the Extension stage in the
  *          given pipe.
  * @param[in]  pipe        Pipe handle.
  * @param[in]  fw_handle   Extension firmware Handle (ia_css_fw_info.handle)
@@ -573,7 +573,7 @@ ia_css_pipe_get_qos_ext_state (struct ia_css_pipe *pipe,
                            bool * enable);
 
 #ifdef ISP2401
-/** @brief  Update mapped CSS and ISP arguments for QoS pipe during SP runtime.
+/* @brief  Update mapped CSS and ISP arguments for QoS pipe during SP runtime.
  * @param[in] pipe     	Pipe handle.
  * @param[in] fw_handle	Extension firmware Handle (ia_css_fw_info.handle).
  * @param[in] css_seg  	Parameter memory descriptors for CSS segments.
@@ -595,7 +595,7 @@ ia_css_pipe_update_qos_ext_mapped_arg(struct ia_css_pipe *pipe, uint32_t fw_hand
 			struct ia_css_isp_param_isp_segments *isp_seg);
 
 #endif
-/** @brief Get selected configuration settings
+/* @brief Get selected configuration settings
  * @param[in]	pipe	The pipe.
  * @param[out]	config	Configuration settings.
  * @return		None
@@ -604,7 +604,7 @@ void
 ia_css_pipe_get_isp_config(struct ia_css_pipe *pipe,
 			     struct ia_css_isp_config *config);
 
-/** @brief Set the scaler lut on this pipe. A copy of lut is made in the inuit
+/* @brief Set the scaler lut on this pipe. A copy of lut is made in the inuit
  *         address space. So the LUT can be freed by caller.
  * @param[in]  pipe        Pipe handle.
  * @param[in]  lut         Look up tabel
@@ -623,7 +623,7 @@ ia_css_pipe_get_isp_config(struct ia_css_pipe *pipe,
 enum ia_css_err
 ia_css_pipe_set_bci_scaler_lut( struct ia_css_pipe *pipe,
 				const void *lut);
-/** @brief Checking of DVS statistics ability
+/* @brief Checking of DVS statistics ability
  * @param[in]	pipe_info	The pipe info.
  * @return		true - has DVS statistics ability
  * 			false - otherwise
@@ -631,7 +631,7 @@ ia_css_pipe_set_bci_scaler_lut( struct ia_css_pipe *pipe,
 bool ia_css_pipe_has_dvs_stats(struct ia_css_pipe_info *pipe_info);
 
 #ifdef ISP2401
-/** @brief Override the frameformat set on the output pins.
+/* @brief Override the frameformat set on the output pins.
  * @param[in]  pipe        Pipe handle.
  * @param[in]  output_pin  Pin index to set the format on
  *                         0 - main output pin
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_prbs.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_prbs.h
index 9b0eeb08ca0493bdb48e39030a041dc463506a34..6f24656b6cb46c0d675cd68f72743723455a5ad4 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_prbs.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_prbs.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_PRBS_H
 #define __IA_CSS_PRBS_H
 
-/** @file
+/* @file
  * This file contains support for Pseudo Random Bit Sequence (PRBS) inputs
  */
 
-/** Enumerate the PRBS IDs.
+/* Enumerate the PRBS IDs.
  */
 enum ia_css_prbs_id {
 	IA_CSS_PRBS_ID0,
@@ -44,10 +44,10 @@ enum ia_css_prbs_id {
  */
 struct ia_css_prbs_config {
 	enum ia_css_prbs_id	id;
-	unsigned int		h_blank;	/**< horizontal blank */
-	unsigned int		v_blank;	/**< vertical blank */
-	int			seed;	/**< random seed for the 1st 2-pixel-components/clock */
-	int			seed1;	/**< random seed for the 2nd 2-pixel-components/clock */
+	unsigned int		h_blank;	/** horizontal blank */
+	unsigned int		v_blank;	/** vertical blank */
+	int			seed;	/** random seed for the 1st 2-pixel-components/clock */
+	int			seed1;	/** random seed for the 2nd 2-pixel-components/clock */
 };
 
 #endif /* __IA_CSS_PRBS_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_properties.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_properties.h
index 19af4021b24c511091d98a7e591714e2c72501b5..9a167306611c9a95602ad30f2bf82bb2d92f78d6 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_properties.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_properties.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_PROPERTIES_H
 #define __IA_CSS_PROPERTIES_H
 
-/** @file
+/* @file
  * This file contains support for retrieving properties of some hardware the CSS system
  */
 
@@ -24,12 +24,12 @@
 
 struct ia_css_properties {
 	int  gdc_coord_one;
-	bool l1_base_is_index; /**< Indicate whether the L1 page base
+	bool l1_base_is_index; /** Indicate whether the L1 page base
 				    is a page index or a byte address. */
 	enum ia_css_vamem_type vamem_type;
 };
 
-/** @brief Get hardware properties
+/* @brief Get hardware properties
  * @param[in,out]	properties The hardware properties
  * @return	None
  *
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_shading.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_shading.h
index cb0f249e98c8b8dde2649e9297d433a49e058fd4..588f53d32b723688a4bbe5697839369eeac4c98d 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_shading.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_shading.h
@@ -15,13 +15,13 @@
 #ifndef __IA_CSS_SHADING_H
 #define __IA_CSS_SHADING_H
 
-/** @file
+/* @file
  * This file contains support for setting the shading table for CSS
  */
 
 #include <ia_css_types.h>
 
-/** @brief Shading table
+/* @brief Shading table
  * @param[in]	width Width of the shading table.
  * @param[in]	height Height of the shading table.
  * @return		Pointer to the shading table
@@ -30,7 +30,7 @@ struct ia_css_shading_table *
 ia_css_shading_table_alloc(unsigned int width,
 			   unsigned int height);
 
-/** @brief Free shading table
+/* @brief Free shading table
  * @param[in]	table Pointer to the shading table.
  * @return		None
 */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream.h
index 453fe4db0133ad1568b1ce6410902c580de69398..fb6e8c2ca8bfb011023a2c3488f66742c854719c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream.h
@@ -48,7 +48,7 @@ struct ia_css_stream {
 	bool                           started;
 };
 
-/** @brief Get a binary in the stream, which binary has the shading correction.
+/* @brief Get a binary in the stream, which binary has the shading correction.
  *
  * @param[in] stream: The stream.
  * @return	The binary which has the shading correction.
@@ -76,7 +76,7 @@ sh_css_invalidate_params(struct ia_css_stream *stream);
 const struct ia_css_fpn_table *
 ia_css_get_fpn_table(struct ia_css_stream *stream);
 
-/** @brief Get a pointer to the shading table.
+/* @brief Get a pointer to the shading table.
  *
  * @param[in] stream: The stream.
  * @return	The pointer to the shading table.
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h
index ae608a9c9051f14759f3ee0e710559091238c9ce..f7e9020a86e14d979015ba46c87554d7e7efe30b 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h
@@ -15,74 +15,74 @@
 #ifndef __IA_CSS_STREAM_FORMAT_H
 #define __IA_CSS_STREAM_FORMAT_H
 
-/** @file
+/* @file
  * This file contains formats usable for ISP streaming input
  */
 
 #include <type_support.h> /* bool */
 
-/** The ISP streaming input interface supports the following formats.
+/* The ISP streaming input interface supports the following formats.
  *  These match the corresponding MIPI formats.
  */
 enum ia_css_stream_format {
-	IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY,    /**< 8 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV420_8,  /**< 8 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV420_10, /**< 10 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV420_16, /**< 16 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV422_8,  /**< UYVY..UYVY, 8 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV422_10, /**< UYVY..UYVY, 10 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV422_16, /**< UYVY..UYVY, 16 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RGB_444,  /**< BGR..BGR, 4 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RGB_555,  /**< BGR..BGR, 5 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RGB_565,  /**< BGR..BGR, 5 bits B and R, 6 bits G */
-	IA_CSS_STREAM_FORMAT_RGB_666,  /**< BGR..BGR, 6 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RGB_888,  /**< BGR..BGR, 8 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RAW_6,    /**< RAW data, 6 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_7,    /**< RAW data, 7 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_8,    /**< RAW data, 8 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_10,   /**< RAW data, 10 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_12,   /**< RAW data, 12 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_14,   /**< RAW data, 14 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_16,   /**< RAW data, 16 bits per pixel, which is
+	IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY,    /** 8 bits per subpixel */
+	IA_CSS_STREAM_FORMAT_YUV420_8,  /** 8 bits per subpixel */
+	IA_CSS_STREAM_FORMAT_YUV420_10, /** 10 bits per subpixel */
+	IA_CSS_STREAM_FORMAT_YUV420_16, /** 16 bits per subpixel */
+	IA_CSS_STREAM_FORMAT_YUV422_8,  /** UYVY..UYVY, 8 bits per subpixel */
+	IA_CSS_STREAM_FORMAT_YUV422_10, /** UYVY..UYVY, 10 bits per subpixel */
+	IA_CSS_STREAM_FORMAT_YUV422_16, /** UYVY..UYVY, 16 bits per subpixel */
+	IA_CSS_STREAM_FORMAT_RGB_444,  /** BGR..BGR, 4 bits per subpixel */
+	IA_CSS_STREAM_FORMAT_RGB_555,  /** BGR..BGR, 5 bits per subpixel */
+	IA_CSS_STREAM_FORMAT_RGB_565,  /** BGR..BGR, 5 bits B and R, 6 bits G */
+	IA_CSS_STREAM_FORMAT_RGB_666,  /** BGR..BGR, 6 bits per subpixel */
+	IA_CSS_STREAM_FORMAT_RGB_888,  /** BGR..BGR, 8 bits per subpixel */
+	IA_CSS_STREAM_FORMAT_RAW_6,    /** RAW data, 6 bits per pixel */
+	IA_CSS_STREAM_FORMAT_RAW_7,    /** RAW data, 7 bits per pixel */
+	IA_CSS_STREAM_FORMAT_RAW_8,    /** RAW data, 8 bits per pixel */
+	IA_CSS_STREAM_FORMAT_RAW_10,   /** RAW data, 10 bits per pixel */
+	IA_CSS_STREAM_FORMAT_RAW_12,   /** RAW data, 12 bits per pixel */
+	IA_CSS_STREAM_FORMAT_RAW_14,   /** RAW data, 14 bits per pixel */
+	IA_CSS_STREAM_FORMAT_RAW_16,   /** RAW data, 16 bits per pixel, which is
 					    not specified in CSI-MIPI standard*/
-	IA_CSS_STREAM_FORMAT_BINARY_8, /**< Binary byte stream, which is target at
+	IA_CSS_STREAM_FORMAT_BINARY_8, /** Binary byte stream, which is target at
 					    JPEG. */
 
-	/** CSI2-MIPI specific format: Generic short packet data. It is used to
+	/* CSI2-MIPI specific format: Generic short packet data. It is used to
 	 *  keep the timing information for the opening/closing of shutters,
 	 *  triggering of flashes and etc.
 	 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT1,  /**< Generic Short Packet Code 1 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT2,  /**< Generic Short Packet Code 2 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT3,  /**< Generic Short Packet Code 3 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT4,  /**< Generic Short Packet Code 4 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT5,  /**< Generic Short Packet Code 5 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT6,  /**< Generic Short Packet Code 6 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT7,  /**< Generic Short Packet Code 7 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT8,  /**< Generic Short Packet Code 8 */
+	IA_CSS_STREAM_FORMAT_GENERIC_SHORT1,  /** Generic Short Packet Code 1 */
+	IA_CSS_STREAM_FORMAT_GENERIC_SHORT2,  /** Generic Short Packet Code 2 */
+	IA_CSS_STREAM_FORMAT_GENERIC_SHORT3,  /** Generic Short Packet Code 3 */
+	IA_CSS_STREAM_FORMAT_GENERIC_SHORT4,  /** Generic Short Packet Code 4 */
+	IA_CSS_STREAM_FORMAT_GENERIC_SHORT5,  /** Generic Short Packet Code 5 */
+	IA_CSS_STREAM_FORMAT_GENERIC_SHORT6,  /** Generic Short Packet Code 6 */
+	IA_CSS_STREAM_FORMAT_GENERIC_SHORT7,  /** Generic Short Packet Code 7 */
+	IA_CSS_STREAM_FORMAT_GENERIC_SHORT8,  /** Generic Short Packet Code 8 */
 
-	/** CSI2-MIPI specific format: YUV data.
+	/* CSI2-MIPI specific format: YUV data.
 	 */
-	IA_CSS_STREAM_FORMAT_YUV420_8_SHIFT,  /**< YUV420 8-bit (Chroma Shifted Pixel Sampling) */
-	IA_CSS_STREAM_FORMAT_YUV420_10_SHIFT, /**< YUV420 8-bit (Chroma Shifted Pixel Sampling) */
+	IA_CSS_STREAM_FORMAT_YUV420_8_SHIFT,  /** YUV420 8-bit (Chroma Shifted Pixel Sampling) */
+	IA_CSS_STREAM_FORMAT_YUV420_10_SHIFT, /** YUV420 8-bit (Chroma Shifted Pixel Sampling) */
 
-	/** CSI2-MIPI specific format: Generic long packet data
+	/* CSI2-MIPI specific format: Generic long packet data
 	 */
-	IA_CSS_STREAM_FORMAT_EMBEDDED, /**< Embedded 8-bit non Image Data */
+	IA_CSS_STREAM_FORMAT_EMBEDDED, /** Embedded 8-bit non Image Data */
 
-	/** CSI2-MIPI specific format: User defined byte-based data. For example,
+	/* CSI2-MIPI specific format: User defined byte-based data. For example,
 	 *  the data transmitter (e.g. the SoC sensor) can keep the JPEG data as
 	 *  the User Defined Data Type 4 and the MPEG data as the
 	 *  User Defined Data Type 7.
 	 */
-	IA_CSS_STREAM_FORMAT_USER_DEF1,  /**< User defined 8-bit data type 1 */
-	IA_CSS_STREAM_FORMAT_USER_DEF2,  /**< User defined 8-bit data type 2 */
-	IA_CSS_STREAM_FORMAT_USER_DEF3,  /**< User defined 8-bit data type 3 */
-	IA_CSS_STREAM_FORMAT_USER_DEF4,  /**< User defined 8-bit data type 4 */
-	IA_CSS_STREAM_FORMAT_USER_DEF5,  /**< User defined 8-bit data type 5 */
-	IA_CSS_STREAM_FORMAT_USER_DEF6,  /**< User defined 8-bit data type 6 */
-	IA_CSS_STREAM_FORMAT_USER_DEF7,  /**< User defined 8-bit data type 7 */
-	IA_CSS_STREAM_FORMAT_USER_DEF8,  /**< User defined 8-bit data type 8 */
+	IA_CSS_STREAM_FORMAT_USER_DEF1,  /** User defined 8-bit data type 1 */
+	IA_CSS_STREAM_FORMAT_USER_DEF2,  /** User defined 8-bit data type 2 */
+	IA_CSS_STREAM_FORMAT_USER_DEF3,  /** User defined 8-bit data type 3 */
+	IA_CSS_STREAM_FORMAT_USER_DEF4,  /** User defined 8-bit data type 4 */
+	IA_CSS_STREAM_FORMAT_USER_DEF5,  /** User defined 8-bit data type 5 */
+	IA_CSS_STREAM_FORMAT_USER_DEF6,  /** User defined 8-bit data type 6 */
+	IA_CSS_STREAM_FORMAT_USER_DEF7,  /** User defined 8-bit data type 7 */
+	IA_CSS_STREAM_FORMAT_USER_DEF8,  /** User defined 8-bit data type 8 */
 };
 
 #define	IA_CSS_STREAM_FORMAT_NUM	IA_CSS_STREAM_FORMAT_USER_DEF8
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h
index 2c8d9de10a59fdd337c463729c11fc9c6e3e894f..ca3203357ff5f2e68f36dc837781ca8b1a7f0ff6 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_STREAM_PUBLIC_H
 #define __IA_CSS_STREAM_PUBLIC_H
 
-/** @file
+/* @file
  * This file contains support for configuring and controlling streams
  */
 
@@ -27,26 +27,26 @@
 #include "ia_css_prbs.h"
 #include "ia_css_input_port.h"
 
-/** Input modes, these enumerate all supported input modes.
+/* Input modes, these enumerate all supported input modes.
  *  Note that not all ISP modes support all input modes.
  */
 enum ia_css_input_mode {
-	IA_CSS_INPUT_MODE_SENSOR, /**< data from sensor */
-	IA_CSS_INPUT_MODE_FIFO,   /**< data from input-fifo */
-	IA_CSS_INPUT_MODE_TPG,    /**< data from test-pattern generator */
-	IA_CSS_INPUT_MODE_PRBS,   /**< data from pseudo-random bit stream */
-	IA_CSS_INPUT_MODE_MEMORY, /**< data from a frame in memory */
-	IA_CSS_INPUT_MODE_BUFFERED_SENSOR /**< data is sent through mipi buffer */
+	IA_CSS_INPUT_MODE_SENSOR, /** data from sensor */
+	IA_CSS_INPUT_MODE_FIFO,   /** data from input-fifo */
+	IA_CSS_INPUT_MODE_TPG,    /** data from test-pattern generator */
+	IA_CSS_INPUT_MODE_PRBS,   /** data from pseudo-random bit stream */
+	IA_CSS_INPUT_MODE_MEMORY, /** data from a frame in memory */
+	IA_CSS_INPUT_MODE_BUFFERED_SENSOR /** data is sent through mipi buffer */
 };
 
-/** Structure of the MIPI buffer configuration
+/* Structure of the MIPI buffer configuration
  */
 struct ia_css_mipi_buffer_config {
-	unsigned int size_mem_words; /**< The frame size in the system memory
+	unsigned int size_mem_words; /** The frame size in the system memory
 					  words (32B) */
-	bool contiguous;	     /**< Allocated memory physically
+	bool contiguous;	     /** Allocated memory physically
 					  contiguously or not. \deprecated{Will be false always.}*/
-	unsigned int nof_mipi_buffers; /**< The number of MIPI buffers required for this
+	unsigned int nof_mipi_buffers; /** The number of MIPI buffers required for this
 					stream */
 };
 
@@ -57,44 +57,44 @@ enum {
 	IA_CSS_STREAM_MAX_ISYS_STREAM_PER_CH
 };
 
-/** This is input data configuration for one MIPI data type. We can have
+/* This is input data configuration for one MIPI data type. We can have
  *  multiple of this in one virtual channel.
  */
 struct ia_css_stream_isys_stream_config {
-	struct ia_css_resolution  input_res; /**< Resolution of input data */
-	enum ia_css_stream_format format; /**< Format of input stream. This data
+	struct ia_css_resolution  input_res; /** Resolution of input data */
+	enum ia_css_stream_format format; /** Format of input stream. This data
 					       format will be mapped to MIPI data
 					       type internally. */
-	int linked_isys_stream_id; /**< default value is -1, other value means
+	int linked_isys_stream_id; /** default value is -1, other value means
 							current isys_stream shares the same buffer with
 							indicated isys_stream*/
-	bool valid; /**< indicate whether other fields have valid value */
+	bool valid; /** indicate whether other fields have valid value */
 };
 
 struct ia_css_stream_input_config {
-	struct ia_css_resolution  input_res; /**< Resolution of input data */
-	struct ia_css_resolution  effective_res; /**< Resolution of input data.
+	struct ia_css_resolution  input_res; /** Resolution of input data */
+	struct ia_css_resolution  effective_res; /** Resolution of input data.
 							Used for CSS 2400/1 System and deprecated for other
 							systems (replaced by input_effective_res in
 							ia_css_pipe_config) */
-	enum ia_css_stream_format format; /**< Format of input stream. This data
+	enum ia_css_stream_format format; /** Format of input stream. This data
 					       format will be mapped to MIPI data
 					       type internally. */
-	enum ia_css_bayer_order bayer_order; /**< Bayer order for RAW streams */
+	enum ia_css_bayer_order bayer_order; /** Bayer order for RAW streams */
 };
 
 
-/** Input stream description. This describes how input will flow into the
+/* Input stream description. This describes how input will flow into the
  *  CSS. This is used to program the CSS hardware.
  */
 struct ia_css_stream_config {
-	enum ia_css_input_mode    mode; /**< Input mode */
+	enum ia_css_input_mode    mode; /** Input mode */
 	union {
-		struct ia_css_input_port  port; /**< Port, for sensor only. */
-		struct ia_css_tpg_config  tpg;  /**< TPG configuration */
-		struct ia_css_prbs_config prbs; /**< PRBS configuration */
-	} source; /**< Source of input data */
-	unsigned int	      channel_id; /**< Channel on which input data
+		struct ia_css_input_port  port; /** Port, for sensor only. */
+		struct ia_css_tpg_config  tpg;  /** TPG configuration */
+		struct ia_css_prbs_config prbs; /** PRBS configuration */
+	} source; /** Source of input data */
+	unsigned int	      channel_id; /** Channel on which input data
 						   will arrive. Use this field
 						   to specify virtual channel id.
 						   Valid values are: 0, 1, 2, 3 */
@@ -110,29 +110,29 @@ struct ia_css_stream_config {
 	 * and will be deprecated. In the future,all platforms will use the N*N method
 	 */
 #endif
-	unsigned int sensor_binning_factor; /**< Binning factor used by sensor
+	unsigned int sensor_binning_factor; /** Binning factor used by sensor
 						 to produce image data. This is
 						 used for shading correction. */
-	unsigned int pixels_per_clock; /**< Number of pixels per clock, which can be
+	unsigned int pixels_per_clock; /** Number of pixels per clock, which can be
 					    1, 2 or 4. */
-	bool online; /**< offline will activate RAW copy on SP, use this for
+	bool online; /** offline will activate RAW copy on SP, use this for
 			  continuous capture. */
 		/* ISYS2401 usage: ISP receives data directly from sensor, no copy. */
-	unsigned init_num_cont_raw_buf; /**< initial number of raw buffers to
+	unsigned init_num_cont_raw_buf; /** initial number of raw buffers to
 					     allocate */
-	unsigned target_num_cont_raw_buf; /**< total number of raw buffers to
+	unsigned target_num_cont_raw_buf; /** total number of raw buffers to
 					     allocate */
-	bool pack_raw_pixels; /**< Pack pixels in the raw buffers */
-	bool continuous; /**< Use SP copy feature to continuously capture frames
+	bool pack_raw_pixels; /** Pack pixels in the raw buffers */
+	bool continuous; /** Use SP copy feature to continuously capture frames
 			      to system memory and run pipes in offline mode */
-	bool disable_cont_viewfinder; /**< disable continous viewfinder for ZSL use case */
-	int32_t flash_gpio_pin; /**< pin on which the flash is connected, -1 for no flash */
-	int left_padding; /**< The number of input-formatter left-paddings, -1 for default from binary.*/
-	struct ia_css_mipi_buffer_config mipi_buffer_config; /**< mipi buffer configuration */
-	struct ia_css_metadata_config	metadata_config;     /**< Metadata configuration. */
-	bool ia_css_enable_raw_buffer_locking; /**< Enable Raw Buffer Locking for HALv3 Support */
+	bool disable_cont_viewfinder; /** disable continous viewfinder for ZSL use case */
+	int32_t flash_gpio_pin; /** pin on which the flash is connected, -1 for no flash */
+	int left_padding; /** The number of input-formatter left-paddings, -1 for default from binary.*/
+	struct ia_css_mipi_buffer_config mipi_buffer_config; /** mipi buffer configuration */
+	struct ia_css_metadata_config	metadata_config;     /** Metadata configuration. */
+	bool ia_css_enable_raw_buffer_locking; /** Enable Raw Buffer Locking for HALv3 Support */
 	bool lock_all;
-	/**< Lock all RAW buffers (true) or lock only buffers processed by
+	/** Lock all RAW buffers (true) or lock only buffers processed by
 	     video or preview pipe (false).
 	     This setting needs to be enabled to allow raw buffer locking
 	     without continuous viewfinder. */
@@ -140,15 +140,15 @@ struct ia_css_stream_config {
 
 struct ia_css_stream;
 
-/** Stream info, this struct describes properties of a stream after it has been
+/* Stream info, this struct describes properties of a stream after it has been
  *  created.
  */
 struct ia_css_stream_info {
 	struct ia_css_metadata_info metadata_info;
-	/**< Info about the metadata layout, this contains the stride. */
+	/** Info about the metadata layout, this contains the stride. */
 };
 
-/** @brief Load default stream configuration
+/* @brief Load default stream configuration
  * @param[in,out]	stream_config The stream configuration.
  * @return	None
  *
@@ -165,7 +165,7 @@ void ia_css_stream_config_defaults(struct ia_css_stream_config *stream_config);
  * create the internal structures and fill in the configuration data and pipes
  */
 
- /** @brief Creates a stream
+ /* @brief Creates a stream
  * @param[in]	stream_config The stream configuration.
  * @param[in]	num_pipes The number of pipes to incorporate in the stream.
  * @param[in]	pipes The pipes.
@@ -180,7 +180,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 					 struct ia_css_pipe *pipes[],
 					 struct ia_css_stream **stream);
 
-/** @brief Destroys a stream
+/* @brief Destroys a stream
  * @param[in]	stream The stream.
  * @return	IA_CSS_SUCCESS or the error code.
  *
@@ -189,7 +189,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 enum ia_css_err
 ia_css_stream_destroy(struct ia_css_stream *stream);
 
-/** @brief Provides information about a stream
+/* @brief Provides information about a stream
  * @param[in]	stream The stream.
  * @param[out]	stream_info The information about the stream.
  * @return	IA_CSS_SUCCESS or the error code.
@@ -200,7 +200,7 @@ enum ia_css_err
 ia_css_stream_get_info(const struct ia_css_stream *stream,
 		       struct ia_css_stream_info *stream_info);
 
-/** @brief load (rebuild) a stream that was unloaded.
+/* @brief load (rebuild) a stream that was unloaded.
  * @param[in]	stream The stream
  * @return		IA_CSS_SUCCESS or the error code
  *
@@ -210,7 +210,7 @@ ia_css_stream_get_info(const struct ia_css_stream *stream,
 enum ia_css_err
 ia_css_stream_load(struct ia_css_stream *stream);
 
-/** @brief Starts the stream.
+/* @brief Starts the stream.
  * @param[in]	stream The stream.
  * @return IA_CSS_SUCCESS or the error code.
  *
@@ -223,7 +223,7 @@ ia_css_stream_load(struct ia_css_stream *stream);
 enum ia_css_err
 ia_css_stream_start(struct ia_css_stream *stream);
 
-/** @brief Stop the stream.
+/* @brief Stop the stream.
  * @param[in]	stream The stream.
  * @return	IA_CSS_SUCCESS or the error code.
  *
@@ -233,7 +233,7 @@ ia_css_stream_start(struct ia_css_stream *stream);
 enum ia_css_err
 ia_css_stream_stop(struct ia_css_stream *stream);
 
-/** @brief Check if a stream has stopped
+/* @brief Check if a stream has stopped
  * @param[in]	stream The stream.
  * @return	boolean flag
  *
@@ -242,7 +242,7 @@ ia_css_stream_stop(struct ia_css_stream *stream);
 bool
 ia_css_stream_has_stopped(struct ia_css_stream *stream);
 
-/** @brief	destroy a stream according to the stream seed previosly saved in the seed array.
+/* @brief	destroy a stream according to the stream seed previosly saved in the seed array.
  * @param[in]	stream The stream.
  * @return	IA_CSS_SUCCESS (no other errors are generated now)
  *
@@ -251,7 +251,7 @@ ia_css_stream_has_stopped(struct ia_css_stream *stream);
 enum ia_css_err
 ia_css_stream_unload(struct ia_css_stream *stream);
 
-/** @brief Returns stream format
+/* @brief Returns stream format
  * @param[in]	stream The stream.
  * @return	format of the string
  *
@@ -260,7 +260,7 @@ ia_css_stream_unload(struct ia_css_stream *stream);
 enum ia_css_stream_format
 ia_css_stream_get_format(const struct ia_css_stream *stream);
 
-/** @brief Check if the stream is configured for 2 pixels per clock
+/* @brief Check if the stream is configured for 2 pixels per clock
  * @param[in]	stream The stream.
  * @return	boolean flag
  *
@@ -270,7 +270,7 @@ ia_css_stream_get_format(const struct ia_css_stream *stream);
 bool
 ia_css_stream_get_two_pixels_per_clock(const struct ia_css_stream *stream);
 
-/** @brief Sets the output frame stride (at the last pipe)
+/* @brief Sets the output frame stride (at the last pipe)
  * @param[in]	stream The stream
  * @param[in]	output_padded_width - the output buffer stride.
  * @return	ia_css_err
@@ -280,7 +280,7 @@ ia_css_stream_get_two_pixels_per_clock(const struct ia_css_stream *stream);
 enum ia_css_err
 ia_css_stream_set_output_padded_width(struct ia_css_stream *stream, unsigned int output_padded_width);
 
-/** @brief Return max number of continuous RAW frames.
+/* @brief Return max number of continuous RAW frames.
  * @param[in]	stream The stream.
  * @param[out]	buffer_depth The maximum number of continuous RAW frames.
  * @return	IA_CSS_SUCCESS or IA_CSS_ERR_INVALID_ARGUMENTS
@@ -291,7 +291,7 @@ ia_css_stream_set_output_padded_width(struct ia_css_stream *stream, unsigned int
 enum ia_css_err
 ia_css_stream_get_max_buffer_depth(struct ia_css_stream *stream, int *buffer_depth);
 
-/** @brief Set nr of continuous RAW frames to use.
+/* @brief Set nr of continuous RAW frames to use.
  *
  * @param[in]	stream The stream.
  * @param[in]	buffer_depth	Number of frames to set.
@@ -302,7 +302,7 @@ ia_css_stream_get_max_buffer_depth(struct ia_css_stream *stream, int *buffer_dep
 enum ia_css_err
 ia_css_stream_set_buffer_depth(struct ia_css_stream *stream, int buffer_depth);
 
-/** @brief Get number of continuous RAW frames to use.
+/* @brief Get number of continuous RAW frames to use.
  * @param[in]	stream The stream.
  * @param[out]	buffer_depth The number of frames to use
  * @return	IA_CSS_SUCCESS or IA_CSS_ERR_INVALID_ARGUMENTS
@@ -315,7 +315,7 @@ ia_css_stream_get_buffer_depth(struct ia_css_stream *stream, int *buffer_depth);
 
 /* ===== CAPTURE ===== */
 
-/** @brief Configure the continuous capture
+/* @brief Configure the continuous capture
  *
  * @param[in]	stream		The stream.
  * @param[in]	num_captures	The number of RAW frames to be processed to
@@ -347,7 +347,7 @@ ia_css_stream_capture(struct ia_css_stream *stream,
 			unsigned int skip,
 			int offset);
 
-/** @brief Specify which raw frame to tag based on exp_id found in frame info
+/* @brief Specify which raw frame to tag based on exp_id found in frame info
  *
  * @param[in]	stream The stream.
  * @param[in]	exp_id	The exposure id of the raw frame to tag.
@@ -363,7 +363,7 @@ ia_css_stream_capture_frame(struct ia_css_stream *stream,
 
 /* ===== VIDEO ===== */
 
-/** @brief Send streaming data into the css input FIFO
+/* @brief Send streaming data into the css input FIFO
  *
  * @param[in]	stream	The stream.
  * @param[in]	data	Pointer to the pixels to be send.
@@ -395,7 +395,7 @@ ia_css_stream_send_input_frame(const struct ia_css_stream *stream,
 			       unsigned int width,
 			       unsigned int height);
 
-/** @brief Start an input frame on the CSS input FIFO.
+/* @brief Start an input frame on the CSS input FIFO.
  *
  * @param[in]	stream The stream.
  * @return	None
@@ -411,7 +411,7 @@ ia_css_stream_send_input_frame(const struct ia_css_stream *stream,
 void
 ia_css_stream_start_input_frame(const struct ia_css_stream *stream);
 
-/** @brief Send a line of input data into the CSS input FIFO.
+/* @brief Send a line of input data into the CSS input FIFO.
  *
  * @param[in]	stream		The stream.
  * @param[in]	data	Array of the first line of image data.
@@ -435,7 +435,7 @@ ia_css_stream_send_input_line(const struct ia_css_stream *stream,
 			      const unsigned short *data2,
 			      unsigned int width2);
 
-/** @brief Send a line of input embedded data into the CSS input FIFO.
+/* @brief Send a line of input embedded data into the CSS input FIFO.
  *
  * @param[in]	stream     Pointer of the stream.
  * @param[in]	format     Format of the embedded data.
@@ -457,7 +457,7 @@ ia_css_stream_send_input_embedded_line(const struct ia_css_stream *stream,
 			      const unsigned short *data,
 			      unsigned int width);
 
-/** @brief End an input frame on the CSS input FIFO.
+/* @brief End an input frame on the CSS input FIFO.
  *
  * @param[in]	stream	The stream.
  * @return	None
@@ -467,7 +467,7 @@ ia_css_stream_send_input_embedded_line(const struct ia_css_stream *stream,
 void
 ia_css_stream_end_input_frame(const struct ia_css_stream *stream);
 
-/** @brief send a request flash command to SP
+/* @brief send a request flash command to SP
  *
  * @param[in]	stream The stream.
  * @return	None
@@ -481,7 +481,7 @@ ia_css_stream_end_input_frame(const struct ia_css_stream *stream);
 void
 ia_css_stream_request_flash(struct ia_css_stream *stream);
 
-/** @brief Configure a stream with filter coefficients.
+/* @brief Configure a stream with filter coefficients.
  *  	   @deprecated {Replaced by
  *  				   ia_css_pipe_set_isp_config_on_pipe()}
  *
@@ -503,7 +503,7 @@ ia_css_stream_set_isp_config_on_pipe(struct ia_css_stream *stream,
 			     const struct ia_css_isp_config *config,
 			     struct ia_css_pipe *pipe);
 
-/** @brief Configure a stream with filter coefficients.
+/* @brief Configure a stream with filter coefficients.
  *  	   @deprecated {Replaced by
  *  				   ia_css_pipe_set_isp_config()}
  * @param[in]	stream	The stream.
@@ -523,7 +523,7 @@ ia_css_stream_set_isp_config(
 	struct ia_css_stream *stream,
 	const struct ia_css_isp_config *config);
 
-/** @brief Get selected configuration settings
+/* @brief Get selected configuration settings
  * @param[in]	stream	The stream.
  * @param[out]	config	Configuration settings.
  * @return		None
@@ -532,7 +532,7 @@ void
 ia_css_stream_get_isp_config(const struct ia_css_stream *stream,
 			     struct ia_css_isp_config *config);
 
-/** @brief allocate continuous raw frames for continuous capture
+/* @brief allocate continuous raw frames for continuous capture
  * @param[in]	stream The stream.
  * @return IA_CSS_SUCCESS or error code.
  *
@@ -544,7 +544,7 @@ ia_css_stream_get_isp_config(const struct ia_css_stream *stream,
 enum ia_css_err
 ia_css_alloc_continuous_frame_remain(struct ia_css_stream *stream);
 
-/** @brief allocate continuous raw frames for continuous capture
+/* @brief allocate continuous raw frames for continuous capture
  * @param[in]	stream The stream.
  * @return	IA_CSS_SUCCESS or error code.
  *
@@ -555,7 +555,7 @@ ia_css_alloc_continuous_frame_remain(struct ia_css_stream *stream);
 enum ia_css_err
 ia_css_update_continuous_frames(struct ia_css_stream *stream);
 
-/** @brief ia_css_unlock_raw_frame . unlock a raw frame (HALv3 Support)
+/* @brief ia_css_unlock_raw_frame . unlock a raw frame (HALv3 Support)
  * @param[in]	stream The stream.
  * @param[in]   exp_id exposure id that uniquely identifies the locked Raw Frame Buffer
  * @return      ia_css_err IA_CSS_SUCCESS or error code
@@ -567,7 +567,7 @@ ia_css_update_continuous_frames(struct ia_css_stream *stream);
 enum ia_css_err
 ia_css_unlock_raw_frame(struct ia_css_stream *stream, uint32_t exp_id);
 
-/** @brief ia_css_en_dz_capt_pipe . Enable/Disable digital zoom for capture pipe
+/* @brief ia_css_en_dz_capt_pipe . Enable/Disable digital zoom for capture pipe
  * @param[in]   stream The stream.
  * @param[in]   enable - true, disable - false
  * @return      None
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_timer.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_timer.h
index 575bb28b4becc09810b2009386fec9dbdfbe3166..b256d7c88716c471f54375192fcc8d3961c711c2 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_timer.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_timer.h
@@ -31,47 +31,47 @@ more details.
 #ifndef __IA_CSS_TIMER_H
 #define __IA_CSS_TIMER_H
 
-/** @file
+/* @file
  * Timer interface definitions
  */
 #include <type_support.h>		/* for uint32_t */
 #include "ia_css_err.h"
 
-/** @brief timer reading definition */
+/* @brief timer reading definition */
 typedef uint32_t clock_value_t;
 
-/** @brief 32 bit clock tick,(timestamp based on timer-value of CSS-internal timer)*/
+/* @brief 32 bit clock tick,(timestamp based on timer-value of CSS-internal timer)*/
 struct ia_css_clock_tick {
-	clock_value_t ticks; /**< measured time in ticks.*/
+	clock_value_t ticks; /** measured time in ticks.*/
 };
 
-/** @brief TIMER event codes */
+/* @brief TIMER event codes */
 enum ia_css_tm_event {
 	IA_CSS_TM_EVENT_AFTER_INIT,
-	/**< Timer Event after Initialization */
+	/** Timer Event after Initialization */
 	IA_CSS_TM_EVENT_MAIN_END,
-	/**< Timer Event after end of Main */
+	/** Timer Event after end of Main */
 	IA_CSS_TM_EVENT_THREAD_START,
-	/**< Timer Event after thread start */
+	/** Timer Event after thread start */
 	IA_CSS_TM_EVENT_FRAME_PROC_START,
-	/**< Timer Event after Frame Process Start */
+	/** Timer Event after Frame Process Start */
 	IA_CSS_TM_EVENT_FRAME_PROC_END
-	/**< Timer Event after Frame Process End */
+	/** Timer Event after Frame Process End */
 };
 
-/** @brief code measurement common struct */
+/* @brief code measurement common struct */
 struct ia_css_time_meas {
-	clock_value_t	start_timer_value;	/**< measured time in ticks */
-	clock_value_t	end_timer_value;	/**< measured time in ticks */
+	clock_value_t	start_timer_value;	/** measured time in ticks */
+	clock_value_t	end_timer_value;	/** measured time in ticks */
 };
 
 /**@brief SIZE_OF_IA_CSS_CLOCK_TICK_STRUCT checks to ensure correct alignment for struct ia_css_clock_tick. */
 #define SIZE_OF_IA_CSS_CLOCK_TICK_STRUCT sizeof(clock_value_t)
-/** @brief checks to ensure correct alignment for ia_css_time_meas. */
+/* @brief checks to ensure correct alignment for ia_css_time_meas. */
 #define SIZE_OF_IA_CSS_TIME_MEAS_STRUCT (sizeof(clock_value_t) \
 					+ sizeof(clock_value_t))
 
-/** @brief API to fetch timer count directly
+/* @brief API to fetch timer count directly
 *
 * @param curr_ts [out] measured count value
 * @return IA_CSS_SUCCESS if success
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_tpg.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_tpg.h
index 9238a3317a464cdddc4faa629e9aa9e6c72d079a..81498bd7485b7405e1825963cc8713d2c654fb60 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_tpg.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_tpg.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_TPG_H
 #define __IA_CSS_TPG_H
 
-/** @file
+/* @file
  * This file contains support for the test pattern generator (TPG)
  */
 
-/** Enumerate the TPG IDs.
+/* Enumerate the TPG IDs.
  */
 enum ia_css_tpg_id {
 	IA_CSS_TPG_ID0,
@@ -35,7 +35,7 @@ enum ia_css_tpg_id {
  */
 #define N_CSS_TPG_IDS (IA_CSS_TPG_ID2+1)
 
-/** Enumerate the TPG modes.
+/* Enumerate the TPG modes.
  */
 enum ia_css_tpg_mode {
 	IA_CSS_TPG_MODE_RAMP,
@@ -44,7 +44,7 @@ enum ia_css_tpg_mode {
 	IA_CSS_TPG_MODE_MONO
 };
 
-/** @brief Configure the test pattern generator.
+/* @brief Configure the test pattern generator.
  *
  * Configure the Test Pattern Generator, the way these values are used to
  * generate the pattern can be seen in the HRT extension for the test pattern
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_types.h
index 5fec3d5c89d88186828f9692c4818dbf104b366d..725b90072cfe620081180dbb61e93aa7cee4fcfd 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_types.h
@@ -16,7 +16,7 @@
 #ifndef _IA_CSS_TYPES_H
 #define _IA_CSS_TYPES_H
 
-/** @file
+/* @file
  * This file contains types used for the ia_css parameters.
  * These types are in a separate file because they are expected
  * to be used in software layers that do not access the CSS API
@@ -58,7 +58,7 @@
 #include "isp/kernels/output/output_1.0/ia_css_output_types.h"
 
 #define IA_CSS_DVS_STAT_GRID_INFO_SUPPORTED
-/**< Should be removed after Driver adaptation will be done */
+/** Should be removed after Driver adaptation will be done */
 
 #define IA_CSS_VERSION_MAJOR    2
 #define IA_CSS_VERSION_MINOR    0
@@ -69,8 +69,8 @@
 /* Min and max exposure IDs. These macros are here to allow
  * the drivers to get this information. Changing these macros
  * constitutes a CSS API change. */
-#define IA_CSS_ISYS_MIN_EXPOSURE_ID 1   /**< Minimum exposure ID */
-#define IA_CSS_ISYS_MAX_EXPOSURE_ID 250 /**< Maximum exposure ID */
+#define IA_CSS_ISYS_MIN_EXPOSURE_ID 1   /** Minimum exposure ID */
+#define IA_CSS_ISYS_MAX_EXPOSURE_ID 250 /** Maximum exposure ID */
 
 /* opaque types */
 struct ia_css_isp_parameters;
@@ -79,72 +79,72 @@ struct ia_css_memory_offsets;
 struct ia_css_config_memory_offsets;
 struct ia_css_state_memory_offsets;
 
-/** Virtual address within the CSS address space. */
+/* Virtual address within the CSS address space. */
 typedef uint32_t ia_css_ptr;
 
-/** Generic resolution structure.
+/* Generic resolution structure.
  */
 struct ia_css_resolution {
-	uint32_t width;  /**< Width */
-	uint32_t height; /**< Height */
+	uint32_t width;  /** Width */
+	uint32_t height; /** Height */
 };
 
-/** Generic coordinate structure.
+/* Generic coordinate structure.
  */
 struct ia_css_coordinate {
-	int32_t x;	/**< Value of a coordinate on the horizontal axis */
-	int32_t y;	/**< Value of a coordinate on the vertical axis */
+	int32_t x;	/** Value of a coordinate on the horizontal axis */
+	int32_t y;	/** Value of a coordinate on the vertical axis */
 };
 
-/** Vector with signed values. This is used to indicate motion for
+/* Vector with signed values. This is used to indicate motion for
  * Digital Image Stabilization.
  */
 struct ia_css_vector {
-	int32_t x; /**< horizontal motion (in pixels) */
-	int32_t y; /**< vertical motion (in pixels) */
+	int32_t x; /** horizontal motion (in pixels) */
+	int32_t y; /** vertical motion (in pixels) */
 };
 
 /* Short hands */
 #define IA_CSS_ISP_DMEM IA_CSS_ISP_DMEM0
 #define IA_CSS_ISP_VMEM IA_CSS_ISP_VMEM0
 
-/** CSS data descriptor */
+/* CSS data descriptor */
 struct ia_css_data {
-	ia_css_ptr address; /**< CSS virtual address */
-	uint32_t   size;    /**< Disabled if 0 */
+	ia_css_ptr address; /** CSS virtual address */
+	uint32_t   size;    /** Disabled if 0 */
 };
 
-/** Host data descriptor */
+/* Host data descriptor */
 struct ia_css_host_data {
-	char      *address; /**< Host address */
-	uint32_t   size;    /**< Disabled if 0 */
+	char      *address; /** Host address */
+	uint32_t   size;    /** Disabled if 0 */
 };
 
-/** ISP data descriptor */
+/* ISP data descriptor */
 struct ia_css_isp_data {
-	uint32_t   address; /**< ISP address */
-	uint32_t   size;    /**< Disabled if 0 */
+	uint32_t   address; /** ISP address */
+	uint32_t   size;    /** Disabled if 0 */
 };
 
-/** Shading Correction types. */
+/* Shading Correction types. */
 enum ia_css_shading_correction_type {
 #ifndef ISP2401
-	IA_CSS_SHADING_CORRECTION_TYPE_1 /**< Shading Correction 1.0 (pipe 1.0 on ISP2300, pipe 2.2 on ISP2400) */
+	IA_CSS_SHADING_CORRECTION_TYPE_1 /** Shading Correction 1.0 (pipe 1.0 on ISP2300, pipe 2.2 on ISP2400) */
 #else
-	IA_CSS_SHADING_CORRECTION_NONE,	 /**< Shading Correction is not processed in the pipe. */
-	IA_CSS_SHADING_CORRECTION_TYPE_1 /**< Shading Correction 1.0 (pipe 1.0 on ISP2300, pipe 2.2 on ISP2400/2401) */
+	IA_CSS_SHADING_CORRECTION_NONE,	 /** Shading Correction is not processed in the pipe. */
+	IA_CSS_SHADING_CORRECTION_TYPE_1 /** Shading Correction 1.0 (pipe 1.0 on ISP2300, pipe 2.2 on ISP2400/2401) */
 #endif
 
-	/**< More shading correction types can be added in the future. */
+	/** More shading correction types can be added in the future. */
 };
 
-/** Shading Correction information. */
+/* Shading Correction information. */
 struct ia_css_shading_info {
-	enum ia_css_shading_correction_type type; /**< Shading Correction type. */
+	enum ia_css_shading_correction_type type; /** Shading Correction type. */
 
-	union {	/** Shading Correction information of each Shading Correction types. */
+	union {	/* Shading Correction information of each Shading Correction types. */
 
-		/** Shading Correction information of IA_CSS_SHADING_CORRECTION_TYPE_1.
+		/* Shading Correction information of IA_CSS_SHADING_CORRECTION_TYPE_1.
 		 *
 		 *  This structure contains the information necessary to generate
 		 *  the shading table required in the isp.
@@ -288,20 +288,20 @@ struct ia_css_shading_info {
 		 */
 		struct {
 #ifndef ISP2401
-			uint32_t enable;	/**< Shading correction enabled.
+			uint32_t enable;	/** Shading correction enabled.
 						     0:disabled, 1:enabled */
-			uint32_t num_hor_grids;	/**< Number of data points per line
+			uint32_t num_hor_grids;	/** Number of data points per line
 						     per color on shading table. */
-			uint32_t num_ver_grids;	/**< Number of lines of data points
+			uint32_t num_ver_grids;	/** Number of lines of data points
 						     per color on shading table. */
-			uint32_t bqs_per_grid_cell; /**< Grid cell size
+			uint32_t bqs_per_grid_cell; /** Grid cell size
 						in BQ(Bayer Quad) unit.
 						(1BQ means {Gr,R,B,Gb}(2x2 pixels).)
 						Valid values are 8,16,32,64. */
 #else
-			uint32_t num_hor_grids;	/**< Number of data points per line per color on shading table. */
-			uint32_t num_ver_grids;	/**< Number of lines of data points per color on shading table. */
-			uint32_t bqs_per_grid_cell; /**< Grid cell size in BQ unit.
+			uint32_t num_hor_grids;	/** Number of data points per line per color on shading table. */
+			uint32_t num_ver_grids;	/** Number of lines of data points per color on shading table. */
+			uint32_t bqs_per_grid_cell; /** Grid cell size in BQ unit.
 							 NOTE: bqs = size in BQ(Bayer Quad) unit.
 							       1BQ means {Gr,R,B,Gb} (2x2 pixels).
 							       Horizontal 1 bqs corresponds to horizontal 2 pixels.
@@ -310,13 +310,13 @@ struct ia_css_shading_info {
 			uint32_t bayer_scale_hor_ratio_in;
 			uint32_t bayer_scale_hor_ratio_out;
 #ifndef ISP2401
-			/**< Horizontal ratio of bayer scaling
+			/** Horizontal ratio of bayer scaling
 			between input width and output width, for the scaling
 			which should be done before shading correction.
 			  output_width = input_width * bayer_scale_hor_ratio_out
 						/ bayer_scale_hor_ratio_in */
 #else
-				/**< Horizontal ratio of bayer scaling between input width and output width,
+				/** Horizontal ratio of bayer scaling between input width and output width,
 				     for the scaling which should be done before shading correction.
 					output_width = input_width * bayer_scale_hor_ratio_out
 									/ bayer_scale_hor_ratio_in + 0.5 */
@@ -324,30 +324,30 @@ struct ia_css_shading_info {
 			uint32_t bayer_scale_ver_ratio_in;
 			uint32_t bayer_scale_ver_ratio_out;
 #ifndef ISP2401
-			/**< Vertical ratio of bayer scaling
+			/** Vertical ratio of bayer scaling
 			between input height and output height, for the scaling
 			which should be done before shading correction.
 			  output_height = input_height * bayer_scale_ver_ratio_out
 						/ bayer_scale_ver_ratio_in */
 			uint32_t sc_bayer_origin_x_bqs_on_shading_table;
-			/**< X coordinate (in bqs) of bayer origin on shading table.
+			/** X coordinate (in bqs) of bayer origin on shading table.
 			This indicates the left-most pixel of bayer
 			(not include margin) inputted to the shading correction.
 			This corresponds to the left-most pixel of bayer
 			inputted to isp from sensor. */
 			uint32_t sc_bayer_origin_y_bqs_on_shading_table;
-			/**< Y coordinate (in bqs) of bayer origin on shading table.
+			/** Y coordinate (in bqs) of bayer origin on shading table.
 			This indicates the top pixel of bayer
 			(not include margin) inputted to the shading correction.
 			This corresponds to the top pixel of bayer
 			inputted to isp from sensor. */
 #else
-				/**< Vertical ratio of bayer scaling between input height and output height,
+				/** Vertical ratio of bayer scaling between input height and output height,
 				     for the scaling which should be done before shading correction.
 					output_height = input_height * bayer_scale_ver_ratio_out
 									/ bayer_scale_ver_ratio_in + 0.5 */
 			struct ia_css_resolution isp_input_sensor_data_res_bqs;
-				/**< Sensor data size (in bqs) inputted to ISP. This is the size BEFORE bayer scaling.
+				/** Sensor data size (in bqs) inputted to ISP. This is the size BEFORE bayer scaling.
 				     NOTE: This is NOT the size of the physical sensor size.
 					   CSS requests the driver that ISP inputs sensor data
 					   by the size of isp_input_sensor_data_res_bqs.
@@ -357,22 +357,22 @@ struct ia_css_shading_info {
 					   ISP assumes the area of isp_input_sensor_data_res_bqs
 					   is centered on the physical sensor. */
 			struct ia_css_resolution sensor_data_res_bqs;
-				/**< Sensor data size (in bqs) at shading correction.
+				/** Sensor data size (in bqs) at shading correction.
 				     This is the size AFTER bayer scaling. */
 			struct ia_css_coordinate sensor_data_origin_bqs_on_sctbl;
-				/**< Origin of sensor data area positioned on shading table at shading correction.
+				/** Origin of sensor data area positioned on shading table at shading correction.
 				     The coordinate x,y should be positive values. */
 #endif
 		} type_1;
 
-		/**< More structures can be added here when more shading correction types will be added
+		/** More structures can be added here when more shading correction types will be added
 		     in the future. */
 	} info;
 };
 
 #ifndef ISP2401
 
-/** Default Shading Correction information of Shading Correction Type 1. */
+/* Default Shading Correction information of Shading Correction Type 1. */
 #define DEFAULT_SHADING_INFO_TYPE_1 \
 { \
 	IA_CSS_SHADING_CORRECTION_TYPE_1,	/* type */ \
@@ -394,7 +394,7 @@ struct ia_css_shading_info {
 
 #else
 
-/** Default Shading Correction information of Shading Correction Type 1. */
+/* Default Shading Correction information of Shading Correction Type 1. */
 #define DEFAULT_SHADING_INFO_TYPE_1 \
 { \
 	IA_CSS_SHADING_CORRECTION_TYPE_1,	/* type */ \
@@ -416,27 +416,27 @@ struct ia_css_shading_info {
 
 #endif
 
-/** Default Shading Correction information. */
+/* Default Shading Correction information. */
 #define DEFAULT_SHADING_INFO	DEFAULT_SHADING_INFO_TYPE_1
 
-/** structure that describes the 3A and DIS grids */
+/* structure that describes the 3A and DIS grids */
 struct ia_css_grid_info {
-	/** \name ISP input size
+	/* \name ISP input size
 	  * that is visible for user
 	  * @{
 	  */
 	uint32_t isp_in_width;
 	uint32_t isp_in_height;
-	/** @}*/
+	/* @}*/
 
-	struct ia_css_3a_grid_info  s3a_grid; /**< 3A grid info */
+	struct ia_css_3a_grid_info  s3a_grid; /** 3A grid info */
 	union ia_css_dvs_grid_u dvs_grid;
-		/**< All types of DVS statistics grid info union */
+		/** All types of DVS statistics grid info union */
 
 	enum ia_css_vamem_type vamem_type;
 };
 
-/** defaults for ia_css_grid_info structs */
+/* defaults for ia_css_grid_info structs */
 #define DEFAULT_GRID_INFO \
 { \
 	0,				/* isp_in_width */ \
@@ -446,25 +446,25 @@ struct ia_css_grid_info {
 	IA_CSS_VAMEM_TYPE_1		/* vamem_type */ \
 }
 
-/** Morphing table, used for geometric distortion and chromatic abberration
+/* Morphing table, used for geometric distortion and chromatic abberration
  *  correction (GDCAC, also called GDC).
  *  This table describes the imperfections introduced by the lens, the
  *  advanced ISP can correct for these imperfections using this table.
  */
 struct ia_css_morph_table {
-	uint32_t enable; /**< To disable GDC, set this field to false. The
+	uint32_t enable; /** To disable GDC, set this field to false. The
 			  coordinates fields can be set to NULL in this case. */
-	uint32_t height; /**< Table height */
-	uint32_t width;  /**< Table width */
+	uint32_t height; /** Table height */
+	uint32_t width;  /** Table width */
 	uint16_t *coordinates_x[IA_CSS_MORPH_TABLE_NUM_PLANES];
-	/**< X coordinates that describe the sensor imperfection */
+	/** X coordinates that describe the sensor imperfection */
 	uint16_t *coordinates_y[IA_CSS_MORPH_TABLE_NUM_PLANES];
-	/**< Y coordinates that describe the sensor imperfection */
+	/** Y coordinates that describe the sensor imperfection */
 };
 
 struct ia_css_dvs_6axis_config {
 	unsigned int exp_id;
-	/**< Exposure ID, see ia_css_event_public.h for more detail */
+	/** Exposure ID, see ia_css_event_public.h for more detail */
 	uint32_t width_y;
 	uint32_t height_y;
 	uint32_t width_uv;
@@ -479,16 +479,16 @@ struct ia_css_dvs_6axis_config {
  * This specifies the coordinates (x,y)
  */
 struct ia_css_point {
-	int32_t x; /**< x coordinate */
-	int32_t y; /**< y coordinate */
+	int32_t x; /** x coordinate */
+	int32_t y; /** y coordinate */
 };
 
 /**
  * This specifies the region
  */
 struct ia_css_region {
-	struct ia_css_point origin; /**< Starting point coordinates for the region */
-	struct ia_css_resolution resolution; /**< Region resolution */
+	struct ia_css_point origin; /** Starting point coordinates for the region */
+	struct ia_css_resolution resolution; /** Region resolution */
 };
 
 /**
@@ -509,30 +509,30 @@ struct ia_css_region {
  * y + height <= effective input height
  */
 struct ia_css_dz_config {
-	uint32_t dx; /**< Horizontal zoom factor */
-	uint32_t dy; /**< Vertical zoom factor */
-	struct ia_css_region zoom_region; /**< region for zoom */
+	uint32_t dx; /** Horizontal zoom factor */
+	uint32_t dy; /** Vertical zoom factor */
+	struct ia_css_region zoom_region; /** region for zoom */
 };
 
-/** The still capture mode, this can be RAW (simply copy sensor input to DDR),
+/* The still capture mode, this can be RAW (simply copy sensor input to DDR),
  *  Primary ISP, the Advanced ISP (GDC) or the low-light ISP (ANR).
  */
 enum ia_css_capture_mode {
-	IA_CSS_CAPTURE_MODE_RAW,      /**< no processing, copy data only */
-	IA_CSS_CAPTURE_MODE_BAYER,    /**< bayer processing, up to demosaic */
-	IA_CSS_CAPTURE_MODE_PRIMARY,  /**< primary ISP */
-	IA_CSS_CAPTURE_MODE_ADVANCED, /**< advanced ISP (GDC) */
-	IA_CSS_CAPTURE_MODE_LOW_LIGHT /**< low light ISP (ANR) */
+	IA_CSS_CAPTURE_MODE_RAW,      /** no processing, copy data only */
+	IA_CSS_CAPTURE_MODE_BAYER,    /** bayer processing, up to demosaic */
+	IA_CSS_CAPTURE_MODE_PRIMARY,  /** primary ISP */
+	IA_CSS_CAPTURE_MODE_ADVANCED, /** advanced ISP (GDC) */
+	IA_CSS_CAPTURE_MODE_LOW_LIGHT /** low light ISP (ANR) */
 };
 
 struct ia_css_capture_config {
-	enum ia_css_capture_mode mode; /**< Still capture mode */
-	uint32_t enable_xnr;	       /**< Enable/disable XNR */
+	enum ia_css_capture_mode mode; /** Still capture mode */
+	uint32_t enable_xnr;	       /** Enable/disable XNR */
 	uint32_t enable_raw_output;
-	bool enable_capture_pp_bli;    /**< Enable capture_pp_bli mode */
+	bool enable_capture_pp_bli;    /** Enable capture_pp_bli mode */
 };
 
-/** default settings for ia_css_capture_config structs */
+/* default settings for ia_css_capture_config structs */
 #define DEFAULT_CAPTURE_CONFIG \
 { \
 	IA_CSS_CAPTURE_MODE_PRIMARY,	/* mode (capture) */ \
@@ -542,7 +542,7 @@ struct ia_css_capture_config {
 }
 
 
-/** ISP filter configuration. This is a collection of configurations
+/* ISP filter configuration. This is a collection of configurations
  *  for each of the ISP filters (modules).
  *
  *  NOTE! The contents of all pointers is copied when get or set with the
@@ -557,98 +557,98 @@ struct ia_css_capture_config {
  *    ["ISP block", 2only] : ISP block is used only for ISP2.
  */
 struct ia_css_isp_config {
-	struct ia_css_wb_config   *wb_config;	/**< White Balance
+	struct ia_css_wb_config   *wb_config;	/** White Balance
 							[WB1, 1&2] */
-	struct ia_css_cc_config   *cc_config;	/**< Color Correction
+	struct ia_css_cc_config   *cc_config;	/** Color Correction
 							[CSC1, 1only] */
-	struct ia_css_tnr_config  *tnr_config;	/**< Temporal Noise Reduction
+	struct ia_css_tnr_config  *tnr_config;	/** Temporal Noise Reduction
 							[TNR1, 1&2] */
-	struct ia_css_ecd_config  *ecd_config;	/**< Eigen Color Demosaicing
+	struct ia_css_ecd_config  *ecd_config;	/** Eigen Color Demosaicing
 							[DE2, 2only] */
-	struct ia_css_ynr_config  *ynr_config;	/**< Y(Luma) Noise Reduction
+	struct ia_css_ynr_config  *ynr_config;	/** Y(Luma) Noise Reduction
 							[YNR2&YEE2, 2only] */
-	struct ia_css_fc_config   *fc_config;	/**< Fringe Control
+	struct ia_css_fc_config   *fc_config;	/** Fringe Control
 							[FC2, 2only] */
-	struct ia_css_formats_config   *formats_config;	/**< Formats Control for main output
+	struct ia_css_formats_config   *formats_config;	/** Formats Control for main output
 							[FORMATS, 1&2] */
-	struct ia_css_cnr_config  *cnr_config;	/**< Chroma Noise Reduction
+	struct ia_css_cnr_config  *cnr_config;	/** Chroma Noise Reduction
 							[CNR2, 2only] */
-	struct ia_css_macc_config *macc_config;	/**< MACC
+	struct ia_css_macc_config *macc_config;	/** MACC
 							[MACC2, 2only] */
-	struct ia_css_ctc_config  *ctc_config;	/**< Chroma Tone Control
+	struct ia_css_ctc_config  *ctc_config;	/** Chroma Tone Control
 							[CTC2, 2only] */
-	struct ia_css_aa_config   *aa_config;	/**< YUV Anti-Aliasing
+	struct ia_css_aa_config   *aa_config;	/** YUV Anti-Aliasing
 							[AA2, 2only]
 							(not used currently) */
-	struct ia_css_aa_config   *baa_config;	/**< Bayer Anti-Aliasing
+	struct ia_css_aa_config   *baa_config;	/** Bayer Anti-Aliasing
 							[BAA2, 1&2] */
-	struct ia_css_ce_config   *ce_config;	/**< Chroma Enhancement
+	struct ia_css_ce_config   *ce_config;	/** Chroma Enhancement
 							[CE1, 1only] */
 	struct ia_css_dvs_6axis_config *dvs_6axis_config;
-	struct ia_css_ob_config   *ob_config;  /**< Objective Black
+	struct ia_css_ob_config   *ob_config;  /** Objective Black
 							[OB1, 1&2] */
-	struct ia_css_dp_config   *dp_config;  /**< Defect Pixel Correction
+	struct ia_css_dp_config   *dp_config;  /** Defect Pixel Correction
 							[DPC1/DPC2, 1&2] */
-	struct ia_css_nr_config   *nr_config;  /**< Noise Reduction
+	struct ia_css_nr_config   *nr_config;  /** Noise Reduction
 							[BNR1&YNR1&CNR1, 1&2]*/
-	struct ia_css_ee_config   *ee_config;  /**< Edge Enhancement
+	struct ia_css_ee_config   *ee_config;  /** Edge Enhancement
 							[YEE1, 1&2] */
-	struct ia_css_de_config   *de_config;  /**< Demosaic
+	struct ia_css_de_config   *de_config;  /** Demosaic
 							[DE1, 1only] */
-	struct ia_css_gc_config   *gc_config;  /**< Gamma Correction (for YUV)
+	struct ia_css_gc_config   *gc_config;  /** Gamma Correction (for YUV)
 							[GC1, 1only] */
-	struct ia_css_anr_config  *anr_config; /**< Advanced Noise Reduction */
-	struct ia_css_3a_config   *s3a_config; /**< 3A Statistics config */
-	struct ia_css_xnr_config  *xnr_config; /**< eXtra Noise Reduction */
-	struct ia_css_dz_config   *dz_config;  /**< Digital Zoom */
-	struct ia_css_cc_config *yuv2rgb_cc_config; /**< Color Correction
+	struct ia_css_anr_config  *anr_config; /** Advanced Noise Reduction */
+	struct ia_css_3a_config   *s3a_config; /** 3A Statistics config */
+	struct ia_css_xnr_config  *xnr_config; /** eXtra Noise Reduction */
+	struct ia_css_dz_config   *dz_config;  /** Digital Zoom */
+	struct ia_css_cc_config *yuv2rgb_cc_config; /** Color Correction
 							[CCM2, 2only] */
-	struct ia_css_cc_config *rgb2yuv_cc_config; /**< Color Correction
+	struct ia_css_cc_config *rgb2yuv_cc_config; /** Color Correction
 							[CSC2, 2only] */
-	struct ia_css_macc_table  *macc_table;	/**< MACC
+	struct ia_css_macc_table  *macc_table;	/** MACC
 							[MACC1/MACC2, 1&2]*/
-	struct ia_css_gamma_table *gamma_table;	/**< Gamma Correction (for YUV)
+	struct ia_css_gamma_table *gamma_table;	/** Gamma Correction (for YUV)
 							[GC1, 1only] */
-	struct ia_css_ctc_table   *ctc_table;	/**< Chroma Tone Control
+	struct ia_css_ctc_table   *ctc_table;	/** Chroma Tone Control
 							[CTC1, 1only] */
 
-	/** \deprecated */
-	struct ia_css_xnr_table   *xnr_table;	/**< eXtra Noise Reduction
+	/* \deprecated */
+	struct ia_css_xnr_table   *xnr_table;	/** eXtra Noise Reduction
 							[XNR1, 1&2] */
-	struct ia_css_rgb_gamma_table *r_gamma_table;/**< sRGB Gamma Correction
+	struct ia_css_rgb_gamma_table *r_gamma_table;/** sRGB Gamma Correction
 							[GC2, 2only] */
-	struct ia_css_rgb_gamma_table *g_gamma_table;/**< sRGB Gamma Correction
+	struct ia_css_rgb_gamma_table *g_gamma_table;/** sRGB Gamma Correction
 							[GC2, 2only] */
-	struct ia_css_rgb_gamma_table *b_gamma_table;/**< sRGB Gamma Correction
+	struct ia_css_rgb_gamma_table *b_gamma_table;/** sRGB Gamma Correction
 							[GC2, 2only] */
-	struct ia_css_vector      *motion_vector; /**< For 2-axis DVS */
+	struct ia_css_vector      *motion_vector; /** For 2-axis DVS */
 	struct ia_css_shading_table *shading_table;
 	struct ia_css_morph_table   *morph_table;
-	struct ia_css_dvs_coefficients *dvs_coefs; /**< DVS 1.0 coefficients */
-	struct ia_css_dvs2_coefficients *dvs2_coefs; /**< DVS 2.0 coefficients */
+	struct ia_css_dvs_coefficients *dvs_coefs; /** DVS 1.0 coefficients */
+	struct ia_css_dvs2_coefficients *dvs2_coefs; /** DVS 2.0 coefficients */
 	struct ia_css_capture_config   *capture_config;
 	struct ia_css_anr_thres   *anr_thres;
-	/** @deprecated{Old shading settings, see bugzilla bz675 for details} */
+	/* @deprecated{Old shading settings, see bugzilla bz675 for details} */
 	struct ia_css_shading_settings *shading_settings;
-	struct ia_css_xnr3_config *xnr3_config; /**< eXtreme Noise Reduction v3 */
-	/** comment from Lasse: Be aware how this feature will affect coordinate
+	struct ia_css_xnr3_config *xnr3_config; /** eXtreme Noise Reduction v3 */
+	/* comment from Lasse: Be aware how this feature will affect coordinate
 	 *  normalization in different parts of the system. (e.g. face detection,
 	 *  touch focus, 3A statistics and windows of interest, shading correction,
 	 *  DVS, GDC) from IQ tool level and application level down-to ISP FW level.
 	 *  the risk for regression is not in the individual blocks, but how they
 	 *  integrate together. */
-	struct ia_css_output_config   *output_config;	/**< Main Output Mirroring, flipping */
+	struct ia_css_output_config   *output_config;	/** Main Output Mirroring, flipping */
 
 #ifdef ISP2401
-	struct ia_css_tnr3_kernel_config         *tnr3_config;           /**< TNR3 config */
+	struct ia_css_tnr3_kernel_config         *tnr3_config;           /** TNR3 config */
 #endif
-	struct ia_css_scaler_config              *scaler_config;         /**< Skylake: scaler config (optional) */
-	struct ia_css_formats_config             *formats_config_display;/**< Formats control for viewfinder/display output (optional)
+	struct ia_css_scaler_config              *scaler_config;         /** Skylake: scaler config (optional) */
+	struct ia_css_formats_config             *formats_config_display;/** Formats control for viewfinder/display output (optional)
 										[OSYS, n/a] */
-	struct ia_css_output_config              *output_config_display; /**< Viewfinder/display output mirroring, flipping (optional) */
+	struct ia_css_output_config              *output_config_display; /** Viewfinder/display output mirroring, flipping (optional) */
 
-	struct ia_css_frame                      *output_frame;          /**< Output frame the config is to be applied to (optional) */
-	uint32_t			isp_config_id;	/**< Unique ID to track which config was actually applied to a particular frame */
+	struct ia_css_frame                      *output_frame;          /** Output frame the config is to be applied to (optional) */
+	uint32_t			isp_config_id;	/** Unique ID to track which config was actually applied to a particular frame */
 };
 
 #endif /* _IA_CSS_TYPES_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_version.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_version.h
index 48c59896e8479fa2185cf50e1b5e067bba35e5e8..1e88901e0b8299fa7e9d9017132037c97465c43d 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_version.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_version.h
@@ -15,16 +15,16 @@
 #ifndef __IA_CSS_VERSION_H
 #define __IA_CSS_VERSION_H
 
-/** @file
+/* @file
  * This file contains functions to retrieve CSS-API version information
  */
 
 #include <ia_css_err.h>
 
-/** a common size for the version arrays */
+/* a common size for the version arrays */
 #define MAX_VERSION_SIZE	500
 
-/** @brief Retrieves the current CSS version
+/* @brief Retrieves the current CSS version
  * @param[out]	version		A pointer to a buffer where to put the generated
  *				version string. NULL is ignored.
  * @param[in]	max_size	Size of the version buffer. If version string
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/aa/aa_2/ia_css_aa2_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/aa/aa_2/ia_css_aa2_types.h
index 834eedbbeeff651e42baf9309abcb9e930c784b2..0b95bf9b9aaf8ebba856bf562724ac32a756673c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/aa/aa_2/ia_css_aa2_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/aa/aa_2/ia_css_aa2_types.h
@@ -15,12 +15,12 @@
 #ifndef __IA_CSS_AA2_TYPES_H
 #define __IA_CSS_AA2_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Anti-Aliasing parameters.
 */
 
 
-/** Anti-Aliasing configuration.
+/* Anti-Aliasing configuration.
  *
  *  This structure is used both for YUV AA and Bayer AA.
  *
@@ -39,7 +39,7 @@
  *     ISP2: BAA2 is used.
  */
 struct ia_css_aa_config {
-	uint16_t strength;	/**< Strength of the filter.
+	uint16_t strength;	/** Strength of the filter.
 					u0.13, [0,8191],
 					default/ineffective 0 */
 };
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_1.0/ia_css_anr_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_1.0/ia_css_anr_types.h
index e205574098f2cf6b3c6260bdebcbf5f5a3accbd8..dc317a85736932cd805f1a0b216d608c84632e30 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_1.0/ia_css_anr_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_1.0/ia_css_anr_types.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_ANR_TYPES_H
 #define __IA_CSS_ANR_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Advanced Noise Reduction kernel v1
 */
 
@@ -23,11 +23,11 @@
 #define ANR_BPP                 10
 #define ANR_ELEMENT_BITS        ((CEIL_DIV(ANR_BPP, 8))*8)
 
-/** Advanced Noise Reduction configuration.
+/* Advanced Noise Reduction configuration.
  *  This is also known as Low-Light.
  */
 struct ia_css_anr_config {
-	int32_t threshold; /**< Threshold */
+	int32_t threshold; /** Threshold */
 	int32_t thresholds[4*4*4];
 	int32_t factors[3];
 };
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_2/ia_css_anr2_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_2/ia_css_anr2_types.h
index 3832ada433ec4b5aa2404baaceeece9476d1c8ba..9b611315392ccc37cf06d6987b36c61b6942c726 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_2/ia_css_anr2_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_2/ia_css_anr2_types.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_ANR2_TYPES_H
 #define __IA_CSS_ANR2_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Advanced Noise Reduction kernel v2
 */
 
@@ -23,7 +23,7 @@
 
 #define ANR_PARAM_SIZE          13
 
-/** Advanced Noise Reduction (ANR) thresholds */
+/* Advanced Noise Reduction (ANR) thresholds */
 struct ia_css_anr_thres {
 	int16_t data[13*64];
 };
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_2/ia_css_anr_param.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_2/ia_css_anr_param.h
index 4a289853367ab1acc0c5e0466035fac61aa0c697..312141793fd23806fd83c6f4e4ddf3e0576de5b6 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_2/ia_css_anr_param.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/anr/anr_2/ia_css_anr_param.h
@@ -18,7 +18,7 @@
 #include "vmem.h"
 #include "ia_css_anr2_types.h"
 
-/** Advanced Noise Reduction (ANR) thresholds */
+/* Advanced Noise Reduction (ANR) thresholds */
 
 struct ia_css_isp_anr2_params {
 	VMEM_ARRAY(data, ANR_PARAM_SIZE*ISP_VEC_NELEMS);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bayer_ls/bayer_ls_1.0/ia_css_bayer_ls_param.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bayer_ls/bayer_ls_1.0/ia_css_bayer_ls_param.h
index 75ca7606b95cfdd81e1b01e826fea1d0b1b206df..a0d355454aa39c38fc1cc049644322c52c43c035 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bayer_ls/bayer_ls_1.0/ia_css_bayer_ls_param.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bayer_ls/bayer_ls_1.0/ia_css_bayer_ls_param.h
@@ -27,7 +27,7 @@
 #define BAYER_QUAD_HEIGHT 2
 #define NOF_BAYER_VECTORS 4
 
-/** bayer load/store */
+/* bayer load/store */
 struct sh_css_isp_bayer_ls_isp_config {
 	uint32_t base_address[NUM_BAYER_LS];
 	uint32_t width[NUM_BAYER_LS];
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bh/bh_2/ia_css_bh_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bh/bh_2/ia_css_bh_types.h
index 9ae27a9e0baa546f81a4e88c96622da665355756..ec1688e7352d17fb2b5379c40f6c5c22c04922cc 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bh/bh_2/ia_css_bh_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bh/bh_2/ia_css_bh_types.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_BH_TYPES_H
 #define __IA_CSS_BH_TYPES_H
 
-/** Number of elements in the BH table.
+/* Number of elements in the BH table.
   * Should be consistent with hmem.h
   */
 #define IA_CSS_HMEM_BH_TABLE_SIZE	ISP_HIST_DEPTH
@@ -27,7 +27,7 @@
 #define BH_COLOR_Y	(3)
 #define BH_COLOR_NUM	(4)
 
-/** BH table */
+/* BH table */
 struct ia_css_bh_table {
 	uint32_t hmem[ISP_HIST_COMPONENTS][IA_CSS_HMEM_BH_UNIT_SIZE];
 };
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_types.h
index 219fb835cb26bd12020ce7ad5a344fb9ab1d8cdc..87e0f19c856b2e7e0ee05f5750ca6e63f61eed23 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_types.h
@@ -15,13 +15,13 @@
 #ifndef __IA_CSS_BNLM_TYPES_H
 #define __IA_CSS_BNLM_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Bayer Non-Linear Mean parameters.
 */
 
 #include "type_support.h" /* int32_t */
 
-/** Bayer Non-Linear Mean configuration
+/* Bayer Non-Linear Mean configuration
  *
  * \brief BNLM public parameters.
  * \details Struct with all parameters for the BNLM kernel that can be set
@@ -30,16 +30,16 @@
  * ISP2.6.1: BNLM is used.
  */
 struct ia_css_bnlm_config {
-	bool		rad_enable;	/**< Enable a radial dependency in a weight calculation */
-	int32_t		rad_x_origin;	/**< Initial x coordinate for a radius calculation */
-	int32_t		rad_y_origin;	/**< Initial x coordinate for a radius calculation */
+	bool		rad_enable;	/** Enable a radial dependency in a weight calculation */
+	int32_t		rad_x_origin;	/** Initial x coordinate for a radius calculation */
+	int32_t		rad_y_origin;	/** Initial x coordinate for a radius calculation */
 	/* a threshold for average of weights if this < Th, do not denoise pixel */
 	int32_t		avg_min_th;
 	/* minimum weight for denoising if max < th, do not denoise pixel */
 	int32_t		max_min_th;
 
 	/**@{*/
-	/** Coefficient for approximation, in the form of (1 + x / N)^N,
+	/* Coefficient for approximation, in the form of (1 + x / N)^N,
 	 * that fits the first-order exp() to default exp_lut in BNLM sheet
 	 * */
 	int32_t		exp_coeff_a;
@@ -48,55 +48,55 @@ struct ia_css_bnlm_config {
 	uint32_t	exp_exponent;
 	/**@}*/
 
-	int32_t nl_th[3];	/**< Detail thresholds */
+	int32_t nl_th[3];	/** Detail thresholds */
 
-	/** Index for n-th maximum candidate weight for each detail group */
+	/* Index for n-th maximum candidate weight for each detail group */
 	int32_t match_quality_max_idx[4];
 
 	/**@{*/
-	/** A lookup table for 1/sqrt(1+mu) approximation */
+	/* A lookup table for 1/sqrt(1+mu) approximation */
 	int32_t mu_root_lut_thr[15];
 	int32_t mu_root_lut_val[16];
 	/**@}*/
 	/**@{*/
-	/** A lookup table for SAD normalization */
+	/* A lookup table for SAD normalization */
 	int32_t sad_norm_lut_thr[15];
 	int32_t sad_norm_lut_val[16];
 	/**@}*/
 	/**@{*/
-	/** A lookup table that models a weight's dependency on textures */
+	/* A lookup table that models a weight's dependency on textures */
 	int32_t sig_detail_lut_thr[15];
 	int32_t sig_detail_lut_val[16];
 	/**@}*/
 	/**@{*/
-	/** A lookup table that models a weight's dependency on a pixel's radial distance */
+	/* A lookup table that models a weight's dependency on a pixel's radial distance */
 	int32_t sig_rad_lut_thr[15];
 	int32_t sig_rad_lut_val[16];
 	/**@}*/
 	/**@{*/
-	/** A lookup table to control denoise power depending on a pixel's radial distance */
+	/* A lookup table to control denoise power depending on a pixel's radial distance */
 	int32_t rad_pow_lut_thr[15];
 	int32_t rad_pow_lut_val[16];
 	/**@}*/
 	/**@{*/
-	/** Non linear transfer functions to calculate the blending coefficient depending on detail group */
-	/** detail group 0 */
+	/* Non linear transfer functions to calculate the blending coefficient depending on detail group */
+	/* detail group 0 */
 	/**@{*/
 	int32_t nl_0_lut_thr[15];
 	int32_t nl_0_lut_val[16];
 	/**@}*/
 	/**@{*/
-	/** detail group 1 */
+	/* detail group 1 */
 	int32_t nl_1_lut_thr[15];
 	int32_t nl_1_lut_val[16];
 	/**@}*/
 	/**@{*/
-	/** detail group 2 */
+	/* detail group 2 */
 	int32_t nl_2_lut_thr[15];
 	int32_t nl_2_lut_val[16];
 	/**@}*/
 	/**@{*/
-	/** detail group 3 */
+	/* detail group 3 */
 	int32_t nl_3_lut_thr[15];
 	int32_t nl_3_lut_val[16];
 	/**@}*/
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnr/bnr2_2/ia_css_bnr2_2_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnr/bnr2_2/ia_css_bnr2_2_types.h
index be80f705d8a173525c96e9f0be1da56256b7bde2..551bd0ed3bacc087fdebda3bf553b89251a2fd48 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnr/bnr2_2/ia_css_bnr2_2_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnr/bnr2_2/ia_css_bnr2_2_types.h
@@ -15,13 +15,13 @@
 #ifndef __IA_CSS_BNR2_2_TYPES_H
 #define __IA_CSS_BNR2_2_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Bayer Noise Reduction parameters.
 */
 
 #include "type_support.h" /* int32_t */
 
-/** Bayer Noise Reduction 2.2 configuration
+/* Bayer Noise Reduction 2.2 configuration
  *
  * \brief BNR2_2 public parameters.
  * \details Struct with all parameters for the BNR2.2 kernel that can be set
@@ -31,41 +31,41 @@
  */
 struct ia_css_bnr2_2_config {
 	/**@{*/
-	/** Directional variance gain for R/G/B components in dark region */
+	/* Directional variance gain for R/G/B components in dark region */
 	int32_t d_var_gain_r;
 	int32_t d_var_gain_g;
 	int32_t d_var_gain_b;
 	/**@}*/
 	/**@{*/
-	/** Slope of Directional variance gain between dark and bright region */
+	/* Slope of Directional variance gain between dark and bright region */
 	int32_t d_var_gain_slope_r;
 	int32_t d_var_gain_slope_g;
 	int32_t d_var_gain_slope_b;
 	/**@}*/
 	/**@{*/
-	/** Non-Directional variance gain for R/G/B components in dark region */
+	/* Non-Directional variance gain for R/G/B components in dark region */
 	int32_t n_var_gain_r;
 	int32_t n_var_gain_g;
 	int32_t n_var_gain_b;
 	/**@}*/
 	/**@{*/
-	/** Slope of Non-Directional variance gain between dark and bright region */
+	/* Slope of Non-Directional variance gain between dark and bright region */
 	int32_t n_var_gain_slope_r;
 	int32_t n_var_gain_slope_g;
 	int32_t n_var_gain_slope_b;
 	/**@}*/
 
-	int32_t dir_thres;		/**< Threshold for directional filtering */
-	int32_t dir_thres_w;		/**< Threshold width for directional filtering */
-	int32_t var_offset_coef;	/**< Variance offset coefficient */
-	int32_t dir_gain;		/**< Gain for directional coefficient */
-	int32_t detail_gain;		/**< Gain for low contrast texture control */
-	int32_t detail_gain_divisor;	/**< Gain divisor for low contrast texture control */
-	int32_t detail_level_offset;	/**< Bias value for low contrast texture control */
-	int32_t d_var_th_min;		/**< Minimum clipping value for directional variance*/
-	int32_t d_var_th_max;		/**< Maximum clipping value for diretional variance*/
-	int32_t n_var_th_min;		/**< Minimum clipping value for non-directional variance*/
-	int32_t n_var_th_max;		/**< Maximum clipping value for non-directional variance*/
+	int32_t dir_thres;		/** Threshold for directional filtering */
+	int32_t dir_thres_w;		/** Threshold width for directional filtering */
+	int32_t var_offset_coef;	/** Variance offset coefficient */
+	int32_t dir_gain;		/** Gain for directional coefficient */
+	int32_t detail_gain;		/** Gain for low contrast texture control */
+	int32_t detail_gain_divisor;	/** Gain divisor for low contrast texture control */
+	int32_t detail_level_offset;	/** Bias value for low contrast texture control */
+	int32_t d_var_th_min;		/** Minimum clipping value for directional variance*/
+	int32_t d_var_th_max;		/** Maximum clipping value for diretional variance*/
+	int32_t n_var_th_min;		/** Minimum clipping value for non-directional variance*/
+	int32_t n_var_th_max;		/** Maximum clipping value for non-directional variance*/
 };
 
 #endif /* __IA_CSS_BNR2_2_TYPES_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/cnr/cnr_2/ia_css_cnr2_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/cnr/cnr_2/ia_css_cnr2_types.h
index 6df6c2be9a70db6b4fd38704798c21e7f1e09130..3ebc069d8ada9f3f1d0e64db4d4c5ee845dcce95 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/cnr/cnr_2/ia_css_cnr2_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/cnr/cnr_2/ia_css_cnr2_types.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_CNR2_TYPES_H
 #define __IA_CSS_CNR2_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Chroma Noise Reduction (CNR) parameters
 */
 
-/** Chroma Noise Reduction configuration.
+/* Chroma Noise Reduction configuration.
  *
  *  Small sensitivity of edge means strong smoothness and NR performance.
  *  If you see blurred color on vertical edges,
@@ -33,21 +33,21 @@
  *  ISP2: CNR2 is used for Still.
  */
 struct ia_css_cnr_config {
-	uint16_t coring_u;	/**< Coring level of U.
+	uint16_t coring_u;	/** Coring level of U.
 				u0.13, [0,8191], default/ineffective 0 */
-	uint16_t coring_v;	/**< Coring level of V.
+	uint16_t coring_v;	/** Coring level of V.
 				u0.13, [0,8191], default/ineffective 0 */
-	uint16_t sense_gain_vy;	/**< Sensitivity of horizontal edge of Y.
+	uint16_t sense_gain_vy;	/** Sensitivity of horizontal edge of Y.
 				u13.0, [0,8191], default 100, ineffective 8191 */
-	uint16_t sense_gain_vu;	/**< Sensitivity of horizontal edge of U.
+	uint16_t sense_gain_vu;	/** Sensitivity of horizontal edge of U.
 				u13.0, [0,8191], default 100, ineffective 8191 */
-	uint16_t sense_gain_vv;	/**< Sensitivity of horizontal edge of V.
+	uint16_t sense_gain_vv;	/** Sensitivity of horizontal edge of V.
 				u13.0, [0,8191], default 100, ineffective 8191 */
-	uint16_t sense_gain_hy;	/**< Sensitivity of vertical edge of Y.
+	uint16_t sense_gain_hy;	/** Sensitivity of vertical edge of Y.
 				u13.0, [0,8191], default 50, ineffective 8191 */
-	uint16_t sense_gain_hu;	/**< Sensitivity of vertical edge of U.
+	uint16_t sense_gain_hu;	/** Sensitivity of vertical edge of U.
 				u13.0, [0,8191], default 50, ineffective 8191 */
-	uint16_t sense_gain_hv;	/**< Sensitivity of vertical edge of V.
+	uint16_t sense_gain_hv;	/** Sensitivity of vertical edge of V.
 				u13.0, [0,8191], default 50, ineffective 8191 */
 };
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/conversion/conversion_1.0/ia_css_conversion_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/conversion/conversion_1.0/ia_css_conversion_types.h
index 3f11442500f036cc4fd157bc54b517d4c3746415..47a38fd65950be7a1367f79ee10d5d31c03f04f8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/conversion/conversion_1.0/ia_css_conversion_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/conversion/conversion_1.0/ia_css_conversion_types.h
@@ -23,10 +23,10 @@
  *
  */
 struct ia_css_conversion_config {
-	uint32_t en;     /**< en parameter */
-	uint32_t dummy0; /**< dummy0 dummy parameter 0 */
-	uint32_t dummy1; /**< dummy1 dummy parameter 1 */
-	uint32_t dummy2; /**< dummy2 dummy parameter 2 */
+	uint32_t en;     /** en parameter */
+	uint32_t dummy0; /** dummy0 dummy parameter 0 */
+	uint32_t dummy1; /** dummy1 dummy parameter 1 */
+	uint32_t dummy2; /** dummy2 dummy parameter 2 */
 };
 
 #endif /* __IA_CSS_CONVERSION_TYPES_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/crop/crop_1.0/ia_css_crop_param.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/crop/crop_1.0/ia_css_crop_param.h
index 8bfc8dad37a87ef20d577266c0e9a05338a569cd..0f1812cdd92ab47a2416562da69af13da420f028 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/crop/crop_1.0/ia_css_crop_param.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/crop/crop_1.0/ia_css_crop_param.h
@@ -19,7 +19,7 @@
 #include "dma.h"
 #include "sh_css_internal.h" /* sh_css_crop_pos */
 
-/** Crop frame */
+/* Crop frame */
 struct sh_css_isp_crop_isp_config {
 	uint32_t width_a_over_b;
 	struct dma_port_config port_b;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/crop/crop_1.0/ia_css_crop_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/crop/crop_1.0/ia_css_crop_types.h
index 8091ad4d46024b02c690895c0b6dcc55eb90a501..b5d454225f891a79b65c4b2f01cb50c95eb56392 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/crop/crop_1.0/ia_css_crop_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/crop/crop_1.0/ia_css_crop_types.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_CROP_TYPES_H
 #define __IA_CSS_CROP_TYPES_H
 
-/** Crop frame
+/* Crop frame
  *
  *  ISP block: crop frame
  */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/csc/csc_1.0/ia_css_csc_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/csc/csc_1.0/ia_css_csc_types.h
index 54ced072467f9903ce70313dc3dc29c1daa4fbe5..10404380c63731e432496fc62696acd508c20554 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/csc/csc_1.0/ia_css_csc_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/csc/csc_1.0/ia_css_csc_types.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_CSC_TYPES_H
 #define __IA_CSS_CSC_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Color Space Conversion parameters.
 */
 
-/** Color Correction configuration.
+/* Color Correction configuration.
  *
  *  This structure is used for 3 cases.
  *  ("YCgCo" is the output format of Demosaic.)
@@ -68,9 +68,9 @@
  *  	4096	-3430	-666
  */
 struct ia_css_cc_config {
-	uint32_t fraction_bits;/**< Fractional bits of matrix.
+	uint32_t fraction_bits;/** Fractional bits of matrix.
 					u8.0, [0,13] */
-	int32_t matrix[3 * 3]; /**< Conversion matrix.
+	int32_t matrix[3 * 3]; /** Conversion matrix.
 					s[13-fraction_bits].[fraction_bits],
 					[-8192,8191] */
 };
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc2/ia_css_ctc2_param.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc2/ia_css_ctc2_param.h
index c66e823618f608a866f7432193a96a513637e7f7..ad7040c9d7cbf8944642167598422cda9990cc53 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc2/ia_css_ctc2_param.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc2/ia_css_ctc2_param.h
@@ -22,11 +22,11 @@
 
 /*VMEM Luma params*/
 struct ia_css_isp_ctc2_vmem_params {
-	/**< Gains by Y(Luma) at Y = 0.0,Y_X1, Y_X2, Y_X3, Y_X4*/
+	/** Gains by Y(Luma) at Y = 0.0,Y_X1, Y_X2, Y_X3, Y_X4*/
 	VMEM_ARRAY(y_x, ISP_VEC_NELEMS);
-	/** kneepoints by Y(Luma) 0.0, y_x1, y_x2, y _x3, y_x4*/
+	/* kneepoints by Y(Luma) 0.0, y_x1, y_x2, y _x3, y_x4*/
 	VMEM_ARRAY(y_y, ISP_VEC_NELEMS);
-	/** Slopes of lines interconnecting
+	/* Slopes of lines interconnecting
 	 *  0.0 -> y_x1 -> y_x2 -> y _x3 -> y_x4 -> 1.0*/
 	VMEM_ARRAY(e_y_slope, ISP_VEC_NELEMS);
 };
@@ -34,15 +34,15 @@ struct ia_css_isp_ctc2_vmem_params {
 /*DMEM Chroma params*/
 struct ia_css_isp_ctc2_dmem_params {
 
-	/** Gains by UV(Chroma) under kneepoints uv_x0 and uv_x1*/
+	/* Gains by UV(Chroma) under kneepoints uv_x0 and uv_x1*/
 	int32_t uv_y0;
 	int32_t uv_y1;
 
-	/** Kneepoints by UV(Chroma)- uv_x0 and uv_x1*/
+	/* Kneepoints by UV(Chroma)- uv_x0 and uv_x1*/
 	int32_t uv_x0;
 	int32_t uv_x1;
 
-	/** Slope of line interconnecting uv_x0 -> uv_x1*/
+	/* Slope of line interconnecting uv_x0 -> uv_x1*/
 	int32_t uv_dydx;
 
 };
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc2/ia_css_ctc2_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc2/ia_css_ctc2_types.h
index 7b75f01e2ad2ab85b7691ea8c6c78b75d4a1641a..1222cf33e8511757084e733490014869be11182f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc2/ia_css_ctc2_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc2/ia_css_ctc2_types.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_CTC2_TYPES_H
 #define __IA_CSS_CTC2_TYPES_H
 
-/** Chroma Tone Control configuration.
+/* Chroma Tone Control configuration.
 *
 *  ISP block: CTC2 (CTC by polygonal approximation)
 * (ISP1: CTC1 (CTC by look-up table) is used.)
@@ -24,7 +24,7 @@
 */
 struct ia_css_ctc2_config {
 
-	/**< Gains by Y(Luma) at Y =0.0,Y_X1, Y_X2, Y_X3, Y_X4 and Y_X5
+	/** Gains by Y(Luma) at Y =0.0,Y_X1, Y_X2, Y_X3, Y_X4 and Y_X5
 	*   --default/ineffective value: 4096(0.5f)
 	*/
 	int32_t y_y0;
@@ -33,19 +33,19 @@ struct ia_css_ctc2_config {
 	int32_t y_y3;
 	int32_t y_y4;
 	int32_t y_y5;
-	/** 1st-4th  kneepoints by Y(Luma) --default/ineffective value:n/a
+	/* 1st-4th  kneepoints by Y(Luma) --default/ineffective value:n/a
 	*   requirement: 0.0 < y_x1 < y_x2 <y _x3 < y_x4 < 1.0
 	*/
 	int32_t y_x1;
 	int32_t y_x2;
 	int32_t y_x3;
 	int32_t y_x4;
-	/** Gains by UV(Chroma) under threholds uv_x0 and uv_x1
+	/* Gains by UV(Chroma) under threholds uv_x0 and uv_x1
 	*   --default/ineffective value: 4096(0.5f)
 	*/
 	int32_t uv_y0;
 	int32_t uv_y1;
-	/** Minimum and Maximum Thresholds by UV(Chroma)- uv_x0 and uv_x1
+	/* Minimum and Maximum Thresholds by UV(Chroma)- uv_x0 and uv_x1
 	*   --default/ineffective value: n/a
 	*/
 	int32_t uv_x0;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc_1.0/ia_css_ctc_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc_1.0/ia_css_ctc_types.h
index 1da215bb966d15d166b2ccbaf5421b8c81178e00..4ac47ce10566f0770151374c6b4a9f3106de3428 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc_1.0/ia_css_ctc_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ctc/ctc_1.0/ia_css_ctc_types.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_CTC_TYPES_H
 #define __IA_CSS_CTC_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Chroma Tone Control parameters.
 */
 
-/** Fractional bits for CTC gain (used only for ISP1).
+/* Fractional bits for CTC gain (used only for ISP1).
  *
  *  IA_CSS_CTC_COEF_SHIFT(=13) includes not only the fractional bits
  *  of gain(=8), but also the bits(=5) to convert chroma
@@ -32,14 +32,14 @@
  */
 #define IA_CSS_CTC_COEF_SHIFT          13
 
-/** Number of elements in the CTC table. */
+/* Number of elements in the CTC table. */
 #define IA_CSS_VAMEM_1_CTC_TABLE_SIZE_LOG2      10
-/** Number of elements in the CTC table. */
+/* Number of elements in the CTC table. */
 #define IA_CSS_VAMEM_1_CTC_TABLE_SIZE           (1U<<IA_CSS_VAMEM_1_CTC_TABLE_SIZE_LOG2)
 
-/** Number of elements in the CTC table. */
+/* Number of elements in the CTC table. */
 #define IA_CSS_VAMEM_2_CTC_TABLE_SIZE_LOG2      8
-/** Number of elements in the CTC table. */
+/* Number of elements in the CTC table. */
 #define IA_CSS_VAMEM_2_CTC_TABLE_SIZE           ((1U<<IA_CSS_VAMEM_2_CTC_TABLE_SIZE_LOG2) + 1)
 
 enum ia_css_vamem_type {
@@ -47,44 +47,44 @@ enum ia_css_vamem_type {
 	IA_CSS_VAMEM_TYPE_2
 };
 
-/** Chroma Tone Control configuration.
+/* Chroma Tone Control configuration.
  *
  *  ISP block: CTC2 (CTC by polygonal line approximation)
  * (ISP1: CTC1 (CTC by look-up table) is used.)
  *  ISP2: CTC2 is used.
  */
 struct ia_css_ctc_config {
-	uint16_t y0;	/**< 1st kneepoint gain.
+	uint16_t y0;	/** 1st kneepoint gain.
 				u[ce_gain_exp].[13-ce_gain_exp], [0,8191],
 				default/ineffective 4096(0.5) */
-	uint16_t y1;	/**< 2nd kneepoint gain.
+	uint16_t y1;	/** 2nd kneepoint gain.
 				u[ce_gain_exp].[13-ce_gain_exp], [0,8191],
 				default/ineffective 4096(0.5) */
-	uint16_t y2;	/**< 3rd kneepoint gain.
+	uint16_t y2;	/** 3rd kneepoint gain.
 				u[ce_gain_exp].[13-ce_gain_exp], [0,8191],
 				default/ineffective 4096(0.5) */
-	uint16_t y3;	/**< 4th kneepoint gain.
+	uint16_t y3;	/** 4th kneepoint gain.
 				u[ce_gain_exp].[13-ce_gain_exp], [0,8191],
 				default/ineffective 4096(0.5) */
-	uint16_t y4;	/**< 5th kneepoint gain.
+	uint16_t y4;	/** 5th kneepoint gain.
 				u[ce_gain_exp].[13-ce_gain_exp], [0,8191],
 				default/ineffective 4096(0.5) */
-	uint16_t y5;	/**< 6th kneepoint gain.
+	uint16_t y5;	/** 6th kneepoint gain.
 				u[ce_gain_exp].[13-ce_gain_exp], [0,8191],
 				default/ineffective 4096(0.5) */
-	uint16_t ce_gain_exp;	/**< Common exponent of y-axis gain.
+	uint16_t ce_gain_exp;	/** Common exponent of y-axis gain.
 				u8.0, [0,13],
 				default/ineffective 1 */
-	uint16_t x1;	/**< 2nd kneepoint luma.
+	uint16_t x1;	/** 2nd kneepoint luma.
 				u0.13, [0,8191], constraints: 0<x1<x2,
 				default/ineffective 1024 */
-	uint16_t x2;	/**< 3rd kneepoint luma.
+	uint16_t x2;	/** 3rd kneepoint luma.
 				u0.13, [0,8191], constraints: x1<x2<x3,
 				default/ineffective 2048 */
-	uint16_t x3;	/**< 4th kneepoint luma.
+	uint16_t x3;	/** 4th kneepoint luma.
 				u0.13, [0,8191], constraints: x2<x3<x4,
 				default/ineffective 6144 */
-	uint16_t x4;	/**< 5tn kneepoint luma.
+	uint16_t x4;	/** 5tn kneepoint luma.
 				u0.13, [0,8191], constraints: x3<x4<8191,
 				default/ineffective 7168 */
 };
@@ -94,7 +94,7 @@ union ia_css_ctc_data {
 	uint16_t vamem_2[IA_CSS_VAMEM_2_CTC_TABLE_SIZE];
 };
 
-/** CTC table, used for Chroma Tone Control.
+/* CTC table, used for Chroma Tone Control.
  *
  *  ISP block: CTC1 (CTC by look-up table)
  *  ISP1: CTC1 is used.
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/de/de_1.0/ia_css_de_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/de/de_1.0/ia_css_de_types.h
index 525c838d5a99c113f3453d3b91db7c3dabcd8fab..803be68abc541cf5b3a5c9a81e7426eb45a5832f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/de/de_1.0/ia_css_de_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/de/de_1.0/ia_css_de_types.h
@@ -15,25 +15,25 @@
 #ifndef __IA_CSS_DE_TYPES_H
 #define __IA_CSS_DE_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Demosaic (bayer-to-YCgCo) parameters.
 */
 
-/** Demosaic (bayer-to-YCgCo) configuration.
+/* Demosaic (bayer-to-YCgCo) configuration.
  *
  *  ISP block: DE1
  *  ISP1: DE1 is used.
  * (ISP2: DE2 is used.)
  */
 struct ia_css_de_config {
-	ia_css_u0_16 pixelnoise; /**< Pixel noise used in moire elimination.
+	ia_css_u0_16 pixelnoise; /** Pixel noise used in moire elimination.
 				u0.16, [0,65535],
 				default 0, ineffective 0 */
-	ia_css_u0_16 c1_coring_threshold; /**< Coring threshold for C1.
+	ia_css_u0_16 c1_coring_threshold; /** Coring threshold for C1.
 				This is the same as nr_config.threshold_cb.
 				u0.16, [0,65535],
 				default 128(0.001953125), ineffective 0 */
-	ia_css_u0_16 c2_coring_threshold; /**< Coring threshold for C2.
+	ia_css_u0_16 c2_coring_threshold; /** Coring threshold for C2.
 				This is the same as nr_config.threshold_cr.
 				u0.16, [0,65535],
 				default 128(0.001953125), ineffective 0 */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/de/de_2/ia_css_de2_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/de/de_2/ia_css_de2_types.h
index eac1b27798574de3b1f26983ef6cbc988a573e88..50bdde419bb1247708aae4a46ea8cc94556c5146 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/de/de_2/ia_css_de2_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/de/de_2/ia_css_de2_types.h
@@ -15,24 +15,24 @@
 #ifndef __IA_CSS_DE2_TYPES_H
 #define __IA_CSS_DE2_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Demosaicing parameters.
 */
 
-/** Eigen Color Demosaicing configuration.
+/* Eigen Color Demosaicing configuration.
  *
  *  ISP block: DE2
  * (ISP1: DE1 is used.)
  *  ISP2: DE2 is used.
  */
 struct ia_css_ecd_config {
-	uint16_t zip_strength;	/**< Strength of zipper reduction.
+	uint16_t zip_strength;	/** Strength of zipper reduction.
 				u0.13, [0,8191],
 				default 5489(0.67), ineffective 0 */
-	uint16_t fc_strength;	/**< Strength of false color reduction.
+	uint16_t fc_strength;	/** Strength of false color reduction.
 				u0.13, [0,8191],
 				default 8191(almost 1.0), ineffective 0 */
-	uint16_t fc_debias;	/**< Prevent color change
+	uint16_t fc_debias;	/** Prevent color change
 				     on noise or Gr/Gb imbalance.
 				u0.13, [0,8191],
 				default 0, ineffective 0 */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dp/dp_1.0/ia_css_dp_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dp/dp_1.0/ia_css_dp_types.h
index b5d7b6b175b642d19400c36c67eda9c2b6ad3360..1bf6dcef7dc76764a9cde48121807b45c4cda76b 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dp/dp_1.0/ia_css_dp_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dp/dp_1.0/ia_css_dp_types.h
@@ -15,12 +15,12 @@
 #ifndef __IA_CSS_DP_TYPES_H
 #define __IA_CSS_DP_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Defect Pixel Correction (DPC) parameters.
 */
 
 
-/** Defect Pixel Correction configuration.
+/* Defect Pixel Correction configuration.
  *
  *  ISP block: DPC1 (DPC after WB)
  *             DPC2 (DPC before WB)
@@ -28,14 +28,14 @@
  *  ISP2: DPC2 is used.
  */
 struct ia_css_dp_config {
-	ia_css_u0_16 threshold; /**< The threshold of defect pixel correction,
+	ia_css_u0_16 threshold; /** The threshold of defect pixel correction,
 			      representing the permissible difference of
 			      intensity between one pixel and its
 			      surrounding pixels. Smaller values result
 				in more frequent pixel corrections.
 				u0.16, [0,65535],
 				default 8192, ineffective 65535 */
-	ia_css_u8_8 gain;	 /**< The sensitivity of mis-correction. ISP will
+	ia_css_u8_8 gain;	 /** The sensitivity of mis-correction. ISP will
 			      miss a lot of defects if the value is set
 				too large.
 				u8.8, [0,65535],
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_types.h
index b2c974196ce8201324a434141668941866bdcdfb..6727682d287ffdb4dcc67144b97dea3d0034b081 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_types.h
@@ -15,14 +15,14 @@
 #ifndef __IA_CSS_DPC2_TYPES_H
 #define __IA_CSS_DPC2_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Defect Pixel Correction 2 (DPC2) parameters.
 */
 
 #include "type_support.h"
 
 /**@{*/
-/** Floating point constants for different metrics. */
+/* Floating point constants for different metrics. */
 #define METRIC1_ONE_FP	(1<<12)
 #define METRIC2_ONE_FP	(1<<5)
 #define METRIC3_ONE_FP	(1<<12)
@@ -30,7 +30,7 @@
 /**@}*/
 
 /**@{*/
-/** Defect Pixel Correction 2 configuration.
+/* Defect Pixel Correction 2 configuration.
  *
  * \brief DPC2 public parameters.
  * \details Struct with all parameters for the Defect Pixel Correction 2
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dvs/dvs_1.0/ia_css_dvs_param.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dvs/dvs_1.0/ia_css_dvs_param.h
index 4d0abfe4d0fd2c2ba4d048208a37dd79a77f30be..66a7e58659c0b92d11b0934a56886aab8b1c8fa9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dvs/dvs_1.0/ia_css_dvs_param.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dvs/dvs_1.0/ia_css_dvs_param.h
@@ -30,7 +30,7 @@
 #ifdef ISP2401
 
 #endif
-/** dvserence frame */
+/* dvserence frame */
 struct sh_css_isp_dvs_isp_config {
 	uint32_t num_horizontal_blocks;
 	uint32_t num_vertical_blocks;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dvs/dvs_1.0/ia_css_dvs_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dvs/dvs_1.0/ia_css_dvs_types.h
index 216c54a21ea59d3b3683eaa196fb3e966bac7574..30772d217fb2731c73adf69c2e6d062de0d749b4 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dvs/dvs_1.0/ia_css_dvs_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dvs/dvs_1.0/ia_css_dvs_types.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_DVS_TYPES_H
 #define __IA_CSS_DVS_TYPES_H
 
-/** DVS frame
+/* DVS frame
  *
  *  ISP block: dvs frame
  */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_types.h
index 07651f0ac558be7ee64dc0389f3f985b2ac71ae3..32e91824a5e5a0f86f73e1926618bf99cc3606f8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_types.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_EED1_8_TYPES_H
 #define __IA_CSS_EED1_8_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Edge Enhanced Demosaic parameters.
 */
 
@@ -36,51 +36,51 @@
  */
 #define IA_CSS_NUMBER_OF_DEW_ENHANCE_SEGMENTS	9
 
-/** Edge Enhanced Demosaic configuration
+/* Edge Enhanced Demosaic configuration
  *
  * ISP2.6.1: EED1_8 is used.
  */
 
 struct ia_css_eed1_8_config {
-	int32_t rbzp_strength;	/**< Strength of zipper reduction. */
-
-	int32_t fcstrength;	/**< Strength of false color reduction. */
-	int32_t fcthres_0;	/**< Threshold to prevent chroma coring due to noise or green disparity in dark region. */
-	int32_t fcthres_1;	/**< Threshold to prevent chroma coring due to noise or green disparity in bright region. */
-	int32_t fc_sat_coef;	/**< How much color saturation to maintain in high color saturation region. */
-	int32_t fc_coring_prm;	/**< Chroma coring coefficient for tint color suppression. */
-
-	int32_t aerel_thres0;	/**< Threshold for Non-Directional Reliability at dark region. */
-	int32_t aerel_gain0;	/**< Gain for Non-Directional Reliability at dark region. */
-	int32_t aerel_thres1;	/**< Threshold for Non-Directional Reliability at bright region. */
-	int32_t aerel_gain1;	/**< Gain for Non-Directional Reliability at bright region. */
-
-	int32_t derel_thres0;	/**< Threshold for Directional Reliability at dark region. */
-	int32_t derel_gain0;	/**< Gain for Directional Reliability at dark region. */
-	int32_t derel_thres1;	/**< Threshold for Directional Reliability at bright region. */
-	int32_t derel_gain1;	/**< Gain for Directional Reliability at bright region. */
-
-	int32_t coring_pos0;	/**< Positive Edge Coring Threshold in dark region. */
-	int32_t coring_pos1;	/**< Positive Edge Coring Threshold in bright region. */
-	int32_t coring_neg0;	/**< Negative Edge Coring Threshold in dark region. */
-	int32_t coring_neg1;	/**< Negative Edge Coring Threshold in bright region. */
-
-	int32_t gain_exp;	/**< Common Exponent of Gain. */
-	int32_t gain_pos0;	/**< Gain for Positive Edge in dark region. */
-	int32_t gain_pos1;	/**< Gain for Positive Edge in bright region. */
-	int32_t gain_neg0;	/**< Gain for Negative Edge in dark region. */
-	int32_t gain_neg1;	/**< Gain for Negative Edge in bright region. */
-
-	int32_t pos_margin0;	/**< Margin for Positive Edge in dark region. */
-	int32_t pos_margin1;	/**< Margin for Positive Edge in bright region. */
-	int32_t neg_margin0;	/**< Margin for Negative Edge in dark region. */
-	int32_t neg_margin1;	/**< Margin for Negative Edge in bright region. */
-
-	int32_t dew_enhance_seg_x[IA_CSS_NUMBER_OF_DEW_ENHANCE_SEGMENTS];		/**< Segment data for directional edge weight: X. */
-	int32_t dew_enhance_seg_y[IA_CSS_NUMBER_OF_DEW_ENHANCE_SEGMENTS];		/**< Segment data for directional edge weight: Y. */
-	int32_t dew_enhance_seg_slope[(IA_CSS_NUMBER_OF_DEW_ENHANCE_SEGMENTS - 1)];	/**< Segment data for directional edge weight: Slope. */
-	int32_t dew_enhance_seg_exp[(IA_CSS_NUMBER_OF_DEW_ENHANCE_SEGMENTS - 1)];	/**< Segment data for directional edge weight: Exponent. */
-	int32_t dedgew_max;	/**< Max Weight for Directional Edge. */
+	int32_t rbzp_strength;	/** Strength of zipper reduction. */
+
+	int32_t fcstrength;	/** Strength of false color reduction. */
+	int32_t fcthres_0;	/** Threshold to prevent chroma coring due to noise or green disparity in dark region. */
+	int32_t fcthres_1;	/** Threshold to prevent chroma coring due to noise or green disparity in bright region. */
+	int32_t fc_sat_coef;	/** How much color saturation to maintain in high color saturation region. */
+	int32_t fc_coring_prm;	/** Chroma coring coefficient for tint color suppression. */
+
+	int32_t aerel_thres0;	/** Threshold for Non-Directional Reliability at dark region. */
+	int32_t aerel_gain0;	/** Gain for Non-Directional Reliability at dark region. */
+	int32_t aerel_thres1;	/** Threshold for Non-Directional Reliability at bright region. */
+	int32_t aerel_gain1;	/** Gain for Non-Directional Reliability at bright region. */
+
+	int32_t derel_thres0;	/** Threshold for Directional Reliability at dark region. */
+	int32_t derel_gain0;	/** Gain for Directional Reliability at dark region. */
+	int32_t derel_thres1;	/** Threshold for Directional Reliability at bright region. */
+	int32_t derel_gain1;	/** Gain for Directional Reliability at bright region. */
+
+	int32_t coring_pos0;	/** Positive Edge Coring Threshold in dark region. */
+	int32_t coring_pos1;	/** Positive Edge Coring Threshold in bright region. */
+	int32_t coring_neg0;	/** Negative Edge Coring Threshold in dark region. */
+	int32_t coring_neg1;	/** Negative Edge Coring Threshold in bright region. */
+
+	int32_t gain_exp;	/** Common Exponent of Gain. */
+	int32_t gain_pos0;	/** Gain for Positive Edge in dark region. */
+	int32_t gain_pos1;	/** Gain for Positive Edge in bright region. */
+	int32_t gain_neg0;	/** Gain for Negative Edge in dark region. */
+	int32_t gain_neg1;	/** Gain for Negative Edge in bright region. */
+
+	int32_t pos_margin0;	/** Margin for Positive Edge in dark region. */
+	int32_t pos_margin1;	/** Margin for Positive Edge in bright region. */
+	int32_t neg_margin0;	/** Margin for Negative Edge in dark region. */
+	int32_t neg_margin1;	/** Margin for Negative Edge in bright region. */
+
+	int32_t dew_enhance_seg_x[IA_CSS_NUMBER_OF_DEW_ENHANCE_SEGMENTS];		/** Segment data for directional edge weight: X. */
+	int32_t dew_enhance_seg_y[IA_CSS_NUMBER_OF_DEW_ENHANCE_SEGMENTS];		/** Segment data for directional edge weight: Y. */
+	int32_t dew_enhance_seg_slope[(IA_CSS_NUMBER_OF_DEW_ENHANCE_SEGMENTS - 1)];	/** Segment data for directional edge weight: Slope. */
+	int32_t dew_enhance_seg_exp[(IA_CSS_NUMBER_OF_DEW_ENHANCE_SEGMENTS - 1)];	/** Segment data for directional edge weight: Exponent. */
+	int32_t dedgew_max;	/** Max Weight for Directional Edge. */
 };
 
 #endif /* __IA_CSS_EED1_8_TYPES_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/fc/fc_1.0/ia_css_formats_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/fc/fc_1.0/ia_css_formats_types.h
index df1565a5914c18886d65a581868a9635e9886c02..49479572b40d231a6248227d7f2fb83c0224b989 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/fc/fc_1.0/ia_css_formats_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/fc/fc_1.0/ia_css_formats_types.h
@@ -15,20 +15,20 @@
 #ifndef __IA_CSS_FORMATS_TYPES_H
 #define __IA_CSS_FORMATS_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for output format parameters.
 */
 
 #include "type_support.h"
 
-/** Formats configuration.
+/* Formats configuration.
  *
  *  ISP block: FORMATS
  *  ISP1: FORMATS is used.
  *  ISP2: FORMATS is used.
  */
 struct ia_css_formats_config {
-	uint32_t video_full_range_flag; /**< selects the range of YUV output.
+	uint32_t video_full_range_flag; /** selects the range of YUV output.
 				u8.0, [0,1],
 				default 1, ineffective n/a\n
 				1 - full range, luma 0-255, chroma 0-255\n
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/fpn/fpn_1.0/ia_css_fpn_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/fpn/fpn_1.0/ia_css_fpn_types.h
index 5a2f0c06a80d3c4d4b4a60f2acaf516289bfc5b5..ef287fa3c428a0caac7573611a550061478006bd 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/fpn/fpn_1.0/ia_css_fpn_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/fpn/fpn_1.0/ia_css_fpn_types.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_FPN_TYPES_H
 #define __IA_CSS_FPN_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Fixed Pattern Noise parameters.
 */
 
-/** Fixed Pattern Noise table.
+/* Fixed Pattern Noise table.
  *
  *  This contains the fixed patterns noise values
  *  obtained from a black frame capture.
@@ -33,15 +33,15 @@
  */
 
 struct ia_css_fpn_table {
-	int16_t *data;		/**< Table content (fixed patterns noise).
+	int16_t *data;		/** Table content (fixed patterns noise).
 					u0.[13-shift], [0,63] */
-	uint32_t width;		/**< Table width (in pixels).
+	uint32_t width;		/** Table width (in pixels).
 					This is the input frame width. */
-	uint32_t height;	/**< Table height (in pixels).
+	uint32_t height;	/** Table height (in pixels).
 					This is the input frame height. */
-	uint32_t shift;		/**< Common exponent of table content.
+	uint32_t shift;		/** Common exponent of table content.
 					u8.0, [0,13] */
-	uint32_t enabled;	/**< Fpn is enabled.
+	uint32_t enabled;	/** Fpn is enabled.
 					bool */
 };
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/gc/gc_1.0/ia_css_gc_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/gc/gc_1.0/ia_css_gc_types.h
index dd9f0eda3353119a96b5a6b4ba436089e6a0de6b..594807fe2925b88d51b493df50527a2e42e5336f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/gc/gc_1.0/ia_css_gc_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/gc/gc_1.0/ia_css_gc_types.h
@@ -15,36 +15,36 @@
 #ifndef __IA_CSS_GC_TYPES_H
 #define __IA_CSS_GC_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Gamma Correction parameters.
 */
 
 #include "isp/kernels/ctc/ctc_1.0/ia_css_ctc_types.h"  /* FIXME: Needed for ia_css_vamem_type */
 
-/** Fractional bits for GAMMA gain */
+/* Fractional bits for GAMMA gain */
 #define IA_CSS_GAMMA_GAIN_K_SHIFT      13
 
-/** Number of elements in the gamma table. */
+/* Number of elements in the gamma table. */
 #define IA_CSS_VAMEM_1_GAMMA_TABLE_SIZE_LOG2    10
 #define IA_CSS_VAMEM_1_GAMMA_TABLE_SIZE         (1U<<IA_CSS_VAMEM_1_GAMMA_TABLE_SIZE_LOG2)
 
-/** Number of elements in the gamma table. */
+/* Number of elements in the gamma table. */
 #define IA_CSS_VAMEM_2_GAMMA_TABLE_SIZE_LOG2    8
 #define IA_CSS_VAMEM_2_GAMMA_TABLE_SIZE         ((1U<<IA_CSS_VAMEM_2_GAMMA_TABLE_SIZE_LOG2) + 1)
 
-/** Gamma table, used for Y(Luma) Gamma Correction.
+/* Gamma table, used for Y(Luma) Gamma Correction.
  *
  *  ISP block: GC1 (YUV Gamma Correction)
  *  ISP1: GC1 is used.
  * (ISP2: GC2(sRGB Gamma Correction) is used.)
  */
-/**< IA_CSS_VAMEM_TYPE_1(ISP2300) or
+/** IA_CSS_VAMEM_TYPE_1(ISP2300) or
      IA_CSS_VAMEM_TYPE_2(ISP2400) */
 union ia_css_gc_data {
 	uint16_t vamem_1[IA_CSS_VAMEM_1_GAMMA_TABLE_SIZE];
-	/**< Y(Luma) Gamma table on vamem type 1. u0.8, [0,255] */
+	/** Y(Luma) Gamma table on vamem type 1. u0.8, [0,255] */
 	uint16_t vamem_2[IA_CSS_VAMEM_2_GAMMA_TABLE_SIZE];
-	/**< Y(Luma) Gamma table on vamem type 2. u0.8, [0,255] */
+	/** Y(Luma) Gamma table on vamem type 2. u0.8, [0,255] */
 };
 
 struct ia_css_gamma_table {
@@ -52,22 +52,22 @@ struct ia_css_gamma_table {
 	union ia_css_gc_data data;
 };
 
-/** Gamma Correction configuration (used only for YUV Gamma Correction).
+/* Gamma Correction configuration (used only for YUV Gamma Correction).
  *
  *  ISP block: GC1 (YUV Gamma Correction)
  *  ISP1: GC1 is used.
  * (ISP2: GC2 (sRGB Gamma Correction) is used.)
   */
 struct ia_css_gc_config {
-	uint16_t gain_k1; /**< Gain to adjust U after YUV Gamma Correction.
+	uint16_t gain_k1; /** Gain to adjust U after YUV Gamma Correction.
 				u0.16, [0,65535],
 				default/ineffective 19000(0.29) */
-	uint16_t gain_k2; /**< Gain to adjust V after YUV Gamma Correction.
+	uint16_t gain_k2; /** Gain to adjust V after YUV Gamma Correction.
 				u0.16, [0,65535],
 				default/ineffective 19000(0.29) */
 };
 
-/** Chroma Enhancement configuration.
+/* Chroma Enhancement configuration.
  *
  *  This parameter specifies range of chroma output level.
  *  The standard range is [0,255] or [16,240].
@@ -77,20 +77,20 @@ struct ia_css_gc_config {
  * (ISP2: CE1 is not used.)
  */
 struct ia_css_ce_config {
-	uint8_t uv_level_min; /**< Minimum of chroma output level.
+	uint8_t uv_level_min; /** Minimum of chroma output level.
 				u0.8, [0,255], default/ineffective 0 */
-	uint8_t uv_level_max; /**< Maximum of chroma output level.
+	uint8_t uv_level_max; /** Maximum of chroma output level.
 				u0.8, [0,255], default/ineffective 255 */
 };
 
-/** Multi-Axes Color Correction (MACC) configuration.
+/* Multi-Axes Color Correction (MACC) configuration.
  *
  *  ISP block: MACC2 (MACC by matrix and exponent(ia_css_macc_config))
  * (ISP1: MACC1 (MACC by only matrix) is used.)
  *  ISP2: MACC2 is used.
  */
 struct ia_css_macc_config {
-	uint8_t exp;	/**< Common exponent of ia_css_macc_table.
+	uint8_t exp;	/** Common exponent of ia_css_macc_table.
 				u8.0, [0,13], default 1, ineffective 1 */
 };
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/gc/gc_2/ia_css_gc2_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/gc/gc_2/ia_css_gc2_types.h
index e439583bdfb6f2e8ae0d5b7f7f0bb20225a6c012..fab7467d30a5500a56fc34ce6dd9905013a13820 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/gc/gc_2/ia_css_gc2_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/gc/gc_2/ia_css_gc2_types.h
@@ -17,33 +17,33 @@
 
 #include "isp/kernels/ctc/ctc_1.0/ia_css_ctc_types.h"  /* FIXME: needed for ia_css_vamem_type */
 
-/** @file
+/* @file
 * CSS-API header file for Gamma Correction parameters.
 */
 
-/** sRGB Gamma table, used for sRGB Gamma Correction.
+/* sRGB Gamma table, used for sRGB Gamma Correction.
  *
  *  ISP block: GC2 (sRGB Gamma Correction)
  * (ISP1: GC1(YUV Gamma Correction) is used.)
  *  ISP2: GC2 is used.
  */
 
-/** Number of elements in the sRGB gamma table. */
+/* Number of elements in the sRGB gamma table. */
 #define IA_CSS_VAMEM_1_RGB_GAMMA_TABLE_SIZE_LOG2 8
 #define IA_CSS_VAMEM_1_RGB_GAMMA_TABLE_SIZE      (1U<<IA_CSS_VAMEM_1_RGB_GAMMA_TABLE_SIZE_LOG2)
 
-/** Number of elements in the sRGB gamma table. */
+/* Number of elements in the sRGB gamma table. */
 #define IA_CSS_VAMEM_2_RGB_GAMMA_TABLE_SIZE_LOG2    8
 #define IA_CSS_VAMEM_2_RGB_GAMMA_TABLE_SIZE     ((1U<<IA_CSS_VAMEM_2_RGB_GAMMA_TABLE_SIZE_LOG2) + 1)
 
-/**< IA_CSS_VAMEM_TYPE_1(ISP2300) or
+/** IA_CSS_VAMEM_TYPE_1(ISP2300) or
      IA_CSS_VAMEM_TYPE_2(ISP2400) */
 union ia_css_rgb_gamma_data {
 	uint16_t vamem_1[IA_CSS_VAMEM_1_RGB_GAMMA_TABLE_SIZE];
-	/**< RGB Gamma table on vamem type1. This table is not used,
+	/** RGB Gamma table on vamem type1. This table is not used,
 		because sRGB Gamma Correction is not implemented for ISP2300. */
 	uint16_t vamem_2[IA_CSS_VAMEM_2_RGB_GAMMA_TABLE_SIZE];
-		/**< RGB Gamma table on vamem type2. u0.12, [0,4095] */
+		/** RGB Gamma table on vamem type2. u0.12, [0,4095] */
 };
 
 struct ia_css_rgb_gamma_table {
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/hdr/ia_css_hdr_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/hdr/ia_css_hdr_types.h
index c3345b32e3e6e6f300374a9739de59f49e194933..26464421b077e0df7fe39c1a1e5de1773d0f51c7 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/hdr/ia_css_hdr_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/hdr/ia_css_hdr_types.h
@@ -24,14 +24,14 @@
  * \detail Currently HDR paramters are used only for testing purposes
  */
 struct ia_css_hdr_irradiance_params {
-	int test_irr;                                          /**< Test parameter */
-	int match_shift[IA_CSS_HDR_MAX_NUM_INPUT_FRAMES - 1];  /**< Histogram matching shift parameter */
-	int match_mul[IA_CSS_HDR_MAX_NUM_INPUT_FRAMES - 1];    /**< Histogram matching multiplication parameter */
-	int thr_low[IA_CSS_HDR_MAX_NUM_INPUT_FRAMES - 1];      /**< Weight map soft threshold low bound parameter */
-	int thr_high[IA_CSS_HDR_MAX_NUM_INPUT_FRAMES - 1];     /**< Weight map soft threshold high bound parameter */
-	int thr_coeff[IA_CSS_HDR_MAX_NUM_INPUT_FRAMES - 1];    /**< Soft threshold linear function coefficien */
-	int thr_shift[IA_CSS_HDR_MAX_NUM_INPUT_FRAMES - 1];    /**< Soft threshold precision shift parameter */
-	int weight_bpp;                                        /**< Weight map bits per pixel */
+	int test_irr;                                          /** Test parameter */
+	int match_shift[IA_CSS_HDR_MAX_NUM_INPUT_FRAMES - 1];  /** Histogram matching shift parameter */
+	int match_mul[IA_CSS_HDR_MAX_NUM_INPUT_FRAMES - 1];    /** Histogram matching multiplication parameter */
+	int thr_low[IA_CSS_HDR_MAX_NUM_INPUT_FRAMES - 1];      /** Weight map soft threshold low bound parameter */
+	int thr_high[IA_CSS_HDR_MAX_NUM_INPUT_FRAMES - 1];     /** Weight map soft threshold high bound parameter */
+	int thr_coeff[IA_CSS_HDR_MAX_NUM_INPUT_FRAMES - 1];    /** Soft threshold linear function coefficien */
+	int thr_shift[IA_CSS_HDR_MAX_NUM_INPUT_FRAMES - 1];    /** Soft threshold precision shift parameter */
+	int weight_bpp;                                        /** Weight map bits per pixel */
 };
 
 /**
@@ -39,7 +39,7 @@ struct ia_css_hdr_irradiance_params {
  * \detail Currently HDR paramters are used only for testing purposes
  */
 struct ia_css_hdr_deghost_params {
-	int test_deg; /**< Test parameter */
+	int test_deg; /** Test parameter */
 };
 
 /**
@@ -47,7 +47,7 @@ struct ia_css_hdr_deghost_params {
  * \detail Currently HDR paramters are used only for testing purposes
  */
 struct ia_css_hdr_exclusion_params {
-	int test_excl; /**< Test parameter */
+	int test_excl; /** Test parameter */
 };
 
 /**
@@ -56,9 +56,9 @@ struct ia_css_hdr_exclusion_params {
  * the CSS API. Currenly, only test paramters are defined.
  */
 struct ia_css_hdr_config {
-	struct ia_css_hdr_irradiance_params irradiance; /**< HDR irradiance paramaters */
-	struct ia_css_hdr_deghost_params    deghost;    /**< HDR deghosting parameters */
-	struct ia_css_hdr_exclusion_params  exclusion; /**< HDR exclusion parameters */
+	struct ia_css_hdr_irradiance_params irradiance; /** HDR irradiance paramaters */
+	struct ia_css_hdr_deghost_params    deghost;    /** HDR deghosting parameters */
+	struct ia_css_hdr_exclusion_params  exclusion; /** HDR exclusion parameters */
 };
 
 #endif /* __IA_CSS_HDR_TYPES_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ipu2_io_ls/bayer_io_ls/ia_css_bayer_io.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ipu2_io_ls/bayer_io_ls/ia_css_bayer_io.host.c
index 78e159c04851d6cb225e5e041fac14db35acf58f..f80480cf9de202cd368ce611d64a3be7d446b93e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ipu2_io_ls/bayer_io_ls/ia_css_bayer_io.host.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ipu2_io_ls/bayer_io_ls/ia_css_bayer_io.host.c
@@ -1,5 +1,5 @@
 #ifdef ISP2401
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ipu2_io_ls/yuv444_io_ls/ia_css_yuv444_io.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ipu2_io_ls/yuv444_io_ls/ia_css_yuv444_io.host.c
index f7e1a632c47e4c9ebc938ea4926c52668b527f1e..eb9e9439cc2188e4501a37e006c233fc2d09ff04 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ipu2_io_ls/yuv444_io_ls/ia_css_yuv444_io.host.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ipu2_io_ls/yuv444_io_ls/ia_css_yuv444_io.host.c
@@ -1,5 +1,5 @@
 #ifdef ISP2401
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/macc/macc1_5/ia_css_macc1_5_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/macc/macc1_5/ia_css_macc1_5_types.h
index 3d510bf5886a95c7dc2aaec8345cbc29fbf36e67..9cd31c2c0253b8bbf774785d79edc076d105e9cf 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/macc/macc1_5/ia_css_macc1_5_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/macc/macc1_5/ia_css_macc1_5_types.h
@@ -15,22 +15,22 @@
 #ifndef __IA_CSS_MACC1_5_TYPES_H
 #define __IA_CSS_MACC1_5_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Multi-Axis Color Conversion algorithm parameters.
 */
 
-/** Multi-Axis Color Conversion configuration
+/* Multi-Axis Color Conversion configuration
  *
  * ISP2.6.1: MACC1_5 is used.
  */
 
 
-/** Number of axes in the MACC table. */
+/* Number of axes in the MACC table. */
 #define IA_CSS_MACC_NUM_AXES           16
-/** Number of coefficients per MACC axes. */
+/* Number of coefficients per MACC axes. */
 #define IA_CSS_MACC_NUM_COEFS          4
 
-/** Multi-Axes Color Correction (MACC) table.
+/* Multi-Axes Color Correction (MACC) table.
  *
  *  ISP block: MACC (MACC by only matrix)
  *             MACC1_5 (MACC by matrix and exponent(ia_css_macc_config))
@@ -55,19 +55,19 @@
  */
 struct ia_css_macc1_5_table {
 	int16_t data[IA_CSS_MACC_NUM_COEFS * IA_CSS_MACC_NUM_AXES];
-	/**< 16 of 2x2 matix
+	/** 16 of 2x2 matix
 	  MACC1_5: s[macc_config.exp].[13-macc_config.exp], [-8192,8191]
 	    default/ineffective: (s1.12)
 		16 of "identity 2x2 matix" {4096,0,0,4096} */
 };
 
-/** Multi-Axes Color Correction (MACC) configuration.
+/* Multi-Axes Color Correction (MACC) configuration.
  *
  *  ISP block: MACC1_5 (MACC by matrix and exponent(ia_css_macc_config))
  *  ISP2: MACC1_5 is used.
  */
 struct ia_css_macc1_5_config {
-	uint8_t exp;	/**< Common exponent of ia_css_macc_table.
+	uint8_t exp;	/** Common exponent of ia_css_macc_table.
 				u8.0, [0,13], default 1, ineffective 1 */
 };
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/macc/macc_1.0/ia_css_macc_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/macc/macc_1.0/ia_css_macc_types.h
index a25581c6f3ac4cb82ac3de70f1853a6a6cdb7002..2c9e5a8ceb9880e93b315d0e1bc0f3e20ba53af0 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/macc/macc_1.0/ia_css_macc_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/macc/macc_1.0/ia_css_macc_types.h
@@ -15,17 +15,17 @@
 #ifndef __IA_CSS_MACC_TYPES_H
 #define __IA_CSS_MACC_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Multi-Axis Color Correction (MACC) parameters.
 */
 
-/** Number of axes in the MACC table. */
+/* Number of axes in the MACC table. */
 #define IA_CSS_MACC_NUM_AXES           16
-/** Number of coefficients per MACC axes. */
+/* Number of coefficients per MACC axes. */
 #define IA_CSS_MACC_NUM_COEFS          4
-/** The number of planes in the morphing table. */
+/* The number of planes in the morphing table. */
 
-/** Multi-Axis Color Correction (MACC) table.
+/* Multi-Axis Color Correction (MACC) table.
  *
  *  ISP block: MACC1 (MACC by only matrix)
  *             MACC2 (MACC by matrix and exponent(ia_css_macc_config))
@@ -51,7 +51,7 @@
 
 struct ia_css_macc_table {
 	int16_t data[IA_CSS_MACC_NUM_COEFS * IA_CSS_MACC_NUM_AXES];
-	/**< 16 of 2x2 matix
+	/** 16 of 2x2 matix
 	  MACC1: s2.13, [-65536,65535]
 	    default/ineffective:
 		16 of "identity 2x2 matix" {8192,0,0,8192}
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ob/ob2/ia_css_ob2_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ob/ob2/ia_css_ob2_types.h
index eeaadfeb5a1ec4be4942523885007e58cbaa354b..d981394c1c1146dd27ba919204ac1d2c60ada9c8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ob/ob2/ia_css_ob2_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ob/ob2/ia_css_ob2_types.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_OB2_TYPES_H
 #define __IA_CSS_OB2_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Optical Black algorithm parameters.
 */
 
-/** Optical Black configuration
+/* Optical Black configuration
  *
  * ISP2.6.1: OB2 is used.
  */
@@ -27,16 +27,16 @@
 #include "ia_css_frac.h"
 
 struct ia_css_ob2_config {
-	ia_css_u0_16 level_gr;    /**< Black level for GR pixels.
+	ia_css_u0_16 level_gr;    /** Black level for GR pixels.
 					u0.16, [0,65535],
 					default/ineffective 0 */
-	ia_css_u0_16  level_r;     /**< Black level for R pixels.
+	ia_css_u0_16  level_r;     /** Black level for R pixels.
 					u0.16, [0,65535],
 					default/ineffective 0 */
-	ia_css_u0_16  level_b;     /**< Black level for B pixels.
+	ia_css_u0_16  level_b;     /** Black level for B pixels.
 					u0.16, [0,65535],
 					default/ineffective 0 */
-	ia_css_u0_16  level_gb;    /**< Black level for GB pixels.
+	ia_css_u0_16  level_gb;    /** Black level for GB pixels.
 					u0.16, [0,65535],
 					default/ineffective 0 */
 };
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ob/ob_1.0/ia_css_ob_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ob/ob_1.0/ia_css_ob_types.h
index 88459b6c003d180f35f9eb82def73e29c0b1b6d9..a9717b8f44acc744786592080e48412c4888dbef 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ob/ob_1.0/ia_css_ob_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ob/ob_1.0/ia_css_ob_types.h
@@ -15,51 +15,51 @@
 #ifndef __IA_CSS_OB_TYPES_H
 #define __IA_CSS_OB_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Optical Black level parameters.
 */
 
 #include "ia_css_frac.h"
 
-/** Optical black mode.
+/* Optical black mode.
  */
 enum ia_css_ob_mode {
-	IA_CSS_OB_MODE_NONE,	/**< OB has no effect. */
-	IA_CSS_OB_MODE_FIXED,	/**< Fixed OB */
-	IA_CSS_OB_MODE_RASTER	/**< Raster OB */
+	IA_CSS_OB_MODE_NONE,	/** OB has no effect. */
+	IA_CSS_OB_MODE_FIXED,	/** Fixed OB */
+	IA_CSS_OB_MODE_RASTER	/** Raster OB */
 };
 
-/** Optical Black level configuration.
+/* Optical Black level configuration.
  *
  *  ISP block: OB1
  *  ISP1: OB1 is used.
  *  ISP2: OB1 is used.
  */
 struct ia_css_ob_config {
-	enum ia_css_ob_mode mode; /**< Mode (None / Fixed / Raster).
+	enum ia_css_ob_mode mode; /** Mode (None / Fixed / Raster).
 					enum, [0,2],
 					default 1, ineffective 0 */
-	ia_css_u0_16 level_gr;    /**< Black level for GR pixels
+	ia_css_u0_16 level_gr;    /** Black level for GR pixels
 					(used for Fixed Mode only).
 					u0.16, [0,65535],
 					default/ineffective 0 */
-	ia_css_u0_16 level_r;     /**< Black level for R pixels
+	ia_css_u0_16 level_r;     /** Black level for R pixels
 					(used for Fixed Mode only).
 					u0.16, [0,65535],
 					default/ineffective 0 */
-	ia_css_u0_16 level_b;     /**< Black level for B pixels
+	ia_css_u0_16 level_b;     /** Black level for B pixels
 					(used for Fixed Mode only).
 					u0.16, [0,65535],
 					default/ineffective 0 */
-	ia_css_u0_16 level_gb;    /**< Black level for GB pixels
+	ia_css_u0_16 level_gb;    /** Black level for GB pixels
 					(used for Fixed Mode only).
 					u0.16, [0,65535],
 					default/ineffective 0 */
-	uint16_t start_position; /**< Start position of OB area
+	uint16_t start_position; /** Start position of OB area
 					(used for Raster Mode only).
 					u16.0, [0,63],
 					default/ineffective 0 */
-	uint16_t end_position;  /**< End position of OB area
+	uint16_t end_position;  /** End position of OB area
 					(used for Raster Mode only).
 					u16.0, [0,63],
 					default/ineffective 0 */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output_param.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output_param.h
index 26ec27e085c1da60a6b3d035deda02f24b3b5906..eb7defa4114563a23bae8f9c8991585db334f7b8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output_param.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output_param.h
@@ -19,7 +19,7 @@
 #include "dma.h"
 #include "ia_css_frame_comm.h" /* ia_css_frame_sp_info */
 
-/** output frame */
+/* output frame */
 struct sh_css_isp_output_isp_config {
 	uint32_t width_a_over_b;
 	uint32_t height;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output_types.h
index 4335ac28b31d76fec890dff2e4fbb0e208f8a0df..9c7342fb8145f379ec394567a5a9f5db7727f6f1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output_types.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_OUTPUT_TYPES_H
 #define __IA_CSS_OUTPUT_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for parameters of output frames.
 */
 
-/** Output frame
+/* Output frame
  *
  *  ISP block: output frame
  */
@@ -40,8 +40,8 @@ struct ia_css_output1_configuration {
 };
 
 struct ia_css_output_config {
-	uint8_t enable_hflip;  /**< enable horizontal output mirroring */
-	uint8_t enable_vflip;  /**< enable vertical output mirroring */
+	uint8_t enable_hflip;  /** enable horizontal output mirroring */
+	uint8_t enable_vflip;  /** enable vertical output mirroring */
 };
 
 #endif /* __IA_CSS_OUTPUT_TYPES_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/qplane/qplane_2/ia_css_qplane_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/qplane/qplane_2/ia_css_qplane_types.h
index 955fd472a2414b939023b789fa35c76c22f3860b..62d371841619b0104ebd917cea589d1b3b75c077 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/qplane/qplane_2/ia_css_qplane_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/qplane/qplane_2/ia_css_qplane_types.h
@@ -18,7 +18,7 @@
 #include <ia_css_frame_public.h>
 #include "sh_css_internal.h"
 
-/** qplane frame
+/* qplane frame
  *
  *  ISP block: qplane frame
  */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h
index 54f8c299d22730092c10fcd80720fbafd6a0a2cd..5c0b8febd79aeac36909f87dddba98e45c8e4be6 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h
@@ -18,7 +18,7 @@
 #include <ia_css_frame_public.h>
 #include "sh_css_internal.h"
 
-/** Raw frame
+/* Raw frame
  *
  *  ISP block: Raw frame
  */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ref/ref_1.0/ia_css_ref_param.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ref/ref_1.0/ia_css_ref_param.h
index 1f1b72a417d1d9ed3e51c8b8920154e19b23c787..026443b999a647500ae9b55105f0afd2fab50e98 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ref/ref_1.0/ia_css_ref_param.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ref/ref_1.0/ia_css_ref_param.h
@@ -19,7 +19,7 @@
 #include "sh_css_defs.h"
 #include "dma.h"
 
-/** Reference frame */
+/* Reference frame */
 struct ia_css_ref_configuration {
 	const struct ia_css_frame *ref_frames[MAX_NUM_VIDEO_DELAY_FRAMES];
 	uint32_t dvs_frame_delay;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ref/ref_1.0/ia_css_ref_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ref/ref_1.0/ia_css_ref_types.h
index ce0eaeeee9c6ef3fac5528812a88ebc0b1138666..4750fba268b938ea19e84009447b19cfa8459068 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ref/ref_1.0/ia_css_ref_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ref/ref_1.0/ia_css_ref_types.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_REF_TYPES_H
 #define __IA_CSS_REF_TYPES_H
 
-/** Reference frame
+/* Reference frame
  *
  *  ISP block: reference frame
  */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/s3a/s3a_1.0/ia_css_s3a_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/s3a/s3a_1.0/ia_css_s3a_types.h
index f57ed1ec5981a4f17c19fff115bfcdbcc6c818c1..8d674d2c6427515c5e2e926b07846e6f2ac578d7 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/s3a/s3a_1.0/ia_css_s3a_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/s3a/s3a_1.0/ia_css_s3a_types.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_S3A_TYPES_H
 #define __IA_CSS_S3A_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for 3A statistics parameters.
 */
 
@@ -25,11 +25,11 @@
 #include "../../../../components/stats_3a/src/stats_3a_public.h"
 #endif
 
-/** 3A configuration. This configures the 3A statistics collection
+/* 3A configuration. This configures the 3A statistics collection
  *  module.
  */
  
-/** 3A statistics grid
+/* 3A statistics grid
  *
  *  ISP block: S3A1 (3A Support for 3A ver.1 (Histogram is not used for AE))
  *             S3A2 (3A Support for 3A ver.2 (Histogram is used for AE))
@@ -39,23 +39,23 @@
 struct ia_css_3a_grid_info {
 
 #if defined(SYSTEM_css_skycam_c0_system)
-	uint32_t ae_enable;					/**< ae enabled in binary,
+	uint32_t ae_enable;					/** ae enabled in binary,
 								   0:disabled, 1:enabled */
-	struct ae_public_config_grid_config	ae_grd_info;	/**< see description in ae_public.h*/
+	struct ae_public_config_grid_config	ae_grd_info;	/** see description in ae_public.h*/
 
-  	uint32_t awb_enable;					/**< awb enabled in binary,
+  	uint32_t awb_enable;					/** awb enabled in binary,
 								   0:disabled, 1:enabled */
-	struct awb_public_config_grid_config	awb_grd_info;	/**< see description in awb_public.h*/
+	struct awb_public_config_grid_config	awb_grd_info;	/** see description in awb_public.h*/
 
-  	uint32_t af_enable;					/**< af enabled in binary,
+  	uint32_t af_enable;					/** af enabled in binary,
 								   0:disabled, 1:enabled */
-	struct af_public_grid_config		af_grd_info;	/**< see description in af_public.h*/
+	struct af_public_grid_config		af_grd_info;	/** see description in af_public.h*/
 
-  	uint32_t awb_fr_enable;					/**< awb_fr enabled in binary,
+  	uint32_t awb_fr_enable;					/** awb_fr enabled in binary,
 								   0:disabled, 1:enabled */
-	struct awb_fr_public_grid_config	awb_fr_grd_info;/**< see description in awb_fr_public.h*/
+	struct awb_fr_public_grid_config	awb_fr_grd_info;/** see description in awb_fr_public.h*/
   
-        uint32_t elem_bit_depth;    /**< TODO:Taken from BYT  - need input from AIQ
+        uint32_t elem_bit_depth;    /** TODO:Taken from BYT  - need input from AIQ
 					if needed for SKC
 					Bit depth of element used
 					to calculate 3A statistics.
@@ -63,34 +63,34 @@ struct ia_css_3a_grid_info {
 					bayer bit depth in DSP. */
 
 #else
-	uint32_t enable;            /**< 3A statistics enabled.
+	uint32_t enable;            /** 3A statistics enabled.
 					0:disabled, 1:enabled */
-	uint32_t use_dmem;          /**< DMEM or VMEM determines layout.
+	uint32_t use_dmem;          /** DMEM or VMEM determines layout.
 					0:3A statistics are stored to VMEM,
 					1:3A statistics are stored to DMEM */
-	uint32_t has_histogram;     /**< Statistics include histogram.
+	uint32_t has_histogram;     /** Statistics include histogram.
 					0:no histogram, 1:has histogram */
-	uint32_t width;	    	    /**< Width of 3A grid table.
+	uint32_t width;	    	    /** Width of 3A grid table.
 					(= Horizontal number of grid cells
 					in table, which cells have effective
 					statistics.) */
-	uint32_t height;	    /**< Height of 3A grid table.
+	uint32_t height;	    /** Height of 3A grid table.
 					(= Vertical number of grid cells
 					in table, which cells have effective
 					statistics.) */
-	uint32_t aligned_width;     /**< Horizontal stride (for alloc).
+	uint32_t aligned_width;     /** Horizontal stride (for alloc).
 					(= Horizontal number of grid cells
 					in table, which means
 					the allocated width.) */
-	uint32_t aligned_height;    /**< Vertical stride (for alloc).
+	uint32_t aligned_height;    /** Vertical stride (for alloc).
 					(= Vertical number of grid cells
 					in table, which means
 					the allocated height.) */
-	uint32_t bqs_per_grid_cell; /**< Grid cell size in BQ(Bayer Quad) unit.
+	uint32_t bqs_per_grid_cell; /** Grid cell size in BQ(Bayer Quad) unit.
 					(1BQ means {Gr,R,B,Gb}(2x2 pixels).)
 					Valid values are 8,16,32,64. */
-	uint32_t deci_factor_log2;  /**< log2 of bqs_per_grid_cell. */
-	uint32_t elem_bit_depth;    /**< Bit depth of element used
+	uint32_t deci_factor_log2;  /** log2 of bqs_per_grid_cell. */
+	uint32_t elem_bit_depth;    /** Bit depth of element used
 					to calculate 3A statistics.
 					This is 13, which is the normalized
 					bayer bit depth in DSP. */
@@ -148,7 +148,7 @@ struct ia_css_3a_grid_info {
  * However, that will require driver/ 3A lib modifications.
  */
 
-/** 3A configuration. This configures the 3A statistics collection
+/* 3A configuration. This configures the 3A statistics collection
  *  module.
  *
  *  ae_y_*: Coefficients to calculate luminance from bayer.
@@ -167,38 +167,38 @@ struct ia_css_3a_grid_info {
  *  ISP2: S3A2 and SDVS2 are used.
  */
 struct ia_css_3a_config {
-	ia_css_u0_16 ae_y_coef_r;	/**< Weight of R for Y.
+	ia_css_u0_16 ae_y_coef_r;	/** Weight of R for Y.
 						u0.16, [0,65535],
 						default/ineffective 25559 */
-	ia_css_u0_16 ae_y_coef_g;	/**< Weight of G for Y.
+	ia_css_u0_16 ae_y_coef_g;	/** Weight of G for Y.
 						u0.16, [0,65535],
 						default/ineffective 32768 */
-	ia_css_u0_16 ae_y_coef_b;	/**< Weight of B for Y.
+	ia_css_u0_16 ae_y_coef_b;	/** Weight of B for Y.
 						u0.16, [0,65535],
 						default/ineffective 7209 */
-	ia_css_u0_16 awb_lg_high_raw;	/**< AWB level gate high for raw.
+	ia_css_u0_16 awb_lg_high_raw;	/** AWB level gate high for raw.
 						u0.16, [0,65535],
 						default 65472(=1023*64),
 						ineffective 65535 */
-	ia_css_u0_16 awb_lg_low;	/**< AWB level gate low.
+	ia_css_u0_16 awb_lg_low;	/** AWB level gate low.
 						u0.16, [0,65535],
 						default 64(=1*64),
 						ineffective 0 */
-	ia_css_u0_16 awb_lg_high;	/**< AWB level gate high.
+	ia_css_u0_16 awb_lg_high;	/** AWB level gate high.
 						u0.16, [0,65535],
 						default 65535,
 						ineffective 65535 */
-	ia_css_s0_15 af_fir1_coef[7];	/**< AF FIR coefficients of fir1.
+	ia_css_s0_15 af_fir1_coef[7];	/** AF FIR coefficients of fir1.
 						s0.15, [-32768,32767],
 				default/ineffective
 				-6689,-12207,-32768,32767,12207,6689,0 */
-	ia_css_s0_15 af_fir2_coef[7];	/**< AF FIR coefficients of fir2.
+	ia_css_s0_15 af_fir2_coef[7];	/** AF FIR coefficients of fir2.
 						s0.15, [-32768,32767],
 				default/ineffective
 				2053,0,-18437,32767,-18437,2053,0 */
 };
 
-/** 3A statistics. This structure describes the data stored
+/* 3A statistics. This structure describes the data stored
  *  in each 3A grid point.
  *
  *  ISP block: S3A1 (3A Support for 3A ver.1) (Histogram is not used for AE)
@@ -209,43 +209,43 @@ struct ia_css_3a_config {
  *  ISP2: S3A2 is used.
  */
 struct ia_css_3a_output {
-	int32_t ae_y;    /**< Sum of Y in a statistics window, for AE.
+	int32_t ae_y;    /** Sum of Y in a statistics window, for AE.
 				(u19.13) */
-	int32_t awb_cnt; /**< Number of effective pixels
+	int32_t awb_cnt; /** Number of effective pixels
 				in a statistics window.
 				Pixels passed by the AWB level gate check are
 				judged as "effective". (u32) */
-	int32_t awb_gr;  /**< Sum of Gr in a statistics window, for AWB.
+	int32_t awb_gr;  /** Sum of Gr in a statistics window, for AWB.
 				All Gr pixels (not only for effective pixels)
 				are summed. (u19.13) */
-	int32_t awb_r;   /**< Sum of R in a statistics window, for AWB.
+	int32_t awb_r;   /** Sum of R in a statistics window, for AWB.
 				All R pixels (not only for effective pixels)
 				are summed. (u19.13) */
-	int32_t awb_b;   /**< Sum of B in a statistics window, for AWB.
+	int32_t awb_b;   /** Sum of B in a statistics window, for AWB.
 				All B pixels (not only for effective pixels)
 				are summed. (u19.13) */
-	int32_t awb_gb;  /**< Sum of Gb in a statistics window, for AWB.
+	int32_t awb_gb;  /** Sum of Gb in a statistics window, for AWB.
 				All Gb pixels (not only for effective pixels)
 				are summed. (u19.13) */
-	int32_t af_hpf1; /**< Sum of |Y| following high pass filter af_fir1
+	int32_t af_hpf1; /** Sum of |Y| following high pass filter af_fir1
 				within a statistics window, for AF. (u19.13) */
-	int32_t af_hpf2; /**< Sum of |Y| following high pass filter af_fir2
+	int32_t af_hpf2; /** Sum of |Y| following high pass filter af_fir2
 				within a statistics window, for AF. (u19.13) */
 };
 
 
-/** 3A Statistics. This structure describes the statistics that are generated
+/* 3A Statistics. This structure describes the statistics that are generated
  *  using the provided configuration (ia_css_3a_config).
  */
 struct ia_css_3a_statistics {
-	struct ia_css_3a_grid_info    grid;	/**< grid info contains the dimensions of the 3A grid */
-	struct ia_css_3a_output      *data;	/**< the pointer to 3a_output[grid.width * grid.height]
+	struct ia_css_3a_grid_info    grid;	/** grid info contains the dimensions of the 3A grid */
+	struct ia_css_3a_output      *data;	/** the pointer to 3a_output[grid.width * grid.height]
 						     containing the 3A statistics */
-	struct ia_css_3a_rgby_output *rgby_data;/**< the pointer to 3a_rgby_output[256]
+	struct ia_css_3a_rgby_output *rgby_data;/** the pointer to 3a_rgby_output[256]
 						     containing the histogram */
 };
 
-/** Histogram (Statistics for AE).
+/* Histogram (Statistics for AE).
  *
  *  4 histograms(r,g,b,y),
  *  256 bins for each histogram, unsigned 24bit value for each bin.
@@ -256,10 +256,10 @@ struct ia_css_3a_statistics {
  *  ISP2: HIST2 is used.
  */
 struct ia_css_3a_rgby_output {
-	uint32_t r;	/**< Number of R of one bin of the histogram R. (u24) */
-	uint32_t g;	/**< Number of G of one bin of the histogram G. (u24) */
-	uint32_t b;	/**< Number of B of one bin of the histogram B. (u24) */
-	uint32_t y;	/**< Number of Y of one bin of the histogram Y. (u24) */
+	uint32_t r;	/** Number of R of one bin of the histogram R. (u24) */
+	uint32_t g;	/** Number of G of one bin of the histogram G. (u24) */
+	uint32_t b;	/** Number of B of one bin of the histogram B. (u24) */
+	uint32_t y;	/** Number of Y of one bin of the histogram Y. (u24) */
 };
 
 #endif /* __IA_CSS_S3A_TYPES_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/s3a_stat_ls/ia_css_s3a_stat_ls_param.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/s3a_stat_ls/ia_css_s3a_stat_ls_param.h
index 8b2b56b0310b6f27de2f1d748acf9b7fda1b96cf..9aa019539f479c7d55457ea29878d7aa74da4db8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/s3a_stat_ls/ia_css_s3a_stat_ls_param.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/s3a_stat_ls/ia_css_s3a_stat_ls_param.h
@@ -22,7 +22,7 @@
 
 #define NUM_S3A_LS 1
 
-/** s3a statistics store */
+/* s3a statistics store */
 #ifdef ISP2401
 struct ia_css_s3a_stat_ls_configuration {
 	uint32_t s3a_grid_size_log2;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sc/sc_1.0/ia_css_sc.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sc/sc_1.0/ia_css_sc.host.h
index 44e3c43a5d4a6d503b2c5dcbaf05372e27d8e85b..b35ac3e4009be38ecb15ed6d2c824434d730d1c9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sc/sc_1.0/ia_css_sc.host.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sc/sc_1.0/ia_css_sc.host.h
@@ -32,7 +32,7 @@ ia_css_sc_dump(
 	unsigned level);
 
 #ifdef ISP2401
-/** @brief Configure the shading correction.
+/* @brief Configure the shading correction.
  * @param[out]	to	Parameters used in the shading correction kernel in the isp.
  * @param[in]	from	Parameters passed from the host.
  * @param[in]	size	Size of the sh_css_isp_sc_isp_config structure.
@@ -45,7 +45,7 @@ ia_css_sc_config(
 	const struct ia_css_sc_configuration *from,
 	unsigned size);
 
-/** @brief Configure the shading correction.
+/* @brief Configure the shading correction.
  * @param[in]	binary	The binary, which has the shading correction.
  * @param[in]	internal_frame_origin_x_bqs_on_sctbl
  *			X coordinate (in bqs) of the origin of the internal frame on the shading table.
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sc/sc_1.0/ia_css_sc_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sc/sc_1.0/ia_css_sc_types.h
index 5a833bc48af1f58509786b79e9af6cba34c39a1d..30ce499ac8cfc1d3264eb2c0d68007b2c6e38bab 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sc/sc_1.0/ia_css_sc_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sc/sc_1.0/ia_css_sc_types.h
@@ -15,25 +15,25 @@
 #ifndef __IA_CSS_SC_TYPES_H
 #define __IA_CSS_SC_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Lens Shading Correction (SC) parameters.
 */
 
 
-/** Number of color planes in the shading table. */
+/* Number of color planes in the shading table. */
 #define IA_CSS_SC_NUM_COLORS           4
 
-/** The 4 colors that a shading table consists of.
+/* The 4 colors that a shading table consists of.
  *  For each color we store a grid of values.
  */
 enum ia_css_sc_color {
-	IA_CSS_SC_COLOR_GR, /**< Green on a green-red line */
-	IA_CSS_SC_COLOR_R,  /**< Red */
-	IA_CSS_SC_COLOR_B,  /**< Blue */
-	IA_CSS_SC_COLOR_GB  /**< Green on a green-blue line */
+	IA_CSS_SC_COLOR_GR, /** Green on a green-red line */
+	IA_CSS_SC_COLOR_R,  /** Red */
+	IA_CSS_SC_COLOR_B,  /** Blue */
+	IA_CSS_SC_COLOR_GB  /** Green on a green-blue line */
 };
 
-/** Lens Shading Correction table.
+/* Lens Shading Correction table.
  *
  *  This describes the color shading artefacts
  *  introduced by lens imperfections. To correct artefacts,
@@ -64,39 +64,39 @@ enum ia_css_sc_color {
  *  ISP2: SC1 is used.
  */
 struct ia_css_shading_table {
-	uint32_t enable; /**< Set to false for no shading correction.
+	uint32_t enable; /** Set to false for no shading correction.
 		          The data field can be NULL when enable == true */
 /* ------ deprecated(bz675) : from ------ */
-	uint32_t sensor_width;  /**< Native sensor width in pixels. */
-	uint32_t sensor_height; /**< Native sensor height in lines.
+	uint32_t sensor_width;  /** Native sensor width in pixels. */
+	uint32_t sensor_height; /** Native sensor height in lines.
 		When shading_settings.enable_shading_table_conversion is set
 		as 0, sensor_width and sensor_height are NOT used.
 		These are used only in the legacy shading table conversion
 		in the css, when shading_settings.
 		enable_shading_table_conversion is set as 1. */
 /* ------ deprecated(bz675) : to ------ */
-	uint32_t width;  /**< Number of data points per line per color.
+	uint32_t width;  /** Number of data points per line per color.
 				u8.0, [0,81] */
-	uint32_t height; /**< Number of lines of data points per color.
+	uint32_t height; /** Number of lines of data points per color.
 				u8.0, [0,61] */
-	uint32_t fraction_bits; /**< Bits of fractional part in the data
+	uint32_t fraction_bits; /** Bits of fractional part in the data
 				points.
 				u8.0, [0,13] */
 	uint16_t *data[IA_CSS_SC_NUM_COLORS];
-	/**< Table data, one array for each color.
+	/** Table data, one array for each color.
 	     Use ia_css_sc_color to index this array.
 	     u[13-fraction_bits].[fraction_bits], [0,8191] */
 };
 
 /* ------ deprecated(bz675) : from ------ */
-/** Shading Correction settings.
+/* Shading Correction settings.
  *
  *  NOTE:
  *  This structure should be removed when the shading table conversion is
  *  removed from the css.
  */
 struct ia_css_shading_settings {
-	uint32_t enable_shading_table_conversion; /**< Set to 0,
+	uint32_t enable_shading_table_conversion; /** Set to 0,
 		if the conversion of the shading table should be disabled
 		in the css. (default 1)
 		  0: The shading table is directly sent to the isp.
@@ -119,14 +119,14 @@ struct ia_css_shading_settings {
 
 #ifdef ISP2401
 
-/** Shading Correction configuration.
+/* Shading Correction configuration.
  *
  *  NOTE: The shading table size is larger than or equal to the internal frame size.
  */
 struct ia_css_sc_configuration {
-	uint32_t internal_frame_origin_x_bqs_on_sctbl; /**< Origin X (in bqs) of internal frame on shading table. */
-	uint32_t internal_frame_origin_y_bqs_on_sctbl; /**< Origin Y (in bqs) of internal frame on shading table. */
-						/**< NOTE: bqs = size in BQ(Bayer Quad) unit.
+	uint32_t internal_frame_origin_x_bqs_on_sctbl; /** Origin X (in bqs) of internal frame on shading table. */
+	uint32_t internal_frame_origin_y_bqs_on_sctbl; /** Origin Y (in bqs) of internal frame on shading table. */
+						/** NOTE: bqs = size in BQ(Bayer Quad) unit.
 							1BQ means {Gr,R,B,Gb}(2x2 pixels).
 							Horizontal 1 bqs corresponds to horizontal 2 pixels.
 							Vertical 1 bqs corresponds to vertical 2 pixels. */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/common/ia_css_sdis_common_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/common/ia_css_sdis_common_types.h
index 295dc60b778c1016c3b556ddbaa48c4f7f0c4237..031983c357e4732865fe1f07a2012eba116682a4 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/common/ia_css_sdis_common_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/common/ia_css_sdis_common_types.h
@@ -15,21 +15,21 @@
 #ifndef __IA_CSS_SDIS_COMMON_TYPES_H
 #define __IA_CSS_SDIS_COMMON_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for DVS statistics parameters.
 */
 
 #include <type_support.h>
 
-/** DVS statistics grid dimensions in number of cells.
+/* DVS statistics grid dimensions in number of cells.
  */
 
 struct ia_css_dvs_grid_dim {
-	uint32_t width;		/**< Width of DVS grid table in cells */
-	uint32_t height;	/**< Height of DVS grid table in cells */
+	uint32_t width;		/** Width of DVS grid table in cells */
+	uint32_t height;	/** Height of DVS grid table in cells */
 };
 
-/** DVS statistics dimensions in number of cells for
+/* DVS statistics dimensions in number of cells for
  * grid, coeffieicient and projection.
  */
 
@@ -55,7 +55,7 @@ struct ia_css_sdis_info {
 		0,	/* dis_deci_factor_log2 */ \
 	}
 
-/** DVS statistics grid
+/* DVS statistics grid
  *
  *  ISP block: SDVS1 (DIS/DVS Support for DIS/DVS ver.1 (2-axes))
  *             SDVS2 (DVS Support for DVS ver.2 (6-axes))
@@ -63,23 +63,23 @@ struct ia_css_sdis_info {
  *  ISP2: SDVS2 is used.
  */
 struct ia_css_dvs_grid_res {
-	uint32_t width;	    	/**< Width of DVS grid table.
+	uint32_t width;	    	/** Width of DVS grid table.
 					(= Horizontal number of grid cells
 					in table, which cells have effective
 					statistics.)
 					For DVS1, this is equal to
 					 the number of vertical statistics. */
-	uint32_t aligned_width; /**< Stride of each grid line.
+	uint32_t aligned_width; /** Stride of each grid line.
 					(= Horizontal number of grid cells
 					in table, which means
 					the allocated width.) */
-	uint32_t height;	/**< Height of DVS grid table.
+	uint32_t height;	/** Height of DVS grid table.
 					(= Vertical number of grid cells
 					in table, which cells have effective
 					statistics.)
 					For DVS1, This is equal to
 					the number of horizontal statistics. */
-	uint32_t aligned_height;/**< Stride of each grid column.
+	uint32_t aligned_height;/** Stride of each grid column.
 					(= Vertical number of grid cells
 					in table, which means
 					the allocated height.) */
@@ -89,125 +89,125 @@ struct ia_css_dvs_grid_res {
  * However, that implies driver I/F changes
  */
 struct ia_css_dvs_grid_info {
-	uint32_t enable;        /**< DVS statistics enabled.
+	uint32_t enable;        /** DVS statistics enabled.
 					0:disabled, 1:enabled */
-	uint32_t width;	    	/**< Width of DVS grid table.
+	uint32_t width;	    	/** Width of DVS grid table.
 					(= Horizontal number of grid cells
 					in table, which cells have effective
 					statistics.)
 					For DVS1, this is equal to
 					 the number of vertical statistics. */
-	uint32_t aligned_width; /**< Stride of each grid line.
+	uint32_t aligned_width; /** Stride of each grid line.
 					(= Horizontal number of grid cells
 					in table, which means
 					the allocated width.) */
-	uint32_t height;	/**< Height of DVS grid table.
+	uint32_t height;	/** Height of DVS grid table.
 					(= Vertical number of grid cells
 					in table, which cells have effective
 					statistics.)
 					For DVS1, This is equal to
 					the number of horizontal statistics. */
-	uint32_t aligned_height;/**< Stride of each grid column.
+	uint32_t aligned_height;/** Stride of each grid column.
 					(= Vertical number of grid cells
 					in table, which means
 					the allocated height.) */
-	uint32_t bqs_per_grid_cell; /**< Grid cell size in BQ(Bayer Quad) unit.
+	uint32_t bqs_per_grid_cell; /** Grid cell size in BQ(Bayer Quad) unit.
 					(1BQ means {Gr,R,B,Gb}(2x2 pixels).)
 					For DVS1, valid value is 64.
 					For DVS2, valid value is only 64,
 					currently. */
-	uint32_t num_hor_coefs;	/**< Number of horizontal coefficients. */
-	uint32_t num_ver_coefs;	/**< Number of vertical coefficients. */
+	uint32_t num_hor_coefs;	/** Number of horizontal coefficients. */
+	uint32_t num_ver_coefs;	/** Number of vertical coefficients. */
 };
 
-/** Number of DVS statistics levels
+/* Number of DVS statistics levels
  */
 #define IA_CSS_DVS_STAT_NUM_OF_LEVELS	3
 
-/** DVS statistics generated by accelerator global configuration
+/* DVS statistics generated by accelerator global configuration
  */
 struct dvs_stat_public_dvs_global_cfg {
 	unsigned char kappa;
-	/**< DVS statistics global configuration - kappa */
+	/** DVS statistics global configuration - kappa */
 	unsigned char match_shift;
-	/**< DVS statistics global configuration - match_shift */
+	/** DVS statistics global configuration - match_shift */
 	unsigned char ybin_mode;
-	/**< DVS statistics global configuration - y binning mode */
+	/** DVS statistics global configuration - y binning mode */
 };
 
-/** DVS statistics generated by accelerator level grid
+/* DVS statistics generated by accelerator level grid
  *  configuration
  */
 struct dvs_stat_public_dvs_level_grid_cfg {
 	unsigned char grid_width;
-	/**< DVS statistics grid width */
+	/** DVS statistics grid width */
 	unsigned char grid_height;
-	/**< DVS statistics grid height */
+	/** DVS statistics grid height */
 	unsigned char block_width;
-	/**< DVS statistics block width */
+	/** DVS statistics block width */
 	unsigned char block_height;
-	/**< DVS statistics block  height */
+	/** DVS statistics block  height */
 };
 
-/** DVS statistics generated by accelerator level grid start
+/* DVS statistics generated by accelerator level grid start
  *  configuration
  */
 struct dvs_stat_public_dvs_level_grid_start {
 	unsigned short x_start;
-	/**< DVS statistics level x start */
+	/** DVS statistics level x start */
 	unsigned short y_start;
-	/**< DVS statistics level y start */
+	/** DVS statistics level y start */
 	unsigned char enable;
-	/**< DVS statistics level enable */
+	/** DVS statistics level enable */
 };
 
-/** DVS statistics generated by accelerator level grid end
+/* DVS statistics generated by accelerator level grid end
  *  configuration
  */
 struct dvs_stat_public_dvs_level_grid_end {
 	unsigned short x_end;
-	/**< DVS statistics level x end */
+	/** DVS statistics level x end */
 	unsigned short y_end;
-	/**< DVS statistics level y end */
+	/** DVS statistics level y end */
 };
 
-/** DVS statistics generated by accelerator Feature Extraction
+/* DVS statistics generated by accelerator Feature Extraction
  *  Region Of Interest (FE-ROI) configuration
  */
 struct dvs_stat_public_dvs_level_fe_roi_cfg {
 	unsigned char x_start;
-	/**< DVS statistics fe-roi level x start */
+	/** DVS statistics fe-roi level x start */
 	unsigned char y_start;
-	/**< DVS statistics fe-roi level y start */
+	/** DVS statistics fe-roi level y start */
 	unsigned char x_end;
-	/**< DVS statistics fe-roi level x end */
+	/** DVS statistics fe-roi level x end */
 	unsigned char y_end;
-	/**< DVS statistics fe-roi level y end */
+	/** DVS statistics fe-roi level y end */
 };
 
-/** DVS statistics generated by accelerator public configuration
+/* DVS statistics generated by accelerator public configuration
  */
 struct dvs_stat_public_dvs_grd_cfg {
 	struct dvs_stat_public_dvs_level_grid_cfg    grd_cfg;
-	/**< DVS statistics level grid configuration */
+	/** DVS statistics level grid configuration */
 	struct dvs_stat_public_dvs_level_grid_start  grd_start;
-	/**< DVS statistics level grid start configuration */
+	/** DVS statistics level grid start configuration */
 	struct dvs_stat_public_dvs_level_grid_end    grd_end;
-	/**< DVS statistics level grid end configuration */
+	/** DVS statistics level grid end configuration */
 };
 
-/** DVS statistics grid generated by accelerator
+/* DVS statistics grid generated by accelerator
  */
 struct ia_css_dvs_stat_grid_info {
 	struct dvs_stat_public_dvs_global_cfg       dvs_gbl_cfg;
-	/**< DVS statistics global configuration (kappa, match, binning) */
+	/** DVS statistics global configuration (kappa, match, binning) */
 	struct dvs_stat_public_dvs_grd_cfg       grd_cfg[IA_CSS_DVS_STAT_NUM_OF_LEVELS];
-	/**< DVS statistics grid configuration (blocks and grids) */
+	/** DVS statistics grid configuration (blocks and grids) */
 	struct dvs_stat_public_dvs_level_fe_roi_cfg fe_roi_cfg[IA_CSS_DVS_STAT_NUM_OF_LEVELS];
-	/**< DVS statistics FE ROI (region of interest) configuration */
+	/** DVS statistics FE ROI (region of interest) configuration */
 };
 
-/** DVS statistics generated by accelerator default grid info
+/* DVS statistics generated by accelerator default grid info
  */
 #define DEFAULT_DVS_GRID_INFO { \
 { \
@@ -219,14 +219,14 @@ struct ia_css_dvs_stat_grid_info {
 }
 
 
-/** Union that holds all types of DVS statistics grid info in
+/* Union that holds all types of DVS statistics grid info in
  *  CSS format
  * */
 union ia_css_dvs_grid_u {
 	struct ia_css_dvs_stat_grid_info dvs_stat_grid_info;
-	/**< DVS statistics produced by accelerator grid info */
+	/** DVS statistics produced by accelerator grid info */
 	struct ia_css_dvs_grid_info dvs_grid_info;
-	/**< DVS (DVS1/DVS2) grid info */
+	/** DVS (DVS1/DVS2) grid info */
 };
 
 #endif /* __IA_CSS_SDIS_COMMON_TYPES_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/sdis_1.0/ia_css_sdis_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/sdis_1.0/ia_css_sdis_types.h
index d408b58a027d957c06624e595d6ec4bde543d848..d2ee57008fb6ad87c07eea76112b8608f909706a 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/sdis_1.0/ia_css_sdis_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/sdis_1.0/ia_css_sdis_types.h
@@ -15,38 +15,38 @@
 #ifndef __IA_CSS_SDIS_TYPES_H
 #define __IA_CSS_SDIS_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for DVS statistics parameters.
 */
 
-/** Number of DVS coefficient types */
+/* Number of DVS coefficient types */
 #define IA_CSS_DVS_NUM_COEF_TYPES      6
 
 #ifndef PIPE_GENERATION
 #include "isp/kernels/sdis/common/ia_css_sdis_common_types.h"
 #endif
 
-/** DVS 1.0 Coefficients.
+/* DVS 1.0 Coefficients.
  *  This structure describes the coefficients that are needed for the dvs statistics.
  */
 
 struct ia_css_dvs_coefficients {
-	struct ia_css_dvs_grid_info grid;/**< grid info contains the dimensions of the dvs grid */
-	int16_t *hor_coefs;	/**< the pointer to int16_t[grid.num_hor_coefs * IA_CSS_DVS_NUM_COEF_TYPES]
+	struct ia_css_dvs_grid_info grid;/** grid info contains the dimensions of the dvs grid */
+	int16_t *hor_coefs;	/** the pointer to int16_t[grid.num_hor_coefs * IA_CSS_DVS_NUM_COEF_TYPES]
 				     containing the horizontal coefficients */
-	int16_t *ver_coefs;	/**< the pointer to int16_t[grid.num_ver_coefs * IA_CSS_DVS_NUM_COEF_TYPES]
+	int16_t *ver_coefs;	/** the pointer to int16_t[grid.num_ver_coefs * IA_CSS_DVS_NUM_COEF_TYPES]
 				     containing the vertical coefficients */
 };
 
-/** DVS 1.0 Statistics.
+/* DVS 1.0 Statistics.
  *  This structure describes the statistics that are generated using the provided coefficients.
  */
 
 struct ia_css_dvs_statistics {
-	struct ia_css_dvs_grid_info grid;/**< grid info contains the dimensions of the dvs grid */
-	int32_t *hor_proj;	/**< the pointer to int16_t[grid.height * IA_CSS_DVS_NUM_COEF_TYPES]
+	struct ia_css_dvs_grid_info grid;/** grid info contains the dimensions of the dvs grid */
+	int32_t *hor_proj;	/** the pointer to int16_t[grid.height * IA_CSS_DVS_NUM_COEF_TYPES]
 				     containing the horizontal projections */
-	int32_t *ver_proj;	/**< the pointer to int16_t[grid.width * IA_CSS_DVS_NUM_COEF_TYPES]
+	int32_t *ver_proj;	/** the pointer to int16_t[grid.width * IA_CSS_DVS_NUM_COEF_TYPES]
 				     containing the vertical projections */
 };
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/sdis_2/ia_css_sdis2_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/sdis_2/ia_css_sdis2_types.h
index 7db7dd10fe0081bc3ce4325f55f3a2e72d40e383..2a0bc403174667b1f3e55ba72620cdad47258639 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/sdis_2/ia_css_sdis2_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/sdis/sdis_2/ia_css_sdis2_types.h
@@ -15,55 +15,55 @@
 #ifndef __IA_CSS_SDIS2_TYPES_H
 #define __IA_CSS_SDIS2_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for DVS statistics parameters.
 */
 
-/** Number of DVS coefficient types */
+/* Number of DVS coefficient types */
 #define IA_CSS_DVS2_NUM_COEF_TYPES     4
 
 #ifndef PIPE_GENERATION
 #include "isp/kernels/sdis/common/ia_css_sdis_common_types.h"
 #endif
 
-/** DVS 2.0 Coefficient types. This structure contains 4 pointers to
+/* DVS 2.0 Coefficient types. This structure contains 4 pointers to
  *  arrays that contain the coeffients for each type.
  */
 struct ia_css_dvs2_coef_types {
-	int16_t *odd_real; /**< real part of the odd coefficients*/
-	int16_t *odd_imag; /**< imaginary part of the odd coefficients*/
-	int16_t *even_real;/**< real part of the even coefficients*/
-	int16_t *even_imag;/**< imaginary part of the even coefficients*/
+	int16_t *odd_real; /** real part of the odd coefficients*/
+	int16_t *odd_imag; /** imaginary part of the odd coefficients*/
+	int16_t *even_real;/** real part of the even coefficients*/
+	int16_t *even_imag;/** imaginary part of the even coefficients*/
 };
 
-/** DVS 2.0 Coefficients. This structure describes the coefficients that are needed for the dvs statistics.
+/* DVS 2.0 Coefficients. This structure describes the coefficients that are needed for the dvs statistics.
  *  e.g. hor_coefs.odd_real is the pointer to int16_t[grid.num_hor_coefs] containing the horizontal odd real 
  *  coefficients.
  */
 struct ia_css_dvs2_coefficients {
-	struct ia_css_dvs_grid_info grid;        /**< grid info contains the dimensions of the dvs grid */
-	struct ia_css_dvs2_coef_types hor_coefs; /**< struct with pointers that contain the horizontal coefficients */
-	struct ia_css_dvs2_coef_types ver_coefs; /**< struct with pointers that contain the vertical coefficients */
+	struct ia_css_dvs_grid_info grid;        /** grid info contains the dimensions of the dvs grid */
+	struct ia_css_dvs2_coef_types hor_coefs; /** struct with pointers that contain the horizontal coefficients */
+	struct ia_css_dvs2_coef_types ver_coefs; /** struct with pointers that contain the vertical coefficients */
 };
 
-/** DVS 2.0 Statistic types. This structure contains 4 pointers to
+/* DVS 2.0 Statistic types. This structure contains 4 pointers to
  *  arrays that contain the statistics for each type.
  */
 struct ia_css_dvs2_stat_types {
-	int32_t *odd_real; /**< real part of the odd statistics*/
-	int32_t *odd_imag; /**< imaginary part of the odd statistics*/
-	int32_t *even_real;/**< real part of the even statistics*/
-	int32_t *even_imag;/**< imaginary part of the even statistics*/
+	int32_t *odd_real; /** real part of the odd statistics*/
+	int32_t *odd_imag; /** imaginary part of the odd statistics*/
+	int32_t *even_real;/** real part of the even statistics*/
+	int32_t *even_imag;/** imaginary part of the even statistics*/
 };
 
-/** DVS 2.0 Statistics. This structure describes the statistics that are generated using the provided coefficients.
+/* DVS 2.0 Statistics. This structure describes the statistics that are generated using the provided coefficients.
  *  e.g. hor_prod.odd_real is the pointer to int16_t[grid.aligned_height][grid.aligned_width] containing 
  *  the horizontal odd real statistics. Valid statistics data area is int16_t[0..grid.height-1][0..grid.width-1]
  */
 struct ia_css_dvs2_statistics {
-	struct ia_css_dvs_grid_info grid;       /**< grid info contains the dimensions of the dvs grid */
-	struct ia_css_dvs2_stat_types hor_prod; /**< struct with pointers that contain the horizontal statistics */
-	struct ia_css_dvs2_stat_types ver_prod; /**< struct with pointers that contain the vertical statistics */
+	struct ia_css_dvs_grid_info grid;       /** grid info contains the dimensions of the dvs grid */
+	struct ia_css_dvs2_stat_types hor_prod; /** struct with pointers that contain the horizontal statistics */
+	struct ia_css_dvs2_stat_types ver_prod; /** struct with pointers that contain the vertical statistics */
 };
 
 #endif /* __IA_CSS_SDIS2_TYPES_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_types.h
index cc47a50e5ad55b620ef67b415356fe13c373db06..91ea8dd4651d6f9b25f4dd81104987450e42e33e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_types.h
@@ -15,13 +15,13 @@
 #ifndef __IA_CSS_TDF_TYPES_H
 #define __IA_CSS_TDF_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Transform Domain Filter parameters.
 */
 
 #include "type_support.h"
 
-/** Transform Domain Filter configuration
+/* Transform Domain Filter configuration
  *
  * \brief TDF public parameters.
  * \details Struct with all parameters for the TDF kernel that can be set
@@ -30,23 +30,23 @@
  * ISP2.6.1: TDF is used.
  */
 struct ia_css_tdf_config {
-	int32_t thres_flat_table[64];	/**< Final optimized strength table of NR for flat region. */
-	int32_t thres_detail_table[64];	/**< Final optimized strength table of NR for detail region. */
-	int32_t epsilon_0;		/**< Coefficient to control variance for dark area (for flat region). */
-	int32_t epsilon_1;		/**< Coefficient to control variance for bright area (for flat region). */
-	int32_t eps_scale_text;		/**< Epsilon scaling coefficient for texture region. */
-	int32_t eps_scale_edge;		/**< Epsilon scaling coefficient for edge region. */
-	int32_t sepa_flat;		/**< Threshold to judge flat (edge < m_Flat_thre). */
-	int32_t sepa_edge;		/**< Threshold to judge edge (edge > m_Edge_thre). */
-	int32_t blend_flat;		/**< Blending ratio at flat region. */
-	int32_t blend_text;		/**< Blending ratio at texture region. */
-	int32_t blend_edge;		/**< Blending ratio at edge region. */
-	int32_t shading_gain;		/**< Gain of Shading control. */
-	int32_t shading_base_gain;	/**< Base Gain of Shading control. */
-	int32_t local_y_gain;		/**< Gain of local luminance control. */
-	int32_t local_y_base_gain;	/**< Base gain of local luminance control. */
-	int32_t rad_x_origin;		/**< Initial x coord. for radius computation. */
-	int32_t rad_y_origin;		/**< Initial y coord. for radius computation. */
+	int32_t thres_flat_table[64];	/** Final optimized strength table of NR for flat region. */
+	int32_t thres_detail_table[64];	/** Final optimized strength table of NR for detail region. */
+	int32_t epsilon_0;		/** Coefficient to control variance for dark area (for flat region). */
+	int32_t epsilon_1;		/** Coefficient to control variance for bright area (for flat region). */
+	int32_t eps_scale_text;		/** Epsilon scaling coefficient for texture region. */
+	int32_t eps_scale_edge;		/** Epsilon scaling coefficient for edge region. */
+	int32_t sepa_flat;		/** Threshold to judge flat (edge < m_Flat_thre). */
+	int32_t sepa_edge;		/** Threshold to judge edge (edge > m_Edge_thre). */
+	int32_t blend_flat;		/** Blending ratio at flat region. */
+	int32_t blend_text;		/** Blending ratio at texture region. */
+	int32_t blend_edge;		/** Blending ratio at edge region. */
+	int32_t shading_gain;		/** Gain of Shading control. */
+	int32_t shading_base_gain;	/** Base Gain of Shading control. */
+	int32_t local_y_gain;		/** Gain of local luminance control. */
+	int32_t local_y_base_gain;	/** Base gain of local luminance control. */
+	int32_t rad_x_origin;		/** Initial x coord. for radius computation. */
+	int32_t rad_y_origin;		/** Initial y coord. for radius computation. */
 };
 
 #endif /* __IA_CSS_TDF_TYPES_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tnr/tnr3/ia_css_tnr3_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tnr/tnr3/ia_css_tnr3_types.h
index 135563f52174512e0aed99050731aff54496edd5..223423f8c40b2dd247401b3c129519449c614d84 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tnr/tnr3/ia_css_tnr3_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tnr/tnr3/ia_css_tnr3_types.h
@@ -16,7 +16,7 @@ more details.
 #ifndef _IA_CSS_TNR3_TYPES_H
 #define _IA_CSS_TNR3_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Temporal Noise Reduction v3 (TNR3) kernel
 */
 
@@ -27,7 +27,7 @@ more details.
  */
 #define TNR3_NUM_SEGMENTS    3
 
-/** Temporal Noise Reduction v3 (TNR3) configuration.
+/* Temporal Noise Reduction v3 (TNR3) configuration.
  * The parameter to this kernel is fourfold
  * 1. Three piecewise linear graphs (one for each plane) with three segments
  * each. Each line graph has Luma values on the x axis and sigma values for
@@ -44,17 +44,17 @@ more details.
  * 4. Selection of the reference frame buffer to be used for noise reduction.
  */
 struct ia_css_tnr3_kernel_config {
-	unsigned int maxfb_y;                        /**< Maximum Feedback Gain for Y */
-	unsigned int maxfb_u;                        /**< Maximum Feedback Gain for U */
-	unsigned int maxfb_v;                        /**< Maximum Feedback Gain for V */
-	unsigned int round_adj_y;                    /**< Rounding Adjust for Y */
-	unsigned int round_adj_u;                    /**< Rounding Adjust for U */
-	unsigned int round_adj_v;                    /**< Rounding Adjust for V */
-	unsigned int knee_y[TNR3_NUM_SEGMENTS - 1];  /**< Knee points */
-	unsigned int sigma_y[TNR3_NUM_SEGMENTS + 1]; /**< Standard deviation for Y at points Y0, Y1, Y2, Y3 */
-	unsigned int sigma_u[TNR3_NUM_SEGMENTS + 1]; /**< Standard deviation for U at points U0, U1, U2, U3 */
-	unsigned int sigma_v[TNR3_NUM_SEGMENTS + 1]; /**< Standard deviation for V at points V0, V1, V2, V3 */
-	unsigned int ref_buf_select;                 /**< Selection of the reference buffer */
+	unsigned int maxfb_y;                        /** Maximum Feedback Gain for Y */
+	unsigned int maxfb_u;                        /** Maximum Feedback Gain for U */
+	unsigned int maxfb_v;                        /** Maximum Feedback Gain for V */
+	unsigned int round_adj_y;                    /** Rounding Adjust for Y */
+	unsigned int round_adj_u;                    /** Rounding Adjust for U */
+	unsigned int round_adj_v;                    /** Rounding Adjust for V */
+	unsigned int knee_y[TNR3_NUM_SEGMENTS - 1];  /** Knee points */
+	unsigned int sigma_y[TNR3_NUM_SEGMENTS + 1]; /** Standard deviation for Y at points Y0, Y1, Y2, Y3 */
+	unsigned int sigma_u[TNR3_NUM_SEGMENTS + 1]; /** Standard deviation for U at points U0, U1, U2, U3 */
+	unsigned int sigma_v[TNR3_NUM_SEGMENTS + 1]; /** Standard deviation for V at points V0, V1, V2, V3 */
+	unsigned int ref_buf_select;                 /** Selection of the reference buffer */
 };
 
 #endif
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tnr/tnr_1.0/ia_css_tnr_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tnr/tnr_1.0/ia_css_tnr_types.h
index 4fd35e6ccd704ceb4a6aac9467d30b83cd70c9b2..9bbc9ab2e6c0d1a5ff5b176d07c5a6945bb580df 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tnr/tnr_1.0/ia_css_tnr_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tnr/tnr_1.0/ia_css_tnr_types.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_TNR_TYPES_H
 #define __IA_CSS_TNR_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Temporal Noise Reduction (TNR) parameters.
 */
 
-/** Temporal Noise Reduction (TNR) configuration.
+/* Temporal Noise Reduction (TNR) configuration.
  *
  *  When difference between current frame and previous frame is less than or
  *  equal to threshold, TNR works and current frame is mixed
@@ -36,18 +36,18 @@
 
 
 struct ia_css_tnr_config {
-	ia_css_u0_16 gain; /**< Interpolation ratio of current frame
+	ia_css_u0_16 gain; /** Interpolation ratio of current frame
 			        and previous frame.
 				gain=0.0 -> previous frame is outputted.
 				gain=1.0 -> current frame is outputted.
 				u0.16, [0,65535],
 			default 32768(0.5), ineffective 65535(almost 1.0) */
-	ia_css_u0_16 threshold_y; /**< Threshold to enable interpolation of Y.
+	ia_css_u0_16 threshold_y; /** Threshold to enable interpolation of Y.
 				If difference between current frame and
 				previous frame is greater than threshold_y,
 				TNR for Y is disabled.
 				u0.16, [0,65535], default/ineffective 0 */
-	ia_css_u0_16 threshold_uv; /**< Threshold to enable interpolation of
+	ia_css_u0_16 threshold_uv; /** Threshold to enable interpolation of
 				U/V.
 				If difference between current frame and
 				previous frame is greater than threshold_uv,
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf_param.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf_param.h
index df5d37c8c946401c01347711add4ef153034d41b..9df4e12f6c2c7f1379caed574e4a55d7becb1164 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf_param.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf_param.h
@@ -23,9 +23,9 @@
 
 #define VFDEC_BITS_PER_PIXEL	GAMMA_OUTPUT_BITS
 
-/** Viewfinder decimation */
+/* Viewfinder decimation */
 struct sh_css_isp_vf_isp_config {
-	uint32_t vf_downscale_bits; /**< Log VF downscale value */
+	uint32_t vf_downscale_bits; /** Log VF downscale value */
 	uint32_t enable;
 	struct ia_css_frame_sp_info info;
 	struct {
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf_types.h
index d8cfdfbc8c0bd1af7f938f88985516d8367ab29a..e3efafa279ffc3c5a62bf9054a909061e2d6935a 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf_types.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_VF_TYPES_H
 #define __IA_CSS_VF_TYPES_H
 
-/** Viewfinder decimation
+/* Viewfinder decimation
  *
  *  ISP block: vfeven_horizontal_downscale
  */
@@ -24,7 +24,7 @@
 #include <type_support.h>
 
 struct ia_css_vf_configuration {
-	uint32_t vf_downscale_bits; /**< Log VF downscale value */
+	uint32_t vf_downscale_bits; /** Log VF downscale value */
 	const struct ia_css_frame_info *info;
 };
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/wb/wb_1.0/ia_css_wb_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/wb/wb_1.0/ia_css_wb_types.h
index 6bcfa274be884c3aa95cf4efb6e59a18580ff12d..bf98734d057e5e0e7280ad8882647e78a4a054a9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/wb/wb_1.0/ia_css_wb_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/wb/wb_1.0/ia_css_wb_types.h
@@ -15,31 +15,31 @@
 #ifndef __IA_CSS_WB_TYPES_H
 #define __IA_CSS_WB_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for White Balance parameters.
 */
 
 
-/** White Balance configuration (Gain Adjust).
+/* White Balance configuration (Gain Adjust).
  *
  *  ISP block: WB1
  *  ISP1: WB1 is used.
  *  ISP2: WB1 is used.
  */
 struct ia_css_wb_config {
-	uint32_t integer_bits; /**< Common exponent of gains.
+	uint32_t integer_bits; /** Common exponent of gains.
 				u8.0, [0,3],
 				default 1, ineffective 1 */
-	uint32_t gr;	/**< Significand of Gr gain.
+	uint32_t gr;	/** Significand of Gr gain.
 				u[integer_bits].[16-integer_bits], [0,65535],
 				default/ineffective 32768(u1.15, 1.0) */
-	uint32_t r;	/**< Significand of R gain.
+	uint32_t r;	/** Significand of R gain.
 				u[integer_bits].[16-integer_bits], [0,65535],
 				default/ineffective 32768(u1.15, 1.0) */
-	uint32_t b;	/**< Significand of B gain.
+	uint32_t b;	/** Significand of B gain.
 				u[integer_bits].[16-integer_bits], [0,65535],
 				default/ineffective 32768(u1.15, 1.0) */
-	uint32_t gb;	/**< Significand of Gb gain.
+	uint32_t gb;	/** Significand of Gb gain.
 				u[integer_bits].[16-integer_bits], [0,65535],
 				default/ineffective 32768(u1.15, 1.0) */
 };
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_1.0/ia_css_xnr.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_1.0/ia_css_xnr.host.c
index 3018100f6f767b15d14a1f539b4be65ae6a1fded..abcb531f51ccdf6c1073cf09862e90685bd6640b 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_1.0/ia_css_xnr.host.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_1.0/ia_css_xnr.host.c
@@ -21,7 +21,7 @@
 #include "ia_css_xnr.host.h"
 
 const struct ia_css_xnr_config default_xnr_config = {
-	/** default threshold 6400 translates to 25 on ISP. */
+	/* default threshold 6400 translates to 25 on ISP. */
 	6400
 };
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_1.0/ia_css_xnr_param.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_1.0/ia_css_xnr_param.h
index 806c9f8f0e2eb68fd638a7bbb842f63ec0d5fd0a..a5caebbe2f8481825f6aed6e743df9ac9af0002f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_1.0/ia_css_xnr_param.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_1.0/ia_css_xnr_param.h
@@ -41,7 +41,7 @@ struct sh_css_isp_xnr_vamem_params {
 };
 
 struct sh_css_isp_xnr_params {
-	/** XNR threshold.
+	/* XNR threshold.
 	 * type:u0.16 but actual valid range is:[0,255]
 	 * valid range is dependent on SH_CSS_ISP_YUV_BITS (currently 8bits)
 	 * default: 25 */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_1.0/ia_css_xnr_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_1.0/ia_css_xnr_types.h
index 89e8b0f17e8cf90184c458ac0b08625ff767fd9e..d2b634211a3fadcefb1fd7f4758a6dec51208479 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_1.0/ia_css_xnr_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_1.0/ia_css_xnr_types.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_XNR_TYPES_H
 #define __IA_CSS_XNR_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Extra Noise Reduction (XNR) parameters.
 */
 
-/** XNR table.
+/* XNR table.
  *
  *  NOTE: The driver does not need to set this table,
  *        because the default values are set inside the css.
@@ -36,23 +36,23 @@
  *
  */
 
-/** Number of elements in the xnr table. */
+/* Number of elements in the xnr table. */
 #define IA_CSS_VAMEM_1_XNR_TABLE_SIZE_LOG2      6
-/** Number of elements in the xnr table. */
+/* Number of elements in the xnr table. */
 #define IA_CSS_VAMEM_1_XNR_TABLE_SIZE           (1U<<IA_CSS_VAMEM_1_XNR_TABLE_SIZE_LOG2)
 
-/** Number of elements in the xnr table. */
+/* Number of elements in the xnr table. */
 #define IA_CSS_VAMEM_2_XNR_TABLE_SIZE_LOG2      6
-/** Number of elements in the xnr table. */
+/* Number of elements in the xnr table. */
 #define IA_CSS_VAMEM_2_XNR_TABLE_SIZE	        (1U<<IA_CSS_VAMEM_2_XNR_TABLE_SIZE_LOG2)
 
-/**< IA_CSS_VAMEM_TYPE_1(ISP2300) or
+/** IA_CSS_VAMEM_TYPE_1(ISP2300) or
      IA_CSS_VAMEM_TYPE_2(ISP2400) */
 union ia_css_xnr_data {
 	uint16_t vamem_1[IA_CSS_VAMEM_1_XNR_TABLE_SIZE];
-	/**< Coefficients table on vamem type1. u0.12, [0,4095] */
+	/** Coefficients table on vamem type1. u0.12, [0,4095] */
 	uint16_t vamem_2[IA_CSS_VAMEM_2_XNR_TABLE_SIZE];
-	/**< Coefficients table on vamem type2. u0.12, [0,4095] */
+	/** Coefficients table on vamem type2. u0.12, [0,4095] */
 };
 
 struct ia_css_xnr_table {
@@ -61,7 +61,7 @@ struct ia_css_xnr_table {
 };
 
 struct ia_css_xnr_config {
-	/** XNR threshold.
+	/* XNR threshold.
 	 * type:u0.16 valid range:[0,65535]
 	 * default: 6400 */
 	uint16_t threshold;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_3.0/ia_css_xnr3_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_3.0/ia_css_xnr3_types.h
index 8f14d10806515c9bec9f10cb9b12da61e25740f4..669200caf72e6b765819086e68f818b5ea4ee1ca 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_3.0/ia_css_xnr3_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/xnr/xnr_3.0/ia_css_xnr3_types.h
@@ -15,7 +15,7 @@
 #ifndef __IA_CSS_XNR3_TYPES_H
 #define __IA_CSS_XNR3_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Extra Noise Reduction (XNR) parameters.
 */
 
@@ -47,12 +47,12 @@
  * IA_CSS_XNR3_SIGMA_SCALE.
  */
 struct ia_css_xnr3_sigma_params {
-	int y0;     /**< Sigma for Y range similarity in dark area */
-	int y1;     /**< Sigma for Y range similarity in bright area */
-	int u0;     /**< Sigma for U range similarity in dark area */
-	int u1;     /**< Sigma for U range similarity in bright area */
-	int v0;     /**< Sigma for V range similarity in dark area */
-	int v1;     /**< Sigma for V range similarity in bright area */
+	int y0;     /** Sigma for Y range similarity in dark area */
+	int y1;     /** Sigma for Y range similarity in bright area */
+	int u0;     /** Sigma for U range similarity in dark area */
+	int u1;     /** Sigma for U range similarity in bright area */
+	int v0;     /** Sigma for V range similarity in dark area */
+	int v1;     /** Sigma for V range similarity in bright area */
 };
 
 /**
@@ -64,10 +64,10 @@ struct ia_css_xnr3_sigma_params {
  * with IA_CSS_XNR3_CORING_SCALE. The ineffective value is 0.
  */
 struct ia_css_xnr3_coring_params {
-	int u0;     /**< Coring threshold of U channel in dark area */
-	int u1;     /**< Coring threshold of U channel in bright area */
-	int v0;     /**< Coring threshold of V channel in dark area */
-	int v1;     /**< Coring threshold of V channel in bright area */
+	int u0;     /** Coring threshold of U channel in dark area */
+	int u1;     /** Coring threshold of U channel in bright area */
+	int v0;     /** Coring threshold of V channel in dark area */
+	int v1;     /** Coring threshold of V channel in bright area */
 };
 
 /**
@@ -81,7 +81,7 @@ struct ia_css_xnr3_coring_params {
  * value of 0.0 bypasses the entire xnr3 filter.
  */
 struct ia_css_xnr3_blending_params {
-	int strength;   /**< Blending strength */
+	int strength;   /** Blending strength */
 };
 
 /**
@@ -90,9 +90,9 @@ struct ia_css_xnr3_blending_params {
  * from the CSS API.
  */
 struct ia_css_xnr3_config {
-	struct ia_css_xnr3_sigma_params    sigma;    /**< XNR3 sigma parameters */
-	struct ia_css_xnr3_coring_params   coring;   /**< XNR3 coring parameters */
-	struct ia_css_xnr3_blending_params blending; /**< XNR3 blending parameters */
+	struct ia_css_xnr3_sigma_params    sigma;    /** XNR3 sigma parameters */
+	struct ia_css_xnr3_coring_params   coring;   /** XNR3 coring parameters */
+	struct ia_css_xnr3_blending_params blending; /** XNR3 blending parameters */
 };
 
 #endif /* __IA_CSS_XNR3_TYPES_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ynr/ynr_1.0/ia_css_ynr_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ynr/ynr_1.0/ia_css_ynr_types.h
index 3f46655bee57fd9af3d52f3a8f6209d04a349bac..3f8589a5a43a8b3cc6add4b9a1b53de7f703d0ed 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ynr/ynr_1.0/ia_css_ynr_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ynr/ynr_1.0/ia_css_ynr_types.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_YNR_TYPES_H
 #define __IA_CSS_YNR_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Noise Reduction (BNR) and YCC Noise Reduction (YNR,CNR).
 */
 
-/** Configuration used by Bayer Noise Reduction (BNR) and
+/* Configuration used by Bayer Noise Reduction (BNR) and
  *  YCC Noise Reduction (YNR,CNR).
  *
  *  ISP block: BNR1, YNR1, CNR1
@@ -28,28 +28,28 @@
  *        BNR1,YNR2,CNR2 are used for Still.
  */
 struct ia_css_nr_config {
-	ia_css_u0_16 bnr_gain;	   /**< Strength of noise reduction (BNR).
+	ia_css_u0_16 bnr_gain;	   /** Strength of noise reduction (BNR).
 				u0.16, [0,65535],
 				default 14336(0.21875), ineffective 0 */
-	ia_css_u0_16 ynr_gain;	   /**< Strength of noise reduction (YNR).
+	ia_css_u0_16 ynr_gain;	   /** Strength of noise reduction (YNR).
 				u0.16, [0,65535],
 				default 14336(0.21875), ineffective 0 */
-	ia_css_u0_16 direction;    /**< Sensitivity of edge (BNR).
+	ia_css_u0_16 direction;    /** Sensitivity of edge (BNR).
 				u0.16, [0,65535],
 				default 512(0.0078125), ineffective 0 */
-	ia_css_u0_16 threshold_cb; /**< Coring threshold for Cb (CNR).
+	ia_css_u0_16 threshold_cb; /** Coring threshold for Cb (CNR).
 				This is the same as
 				de_config.c1_coring_threshold.
 				u0.16, [0,65535],
 				default 0(0), ineffective 0 */
-	ia_css_u0_16 threshold_cr; /**< Coring threshold for Cr (CNR).
+	ia_css_u0_16 threshold_cr; /** Coring threshold for Cr (CNR).
 				This is the same as
 				de_config.c2_coring_threshold.
 				u0.16, [0,65535],
 				default 0(0), ineffective 0 */
 };
 
-/** Edge Enhancement (sharpen) configuration.
+/* Edge Enhancement (sharpen) configuration.
  *
  *  ISP block: YEE1
  *  ISP1: YEE1 is used.
@@ -57,24 +57,24 @@ struct ia_css_nr_config {
  *       (YEE2 is used for Still.)
  */
 struct ia_css_ee_config {
-	ia_css_u5_11 gain;	  /**< The strength of sharpness.
+	ia_css_u5_11 gain;	  /** The strength of sharpness.
 					u5.11, [0,65535],
 					default 8192(4.0), ineffective 0 */
-	ia_css_u8_8 threshold;    /**< The threshold that divides noises from
+	ia_css_u8_8 threshold;    /** The threshold that divides noises from
 					edge.
 					u8.8, [0,65535],
 					default 256(1.0), ineffective 65535 */
-	ia_css_u5_11 detail_gain; /**< The strength of sharpness in pell-mell
+	ia_css_u5_11 detail_gain; /** The strength of sharpness in pell-mell
 					area.
 					u5.11, [0,65535],
 					default 2048(1.0), ineffective 0 */
 };
 
-/** YNR and YEE (sharpen) configuration.
+/* YNR and YEE (sharpen) configuration.
  */
 struct ia_css_yee_config {
-	struct ia_css_nr_config nr; /**< The NR configuration. */
-	struct ia_css_ee_config ee; /**< The EE configuration. */
+	struct ia_css_nr_config nr; /** The NR configuration. */
+	struct ia_css_ee_config ee; /** The EE configuration. */
 };
 
 #endif /* __IA_CSS_YNR_TYPES_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ynr/ynr_2/ia_css_ynr2_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ynr/ynr_2/ia_css_ynr2_types.h
index e0a0b10ac5fa540c0e569e3f4b57f3ca2047ec54..83161a24207d9b50e69df725781efd6d5b62fe34 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ynr/ynr_2/ia_css_ynr2_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/ynr/ynr_2/ia_css_ynr2_types.h
@@ -15,11 +15,11 @@
 #ifndef __IA_CSS_YNR2_TYPES_H
 #define __IA_CSS_YNR2_TYPES_H
 
-/** @file
+/* @file
 * CSS-API header file for Y(Luma) Noise Reduction.
 */
 
-/** Y(Luma) Noise Reduction configuration.
+/* Y(Luma) Noise Reduction configuration.
  *
  *  ISP block: YNR2 & YEE2
  * (ISP1: YNR1 and YEE1 are used.)
@@ -27,21 +27,21 @@
  *  ISP2: YNR2 and YEE2 are used for Still.
  */
 struct ia_css_ynr_config {
-	uint16_t edge_sense_gain_0;   /**< Sensitivity of edge in dark area.
+	uint16_t edge_sense_gain_0;   /** Sensitivity of edge in dark area.
 					u13.0, [0,8191],
 					default 1000, ineffective 0 */
-	uint16_t edge_sense_gain_1;   /**< Sensitivity of edge in bright area.
+	uint16_t edge_sense_gain_1;   /** Sensitivity of edge in bright area.
 					u13.0, [0,8191],
 					default 1000, ineffective 0 */
-	uint16_t corner_sense_gain_0; /**< Sensitivity of corner in dark area.
+	uint16_t corner_sense_gain_0; /** Sensitivity of corner in dark area.
 					u13.0, [0,8191],
 					default 1000, ineffective 0 */
-	uint16_t corner_sense_gain_1; /**< Sensitivity of corner in bright area.
+	uint16_t corner_sense_gain_1; /** Sensitivity of corner in bright area.
 					u13.0, [0,8191],
 					default 1000, ineffective 0 */
 };
 
-/** Fringe Control configuration.
+/* Fringe Control configuration.
  *
  *  ISP block: FC2 (FC2 is used with YNR2/YEE2.)
  * (ISP1: FC2 is not used.)
@@ -49,43 +49,43 @@ struct ia_css_ynr_config {
  *  ISP2: FC2 is used for Still.
  */
 struct ia_css_fc_config {
-	uint8_t  gain_exp;   /**< Common exponent of gains.
+	uint8_t  gain_exp;   /** Common exponent of gains.
 				u8.0, [0,13],
 				default 1, ineffective 0 */
-	uint16_t coring_pos_0; /**< Coring threshold for positive edge in dark area.
+	uint16_t coring_pos_0; /** Coring threshold for positive edge in dark area.
 				u0.13, [0,8191],
 				default 0(0), ineffective 0 */
-	uint16_t coring_pos_1; /**< Coring threshold for positive edge in bright area.
+	uint16_t coring_pos_1; /** Coring threshold for positive edge in bright area.
 				u0.13, [0,8191],
 				default 0(0), ineffective 0 */
-	uint16_t coring_neg_0; /**< Coring threshold for negative edge in dark area.
+	uint16_t coring_neg_0; /** Coring threshold for negative edge in dark area.
 				u0.13, [0,8191],
 				default 0(0), ineffective 0 */
-	uint16_t coring_neg_1; /**< Coring threshold for negative edge in bright area.
+	uint16_t coring_neg_1; /** Coring threshold for negative edge in bright area.
 				u0.13, [0,8191],
 				default 0(0), ineffective 0 */
-	uint16_t gain_pos_0; /**< Gain for positive edge in dark area.
+	uint16_t gain_pos_0; /** Gain for positive edge in dark area.
 				u0.13, [0,8191],
 				default 4096(0.5), ineffective 0 */
-	uint16_t gain_pos_1; /**< Gain for positive edge in bright area.
+	uint16_t gain_pos_1; /** Gain for positive edge in bright area.
 				u0.13, [0,8191],
 				default 4096(0.5), ineffective 0 */
-	uint16_t gain_neg_0; /**< Gain for negative edge in dark area.
+	uint16_t gain_neg_0; /** Gain for negative edge in dark area.
 				u0.13, [0,8191],
 				default 4096(0.5), ineffective 0 */
-	uint16_t gain_neg_1; /**< Gain for negative edge in bright area.
+	uint16_t gain_neg_1; /** Gain for negative edge in bright area.
 				u0.13, [0,8191],
 				default 4096(0.5), ineffective 0 */
-	uint16_t crop_pos_0; /**< Limit for positive edge in dark area.
+	uint16_t crop_pos_0; /** Limit for positive edge in dark area.
 				u0.13, [0,8191],
 				default/ineffective 8191(almost 1.0) */
-	uint16_t crop_pos_1; /**< Limit for positive edge in bright area.
+	uint16_t crop_pos_1; /** Limit for positive edge in bright area.
 				u0.13, [0,8191],
 				default/ineffective 8191(almost 1.0) */
-	int16_t  crop_neg_0; /**< Limit for negative edge in dark area.
+	int16_t  crop_neg_0; /** Limit for negative edge in dark area.
 				s0.13, [-8192,0],
 				default/ineffective -8192(-1.0) */
-	int16_t  crop_neg_1; /**< Limit for negative edge in bright area.
+	int16_t  crop_neg_1; /** Limit for negative edge in bright area.
 				s0.13, [-8192,0],
 				default/ineffective -8192(-1.0) */
 };
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/yuv_ls/yuv_ls_1.0/ia_css_yuv_ls_param.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/yuv_ls/yuv_ls_1.0/ia_css_yuv_ls_param.h
index 63a8703c9c44bc0a4e1f8b8527bd0ac7bc8734e6..c9ff0cb2493aa3fe13faa148565082c5891b8b78 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/yuv_ls/yuv_ls_1.0/ia_css_yuv_ls_param.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/yuv_ls/yuv_ls_1.0/ia_css_yuv_ls_param.h
@@ -24,7 +24,7 @@
  */
 #define NUM_YUV_LS 2
 
-/** YUV load/store */
+/* YUV load/store */
 struct sh_css_isp_yuv_ls_isp_config {
 	unsigned base_address[NUM_YUV_LS];
 	unsigned width[NUM_YUV_LS];
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/memory_realloc.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/memory_realloc.c
index e814f1bf19f792d3c1d848f59145d15964a4453d..6512a1ceb9d34d1cc3f5f6ddcee6ef093f9b1436 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/memory_realloc.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/memory_realloc.c
@@ -1,4 +1,4 @@
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h
index c65194619a34a832febcbb395a82b82c11980bf2..5a58abe2b2333fbed5a0573a34d396e38e41f3e1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h
@@ -269,7 +269,7 @@ enum ia_css_err
 ia_css_binary_find(struct ia_css_binary_descr *descr,
 		   struct ia_css_binary *binary);
 
-/** @brief Get the shading information of the specified shading correction type.
+/* @brief Get the shading information of the specified shading correction type.
  *
  * @param[in] binary: The isp binary which has the shading correction.
  * @param[in] type: The shading correction type.
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c
index e028e460ae4c3c4d901967a4f2da7dd9d54b7676..295e07049393225696836e36b5af270d5596e2fb 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c
@@ -972,7 +972,7 @@ ia_css_binary_uninit(void)
 	return IA_CSS_SUCCESS;
 }
 
-/** @brief Compute decimation factor for 3A statistics and shading correction.
+/* @brief Compute decimation factor for 3A statistics and shading correction.
  *
  * @param[in]	width	Frame width in pixels.
  * @param[in]	height	Frame height in pixels.
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c
index 42d9a85088585092ab64b0b09aed0340f05acc7c..e50d9f2e2609e408dca2c90519c6806243c6aa8f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c
@@ -152,7 +152,7 @@ void ia_css_queue_map(
 		unmap_buffer_type_to_queue_id(thread_id, buf_type);
 }
 
-/**
+/*
  * @brief Query the internal queue ID.
  */
 bool ia_css_query_internal_queue_id(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/interface/ia_css_debug.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/interface/ia_css_debug.h
index 3c8dcfd4bbc6bc70e300e491c15b8bb86cd7c095..4b28b2a0863a70528c321a1b11874e203d84786c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/interface/ia_css_debug.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/interface/ia_css_debug.h
@@ -54,21 +54,21 @@ extern unsigned int ia_css_debug_trace_level;
  *  Values can be combined to dump a combination of sets.
  */
 enum ia_css_debug_enable_param_dump {
-	IA_CSS_DEBUG_DUMP_FPN = 1 << 0, /**< FPN table */
-	IA_CSS_DEBUG_DUMP_OB = 1 << 1,  /**< OB table */
-	IA_CSS_DEBUG_DUMP_SC = 1 << 2,  /**< Shading table */
-	IA_CSS_DEBUG_DUMP_WB = 1 << 3,  /**< White balance */
-	IA_CSS_DEBUG_DUMP_DP = 1 << 4,  /**< Defect Pixel */
-	IA_CSS_DEBUG_DUMP_BNR = 1 << 5,  /**< Bayer Noise Reductions */
-	IA_CSS_DEBUG_DUMP_S3A = 1 << 6,  /**< 3A Statistics */
-	IA_CSS_DEBUG_DUMP_DE = 1 << 7,  /**< De Mosaicing */
-	IA_CSS_DEBUG_DUMP_YNR = 1 << 8,  /**< Luma Noise Reduction */
-	IA_CSS_DEBUG_DUMP_CSC = 1 << 9,  /**< Color Space Conversion */
-	IA_CSS_DEBUG_DUMP_GC = 1 << 10,  /**< Gamma Correction */
-	IA_CSS_DEBUG_DUMP_TNR = 1 << 11,  /**< Temporal Noise Reduction */
-	IA_CSS_DEBUG_DUMP_ANR = 1 << 12,  /**< Advanced Noise Reduction */
-	IA_CSS_DEBUG_DUMP_CE = 1 << 13,  /**< Chroma Enhancement */
-	IA_CSS_DEBUG_DUMP_ALL = 1 << 14  /**< Dump all device parameters */
+	IA_CSS_DEBUG_DUMP_FPN = 1 << 0, /** FPN table */
+	IA_CSS_DEBUG_DUMP_OB = 1 << 1,  /** OB table */
+	IA_CSS_DEBUG_DUMP_SC = 1 << 2,  /** Shading table */
+	IA_CSS_DEBUG_DUMP_WB = 1 << 3,  /** White balance */
+	IA_CSS_DEBUG_DUMP_DP = 1 << 4,  /** Defect Pixel */
+	IA_CSS_DEBUG_DUMP_BNR = 1 << 5,  /** Bayer Noise Reductions */
+	IA_CSS_DEBUG_DUMP_S3A = 1 << 6,  /** 3A Statistics */
+	IA_CSS_DEBUG_DUMP_DE = 1 << 7,  /** De Mosaicing */
+	IA_CSS_DEBUG_DUMP_YNR = 1 << 8,  /** Luma Noise Reduction */
+	IA_CSS_DEBUG_DUMP_CSC = 1 << 9,  /** Color Space Conversion */
+	IA_CSS_DEBUG_DUMP_GC = 1 << 10,  /** Gamma Correction */
+	IA_CSS_DEBUG_DUMP_TNR = 1 << 11,  /** Temporal Noise Reduction */
+	IA_CSS_DEBUG_DUMP_ANR = 1 << 12,  /** Advanced Noise Reduction */
+	IA_CSS_DEBUG_DUMP_CE = 1 << 13,  /** Chroma Enhancement */
+	IA_CSS_DEBUG_DUMP_ALL = 1 << 14  /** Dump all device parameters */
 };
 
 #define IA_CSS_ERROR(fmt, ...) \
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c
index 0fa7cb2423d863de2bcee3d86f7c3baecdfc84c3..dd1127a21494d866f1311d9c96c55a39fbcbdfa0 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c
@@ -1617,7 +1617,7 @@ void ia_css_debug_print_sp_debug_state(const struct sh_css_sp_debug_state
 
 #elif SP_DEBUG == SP_DEBUG_TRACE
 
-/**
+/*
  * This is just an example how TRACE_FILE_ID (see ia_css_debug.sp.h) will
  * me mapped on the file name string.
  *
@@ -2267,7 +2267,7 @@ void ia_css_debug_dump_debug_info(const char *context)
 	return;
 }
 
-/** this function is for debug use, it can make SP go to sleep
+/* this function is for debug use, it can make SP go to sleep
   state after each frame, then user can dump the stable SP dmem.
   this function can be called after ia_css_start_sp()
   and before sh_css_init_buffer_queues()
@@ -2526,7 +2526,7 @@ void ia_css_debug_dump_ddr_debug_queue(void)
 }
 */
 
-/**
+/*
  * @brief Initialize the debug mode.
  * Refer to "ia_css_debug.h" for more details.
  */
@@ -2537,7 +2537,7 @@ bool ia_css_debug_mode_init(void)
 	return rc;
 }
 
-/**
+/*
  * @brief Disable the DMA channel.
  * Refer to "ia_css_debug.h" for more details.
  */
@@ -2552,7 +2552,7 @@ ia_css_debug_mode_disable_dma_channel(int dma_id,
 	return rc;
 }
 
-/**
+/*
  * @brief Enable the DMA channel.
  * Refer to "ia_css_debug.h" for more details.
  */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/event/src/event.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/event/src/event.c
index 2698c3e1adb0787fad774fbfa544bfce208c6811..239c06730bf4f27a2145ca99c9ebe59745835f82 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/event/src/event.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/event/src/event.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
@@ -52,7 +52,7 @@ more details.
 
 #include "ia_css_queue.h"	/* host_sp_enqueue_XXX */
 #include "ia_css_event.h"	/* ia_css_event_encode */
-/**
+/*
  * @brief Encode the information into the software-event.
  * Refer to "sw_event_public.h" for details.
  */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/eventq/src/eventq.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/eventq/src/eventq.c
index 56d6858890ec1dc2d0289947a651c517b39002fa..913a4bf7a34fc24522f762b2e52530b569e3a917 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/eventq/src/eventq.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/eventq/src/eventq.c
@@ -37,7 +37,7 @@ int ia_css_eventq_recv(
 	return error;
 }
 
-/**
+/*
  * @brief The Host sends the event to the SP.
  * Refer to "sh_css_sp.h" for details.
  */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/frame/interface/ia_css_frame.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/frame/interface/ia_css_frame.h
index c7e07b79f4e55be5d91ac0f974ae61685fa9d872..89ad8080ceb126888dbebe2bff3477de7cf91f73 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/frame/interface/ia_css_frame.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/frame/interface/ia_css_frame.h
@@ -41,7 +41,7 @@ more details.
 /*********************************************************************
 ****	Frame INFO APIs
 **********************************************************************/
-/** @brief Sets the given width and alignment to the frame info
+/* @brief Sets the given width and alignment to the frame info
  *
  * @param
  * @param[in]	info        The info to which parameters would set
@@ -53,7 +53,7 @@ void ia_css_frame_info_set_width(struct ia_css_frame_info *info,
 	unsigned int width,
 	unsigned int min_padded_width);
 
-/** @brief Sets the given format to the frame info
+/* @brief Sets the given format to the frame info
  *
  * @param
  * @param[in]	info        The info to which parameters would set
@@ -63,7 +63,7 @@ void ia_css_frame_info_set_width(struct ia_css_frame_info *info,
 void ia_css_frame_info_set_format(struct ia_css_frame_info *info,
 	enum ia_css_frame_format format);
 
-/** @brief Sets the frame info with the given parameters
+/* @brief Sets the frame info with the given parameters
  *
  * @param
  * @param[in]	info        The info to which parameters would set
@@ -79,7 +79,7 @@ void ia_css_frame_info_init(struct ia_css_frame_info *info,
 	enum ia_css_frame_format format,
 	unsigned int aligned);
 
-/** @brief Checks whether 2 frame infos has the same resolution
+/* @brief Checks whether 2 frame infos has the same resolution
  *
  * @param
  * @param[in]	frame_a         The first frame to be compared
@@ -90,7 +90,7 @@ bool ia_css_frame_info_is_same_resolution(
 	const struct ia_css_frame_info *info_a,
 	const struct ia_css_frame_info *info_b);
 
-/** @brief Check the frame info is valid
+/* @brief Check the frame info is valid
  *
  * @param
  * @param[in]	info       The frame attributes to be initialized
@@ -102,7 +102,7 @@ enum ia_css_err ia_css_frame_check_info(const struct ia_css_frame_info *info);
 ****	Frame APIs
 **********************************************************************/
 
-/** @brief Initialize the plane depending on the frame type
+/* @brief Initialize the plane depending on the frame type
  *
  * @param
  * @param[in]	frame           The frame attributes to be initialized
@@ -110,7 +110,7 @@ enum ia_css_err ia_css_frame_check_info(const struct ia_css_frame_info *info);
  */
 enum ia_css_err ia_css_frame_init_planes(struct ia_css_frame *frame);
 
-/** @brief Free an array of frames
+/* @brief Free an array of frames
  *
  * @param
  * @param[in]	num_frames      The number of frames to be freed in the array
@@ -120,7 +120,7 @@ enum ia_css_err ia_css_frame_init_planes(struct ia_css_frame *frame);
 void ia_css_frame_free_multiple(unsigned int num_frames,
 	struct ia_css_frame **frames_array);
 
-/** @brief Allocate a CSS frame structure of given size in bytes..
+/* @brief Allocate a CSS frame structure of given size in bytes..
  *
  * @param	frame	The allocated frame.
  * @param[in]	size_bytes	The frame size in bytes.
@@ -135,7 +135,7 @@ enum ia_css_err ia_css_frame_allocate_with_buffer_size(
 	const unsigned int size_bytes,
 	const bool contiguous);
 
-/** @brief Check whether 2 frames are same type
+/* @brief Check whether 2 frames are same type
  *
  * @param
  * @param[in]	frame_a         The first frame to be compared
@@ -146,7 +146,7 @@ bool ia_css_frame_is_same_type(
 	const struct ia_css_frame *frame_a,
 	const struct ia_css_frame *frame_b);
 
-/** @brief Configure a dma port from frame info
+/* @brief Configure a dma port from frame info
  *
  * @param
  * @param[in]	config         The DAM port configuration
@@ -158,7 +158,7 @@ void ia_css_dma_configure_from_info(
 	const struct ia_css_frame_info *info);
 
 #ifdef ISP2401
-/** @brief Finds the cropping resolution
+/* @brief Finds the cropping resolution
  * This function finds the maximum cropping resolution in an input image keeping
  * the aspect ratio for the given output resolution.Calculates the coordinates
  * for cropping from the center and returns the starting pixel location of the
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/frame/src/frame.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/frame/src/frame.c
index f1a943cf04c01a17dd29cbda7158d0622ce00373..5faa89ad8a230e31c2ca8853cc945dd83cc1ebf5 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/frame/src/frame.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/frame/src/frame.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c
index 11d3995ba0dba8e8090f172bbc28e79413ff8779..adefa57820a4ac46a59467ed6e2c06792161af98 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c
index d9a5f3e9283acbe29f58416b2f9b39b99fe39d00..8dc74927e9a2fb997e031189610f7088e94cf7bd 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isp_param/interface/ia_css_isp_param_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isp_param/interface/ia_css_isp_param_types.h
index 8e651b80345a0ea47862c78fdfa3ef1359d75bf3..2283dd1c1c9bdc6eadaf13bbfdad5721e5cae275 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isp_param/interface/ia_css_isp_param_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isp_param/interface/ia_css_isp_param_types.h
@@ -53,7 +53,7 @@ enum ia_css_param_class {
 };
 #define IA_CSS_NUM_PARAM_CLASSES (IA_CSS_PARAM_CLASS_STATE + 1)
 
-/** ISP parameter descriptor */
+/* ISP parameter descriptor */
 struct ia_css_isp_parameter {
 	uint32_t offset; /* Offset in isp_<mem>)parameters, etc. */
 	uint32_t size;   /* Disabled if 0 */
@@ -77,10 +77,10 @@ struct ia_css_isp_param_isp_segments {
 
 /* Memory offsets in binary info */
 struct ia_css_isp_param_memory_offsets {
-	uint32_t offsets[IA_CSS_NUM_PARAM_CLASSES];  /**< offset wrt hdr in bytes */
+	uint32_t offsets[IA_CSS_NUM_PARAM_CLASSES];  /** offset wrt hdr in bytes */
 };
 
-/** Offsets for ISP kernel parameters per isp memory.
+/* Offsets for ISP kernel parameters per isp memory.
  * Only relevant for standard ISP binaries, not ACC or SP.
  */
 union ia_css_all_memory_offsets {
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isp_param/src/isp_param.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isp_param/src/isp_param.c
index 832d9e16edebf45813dd224f00cf7092ea0d882b..f793ce125f02a85c494d71ef3c597adcefc01d2b 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isp_param/src/isp_param.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isp_param/src/isp_param.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h
index 02bf908d94e6c0a25f458ee909ab4fda454f6e71..4cf2defe9ef032589a144a68240224f2caca6f21 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h
@@ -44,7 +44,7 @@ more details.
  * Virtual Input System. (Input System 2401)
  */
 typedef input_system_cfg_t	ia_css_isys_descr_t;
-/** end of Virtual Input System */
+/* end of Virtual Input System */
 #endif
 
 #if defined(USE_INPUT_SYSTEM_VERSION_2) || defined(USE_INPUT_SYSTEM_VERSION_2401)
@@ -112,7 +112,7 @@ unsigned int ia_css_isys_rx_translate_irq_infos(unsigned int bits);
 
 #endif /* #if !defined(USE_INPUT_SYSTEM_VERSION_2401) */
 
-/** @brief Translate format and compression to format type.
+/* @brief Translate format and compression to format type.
  *
  * @param[in]	input_format	The input format.
  * @param[in]	compression	The compression scheme.
@@ -195,7 +195,7 @@ extern void ia_css_isys_stream2mmio_sid_rmgr_release(
 	stream2mmio_ID_t	stream2mmio,
 	stream2mmio_sid_ID_t	*sid);
 
-/** end of Virtual Input System */
+/* end of Virtual Input System */
 #endif
 
 #endif				/* __IA_CSS_ISYS_H__ */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c
index d1d4f79c00f19c704dd795999c5ef6acde8d9125..3b04dc51335a50a859805bc5f0999cb0ac999eba 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/ibuf_ctrl_rmgr.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/ibuf_ctrl_rmgr.c
index faef97672eac285b7e69f3b0373126eba3d7295e..d8c3b75d7faca3b8502cdd2f2123d3e39ab7bd4c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/ibuf_ctrl_rmgr.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/ibuf_ctrl_rmgr.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
  * Support for Intel Camera Imaging ISP subsystem.
  * Copyright (c) 2010 - 2015, Intel Corporation.
  *
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_dma_rmgr.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_dma_rmgr.c
index 5032627342d95993297b0f85a97ad64dc059b68d..4def4a542b7db3b524d7c473bb23e51669d4e77c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_dma_rmgr.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_dma_rmgr.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c
index 239ef310bdeb6f653eb345ed06513eaae0e88e05..4122084fd237443fe4fe39672c55366af9553fc9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_stream2mmio_rmgr.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_stream2mmio_rmgr.c
index a93c7f44ff1283e18ba1c26821a9cbdbfb0dace5..222b294c0ab000152f5148ea49a9f5324db2475d 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_stream2mmio_rmgr.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_stream2mmio_rmgr.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c
index 46a157f64343c21e176efc976fcea11140b0efbc..70f6cb5e5918d185fb9f23b1746e39c808c3bd39 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c
index 0f1e8a2f6b104a45e682e5126161376b213b1471..90922a7acefdcaca2f6aba26fed2c60c5adc6cb2 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
@@ -166,7 +166,7 @@ static int32_t calculate_stride(
 	bool	raw_packed,
 	int32_t	align_in_bytes);
 
-/** end of Forwarded Declaration */
+/* end of Forwarded Declaration */
 
 /**************************************************
  *
@@ -292,7 +292,7 @@ ia_css_isys_error_t ia_css_isys_stream_calculate_cfg(
 	return rc;
 }
 
-/** end of Public Methods */
+/* end of Public Methods */
 
 /**************************************************
  *
@@ -894,5 +894,5 @@ static csi_mipi_packet_type_t get_csi_mipi_packet_type(
 
 	return packet_type;
 }
-/** end of Private Methods */
+/* end of Private Methods */
 #endif
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/interface/ia_css_pipeline.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/interface/ia_css_pipeline.h
index 90646f5f888568299a6c55b5218828fc4bd08ded..e64936e2d46e5b7fe9d9d6ed83a1dbb6e9027464 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/interface/ia_css_pipeline.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/interface/ia_css_pipeline.h
@@ -103,7 +103,7 @@ struct ia_css_pipeline_stage_desc {
 	struct ia_css_frame *vf_frame;
 };
 
-/** @brief initialize the pipeline module
+/* @brief initialize the pipeline module
  *
  * @return    None
  *
@@ -112,7 +112,7 @@ struct ia_css_pipeline_stage_desc {
  */
 void ia_css_pipeline_init(void);
 
-/** @brief initialize the pipeline structure with default values
+/* @brief initialize the pipeline structure with default values
  *
  * @param[out] pipeline  structure to be initialized with defaults
  * @param[in] pipe_id
@@ -129,7 +129,7 @@ enum ia_css_err ia_css_pipeline_create(
 	unsigned int pipe_num,
 	unsigned int dvs_frame_delay);
 
-/** @brief destroy a pipeline
+/* @brief destroy a pipeline
  *
  * @param[in] pipeline
  * @return    None
@@ -138,7 +138,7 @@ enum ia_css_err ia_css_pipeline_create(
 void ia_css_pipeline_destroy(struct ia_css_pipeline *pipeline);
 
 
-/** @brief Starts a pipeline
+/* @brief Starts a pipeline
  *
  * @param[in] pipe_id
  * @param[in] pipeline
@@ -148,7 +148,7 @@ void ia_css_pipeline_destroy(struct ia_css_pipeline *pipeline);
 void ia_css_pipeline_start(enum ia_css_pipe_id pipe_id,
 			   struct ia_css_pipeline *pipeline);
 
-/** @brief Request to stop a pipeline
+/* @brief Request to stop a pipeline
  *
  * @param[in] pipeline
  * @return                     IA_CSS_SUCCESS or error code upon error.
@@ -156,7 +156,7 @@ void ia_css_pipeline_start(enum ia_css_pipe_id pipe_id,
  */
 enum ia_css_err ia_css_pipeline_request_stop(struct ia_css_pipeline *pipeline);
 
-/** @brief Check whether pipeline has stopped
+/* @brief Check whether pipeline has stopped
  *
  * @param[in] pipeline
  * @return    true if the pipeline has stopped
@@ -164,7 +164,7 @@ enum ia_css_err ia_css_pipeline_request_stop(struct ia_css_pipeline *pipeline);
  */
 bool ia_css_pipeline_has_stopped(struct ia_css_pipeline *pipe);
 
-/** @brief clean all the stages pipeline and make it as new
+/* @brief clean all the stages pipeline and make it as new
  *
  * @param[in] pipeline
  * @return    None
@@ -172,7 +172,7 @@ bool ia_css_pipeline_has_stopped(struct ia_css_pipeline *pipe);
  */
 void ia_css_pipeline_clean(struct ia_css_pipeline *pipeline);
 
-/** @brief Add a stage to pipeline.
+/* @brief Add a stage to pipeline.
  *
  * @param     pipeline               Pointer to the pipeline to be added to.
  * @param[in] stage_desc       The description of the stage
@@ -188,7 +188,7 @@ enum ia_css_err ia_css_pipeline_create_and_add_stage(
 			struct ia_css_pipeline_stage_desc *stage_desc,
 			struct ia_css_pipeline_stage **stage);
 
-/** @brief Finalize the stages in a pipeline
+/* @brief Finalize the stages in a pipeline
  *
  * @param     pipeline               Pointer to the pipeline to be added to.
  * @return                     None
@@ -198,7 +198,7 @@ enum ia_css_err ia_css_pipeline_create_and_add_stage(
 void ia_css_pipeline_finalize_stages(struct ia_css_pipeline *pipeline,
 			bool continuous);
 
-/** @brief gets a stage from the pipeline
+/* @brief gets a stage from the pipeline
  *
  * @param[in] pipeline
  * @return                     IA_CSS_SUCCESS or error code upon error.
@@ -208,7 +208,7 @@ enum ia_css_err ia_css_pipeline_get_stage(struct ia_css_pipeline *pipeline,
 			  int mode,
 			  struct ia_css_pipeline_stage **stage);
 
-/** @brief Gets a pipeline stage corresponding Firmware handle from the pipeline
+/* @brief Gets a pipeline stage corresponding Firmware handle from the pipeline
  *
  * @param[in] pipeline
  * @param[in] fw_handle
@@ -221,7 +221,7 @@ enum ia_css_err ia_css_pipeline_get_stage_from_fw(struct ia_css_pipeline *pipeli
 			  uint32_t fw_handle,
 			  struct ia_css_pipeline_stage **stage);
 
-/** @brief Gets the Firmware handle correponding the stage num from the pipeline
+/* @brief Gets the Firmware handle correponding the stage num from the pipeline
  *
  * @param[in] pipeline
  * @param[in] stage_num
@@ -234,7 +234,7 @@ enum ia_css_err ia_css_pipeline_get_fw_from_stage(struct ia_css_pipeline *pipeli
 			  uint32_t stage_num,
 			  uint32_t *fw_handle);
 
-/** @brief gets the output stage from the pipeline
+/* @brief gets the output stage from the pipeline
  *
  * @param[in] pipeline
  * @return                     IA_CSS_SUCCESS or error code upon error.
@@ -245,7 +245,7 @@ enum ia_css_err ia_css_pipeline_get_output_stage(
 			int mode,
 			struct ia_css_pipeline_stage **stage);
 
-/** @brief Checks whether the pipeline uses params
+/* @brief Checks whether the pipeline uses params
  *
  * @param[in] pipeline
  * @return    true if the pipeline uses params
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c
index 62d13978475d87b1d482df5df8f79e1c09aea4fa..8f93d29d1c5173c36c1f9581ec84b2a3a045a609 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
@@ -114,7 +114,7 @@ void ia_css_pipeline_map(unsigned int pipe_num, bool map)
 	IA_CSS_LEAVE_PRIVATE("void");
 }
 
-/** @brief destroy a pipeline
+/* @brief destroy a pipeline
  *
  * @param[in] pipeline
  * @return    None
@@ -187,7 +187,7 @@ void ia_css_pipeline_start(enum ia_css_pipe_id pipe_id,
 	      "ia_css_pipeline_start() leave: return_void\n");
 }
 
-/**
+/*
  * @brief Query the SP thread ID.
  * Refer to "sh_css_internal.h" for details.
  */
@@ -285,7 +285,7 @@ void ia_css_pipeline_clean(struct ia_css_pipeline *pipeline)
 	IA_CSS_LEAVE_PRIVATE("void");
 }
 
-/** @brief Add a stage to pipeline.
+/* @brief Add a stage to pipeline.
  *
  * @param       pipeline      Pointer to the pipeline to be added to.
  * @param[in]   stage_desc    The description of the stage
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/queue/interface/ia_css_queue.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/queue/interface/ia_css_queue.h
index e50a0f8137530bbbd60bb230c4e6a2b771cbe77c..aaf2e247cafbe0d2ed2eb6fbc02219e3b49257fb 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/queue/interface/ia_css_queue.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/queue/interface/ia_css_queue.h
@@ -51,7 +51,7 @@ typedef struct ia_css_queue ia_css_queue_t;
 /*****************************************************************************
  * Queue Public APIs
  *****************************************************************************/
-/** @brief Initialize a local queue instance.
+/* @brief Initialize a local queue instance.
  *
  * @param[out] qhandle. Handle to queue instance for use with API
  * @param[in]  desc.   Descriptor with queue properties filled-in
@@ -63,7 +63,7 @@ extern int ia_css_queue_local_init(
 			ia_css_queue_t *qhandle,
 			ia_css_queue_local_t *desc);
 
-/** @brief Initialize a remote queue instance
+/* @brief Initialize a remote queue instance
  *
  * @param[out] qhandle. Handle to queue instance for use with API
  * @param[in]  desc.   Descriptor with queue properties filled-in
@@ -74,7 +74,7 @@ extern int ia_css_queue_remote_init(
 			ia_css_queue_t *qhandle,
 			ia_css_queue_remote_t *desc);
 
-/** @brief Uninitialize a queue instance
+/* @brief Uninitialize a queue instance
  *
  * @param[in]  qhandle. Handle to queue instance
  * @return     0 - Successful uninit.
@@ -83,7 +83,7 @@ extern int ia_css_queue_remote_init(
 extern int ia_css_queue_uninit(
 			ia_css_queue_t *qhandle);
 
-/** @brief Enqueue an item in the queue instance
+/* @brief Enqueue an item in the queue instance
  *
  * @param[in]  qhandle. Handle to queue instance
  * @param[in]  item.    Object to be enqueued.
@@ -96,7 +96,7 @@ extern int ia_css_queue_enqueue(
 			ia_css_queue_t *qhandle,
 			uint32_t item);
 
-/** @brief Dequeue an item from the queue instance
+/* @brief Dequeue an item from the queue instance
  *
  * @param[in]  qhandle. Handle to queue instance
  * @param[out] item.    Object to be dequeued into this item.
@@ -110,7 +110,7 @@ extern int ia_css_queue_dequeue(
 			ia_css_queue_t *qhandle,
 			uint32_t *item);
 
-/** @brief Check if the queue is empty
+/* @brief Check if the queue is empty
  *
  * @param[in]  qhandle.  Handle to queue instance
  * @param[in]  is_empty  True if empty, False if not.
@@ -123,7 +123,7 @@ extern int ia_css_queue_is_empty(
 			ia_css_queue_t *qhandle,
 			bool *is_empty);
 
-/** @brief Check if the queue is full
+/* @brief Check if the queue is full
  *
  * @param[in]  qhandle.  Handle to queue instance
  * @param[in]  is_full   True if Full, False if not.
@@ -136,7 +136,7 @@ extern int ia_css_queue_is_full(
 			ia_css_queue_t *qhandle,
 			bool *is_full);
 
-/** @brief Get used space in the queue
+/* @brief Get used space in the queue
  *
  * @param[in]  qhandle.  Handle to queue instance
  * @param[in]  size      Number of available elements in the queue
@@ -148,7 +148,7 @@ extern int ia_css_queue_get_used_space(
 			ia_css_queue_t *qhandle,
 			uint32_t *size);
 
-/** @brief Get free space in the queue
+/* @brief Get free space in the queue
  *
  * @param[in]  qhandle.  Handle to queue instance
  * @param[in]  size      Number of free elements in the queue
@@ -160,7 +160,7 @@ extern int ia_css_queue_get_free_space(
 			ia_css_queue_t *qhandle,
 			uint32_t *size);
 
-/** @brief Peek at an element in the queue
+/* @brief Peek at an element in the queue
  *
  * @param[in]  qhandle.  Handle to queue instance
  * @param[in]  offset   Offset of element to peek,
@@ -175,7 +175,7 @@ extern int ia_css_queue_peek(
 		uint32_t offset,
 		uint32_t *element);
 
-/** @brief Get the usable size for the queue
+/* @brief Get the usable size for the queue
  *
  * @param[in]  qhandle. Handle to queue instance
  * @param[out] size     Size value to be returned here.
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/queue/src/queue_access.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/queue/src/queue_access.c
index 946d4f2d21080099daa2aa76589cda6a8ba0cd37..7bb2b494836e8d16eaafe99415966b09f6436339 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/queue/src/queue_access.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/queue/src/queue_access.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr.c
index efa9c140484f6a2dc9a4f9b9fc1f4a811cfcbf33..370ff3816dbe0b4bcf2958cd18ff2673b08a39e3 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
@@ -44,7 +44,7 @@ enum ia_css_err ia_css_rmgr_init(void)
 	return err;
 }
 
-/**
+/*
  * @brief Uninitialize resource pool (host)
  */
 void ia_css_rmgr_uninit(void)
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c
index e56006c07ee863016f464b0bbd7d628a1d9da8ce..54239ac9d7c90a22c73e5781815fa1ae87bf8308 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c
@@ -20,13 +20,13 @@
 #include <memory_access.h>    /* mmmgr_malloc, mhmm_free */
 #include <ia_css_debug.h>
 
-/**
+/*
  * @brief VBUF resource handles
  */
 #define NUM_HANDLES 1000
 struct ia_css_rmgr_vbuf_handle handle_table[NUM_HANDLES];
 
-/**
+/*
  * @brief VBUF resource pool - refpool
  */
 struct ia_css_rmgr_vbuf_pool refpool = {
@@ -37,7 +37,7 @@ struct ia_css_rmgr_vbuf_pool refpool = {
 	NULL,			/* handles */
 };
 
-/**
+/*
  * @brief VBUF resource pool - writepool
  */
 struct ia_css_rmgr_vbuf_pool writepool = {
@@ -48,7 +48,7 @@ struct ia_css_rmgr_vbuf_pool writepool = {
 	NULL,			/* handles */
 };
 
-/**
+/*
  * @brief VBUF resource pool - hmmbufferpool
  */
 struct ia_css_rmgr_vbuf_pool hmmbufferpool = {
@@ -63,7 +63,7 @@ struct ia_css_rmgr_vbuf_pool *vbuf_ref = &refpool;
 struct ia_css_rmgr_vbuf_pool *vbuf_write = &writepool;
 struct ia_css_rmgr_vbuf_pool *hmm_buffer_pool = &hmmbufferpool;
 
-/**
+/*
  * @brief Initialize the reference count (host, vbuf)
  */
 static void rmgr_refcount_init_vbuf(void)
@@ -72,7 +72,7 @@ static void rmgr_refcount_init_vbuf(void)
 	memset(&handle_table, 0, sizeof(handle_table));
 }
 
-/**
+/*
  * @brief Retain the reference count for a handle (host, vbuf)
  *
  * @param handle	The pointer to the handle
@@ -109,7 +109,7 @@ void ia_css_rmgr_refcount_retain_vbuf(struct ia_css_rmgr_vbuf_handle **handle)
 	(*handle)->count++;
 }
 
-/**
+/*
  * @brief Release the reference count for a handle (host, vbuf)
  *
  * @param handle	The pointer to the handle
@@ -131,7 +131,7 @@ void ia_css_rmgr_refcount_release_vbuf(struct ia_css_rmgr_vbuf_handle **handle)
 	}
 }
 
-/**
+/*
  * @brief Initialize the resource pool (host, vbuf)
  *
  * @param pool	The pointer to the pool
@@ -163,7 +163,7 @@ enum ia_css_err ia_css_rmgr_init_vbuf(struct ia_css_rmgr_vbuf_pool *pool)
 	return err;
 }
 
-/**
+/*
  * @brief Uninitialize the resource pool (host, vbuf)
  *
  * @param pool	The pointer to the pool
@@ -197,7 +197,7 @@ void ia_css_rmgr_uninit_vbuf(struct ia_css_rmgr_vbuf_pool *pool)
 	}
 }
 
-/**
+/*
  * @brief Push a handle to the pool
  *
  * @param pool		The pointer to the pool
@@ -224,7 +224,7 @@ void rmgr_push_handle(struct ia_css_rmgr_vbuf_pool *pool,
 	assert(succes);
 }
 
-/**
+/*
  * @brief Pop a handle from the pool
  *
  * @param pool		The pointer to the pool
@@ -254,7 +254,7 @@ void rmgr_pop_handle(struct ia_css_rmgr_vbuf_pool *pool,
 	}
 }
 
-/**
+/*
  * @brief Acquire a handle from the pool (host, vbuf)
  *
  * @param pool		The pointer to the pool
@@ -302,7 +302,7 @@ void ia_css_rmgr_acq_vbuf(struct ia_css_rmgr_vbuf_pool *pool,
 	ia_css_rmgr_refcount_retain_vbuf(handle);
 }
 
-/**
+/*
  * @brief Release a handle to the pool (host, vbuf)
  *
  * @param pool		The pointer to the pool
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/spctrl/interface/ia_css_spctrl.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/spctrl/interface/ia_css_spctrl.h
index 27e9eb1e2102b3f29bd4b8403587c5c9f10cf9d2..bc4b1723369ec71664ea0821a2b5605428e2c44b 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/spctrl/interface/ia_css_spctrl.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/spctrl/interface/ia_css_spctrl.h
@@ -37,17 +37,17 @@ more details.
 
 
 typedef struct {
-	uint32_t        ddr_data_offset;       /**<  posistion of data in DDR */
-	uint32_t        dmem_data_addr;        /**< data segment address in dmem */
-	uint32_t        dmem_bss_addr;         /**< bss segment address in dmem  */
-	uint32_t        data_size;             /**< data segment size            */
-	uint32_t        bss_size;              /**< bss segment size             */
-	uint32_t        spctrl_config_dmem_addr; /** <location of dmem_cfg  in SP dmem */
-	uint32_t        spctrl_state_dmem_addr;  /** < location of state  in SP dmem */
-	unsigned int    sp_entry;                /** < entry function ptr on SP */
-	const void      *code;                   /**< location of firmware */
+	uint32_t        ddr_data_offset;       /**  posistion of data in DDR */
+	uint32_t        dmem_data_addr;        /** data segment address in dmem */
+	uint32_t        dmem_bss_addr;         /** bss segment address in dmem  */
+	uint32_t        data_size;             /** data segment size            */
+	uint32_t        bss_size;              /** bss segment size             */
+	uint32_t        spctrl_config_dmem_addr; /* <location of dmem_cfg  in SP dmem */
+	uint32_t        spctrl_state_dmem_addr;  /* < location of state  in SP dmem */
+	unsigned int    sp_entry;                /* < entry function ptr on SP */
+	const void      *code;                   /** location of firmware */
 	uint32_t         code_size;
-	char      *program_name;    /**< not used on hardware, only for simulation */
+	char      *program_name;    /** not used on hardware, only for simulation */
 } ia_css_spctrl_cfg;
 
 /* Get the code addr in DDR of SP */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/spctrl/interface/ia_css_spctrl_comm.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/spctrl/interface/ia_css_spctrl_comm.h
index 3af2891efca743bae47dbe55e208d33a90b3b377..2620d7514f79c63d55dfe1126e9a4687cba327dd 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/spctrl/interface/ia_css_spctrl_comm.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/spctrl/interface/ia_css_spctrl_comm.h
@@ -41,16 +41,16 @@ typedef enum {
 	IA_CSS_SP_SW_RUNNING
 } ia_css_spctrl_sp_sw_state;
 
-/** Structure to encapsulate required arguments for
+/* Structure to encapsulate required arguments for
  * initialization of SP DMEM using the SP itself
  */
 struct ia_css_sp_init_dmem_cfg {
-	ia_css_ptr      ddr_data_addr;  /**< data segment address in ddr  */
-	uint32_t        dmem_data_addr; /**< data segment address in dmem */
-	uint32_t        dmem_bss_addr;  /**< bss segment address in dmem  */
-	uint32_t        data_size;      /**< data segment size            */
-	uint32_t        bss_size;       /**< bss segment size             */
-	sp_ID_t         sp_id;          /** <sp Id */
+	ia_css_ptr      ddr_data_addr;  /** data segment address in ddr  */
+	uint32_t        dmem_data_addr; /** data segment address in dmem */
+	uint32_t        dmem_bss_addr;  /** bss segment address in dmem  */
+	uint32_t        data_size;      /** data segment size            */
+	uint32_t        bss_size;       /** bss segment size             */
+	sp_ID_t         sp_id;          /* <sp Id */
 };
 
 #define SIZE_OF_IA_CSS_SP_INIT_DMEM_CFG_STRUCT	\
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/spctrl/src/spctrl.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/spctrl/src/spctrl.c
index 6d9bceb60196dcb6fb613bf3cec3624e96265632..844e4d536cecc612c308bd756fd94ae44706a788 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/spctrl/src/spctrl.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/spctrl/src/spctrl.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
@@ -39,7 +39,7 @@ more details.
 
 struct spctrl_context_info {
 	struct ia_css_sp_init_dmem_cfg dmem_config;
-	uint32_t        spctrl_config_dmem_addr; /** location of dmem_cfg  in SP dmem */
+	uint32_t        spctrl_config_dmem_addr; /* location of dmem_cfg  in SP dmem */
 	uint32_t        spctrl_state_dmem_addr;
 	unsigned int    sp_entry;           /* entry function ptr on SP */
 	hrt_vaddress    code_addr;          /* sp firmware location in host mem-DDR*/
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/timer/src/timer.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/timer/src/timer.c
index 49c69e60ca5c5ed748bbab5c4bfad41a70c47d1b..b7dd18492a91b72f5d8bca9b28c8c3ae629c41ff 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/timer/src/timer.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/timer/src/timer.c
@@ -13,7 +13,7 @@
  * more details.
  */
 #else
-/**
+/*
 Support for Intel Camera Imaging ISP subsystem.
 Copyright (c) 2010 - 2015, Intel Corporation.
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c
index f92b6a9f77eb1faa5058d4326e8aa992193dac8c..322bb3de6098fd3e46342a4f0c50289ffe4d77f4 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c
@@ -176,7 +176,7 @@ static struct sh_css_hmm_buffer_record hmm_buffer_record[MAX_HMM_BUFFER_NUM];
 
 static bool fw_explicitly_loaded = false;
 
-/**
+/*
  * Local prototypes
  */
 
@@ -187,7 +187,7 @@ static enum ia_css_err
 sh_css_pipe_start(struct ia_css_stream *stream);
 
 #ifdef ISP2401
-/**
+/*
  * @brief Stop all "ia_css_pipe" instances in the target
  * "ia_css_stream" instance.
  *
@@ -207,7 +207,7 @@ sh_css_pipe_start(struct ia_css_stream *stream);
 static enum ia_css_err
 sh_css_pipes_stop(struct ia_css_stream *stream);
 
-/**
+/*
  * @brief Check if all "ia_css_pipe" instances in the target
  * "ia_css_stream" instance have stopped.
  *
@@ -1649,7 +1649,7 @@ ia_css_init(const struct ia_css_env *env,
 	void (*flush_func)(struct ia_css_acc_fw *fw);
 	hrt_data select, enable;
 
-	/**
+	/*
 	 * The C99 standard does not specify the exact object representation of structs;
 	 * the representation is compiler dependent.
 	 *
@@ -4617,23 +4617,23 @@ ia_css_pipe_dequeue_buffer(struct ia_css_pipe *pipe,
  * 4) "enum ia_css_event_type convert_event_sp_to_host_domain"	(sh_css.c)
  */
 static enum ia_css_event_type convert_event_sp_to_host_domain[] = {
-	IA_CSS_EVENT_TYPE_OUTPUT_FRAME_DONE,	/**< Output frame ready. */
-	IA_CSS_EVENT_TYPE_SECOND_OUTPUT_FRAME_DONE,	/**< Second output frame ready. */
-	IA_CSS_EVENT_TYPE_VF_OUTPUT_FRAME_DONE,	/**< Viewfinder Output frame ready. */
-	IA_CSS_EVENT_TYPE_SECOND_VF_OUTPUT_FRAME_DONE,	/**< Second viewfinder Output frame ready. */
-	IA_CSS_EVENT_TYPE_3A_STATISTICS_DONE,	/**< Indication that 3A statistics are available. */
-	IA_CSS_EVENT_TYPE_DIS_STATISTICS_DONE,	/**< Indication that DIS statistics are available. */
-	IA_CSS_EVENT_TYPE_PIPELINE_DONE,	/**< Pipeline Done event, sent after last pipeline stage. */
-	IA_CSS_EVENT_TYPE_FRAME_TAGGED,		/**< Frame tagged. */
-	IA_CSS_EVENT_TYPE_INPUT_FRAME_DONE,	/**< Input frame ready. */
-	IA_CSS_EVENT_TYPE_METADATA_DONE,	/**< Metadata ready. */
-	IA_CSS_EVENT_TYPE_LACE_STATISTICS_DONE,	/**< Indication that LACE statistics are available. */
-	IA_CSS_EVENT_TYPE_ACC_STAGE_COMPLETE,	/**< Extension stage executed. */
-	IA_CSS_EVENT_TYPE_TIMER,		/**< Timing measurement data. */
-	IA_CSS_EVENT_TYPE_PORT_EOF,		/**< End Of Frame event, sent when in buffered sensor mode. */
-	IA_CSS_EVENT_TYPE_FW_WARNING,		/**< Performance warning encountered by FW */
-	IA_CSS_EVENT_TYPE_FW_ASSERT,		/**< Assertion hit by FW */
-	0,					/** error if sp passes  SH_CSS_SP_EVENT_NR_OF_TYPES as a valid event. */
+	IA_CSS_EVENT_TYPE_OUTPUT_FRAME_DONE,	/** Output frame ready. */
+	IA_CSS_EVENT_TYPE_SECOND_OUTPUT_FRAME_DONE,	/** Second output frame ready. */
+	IA_CSS_EVENT_TYPE_VF_OUTPUT_FRAME_DONE,	/** Viewfinder Output frame ready. */
+	IA_CSS_EVENT_TYPE_SECOND_VF_OUTPUT_FRAME_DONE,	/** Second viewfinder Output frame ready. */
+	IA_CSS_EVENT_TYPE_3A_STATISTICS_DONE,	/** Indication that 3A statistics are available. */
+	IA_CSS_EVENT_TYPE_DIS_STATISTICS_DONE,	/** Indication that DIS statistics are available. */
+	IA_CSS_EVENT_TYPE_PIPELINE_DONE,	/** Pipeline Done event, sent after last pipeline stage. */
+	IA_CSS_EVENT_TYPE_FRAME_TAGGED,		/** Frame tagged. */
+	IA_CSS_EVENT_TYPE_INPUT_FRAME_DONE,	/** Input frame ready. */
+	IA_CSS_EVENT_TYPE_METADATA_DONE,	/** Metadata ready. */
+	IA_CSS_EVENT_TYPE_LACE_STATISTICS_DONE,	/** Indication that LACE statistics are available. */
+	IA_CSS_EVENT_TYPE_ACC_STAGE_COMPLETE,	/** Extension stage executed. */
+	IA_CSS_EVENT_TYPE_TIMER,		/** Timing measurement data. */
+	IA_CSS_EVENT_TYPE_PORT_EOF,		/** End Of Frame event, sent when in buffered sensor mode. */
+	IA_CSS_EVENT_TYPE_FW_WARNING,		/** Performance warning encountered by FW */
+	IA_CSS_EVENT_TYPE_FW_ASSERT,		/** Assertion hit by FW */
+	0,					/* error if sp passes  SH_CSS_SP_EVENT_NR_OF_TYPES as a valid event. */
 };
 
 enum ia_css_err
@@ -5028,7 +5028,7 @@ sh_css_enable_cont_capt(bool enable, bool stop_copy_preview)
 bool
 sh_css_continuous_is_enabled(uint8_t pipe_num)
 #else
-/**
+/*
  * @brief Stop all "ia_css_pipe" instances in the target
  * "ia_css_stream" instance.
  *
@@ -5107,7 +5107,7 @@ ia_css_stream_set_buffer_depth(struct ia_css_stream *stream, int buffer_depth)
 	return IA_CSS_SUCCESS;
 }
 #else
-	/**
+	/*
 	 * Stop all "ia_css_pipe" instances in this target
 	 * "ia_css_stream" instance.
 	 */
@@ -5146,7 +5146,7 @@ ia_css_stream_get_buffer_depth(struct ia_css_stream *stream, int *buffer_depth)
 		}
 	}
 
-	/**
+	/*
 	 * In the CSS firmware use scenario "Continuous Preview"
 	 * as well as "Continuous Video", the "ia_css_pipe" instance
 	 * "Copy Pipe" is activated. This "Copy Pipe" is private to
@@ -5183,7 +5183,7 @@ ia_css_stream_get_buffer_depth(struct ia_css_stream *stream, int *buffer_depth)
 	return err;
 }
 
-/**
+/*
  * @brief Check if all "ia_css_pipe" instances in the target
  * "ia_css_stream" instance have stopped.
  *
@@ -5218,7 +5218,7 @@ sh_css_pipes_have_stopped(struct ia_css_stream *stream)
 	main_pipe_id = main_pipe->mode;
 	IA_CSS_ENTER_PRIVATE("main_pipe_id=%d", main_pipe_id);
 
-	/**
+	/*
 	 * Check if every "ia_css_pipe" instance in this target
 	 * "ia_css_stream" instance has stopped.
 	 */
@@ -5229,7 +5229,7 @@ sh_css_pipes_have_stopped(struct ia_css_stream *stream)
 				rval);
 	}
 
-	/**
+	/*
 	 * In the CSS firmware use scenario "Continuous Preview"
 	 * as well as "Continuous Video", the "ia_css_pipe" instance
 	 * "Copy Pipe" is activated. This "Copy Pipe" is private to
@@ -5474,7 +5474,7 @@ sh_css_pipe_get_grid_info(struct ia_css_pipe *pipe,
 }
 
 #ifdef ISP2401
-/**
+/*
  * @brief Check if a format is supported by the pipe.
  *
  */
@@ -8626,7 +8626,7 @@ sh_css_pipeline_add_acc_stage(struct ia_css_pipeline *pipeline,
 	return err;
 }
 
-/**
+/*
  * @brief Tag a specific frame in continuous capture.
  * Refer to "sh_css_internal.h" for details.
  */
@@ -8666,7 +8666,7 @@ enum ia_css_err ia_css_stream_capture_frame(struct ia_css_stream *stream,
 	return err;
 }
 
-/**
+/*
  * @brief Configure the continuous capture.
  * Refer to "sh_css_internal.h" for details.
  */
@@ -8822,7 +8822,7 @@ sh_css_init_host_sp_control_vars(void)
 		"sh_css_init_host_sp_control_vars() leave: return_void\n");
 }
 
-/**
+/*
  * create the internal structures and fill in the configuration data
  */
 void ia_css_pipe_config_defaults(struct ia_css_pipe_config *pipe_config)
@@ -10435,7 +10435,7 @@ ia_css_start_sp(void)
 	return err;
 }
 
-/**
+/*
  *	Time to wait SP for termincate. Only condition when this can happen
  *	is a fatal hw failure, but we must be able to detect this and emit
  *	a proper error trace.
@@ -10713,7 +10713,7 @@ ia_css_unlock_raw_frame(struct ia_css_stream *stream, uint32_t exp_id)
 	return ret;
 }
 
-/** @brief	Set the state (Enable or Disable) of the Extension stage in the
+/* @brief	Set the state (Enable or Disable) of the Extension stage in the
  * 		given pipe.
  */
 enum ia_css_err
@@ -10758,7 +10758,7 @@ ia_css_pipe_set_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle, bool
 	return err;
 }
 
-/**	@brief	Get the state (Enable or Disable) of the Extension stage in the
+/*	@brief	Get the state (Enable or Disable) of the Extension stage in the
  *	given pipe.
  */
 enum ia_css_err
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_internal.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_internal.h
index 0910021286a4113f6b6b47eb7ee86133f3e01618..161122e1bcbc6e216b1cf0981bf9104d8097ba2f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_internal.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_internal.h
@@ -188,7 +188,7 @@ enum host2sp_commands {
 	N_host2sp_cmd
 };
 
-/** Enumeration used to indicate the events that are produced by
+/* Enumeration used to indicate the events that are produced by
  *  the SP and consumed by the Host.
  *
  * !!!IMPORTANT!!! KEEP THE FOLLOWING IN SYNC:
@@ -274,10 +274,10 @@ struct sh_css_ddr_address_map_compound {
 };
 
 struct ia_css_isp_parameter_set_info {
-	struct sh_css_ddr_address_map  mem_map;/**< pointers to Parameters in ISP format IMPT:
+	struct sh_css_ddr_address_map  mem_map;/** pointers to Parameters in ISP format IMPT:
 						    This should be first member of this struct */
-	uint32_t                       isp_parameters_id;/**< Unique ID to track which config was actually applied to a particular frame */
-	ia_css_ptr                     output_frame_ptr;/**< Output frame to which this config has to be applied (optional) */
+	uint32_t                       isp_parameters_id;/** Unique ID to track which config was actually applied to a particular frame */
+	ia_css_ptr                     output_frame_ptr;/** Output frame to which this config has to be applied (optional) */
 };
 
 /* this struct contains all arguments that can be passed to
@@ -398,9 +398,9 @@ struct sh_css_sp_input_formatter_set {
 /* SP configuration information */
 struct sh_css_sp_config {
 	uint8_t			no_isp_sync; /* Signal host immediately after start */
-	uint8_t			enable_raw_pool_locking; /**< Enable Raw Buffer Locking for HALv3 Support */
+	uint8_t			enable_raw_pool_locking; /** Enable Raw Buffer Locking for HALv3 Support */
 	uint8_t			lock_all;
-	/**< If raw buffer locking is enabled, this flag indicates whether raw
+	/** If raw buffer locking is enabled, this flag indicates whether raw
 	     frames are locked when their EOF event is successfully sent to the
 	     host (true) or when they are passed to the preview/video pipe
 	     (false). */
@@ -458,13 +458,13 @@ struct sh_css_sp_pipeline_io {
 	/*struct sh_css_sp_pipeline_terminal	output;*/
 };
 
-/** This struct tracks how many streams are registered per CSI port.
+/* This struct tracks how many streams are registered per CSI port.
  * This is used to track which streams have already been configured.
  * Only when all streams are configured, the CSI RX is started for that port.
  */
 struct sh_css_sp_pipeline_io_status {
-	uint32_t	active[N_INPUT_SYSTEM_CSI_PORT];	/**< registered streams */
-	uint32_t	running[N_INPUT_SYSTEM_CSI_PORT];	/**< configured streams */
+	uint32_t	active[N_INPUT_SYSTEM_CSI_PORT];	/** registered streams */
+	uint32_t	running[N_INPUT_SYSTEM_CSI_PORT];	/** configured streams */
 };
 
 #endif
@@ -500,7 +500,7 @@ enum sh_css_port_type {
 #define SH_CSS_METADATA_OFFLINE_MODE   0x04
 #define SH_CSS_METADATA_WAIT_INPUT     0x08
 
-/** @brief Free an array of metadata buffers.
+/* @brief Free an array of metadata buffers.
  *
  * @param[in]	num_bufs	Number of metadata buffers to be freed.
  * @param[in]	bufs		Pointer of array of metadata buffers.
@@ -764,7 +764,7 @@ struct sh_css_hmm_buffer {
 			hrt_vaddress	frame_data;
 			uint32_t	flashed;
 			uint32_t	exp_id;
-			uint32_t	isp_parameters_id; /**< Unique ID to track which config was
+			uint32_t	isp_parameters_id; /** Unique ID to track which config was
 								actually applied to a particular frame */
 #if CONFIG_ON_FRAME_ENQUEUE()
 			struct sh_css_config_on_frame_enqueue config_on_frame_enqueue;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_legacy.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_legacy.h
index e12789236bb9abadae85a6e27fbbb8089858255d..4bcc35d219f83fa84ced52007616ab26e063b139 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_legacy.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_legacy.h
@@ -22,7 +22,7 @@
 #include <ia_css_pipe_public.h>
 #include <ia_css_stream_public.h>
 
-/** The pipe id type, distinguishes the kind of pipes that
+/* The pipe id type, distinguishes the kind of pipes that
  *  can be run in parallel.
  */
 enum ia_css_pipe_id {
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c
index 36aaa3019a15ec5a82d0d64ec1803a1d79b8cbcb..883474e90c8173912b9800a79f34a9d257ae50a5 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c
@@ -321,7 +321,7 @@ calculate_mipi_buff_size(
 	height = stream_cfg->input_config.input_res.height;
 	format = stream_cfg->input_config.format;
 	pack_raw_pixels = stream_cfg->pack_raw_pixels;
-	/** end of NOTE */
+	/* end of NOTE */
 
 	/**
 #ifndef ISP2401
@@ -341,7 +341,7 @@ calculate_mipi_buff_size(
 	 * in the non-continuous use scenario.
 	 */
 	width_padded = width + (2 * ISP_VEC_NELEMS);
-	/** end of NOTE */
+	/* end of NOTE */
 
 	IA_CSS_ENTER("padded_width=%d, height=%d, format=%d\n",
 		     width_padded, height, format);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.h
index a7ffe6d8331bd68a7d5e3d53f87f978bf73c99b0..270ec2b60a3ed365d5d8042adf53f59570e0e12b 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.h
@@ -144,8 +144,8 @@ struct ia_css_isp_parameters {
 	struct sh_css_ddr_address_map_size pipe_ddr_ptrs_size[IA_CSS_PIPE_ID_NUM];
 	struct sh_css_ddr_address_map ddr_ptrs;
 	struct sh_css_ddr_address_map_size ddr_ptrs_size;
-	struct ia_css_frame *output_frame; /**< Output frame the config is to be applied to (optional) */
-	uint32_t isp_parameters_id; /**< Unique ID to track which config was actually applied to a particular frame */
+	struct ia_css_frame *output_frame; /** Output frame the config is to be applied to (optional) */
+	uint32_t isp_parameters_id; /** Unique ID to track which config was actually applied to a particular frame */
 };
 
 void
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c
index e6a345979ff14298c4a3ee385ab6f941e6907c3b..6fc00fc402b14991e0b3b267abc9320ca7dbd513 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c
@@ -261,7 +261,7 @@ sh_css_sp_start_raw_copy(struct ia_css_frame *out_frame,
 	assert(out_frame != NULL);
 
 	{
-		/**
+		/*
 		 * Clear sh_css_sp_stage for easy debugging.
 		 * program_input_circuit must be saved as it is set outside
 		 * this function.
@@ -335,7 +335,7 @@ sh_css_sp_start_isys_copy(struct ia_css_frame *out_frame,
 	assert(out_frame != NULL);
 
 	{
-		/**
+		/*
 		 * Clear sh_css_sp_stage for easy debugging.
 		 * program_input_circuit must be saved as it is set outside
 		 * this function.
@@ -909,7 +909,7 @@ sh_css_sp_init_stage(struct ia_css_binary *binary,
 	xinfo = binary->info;
 	info  = &xinfo->sp;
 	{
-		/**
+		/*
 		 * Clear sh_css_sp_stage for easy debugging.
 		 * program_input_circuit must be saved as it is set outside
 		 * this function.
@@ -980,7 +980,7 @@ sh_css_sp_init_stage(struct ia_css_binary *binary,
 	sh_css_isp_stage.binary_name[SH_CSS_MAX_BINARY_NAME - 1] = 0;
 	sh_css_isp_stage.mem_initializers = *isp_mem_if;
 
-	/**
+	/*
 	 * Even when a stage does not need uds and does not params,
 	 * ia_css_uds_sp_scale_params() seems to be called (needs
 	 * further investigation). This function can not deal with
@@ -1429,7 +1429,7 @@ sh_css_init_host2sp_frame_data(void)
 }
 
 
-/**
+/*
  * @brief Update the offline frame information in host_sp_communication.
  * Refer to "sh_css_sp.h" for more details.
  */
@@ -1461,7 +1461,7 @@ sh_css_update_host2sp_offline_frame(
 }
 
 #if defined(USE_INPUT_SYSTEM_VERSION_2) || defined(USE_INPUT_SYSTEM_VERSION_2401)
-/**
+/*
  * @brief Update the mipi frame information in host_sp_communication.
  * Refer to "sh_css_sp.h" for more details.
  */
@@ -1488,7 +1488,7 @@ sh_css_update_host2sp_mipi_frame(
 				frame ? frame->data : 0);
 }
 
-/**
+/*
  * @brief Update the mipi metadata information in host_sp_communication.
  * Refer to "sh_css_sp.h" for more details.
  */
@@ -1735,7 +1735,7 @@ ia_css_isp_has_started(void)
 }
 
 
-/**
+/*
  * @brief Initialize the DMA software-mask in the debug mode.
  * Refer to "sh_css_sp.h" for more details.
  */
@@ -1761,7 +1761,7 @@ sh_css_sp_init_dma_sw_reg(int dma_id)
 	return true;
 }
 
-/**
+/*
  * @brief Set the DMA software-mask in the debug mode.
  * Refer to "sh_css_sp.h" for more details.
  */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_struct.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_struct.h
index e49e478ab3547e9b565315374a3954a4da2c60f3..0b8e3d8720691b5db6a0365cff74a254250aedb9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_struct.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_struct.h
@@ -61,7 +61,7 @@ struct sh_css {
 #endif
 	hrt_vaddress                   sp_bin_addr;
 	hrt_data                       page_table_base_index;
-	unsigned int                   size_mem_words; /** \deprecated{Use ia_css_mipi_buffer_config instead.}*/
+	unsigned int                   size_mem_words; /* \deprecated{Use ia_css_mipi_buffer_config instead.}*/
 	enum ia_css_irq_type           irq_type;
 	unsigned int                   pipe_counter;
 	
diff --git a/drivers/staging/octeon-usb/octeon-hcd.c b/drivers/staging/octeon-usb/octeon-hcd.c
index 068aece25d37f17414a3767b79fc8510cdf6bb3e..cded30f145aa2423b13cafdf2e149c607cb355d2 100644
--- a/drivers/staging/octeon-usb/octeon-hcd.c
+++ b/drivers/staging/octeon-usb/octeon-hcd.c
@@ -394,7 +394,7 @@ struct octeon_hcd {
 				result = -1;				    \
 				break;					    \
 			} else						    \
-				cvmx_wait(100);				    \
+				__delay(100);				    \
 		}							    \
 	} while (0);							    \
 	result; })
@@ -774,7 +774,7 @@ static int cvmx_usb_initialize(struct device *dev,
 	usbn_clk_ctl.s.hclk_rst = 1;
 	cvmx_write64_uint64(CVMX_USBNX_CLK_CTL(usb->index), usbn_clk_ctl.u64);
 	/* 2e.  Wait 64 core-clock cycles for HCLK to stabilize */
-	cvmx_wait(64);
+	__delay(64);
 	/*
 	 * 3. Program the power-on reset field in the USBN clock-control
 	 *    register:
@@ -795,7 +795,7 @@ static int cvmx_usb_initialize(struct device *dev,
 	cvmx_write64_uint64(CVMX_USBNX_USBP_CTL_STATUS(usb->index),
 			    usbn_usbp_ctl_status.u64);
 	/* 6. Wait 10 cycles */
-	cvmx_wait(10);
+	__delay(10);
 	/*
 	 * 7. Clear ATE_RESET field in the USBN clock-control register:
 	 *    USBN_USBP_CTL_STATUS[ATE_RESET] = 0
diff --git a/drivers/staging/pi433/rf69.c b/drivers/staging/pi433/rf69.c
index e69a2153c999c796602f2884137faff8bc1ef8ab..12c9df9cddde22bea46650ededda1ed4d3e057ae 100644
--- a/drivers/staging/pi433/rf69.c
+++ b/drivers/staging/pi433/rf69.c
@@ -102,7 +102,7 @@ enum modulation rf69_get_modulation(struct spi_device *spi)
 
 	currentValue = READ_REG(REG_DATAMODUL);
 
-	switch (currentValue & MASK_DATAMODUL_MODULATION_TYPE >> 3) { // TODO improvement: change 3 to define
+	switch (currentValue & MASK_DATAMODUL_MODULATION_TYPE) {
 	case DATAMODUL_MODULATION_TYPE_OOK: return OOK;
 	case DATAMODUL_MODULATION_TYPE_FSK: return FSK;
 	default:			    return undefined;
diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
index c0664dc80bf24684184c83bad118d311d756b75c..446310775e9021bcc7e9f72cb94545e0b9c1b012 100644
--- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
@@ -1395,19 +1395,13 @@ static int rtw_wx_get_essid(struct net_device *dev,
 	if ((check_fwstate(pmlmepriv, _FW_LINKED)) ||
 	    (check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE))) {
 		len = pcur_bss->Ssid.SsidLength;
-
-		wrqu->essid.length = len;
-
 		memcpy(extra, pcur_bss->Ssid.Ssid, len);
-
-		wrqu->essid.flags = 1;
 	} else {
-		ret = -1;
-		goto exit;
+		len = 0;
+		*extra = 0;
 	}
-
-exit:
-
+	wrqu->essid.length = len;
+	wrqu->essid.flags = 1;
 
 	return ret;
 }
diff --git a/drivers/staging/vboxvideo/vbox_ttm.c b/drivers/staging/vboxvideo/vbox_ttm.c
index 231c89e0699cc9e6741c3ff57526b7637004f209..2ea31589d77377fe37edd2ba674541154c6e555f 100644
--- a/drivers/staging/vboxvideo/vbox_ttm.c
+++ b/drivers/staging/vboxvideo/vbox_ttm.c
@@ -213,9 +213,10 @@ static struct ttm_tt *vbox_ttm_tt_create(struct ttm_bo_device *bdev,
 	return tt;
 }
 
-static int vbox_ttm_tt_populate(struct ttm_tt *ttm)
+static int vbox_ttm_tt_populate(struct ttm_tt *ttm,
+				struct ttm_operation_ctx *ctx)
 {
-	return ttm_pool_populate(ttm);
+	return ttm_pool_populate(ttm, ctx);
 }
 
 static void vbox_ttm_tt_unpopulate(struct ttm_tt *ttm)
@@ -233,7 +234,6 @@ static struct ttm_bo_driver vbox_bo_driver = {
 	.verify_access = vbox_bo_verify_access,
 	.io_mem_reserve = &vbox_ttm_io_mem_reserve,
 	.io_mem_free = &vbox_ttm_io_mem_free,
-	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int vbox_mm_init(struct vbox_private *vbox)
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 7c69b4a9694d2016a8aac3b63a4b7d4399146688..0d99b242e82e3f84da25a47564f96db60be4b5f5 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 					" %d i: %d bio: %p, allocating another"
 					" bio\n", bio->bi_vcnt, i, bio);
 
-				rc = blk_rq_append_bio(req, bio);
+				rc = blk_rq_append_bio(req, &bio);
 				if (rc) {
 					pr_err("pSCSI: failed to append bio\n");
 					goto fail;
@@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	}
 
 	if (bio) {
-		rc = blk_rq_append_bio(req, bio);
+		rc = blk_rq_append_bio(req, &bio);
 		if (rc) {
 			pr_err("pSCSI: failed to append bio\n");
 			goto fail;
diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
index 7952357df9c862c9d9558522c3664d3d5059a572..edb6e4e9ef3acb8d18fcf9de4280e10fcbda5f14 100644
--- a/drivers/tee/optee/core.c
+++ b/drivers/tee/optee/core.c
@@ -590,7 +590,6 @@ static int __init optee_driver_init(void)
 		return -ENODEV;
 
 	np = of_find_matching_node(fw_np, optee_match);
-	of_node_put(fw_np);
 	if (!np)
 		return -ENODEV;
 
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index 419a7a90bce0e21c40afc165506420aa9146fb72..f45bcbc63738ffb3598e958e1af529c443b98e1d 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -339,7 +339,7 @@ static void __ring_interrupt(struct tb_ring *ring)
 		return;
 
 	if (ring->start_poll) {
-		__ring_interrupt_mask(ring, false);
+		__ring_interrupt_mask(ring, true);
 		ring->start_poll(ring->poll_data);
 	} else {
 		schedule_work(&ring->work);
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 427e0d5d8f135e56249c120fdfb50aca1f65021d..539b49adb6afd41190eee97f6eaf1950c8a1ac3f 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1762,7 +1762,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 {
 	struct n_tty_data *ldata = tty->disc_data;
 
-	if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) {
+	if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) {
 		bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
 		ldata->line_start = ldata->read_tail;
 		if (!L_ICANON(tty) || !read_cnt(ldata)) {
@@ -2425,7 +2425,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
 		return put_user(tty_chars_in_buffer(tty), (int __user *) arg);
 	case TIOCINQ:
 		down_write(&tty->termios_rwsem);
-		if (L_ICANON(tty))
+		if (L_ICANON(tty) && !L_EXTPROC(tty))
 			retval = inq_canon(ldata);
 		else
 			retval = read_cnt(ldata);
diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c
index ce7ad0acee7aa784772c77339130a3d6289641cd..247788a16f0b62f74f86d0537d51ce0358bdb61d 100644
--- a/drivers/tty/serdev/serdev-ttyport.c
+++ b/drivers/tty/serdev/serdev-ttyport.c
@@ -27,23 +27,41 @@ static int ttyport_receive_buf(struct tty_port *port, const unsigned char *cp,
 {
 	struct serdev_controller *ctrl = port->client_data;
 	struct serport *serport = serdev_controller_get_drvdata(ctrl);
+	int ret;
 
 	if (!test_bit(SERPORT_ACTIVE, &serport->flags))
 		return 0;
 
-	return serdev_controller_receive_buf(ctrl, cp, count);
+	ret = serdev_controller_receive_buf(ctrl, cp, count);
+
+	dev_WARN_ONCE(&ctrl->dev, ret < 0 || ret > count,
+				"receive_buf returns %d (count = %zu)\n",
+				ret, count);
+	if (ret < 0)
+		return 0;
+	else if (ret > count)
+		return count;
+
+	return ret;
 }
 
 static void ttyport_write_wakeup(struct tty_port *port)
 {
 	struct serdev_controller *ctrl = port->client_data;
 	struct serport *serport = serdev_controller_get_drvdata(ctrl);
+	struct tty_struct *tty;
+
+	tty = tty_port_tty_get(port);
+	if (!tty)
+		return;
 
-	if (test_and_clear_bit(TTY_DO_WRITE_WAKEUP, &port->tty->flags) &&
+	if (test_and_clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) &&
 	    test_bit(SERPORT_ACTIVE, &serport->flags))
 		serdev_controller_write_wakeup(ctrl);
 
-	wake_up_interruptible_poll(&port->tty->write_wait, POLLOUT);
+	wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+
+	tty_kref_put(tty);
 }
 
 static const struct tty_port_client_operations client_ops = {
@@ -136,8 +154,10 @@ static void ttyport_close(struct serdev_controller *ctrl)
 
 	clear_bit(SERPORT_ACTIVE, &serport->flags);
 
+	tty_lock(tty);
 	if (tty->ops->close)
 		tty->ops->close(tty, NULL);
+	tty_unlock(tty);
 
 	tty_release_struct(tty, serport->tty_idx);
 }
diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c
index 362c25ff188a549f5d2e111a8c4ef99248a5b920..ae6a256524d8b618d4116fde8a66518e4e93cce4 100644
--- a/drivers/tty/serial/8250/8250_early.c
+++ b/drivers/tty/serial/8250/8250_early.c
@@ -122,12 +122,14 @@ static void __init init_port(struct earlycon_device *device)
 	serial8250_early_out(port, UART_FCR, 0);	/* no fifo */
 	serial8250_early_out(port, UART_MCR, 0x3);	/* DTR + RTS */
 
-	divisor = DIV_ROUND_CLOSEST(port->uartclk, 16 * device->baud);
-	c = serial8250_early_in(port, UART_LCR);
-	serial8250_early_out(port, UART_LCR, c | UART_LCR_DLAB);
-	serial8250_early_out(port, UART_DLL, divisor & 0xff);
-	serial8250_early_out(port, UART_DLM, (divisor >> 8) & 0xff);
-	serial8250_early_out(port, UART_LCR, c & ~UART_LCR_DLAB);
+	if (port->uartclk && device->baud) {
+		divisor = DIV_ROUND_CLOSEST(port->uartclk, 16 * device->baud);
+		c = serial8250_early_in(port, UART_LCR);
+		serial8250_early_out(port, UART_LCR, c | UART_LCR_DLAB);
+		serial8250_early_out(port, UART_DLL, divisor & 0xff);
+		serial8250_early_out(port, UART_DLM, (divisor >> 8) & 0xff);
+		serial8250_early_out(port, UART_LCR, c & ~UART_LCR_DLAB);
+	}
 }
 
 int __init early_serial8250_setup(struct earlycon_device *device,
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index b7e0e34166414fafa3c6e705cce6bd6e952f1c31..54adf8d563501ab844cea41edf0427fdb83a1a8e 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -5135,6 +5135,9 @@ static const struct pci_device_id serial_pci_tbl[] = {
 	{ PCI_DEVICE(0x1601, 0x0800), .driver_data = pbn_b0_4_1250000 },
 	{ PCI_DEVICE(0x1601, 0xa801), .driver_data = pbn_b0_4_1250000 },
 
+	/* Amazon PCI serial device */
+	{ PCI_DEVICE(0x1d0f, 0x8250), .driver_data = pbn_b0_1_115200 },
+
 	/*
 	 * These entries match devices with class COMMUNICATION_SERIAL,
 	 * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL
diff --git a/drivers/usb/chipidea/ci_hdrc_msm.c b/drivers/usb/chipidea/ci_hdrc_msm.c
index 3593ce0ec641d848f1a15438ffb76295558ab9a6..880009987460affefa3aac2bf61aee6174195f81 100644
--- a/drivers/usb/chipidea/ci_hdrc_msm.c
+++ b/drivers/usb/chipidea/ci_hdrc_msm.c
@@ -247,7 +247,7 @@ static int ci_hdrc_msm_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_mux;
 
-	ulpi_node = of_find_node_by_name(of_node_get(pdev->dev.of_node), "ulpi");
+	ulpi_node = of_get_child_by_name(pdev->dev.of_node, "ulpi");
 	if (ulpi_node) {
 		phy_node = of_get_next_available_child(ulpi_node, NULL);
 		ci->hsic = of_device_is_compatible(phy_node, "qcom,usb-hsic-phy");
diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c
index 8b351444cc40d015fefde2f40143a452232d32c9..9a2ab6751a23c504177fbac78cc7e95cdb21a1ae 100644
--- a/drivers/usb/common/ulpi.c
+++ b/drivers/usb/common/ulpi.c
@@ -180,9 +180,9 @@ static int ulpi_of_register(struct ulpi *ulpi)
 	/* Find a ulpi bus underneath the parent or the grandparent */
 	parent = ulpi->dev.parent;
 	if (parent->of_node)
-		np = of_find_node_by_name(parent->of_node, "ulpi");
+		np = of_get_child_by_name(parent->of_node, "ulpi");
 	else if (parent->parent && parent->parent->of_node)
-		np = of_find_node_by_name(parent->parent->of_node, "ulpi");
+		np = of_get_child_by_name(parent->parent->of_node, "ulpi");
 	if (!np)
 		return 0;
 
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index da8acd980fc68e1c67b302ab45a62580d99fb0eb..c821b4b9647e357468335fca83b3aa980e600315 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -555,6 +555,9 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 	unsigned iad_num = 0;
 
 	memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE);
+	nintf = nintf_orig = config->desc.bNumInterfaces;
+	config->desc.bNumInterfaces = 0;	// Adjusted later
+
 	if (config->desc.bDescriptorType != USB_DT_CONFIG ||
 	    config->desc.bLength < USB_DT_CONFIG_SIZE ||
 	    config->desc.bLength > size) {
@@ -568,7 +571,6 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 	buffer += config->desc.bLength;
 	size -= config->desc.bLength;
 
-	nintf = nintf_orig = config->desc.bNumInterfaces;
 	if (nintf > USB_MAXINTERFACES) {
 		dev_warn(ddev, "config %d has too many interfaces: %d, "
 		    "using maximum allowed: %d\n",
@@ -905,14 +907,25 @@ void usb_release_bos_descriptor(struct usb_device *dev)
 	}
 }
 
+static const __u8 bos_desc_len[256] = {
+	[USB_CAP_TYPE_WIRELESS_USB] = USB_DT_USB_WIRELESS_CAP_SIZE,
+	[USB_CAP_TYPE_EXT]          = USB_DT_USB_EXT_CAP_SIZE,
+	[USB_SS_CAP_TYPE]           = USB_DT_USB_SS_CAP_SIZE,
+	[USB_SSP_CAP_TYPE]          = USB_DT_USB_SSP_CAP_SIZE(1),
+	[CONTAINER_ID_TYPE]         = USB_DT_USB_SS_CONTN_ID_SIZE,
+	[USB_PTM_CAP_TYPE]          = USB_DT_USB_PTM_ID_SIZE,
+};
+
 /* Get BOS descriptor set */
 int usb_get_bos_descriptor(struct usb_device *dev)
 {
 	struct device *ddev = &dev->dev;
 	struct usb_bos_descriptor *bos;
 	struct usb_dev_cap_header *cap;
+	struct usb_ssp_cap_descriptor *ssp_cap;
 	unsigned char *buffer;
-	int length, total_len, num, i;
+	int length, total_len, num, i, ssac;
+	__u8 cap_type;
 	int ret;
 
 	bos = kzalloc(sizeof(struct usb_bos_descriptor), GFP_KERNEL);
@@ -965,7 +978,13 @@ int usb_get_bos_descriptor(struct usb_device *dev)
 			dev->bos->desc->bNumDeviceCaps = i;
 			break;
 		}
+		cap_type = cap->bDevCapabilityType;
 		length = cap->bLength;
+		if (bos_desc_len[cap_type] && length < bos_desc_len[cap_type]) {
+			dev->bos->desc->bNumDeviceCaps = i;
+			break;
+		}
+
 		total_len -= length;
 
 		if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) {
@@ -973,7 +992,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
 			continue;
 		}
 
-		switch (cap->bDevCapabilityType) {
+		switch (cap_type) {
 		case USB_CAP_TYPE_WIRELESS_USB:
 			/* Wireless USB cap descriptor is handled by wusb */
 			break;
@@ -986,8 +1005,11 @@ int usb_get_bos_descriptor(struct usb_device *dev)
 				(struct usb_ss_cap_descriptor *)buffer;
 			break;
 		case USB_SSP_CAP_TYPE:
-			dev->bos->ssp_cap =
-				(struct usb_ssp_cap_descriptor *)buffer;
+			ssp_cap = (struct usb_ssp_cap_descriptor *)buffer;
+			ssac = (le32_to_cpu(ssp_cap->bmAttributes) &
+				USB_SSP_SUBLINK_SPEED_ATTRIBS);
+			if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac))
+				dev->bos->ssp_cap = ssp_cap;
 			break;
 		case CONTAINER_ID_TYPE:
 			dev->bos->ss_id =
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 705c573d0257e28b5142070caaca17e06e5a25fd..a3fad4ec9870d21e602fa80d8cf564fff5dd6d62 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1442,14 +1442,18 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
 	int number_of_packets = 0;
 	unsigned int stream_id = 0;
 	void *buf;
-
-	if (uurb->flags & ~(USBDEVFS_URB_ISO_ASAP |
-				USBDEVFS_URB_SHORT_NOT_OK |
+	unsigned long mask =	USBDEVFS_URB_SHORT_NOT_OK |
 				USBDEVFS_URB_BULK_CONTINUATION |
 				USBDEVFS_URB_NO_FSBR |
 				USBDEVFS_URB_ZERO_PACKET |
-				USBDEVFS_URB_NO_INTERRUPT))
-		return -EINVAL;
+				USBDEVFS_URB_NO_INTERRUPT;
+	/* USBDEVFS_URB_ISO_ASAP is a special case */
+	if (uurb->type == USBDEVFS_URB_TYPE_ISO)
+		mask |= USBDEVFS_URB_ISO_ASAP;
+
+	if (uurb->flags & ~mask)
+			return -EINVAL;
+
 	if ((unsigned int)uurb->buffer_length >= USBFS_XFER_MAX)
 		return -EINVAL;
 	if (uurb->buffer_length > 0 && !uurb->buffer)
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 7ccdd3d4db84c9d4675084d4a575f458fd1e1d44..cf7bbcb9a63cc9acaa4dfe225bba01167ced2051 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4948,6 +4948,15 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
 		usb_put_dev(udev);
 		if ((status == -ENOTCONN) || (status == -ENOTSUPP))
 			break;
+
+		/* When halfway through our retry count, power-cycle the port */
+		if (i == (SET_CONFIG_TRIES / 2) - 1) {
+			dev_info(&port_dev->dev, "attempt power cycle\n");
+			usb_hub_set_port_power(hdev, hub, port1, false);
+			msleep(2 * hub_power_on_good_delay(hub));
+			usb_hub_set_port_power(hdev, hub, port1, true);
+			msleep(hub_power_on_good_delay(hub));
+		}
 	}
 	if (hub->hdev->parent ||
 			!hcd->driver->port_handed_over ||
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index f1dbab6f798fdc75dbde8039b8d0c5339871fcde..4024926c1d68c93e97e40f822c4a690a4c113987 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -52,10 +52,11 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* Microsoft LifeCam-VX700 v2.0 */
 	{ USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },
 
-	/* Logitech HD Pro Webcams C920, C920-C and C930e */
+	/* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
 	{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
 	{ USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
 	{ USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT },
+	{ USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT },
 
 	/* Logitech ConferenceCam CC3000e */
 	{ USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT },
@@ -146,6 +147,12 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* appletouch */
 	{ USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */
+	{ USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM },
+
+	/* ELSA MicroLink 56K */
+	{ USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME },
+
 	/* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */
 	{ USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM },
 
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index f66c94130cac0b49a02d7ca081568bd88367589c..31749c79045f3a95ed27b6b338f9b94baf7b8d03 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -537,6 +537,7 @@ struct dwc2_core_params {
  *                       2 - Internal DMA
  * @power_optimized     Are power optimizations enabled?
  * @num_dev_ep          Number of device endpoints available
+ * @num_dev_in_eps      Number of device IN endpoints available
  * @num_dev_perio_in_ep Number of device periodic IN endpoints
  *                      available
  * @dev_token_q_depth   Device Mode IN Token Sequence Learning Queue
@@ -565,6 +566,7 @@ struct dwc2_core_params {
  *                       2 - 8 or 16 bits
  * @snpsid:             Value from SNPSID register
  * @dev_ep_dirs:        Direction of device endpoints (GHWCFG1)
+ * @g_tx_fifo_size[]	Power-on values of TxFIFO sizes
  */
 struct dwc2_hw_params {
 	unsigned op_mode:3;
@@ -586,12 +588,14 @@ struct dwc2_hw_params {
 	unsigned fs_phy_type:2;
 	unsigned i2c_enable:1;
 	unsigned num_dev_ep:4;
+	unsigned num_dev_in_eps : 4;
 	unsigned num_dev_perio_in_ep:4;
 	unsigned total_fifo_size:16;
 	unsigned power_optimized:1;
 	unsigned utmi_phy_data_width:2;
 	u32 snpsid;
 	u32 dev_ep_dirs;
+	u32 g_tx_fifo_size[MAX_EPS_CHANNELS];
 };
 
 /* Size of control and EP0 buffers */
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 88529d0925039f53112458e59c208c057a32a608..e4c3ce0de5de11ba5532fb34c8b8d72e1fd98511 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -195,55 +195,18 @@ int dwc2_hsotg_tx_fifo_count(struct dwc2_hsotg *hsotg)
 {
 	if (hsotg->hw_params.en_multiple_tx_fifo)
 		/* In dedicated FIFO mode we need count of IN EPs */
-		return (dwc2_readl(hsotg->regs + GHWCFG4)  &
-			GHWCFG4_NUM_IN_EPS_MASK) >> GHWCFG4_NUM_IN_EPS_SHIFT;
+		return hsotg->hw_params.num_dev_in_eps;
 	else
 		/* In shared FIFO mode we need count of Periodic IN EPs */
 		return hsotg->hw_params.num_dev_perio_in_ep;
 }
 
-/**
- * dwc2_hsotg_ep_info_size - return Endpoint Info Control block size in DWORDs
- */
-static int dwc2_hsotg_ep_info_size(struct dwc2_hsotg *hsotg)
-{
-	int val = 0;
-	int i;
-	u32 ep_dirs;
-
-	/*
-	 * Don't need additional space for ep info control registers in
-	 * slave mode.
-	 */
-	if (!using_dma(hsotg)) {
-		dev_dbg(hsotg->dev, "Buffer DMA ep info size 0\n");
-		return 0;
-	}
-
-	/*
-	 * Buffer DMA mode - 1 location per endpoit
-	 * Descriptor DMA mode - 4 locations per endpoint
-	 */
-	ep_dirs = hsotg->hw_params.dev_ep_dirs;
-
-	for (i = 0; i <= hsotg->hw_params.num_dev_ep; i++) {
-		val += ep_dirs & 3 ? 1 : 2;
-		ep_dirs >>= 2;
-	}
-
-	if (using_desc_dma(hsotg))
-		val = val * 4;
-
-	return val;
-}
-
 /**
  * dwc2_hsotg_tx_fifo_total_depth - return total FIFO depth available for
  * device mode TX FIFOs
  */
 int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg)
 {
-	int ep_info_size;
 	int addr;
 	int tx_addr_max;
 	u32 np_tx_fifo_size;
@@ -252,8 +215,7 @@ int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg)
 				hsotg->params.g_np_tx_fifo_size);
 
 	/* Get Endpoint Info Control block size in DWORDs. */
-	ep_info_size = dwc2_hsotg_ep_info_size(hsotg);
-	tx_addr_max = hsotg->hw_params.total_fifo_size - ep_info_size;
+	tx_addr_max = hsotg->hw_params.total_fifo_size;
 
 	addr = hsotg->params.g_rx_fifo_size + np_tx_fifo_size;
 	if (tx_addr_max <= addr)
diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c
index ef73af6e03a98828b7fadb46bd32ee291fd6302f..03fd20f0b49613aaffba14aa117e48a7781391a5 100644
--- a/drivers/usb/dwc2/params.c
+++ b/drivers/usb/dwc2/params.c
@@ -484,8 +484,7 @@ static void dwc2_check_param_tx_fifo_sizes(struct dwc2_hsotg *hsotg)
 	}
 
 	for (fifo = 1; fifo <= fifo_count; fifo++) {
-		dptxfszn = (dwc2_readl(hsotg->regs + DPTXFSIZN(fifo)) &
-			FIFOSIZE_DEPTH_MASK) >> FIFOSIZE_DEPTH_SHIFT;
+		dptxfszn = hsotg->hw_params.g_tx_fifo_size[fifo];
 
 		if (hsotg->params.g_tx_fifo_size[fifo] < min ||
 		    hsotg->params.g_tx_fifo_size[fifo] >  dptxfszn) {
@@ -609,6 +608,7 @@ static void dwc2_get_dev_hwparams(struct dwc2_hsotg *hsotg)
 	struct dwc2_hw_params *hw = &hsotg->hw_params;
 	bool forced;
 	u32 gnptxfsiz;
+	int fifo, fifo_count;
 
 	if (hsotg->dr_mode == USB_DR_MODE_HOST)
 		return;
@@ -617,6 +617,14 @@ static void dwc2_get_dev_hwparams(struct dwc2_hsotg *hsotg)
 
 	gnptxfsiz = dwc2_readl(hsotg->regs + GNPTXFSIZ);
 
+	fifo_count = dwc2_hsotg_tx_fifo_count(hsotg);
+
+	for (fifo = 1; fifo <= fifo_count; fifo++) {
+		hw->g_tx_fifo_size[fifo] =
+			(dwc2_readl(hsotg->regs + DPTXFSIZN(fifo)) &
+			 FIFOSIZE_DEPTH_MASK) >> FIFOSIZE_DEPTH_SHIFT;
+	}
+
 	if (forced)
 		dwc2_clear_force_mode(hsotg);
 
@@ -661,14 +669,6 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
 	hwcfg4 = dwc2_readl(hsotg->regs + GHWCFG4);
 	grxfsiz = dwc2_readl(hsotg->regs + GRXFSIZ);
 
-	/*
-	 * Host specific hardware parameters. Reading these parameters
-	 * requires the controller to be in host mode. The mode will
-	 * be forced, if necessary, to read these values.
-	 */
-	dwc2_get_host_hwparams(hsotg);
-	dwc2_get_dev_hwparams(hsotg);
-
 	/* hwcfg1 */
 	hw->dev_ep_dirs = hwcfg1;
 
@@ -711,6 +711,8 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
 	hw->en_multiple_tx_fifo = !!(hwcfg4 & GHWCFG4_DED_FIFO_EN);
 	hw->num_dev_perio_in_ep = (hwcfg4 & GHWCFG4_NUM_DEV_PERIO_IN_EP_MASK) >>
 				  GHWCFG4_NUM_DEV_PERIO_IN_EP_SHIFT;
+	hw->num_dev_in_eps = (hwcfg4 & GHWCFG4_NUM_IN_EPS_MASK) >>
+			     GHWCFG4_NUM_IN_EPS_SHIFT;
 	hw->dma_desc_enable = !!(hwcfg4 & GHWCFG4_DESC_DMA);
 	hw->power_optimized = !!(hwcfg4 & GHWCFG4_POWER_OPTIMIZ);
 	hw->utmi_phy_data_width = (hwcfg4 & GHWCFG4_UTMI_PHY_DATA_WIDTH_MASK) >>
@@ -719,6 +721,13 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
 	/* fifo sizes */
 	hw->rx_fifo_size = (grxfsiz & GRXFSIZ_DEPTH_MASK) >>
 				GRXFSIZ_DEPTH_SHIFT;
+	/*
+	 * Host specific hardware parameters. Reading these parameters
+	 * requires the controller to be in host mode. The mode will
+	 * be forced, if necessary, to read these values.
+	 */
+	dwc2_get_host_hwparams(hsotg);
+	dwc2_get_dev_hwparams(hsotg);
 
 	return 0;
 }
diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c
index c4a4d7bd27660225442e732e6be994c333b9b91b..7ae0eefc7cc7daf0382c7aeaa56c14659f025ffb 100644
--- a/drivers/usb/dwc3/dwc3-of-simple.c
+++ b/drivers/usb/dwc3/dwc3-of-simple.c
@@ -51,8 +51,10 @@ static int dwc3_of_simple_clk_init(struct dwc3_of_simple *simple, int count)
 
 		clk = of_clk_get(np, i);
 		if (IS_ERR(clk)) {
-			while (--i >= 0)
+			while (--i >= 0) {
+				clk_disable_unprepare(simple->clks[i]);
 				clk_put(simple->clks[i]);
+			}
 			return PTR_ERR(clk);
 		}
 
@@ -203,6 +205,7 @@ static struct platform_driver dwc3_of_simple_driver = {
 	.driver		= {
 		.name	= "dwc3-of-simple",
 		.of_match_table = of_dwc3_simple_match,
+		.pm	= &dwc3_of_simple_dev_pm_ops,
 	},
 };
 
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 981fd986cf824804b752e64018289c79f191d646..639dd1b163a0e19e502ff2274c73101e3efaa777 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -259,7 +259,7 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned cmd,
 {
 	const struct usb_endpoint_descriptor *desc = dep->endpoint.desc;
 	struct dwc3		*dwc = dep->dwc;
-	u32			timeout = 500;
+	u32			timeout = 1000;
 	u32			reg;
 
 	int			cmd_status = 0;
@@ -912,7 +912,7 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
 			 */
 			if (speed == USB_SPEED_HIGH) {
 				struct usb_ep *ep = &dep->endpoint;
-				unsigned int mult = ep->mult - 1;
+				unsigned int mult = 2;
 				unsigned int maxp = usb_endpoint_maxp(ep->desc);
 
 				if (length <= (2 * maxp))
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index eec14e6ed20be0a43f414233fccd38efe51f7f10..77c7ecca816aa026677ec869e087f0dc747f55a7 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -146,7 +146,6 @@ int config_ep_by_speed(struct usb_gadget *g,
 			struct usb_function *f,
 			struct usb_ep *_ep)
 {
-	struct usb_composite_dev	*cdev = get_gadget_data(g);
 	struct usb_endpoint_descriptor *chosen_desc = NULL;
 	struct usb_descriptor_header **speed_desc = NULL;
 
@@ -226,8 +225,12 @@ int config_ep_by_speed(struct usb_gadget *g,
 			_ep->maxburst = comp_desc->bMaxBurst + 1;
 			break;
 		default:
-			if (comp_desc->bMaxBurst != 0)
+			if (comp_desc->bMaxBurst != 0) {
+				struct usb_composite_dev *cdev;
+
+				cdev = get_gadget_data(g);
 				ERROR(cdev, "ep0 bMaxBurst must be 0\n");
+			}
 			_ep->maxburst = 1;
 			break;
 		}
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 97ea059a7aa471192710be177276c8b969619e2b..b6cf5ab5a0a135bb36e09de7b96e13645caa1abe 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1012,7 +1012,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
 		else
 			ret = ep->status;
 		goto error_mutex;
-	} else if (!(req = usb_ep_alloc_request(ep->ep, GFP_KERNEL))) {
+	} else if (!(req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC))) {
 		ret = -ENOMEM;
 	} else {
 		req->buf      = data;
@@ -2282,9 +2282,18 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type,
 		int i;
 
 		if (len < sizeof(*d) ||
-		    d->bFirstInterfaceNumber >= ffs->interfaces_count ||
-		    !d->Reserved1)
+		    d->bFirstInterfaceNumber >= ffs->interfaces_count)
 			return -EINVAL;
+		if (d->Reserved1 != 1) {
+			/*
+			 * According to the spec, Reserved1 must be set to 1
+			 * but older kernels incorrectly rejected non-zero
+			 * values.  We fix it here to avoid returning EINVAL
+			 * in response to values we used to accept.
+			 */
+			pr_debug("usb_ext_compat_desc::Reserved1 forced to 1\n");
+			d->Reserved1 = 1;
+		}
 		for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i)
 			if (d->Reserved2[i])
 				return -EINVAL;
diff --git a/drivers/usb/gadget/legacy/Kconfig b/drivers/usb/gadget/legacy/Kconfig
index a12fb459dbd9f6b8fccb06ce4d5fd18084f0b38b..784bf86dad4fcb0c8f9d10f2bd8652ee6a8d38d6 100644
--- a/drivers/usb/gadget/legacy/Kconfig
+++ b/drivers/usb/gadget/legacy/Kconfig
@@ -479,7 +479,7 @@ endif
 # or video class gadget drivers), or specific hardware, here.
 config USB_G_WEBCAM
 	tristate "USB Webcam Gadget"
-	depends on VIDEO_DEV
+	depends on VIDEO_V4L2
 	select USB_LIBCOMPOSITE
 	select VIDEOBUF2_VMALLOC
 	select USB_F_UVC
diff --git a/drivers/usb/gadget/udc/bdc/bdc_core.c b/drivers/usb/gadget/udc/bdc/bdc_core.c
index d39f070acbd705573b49a1b8df1b1f0077470940..01b44e15962378d02e260ee37872ded457bc763d 100644
--- a/drivers/usb/gadget/udc/bdc/bdc_core.c
+++ b/drivers/usb/gadget/udc/bdc/bdc_core.c
@@ -642,7 +642,6 @@ static const struct of_device_id bdc_of_match[] = {
 static struct platform_driver bdc_driver = {
 	.driver		= {
 		.name	= BRCM_BDC_NAME,
-		.owner	= THIS_MODULE,
 		.pm = &bdc_pm_ops,
 		.of_match_table	= bdc_of_match,
 	},
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 61422d624ad090f1dff586d59a9a31e2ce2ea0fb..1b3efb14aec7878e616a3a1a7588046e2641aec7 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1069,8 +1069,12 @@ static inline void usb_gadget_udc_stop(struct usb_udc *udc)
 static inline void usb_gadget_udc_set_speed(struct usb_udc *udc,
 					    enum usb_device_speed speed)
 {
-	if (udc->gadget->ops->udc_set_speed)
-		udc->gadget->ops->udc_set_speed(udc->gadget, speed);
+	if (udc->gadget->ops->udc_set_speed) {
+		enum usb_device_speed s;
+
+		s = min(speed, udc->gadget->max_speed);
+		udc->gadget->ops->udc_set_speed(udc->gadget, s);
+	}
 }
 
 /**
@@ -1143,11 +1147,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 
 	udc = kzalloc(sizeof(*udc), GFP_KERNEL);
 	if (!udc)
-		goto err1;
-
-	ret = device_add(&gadget->dev);
-	if (ret)
-		goto err2;
+		goto err_put_gadget;
 
 	device_initialize(&udc->dev);
 	udc->dev.release = usb_udc_release;
@@ -1156,7 +1156,11 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 	udc->dev.parent = parent;
 	ret = dev_set_name(&udc->dev, "%s", kobject_name(&parent->kobj));
 	if (ret)
-		goto err3;
+		goto err_put_udc;
+
+	ret = device_add(&gadget->dev);
+	if (ret)
+		goto err_put_udc;
 
 	udc->gadget = gadget;
 	gadget->udc = udc;
@@ -1166,7 +1170,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 
 	ret = device_add(&udc->dev);
 	if (ret)
-		goto err4;
+		goto err_unlist_udc;
 
 	usb_gadget_set_state(gadget, USB_STATE_NOTATTACHED);
 	udc->vbus = true;
@@ -1174,27 +1178,25 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 	/* pick up one of pending gadget drivers */
 	ret = check_pending_gadget_drivers(udc);
 	if (ret)
-		goto err5;
+		goto err_del_udc;
 
 	mutex_unlock(&udc_lock);
 
 	return 0;
 
-err5:
+ err_del_udc:
 	device_del(&udc->dev);
 
-err4:
+ err_unlist_udc:
 	list_del(&udc->list);
 	mutex_unlock(&udc_lock);
 
-err3:
-	put_device(&udc->dev);
 	device_del(&gadget->dev);
 
-err2:
-	kfree(udc);
+ err_put_udc:
+	put_device(&udc->dev);
 
-err1:
+ err_put_gadget:
 	put_device(&gadget->dev);
 	return ret;
 }
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index bc37f40baacf2b54fc2528c59f1b6d80ebae3a5b..6e87af2483679aac59f23e942dde9367a30d35e4 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -252,7 +252,7 @@
 #define USB3_EP0_SS_MAX_PACKET_SIZE	512
 #define USB3_EP0_HSFS_MAX_PACKET_SIZE	64
 #define USB3_EP0_BUF_SIZE		8
-#define USB3_MAX_NUM_PIPES		30
+#define USB3_MAX_NUM_PIPES		6	/* This includes PIPE 0 */
 #define USB3_WAIT_US			3
 #define USB3_DMA_NUM_SETTING_AREA	4
 /*
diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c
index 19f00424f53ed3b6246788dbec370e198a7b86a9..3ed75aaa09d9d37b786c7865af231274fa8d4542 100644
--- a/drivers/usb/host/ehci-dbg.c
+++ b/drivers/usb/host/ehci-dbg.c
@@ -827,7 +827,7 @@ static ssize_t fill_registers_buffer(struct debug_buffer *buf)
 			default:		/* unknown */
 				break;
 			}
-			temp = (cap >> 8) & 0xff;
+			offset = (cap >> 8) & 0xff;
 		}
 	}
 #endif
diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c
index 4f7895dbcf880ef2a1e3b863b7002825e3cb752c..e26e685d8a578fddffaa7ff220a0c4442ba4f5ce 100644
--- a/drivers/usb/host/xhci-debugfs.c
+++ b/drivers/usb/host/xhci-debugfs.c
@@ -162,7 +162,7 @@ static void xhci_debugfs_extcap_regset(struct xhci_hcd *xhci, int cap_id,
 static int xhci_ring_enqueue_show(struct seq_file *s, void *unused)
 {
 	dma_addr_t		dma;
-	struct xhci_ring	*ring = s->private;
+	struct xhci_ring	*ring = *(struct xhci_ring **)s->private;
 
 	dma = xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue);
 	seq_printf(s, "%pad\n", &dma);
@@ -173,7 +173,7 @@ static int xhci_ring_enqueue_show(struct seq_file *s, void *unused)
 static int xhci_ring_dequeue_show(struct seq_file *s, void *unused)
 {
 	dma_addr_t		dma;
-	struct xhci_ring	*ring = s->private;
+	struct xhci_ring	*ring = *(struct xhci_ring **)s->private;
 
 	dma = xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue);
 	seq_printf(s, "%pad\n", &dma);
@@ -183,7 +183,7 @@ static int xhci_ring_dequeue_show(struct seq_file *s, void *unused)
 
 static int xhci_ring_cycle_show(struct seq_file *s, void *unused)
 {
-	struct xhci_ring	*ring = s->private;
+	struct xhci_ring	*ring = *(struct xhci_ring **)s->private;
 
 	seq_printf(s, "%d\n", ring->cycle_state);
 
@@ -346,7 +346,7 @@ static void xhci_debugfs_create_files(struct xhci_hcd *xhci,
 }
 
 static struct dentry *xhci_debugfs_create_ring_dir(struct xhci_hcd *xhci,
-						   struct xhci_ring *ring,
+						   struct xhci_ring **ring,
 						   const char *name,
 						   struct dentry *parent)
 {
@@ -387,7 +387,7 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci,
 
 	snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index);
 	epriv->root = xhci_debugfs_create_ring_dir(xhci,
-						   dev->eps[ep_index].new_ring,
+						   &dev->eps[ep_index].new_ring,
 						   epriv->name,
 						   spriv->root);
 	spriv->eps[ep_index] = epriv;
@@ -423,7 +423,7 @@ void xhci_debugfs_create_slot(struct xhci_hcd *xhci, int slot_id)
 	priv->dev = dev;
 	dev->debugfs_private = priv;
 
-	xhci_debugfs_create_ring_dir(xhci, dev->eps[0].ring,
+	xhci_debugfs_create_ring_dir(xhci, &dev->eps[0].ring,
 				     "ep00", priv->root);
 
 	xhci_debugfs_create_context_files(xhci, priv->root, slot_id);
@@ -488,11 +488,11 @@ void xhci_debugfs_init(struct xhci_hcd *xhci)
 				   ARRAY_SIZE(xhci_extcap_dbc),
 				   "reg-ext-dbc");
 
-	xhci_debugfs_create_ring_dir(xhci, xhci->cmd_ring,
+	xhci_debugfs_create_ring_dir(xhci, &xhci->cmd_ring,
 				     "command-ring",
 				     xhci->debugfs_root);
 
-	xhci_debugfs_create_ring_dir(xhci, xhci->event_ring,
+	xhci_debugfs_create_ring_dir(xhci, &xhci->event_ring,
 				     "event-ring",
 				     xhci->debugfs_root);
 
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index e1fba4688509df32d0aee83be2f53abbf65f37b5..3a29b32a3bd06c43376bb493ae6f08a48e6f3174 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -934,6 +934,12 @@ void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id)
 	if (!vdev)
 		return;
 
+	if (vdev->real_port == 0 ||
+			vdev->real_port > HCS_MAX_PORTS(xhci->hcs_params1)) {
+		xhci_dbg(xhci, "Bad vdev->real_port.\n");
+		goto out;
+	}
+
 	tt_list_head = &(xhci->rh_bw[vdev->real_port - 1].tts);
 	list_for_each_entry_safe(tt_info, next, tt_list_head, tt_list) {
 		/* is this a hub device that added a tt_info to the tts list */
@@ -947,6 +953,7 @@ void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id)
 			}
 		}
 	}
+out:
 	/* we are now at a leaf device */
 	xhci_debugfs_remove_slot(xhci, slot_id);
 	xhci_free_virt_device(xhci, slot_id);
@@ -964,10 +971,9 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
 		return 0;
 	}
 
-	xhci->devs[slot_id] = kzalloc(sizeof(*xhci->devs[slot_id]), flags);
-	if (!xhci->devs[slot_id])
+	dev = kzalloc(sizeof(*dev), flags);
+	if (!dev)
 		return 0;
-	dev = xhci->devs[slot_id];
 
 	/* Allocate the (output) device context that will be used in the HC. */
 	dev->out_ctx = xhci_alloc_container_ctx(xhci, XHCI_CTX_TYPE_DEVICE, flags);
@@ -1008,9 +1014,17 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
 
 	trace_xhci_alloc_virt_device(dev);
 
+	xhci->devs[slot_id] = dev;
+
 	return 1;
 fail:
-	xhci_free_virt_device(xhci, slot_id);
+
+	if (dev->in_ctx)
+		xhci_free_container_ctx(xhci, dev->in_ctx);
+	if (dev->out_ctx)
+		xhci_free_container_ctx(xhci, dev->out_ctx);
+	kfree(dev);
+
 	return 0;
 }
 
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 7ef1274ef7f7f245a03e539982da07f6242343b1..1aad89b8aba0b5b5b4610aa161bc7379ba45fbb7 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -177,6 +177,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 		xhci->quirks |= XHCI_BROKEN_STREAMS;
 	}
+	if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
+			pdev->device == 0x0014)
+		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 	if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
 			pdev->device == 0x0015)
 		xhci->quirks |= XHCI_RESET_ON_RESUME;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index c239c688076cf924060eccdb9698b2a0a86ab75d..c5cbc685c6915ce9e5b6f884ec5cdd7ea0306dc4 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2477,12 +2477,16 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 		 */
 		if (list_empty(&ep_ring->td_list)) {
 			/*
-			 * A stopped endpoint may generate an extra completion
-			 * event if the device was suspended.  Don't print
-			 * warnings.
+			 * Don't print wanings if it's due to a stopped endpoint
+			 * generating an extra completion event if the device
+			 * was suspended. Or, a event for the last TRB of a
+			 * short TD we already got a short event for.
+			 * The short TD is already removed from the TD list.
 			 */
+
 			if (!(trb_comp_code == COMP_STOPPED ||
-				trb_comp_code == COMP_STOPPED_LENGTH_INVALID)) {
+			      trb_comp_code == COMP_STOPPED_LENGTH_INVALID ||
+			      ep_ring->last_td_was_short)) {
 				xhci_warn(xhci, "WARN Event TRB for slot %d ep %d with no TDs queued?\n",
 						TRB_TO_SLOT_ID(le32_to_cpu(event->flags)),
 						ep_index);
@@ -3108,7 +3112,7 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred,
 {
 	u32 maxp, total_packet_count;
 
-	/* MTK xHCI is mostly 0.97 but contains some features from 1.0 */
+	/* MTK xHCI 0.96 contains some features from 1.0 */
 	if (xhci->hci_version < 0x100 && !(xhci->quirks & XHCI_MTK_HOST))
 		return ((td_total_len - transferred) >> 10);
 
@@ -3117,8 +3121,8 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred,
 	    trb_buff_len == td_total_len)
 		return 0;
 
-	/* for MTK xHCI, TD size doesn't include this TRB */
-	if (xhci->quirks & XHCI_MTK_HOST)
+	/* for MTK xHCI 0.96, TD size include this TRB, but not in 1.x */
+	if ((xhci->quirks & XHCI_MTK_HOST) && (xhci->hci_version < 0x100))
 		trb_buff_len = 0;
 
 	maxp = usb_endpoint_maxp(&urb->ep->desc);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 2424d3020ca364b22792376e36c21462af3b2f62..da6dbe3ebd8be9c8f137c6f500663be857184080 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3525,8 +3525,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
 	struct xhci_slot_ctx *slot_ctx;
 	int i, ret;
 
-	xhci_debugfs_remove_slot(xhci, udev->slot_id);
-
 #ifndef CONFIG_USB_DEFAULT_PERSIST
 	/*
 	 * We called pm_runtime_get_noresume when the device was attached.
@@ -3555,8 +3553,10 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
 	}
 
 	ret = xhci_disable_slot(xhci, udev->slot_id);
-	if (ret)
+	if (ret) {
+		xhci_debugfs_remove_slot(xhci, udev->slot_id);
 		xhci_free_virt_device(xhci, udev->slot_id);
+	}
 }
 
 int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id)
diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c
index 465dbf68b4633d438e858260d5db12d6bc0ce44e..f723f7b8c9ac4cdb65e5beb9a401ac3ce64f99e2 100644
--- a/drivers/usb/misc/usb3503.c
+++ b/drivers/usb/misc/usb3503.c
@@ -279,6 +279,8 @@ static int usb3503_probe(struct usb3503 *hub)
 	if (gpio_is_valid(hub->gpio_reset)) {
 		err = devm_gpio_request_one(dev, hub->gpio_reset,
 				GPIOF_OUT_INIT_LOW, "usb3503 reset");
+		/* Datasheet defines a hardware reset to be at least 100us */
+		usleep_range(100, 10000);
 		if (err) {
 			dev_err(dev,
 				"unable to request GPIO %d as reset pin (%d)\n",
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index f6ae753ab99b0998fcc99463fd50971f658dfaf0..f932f40302df942b744353f0db738edba88c553f 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1004,7 +1004,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 		break;
 
 	case MON_IOCQ_RING_SIZE:
+		mutex_lock(&rp->fetch_lock);
 		ret = rp->b_size;
+		mutex_unlock(&rp->fetch_lock);
 		break;
 
 	case MON_IOCT_RING_SIZE:
@@ -1231,12 +1233,16 @@ static int mon_bin_vma_fault(struct vm_fault *vmf)
 	unsigned long offset, chunk_idx;
 	struct page *pageptr;
 
+	mutex_lock(&rp->fetch_lock);
 	offset = vmf->pgoff << PAGE_SHIFT;
-	if (offset >= rp->b_size)
+	if (offset >= rp->b_size) {
+		mutex_unlock(&rp->fetch_lock);
 		return VM_FAULT_SIGBUS;
+	}
 	chunk_idx = offset / CHUNK_SIZE;
 	pageptr = rp->b_vec[chunk_idx].pg;
 	get_page(pageptr);
+	mutex_unlock(&rp->fetch_lock);
 	vmf->page = pageptr;
 	return 0;
 }
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index 0397606a211b2a62ab58067334784753a4e10668..6c036de63272b432b8b88255a2fd97db587187e6 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -284,7 +284,15 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci)
 			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
 			portstate(musb->port1_status |= USB_PORT_STAT_POWER);
 			del_timer(&musb->dev_timer);
-		} else {
+		} else if (!(musb->int_usb & MUSB_INTR_BABBLE)) {
+			/*
+			 * When babble condition happens, drvvbus interrupt
+			 * is also generated. Ignore this drvvbus interrupt
+			 * and let babble interrupt handler recovers the
+			 * controller; otherwise, the host-mode flag is lost
+			 * due to the MUSB_DEV_MODE() call below and babble
+			 * recovery logic will not be called.
+			 */
 			musb->is_active = 0;
 			MUSB_DEV_MODE(musb);
 			otg->default_a = 0;
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 7c6273bf5bebcfff7b573c23659ef2da219bd29d..06d502b3e91344c809523f67cf75cd364dc2cbed 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -124,6 +124,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
 	{ USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
 	{ USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
+	{ USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
 	{ USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
 	{ USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
 	{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
@@ -174,6 +175,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
 	{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
 	{ USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
+	{ USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */
 	{ USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */
 	{ USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
 	{ USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 1aba9105b369678759a204780bb7b3d882b7d879..fc68952c994a5557bb8ca3e2de94f9d2c1b15791 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1013,6 +1013,7 @@ static const struct usb_device_id id_table_combined[] = {
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
 	{ USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) },
 	{ USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) },
+	{ USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) },
 	{ }					/* Terminating entry */
 };
 
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 4faa09fe308ca0570cc9dcda6900ef74efa3e1d0..8b4ecd2bd297b84d5a86c9b02cd5c38d3a6e86a8 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -914,6 +914,12 @@
 #define ICPDAS_I7561U_PID		0x0104
 #define ICPDAS_I7563U_PID		0x0105
 
+/*
+ * Airbus Defence and Space
+ */
+#define AIRBUS_DS_VID			0x1e8e  /* Vendor ID */
+#define AIRBUS_DS_P8GR			0x6001  /* Tetra P8GR */
+
 /*
  * RT Systems programming cables for various ham radios
  */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index aaa7d901a06de68a7902bbba02707ec29d6bcbd5..b6320e3be42970c984cb86505251cde089437102 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -233,11 +233,14 @@ static void option_instat_callback(struct urb *urb);
 /* These Quectel products use Qualcomm's vendor ID */
 #define QUECTEL_PRODUCT_UC20			0x9003
 #define QUECTEL_PRODUCT_UC15			0x9090
+/* These Yuga products use Qualcomm's vendor ID */
+#define YUGA_PRODUCT_CLM920_NC5			0x9625
 
 #define QUECTEL_VENDOR_ID			0x2c7c
 /* These Quectel products use Quectel's vendor ID */
 #define QUECTEL_PRODUCT_EC21			0x0121
 #define QUECTEL_PRODUCT_EC25			0x0125
+#define QUECTEL_PRODUCT_BG96			0x0296
 
 #define CMOTECH_VENDOR_ID			0x16d8
 #define CMOTECH_PRODUCT_6001			0x6001
@@ -279,6 +282,7 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_LE922_USBCFG3		0x1043
 #define TELIT_PRODUCT_LE922_USBCFG5		0x1045
 #define TELIT_PRODUCT_ME910			0x1100
+#define TELIT_PRODUCT_ME910_DUAL_MODEM		0x1101
 #define TELIT_PRODUCT_LE920			0x1200
 #define TELIT_PRODUCT_LE910			0x1201
 #define TELIT_PRODUCT_LE910_USBCFG4		0x1206
@@ -644,6 +648,11 @@ static const struct option_blacklist_info telit_me910_blacklist = {
 	.reserved = BIT(1) | BIT(3),
 };
 
+static const struct option_blacklist_info telit_me910_dual_modem_blacklist = {
+	.sendsetup = BIT(0),
+	.reserved = BIT(3),
+};
+
 static const struct option_blacklist_info telit_le910_blacklist = {
 	.sendsetup = BIT(0),
 	.reserved = BIT(1) | BIT(2),
@@ -673,6 +682,10 @@ static const struct option_blacklist_info cinterion_rmnet2_blacklist = {
 	.reserved = BIT(4) | BIT(5),
 };
 
+static const struct option_blacklist_info yuga_clm920_nc5_blacklist = {
+	.reserved = BIT(1) | BIT(4),
+};
+
 static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
 	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1177,11 +1190,16 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)},
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20),
 	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+	/* Yuga products use Qualcomm vendor ID */
+	{ USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5),
+	  .driver_info = (kernel_ulong_t)&yuga_clm920_nc5_blacklist },
 	/* Quectel products using Quectel vendor ID */
 	{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21),
 	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25),
 	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+	{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96),
+	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003),
@@ -1241,6 +1259,8 @@ static const struct usb_device_id option_ids[] = {
 		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
 		.driver_info = (kernel_ulong_t)&telit_me910_blacklist },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
+		.driver_info = (kernel_ulong_t)&telit_me910_dual_modem_blacklist },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
 		.driver_info = (kernel_ulong_t)&telit_le910_blacklist },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index e3892541a489940f58bfa8f534ff0e765fc527f4..613f91add03da189c0fd5334cbccbbf720060079 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = {
 	{DEVICE_SWI(0x1199, 0x9079)},	/* Sierra Wireless EM74xx */
 	{DEVICE_SWI(0x1199, 0x907a)},	/* Sierra Wireless EM74xx QDL */
 	{DEVICE_SWI(0x1199, 0x907b)},	/* Sierra Wireless EM74xx */
+	{DEVICE_SWI(0x1199, 0x9090)},	/* Sierra Wireless EM7565 QDL */
+	{DEVICE_SWI(0x1199, 0x9091)},	/* Sierra Wireless EM7565 */
 	{DEVICE_SWI(0x413c, 0x81a2)},	/* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a3)},	/* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a4)},	/* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
@@ -342,6 +344,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 			break;
 		case 2:
 			dev_dbg(dev, "NMEA GPS interface found\n");
+			sendsetup = true;
 			break;
 		case 3:
 			dev_dbg(dev, "Modem port found\n");
diff --git a/drivers/usb/serial/usb_debug.c b/drivers/usb/serial/usb_debug.c
index ab5a2ac4993ab234f55ecdb0c4669277e6700f2a..aaf4813e4971eeb98ccae06aa37b7047404baac2 100644
--- a/drivers/usb/serial/usb_debug.c
+++ b/drivers/usb/serial/usb_debug.c
@@ -31,12 +31,14 @@ static const struct usb_device_id id_table[] = {
 };
 
 static const struct usb_device_id dbc_id_table[] = {
+	{ USB_DEVICE(0x1d6b, 0x0010) },
 	{ USB_DEVICE(0x1d6b, 0x0011) },
 	{ },
 };
 
 static const struct usb_device_id id_table_combined[] = {
 	{ USB_DEVICE(0x0525, 0x127a) },
+	{ USB_DEVICE(0x1d6b, 0x0010) },
 	{ USB_DEVICE(0x1d6b, 0x0011) },
 	{ },
 };
diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h
index 1fcd758a961f5e192082efef9fb69c8efe3bd962..3734a25e09e539f05f16e9f16b5dfb371d0e3799 100644
--- a/drivers/usb/storage/uas-detect.h
+++ b/drivers/usb/storage/uas-detect.h
@@ -112,6 +112,10 @@ static int uas_use_uas_driver(struct usb_interface *intf,
 		}
 	}
 
+	/* All Seagate disk enclosures have broken ATA pass-through support */
+	if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bc2)
+		flags |= US_FL_NO_ATA_1X;
+
 	usb_stor_adjust_quirks(udev, &flags);
 
 	if (flags & US_FL_IGNORE_UAS) {
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 2968046e7c059229cd8cec3611e150ccc3e70f23..f72d045ee9ef11940c056921d2760c649bc34d17 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2100,6 +2100,13 @@ UNUSUAL_DEV(  0x152d, 0x0567, 0x0114, 0x0116,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_BROKEN_FUA ),
 
+/* Reported by David Kozub <zub@linux.fjfi.cvut.cz> */
+UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
+		"JMicron",
+		"JMS567",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_BROKEN_FUA),
+
 /*
  * Reported by Alexandre Oliva <oliva@lsd.ic.unicamp.br>
  * JMicron responds to USN and several other SCSI ioctls with a
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index d520374a824e19796862280dcc17832a0d66321c..a7d08ae0adaddc257c6399f0f8c5897fc16e10ff 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -129,6 +129,13 @@ UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES),
 
+/* Reported-by: David Kozub <zub@linux.fjfi.cvut.cz> */
+UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
+		"JMicron",
+		"JMS567",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_BROKEN_FUA),
+
 /* Reported-by: Hans de Goede <hdegoede@redhat.com> */
 UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
 		"VIA",
@@ -136,6 +143,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_NO_ATA_1X),
 
+/* Reported-by: Icenowy Zheng <icenowy@aosc.io> */
+UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
+		"Norelsys",
+		"NS1068X",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_IGNORE_UAS),
+
 /* Reported-by: Takeo Nakayama <javhera@gmx.com> */
 UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
 		"JMicron",
diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig
index 465d7da849c3415dc0d7fa25068db3ca65e37b9b..bcb2744c59772ce6453bbf280afae83b948163d9 100644
--- a/drivers/usb/typec/Kconfig
+++ b/drivers/usb/typec/Kconfig
@@ -1,13 +1,53 @@
 
-menu "USB Power Delivery and Type-C drivers"
+menuconfig TYPEC
+	tristate "USB Type-C Support"
+	help
+	  USB Type-C Specification defines a cable and connector for USB where
+	  only one type of plug is supported on both ends, i.e. there will not
+	  be Type-A plug on one end of the cable and Type-B plug on the other.
+	  Determination of the host-to-device relationship happens through a
+	  specific Configuration Channel (CC) which goes through the USB Type-C
+	  cable. The Configuration Channel may also be used to detect optional
+	  Accessory Modes - Analog Audio and Debug - and if USB Power Delivery
+	  is supported, the Alternate Modes, where the connector is used for
+	  something else then USB communication.
+
+	  USB Power Delivery Specification defines a protocol that can be used
+	  to negotiate the voltage and current levels with the connected
+	  partners. USB Power Delivery allows higher voltages then the normal
+	  5V, up to 20V, and current up to 5A over the cable. The USB Power
+	  Delivery protocol is also used to negotiate the optional Alternate
+	  Modes when they are supported. USB Power Delivery does not depend on
+	  USB Type-C connector, however it is mostly used together with USB
+	  Type-C connectors.
+
+	  USB Type-C and USB Power Delivery Specifications define a set of state
+	  machines that need to be implemented in either software or firmware.
+	  Simple USB Type-C PHYs, for example USB Type-C Port Controller
+	  Interface Specification compliant "Port Controllers" need the state
+	  machines to be handled in the OS, but stand-alone USB Type-C and Power
+	  Delivery controllers handle the state machines inside their firmware.
+	  The USB Type-C and Power Delivery controllers usually function
+	  autonomously, and do not necessarily require drivers.
+
+	  Enable this configurations option if you have USB Type-C connectors on
+	  your system and 1) you know your USB Type-C hardware requires OS
+	  control (a driver) to function, or 2) if you need to be able to read
+	  the status of the USB Type-C ports in your system, or 3) if you need
+	  to be able to swap the power role (decide are you supplying or
+	  consuming power over the cable) or data role (host or device) when
+	  both roles are supported.
+
+	  For more information, see the kernel documentation for USB Type-C
+	  Connector Class API (Documentation/driver-api/usb/typec.rst)
+	  <https://www.kernel.org/doc/html/latest/driver-api/usb/typec.html>
+	  and ABI (Documentation/ABI/testing/sysfs-class-typec).
 
-config TYPEC
-	tristate
+if TYPEC
 
 config TYPEC_TCPM
 	tristate "USB Type-C Port Controller Manager"
 	depends on USB
-	select TYPEC
 	help
 	  The Type-C Port Controller Manager provides a USB PD and USB Type-C
 	  state machine for use with Type-C Port Controllers.
@@ -22,7 +62,6 @@ config TYPEC_WCOVE
 	depends on INTEL_SOC_PMIC
 	depends on INTEL_PMC_IPC
 	depends on BXT_WC_PMIC_OPREGION
-	select TYPEC
 	help
 	  This driver adds support for USB Type-C detection on Intel Broxton
 	  platforms that have Intel Whiskey Cove PMIC. The driver can detect the
@@ -31,14 +70,13 @@ config TYPEC_WCOVE
 	  To compile this driver as module, choose M here: the module will be
 	  called typec_wcove
 
-endif
+endif # TYPEC_TCPM
 
 source "drivers/usb/typec/ucsi/Kconfig"
 
 config TYPEC_TPS6598X
 	tristate "TI TPS6598x USB Power Delivery controller driver"
 	depends on I2C
-	select TYPEC
 	help
 	  Say Y or M here if your system has TI TPS65982 or TPS65983 USB Power
 	  Delivery controller.
@@ -46,4 +84,4 @@ config TYPEC_TPS6598X
 	  If you choose to build this driver as a dynamically linked module, the
 	  module will be called tps6598x.ko.
 
-endmenu
+endif # TYPEC
diff --git a/drivers/usb/typec/ucsi/Kconfig b/drivers/usb/typec/ucsi/Kconfig
index d0c31cee472099598eb1f727cb4f9164dade2a84..e36d6c73c4a4184c6246b14ab27b8e0ecb393be8 100644
--- a/drivers/usb/typec/ucsi/Kconfig
+++ b/drivers/usb/typec/ucsi/Kconfig
@@ -1,7 +1,6 @@
 config TYPEC_UCSI
 	tristate "USB Type-C Connector System Software Interface driver"
 	depends on !CPU_BIG_ENDIAN
-	select TYPEC
 	help
 	  USB Type-C Connector System Software Interface (UCSI) is a
 	  specification for an interface that allows the operating system to
diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
index a3df8ee82faff77d9c114ed605412ccfb0704ce6..e31a6f204397dba47b3356ec3bbfe420539dc05b 100644
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -149,8 +149,7 @@ static void stub_shutdown_connection(struct usbip_device *ud)
 	 * step 1?
 	 */
 	if (ud->tcp_socket) {
-		dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n",
-			ud->tcp_socket);
+		dev_dbg(&sdev->udev->dev, "shutdown sockfd %d\n", ud->sockfd);
 		kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
 	}
 
diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c
index 4f48b306713f1a62942905122c9164beb134ce5f..c31c8402a0c55ddd2f4b463ebabd28be6438f490 100644
--- a/drivers/usb/usbip/stub_main.c
+++ b/drivers/usb/usbip/stub_main.c
@@ -237,11 +237,12 @@ void stub_device_cleanup_urbs(struct stub_device *sdev)
 	struct stub_priv *priv;
 	struct urb *urb;
 
-	dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev);
+	dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n");
 
 	while ((priv = stub_priv_pop(sdev))) {
 		urb = priv->urb;
-		dev_dbg(&sdev->udev->dev, "free urb %p\n", urb);
+		dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n",
+			priv->seqnum);
 		usb_kill_urb(urb);
 
 		kmem_cache_free(stub_priv_cache, priv);
diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c
index 536e037f541faa01f16188e45e1afe130abc57ea..6c5a593139996fe11be9c2bf86eb8cc19a390ef1 100644
--- a/drivers/usb/usbip/stub_rx.c
+++ b/drivers/usb/usbip/stub_rx.c
@@ -211,9 +211,6 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev,
 		if (priv->seqnum != pdu->u.cmd_unlink.seqnum)
 			continue;
 
-		dev_info(&priv->urb->dev->dev, "unlink urb %p\n",
-			 priv->urb);
-
 		/*
 		 * This matched urb is not completed yet (i.e., be in
 		 * flight in usb hcd hardware/driver). Now we are
@@ -252,8 +249,8 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev,
 		ret = usb_unlink_urb(priv->urb);
 		if (ret != -EINPROGRESS)
 			dev_err(&priv->urb->dev->dev,
-				"failed to unlink a urb %p, ret %d\n",
-				priv->urb, ret);
+				"failed to unlink a urb # %lu, ret %d\n",
+				priv->seqnum, ret);
 
 		return 0;
 	}
@@ -322,23 +319,26 @@ static struct stub_priv *stub_priv_alloc(struct stub_device *sdev,
 	return priv;
 }
 
-static int get_pipe(struct stub_device *sdev, int epnum, int dir)
+static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu)
 {
 	struct usb_device *udev = sdev->udev;
 	struct usb_host_endpoint *ep;
 	struct usb_endpoint_descriptor *epd = NULL;
+	int epnum = pdu->base.ep;
+	int dir = pdu->base.direction;
+
+	if (epnum < 0 || epnum > 15)
+		goto err_ret;
 
 	if (dir == USBIP_DIR_IN)
 		ep = udev->ep_in[epnum & 0x7f];
 	else
 		ep = udev->ep_out[epnum & 0x7f];
-	if (!ep) {
-		dev_err(&sdev->udev->dev, "no such endpoint?, %d\n",
-			epnum);
-		BUG();
-	}
+	if (!ep)
+		goto err_ret;
 
 	epd = &ep->desc;
+
 	if (usb_endpoint_xfer_control(epd)) {
 		if (dir == USBIP_DIR_OUT)
 			return usb_sndctrlpipe(udev, epnum);
@@ -361,15 +361,31 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir)
 	}
 
 	if (usb_endpoint_xfer_isoc(epd)) {
+		/* validate packet size and number of packets */
+		unsigned int maxp, packets, bytes;
+
+		maxp = usb_endpoint_maxp(epd);
+		maxp *= usb_endpoint_maxp_mult(epd);
+		bytes = pdu->u.cmd_submit.transfer_buffer_length;
+		packets = DIV_ROUND_UP(bytes, maxp);
+
+		if (pdu->u.cmd_submit.number_of_packets < 0 ||
+		    pdu->u.cmd_submit.number_of_packets > packets) {
+			dev_err(&sdev->udev->dev,
+				"CMD_SUBMIT: isoc invalid num packets %d\n",
+				pdu->u.cmd_submit.number_of_packets);
+			return -1;
+		}
 		if (dir == USBIP_DIR_OUT)
 			return usb_sndisocpipe(udev, epnum);
 		else
 			return usb_rcvisocpipe(udev, epnum);
 	}
 
+err_ret:
 	/* NOT REACHED */
-	dev_err(&sdev->udev->dev, "get pipe, epnum %d\n", epnum);
-	return 0;
+	dev_err(&sdev->udev->dev, "CMD_SUBMIT: invalid epnum %d\n", epnum);
+	return -1;
 }
 
 static void masking_bogus_flags(struct urb *urb)
@@ -433,7 +449,10 @@ static void stub_recv_cmd_submit(struct stub_device *sdev,
 	struct stub_priv *priv;
 	struct usbip_device *ud = &sdev->ud;
 	struct usb_device *udev = sdev->udev;
-	int pipe = get_pipe(sdev, pdu->base.ep, pdu->base.direction);
+	int pipe = get_pipe(sdev, pdu);
+
+	if (pipe == -1)
+		return;
 
 	priv = stub_priv_alloc(sdev, pdu);
 	if (!priv)
diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c
index b18bce96c212b35384d18ba108cbe98f27803cab..f0ec41a50cbc16f9814ca8b2a7590dbe3c465fab 100644
--- a/drivers/usb/usbip/stub_tx.c
+++ b/drivers/usb/usbip/stub_tx.c
@@ -88,7 +88,7 @@ void stub_complete(struct urb *urb)
 	/* link a urb to the queue of tx. */
 	spin_lock_irqsave(&sdev->priv_lock, flags);
 	if (sdev->ud.tcp_socket == NULL) {
-		usbip_dbg_stub_tx("ignore urb for closed connection %p", urb);
+		usbip_dbg_stub_tx("ignore urb for closed connection\n");
 		/* It will be freed in stub_device_cleanup_urbs(). */
 	} else if (priv->unlinking) {
 		stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status);
@@ -167,6 +167,13 @@ static int stub_send_ret_submit(struct stub_device *sdev)
 		memset(&pdu_header, 0, sizeof(pdu_header));
 		memset(&msg, 0, sizeof(msg));
 
+		if (urb->actual_length > 0 && !urb->transfer_buffer) {
+			dev_err(&sdev->udev->dev,
+				"urb: actual_length %d transfer_buffer null\n",
+				urb->actual_length);
+			return -1;
+		}
+
 		if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS)
 			iovnum = 2 + urb->number_of_packets;
 		else
@@ -183,8 +190,8 @@ static int stub_send_ret_submit(struct stub_device *sdev)
 
 		/* 1. setup usbip_header */
 		setup_ret_submit_pdu(&pdu_header, urb);
-		usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
-				  pdu_header.base.seqnum, urb);
+		usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
+				  pdu_header.base.seqnum);
 		usbip_header_correct_endian(&pdu_header, 1);
 
 		iov[iovnum].iov_base = &pdu_header;
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index f7978933b40290787ded82fd7b244783610d206e..ee2bbce24584c1b28e9ea81fff476328635d7707 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -91,7 +91,7 @@ static void usbip_dump_usb_device(struct usb_device *udev)
 	dev_dbg(dev, "       devnum(%d) devpath(%s) usb speed(%s)",
 		udev->devnum, udev->devpath, usb_speed_string(udev->speed));
 
-	pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
+	pr_debug("tt hub ttport %d\n", udev->ttport);
 
 	dev_dbg(dev, "                    ");
 	for (i = 0; i < 16; i++)
@@ -124,12 +124,8 @@ static void usbip_dump_usb_device(struct usb_device *udev)
 	}
 	pr_debug("\n");
 
-	dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
-
-	dev_dbg(dev,
-		"descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
-		&udev->descriptor, udev->config,
-		udev->actconfig, udev->rawdescriptors);
+	dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev),
+		udev->bus->bus_name);
 
 	dev_dbg(dev, "have_langid %d, string_langid %d\n",
 		udev->have_langid, udev->string_langid);
@@ -237,9 +233,6 @@ void usbip_dump_urb(struct urb *urb)
 
 	dev = &urb->dev->dev;
 
-	dev_dbg(dev, "   urb                   :%p\n", urb);
-	dev_dbg(dev, "   dev                   :%p\n", urb->dev);
-
 	usbip_dump_usb_device(urb->dev);
 
 	dev_dbg(dev, "   pipe                  :%08x ", urb->pipe);
@@ -248,11 +241,9 @@ void usbip_dump_urb(struct urb *urb)
 
 	dev_dbg(dev, "   status                :%d\n", urb->status);
 	dev_dbg(dev, "   transfer_flags        :%08X\n", urb->transfer_flags);
-	dev_dbg(dev, "   transfer_buffer       :%p\n", urb->transfer_buffer);
 	dev_dbg(dev, "   transfer_buffer_length:%d\n",
 						urb->transfer_buffer_length);
 	dev_dbg(dev, "   actual_length         :%d\n", urb->actual_length);
-	dev_dbg(dev, "   setup_packet          :%p\n", urb->setup_packet);
 
 	if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
 		usbip_dump_usb_ctrlrequest(
@@ -262,8 +253,6 @@ void usbip_dump_urb(struct urb *urb)
 	dev_dbg(dev, "   number_of_packets     :%d\n", urb->number_of_packets);
 	dev_dbg(dev, "   interval              :%d\n", urb->interval);
 	dev_dbg(dev, "   error_count           :%d\n", urb->error_count);
-	dev_dbg(dev, "   context               :%p\n", urb->context);
-	dev_dbg(dev, "   complete              :%p\n", urb->complete);
 }
 EXPORT_SYMBOL_GPL(usbip_dump_urb);
 
@@ -317,26 +306,20 @@ int usbip_recv(struct socket *sock, void *buf, int size)
 	struct msghdr msg = {.msg_flags = MSG_NOSIGNAL};
 	int total = 0;
 
+	if (!sock || !buf || !size)
+		return -EINVAL;
+
 	iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size);
 
 	usbip_dbg_xmit("enter\n");
 
-	if (!sock || !buf || !size) {
-		pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf,
-		       size);
-		return -EINVAL;
-	}
-
 	do {
-		int sz = msg_data_left(&msg);
+		msg_data_left(&msg);
 		sock->sk->sk_allocation = GFP_NOIO;
 
 		result = sock_recvmsg(sock, &msg, MSG_WAITALL);
-		if (result <= 0) {
-			pr_debug("receive sock %p buf %p size %u ret %d total %d\n",
-				 sock, buf + total, sz, result, total);
+		if (result <= 0)
 			goto err;
-		}
 
 		total += result;
 	} while (msg_data_left(&msg));
diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h
index e5de35c8c5056b7ab9c38c715cbf57b67a263a92..473fb8a872893caa3494fe9a4a85b60600bdfd28 100644
--- a/drivers/usb/usbip/usbip_common.h
+++ b/drivers/usb/usbip/usbip_common.h
@@ -256,6 +256,7 @@ struct usbip_device {
 	/* lock for status */
 	spinlock_t lock;
 
+	int sockfd;
 	struct socket *tcp_socket;
 
 	struct task_struct *tcp_rx;
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index 713e941709632f61664d5e97ed081163883835ac..c3e1008aa491ee45071adab6215f7440aa6dfa13 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -656,9 +656,6 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 	struct vhci_device *vdev;
 	unsigned long flags;
 
-	usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n",
-			  hcd, urb, mem_flags);
-
 	if (portnum > VHCI_HC_PORTS) {
 		pr_err("invalid port number %d\n", portnum);
 		return -ENODEV;
@@ -822,8 +819,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 	struct vhci_device *vdev;
 	unsigned long flags;
 
-	pr_info("dequeue a urb %p\n", urb);
-
 	spin_lock_irqsave(&vhci->lock, flags);
 
 	priv = urb->hcpriv;
@@ -851,7 +846,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 		/* tcp connection is closed */
 		spin_lock(&vdev->priv_lock);
 
-		pr_info("device %p seems to be disconnected\n", vdev);
 		list_del(&priv->list);
 		kfree(priv);
 		urb->hcpriv = NULL;
@@ -863,8 +857,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 		 * vhci_rx will receive RET_UNLINK and give back the URB.
 		 * Otherwise, we give back it here.
 		 */
-		pr_info("gives back urb %p\n", urb);
-
 		usb_hcd_unlink_urb_from_ep(hcd, urb);
 
 		spin_unlock_irqrestore(&vhci->lock, flags);
@@ -892,8 +884,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 
 		unlink->unlink_seqnum = priv->seqnum;
 
-		pr_info("device %p seems to be still connected\n", vdev);
-
 		/* send cmd_unlink and try to cancel the pending URB in the
 		 * peer */
 		list_add_tail(&unlink->list, &vdev->unlink_tx);
@@ -975,7 +965,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud)
 
 	/* need this? see stub_dev.c */
 	if (ud->tcp_socket) {
-		pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket);
+		pr_debug("shutdown tcp_socket %d\n", ud->sockfd);
 		kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
 	}
 
@@ -1098,7 +1088,6 @@ static int hcd_name_to_id(const char *name)
 static int vhci_setup(struct usb_hcd *hcd)
 {
 	struct vhci *vhci = *((void **)dev_get_platdata(hcd->self.controller));
-	hcd->self.sg_tablesize = ~0;
 	if (usb_hcd_is_primary_hcd(hcd)) {
 		vhci->vhci_hcd_hs = hcd_to_vhci_hcd(hcd);
 		vhci->vhci_hcd_hs->vhci = vhci;
diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c
index 90577e8b2282393014558da959125345b1f7008c..112ebb90d8c95e0eb8ba3aa800d3b66941cb7e6c 100644
--- a/drivers/usb/usbip/vhci_rx.c
+++ b/drivers/usb/usbip/vhci_rx.c
@@ -23,24 +23,23 @@ struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum)
 		urb = priv->urb;
 		status = urb->status;
 
-		usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n",
-				urb, priv, seqnum);
+		usbip_dbg_vhci_rx("find urb seqnum %u\n", seqnum);
 
 		switch (status) {
 		case -ENOENT:
 			/* fall through */
 		case -ECONNRESET:
-			dev_info(&urb->dev->dev,
-				 "urb %p was unlinked %ssynchronuously.\n", urb,
-				 status == -ENOENT ? "" : "a");
+			dev_dbg(&urb->dev->dev,
+				 "urb seq# %u was unlinked %ssynchronuously\n",
+				 seqnum, status == -ENOENT ? "" : "a");
 			break;
 		case -EINPROGRESS:
 			/* no info output */
 			break;
 		default:
-			dev_info(&urb->dev->dev,
-				 "urb %p may be in a error, status %d\n", urb,
-				 status);
+			dev_dbg(&urb->dev->dev,
+				 "urb seq# %u may be in a error, status %d\n",
+				 seqnum, status);
 		}
 
 		list_del(&priv->list);
@@ -67,8 +66,8 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
 	spin_unlock_irqrestore(&vdev->priv_lock, flags);
 
 	if (!urb) {
-		pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum);
-		pr_info("max seqnum %d\n",
+		pr_err("cannot find a urb of seqnum %u max seqnum %d\n",
+			pdu->base.seqnum,
 			atomic_read(&vhci_hcd->seqnum));
 		usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
 		return;
@@ -91,7 +90,7 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
 	if (usbip_dbg_flag_vhci_rx)
 		usbip_dump_urb(urb);
 
-	usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+	usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum);
 
 	spin_lock_irqsave(&vhci->lock, flags);
 	usb_hcd_unlink_urb_from_ep(vhci_hcd_to_hcd(vhci_hcd), urb);
@@ -158,7 +157,7 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev,
 		pr_info("the urb (seqnum %d) was already given back\n",
 			pdu->base.seqnum);
 	} else {
-		usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+		usbip_dbg_vhci_rx("now giveback urb %d\n", pdu->base.seqnum);
 
 		/* If unlink is successful, status is -ECONNRESET */
 		urb->status = pdu->u.ret_unlink.status;
diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c
index e78f7472cac496d4cfa2a5fcf0e4111568c242ab..091f76b7196d21da57d4adf1cca91fae29377fdd 100644
--- a/drivers/usb/usbip/vhci_sysfs.c
+++ b/drivers/usb/usbip/vhci_sysfs.c
@@ -17,15 +17,20 @@
 
 /*
  * output example:
- * hub port sta spd dev      socket           local_busid
- * hs  0000 004 000 00000000         c5a7bb80 1-2.3
+ * hub port sta spd dev       sockfd    local_busid
+ * hs  0000 004 000 00000000  3         1-2.3
  * ................................................
- * ss  0008 004 000 00000000         d8cee980 2-3.4
+ * ss  0008 004 000 00000000  4         2-3.4
  * ................................................
  *
- * IP address can be retrieved from a socket pointer address by looking
- * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a
- * port number and its peer IP address.
+ * Output includes socket fd instead of socket pointer address to avoid
+ * leaking kernel memory address in:
+ *	/sys/devices/platform/vhci_hcd.0/status and in debug output.
+ * The socket pointer address is not used at the moment and it was made
+ * visible as a convenient way to find IP address from socket pointer
+ * address by looking up /proc/net/{tcp,tcp6}. As this opens a security
+ * hole, the change is made to use sockfd instead.
+ *
  */
 static void port_show_vhci(char **out, int hub, int port, struct vhci_device *vdev)
 {
@@ -39,8 +44,8 @@ static void port_show_vhci(char **out, int hub, int port, struct vhci_device *vd
 	if (vdev->ud.status == VDEV_ST_USED) {
 		*out += sprintf(*out, "%03u %08x ",
 				      vdev->speed, vdev->devid);
-		*out += sprintf(*out, "%16p %s",
-				      vdev->ud.tcp_socket,
+		*out += sprintf(*out, "%u %s",
+				      vdev->ud.sockfd,
 				      dev_name(&vdev->udev->dev));
 
 	} else {
@@ -160,7 +165,8 @@ static ssize_t nports_show(struct device *dev, struct device_attribute *attr,
 	char *s = out;
 
 	/*
-	 * Half the ports are for SPEED_HIGH and half for SPEED_SUPER, thus the * 2.
+	 * Half the ports are for SPEED_HIGH and half for SPEED_SUPER,
+	 * thus the * 2.
 	 */
 	out += sprintf(out, "%d\n", VHCI_PORTS * vhci_num_controllers);
 	return out - s;
@@ -366,6 +372,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
 
 	vdev->devid         = devid;
 	vdev->speed         = speed;
+	vdev->ud.sockfd     = sockfd;
 	vdev->ud.tcp_socket = socket;
 	vdev->ud.status     = VDEV_ST_NOTASSIGNED;
 
diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c
index d625a2ff4b712f4112bd48b8115ea0104e005d15..9aed15a358b7b98b0fab9e6b02b6820cff9ff808 100644
--- a/drivers/usb/usbip/vhci_tx.c
+++ b/drivers/usb/usbip/vhci_tx.c
@@ -69,7 +69,8 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev)
 		memset(&msg, 0, sizeof(msg));
 		memset(&iov, 0, sizeof(iov));
 
-		usbip_dbg_vhci_tx("setup txdata urb %p\n", urb);
+		usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n",
+				  priv->seqnum);
 
 		/* 1. setup usbip_header */
 		setup_cmd_submit_pdu(&pdu_header, urb);
diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c
index df1e309891488eebd0e5439738140c22654cc711..1e8a23d92cb4b86bd5ae7173b6b35ac8e37392f9 100644
--- a/drivers/usb/usbip/vudc_rx.c
+++ b/drivers/usb/usbip/vudc_rx.c
@@ -120,6 +120,25 @@ static int v_recv_cmd_submit(struct vudc *udc,
 	urb_p->new = 1;
 	urb_p->seqnum = pdu->base.seqnum;
 
+	if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) {
+		/* validate packet size and number of packets */
+		unsigned int maxp, packets, bytes;
+
+		maxp = usb_endpoint_maxp(urb_p->ep->desc);
+		maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc);
+		bytes = pdu->u.cmd_submit.transfer_buffer_length;
+		packets = DIV_ROUND_UP(bytes, maxp);
+
+		if (pdu->u.cmd_submit.number_of_packets < 0 ||
+		    pdu->u.cmd_submit.number_of_packets > packets) {
+			dev_err(&udc->gadget.dev,
+				"CMD_SUBMIT: isoc invalid num packets %d\n",
+				pdu->u.cmd_submit.number_of_packets);
+			ret = -EMSGSIZE;
+			goto free_urbp;
+		}
+	}
+
 	ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type);
 	if (ret) {
 		usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC);
diff --git a/drivers/usb/usbip/vudc_tx.c b/drivers/usb/usbip/vudc_tx.c
index 1440ae0919ec7c5a5f2fc62903faa43c2aeee5ed..3ccb17c3e84060a907afb8c2914e3f69039f753d 100644
--- a/drivers/usb/usbip/vudc_tx.c
+++ b/drivers/usb/usbip/vudc_tx.c
@@ -85,6 +85,13 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
 	memset(&pdu_header, 0, sizeof(pdu_header));
 	memset(&msg, 0, sizeof(msg));
 
+	if (urb->actual_length > 0 && !urb->transfer_buffer) {
+		dev_err(&udc->gadget.dev,
+			"urb: actual_length %d transfer_buffer null\n",
+			urb->actual_length);
+		return -1;
+	}
+
 	if (urb_p->type == USB_ENDPOINT_XFER_ISOC)
 		iovnum = 2 + urb->number_of_packets;
 	else
@@ -100,8 +107,8 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
 
 	/* 1. setup usbip_header */
 	setup_ret_submit_pdu(&pdu_header, urb_p);
-	usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
-			  pdu_header.base.seqnum, urb);
+	usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
+			  pdu_header.base.seqnum);
 	usbip_header_correct_endian(&pdu_header, 1);
 
 	iov[iovnum].iov_base = &pdu_header;
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 8d626d7c2e7e79db8d243278e805c96ad563bb3d..c7bdeb6556469efb93e2a6a7e742da3a37ad7e69 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -778,16 +778,6 @@ static void handle_rx(struct vhost_net *net)
 		/* On error, stop handling until the next kick. */
 		if (unlikely(headcount < 0))
 			goto out;
-		if (nvq->rx_array)
-			msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
-		/* On overrun, truncate and discard */
-		if (unlikely(headcount > UIO_MAXIOV)) {
-			iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
-			err = sock->ops->recvmsg(sock, &msg,
-						 1, MSG_DONTWAIT | MSG_TRUNC);
-			pr_debug("Discarded rx packet: len %zd\n", sock_len);
-			continue;
-		}
 		/* OK, now we need to know about added descriptors. */
 		if (!headcount) {
 			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
@@ -800,6 +790,16 @@ static void handle_rx(struct vhost_net *net)
 			 * they refilled. */
 			goto out;
 		}
+		if (nvq->rx_array)
+			msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
+		/* On overrun, truncate and discard */
+		if (unlikely(headcount > UIO_MAXIOV)) {
+			iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
+			err = sock->ops->recvmsg(sock, &msg,
+						 1, MSG_DONTWAIT | MSG_TRUNC);
+			pr_debug("Discarded rx packet: len %zd\n", sock_len);
+			continue;
+		}
 		/* We don't need to be notified again. */
 		iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
 		fixup = msg.msg_iter;
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c
index 57e9e146ff74c0f30bc15491d7abf3c179de2150..f6da8755b8592ee5dfe3ae95fbeb4c7ce7201292 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c
@@ -455,15 +455,26 @@ static int td028ttec1_panel_remove(struct spi_device *spi)
 }
 
 static const struct of_device_id td028ttec1_of_match[] = {
+	{ .compatible = "omapdss,tpo,td028ttec1", },
+	/* keep to not break older DTB */
 	{ .compatible = "omapdss,toppoly,td028ttec1", },
 	{},
 };
 
 MODULE_DEVICE_TABLE(of, td028ttec1_of_match);
 
+static const struct spi_device_id td028ttec1_ids[] = {
+	{ "toppoly,td028ttec1", 0 },
+	{ "tpo,td028ttec1", 0},
+	{ /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, td028ttec1_ids);
+
 static struct spi_driver td028ttec1_spi_driver = {
 	.probe		= td028ttec1_panel_probe,
 	.remove		= td028ttec1_panel_remove,
+	.id_table	= td028ttec1_ids,
 
 	.driver         = {
 		.name   = "panel-tpo-td028ttec1",
@@ -474,7 +485,6 @@ static struct spi_driver td028ttec1_spi_driver = {
 
 module_spi_driver(td028ttec1_spi_driver);
 
-MODULE_ALIAS("spi:toppoly,td028ttec1");
 MODULE_AUTHOR("H. Nikolaus Schaller <hns@goldelico.com>");
 MODULE_DESCRIPTION("Toppoly TD028TTEC1 panel driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 48230a5e12f262b67d28d87adc713f462e8ec5fc..bf7ff3934d7fff5169e5252cd8fc0a29ea25a133 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -333,6 +333,8 @@ int register_virtio_device(struct virtio_device *dev)
 	/* device_register() causes the bus infrastructure to look for a
 	 * matching driver. */
 	err = device_register(&dev->dev);
+	if (err)
+		ida_simple_remove(&virtio_index_ida, dev->index);
 out:
 	if (err)
 		virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 7960746f759788d545b9e85e384a56cbf99a7606..a1fb52cb3f0ab5c0f066d3d773a82c73665f54da 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -174,13 +174,12 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
 	while ((page = balloon_page_pop(&pages))) {
 		balloon_page_enqueue(&vb->vb_dev_info, page);
 
-		vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE;
-
 		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
 		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
 		if (!virtio_has_feature(vb->vdev,
 					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
 			adjust_managed_page_count(page, -1);
+		vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE;
 	}
 
 	num_allocated_pages = vb->num_pfns;
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 74dc7170fd351e02d1732357b0705f66c507f7ce..c92131edfabaad76355036f2afe13adbadf0b63d 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -493,7 +493,16 @@ static const struct virtio_config_ops virtio_mmio_config_ops = {
 };
 
 
-static void virtio_mmio_release_dev_empty(struct device *_d) {}
+static void virtio_mmio_release_dev(struct device *_d)
+{
+	struct virtio_device *vdev =
+			container_of(_d, struct virtio_device, dev);
+	struct virtio_mmio_device *vm_dev =
+			container_of(vdev, struct virtio_mmio_device, vdev);
+	struct platform_device *pdev = vm_dev->pdev;
+
+	devm_kfree(&pdev->dev, vm_dev);
+}
 
 /* Platform device */
 
@@ -514,10 +523,10 @@ static int virtio_mmio_probe(struct platform_device *pdev)
 
 	vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL);
 	if (!vm_dev)
-		return  -ENOMEM;
+		return -ENOMEM;
 
 	vm_dev->vdev.dev.parent = &pdev->dev;
-	vm_dev->vdev.dev.release = virtio_mmio_release_dev_empty;
+	vm_dev->vdev.dev.release = virtio_mmio_release_dev;
 	vm_dev->vdev.config = &virtio_mmio_config_ops;
 	vm_dev->pdev = pdev;
 	INIT_LIST_HEAD(&vm_dev->virtqueues);
@@ -573,13 +582,16 @@ static int virtio_mmio_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, vm_dev);
 
-	return register_virtio_device(&vm_dev->vdev);
+	rc = register_virtio_device(&vm_dev->vdev);
+	if (rc)
+		put_device(&vm_dev->vdev.dev);
+
+	return rc;
 }
 
 static int virtio_mmio_remove(struct platform_device *pdev)
 {
 	struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev);
-
 	unregister_virtio_device(&vm_dev->vdev);
 
 	return 0;
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index d8dd54678ab7100d32435fd68368531d98317375..e5d0c28372ea178a3177114694c7327f34089d24 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -269,7 +269,7 @@ config XEN_ACPI_HOTPLUG_CPU
 
 config XEN_ACPI_PROCESSOR
 	tristate "Xen ACPI processor"
-	depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ
+	depends on XEN && XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
 	default m
 	help
           This ACPI processor uploads Power Management information to the Xen
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index f77e499afdddb02c2d7595459a70b15f3a8c56d5..065f0b607373402a0d4dd7520b9820503390fb0a 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -257,10 +257,25 @@ static void release_memory_resource(struct resource *resource)
 	kfree(resource);
 }
 
+/*
+ * Host memory not allocated to dom0. We can use this range for hotplug-based
+ * ballooning.
+ *
+ * It's a type-less resource. Setting IORESOURCE_MEM will make resource
+ * management algorithms (arch_remove_reservations()) look into guest e820,
+ * which we don't want.
+ */
+static struct resource hostmem_resource = {
+	.name   = "Host RAM",
+};
+
+void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res)
+{}
+
 static struct resource *additional_memory_resource(phys_addr_t size)
 {
-	struct resource *res;
-	int ret;
+	struct resource *res, *res_hostmem;
+	int ret = -ENOMEM;
 
 	res = kzalloc(sizeof(*res), GFP_KERNEL);
 	if (!res)
@@ -269,13 +284,42 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 	res->name = "System RAM";
 	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
-	ret = allocate_resource(&iomem_resource, res,
-				size, 0, -1,
-				PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
-	if (ret < 0) {
-		pr_err("Cannot allocate new System RAM resource\n");
-		kfree(res);
-		return NULL;
+	res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL);
+	if (res_hostmem) {
+		/* Try to grab a range from hostmem */
+		res_hostmem->name = "Host memory";
+		ret = allocate_resource(&hostmem_resource, res_hostmem,
+					size, 0, -1,
+					PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+	}
+
+	if (!ret) {
+		/*
+		 * Insert this resource into iomem. Because hostmem_resource
+		 * tracks portion of guest e820 marked as UNUSABLE noone else
+		 * should try to use it.
+		 */
+		res->start = res_hostmem->start;
+		res->end = res_hostmem->end;
+		ret = insert_resource(&iomem_resource, res);
+		if (ret < 0) {
+			pr_err("Can't insert iomem_resource [%llx - %llx]\n",
+				res->start, res->end);
+			release_memory_resource(res_hostmem);
+			res_hostmem = NULL;
+			res->start = res->end = 0;
+		}
+	}
+
+	if (ret) {
+		ret = allocate_resource(&iomem_resource, res,
+					size, 0, -1,
+					PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+		if (ret < 0) {
+			pr_err("Cannot allocate new System RAM resource\n");
+			kfree(res);
+			return NULL;
+		}
 	}
 
 #ifdef CONFIG_SPARSEMEM
@@ -287,6 +331,7 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 			pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
 			       pfn, limit);
 			release_memory_resource(res);
+			release_memory_resource(res_hostmem);
 			return NULL;
 		}
 	}
@@ -765,6 +810,8 @@ static int __init balloon_init(void)
 	set_online_page_callback(&xen_online_page);
 	register_memory_notifier(&xen_memory_nb);
 	register_sysctl_table(xen_root);
+
+	arch_xen_balloon_init(&hostmem_resource);
 #endif
 
 #ifdef CONFIG_XEN_PV
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 57efbd3b053b37ca43816483dd3364c3c48a761e..bd56653b9bbc2bed8e27c4909b2e8c025c4f1627 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -380,10 +380,8 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
 		}
 		range = 0;
 		while (range < pages) {
-			if (map->unmap_ops[offset+range].handle == -1) {
-				range--;
+			if (map->unmap_ops[offset+range].handle == -1)
 				break;
-			}
 			range++;
 		}
 		err = __unmap_grant_pages(map, offset, range);
@@ -1073,8 +1071,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 out_unlock_put:
 	mutex_unlock(&priv->lock);
 out_put_map:
-	if (use_ptemod)
+	if (use_ptemod) {
 		map->vma = NULL;
+		unmap_grant_pages(map, 0, map->count);
+	}
 	gntdev_put_map(priv, map);
 	return err;
 }
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 40caa92bff33deaebf1bf01aac44571453660c0f..4c789e61554b2d3a9d9c01c4384a5d77a2b978d4 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -805,7 +805,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
 		pvcalls_exit();
 		return ret;
 	}
-	map2 = kzalloc(sizeof(*map2), GFP_KERNEL);
+	map2 = kzalloc(sizeof(*map2), GFP_ATOMIC);
 	if (map2 == NULL) {
 		clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
 			  (void *)&map->passive.flags);
@@ -1103,7 +1103,7 @@ static int pvcalls_front_remove(struct xenbus_device *dev)
 			kfree(map);
 		}
 	}
-	if (bedata->ref >= 0)
+	if (bedata->ref != -1)
 		gnttab_end_foreign_access(bedata->ref, 0, 0);
 	kfree(bedata->ring.sring);
 	kfree(bedata);
@@ -1128,6 +1128,8 @@ static int pvcalls_front_probe(struct xenbus_device *dev,
 	}
 
 	versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
+	if (IS_ERR(versions))
+		return PTR_ERR(versions);
 	if (!len)
 		return -EINVAL;
 	if (strcmp(versions, "1")) {
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index ff8d5bf4354f306227a297ca542078580ca57e34..23c7f395d7182705d945082576ec59a15af3226f 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -895,20 +895,38 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
  * However, if we didn't have a callback promise outstanding, or it was
  * outstanding on a different server, then it won't break it either...
  */
-static int afs_dir_remove_link(struct dentry *dentry, struct key *key)
+static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
+			       unsigned long d_version_before,
+			       unsigned long d_version_after)
 {
+	bool dir_valid;
 	int ret = 0;
 
+	/* There were no intervening changes on the server if the version
+	 * number we got back was incremented by exactly 1.
+	 */
+	dir_valid = (d_version_after == d_version_before + 1);
+
 	if (d_really_is_positive(dentry)) {
 		struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
 
-		if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-			kdebug("AFS_VNODE_DELETED");
-		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
-
-		ret = afs_validate(vnode, key);
-		if (ret == -ESTALE)
+		if (dir_valid) {
+			drop_nlink(&vnode->vfs_inode);
+			if (vnode->vfs_inode.i_nlink == 0) {
+				set_bit(AFS_VNODE_DELETED, &vnode->flags);
+				clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+			}
 			ret = 0;
+		} else {
+			clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+
+			if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+				kdebug("AFS_VNODE_DELETED");
+
+			ret = afs_validate(vnode, key);
+			if (ret == -ESTALE)
+				ret = 0;
+		}
 		_debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
 	}
 
@@ -923,6 +941,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 	struct afs_fs_cursor fc;
 	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
 	struct key *key;
+	unsigned long d_version = (unsigned long)dentry->d_fsdata;
 	int ret;
 
 	_enter("{%x:%u},{%pd}",
@@ -955,7 +974,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
 		ret = afs_end_vnode_operation(&fc);
 		if (ret == 0)
-			ret = afs_dir_remove_link(dentry, key);
+			ret = afs_dir_remove_link(
+				dentry, key, d_version,
+				(unsigned long)dvnode->status.data_version);
 	}
 
 error_key:
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 3415eb7484f6ba5653e3f08c8e3035ab29b2eaa3..1e81864ef0b29bffcc10944c30bc57796815025d 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -377,6 +377,10 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	}
 
 	read_sequnlock_excl(&vnode->cb_lock);
+
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+		clear_nlink(&vnode->vfs_inode);
+
 	if (valid)
 		goto valid;
 
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index ea1460b9b71aab5900b342e28641924a0f7b9f04..e1126659f043f0c445cc19fa9524cc5d96afd36a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -885,7 +885,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 {
 	struct afs_net *net = call->net;
 	enum afs_call_state state;
-	u32 remote_abort;
+	u32 remote_abort = 0;
 	int ret;
 
 	_enter("{%s,%zu},,%zu,%d",
diff --git a/fs/afs/write.c b/fs/afs/write.c
index cb5f8a3df5773cba37c292e65ac6e985b0af11bc..9370e2feb999303098d36aab4234d9f2fef7f24a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -198,7 +198,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 			ret = afs_fill_page(vnode, key, pos + copied,
 					    len - copied, page);
 			if (ret < 0)
-				return ret;
+				goto out;
 		}
 		SetPageUptodate(page);
 	}
@@ -206,10 +206,12 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 	set_page_dirty(page);
 	if (PageDirty(page))
 		_debug("dirtied");
+	ret = copied;
+
+out:
 	unlock_page(page);
 	put_page(page);
-
-	return copied;
+	return ret;
 }
 
 /*
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 8fc41705c7cd50af4c53f71851d8a4136673411b..961a12dc6dc81f369a71c36e62fbd604886ebf2c 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -170,7 +170,6 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 	mutex_unlock(&sbi->wq_mutex);
 
-	if (autofs4_write(sbi, pipe, &pkt, pktsz))
 	switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) {
 	case 0:
 		break;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 531e0a8645b08a1554b495c0852cc7e53e528770..1e74cf82653271e7d5c7b6cbf930aea09751bc49 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1032,14 +1032,17 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
 		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
 			ret = btrfs_inc_ref(trans, root, buf, 1);
-			BUG_ON(ret); /* -ENOMEM */
+			if (ret)
+				return ret;
 
 			if (root->root_key.objectid ==
 			    BTRFS_TREE_RELOC_OBJECTID) {
 				ret = btrfs_dec_ref(trans, root, buf, 0);
-				BUG_ON(ret); /* -ENOMEM */
+				if (ret)
+					return ret;
 				ret = btrfs_inc_ref(trans, root, cow, 1);
-				BUG_ON(ret); /* -ENOMEM */
+				if (ret)
+					return ret;
 			}
 			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
 		} else {
@@ -1049,7 +1052,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 				ret = btrfs_inc_ref(trans, root, cow, 1);
 			else
 				ret = btrfs_inc_ref(trans, root, cow, 0);
-			BUG_ON(ret); /* -ENOMEM */
+			if (ret)
+				return ret;
 		}
 		if (new_flags != 0) {
 			int level = btrfs_header_level(buf);
@@ -1068,9 +1072,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 				ret = btrfs_inc_ref(trans, root, cow, 1);
 			else
 				ret = btrfs_inc_ref(trans, root, cow, 0);
-			BUG_ON(ret); /* -ENOMEM */
+			if (ret)
+				return ret;
 			ret = btrfs_dec_ref(trans, root, buf, 1);
-			BUG_ON(ret); /* -ENOMEM */
+			if (ret)
+				return ret;
 		}
 		clean_tree_block(fs_info, buf);
 		*last_ref = 1;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5d73f79ded8bcbd967636b652d98433da64cceb8..056276101c63a7060b09517353ba24cd00cde9a5 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -87,6 +87,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 
 	spin_lock(&root->inode_lock);
 	node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
+
 	if (node) {
 		if (btrfs_inode->delayed_node) {
 			refcount_inc(&node->refs);	/* can be accessed */
@@ -94,9 +95,30 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 			spin_unlock(&root->inode_lock);
 			return node;
 		}
-		btrfs_inode->delayed_node = node;
-		/* can be accessed and cached in the inode */
-		refcount_add(2, &node->refs);
+
+		/*
+		 * It's possible that we're racing into the middle of removing
+		 * this node from the radix tree.  In this case, the refcount
+		 * was zero and it should never go back to one.  Just return
+		 * NULL like it was never in the radix at all; our release
+		 * function is in the process of removing it.
+		 *
+		 * Some implementations of refcount_inc refuse to bump the
+		 * refcount once it has hit zero.  If we don't do this dance
+		 * here, refcount_inc() may decide to just WARN_ONCE() instead
+		 * of actually bumping the refcount.
+		 *
+		 * If this node is properly in the radix, we want to bump the
+		 * refcount twice, once for the inode and once for this get
+		 * operation.
+		 */
+		if (refcount_inc_not_zero(&node->refs)) {
+			refcount_inc(&node->refs);
+			btrfs_inode->delayed_node = node;
+		} else {
+			node = NULL;
+		}
+
 		spin_unlock(&root->inode_lock);
 		return node;
 	}
@@ -254,17 +276,18 @@ static void __btrfs_release_delayed_node(
 	mutex_unlock(&delayed_node->mutex);
 
 	if (refcount_dec_and_test(&delayed_node->refs)) {
-		bool free = false;
 		struct btrfs_root *root = delayed_node->root;
+
 		spin_lock(&root->inode_lock);
-		if (refcount_read(&delayed_node->refs) == 0) {
-			radix_tree_delete(&root->delayed_nodes_tree,
-					  delayed_node->inode_id);
-			free = true;
-		}
+		/*
+		 * Once our refcount goes to zero, nobody is allowed to bump it
+		 * back up.  We can delete it now.
+		 */
+		ASSERT(refcount_read(&delayed_node->refs) == 0);
+		radix_tree_delete(&root->delayed_nodes_tree,
+				  delayed_node->inode_id);
 		spin_unlock(&root->inode_lock);
-		if (free)
-			kmem_cache_free(delayed_node_cache, delayed_node);
+		kmem_cache_free(delayed_node_cache, delayed_node);
 	}
 }
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 10a2a579cc7f6a3569212444fb37dd5ddcf0c0c7..a8ecccfc36ded1d9470deb0d78f77bcdacb6d0cf 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3231,6 +3231,7 @@ static int write_dev_supers(struct btrfs_device *device,
 	int errors = 0;
 	u32 crc;
 	u64 bytenr;
+	int op_flags;
 
 	if (max_mirrors == 0)
 		max_mirrors = BTRFS_SUPER_MIRROR_MAX;
@@ -3273,13 +3274,10 @@ static int write_dev_supers(struct btrfs_device *device,
 		 * we fua the first super.  The others we allow
 		 * to go down lazy.
 		 */
-		if (i == 0) {
-			ret = btrfsic_submit_bh(REQ_OP_WRITE,
-				REQ_SYNC | REQ_FUA | REQ_META | REQ_PRIO, bh);
-		} else {
-			ret = btrfsic_submit_bh(REQ_OP_WRITE,
-				REQ_SYNC | REQ_META | REQ_PRIO, bh);
-		}
+		op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
+		if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
+			op_flags |= REQ_FUA;
+		ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
 		if (ret)
 			errors++;
 	}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4497f937e8fb7ce608f8ef7cc8db1c7a51bcd2a3..2f4328511ac84e11b8148952b09b25f3b4bb4e9d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9206,6 +9206,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 	ret = btrfs_del_root(trans, fs_info, &root->root_key);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
+		err = ret;
 		goto out_end_trans;
 	}
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 993061f83067a9a65c5a58908908f5948423f11a..e1a7f3cb5be940561af3ec855b6657c3fce7c850 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3005,6 +3005,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 		compress_type = ordered_extent->compress_type;
 	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
 		BUG_ON(compress_type);
+		btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
+				       ordered_extent->len);
 		ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
 						ordered_extent->file_offset,
 						ordered_extent->file_offset +
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d748ad1c3620a7e99ff9706902846362a165a05a..2ef8acaac68846ea1d29452a0b0f1d95d85cc6e5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2206,7 +2206,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
 	if (!path)
 		return -ENOMEM;
 
-	ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX];
+	ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1];
 
 	key.objectid = tree_id;
 	key.type = BTRFS_ROOT_ITEM_KEY;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 49810b70afd3941721246497d94c754ec2120619..a256842875017b386a2a349f734f5feff7391484 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -237,7 +237,6 @@ static struct btrfs_device *__alloc_device(void)
 		kfree(dev);
 		return ERR_PTR(-ENOMEM);
 	}
-	bio_get(dev->flush_bio);
 
 	INIT_LIST_HEAD(&dev->dev_list);
 	INIT_LIST_HEAD(&dev->dev_alloc_list);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ab69dcb70e8ae342733f589338c02dc226f95356..1b468250e94752e6eedf63283cba95a236fb0380 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1440,6 +1440,29 @@ static int __close_session(struct ceph_mds_client *mdsc,
 	return request_close_session(mdsc, session);
 }
 
+static bool drop_negative_children(struct dentry *dentry)
+{
+	struct dentry *child;
+	bool all_negative = true;
+
+	if (!d_is_dir(dentry))
+		goto out;
+
+	spin_lock(&dentry->d_lock);
+	list_for_each_entry(child, &dentry->d_subdirs, d_child) {
+		if (d_really_is_positive(child)) {
+			all_negative = false;
+			break;
+		}
+	}
+	spin_unlock(&dentry->d_lock);
+
+	if (all_negative)
+		shrink_dcache_parent(dentry);
+out:
+	return all_negative;
+}
+
 /*
  * Trim old(er) caps.
  *
@@ -1490,16 +1513,27 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 	if ((used | wanted) & ~oissued & mine)
 		goto out;   /* we need these caps */
 
-	session->s_trim_caps--;
 	if (oissued) {
 		/* we aren't the only cap.. just remove us */
 		__ceph_remove_cap(cap, true);
+		session->s_trim_caps--;
 	} else {
+		struct dentry *dentry;
 		/* try dropping referring dentries */
 		spin_unlock(&ci->i_ceph_lock);
-		d_prune_aliases(inode);
-		dout("trim_caps_cb %p cap %p  pruned, count now %d\n",
-		     inode, cap, atomic_read(&inode->i_count));
+		dentry = d_find_any_alias(inode);
+		if (dentry && drop_negative_children(dentry)) {
+			int count;
+			dput(dentry);
+			d_prune_aliases(inode);
+			count = atomic_read(&inode->i_count);
+			if (count == 1)
+				session->s_trim_caps--;
+			dout("trim_caps_cb %p cap %p pruned, count now %d\n",
+			     inode, cap, count);
+		} else {
+			dput(dentry);
+		}
 		return 0;
 	}
 
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e06740436b92080b915bd6c414fec38b0e868cd8..ed88ab8a477434b78aa94ace68c92794a6c4e082 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1406,7 +1406,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
 	} while (rc == -EAGAIN);
 
 	if (rc) {
-		cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc);
+		if (rc != -ENOENT)
+			cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc);
 		goto out;
 	}
 
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5331631386a23bd4a7458ecb5fb96efe1773cf71..01346b8b6edb38498c1b48c37e1c9210f4d5fe09 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2678,27 +2678,27 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
 	cifs_small_buf_release(req);
 
 	rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
-	shdr = get_sync_hdr(rsp);
 
-	if (shdr->Status == STATUS_END_OF_FILE) {
+	if (rc) {
+		if (rc != -ENODATA) {
+			cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
+			cifs_dbg(VFS, "Send error in read = %d\n", rc);
+		}
 		free_rsp_buf(resp_buftype, rsp_iov.iov_base);
-		return 0;
+		return rc == -ENODATA ? 0 : rc;
 	}
 
-	if (rc) {
-		cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
-		cifs_dbg(VFS, "Send error in read = %d\n", rc);
-	} else {
-		*nbytes = le32_to_cpu(rsp->DataLength);
-		if ((*nbytes > CIFS_MAX_MSGSIZE) ||
-		    (*nbytes > io_parms->length)) {
-			cifs_dbg(FYI, "bad length %d for count %d\n",
-				 *nbytes, io_parms->length);
-			rc = -EIO;
-			*nbytes = 0;
-		}
+	*nbytes = le32_to_cpu(rsp->DataLength);
+	if ((*nbytes > CIFS_MAX_MSGSIZE) ||
+	    (*nbytes > io_parms->length)) {
+		cifs_dbg(FYI, "bad length %d for count %d\n",
+			 *nbytes, io_parms->length);
+		rc = -EIO;
+		*nbytes = 0;
 	}
 
+	shdr = get_sync_hdr(rsp);
+
 	if (*buf) {
 		memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes);
 		free_rsp_buf(resp_buftype, rsp_iov.iov_base);
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index f937082f32449a9f3316e581675c47e51f7f6d30..58e2fe40b2a04423de26729613bae16c233c8920 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -34,6 +34,7 @@ config CRAMFS_BLOCKDEV
 config CRAMFS_MTD
 	bool "Support CramFs image directly mapped in physical memory"
 	depends on CRAMFS && MTD
+	depends on CRAMFS=m || MTD=y
 	default y if !CRAMFS_BLOCKDEV
 	help
 	  This option allows the CramFs driver to load data directly from
diff --git a/fs/dax.c b/fs/dax.c
index 78b72c48374e5eed09587292f3b7eee62059e18b..95981591977a04d08f300c0795fcd96a4211adc1 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -627,8 +627,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 
 			if (pfn != pmd_pfn(*pmdp))
 				goto unlock_pmd;
-			if (!pmd_dirty(*pmdp)
-					&& !pmd_access_permitted(*pmdp, WRITE))
+			if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
 				goto unlock_pmd;
 
 			flush_cache_page(vma, address, pfn);
diff --git a/fs/exec.c b/fs/exec.c
index 6be2aa0ab26fe26cb37032b99bba656f8d7c6b51..7eb8d21bcab94b51905071d12a5205e7ac2ef194 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1216,15 +1216,14 @@ static int de_thread(struct task_struct *tsk)
 	return -EAGAIN;
 }
 
-char *get_task_comm(char *buf, struct task_struct *tsk)
+char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
 {
-	/* buf must be at least sizeof(tsk->comm) in size */
 	task_lock(tsk);
-	strncpy(buf, tsk->comm, sizeof(tsk->comm));
+	strncpy(buf, tsk->comm, buf_size);
 	task_unlock(tsk);
 	return buf;
 }
-EXPORT_SYMBOL_GPL(get_task_comm);
+EXPORT_SYMBOL_GPL(__get_task_comm);
 
 /*
  * These functions flushes out all traces of the currently running executable
@@ -1340,24 +1339,24 @@ void setup_new_exec(struct linux_binprm * bprm)
 		 * avoid bad behavior from the prior rlimits. This has to
 		 * happen before arch_pick_mmap_layout(), which examines
 		 * RLIMIT_STACK, but after the point of no return to avoid
-		 * races from other threads changing the limits. This also
-		 * must be protected from races with prlimit() calls.
+		 * needing to clean up the change on failure.
 		 */
-		task_lock(current->group_leader);
 		if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
 			current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
-		if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM)
-			current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM;
-		task_unlock(current->group_leader);
 	}
 
 	arch_pick_mmap_layout(current->mm);
 
 	current->sas_ss_sp = current->sas_ss_size = 0;
 
-	/* Figure out dumpability. */
+	/*
+	 * Figure out dumpability. Note that this checking only of current
+	 * is wrong, but userspace depends on it. This should be testing
+	 * bprm->secureexec instead.
+	 */
 	if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
-	    bprm->secureexec)
+	    !(uid_eq(current_euid(), current_uid()) &&
+	      gid_eq(current_egid(), current_gid())))
 		set_dumpable(current->mm, suid_dumpable);
 	else
 		set_dumpable(current->mm, SUID_DUMP_USER);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 07bca11749d406fd4e3130e31026f1db9ac0d879..c941251ac0c008587b1aaee10fb52e27a50e684a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4722,6 +4722,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
 						    EXT4_INODE_EOFBLOCKS);
 		}
 		ext4_mark_inode_dirty(handle, inode);
+		ext4_update_inode_fsync_trans(handle, inode, 1);
 		ret2 = ext4_journal_stop(handle);
 		if (ret2)
 			break;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b4267d72f24955c314d78f350c4247e6c7373cc2..b32cf263750d1d3b2024847e78bf1c6181e8a44f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -816,6 +816,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 		struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);
 
+		if (IS_ERR(p))
+			return ERR_CAST(p);
 		if (p) {
 			int acl_size = p->a_count * sizeof(ext4_acl_entry);
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7df2c5644e59c9678e9379985338f972a69d42d1..534a9130f62578931a24477f317c17b42c71ffc3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -149,6 +149,15 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
  */
 int ext4_inode_is_fast_symlink(struct inode *inode)
 {
+	if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+		int ea_blocks = EXT4_I(inode)->i_file_acl ?
+				EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
+
+		if (ext4_has_inline_data(inode))
+			return 0;
+
+		return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
+	}
 	return S_ISLNK(inode->i_mode) && inode->i_size &&
 	       (inode->i_size < EXT4_N_BLOCKS * 4);
 }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 798b3ac680db1b4f8c4510a0bd66d0510d11216a..e750d68fbcb50c0447e13556905da8401f5f6b03 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1399,6 +1399,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 			       "falling back\n"));
 	}
 	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
+	if (!nblocks) {
+		ret = NULL;
+		goto cleanup_and_exit;
+	}
 	start = EXT4_I(dir)->i_dir_start_lookup;
 	if (start >= nblocks)
 		start = 0;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 8d6b7e35faf9a76d91ce9835598c13495adb2032..c83ece7facc5e466eb4b17fac368c0f63cf1048a 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -150,7 +150,6 @@ static int hpfs_readdir(struct file *file, struct dir_context *ctx)
 			if (unlikely(ret < 0))
 				goto out;
 			ctx->pos = ((loff_t) hpfs_de_as_down_as_possible(inode->i_sb, hpfs_inode->i_dno) << 4) + 1;
-			file->f_version = inode->i_version;
 		}
 		next_pos = ctx->pos;
 		if (!(de = map_pos_dirent(inode, &next_pos, &qbh))) {
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index 3b834563b1f161352806249f0ef62b3e552ecee6..a4ad18afbdec7ffba2eb5cd46c59879433ba6da9 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -419,7 +419,6 @@ int hpfs_add_dirent(struct inode *i,
 		c = 1;
 		goto ret;
 	}	
-	i->i_version++;
 	c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0);
 	ret:
 	return c;
@@ -726,7 +725,6 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
 			return 2;
 		}
 	}
-	i->i_version++;
 	for_all_poss(i, hpfs_pos_del, (t = get_pos(dnode, de)) + 1, 1);
 	hpfs_delete_de(i->i_sb, dnode, de);
 	hpfs_mark_4buffers_dirty(qbh);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c45a3b9b9ac7e22861b5dcd7816a884a15bb62bb..f2c3ebcd309c326d6ac08bd168f13b8b460379ec 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -235,7 +235,6 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
 	ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
-	ei->vfs_inode.i_version = 1;
 	return &ei->vfs_inode;
 }
 
diff --git a/fs/namespace.c b/fs/namespace.c
index e158ec6b527b2d72341e096f76e628b5e61ea4cf..9d1374ab6e06f2cd7b57aedf196c53a745a1a683 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2826,6 +2826,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
 			    SB_DIRSYNC |
 			    SB_SILENT |
 			    SB_POSIXACL |
+			    SB_LAZYTIME |
 			    SB_I_VERSION);
 
 	if (flags & MS_REMOUNT)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0ac2fb1c6b634626cf1f98f4423841ad103d08c2..b9129e2befeaa4186138bbd84e78d7ca2d128370 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -291,12 +291,23 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 	const struct sockaddr *sap = data->addr;
 	struct nfs_net *nn = net_generic(data->net, nfs_net_id);
 
+again:
 	list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
 	        const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
 		/* Don't match clients that failed to initialise properly */
 		if (clp->cl_cons_state < 0)
 			continue;
 
+		/* If a client is still initializing then we need to wait */
+		if (clp->cl_cons_state > NFS_CS_READY) {
+			refcount_inc(&clp->cl_count);
+			spin_unlock(&nn->nfs_client_lock);
+			nfs_wait_client_init_complete(clp);
+			nfs_put_client(clp);
+			spin_lock(&nn->nfs_client_lock);
+			goto again;
+		}
+
 		/* Different NFS versions cannot share the same nfs_client */
 		if (clp->rpc_ops != data->nfs_mod->rpc_ops)
 			continue;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 12bbab0becb420463bb37d4eefe0c2a9ec56796d..65a7e5da508c3e3019dab617b3ba92521b96cd0b 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -404,15 +404,19 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 	if (error < 0)
 		goto error;
 
-	if (!nfs4_has_session(clp))
-		nfs_mark_client_ready(clp, NFS_CS_READY);
-
 	error = nfs4_discover_server_trunking(clp, &old);
 	if (error < 0)
 		goto error;
 
-	if (clp != old)
+	if (clp != old) {
 		clp->cl_preserve_clid = true;
+		/*
+		 * Mark the client as having failed initialization so other
+		 * processes walking the nfs_client_list in nfs_match_client()
+		 * won't try to use it.
+		 */
+		nfs_mark_client_ready(clp, -EPERM);
+	}
 	nfs_put_client(clp);
 	clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
 	return old;
@@ -539,6 +543,9 @@ int nfs40_walk_client_list(struct nfs_client *new,
 	spin_lock(&nn->nfs_client_lock);
 	list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
 
+		if (pos == new)
+			goto found;
+
 		status = nfs4_match_client(pos, new, &prev, nn);
 		if (status < 0)
 			goto out_unlock;
@@ -559,6 +566,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
 		 * way that a SETCLIENTID_CONFIRM to pos can succeed is
 		 * if new and pos point to the same server:
 		 */
+found:
 		refcount_inc(&pos->cl_count);
 		spin_unlock(&nn->nfs_client_lock);
 
@@ -572,6 +580,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
 		case 0:
 			nfs4_swap_callback_idents(pos, new);
 			pos->cl_confirm = new->cl_confirm;
+			nfs_mark_client_ready(pos, NFS_CS_READY);
 
 			prev = NULL;
 			*result = pos;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5b5f464f6f2ada7cdfd2ea80d95d0c048ccee3f6..4a379d7918f23e1130468c2f58bfea3623035116 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1890,6 +1890,8 @@ int nfs_commit_inode(struct inode *inode, int how)
 	if (res)
 		error = nfs_generic_commit_list(inode, &head, how, &cinfo);
 	nfs_commit_end(cinfo.mds);
+	if (res == 0)
+		return res;
 	if (error < 0)
 		goto out_error;
 	if (!may_wait)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 697f8ae7792d1304e3cec035fd56185eb2ee02df..f650e475d8f0d84af1bb3013b6b35ef77421cde2 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -60,6 +60,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
 				gi->gid[i] = exp->ex_anon_gid;
 			else
 				gi->gid[i] = rqgi->gid[i];
+
+			/* Each thread allocates its own gi, no race */
+			groups_sort(gi);
 		}
 	} else {
 		gi = get_group_info(rqgi);
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index cbfc196e5dc53b2cb2376524c5955af20bd07ea6..5ac4154668613d158d63301272039ee5f5b814e4 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -24,6 +24,16 @@ config OVERLAY_FS_REDIRECT_DIR
 	  an overlay which has redirects on a kernel that doesn't support this
 	  feature will have unexpected results.
 
+config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
+	bool "Overlayfs: follow redirects even if redirects are turned off"
+	default y
+	depends on OVERLAY_FS
+	help
+	  Disable this to get a possibly more secure configuration, but that
+	  might not be backward compatible with previous kernels.
+
+	  For more information, see Documentation/filesystems/overlayfs.txt
+
 config OVERLAY_FS_INDEX
 	bool "Overlayfs: turn on inodes index feature by default"
 	depends on OVERLAY_FS
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index e13921824c70ce8061e545f21e408eda478d7644..f9788bc116a8d1b5137f805d9955995e68b31884 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -887,7 +887,8 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
 		spin_unlock(&dentry->d_lock);
 	} else {
 		kfree(redirect);
-		pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
+		pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n",
+				    err);
 		/* Fall back to userspace copy-up */
 		err = -EXDEV;
 	}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 625ed8066570607b6140a0c22c39f152135a5c81..beb945e1963c0aac86fbce312ca9814821a1c33d 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -435,7 +435,7 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
 
 	/* Check if index is orphan and don't warn before cleaning it */
 	if (d_inode(index)->i_nlink == 1 &&
-	    ovl_get_nlink(index, origin.dentry, 0) == 0)
+	    ovl_get_nlink(origin.dentry, index, 0) == 0)
 		err = -ENOENT;
 
 	dput(origin.dentry);
@@ -681,6 +681,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		if (d.stop)
 			break;
 
+		/*
+		 * Following redirects can have security consequences: it's like
+		 * a symlink into the lower layer without the permission checks.
+		 * This is only a problem if the upper layer is untrusted (e.g
+		 * comes from an USB drive).  This can allow a non-readable file
+		 * or directory to become readable.
+		 *
+		 * Only following redirects when redirects are enabled disables
+		 * this attack vector when not necessary.
+		 */
+		err = -EPERM;
+		if (d.redirect && !ofs->config.redirect_follow) {
+			pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry);
+			goto out_put;
+		}
+
 		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
 			poe = roe;
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 13eab09a6b6f33c04c90ad822aa8a0dbe12a4927..b489099ccd493a54e231c9ee7e3ebe18809398b5 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -180,7 +180,7 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
 static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
 {
 	struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
-	int err = IS_ERR(ret) ? PTR_ERR(ret) : 0;
+	int err = PTR_ERR_OR_ZERO(ret);
 
 	pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
 	return ret;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 752bab645879e5fce43e86d45e835d94d3c44b22..9d0bc03bf6e4563ef280fe5e8f695cc9f1e8abd5 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -14,6 +14,8 @@ struct ovl_config {
 	char *workdir;
 	bool default_permissions;
 	bool redirect_dir;
+	bool redirect_follow;
+	const char *redirect_mode;
 	bool index;
 };
 
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 0daa4354fec4ae967da4ae43e81f7f833cd1095c..8c98578d27a1496922d1b862fa90af61da3dd290 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -499,7 +499,7 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
 	return err;
 
 fail:
-	pr_warn_ratelimited("overlay: failed to look up (%s) for ino (%i)\n",
+	pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n",
 			    p->name, err);
 	goto out;
 }
@@ -663,7 +663,10 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
 			return PTR_ERR(rdt.cache);
 	}
 
-	return iterate_dir(od->realfile, &rdt.ctx);
+	err = iterate_dir(od->realfile, &rdt.ctx);
+	ctx->pos = rdt.ctx.pos;
+
+	return err;
 }
 
 
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 288d20f9a55a220d3782f4eaf10e62c491c3fc5c..76440feb79f64ee0fce36713ed4fe4264ff70f11 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -33,6 +33,13 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
 MODULE_PARM_DESC(ovl_redirect_dir_def,
 		 "Default to on or off for the redirect_dir feature");
 
+static bool ovl_redirect_always_follow =
+	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
+module_param_named(redirect_always_follow, ovl_redirect_always_follow,
+		   bool, 0644);
+MODULE_PARM_DESC(ovl_redirect_always_follow,
+		 "Follow redirects even if redirect_dir feature is turned off");
+
 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
 module_param_named(index, ovl_index_def, bool, 0644);
 MODULE_PARM_DESC(ovl_index_def,
@@ -232,6 +239,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
 	kfree(ofs->config.lowerdir);
 	kfree(ofs->config.upperdir);
 	kfree(ofs->config.workdir);
+	kfree(ofs->config.redirect_mode);
 	if (ofs->creator_cred)
 		put_cred(ofs->creator_cred);
 	kfree(ofs);
@@ -244,6 +252,7 @@ static void ovl_put_super(struct super_block *sb)
 	ovl_free_fs(ofs);
 }
 
+/* Sync real dirty inodes in upper filesystem (if it exists) */
 static int ovl_sync_fs(struct super_block *sb, int wait)
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
@@ -252,14 +261,24 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
 
 	if (!ofs->upper_mnt)
 		return 0;
-	upper_sb = ofs->upper_mnt->mnt_sb;
-	if (!upper_sb->s_op->sync_fs)
+
+	/*
+	 * If this is a sync(2) call or an emergency sync, all the super blocks
+	 * will be iterated, including upper_sb, so no need to do anything.
+	 *
+	 * If this is a syncfs(2) call, then we do need to call
+	 * sync_filesystem() on upper_sb, but enough if we do it when being
+	 * called with wait == 1.
+	 */
+	if (!wait)
 		return 0;
 
-	/* real inodes have already been synced by sync_filesystem(ovl_sb) */
+	upper_sb = ofs->upper_mnt->mnt_sb;
+
 	down_read(&upper_sb->s_umount);
-	ret = upper_sb->s_op->sync_fs(upper_sb, wait);
+	ret = sync_filesystem(upper_sb);
 	up_read(&upper_sb->s_umount);
+
 	return ret;
 }
 
@@ -295,6 +314,11 @@ static bool ovl_force_readonly(struct ovl_fs *ofs)
 	return (!ofs->upper_mnt || !ofs->workdir);
 }
 
+static const char *ovl_redirect_mode_def(void)
+{
+	return ovl_redirect_dir_def ? "on" : "off";
+}
+
 /**
  * ovl_show_options
  *
@@ -313,12 +337,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 	}
 	if (ofs->config.default_permissions)
 		seq_puts(m, ",default_permissions");
-	if (ofs->config.redirect_dir != ovl_redirect_dir_def)
-		seq_printf(m, ",redirect_dir=%s",
-			   ofs->config.redirect_dir ? "on" : "off");
+	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
+		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
 	if (ofs->config.index != ovl_index_def)
-		seq_printf(m, ",index=%s",
-			   ofs->config.index ? "on" : "off");
+		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
 	return 0;
 }
 
@@ -348,8 +370,7 @@ enum {
 	OPT_UPPERDIR,
 	OPT_WORKDIR,
 	OPT_DEFAULT_PERMISSIONS,
-	OPT_REDIRECT_DIR_ON,
-	OPT_REDIRECT_DIR_OFF,
+	OPT_REDIRECT_DIR,
 	OPT_INDEX_ON,
 	OPT_INDEX_OFF,
 	OPT_ERR,
@@ -360,8 +381,7 @@ static const match_table_t ovl_tokens = {
 	{OPT_UPPERDIR,			"upperdir=%s"},
 	{OPT_WORKDIR,			"workdir=%s"},
 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
-	{OPT_REDIRECT_DIR_ON,		"redirect_dir=on"},
-	{OPT_REDIRECT_DIR_OFF,		"redirect_dir=off"},
+	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
 	{OPT_INDEX_ON,			"index=on"},
 	{OPT_INDEX_OFF,			"index=off"},
 	{OPT_ERR,			NULL}
@@ -390,10 +410,37 @@ static char *ovl_next_opt(char **s)
 	return sbegin;
 }
 
+static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
+{
+	if (strcmp(mode, "on") == 0) {
+		config->redirect_dir = true;
+		/*
+		 * Does not make sense to have redirect creation without
+		 * redirect following.
+		 */
+		config->redirect_follow = true;
+	} else if (strcmp(mode, "follow") == 0) {
+		config->redirect_follow = true;
+	} else if (strcmp(mode, "off") == 0) {
+		if (ovl_redirect_always_follow)
+			config->redirect_follow = true;
+	} else if (strcmp(mode, "nofollow") != 0) {
+		pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
+		       mode);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int ovl_parse_opt(char *opt, struct ovl_config *config)
 {
 	char *p;
 
+	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
+	if (!config->redirect_mode)
+		return -ENOMEM;
+
 	while ((p = ovl_next_opt(&opt)) != NULL) {
 		int token;
 		substring_t args[MAX_OPT_ARGS];
@@ -428,12 +475,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			config->default_permissions = true;
 			break;
 
-		case OPT_REDIRECT_DIR_ON:
-			config->redirect_dir = true;
-			break;
-
-		case OPT_REDIRECT_DIR_OFF:
-			config->redirect_dir = false;
+		case OPT_REDIRECT_DIR:
+			kfree(config->redirect_mode);
+			config->redirect_mode = match_strdup(&args[0]);
+			if (!config->redirect_mode)
+				return -ENOMEM;
 			break;
 
 		case OPT_INDEX_ON:
@@ -458,7 +504,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 		config->workdir = NULL;
 	}
 
-	return 0;
+	return ovl_parse_redirect_mode(config, config->redirect_mode);
 }
 
 #define OVL_WORKDIR_NAME "work"
@@ -1160,7 +1206,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	if (!cred)
 		goto out_err;
 
-	ofs->config.redirect_dir = ovl_redirect_dir_def;
 	ofs->config.index = ovl_index_def;
 	err = ovl_parse_opt((char *) data, &ofs->config);
 	if (err)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 28fa85276eec6679cd1ad7a0b408975cbea9204f..60316b52d6591459d4c25bc0d434c4bfea3d2fef 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2268,7 +2268,7 @@ static int show_timer(struct seq_file *m, void *v)
 	notify = timer->it_sigev_notify;
 
 	seq_printf(m, "ID: %d\n", timer->it_id);
-	seq_printf(m, "signal: %d/%p\n",
+	seq_printf(m, "signal: %d/%px\n",
 		   timer->sigq->info.si_signo,
 		   timer->sigq->info.si_value.sival_ptr);
 	seq_printf(m, "notify: %s/%s.%d\n",
diff --git a/fs/super.c b/fs/super.c
index d4e33e8f1e6fee3172e0e07e9d358587eea34bc4..06bd25d90ba591f45c6f772e218ffce91c915dbc 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -191,6 +191,24 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
 
 	INIT_LIST_HEAD(&s->s_mounts);
 	s->s_user_ns = get_user_ns(user_ns);
+	init_rwsem(&s->s_umount);
+	lockdep_set_class(&s->s_umount, &type->s_umount_key);
+	/*
+	 * sget() can have s_umount recursion.
+	 *
+	 * When it cannot find a suitable sb, it allocates a new
+	 * one (this one), and tries again to find a suitable old
+	 * one.
+	 *
+	 * In case that succeeds, it will acquire the s_umount
+	 * lock of the old one. Since these are clearly distrinct
+	 * locks, and this object isn't exposed yet, there's no
+	 * risk of deadlocks.
+	 *
+	 * Annotate this by putting this lock in a different
+	 * subclass.
+	 */
+	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
 
 	if (security_sb_alloc(s))
 		goto fail;
@@ -218,25 +236,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
 		goto fail;
 	if (list_lru_init_memcg(&s->s_inode_lru))
 		goto fail;
-
-	init_rwsem(&s->s_umount);
-	lockdep_set_class(&s->s_umount, &type->s_umount_key);
-	/*
-	 * sget() can have s_umount recursion.
-	 *
-	 * When it cannot find a suitable sb, it allocates a new
-	 * one (this one), and tries again to find a suitable old
-	 * one.
-	 *
-	 * In case that succeeds, it will acquire the s_umount
-	 * lock of the old one. Since these are clearly distrinct
-	 * locks, and this object isn't exposed yet, there's no
-	 * risk of deadlocks.
-	 *
-	 * Annotate this by putting this lock in a different
-	 * subclass.
-	 */
-	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
 	s->s_count = 1;
 	atomic_set(&s->s_active, 1);
 	mutex_init(&s->s_vfs_rename_mutex);
@@ -518,7 +517,11 @@ struct super_block *sget_userns(struct file_system_type *type,
 	hlist_add_head(&s->s_instances, &type->fs_supers);
 	spin_unlock(&sb_lock);
 	get_filesystem(type);
-	register_shrinker(&s->s_shrink);
+	err = register_shrinker(&s->s_shrink);
+	if (err) {
+		deactivate_locked_super(s);
+		s = ERR_PTR(err);
+	}
 	return s;
 }
 
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ac9a4e65ca497ad3b673dc50893d13b44150c70f..41a75f9f23fdbfc2d30ae80b979a331484f0dcf9 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -570,11 +570,14 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 					      struct userfaultfd_wait_queue *ewq)
 {
+	struct userfaultfd_ctx *release_new_ctx;
+
 	if (WARN_ON_ONCE(current->flags & PF_EXITING))
 		goto out;
 
 	ewq->ctx = ctx;
 	init_waitqueue_entry(&ewq->wq, current);
+	release_new_ctx = NULL;
 
 	spin_lock(&ctx->event_wqh.lock);
 	/*
@@ -601,8 +604,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 				new = (struct userfaultfd_ctx *)
 					(unsigned long)
 					ewq->msg.arg.reserved.reserved1;
-
-				userfaultfd_ctx_put(new);
+				release_new_ctx = new;
 			}
 			break;
 		}
@@ -617,6 +619,20 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 	__set_current_state(TASK_RUNNING);
 	spin_unlock(&ctx->event_wqh.lock);
 
+	if (release_new_ctx) {
+		struct vm_area_struct *vma;
+		struct mm_struct *mm = release_new_ctx->mm;
+
+		/* the various vma->vm_userfaultfd_ctx still points to it */
+		down_write(&mm->mmap_sem);
+		for (vma = mm->mmap; vma; vma = vma->vm_next)
+			if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx)
+				vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+		up_write(&mm->mmap_sem);
+
+		userfaultfd_ctx_put(release_new_ctx);
+	}
+
 	/*
 	 * ctx may go away after this if the userfault pseudo fd is
 	 * already released.
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 0da80019a9173cba18811abc71e39ca4631a7449..83ed7715f856d2025509c308583436e63d0043a5 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -702,7 +702,7 @@ xfs_alloc_ag_vextent(
 	ASSERT(args->agbno % args->alignment == 0);
 
 	/* if not file data, insert new block into the reverse map btree */
-	if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
 		error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
 				       args->agbno, args->len, &args->oinfo);
 		if (error)
@@ -1682,7 +1682,7 @@ xfs_free_ag_extent(
 	bno_cur = cnt_cur = NULL;
 	mp = tp->t_mountp;
 
-	if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+	if (!xfs_rmap_should_skip_owner_update(oinfo)) {
 		error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
 		if (error)
 			goto error0;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6249c92671debe20a45e3cb0577360552e36d523..a76914db72ef11094cd8d74e5bd6cd6add2a3fde 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -212,6 +212,7 @@ xfs_attr_set(
 	int			flags)
 {
 	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_buf		*leaf_bp = NULL;
 	struct xfs_da_args	args;
 	struct xfs_defer_ops	dfops;
 	struct xfs_trans_res	tres;
@@ -327,9 +328,16 @@ xfs_attr_set(
 		 * GROT: another possible req'mt for a double-split btree op.
 		 */
 		xfs_defer_init(args.dfops, args.firstblock);
-		error = xfs_attr_shortform_to_leaf(&args);
+		error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
 		if (error)
 			goto out_defer_cancel;
+		/*
+		 * Prevent the leaf buffer from being unlocked so that a
+		 * concurrent AIL push cannot grab the half-baked leaf
+		 * buffer and run into problems with the write verifier.
+		 */
+		xfs_trans_bhold(args.trans, leaf_bp);
+		xfs_defer_bjoin(args.dfops, leaf_bp);
 		xfs_defer_ijoin(args.dfops, dp);
 		error = xfs_defer_finish(&args.trans, args.dfops);
 		if (error)
@@ -337,13 +345,14 @@ xfs_attr_set(
 
 		/*
 		 * Commit the leaf transformation.  We'll need another (linked)
-		 * transaction to add the new attribute to the leaf.
+		 * transaction to add the new attribute to the leaf, which
+		 * means that we have to hold & join the leaf buffer here too.
 		 */
-
 		error = xfs_trans_roll_inode(&args.trans, dp);
 		if (error)
 			goto out;
-
+		xfs_trans_bjoin(args.trans, leaf_bp);
+		leaf_bp = NULL;
 	}
 
 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -374,8 +383,9 @@ xfs_attr_set(
 
 out_defer_cancel:
 	xfs_defer_cancel(&dfops);
-	args.trans = NULL;
 out:
+	if (leaf_bp)
+		xfs_trans_brelse(args.trans, leaf_bp);
 	if (args.trans)
 		xfs_trans_cancel(args.trans);
 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 53cc8b986eac45c4e5ec4afd319891b6e9f716c5..601eaa36f1ada22e2213f9178bcb5cdb5868034d 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 }
 
 /*
- * Convert from using the shortform to the leaf.
+ * Convert from using the shortform to the leaf.  On success, return the
+ * buffer so that we can keep it locked until we're totally done with it.
  */
 int
-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
+xfs_attr_shortform_to_leaf(
+	struct xfs_da_args	*args,
+	struct xfs_buf		**leaf_bp)
 {
 	xfs_inode_t *dp;
 	xfs_attr_shortform_t *sf;
@@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 		sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
 	}
 	error = 0;
-
+	*leaf_bp = bp;
 out:
 	kmem_free(tmpbuffer);
 	return error;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index f7dda0c237b044b166d6d178fca3178feff2b644..894124efb421e0d0674b0f39bf63dabfe7916937 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -48,7 +48,8 @@ void	xfs_attr_shortform_create(struct xfs_da_args *args);
 void	xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
 int	xfs_attr_shortform_lookup(struct xfs_da_args *args);
 int	xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
+			struct xfs_buf **leaf_bp);
 int	xfs_attr_shortform_remove(struct xfs_da_args *args);
 int	xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int	xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 1210f684d3c28f9af8d8403c1f0222ef06dc380b..1bddbba6b80c960bdcc10c9a30210c119e1b2f77 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5136,7 +5136,7 @@ __xfs_bunmapi(
 	 * blowing out the transaction with a mix of EFIs and reflink
 	 * adjustments.
 	 */
-	if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+	if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
 		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
 	else
 		max_len = len;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 072ebfe1d6aeb3e00e306a06d71a1b478382f3ad..087fea02c3892c34e1e63df2839bd8d804ba1f3f 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
 	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
 		xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
 
+	/* Hold the (previously bjoin'd) buffer locked across the roll. */
+	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
+		xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
+
 	trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
 
 	/* Roll the transaction. */
@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
 	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
 		xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
 
+	/* Rejoin the buffers and dirty them so the log moves forward. */
+	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
+		xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
+		xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+	}
+
 	return error;
 }
 
@@ -295,6 +305,31 @@ xfs_defer_ijoin(
 		}
 	}
 
+	ASSERT(0);
+	return -EFSCORRUPTED;
+}
+
+/*
+ * Add this buffer to the deferred op.  Each joined buffer is relogged
+ * each time we roll the transaction.
+ */
+int
+xfs_defer_bjoin(
+	struct xfs_defer_ops		*dop,
+	struct xfs_buf			*bp)
+{
+	int				i;
+
+	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
+		if (dop->dop_bufs[i] == bp)
+			return 0;
+		else if (dop->dop_bufs[i] == NULL) {
+			dop->dop_bufs[i] = bp;
+			return 0;
+		}
+	}
+
+	ASSERT(0);
 	return -EFSCORRUPTED;
 }
 
@@ -493,9 +528,7 @@ xfs_defer_init(
 	struct xfs_defer_ops		*dop,
 	xfs_fsblock_t			*fbp)
 {
-	dop->dop_committed = false;
-	dop->dop_low = false;
-	memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+	memset(dop, 0, sizeof(struct xfs_defer_ops));
 	*fbp = NULLFSBLOCK;
 	INIT_LIST_HEAD(&dop->dop_intake);
 	INIT_LIST_HEAD(&dop->dop_pending);
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index d4f046dd44bd4ae434d8104991d0327f2d2b9fc7..045beacdd37d81c9e01e0ab46ab2bbcbbb581fbb 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
 };
 
 #define XFS_DEFER_OPS_NR_INODES	2	/* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS	2	/* join up to two buffers */
 
 struct xfs_defer_ops {
 	bool			dop_committed;	/* did any trans commit? */
@@ -66,8 +67,9 @@ struct xfs_defer_ops {
 	struct list_head	dop_intake;	/* unlogged pending work */
 	struct list_head	dop_pending;	/* logged pending work */
 
-	/* relog these inodes with each roll */
+	/* relog these with each roll */
 	struct xfs_inode	*dop_inodes[XFS_DEFER_OPS_NR_INODES];
+	struct xfs_buf		*dop_bufs[XFS_DEFER_OPS_NR_BUFS];
 };
 
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
 void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
 
 /* Description of a deferred type. */
 struct xfs_defer_op_type {
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index de3f04a986565d7265fc694b80962000de8d7dc8..3b57ef0f2f76c758e6a9c8b89b7a0c470cdd09a9 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -920,8 +920,7 @@ STATIC xfs_agnumber_t
 xfs_ialloc_ag_select(
 	xfs_trans_t	*tp,		/* transaction pointer */
 	xfs_ino_t	parent,		/* parent directory inode number */
-	umode_t		mode,		/* bits set to indicate file type */
-	int		okalloc)	/* ok to allocate more space */
+	umode_t		mode)		/* bits set to indicate file type */
 {
 	xfs_agnumber_t	agcount;	/* number of ag's in the filesystem */
 	xfs_agnumber_t	agno;		/* current ag number */
@@ -978,9 +977,6 @@ xfs_ialloc_ag_select(
 			return agno;
 		}
 
-		if (!okalloc)
-			goto nextag;
-
 		if (!pag->pagf_init) {
 			error = xfs_alloc_pagf_init(mp, tp, agno, flags);
 			if (error)
@@ -1680,7 +1676,6 @@ xfs_dialloc(
 	struct xfs_trans	*tp,
 	xfs_ino_t		parent,
 	umode_t			mode,
-	int			okalloc,
 	struct xfs_buf		**IO_agbp,
 	xfs_ino_t		*inop)
 {
@@ -1692,6 +1687,7 @@ xfs_dialloc(
 	int			noroom = 0;
 	xfs_agnumber_t		start_agno;
 	struct xfs_perag	*pag;
+	int			okalloc = 1;
 
 	if (*IO_agbp) {
 		/*
@@ -1707,7 +1703,7 @@ xfs_dialloc(
 	 * We do not have an agbp, so select an initial allocation
 	 * group for inode allocation.
 	 */
-	start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+	start_agno = xfs_ialloc_ag_select(tp, parent, mode);
 	if (start_agno == NULLAGNUMBER) {
 		*inop = NULLFSINO;
 		return 0;
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index d2bdcd5e7312e499deb91b29bc62cb91cc35e881..66a8de0b1caaad8d1ba9d5ed94fccc813780dca3 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -81,7 +81,6 @@ xfs_dialloc(
 	struct xfs_trans *tp,		/* transaction pointer */
 	xfs_ino_t	parent,		/* parent inode (directory) */
 	umode_t		mode,		/* mode bits for new inode */
-	int		okalloc,	/* ok to allocate more space */
 	struct xfs_buf	**agbp,		/* buf for a.g. inode header */
 	xfs_ino_t	*inop);		/* inode number allocated */
 
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 89bf16b4d9377293fa842c48f2b9b637f83c62c2..b0f31791c7e6137c0b7e46c35000e3c76e145ba1 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -632,8 +632,6 @@ xfs_iext_insert(
 	struct xfs_iext_leaf	*new = NULL;
 	int			nr_entries, i;
 
-	trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
-
 	if (ifp->if_height == 0)
 		xfs_iext_alloc_root(ifp, cur);
 	else if (ifp->if_height == 1)
@@ -661,6 +659,8 @@ xfs_iext_insert(
 	xfs_iext_set(cur_rec(cur), irec);
 	ifp->if_bytes += sizeof(struct xfs_iext_rec);
 
+	trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
+
 	if (new)
 		xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
 }
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 585b35d34142157863740d3b8cd437b37e49e05d..c40d26763075307b064d49bd3cb48dfce8dd5b67 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc(
 	xfs_extlen_t		aglen,
 	struct xfs_defer_ops	*dfops)
 {
-	int			error;
-
 	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
 			agbno, aglen);
 
 	/* Add refcount btree reservation */
-	error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
 			XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
-	if (error)
-		return error;
-
-	/* Add rmap entry */
-	if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-		error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
-				rcur->bc_private.a.agno,
-				agbno, aglen, XFS_RMAP_OWN_COW);
-		if (error)
-			return error;
-	}
-
-	return error;
 }
 
 /*
@@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free(
 	xfs_extlen_t		aglen,
 	struct xfs_defer_ops	*dfops)
 {
-	int			error;
-
 	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
 			agbno, aglen);
 
 	/* Remove refcount btree reservation */
-	error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
 			XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
-	if (error)
-		return error;
-
-	/* Remove rmap entry */
-	if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-		error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
-				rcur->bc_private.a.agno,
-				agbno, aglen, XFS_RMAP_OWN_COW);
-		if (error)
-			return error;
-	}
-
-	return error;
 }
 
 /* Record a CoW staging extent in the refcount btree. */
@@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent(
 	xfs_fsblock_t			fsb,
 	xfs_extlen_t			len)
 {
+	int				error;
+
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return 0;
 
-	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
+	error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
 			fsb, len);
+	if (error)
+		return error;
+
+	/* Add rmap entry */
+	return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
 }
 
 /* Forget a CoW staging event in the refcount btree. */
@@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent(
 	xfs_fsblock_t			fsb,
 	xfs_extlen_t			len)
 {
+	int				error;
+
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return 0;
 
+	/* Remove rmap entry */
+	error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
+	if (error)
+		return error;
+
 	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
 			fsb, len);
 }
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index dd019cee1b3bdccf4e08b25fd70b2c59220859a1..50db920ceeebbf077c2b3b13690066173ba7cf4e 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -367,6 +367,51 @@ xfs_rmap_lookup_le_range(
 	return error;
 }
 
+/*
+ * Perform all the relevant owner checks for a removal op.  If we're doing an
+ * unknown-owner removal then we have no owner information to check.
+ */
+static int
+xfs_rmap_free_check_owner(
+	struct xfs_mount	*mp,
+	uint64_t		ltoff,
+	struct xfs_rmap_irec	*rec,
+	xfs_fsblock_t		bno,
+	xfs_filblks_t		len,
+	uint64_t		owner,
+	uint64_t		offset,
+	unsigned int		flags)
+{
+	int			error = 0;
+
+	if (owner == XFS_RMAP_OWN_UNKNOWN)
+		return 0;
+
+	/* Make sure the unwritten flag matches. */
+	XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
+			(rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
+
+	/* Make sure the owner matches what we expect to find in the tree. */
+	XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
+
+	/* Check the offset, if necessary. */
+	if (XFS_RMAP_NON_INODE_OWNER(owner))
+		goto out;
+
+	if (flags & XFS_RMAP_BMBT_BLOCK) {
+		XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
+				out);
+	} else {
+		XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
+		XFS_WANT_CORRUPTED_GOTO(mp,
+				ltoff + rec->rm_blockcount >= offset + len,
+				out);
+	}
+
+out:
+	return error;
+}
+
 /*
  * Find the extent in the rmap btree and remove it.
  *
@@ -444,33 +489,40 @@ xfs_rmap_unmap(
 		goto out_done;
 	}
 
-	/* Make sure the unwritten flag matches. */
-	XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
-			(ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
+	/*
+	 * If we're doing an unknown-owner removal for EFI recovery, we expect
+	 * to find the full range in the rmapbt or nothing at all.  If we
+	 * don't find any rmaps overlapping either end of the range, we're
+	 * done.  Hopefully this means that the EFI creator already queued
+	 * (and finished) a RUI to remove the rmap.
+	 */
+	if (owner == XFS_RMAP_OWN_UNKNOWN &&
+	    ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
+		struct xfs_rmap_irec    rtrec;
+
+		error = xfs_btree_increment(cur, 0, &i);
+		if (error)
+			goto out_error;
+		if (i == 0)
+			goto out_done;
+		error = xfs_rmap_get_rec(cur, &rtrec, &i);
+		if (error)
+			goto out_error;
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+		if (rtrec.rm_startblock >= bno + len)
+			goto out_done;
+	}
 
 	/* Make sure the extent we found covers the entire freeing range. */
 	XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
-		ltrec.rm_startblock + ltrec.rm_blockcount >=
-		bno + len, out_error);
+			ltrec.rm_startblock + ltrec.rm_blockcount >=
+			bno + len, out_error);
 
-	/* Make sure the owner matches what we expect to find in the tree. */
-	XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
-				    XFS_RMAP_NON_INODE_OWNER(owner), out_error);
-
-	/* Check the offset, if necessary. */
-	if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
-		if (flags & XFS_RMAP_BMBT_BLOCK) {
-			XFS_WANT_CORRUPTED_GOTO(mp,
-					ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
-					out_error);
-		} else {
-			XFS_WANT_CORRUPTED_GOTO(mp,
-					ltrec.rm_offset <= offset, out_error);
-			XFS_WANT_CORRUPTED_GOTO(mp,
-					ltoff + ltrec.rm_blockcount >= offset + len,
-					out_error);
-		}
-	}
+	/* Check owner information. */
+	error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
+			offset, flags);
+	if (error)
+		goto out_error;
 
 	if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
 		/* exact match, simply remove the record from rmap tree */
@@ -664,6 +716,7 @@ xfs_rmap_map(
 		flags |= XFS_RMAP_UNWRITTEN;
 	trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
 			unwritten, oinfo);
+	ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
 
 	/*
 	 * For the initial lookup, look for an exact match or the left-adjacent
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 466ede637080e5832046a96d62789eb2e46ed03f..0fcd5b1ba7295379081e0c61d230324447a8ae56 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -61,7 +61,21 @@ static inline void
 xfs_rmap_skip_owner_update(
 	struct xfs_owner_info	*oi)
 {
-	oi->oi_owner = XFS_RMAP_OWN_UNKNOWN;
+	xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
+}
+
+static inline bool
+xfs_rmap_should_skip_owner_update(
+	struct xfs_owner_info	*oi)
+{
+	return oi->oi_owner == XFS_RMAP_OWN_NULL;
+}
+
+static inline void
+xfs_rmap_any_owner_update(
+	struct xfs_owner_info	*oi)
+{
+	xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
 }
 
 /* Reverse mapping functions. */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 9c42c4efd01ec57bb1945ba671780cc2687e49c3..ab3aef2ae8233350f42647b77c0dff833521fe2f 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -46,7 +46,6 @@
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
-#include "scrub/scrub.h"
 #include "scrub/btree.h"
 
 /*
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 472080e757887957b5cd7baf9f12a10d6b27ae71..86daed0e3a458dd16ab76251b2772400be1e9409 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -26,7 +26,6 @@
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_da_format.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_trans.h"
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 21e2d70884e18edc2c765584f201a8b04604837c..4fc526a27a94fe4594508b707f0a9ca976c1dd98 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -399,7 +399,7 @@ xfs_map_blocks(
 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 
-	if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes)
+	if (offset > mp->m_super->s_maxbytes - count)
 		count = mp->m_super->s_maxbytes - offset;
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -1312,7 +1312,7 @@ xfs_get_blocks(
 	lockmode = xfs_ilock_data_map_shared(ip);
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
-	if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes)
+	if (offset > mp->m_super->s_maxbytes - size)
 		size = mp->m_super->s_maxbytes - offset;
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 44f8c54512102577dbe768e5f137298ba1ee79c7..64da90655e957c3fd01331720aa32093909ddad6 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -538,7 +538,7 @@ xfs_efi_recover(
 		return error;
 	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
 
-	xfs_rmap_skip_owner_update(&oinfo);
+	xfs_rmap_any_owner_update(&oinfo);
 	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
 		extp = &efip->efi_format.efi_extents[i];
 		error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8f22fc579dbba4abf9b609040802d24e9cf2f732..60a2e128cb6a59aa7181faa5c519d511a308bc5c 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -571,6 +571,11 @@ xfs_growfs_data_private(
 		 * this doesn't actually exist in the rmap btree.
 		 */
 		xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
+		error = xfs_rmap_free(tp, bp, agno,
+				be32_to_cpu(agf->agf_length) - new,
+				new, &oinfo);
+		if (error)
+			goto error0;
 		error = xfs_free_extent(tp,
 				XFS_AGB_TO_FSB(mp, agno,
 					be32_to_cpu(agf->agf_length) - new),
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 43005fbe8b1eefabc84ee762a9427ec784889814..3861d61fb265f66a9d39723286d5adc9772cc15e 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -870,7 +870,7 @@ xfs_eofblocks_worker(
  * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
  * (We'll just piggyback on the post-EOF prealloc space workqueue.)
  */
-STATIC void
+void
 xfs_queue_cowblocks(
 	struct xfs_mount *mp)
 {
@@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks(
 	return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
 }
 
+static inline unsigned long
+xfs_iflag_for_tag(
+	int		tag)
+{
+	switch (tag) {
+	case XFS_ICI_EOFBLOCKS_TAG:
+		return XFS_IEOFBLOCKS;
+	case XFS_ICI_COWBLOCKS_TAG:
+		return XFS_ICOWBLOCKS;
+	default:
+		ASSERT(0);
+		return 0;
+	}
+}
+
 static void
-__xfs_inode_set_eofblocks_tag(
+__xfs_inode_set_blocks_tag(
 	xfs_inode_t	*ip,
 	void		(*execute)(struct xfs_mount *mp),
 	void		(*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag(
 	 * Don't bother locking the AG and looking up in the radix trees
 	 * if we already know that we have the tag set.
 	 */
-	if (ip->i_flags & XFS_IEOFBLOCKS)
+	if (ip->i_flags & xfs_iflag_for_tag(tag))
 		return;
 	spin_lock(&ip->i_flags_lock);
-	ip->i_flags |= XFS_IEOFBLOCKS;
+	ip->i_flags |= xfs_iflag_for_tag(tag);
 	spin_unlock(&ip->i_flags_lock);
 
 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag(
 	xfs_inode_t	*ip)
 {
 	trace_xfs_inode_set_eofblocks_tag(ip);
-	return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks,
+	return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
 			trace_xfs_perag_set_eofblocks,
 			XFS_ICI_EOFBLOCKS_TAG);
 }
 
 static void
-__xfs_inode_clear_eofblocks_tag(
+__xfs_inode_clear_blocks_tag(
 	xfs_inode_t	*ip,
 	void		(*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
 				    int error, unsigned long caller_ip),
@@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag(
 	struct xfs_perag *pag;
 
 	spin_lock(&ip->i_flags_lock);
-	ip->i_flags &= ~XFS_IEOFBLOCKS;
+	ip->i_flags &= ~xfs_iflag_for_tag(tag);
 	spin_unlock(&ip->i_flags_lock);
 
 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag(
 	xfs_inode_t	*ip)
 {
 	trace_xfs_inode_clear_eofblocks_tag(ip);
-	return __xfs_inode_clear_eofblocks_tag(ip,
+	return __xfs_inode_clear_blocks_tag(ip,
 			trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
 }
 
@@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag(
 	xfs_inode_t	*ip)
 {
 	trace_xfs_inode_set_cowblocks_tag(ip);
-	return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
+	return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
 			trace_xfs_perag_set_cowblocks,
 			XFS_ICI_COWBLOCKS_TAG);
 }
@@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag(
 	xfs_inode_t	*ip)
 {
 	trace_xfs_inode_clear_cowblocks_tag(ip);
-	return __xfs_inode_clear_eofblocks_tag(ip,
+	return __xfs_inode_clear_blocks_tag(ip,
 			trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
 }
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index bff4d85e54984ad84ca0741f801f536a99195055..d4a77588eca15b90639debc1ca6c5c62a3680de8 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
 int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
 int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
 void xfs_cowblocks_worker(struct work_struct *);
+void xfs_queue_cowblocks(struct xfs_mount *);
 
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
 	int (*execute)(struct xfs_inode *ip, int flags, void *args),
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8012741266488ab4e0724b68aadb2742c1d29c2c..6f95bdb408ced01b9471b931714d279003a22d92 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -749,7 +749,6 @@ xfs_ialloc(
 	xfs_nlink_t	nlink,
 	dev_t		rdev,
 	prid_t		prid,
-	int		okalloc,
 	xfs_buf_t	**ialloc_context,
 	xfs_inode_t	**ipp)
 {
@@ -765,7 +764,7 @@ xfs_ialloc(
 	 * Call the space management code to pick
 	 * the on-disk inode to be allocated.
 	 */
-	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
+	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
 			    ialloc_context, &ino);
 	if (error)
 		return error;
@@ -957,7 +956,6 @@ xfs_dir_ialloc(
 	xfs_nlink_t	nlink,
 	dev_t		rdev,
 	prid_t		prid,		/* project id */
-	int		okalloc,	/* ok to allocate new space */
 	xfs_inode_t	**ipp,		/* pointer to inode; it will be
 					   locked. */
 	int		*committed)
@@ -988,8 +986,8 @@ xfs_dir_ialloc(
 	 * transaction commit so that no other process can steal
 	 * the inode(s) that we've just allocated.
 	 */
-	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
-			  &ialloc_context, &ip);
+	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context,
+			&ip);
 
 	/*
 	 * Return an error if we were unable to allocate a new inode.
@@ -1061,7 +1059,7 @@ xfs_dir_ialloc(
 		 * this call should always succeed.
 		 */
 		code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
-				  okalloc, &ialloc_context, &ip);
+				  &ialloc_context, &ip);
 
 		/*
 		 * If we get an error at this point, return to the caller
@@ -1182,11 +1180,6 @@ xfs_create(
 		xfs_flush_inodes(mp);
 		error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
 	}
-	if (error == -ENOSPC) {
-		/* No space at all so try a "no-allocation" reservation */
-		resblks = 0;
-		error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
-	}
 	if (error)
 		goto out_release_inode;
 
@@ -1203,19 +1196,13 @@ xfs_create(
 	if (error)
 		goto out_trans_cancel;
 
-	if (!resblks) {
-		error = xfs_dir_canenter(tp, dp, name);
-		if (error)
-			goto out_trans_cancel;
-	}
-
 	/*
 	 * A newly created regular or special file just has one directory
 	 * entry pointing to them, but a directory also the "." entry
 	 * pointing to itself.
 	 */
-	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
-			       prid, resblks > 0, &ip, NULL);
+	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip,
+			NULL);
 	if (error)
 		goto out_trans_cancel;
 
@@ -1340,11 +1327,6 @@ xfs_create_tmpfile(
 	tres = &M_RES(mp)->tr_create_tmpfile;
 
 	error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
-	if (error == -ENOSPC) {
-		/* No space at all so try a "no-allocation" reservation */
-		resblks = 0;
-		error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
-	}
 	if (error)
 		goto out_release_inode;
 
@@ -1353,8 +1335,7 @@ xfs_create_tmpfile(
 	if (error)
 		goto out_trans_cancel;
 
-	error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
-				prid, resblks > 0, &ip, NULL);
+	error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);
 	if (error)
 		goto out_trans_cancel;
 
@@ -1506,6 +1487,24 @@ xfs_link(
 	return error;
 }
 
+/* Clear the reflink flag and the cowblocks tag if possible. */
+static void
+xfs_itruncate_clear_reflink_flags(
+	struct xfs_inode	*ip)
+{
+	struct xfs_ifork	*dfork;
+	struct xfs_ifork	*cfork;
+
+	if (!xfs_is_reflink_inode(ip))
+		return;
+	dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+	if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
+		ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+	if (cfork->if_bytes == 0)
+		xfs_inode_clear_cowblocks_tag(ip);
+}
+
 /*
  * Free up the underlying blocks past new_size.  The new size must be smaller
  * than the current size.  This routine can be used both for the attribute and
@@ -1602,15 +1601,7 @@ xfs_itruncate_extents(
 	if (error)
 		goto out;
 
-	/*
-	 * Clear the reflink flag if there are no data fork blocks and
-	 * there are no extents staged in the cow fork.
-	 */
-	if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
-		if (ip->i_d.di_nblocks == 0)
-			ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
-		xfs_inode_clear_cowblocks_tag(ip);
-	}
+	xfs_itruncate_clear_reflink_flags(ip);
 
 	/*
 	 * Always re-log the inode so that our permanent transaction can keep
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index cc13c37637217e74e4c9425b34a710a5fd55e090..d383e392ec9ddcca6f552dc8c4cfe5329373d6c9 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
  * log recovery to replay a bmap operation on the inode.
  */
 #define XFS_IRECOVERY		(1 << 11)
+#define XFS_ICOWBLOCKS		(1 << 12)/* has the cowblocks tag set */
 
 /*
  * Per-lifetime flags need to be reset when re-using a reclaimable inode during
@@ -428,7 +429,7 @@ xfs_extlen_t	xfs_get_extsz_hint(struct xfs_inode *ip);
 xfs_extlen_t	xfs_get_cowextsz_hint(struct xfs_inode *ip);
 
 int		xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
-			       xfs_nlink_t, dev_t, prid_t, int,
+			       xfs_nlink_t, dev_t, prid_t,
 			       struct xfs_inode **, int *);
 
 /* from xfs_file.c */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 33eb4fb2e3fd87b0848ebe2599b0a71b05f0d082..66e1edbfb2b2bcd7d278226a33f0531bb0043b97 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1006,7 +1006,7 @@ xfs_file_iomap_begin(
 	}
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
-	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+	if (offset > mp->m_super->s_maxbytes - length)
 		length = mp->m_super->s_maxbytes - offset;
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	end_fsb = XFS_B_TO_FSB(mp, offset + length);
@@ -1213,7 +1213,7 @@ xfs_xattr_iomap_begin(
 
 	ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
-			       &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK);
+			       &nimaps, XFS_BMAPI_ATTRFORK);
 out_unlock:
 	xfs_iunlock(ip, lockmode);
 
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 010a13a201aad78382ae69fdcc8e3b8d5c451c1d..b897b11afb2c658bebba0416739fddcc0fec5aa4 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -48,7 +48,7 @@
 STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
 
-
+STATIC void	xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi);
 STATIC void	xfs_qm_dqfree_one(struct xfs_dquot *dqp);
 /*
  * We use the batch lookup interface to iterate over the dquots as it
@@ -695,9 +695,17 @@ xfs_qm_init_quotainfo(
 	qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
 	qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
 	qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
-	register_shrinker(&qinf->qi_shrinker);
+
+	error = register_shrinker(&qinf->qi_shrinker);
+	if (error)
+		goto out_free_inos;
+
 	return 0;
 
+out_free_inos:
+	mutex_destroy(&qinf->qi_quotaofflock);
+	mutex_destroy(&qinf->qi_tree_lock);
+	xfs_qm_destroy_quotainos(qinf);
 out_free_lru:
 	list_lru_destroy(&qinf->qi_lru);
 out_free_qinf:
@@ -706,7 +714,6 @@ xfs_qm_init_quotainfo(
 	return error;
 }
 
-
 /*
  * Gets called when unmounting a filesystem or when all quotas get
  * turned off.
@@ -723,19 +730,8 @@ xfs_qm_destroy_quotainfo(
 
 	unregister_shrinker(&qi->qi_shrinker);
 	list_lru_destroy(&qi->qi_lru);
-
-	if (qi->qi_uquotaip) {
-		IRELE(qi->qi_uquotaip);
-		qi->qi_uquotaip = NULL; /* paranoia */
-	}
-	if (qi->qi_gquotaip) {
-		IRELE(qi->qi_gquotaip);
-		qi->qi_gquotaip = NULL;
-	}
-	if (qi->qi_pquotaip) {
-		IRELE(qi->qi_pquotaip);
-		qi->qi_pquotaip = NULL;
-	}
+	xfs_qm_destroy_quotainos(qi);
+	mutex_destroy(&qi->qi_tree_lock);
 	mutex_destroy(&qi->qi_quotaofflock);
 	kmem_free(qi);
 	mp->m_quotainfo = NULL;
@@ -793,8 +789,8 @@ xfs_qm_qino_alloc(
 		return error;
 
 	if (need_alloc) {
-		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
-								&committed);
+		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip,
+				&committed);
 		if (error) {
 			xfs_trans_cancel(tp);
 			return error;
@@ -1599,6 +1595,24 @@ xfs_qm_init_quotainos(
 	return error;
 }
 
+STATIC void
+xfs_qm_destroy_quotainos(
+	xfs_quotainfo_t	*qi)
+{
+	if (qi->qi_uquotaip) {
+		IRELE(qi->qi_uquotaip);
+		qi->qi_uquotaip = NULL; /* paranoia */
+	}
+	if (qi->qi_gquotaip) {
+		IRELE(qi->qi_gquotaip);
+		qi->qi_gquotaip = NULL;
+	}
+	if (qi->qi_pquotaip) {
+		IRELE(qi->qi_pquotaip);
+		qi->qi_pquotaip = NULL;
+	}
+}
+
 STATIC void
 xfs_qm_dqfree_one(
 	struct xfs_dquot	*dqp)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index cc041a29eb70bbb7524e036c0e24843099480617..47aea2e82c268f4bbf9c25c1c1c6f3821c11caa3 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -49,8 +49,6 @@
 #include "xfs_alloc.h"
 #include "xfs_quota_defs.h"
 #include "xfs_quota.h"
-#include "xfs_btree.h"
-#include "xfs_bmap_btree.h"
 #include "xfs_reflink.h"
 #include "xfs_iomap.h"
 #include "xfs_rmap_btree.h"
@@ -456,6 +454,8 @@ xfs_reflink_allocate_cow(
 	if (error)
 		goto out_bmap_cancel;
 
+	xfs_inode_set_cowblocks_tag(ip);
+
 	/* Finish up. */
 	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
@@ -492,8 +492,9 @@ xfs_reflink_find_cow_mapping(
 	struct xfs_iext_cursor		icur;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
-	ASSERT(xfs_is_reflink_inode(ip));
 
+	if (!xfs_is_reflink_inode(ip))
+		return false;
 	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
 	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
 		return false;
@@ -612,6 +613,9 @@ xfs_reflink_cancel_cow_blocks(
 
 			/* Remove the mapping from the CoW fork. */
 			xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
+		} else {
+			/* Didn't do anything, push cursor back. */
+			xfs_iext_prev(ifp, &icur);
 		}
 next_extent:
 		if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -727,7 +731,7 @@ xfs_reflink_end_cow(
 			(unsigned int)(end_fsb - offset_fsb),
 			XFS_DATA_FORK);
 	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
-			resblks, 0, 0, &tp);
+			resblks, 0, XFS_TRANS_RESERVE, &tp);
 	if (error)
 		goto out;
 
@@ -1293,6 +1297,17 @@ xfs_reflink_remap_range(
 
 	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
 
+	/*
+	 * Clear out post-eof preallocations because we don't have page cache
+	 * backing the delayed allocations and they'll never get freed on
+	 * their own.
+	 */
+	if (xfs_can_free_eofblocks(dest, true)) {
+		ret = xfs_free_eofblocks(dest);
+		if (ret)
+			goto out_unlock;
+	}
+
 	/* Set flags and remap blocks. */
 	ret = xfs_reflink_set_inode_flag(src, dest);
 	if (ret)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 5122d3021117f00e20d6dd1e195c28666cc71076..1dacccc367f81725a678ea3a6ed50528a731920d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1360,6 +1360,7 @@ xfs_fs_remount(
 			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 			return error;
 		}
+		xfs_queue_cowblocks(mp);
 
 		/* Create the per-AG metadata reservation pool .*/
 		error = xfs_fs_reserve_ag_blocks(mp);
@@ -1369,6 +1370,14 @@ xfs_fs_remount(
 
 	/* rw -> ro */
 	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
+		/* Get rid of any leftover CoW reservations... */
+		cancel_delayed_work_sync(&mp->m_cowblocks_work);
+		error = xfs_icache_free_cowblocks(mp, NULL);
+		if (error) {
+			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+			return error;
+		}
+
 		/* Free the per-AG metadata reservation pool. */
 		error = xfs_fs_unreserve_ag_blocks(mp);
 		if (error) {
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 68d3ca2c4968054646345dab2bd3a76f97490fb8..2e9e793a8f9dfa18e87078bce5133860d4de6d4d 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -232,11 +232,6 @@ xfs_symlink(
 	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp);
-	if (error == -ENOSPC && fs_blocks == 0) {
-		resblks = 0;
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0,
-				&tp);
-	}
 	if (error)
 		goto out_release_inode;
 
@@ -259,14 +254,6 @@ xfs_symlink(
 	if (error)
 		goto out_trans_cancel;
 
-	/*
-	 * Check for ability to enter directory entry, if no space reserved.
-	 */
-	if (!resblks) {
-		error = xfs_dir_canenter(tp, dp, link_name);
-		if (error)
-			goto out_trans_cancel;
-	}
 	/*
 	 * Initialize the bmap freelist prior to calling either
 	 * bmapi or the directory create code.
@@ -277,7 +264,7 @@ xfs_symlink(
 	 * Allocate an inode for the symlink.
 	 */
 	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
-			       prid, resblks > 0, &ip, NULL);
+			       prid, &ip, NULL);
 	if (error)
 		goto out_trans_cancel;
 
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 5d95fe34829438a0a2486bc50278d84b0d0714bf..35f3546b6af5237a78a9fe1bd3aa07cbb8503349 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -24,7 +24,6 @@
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_da_format.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_da_btree.h"
diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h
index ea189d88a3cc761e0239043785e00d9c6d8402f5..8ac4e68a12f08e4e00c1fcf850f0f8c97188d390 100644
--- a/include/asm-generic/mm_hooks.h
+++ b/include/asm-generic/mm_hooks.h
@@ -7,9 +7,10 @@
 #ifndef _ASM_GENERIC_MM_HOOKS_H
 #define _ASM_GENERIC_MM_HOOKS_H
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-				 struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+				struct mm_struct *mm)
 {
+	return 0;
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index b234d54f2cb6e4c23a21db2af3b225264eccae2a..868e68561f913ecaec80ddf05f02816767ea5a17 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 struct file;
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 			unsigned long size, pgprot_t *vma_prot);
+
+#ifndef CONFIG_X86_ESPFIX64
+static inline void init_espfix_bsp(void) { }
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #ifndef io_remap_pfn_range
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index 38d9c5861ed8c110d5dbac1b6a807252b520f23e..f38227a78eae9f6b2d99ca9d1ca7dbb6ef720ce1 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -18,6 +18,7 @@
 #include <linux/if_alg.h>
 #include <linux/scatterlist.h>
 #include <linux/types.h>
+#include <linux/atomic.h>
 #include <net/sock.h>
 
 #include <crypto/aead.h>
@@ -150,7 +151,7 @@ struct af_alg_ctx {
 	struct crypto_wait wait;
 
 	size_t used;
-	size_t rcvused;
+	atomic_t rcvused;
 
 	bool more;
 	bool merge;
@@ -215,7 +216,7 @@ static inline int af_alg_rcvbuf(struct sock *sk)
 	struct af_alg_ctx *ctx = ask->private;
 
 	return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
-			  ctx->rcvused, 0);
+		     atomic_read(&ctx->rcvused), 0);
 }
 
 /**
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index f0b44c16e88f241721a4296019475abae6b7a3b0..c2bae8da642cbaef97f3de444a446a27df15dc18 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -82,6 +82,14 @@ int ahash_register_instance(struct crypto_template *tmpl,
 			    struct ahash_instance *inst);
 void ahash_free_instance(struct crypto_instance *inst);
 
+int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+		    unsigned int keylen);
+
+static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
+{
+	return alg->setkey != shash_no_setkey;
+}
+
 int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn,
 			    struct hash_alg_common *alg,
 			    struct crypto_instance *inst);
diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h
index cceafa01f9073293cf4e813247b54bbe094728e3..b67404fc4b34bab495086b4b1e969f53a0921b39 100644
--- a/include/crypto/mcryptd.h
+++ b/include/crypto/mcryptd.h
@@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
 
 struct mcryptd_cpu_queue {
 	struct crypto_queue queue;
+	spinlock_t q_lock;
 	struct work_struct work;
 };
 
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index 5afd6e364fb6774b846134e05a1e33b010672cc1..1c27526c499eed6f50b46771aaea27282567deb9 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -189,12 +189,40 @@ struct drm_private_state_funcs {
 				     struct drm_private_state *state);
 };
 
+/**
+ * struct drm_private_obj - base struct for driver private atomic object
+ *
+ * A driver private object is initialized by calling
+ * drm_atomic_private_obj_init() and cleaned up by calling
+ * drm_atomic_private_obj_fini().
+ *
+ * Currently only tracks the state update functions and the opaque driver
+ * private state itself, but in the future might also track which
+ * &drm_modeset_lock is required to duplicate and update this object's state.
+ */
 struct drm_private_obj {
+	/**
+	 * @state: Current atomic state for this driver private object.
+	 */
 	struct drm_private_state *state;
 
+	/**
+	 * @funcs:
+	 *
+	 * Functions to manipulate the state of this driver private object, see
+	 * &drm_private_state_funcs.
+	 */
 	const struct drm_private_state_funcs *funcs;
 };
 
+/**
+ * struct drm_private_state - base struct for driver private object state
+ * @state: backpointer to global drm_atomic_state
+ *
+ * Currently only contains a backpointer to the overall atomic update, but in
+ * the future also might hold synchronization information similar to e.g.
+ * &drm_crtc.commit.
+ */
 struct drm_private_state {
 	struct drm_atomic_state *state;
 };
@@ -218,6 +246,10 @@ struct __drm_private_objs_state {
  * @num_private_objs: size of the @private_objs array
  * @private_objs: pointer to array of private object pointers
  * @acquire_ctx: acquire context for this atomic modeset state update
+ *
+ * States are added to an atomic update by calling drm_atomic_get_crtc_state(),
+ * drm_atomic_get_plane_state(), drm_atomic_get_connector_state(), or for
+ * private state structures, drm_atomic_get_private_obj_state().
  */
 struct drm_atomic_state {
 	struct kref ref;
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index f39ff52feb3be77809e83fdbf0c735cf728e58d1..ed38df4ac204af5ecadea023bfa983c4b3a11a4c 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -24,6 +24,7 @@
 #define __DRM_CONNECTOR_H__
 
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/ctype.h>
 #include <linux/hdmi.h>
 #include <drm/drm_mode_object.h>
@@ -964,6 +965,15 @@ struct drm_connector {
 	uint8_t num_h_tile, num_v_tile;
 	uint8_t tile_h_loc, tile_v_loc;
 	uint16_t tile_h_size, tile_v_size;
+
+	/**
+	 * @free_node:
+	 *
+	 * List used only by &drm_connector_iter to be able to clean up a
+	 * connector from any context, in conjunction with
+	 * &drm_mode_config.connector_free_work.
+	 */
+	struct llist_node free_node;
 };
 
 #define obj_to_connector(x) container_of(x, struct drm_connector, base)
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index b25d12ef120a10d9cada8072a019d38d5aa17075..8d89a9c3748dad00d1716cc0b4d6f87362dd398f 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -465,6 +465,8 @@ struct edid *drm_get_edid(struct drm_connector *connector,
 struct edid *drm_get_edid_switcheroo(struct drm_connector *connector,
 				     struct i2c_adapter *adapter);
 struct edid *drm_edid_duplicate(const struct edid *edid);
+void drm_reset_display_info(struct drm_connector *connector);
+u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid);
 int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid);
 
 u8 drm_match_cea_mode(const struct drm_display_mode *to_match);
diff --git a/include/drm/drm_fb_cma_helper.h b/include/drm/drm_fb_cma_helper.h
index 65def43eb231dc0ac9a3abf41c2ca8c5bbcbff51..d532f88a8d5579f57db39905372b908da69b020f 100644
--- a/include/drm/drm_fb_cma_helper.h
+++ b/include/drm/drm_fb_cma_helper.h
@@ -16,6 +16,13 @@ struct drm_mode_fb_cmd2;
 struct drm_plane;
 struct drm_plane_state;
 
+int drm_fb_cma_fbdev_init_with_funcs(struct drm_device *dev,
+	unsigned int preferred_bpp, unsigned int max_conn_count,
+	const struct drm_framebuffer_funcs *funcs);
+int drm_fb_cma_fbdev_init(struct drm_device *dev, unsigned int preferred_bpp,
+			  unsigned int max_conn_count);
+void drm_fb_cma_fbdev_fini(struct drm_device *dev);
+
 struct drm_fbdev_cma *drm_fbdev_cma_init_with_funcs(struct drm_device *dev,
 	unsigned int preferred_bpp, unsigned int max_conn_count,
 	const struct drm_framebuffer_funcs *funcs);
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 1494b12a984aa09f2cb16d0da15d791d1ebe381e..b069433e7fc12f77fc8696e9f8bf2f0442d5d146 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -33,6 +33,7 @@
 struct drm_fb_helper;
 
 #include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
 #include <linux/kgdb.h>
 
 enum mode_set_atomic {
@@ -275,6 +276,7 @@ void drm_fb_helper_unlink_fbi(struct drm_fb_helper *fb_helper);
 
 void drm_fb_helper_deferred_io(struct fb_info *info,
 			       struct list_head *pagelist);
+int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper);
 
 ssize_t drm_fb_helper_sys_read(struct fb_info *info, char __user *buf,
 			       size_t count, loff_t *ppos);
@@ -319,6 +321,13 @@ int drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper, struct drm_
 int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
 				       struct drm_connector *connector);
 
+int drm_fb_helper_fbdev_setup(struct drm_device *dev,
+			      struct drm_fb_helper *fb_helper,
+			      const struct drm_fb_helper_funcs *funcs,
+			      unsigned int preferred_bpp,
+			      unsigned int max_conn_count);
+void drm_fb_helper_fbdev_teardown(struct drm_device *dev);
+
 void drm_fb_helper_lastclose(struct drm_device *dev);
 void drm_fb_helper_output_poll_changed(struct drm_device *dev);
 #else
@@ -332,11 +341,17 @@ static inline int drm_fb_helper_init(struct drm_device *dev,
 		       struct drm_fb_helper *helper,
 		       int max_conn)
 {
+	/* So drivers can use it to free the struct */
+	helper->dev = dev;
+	dev->fb_helper = helper;
+
 	return 0;
 }
 
 static inline void drm_fb_helper_fini(struct drm_fb_helper *helper)
 {
+	if (helper && helper->dev)
+		helper->dev->fb_helper = NULL;
 }
 
 static inline int drm_fb_helper_blank(int blank, struct fb_info *info)
@@ -409,6 +424,11 @@ static inline void drm_fb_helper_deferred_io(struct fb_info *info,
 {
 }
 
+static inline int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper)
+{
+	return -ENODEV;
+}
+
 static inline ssize_t drm_fb_helper_sys_read(struct fb_info *info,
 					     char __user *buf, size_t count,
 					     loff_t *ppos)
@@ -518,6 +538,24 @@ drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
 	return 0;
 }
 
+static inline int
+drm_fb_helper_fbdev_setup(struct drm_device *dev,
+			  struct drm_fb_helper *fb_helper,
+			  const struct drm_fb_helper_funcs *funcs,
+			  unsigned int preferred_bpp,
+			  unsigned int max_conn_count)
+{
+	/* So drivers can use it to free the struct */
+	dev->fb_helper = fb_helper;
+
+	return 0;
+}
+
+static inline void drm_fb_helper_fbdev_teardown(struct drm_device *dev)
+{
+	dev->fb_helper = NULL;
+}
+
 static inline void drm_fb_helper_lastclose(struct drm_device *dev)
 {
 }
diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
index dccb897951bada497603e41f6bb21ec02687a72e..c50502c656e5751374ed4e088540285fb1ee95f4 100644
--- a/include/drm/drm_framebuffer.h
+++ b/include/drm/drm_framebuffer.h
@@ -121,6 +121,12 @@ struct drm_framebuffer {
 	 * @base: base modeset object structure, contains the reference count.
 	 */
 	struct drm_mode_object base;
+
+	/**
+	 * @comm: Name of the process allocating the fb, used for fb dumping.
+	 */
+	char comm[TASK_COMM_LEN];
+
 	/**
 	 * @format: framebuffer format information
 	 */
diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index a0afeb591dcb1cb25e7f7005dd903ce229792cab..2cb6f02df64ab562eed1d0b515e6da905bda0212 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -27,6 +27,7 @@
 #include <linux/types.h>
 #include <linux/idr.h>
 #include <linux/workqueue.h>
+#include <linux/llist.h>
 
 #include <drm/drm_modeset_lock.h>
 
@@ -268,6 +269,9 @@ struct drm_mode_config_funcs {
 	 * state easily. If this hook is implemented, drivers must also
 	 * implement @atomic_state_clear and @atomic_state_free.
 	 *
+	 * Subclassing of &drm_atomic_state is deprecated in favour of using
+	 * &drm_private_state and &drm_private_obj.
+	 *
 	 * RETURNS:
 	 *
 	 * A new &drm_atomic_state on success or NULL on failure.
@@ -289,6 +293,9 @@ struct drm_mode_config_funcs {
 	 *
 	 * Drivers that implement this must call drm_atomic_state_default_clear()
 	 * to clear common state.
+	 *
+	 * Subclassing of &drm_atomic_state is deprecated in favour of using
+	 * &drm_private_state and &drm_private_obj.
 	 */
 	void (*atomic_state_clear)(struct drm_atomic_state *state);
 
@@ -301,6 +308,9 @@ struct drm_mode_config_funcs {
 	 *
 	 * Drivers that implement this must call
 	 * drm_atomic_state_default_release() to release common resources.
+	 *
+	 * Subclassing of &drm_atomic_state is deprecated in favour of using
+	 * &drm_private_state and &drm_private_obj.
 	 */
 	void (*atomic_state_free)(struct drm_atomic_state *state);
 };
@@ -393,7 +403,7 @@ struct drm_mode_config {
 
 	/**
 	 * @connector_list_lock: Protects @num_connector and
-	 * @connector_list.
+	 * @connector_list and @connector_free_list.
 	 */
 	spinlock_t connector_list_lock;
 	/**
@@ -413,6 +423,21 @@ struct drm_mode_config {
 	 * &struct drm_connector_list_iter to walk this list.
 	 */
 	struct list_head connector_list;
+	/**
+	 * @connector_free_list:
+	 *
+	 * List of connector objects linked with &drm_connector.free_head.
+	 * Protected by @connector_list_lock. Used by
+	 * drm_for_each_connector_iter() and
+	 * &struct drm_connector_list_iter to savely free connectors using
+	 * @connector_free_work.
+	 */
+	struct llist_head connector_free_list;
+	/**
+	 * @connector_free_work: Work to clean up @connector_free_list.
+	 */
+	struct work_struct connector_free_work;
+
 	/**
 	 * @num_encoder:
 	 *
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 5f9932e2246e19629024b979852bc40a3eb63c06..2a4a42e59a478284742d5d3669adb4ba7589abf8 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -80,13 +80,13 @@ void __drm_printfn_debug(struct drm_printer *p, struct va_format *vaf);
 __printf(2, 3)
 void drm_printf(struct drm_printer *p, const char *f, ...);
 
+__printf(2, 0)
 /**
  * drm_vprintf - print to a &drm_printer stream
  * @p: the &drm_printer
  * @fmt: format string
  * @va: the va_list
  */
-__printf(2, 0)
 static inline void
 drm_vprintf(struct drm_printer *p, const char *fmt, va_list *va)
 {
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 9e8ba90c6784a23a2ac4c52523290b0b289bc82c..3980602472c0f86cc1792a82503f583aa710b630 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -33,36 +33,31 @@ struct drm_syncobj_cb;
 /**
  * struct drm_syncobj - sync object.
  *
- * This structure defines a generic sync object which wraps a dma fence.
+ * This structure defines a generic sync object which wraps a &dma_fence.
  */
 struct drm_syncobj {
 	/**
-	 * @refcount:
-	 *
-	 * Reference count of this object.
+	 * @refcount: Reference count of this object.
 	 */
 	struct kref refcount;
 	/**
 	 * @fence:
 	 * NULL or a pointer to the fence bound to this object.
 	 *
-	 * This field should not be used directly.  Use drm_syncobj_fence_get
-	 * and drm_syncobj_replace_fence instead.
+	 * This field should not be used directly. Use drm_syncobj_fence_get()
+	 * and drm_syncobj_replace_fence() instead.
 	 */
 	struct dma_fence __rcu *fence;
 	/**
-	 * @cb_list:
-	 * List of callbacks to call when the fence gets replaced
+	 * @cb_list: List of callbacks to call when the &fence gets replaced.
 	 */
 	struct list_head cb_list;
 	/**
-	 * @lock:
-	 * locks cb_list and write-locks fence.
+	 * @lock: Protects &cb_list and write-locks &fence.
 	 */
 	spinlock_t lock;
 	/**
-	 * @file:
-	 * a file backing for this syncobj.
+	 * @file: A file backing for this syncobj.
 	 */
 	struct file *file;
 };
@@ -73,7 +68,7 @@ typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj,
 /**
  * struct drm_syncobj_cb - callback for drm_syncobj_add_callback
  * @node: used by drm_syncob_add_callback to append this struct to
- *	  syncobj::cb_list
+ *	  &drm_syncobj.cb_list
  * @func: drm_syncobj_func_t to call
  *
  * This struct will be initialized by drm_syncobj_add_callback, additional
@@ -92,7 +87,7 @@ void drm_syncobj_free(struct kref *kref);
  * drm_syncobj_get - acquire a syncobj reference
  * @obj: sync object
  *
- * This acquires additional reference to @obj. It is illegal to call this
+ * This acquires an additional reference to @obj. It is illegal to call this
  * without already holding a reference. No locks required.
  */
 static inline void
@@ -111,6 +106,17 @@ drm_syncobj_put(struct drm_syncobj *obj)
 	kref_put(&obj->refcount, drm_syncobj_free);
 }
 
+/**
+ * drm_syncobj_fence_get - get a reference to a fence in a sync object
+ * @syncobj: sync object.
+ *
+ * This acquires additional reference to &drm_syncobj.fence contained in @obj,
+ * if not NULL. It is illegal to call this without already holding a reference.
+ * No locks required.
+ *
+ * Returns:
+ * Either the fence of @obj or NULL if there's none.
+ */
 static inline struct dma_fence *
 drm_syncobj_fence_get(struct drm_syncobj *syncobj)
 {
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
new file mode 100644
index 0000000000000000000000000000000000000000..dfd54fb94e10780a81e5b04e3b7bc759f302c199
--- /dev/null
+++ b/include/drm/gpu_scheduler.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _DRM_GPU_SCHEDULER_H_
+#define _DRM_GPU_SCHEDULER_H_
+
+#include <drm/spsc_queue.h>
+#include <linux/dma-fence.h>
+
+struct drm_gpu_scheduler;
+struct drm_sched_rq;
+
+enum drm_sched_priority {
+	DRM_SCHED_PRIORITY_MIN,
+	DRM_SCHED_PRIORITY_LOW = DRM_SCHED_PRIORITY_MIN,
+	DRM_SCHED_PRIORITY_NORMAL,
+	DRM_SCHED_PRIORITY_HIGH_SW,
+	DRM_SCHED_PRIORITY_HIGH_HW,
+	DRM_SCHED_PRIORITY_KERNEL,
+	DRM_SCHED_PRIORITY_MAX,
+	DRM_SCHED_PRIORITY_INVALID = -1,
+	DRM_SCHED_PRIORITY_UNSET = -2
+};
+
+/**
+ * A scheduler entity is a wrapper around a job queue or a group
+ * of other entities. Entities take turns emitting jobs from their
+ * job queues to corresponding hardware ring based on scheduling
+ * policy.
+*/
+struct drm_sched_entity {
+	struct list_head		list;
+	struct drm_sched_rq		*rq;
+	spinlock_t			rq_lock;
+	struct drm_gpu_scheduler	*sched;
+
+	spinlock_t			queue_lock;
+	struct spsc_queue		job_queue;
+
+	atomic_t			fence_seq;
+	uint64_t			fence_context;
+
+	struct dma_fence		*dependency;
+	struct dma_fence_cb		cb;
+	atomic_t			*guilty; /* points to ctx's guilty */
+};
+
+/**
+ * Run queue is a set of entities scheduling command submissions for
+ * one specific ring. It implements the scheduling policy that selects
+ * the next entity to emit commands from.
+*/
+struct drm_sched_rq {
+	spinlock_t			lock;
+	struct list_head		entities;
+	struct drm_sched_entity		*current_entity;
+};
+
+struct drm_sched_fence {
+	struct dma_fence		scheduled;
+	struct dma_fence		finished;
+	struct dma_fence_cb		cb;
+	struct dma_fence		*parent;
+	struct drm_gpu_scheduler	*sched;
+	spinlock_t			lock;
+	void				*owner;
+};
+
+struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
+
+struct drm_sched_job {
+	struct spsc_node		queue_node;
+	struct drm_gpu_scheduler	*sched;
+	struct drm_sched_fence		*s_fence;
+	struct dma_fence_cb		finish_cb;
+	struct work_struct		finish_work;
+	struct list_head		node;
+	struct delayed_work		work_tdr;
+	uint64_t			id;
+	atomic_t			karma;
+	enum drm_sched_priority		s_priority;
+};
+
+static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
+					    int threshold)
+{
+	return (s_job && atomic_inc_return(&s_job->karma) > threshold);
+}
+
+/**
+ * Define the backend operations called by the scheduler,
+ * these functions should be implemented in driver side
+*/
+struct drm_sched_backend_ops {
+	struct dma_fence *(*dependency)(struct drm_sched_job *sched_job,
+					struct drm_sched_entity *s_entity);
+	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
+	void (*timedout_job)(struct drm_sched_job *sched_job);
+	void (*free_job)(struct drm_sched_job *sched_job);
+};
+
+/**
+ * One scheduler is implemented for each hardware ring
+*/
+struct drm_gpu_scheduler {
+	const struct drm_sched_backend_ops	*ops;
+	uint32_t			hw_submission_limit;
+	long				timeout;
+	const char			*name;
+	struct drm_sched_rq		sched_rq[DRM_SCHED_PRIORITY_MAX];
+	wait_queue_head_t		wake_up_worker;
+	wait_queue_head_t		job_scheduled;
+	atomic_t			hw_rq_count;
+	atomic64_t			job_id_count;
+	struct task_struct		*thread;
+	struct list_head		ring_mirror_list;
+	spinlock_t			job_list_lock;
+	int				hang_limit;
+};
+
+int drm_sched_init(struct drm_gpu_scheduler *sched,
+		   const struct drm_sched_backend_ops *ops,
+		   uint32_t hw_submission, unsigned hang_limit, long timeout,
+		   const char *name);
+void drm_sched_fini(struct drm_gpu_scheduler *sched);
+
+int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
+			  struct drm_sched_entity *entity,
+			  struct drm_sched_rq *rq,
+			  uint32_t jobs, atomic_t *guilty);
+void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
+			   struct drm_sched_entity *entity);
+void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
+			       struct drm_sched_entity *entity);
+void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
+			     struct drm_sched_rq *rq);
+
+struct drm_sched_fence *drm_sched_fence_create(
+	struct drm_sched_entity *s_entity, void *owner);
+void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
+void drm_sched_fence_finished(struct drm_sched_fence *fence);
+int drm_sched_job_init(struct drm_sched_job *job,
+		       struct drm_gpu_scheduler *sched,
+		       struct drm_sched_entity *entity,
+		       void *owner);
+void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched,
+			    struct drm_sched_job *job);
+void drm_sched_job_recovery(struct drm_gpu_scheduler *sched);
+bool drm_sched_dependency_optimized(struct dma_fence* fence,
+				    struct drm_sched_entity *entity);
+void drm_sched_job_kickout(struct drm_sched_job *s_job);
+
+#endif
diff --git a/include/drm/gpu_scheduler_trace.h b/include/drm/gpu_scheduler_trace.h
new file mode 100644
index 0000000000000000000000000000000000000000..0789e8d0a0e1e9db57ab493107e73ad8101d52cc
--- /dev/null
+++ b/include/drm/gpu_scheduler_trace.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _GPU_SCHED_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include <drm/drmP.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gpu_scheduler
+#define TRACE_INCLUDE_FILE gpu_scheduler_trace
+
+TRACE_EVENT(drm_sched_job,
+	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
+	    TP_ARGS(sched_job, entity),
+	    TP_STRUCT__entry(
+			     __field(struct drm_sched_entity *, entity)
+			     __field(struct dma_fence *, fence)
+			     __field(const char *, name)
+			     __field(uint64_t, id)
+			     __field(u32, job_count)
+			     __field(int, hw_job_count)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->entity = entity;
+			   __entry->id = sched_job->id;
+			   __entry->fence = &sched_job->s_fence->finished;
+			   __entry->name = sched_job->sched->name;
+			   __entry->job_count = spsc_queue_count(&entity->job_queue);
+			   __entry->hw_job_count = atomic_read(
+				   &sched_job->sched->hw_rq_count);
+			   ),
+	    TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
+		      __entry->entity, __entry->id,
+		      __entry->fence, __entry->name,
+		      __entry->job_count, __entry->hw_job_count)
+);
+
+TRACE_EVENT(drm_sched_process_job,
+	    TP_PROTO(struct drm_sched_fence *fence),
+	    TP_ARGS(fence),
+	    TP_STRUCT__entry(
+		    __field(struct dma_fence *, fence)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->fence = &fence->finished;
+		    ),
+	    TP_printk("fence=%p signaled", __entry->fence)
+);
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/amd/scheduler/spsc_queue.h b/include/drm/spsc_queue.h
similarity index 95%
rename from drivers/gpu/drm/amd/scheduler/spsc_queue.h
rename to include/drm/spsc_queue.h
index 5902f35ce759da7eb70b371f2b8d122bb46d201e..125f096c88cb96a6da1567d25a238a9cf1640f41 100644
--- a/drivers/gpu/drm/amd/scheduler/spsc_queue.h
+++ b/include/drm/spsc_queue.h
@@ -21,10 +21,11 @@
  *
  */
 
-#ifndef AMD_SCHEDULER_SPSC_QUEUE_H_
-#define AMD_SCHEDULER_SPSC_QUEUE_H_
+#ifndef DRM_SCHEDULER_SPSC_QUEUE_H_
+#define DRM_SCHEDULER_SPSC_QUEUE_H_
 
 #include <linux/atomic.h>
+#include <linux/preempt.h>
 
 /** SPSC lockless queue */
 
@@ -118,4 +119,4 @@ static inline struct spsc_node *spsc_queue_pop(struct spsc_queue *queue)
 
 
 
-#endif /* AMD_SCHEDULER_SPSC_QUEUE_H_ */
+#endif /* DRM_SCHEDULER_SPSC_QUEUE_H_ */
diff --git a/include/drm/tinydrm/tinydrm.h b/include/drm/tinydrm/tinydrm.h
index 03cd9d72308c1423844cad2a4c92e21c5547ef7d..07a9a11fe19dcb66e863ea8155b51589d7c8ed07 100644
--- a/include/drm/tinydrm/tinydrm.h
+++ b/include/drm/tinydrm/tinydrm.h
@@ -19,14 +19,12 @@
  * @drm: DRM device
  * @pipe: Display pipe structure
  * @dirty_lock: Serializes framebuffer flushing
- * @fbdev_cma: CMA fbdev structure
  * @fb_funcs: Framebuffer functions used when creating framebuffers
  */
 struct tinydrm_device {
 	struct drm_device *drm;
 	struct drm_simple_display_pipe pipe;
 	struct mutex dirty_lock;
-	struct drm_fbdev_cma *fbdev_cma;
 	const struct drm_framebuffer_funcs *fb_funcs;
 };
 
@@ -80,7 +78,6 @@ pipe_to_tinydrm(struct drm_simple_display_pipe *pipe)
 	.type = DRM_MODE_TYPE_DRIVER, \
 	.clock = 1 /* pass validation */
 
-void tinydrm_lastclose(struct drm_device *drm);
 void tinydrm_gem_cma_free_object(struct drm_gem_object *gem_obj);
 struct drm_gem_object *
 tinydrm_gem_cma_prime_import_sg_table(struct drm_device *drm,
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 368eb02b54a9ee1526270da28ae30d4c9607d884..2cd025c2abe727930ce39bcc2f461bd35aeba3e7 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -263,6 +263,8 @@ struct ttm_bo_kmap_obj {
  *
  * @interruptible: Sleep interruptible if sleeping.
  * @no_wait_gpu: Return immediately if the GPU is busy.
+ * @allow_reserved_eviction: Allow eviction of reserved BOs.
+ * @resv: Reservation object to allow reserved evictions with.
  *
  * Context for TTM operations like changing buffer placement or general memory
  * allocation.
@@ -270,6 +272,8 @@ struct ttm_bo_kmap_obj {
 struct ttm_operation_ctx {
 	bool interruptible;
 	bool no_wait_gpu;
+	bool allow_reserved_eviction;
+	struct reservation_object *resv;
 	uint64_t bytes_moved;
 };
 
@@ -700,17 +704,6 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map);
  */
 int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo);
 
-/**
- * ttm_bo_default_iomem_pfn - get a pfn for a page offset
- *
- * @bo: the BO we need to look up the pfn for
- * @page_offset: offset inside the BO to look up.
- *
- * Calculate the PFN for iomem based mappings during page fault
- */
-unsigned long ttm_bo_default_io_mem_pfn(struct ttm_buffer_object *bo,
-				        unsigned long page_offset);
-
 /**
  * ttm_bo_mmap - mmap out of the ttm device address space.
  *
@@ -748,6 +741,8 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
 		  const char __user *wbuf, char __user *rbuf,
 		  size_t count, loff_t *f_pos, bool write);
 
+int ttm_bo_swapout(struct ttm_bo_global *glob,
+			struct ttm_operation_ctx *ctx);
 void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
 int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
 #endif
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 6996d884c50875aeb851c214528d4b252c45e6c5..94064b126e8e2df874bc31807e13881514c3bb07 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -352,7 +352,8 @@ struct ttm_bo_driver {
 	 * Returns:
 	 * -ENOMEM: Out of memory.
 	 */
-	int (*ttm_tt_populate)(struct ttm_tt *ttm);
+	int (*ttm_tt_populate)(struct ttm_tt *ttm,
+			struct ttm_operation_ctx *ctx);
 
 	/**
 	 * ttm_tt_unpopulate
@@ -522,7 +523,6 @@ struct ttm_bo_global {
 	struct kobject kobj;
 	struct ttm_mem_global *mem_glob;
 	struct page *dummy_read_page;
-	struct ttm_mem_shrink shrink;
 	struct mutex device_list_mutex;
 	spinlock_t lru_lock;
 
@@ -650,7 +650,8 @@ void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma);
  *
  * Bind the pages of @ttm to an aperture location identified by @bo_mem
  */
-int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
+int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem,
+		struct ttm_operation_ctx *ctx);
 
 /**
  * ttm_ttm_destroy:
@@ -976,7 +977,7 @@ void ttm_mem_io_free(struct ttm_bo_device *bdev,
  */
 
 int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
-		    bool interruptible, bool no_wait_gpu,
+		    struct ttm_operation_ctx *ctx,
 		    struct ttm_mem_reg *new_mem);
 
 /**
@@ -998,7 +999,7 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
  */
 
 int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
-		       bool interruptible, bool no_wait_gpu,
+		       struct ttm_operation_ctx *ctx,
 		       struct ttm_mem_reg *new_mem);
 
 /**
@@ -1078,7 +1079,7 @@ struct ttm_tt *ttm_agp_tt_create(struct ttm_bo_device *bdev,
 				 struct agp_bridge_data *bridge,
 				 unsigned long size, uint32_t page_flags,
 				 struct page *dummy_read_page);
-int ttm_agp_tt_populate(struct ttm_tt *ttm);
+int ttm_agp_tt_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx);
 void ttm_agp_tt_unpopulate(struct ttm_tt *ttm);
 #endif
 
diff --git a/include/drm/ttm/ttm_memory.h b/include/drm/ttm/ttm_memory.h
index 2c1e3598effe35d15cc139dd1a68f0b87f5a14fd..8936285b6543a1e6371f87db2f961bfc4881c15a 100644
--- a/include/drm/ttm/ttm_memory.h
+++ b/include/drm/ttm/ttm_memory.h
@@ -35,20 +35,7 @@
 #include <linux/errno.h>
 #include <linux/kobject.h>
 #include <linux/mm.h>
-
-/**
- * struct ttm_mem_shrink - callback to shrink TTM memory usage.
- *
- * @do_shrink: The callback function.
- *
- * Arguments to the do_shrink functions are intended to be passed using
- * inheritance. That is, the argument class derives from struct ttm_mem_shrink,
- * and can be accessed using container_of().
- */
-
-struct ttm_mem_shrink {
-	int (*do_shrink) (struct ttm_mem_shrink *);
-};
+#include "ttm_bo_api.h"
 
 /**
  * struct ttm_mem_global - Global memory accounting structure.
@@ -76,7 +63,7 @@ struct ttm_mem_shrink {
 struct ttm_mem_zone;
 struct ttm_mem_global {
 	struct kobject kobj;
-	struct ttm_mem_shrink *shrink;
+	struct ttm_bo_global *bo_glob;
 	struct workqueue_struct *swap_queue;
 	struct work_struct work;
 	spinlock_t lock;
@@ -90,67 +77,15 @@ struct ttm_mem_global {
 #endif
 };
 
-/**
- * ttm_mem_init_shrink - initialize a struct ttm_mem_shrink object
- *
- * @shrink: The object to initialize.
- * @func: The callback function.
- */
-
-static inline void ttm_mem_init_shrink(struct ttm_mem_shrink *shrink,
-				       int (*func) (struct ttm_mem_shrink *))
-{
-	shrink->do_shrink = func;
-}
-
-/**
- * ttm_mem_register_shrink - register a struct ttm_mem_shrink object.
- *
- * @glob: The struct ttm_mem_global object to register with.
- * @shrink: An initialized struct ttm_mem_shrink object to register.
- *
- * Returns:
- * -EBUSY: There's already a callback registered. (May change).
- */
-
-static inline int ttm_mem_register_shrink(struct ttm_mem_global *glob,
-					  struct ttm_mem_shrink *shrink)
-{
-	spin_lock(&glob->lock);
-	if (glob->shrink != NULL) {
-		spin_unlock(&glob->lock);
-		return -EBUSY;
-	}
-	glob->shrink = shrink;
-	spin_unlock(&glob->lock);
-	return 0;
-}
-
-/**
- * ttm_mem_unregister_shrink - unregister a struct ttm_mem_shrink object.
- *
- * @glob: The struct ttm_mem_global object to unregister from.
- * @shrink: A previously registert struct ttm_mem_shrink object.
- *
- */
-
-static inline void ttm_mem_unregister_shrink(struct ttm_mem_global *glob,
-					     struct ttm_mem_shrink *shrink)
-{
-	spin_lock(&glob->lock);
-	BUG_ON(glob->shrink != shrink);
-	glob->shrink = NULL;
-	spin_unlock(&glob->lock);
-}
-
 extern int ttm_mem_global_init(struct ttm_mem_global *glob);
 extern void ttm_mem_global_release(struct ttm_mem_global *glob);
 extern int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory,
-				bool no_wait, bool interruptible);
+				struct ttm_operation_ctx *ctx);
 extern void ttm_mem_global_free(struct ttm_mem_global *glob,
 				uint64_t amount);
 extern int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
-				     struct page *page, uint64_t size);
+				     struct page *page, uint64_t size,
+				     struct ttm_operation_ctx *ctx);
 extern void ttm_mem_global_free_page(struct ttm_mem_global *glob,
 				     struct page *page, uint64_t size);
 extern size_t ttm_round_pot(size_t size);
diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
index 593811362a9172fd49fa14f62567af2ee636acc6..4d9b019d253cdc2646dc056f224b8293a525a727 100644
--- a/include/drm/ttm/ttm_page_alloc.h
+++ b/include/drm/ttm/ttm_page_alloc.h
@@ -47,7 +47,7 @@ void ttm_page_alloc_fini(void);
  *
  * Add backing pages to all of @ttm
  */
-int ttm_pool_populate(struct ttm_tt *ttm);
+int ttm_pool_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx);
 
 /**
  * ttm_pool_unpopulate:
@@ -61,7 +61,8 @@ void ttm_pool_unpopulate(struct ttm_tt *ttm);
 /**
  * Populates and DMA maps pages to fullfil a ttm_dma_populate() request
  */
-int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt);
+int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt,
+				struct ttm_operation_ctx *ctx);
 
 /**
  * Unpopulates and DMA unmaps pages as part of a
@@ -89,7 +90,8 @@ void ttm_dma_page_alloc_fini(void);
  */
 int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data);
 
-int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev);
+int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
+			struct ttm_operation_ctx *ctx);
 void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev);
 
 #else
@@ -106,7 +108,8 @@ static inline int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data)
 	return 0;
 }
 static inline int ttm_dma_populate(struct ttm_dma_tt *ttm_dma,
-				   struct device *dev)
+				struct device *dev,
+				struct ttm_operation_ctx *ctx)
 {
 	return -ENOMEM;
 }
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 01ee473517e25b7a9cda5531c5f62411da6d17d4..9da6ce22803f03fc318a7fdd33af380eae67d4e6 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -62,7 +62,7 @@ struct arch_timer_cpu {
 	bool			enabled;
 };
 
-int kvm_timer_hyp_init(void);
+int kvm_timer_hyp_init(bool);
 int kvm_timer_enable(struct kvm_vcpu *vcpu);
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
@@ -93,7 +93,4 @@ void kvm_timer_init_vhe(void);
 #define vcpu_vtimer(v)	(&(v)->arch.timer_cpu.vtimer)
 #define vcpu_ptimer(v)	(&(v)->arch.timer_cpu.ptimer)
 
-void enable_el1_phys_timer_access(void);
-void disable_el1_phys_timer_access(void);
-
 #endif
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 82f0c8fd7be8fd20951af806319f64f615f4ffc6..23d29b39f71e83e8a6a25540adc2e3f28702aec7 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 #define bio_set_dev(bio, bdev) 			\
 do {						\
+	if ((bio)->bi_disk != (bdev)->bd_disk)	\
+		bio_clear_flag(bio, BIO_THROTTLED);\
 	(bio)->bi_disk = (bdev)->bd_disk;	\
 	(bio)->bi_partno = (bdev)->bd_partno;	\
 } while (0)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a1e628e032dad75bf1837a25e45b55a7f54ca2df..9e7d8bd776d227d2ba92b137af7230300f5b1d4a 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -50,8 +50,6 @@ struct blk_issue_stat {
 struct bio {
 	struct bio		*bi_next;	/* request queue link */
 	struct gendisk		*bi_disk;
-	u8			bi_partno;
-	blk_status_t		bi_status;
 	unsigned int		bi_opf;		/* bottom bits req flags,
 						 * top bits REQ_OP. Use
 						 * accessors.
@@ -59,8 +57,8 @@ struct bio {
 	unsigned short		bi_flags;	/* status, etc and bvec pool number */
 	unsigned short		bi_ioprio;
 	unsigned short		bi_write_hint;
-
-	struct bvec_iter	bi_iter;
+	blk_status_t		bi_status;
+	u8			bi_partno;
 
 	/* Number of segments in this BIO after
 	 * physical address coalescing is performed.
@@ -74,8 +72,9 @@ struct bio {
 	unsigned int		bi_seg_front_size;
 	unsigned int		bi_seg_back_size;
 
-	atomic_t		__bi_remaining;
+	struct bvec_iter	bi_iter;
 
+	atomic_t		__bi_remaining;
 	bio_end_io_t		*bi_end_io;
 
 	void			*bi_private;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8089ca17db9ac65998ec9cf82f65743bb5c5abb9..0ce8a372d5069a7aca7810429a968d20e923d3d1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t;
 struct request {
 	struct list_head queuelist;
 	union {
-		call_single_data_t csd;
+		struct __call_single_data csd;
 		u64 fifo_time;
 	};
 
@@ -241,14 +241,24 @@ struct request {
 	struct request *next_rq;
 };
 
+static inline bool blk_op_is_scsi(unsigned int op)
+{
+	return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
+}
+
+static inline bool blk_op_is_private(unsigned int op)
+{
+	return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
+}
+
 static inline bool blk_rq_is_scsi(struct request *rq)
 {
-	return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT;
+	return blk_op_is_scsi(req_op(rq));
 }
 
 static inline bool blk_rq_is_private(struct request *rq)
 {
-	return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT;
+	return blk_op_is_private(req_op(rq));
 }
 
 static inline bool blk_rq_is_passthrough(struct request *rq)
@@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
 	return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
 }
 
+static inline bool bio_is_passthrough(struct bio *bio)
+{
+	unsigned op = bio_op(bio);
+
+	return blk_op_is_scsi(op) || blk_op_is_private(op);
+}
+
 static inline unsigned short req_get_ioprio(struct request *req)
 {
 	return req->ioprio;
@@ -948,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 extern void blk_rq_unprep_clone(struct request *rq);
 extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
-extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
+extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
 extern void blk_queue_split(struct request_queue *, struct bio **);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e55e4255a21082325f0888ffcd464615add708b8..0b25cf87b6d608b6a1646e89e1408270f148a783 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -43,7 +43,14 @@ struct bpf_map_ops {
 };
 
 struct bpf_map {
-	atomic_t refcnt;
+	/* 1st cacheline with read-mostly members of which some
+	 * are also accessed in fast-path (e.g. ops, max_entries).
+	 */
+	const struct bpf_map_ops *ops ____cacheline_aligned;
+	struct bpf_map *inner_map_meta;
+#ifdef CONFIG_SECURITY
+	void *security;
+#endif
 	enum bpf_map_type map_type;
 	u32 key_size;
 	u32 value_size;
@@ -52,15 +59,17 @@ struct bpf_map {
 	u32 pages;
 	u32 id;
 	int numa_node;
-	struct user_struct *user;
-	const struct bpf_map_ops *ops;
-	struct work_struct work;
+	bool unpriv_array;
+	/* 7 bytes hole */
+
+	/* 2nd cacheline with misc members to avoid false sharing
+	 * particularly with refcounting.
+	 */
+	struct user_struct *user ____cacheline_aligned;
+	atomic_t refcnt;
 	atomic_t usercnt;
-	struct bpf_map *inner_map_meta;
+	struct work_struct work;
 	char name[BPF_OBJ_NAME_LEN];
-#ifdef CONFIG_SECURITY
-	void *security;
-#endif
 };
 
 /* function argument constraints */
@@ -221,6 +230,7 @@ struct bpf_prog_aux {
 struct bpf_array {
 	struct bpf_map map;
 	u32 elem_size;
+	u32 index_mask;
 	/* 'ownership' of prog_array is claimed by the first program that
 	 * is going to use this map or by the first program which FD is stored
 	 * in the map to make sure that all callers and callees have the same
@@ -419,6 +429,8 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
 		attr->numa_node : NUMA_NO_NODE;
 }
 
+struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
+
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -506,6 +518,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
 {
 	return 0;
 }
+
+static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
+				enum bpf_prog_type type)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
@@ -514,6 +532,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
 	return bpf_prog_get_type_dev(ufd, type, false);
 }
 
+bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
+
 int bpf_prog_offload_compile(struct bpf_prog *prog);
 void bpf_prog_offload_destroy(struct bpf_prog *prog);
 
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c561b986bab0ebf886000ea34e377ea789138a3b..1632bb13ad8aed8cfeba2ccc69cfa458d02540bb 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -15,11 +15,11 @@
  * In practice this is far bigger than any realistic pointer offset; this limit
  * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
  */
-#define BPF_MAX_VAR_OFF	(1ULL << 31)
+#define BPF_MAX_VAR_OFF	(1 << 29)
 /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO].  This ensures
  * that converting umax_value to int cannot overflow.
  */
-#define BPF_MAX_VAR_SIZ	INT_MAX
+#define BPF_MAX_VAR_SIZ	(1 << 29)
 
 /* Liveness marks, used for registers and spilled-regs (in stack slots).
  * Read marks propagate upwards until they find a write mark; they record that
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 188ed9f65517453d5bb97f2466167c069a7c0ae2..52e611ab9a6cf6fde23dae53784f01bfb06ce448 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -220,21 +220,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 /*
  * Prevent the compiler from merging or refetching reads or writes. The
  * compiler is also forbidden from reordering successive instances of
- * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
- * compiler is aware of some particular ordering.  One way to make the
- * compiler aware of ordering is to put the two invocations of READ_ONCE,
- * WRITE_ONCE or ACCESS_ONCE() in different C statements.
+ * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
+ * particular ordering. One way to make the compiler aware of ordering is to
+ * put the two invocations of READ_ONCE or WRITE_ONCE in different C
+ * statements.
  *
- * In contrast to ACCESS_ONCE these two macros will also work on aggregate
- * data types like structs or unions. If the size of the accessed data
- * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
- * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There's at
- * least two memcpy()s: one for the __builtin_memcpy() and then one for
- * the macro doing the copy of variable - '__u' allocated on the stack.
+ * These two macros will also work on aggregate data types like structs or
+ * unions. If the size of the accessed data type exceeds the word size of
+ * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will
+ * fall back to memcpy(). There's at least two memcpy()s: one for the
+ * __builtin_memcpy() and then one for the macro doing the copy of variable
+ * - '__u' allocated on the stack.
  *
  * Their two major use cases are: (1) Mediating communication between
  * process-level code and irq/NMI handlers, all running on the same CPU,
- * and (2) Ensuring that the compiler does not  fold, spindle, or otherwise
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
  * mutilate accesses that either do not require ordering or that interact
  * with an explicit memory barrier or atomic instruction that provides the
  * required ordering.
@@ -327,29 +327,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 	compiletime_assert(__native_word(t),				\
 		"Need native word sized stores/loads for atomicity.")
 
-/*
- * Prevent the compiler from merging or refetching accesses.  The compiler
- * is also forbidden from reordering successive instances of ACCESS_ONCE(),
- * but only when the compiler is aware of some particular ordering.  One way
- * to make the compiler aware of ordering is to put the two invocations of
- * ACCESS_ONCE() in different C statements.
- *
- * ACCESS_ONCE will only work on scalar types. For union types, ACCESS_ONCE
- * on a union member will work as long as the size of the member matches the
- * size of the union and the size is smaller than word size.
- *
- * The major use cases of ACCESS_ONCE used to be (1) Mediating communication
- * between process-level code and irq/NMI handlers, all running on the same CPU,
- * and (2) Ensuring that the compiler does not  fold, spindle, or otherwise
- * mutilate accesses that either do not require ordering or that interact
- * with an explicit memory barrier or atomic instruction that provides the
- * required ordering.
- *
- * If possible use READ_ONCE()/WRITE_ONCE() instead.
- */
-#define __ACCESS_ONCE(x) ({ \
-	 __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \
-	(volatile typeof(x) *)&(x); })
-#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
-
 #endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 0662a417febe34fb9e638857f13a6d52fcaf0d49..519e94915d1850628ba3a79f7341c29ce3a68f40 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -10,9 +10,6 @@
  */
 
 #include <linux/wait.h>
-#ifdef CONFIG_LOCKDEP_COMPLETIONS
-#include <linux/lockdep.h>
-#endif
 
 /*
  * struct completion - structure used to maintain state for a "completion"
@@ -29,58 +26,15 @@
 struct completion {
 	unsigned int done;
 	wait_queue_head_t wait;
-#ifdef CONFIG_LOCKDEP_COMPLETIONS
-	struct lockdep_map_cross map;
-#endif
 };
 
-#ifdef CONFIG_LOCKDEP_COMPLETIONS
-static inline void complete_acquire(struct completion *x)
-{
-	lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_);
-}
-
-static inline void complete_release(struct completion *x)
-{
-	lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_);
-}
-
-static inline void complete_release_commit(struct completion *x)
-{
-	lock_commit_crosslock((struct lockdep_map *)&x->map);
-}
-
-#define init_completion_map(x, m)					\
-do {									\
-	lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map,	\
-			(m)->name, (m)->key, 0);				\
-	__init_completion(x);						\
-} while (0)
-
-#define init_completion(x)						\
-do {									\
-	static struct lock_class_key __key;				\
-	lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map,	\
-			"(completion)" #x,				\
-			&__key, 0);					\
-	__init_completion(x);						\
-} while (0)
-#else
 #define init_completion_map(x, m) __init_completion(x)
 #define init_completion(x) __init_completion(x)
 static inline void complete_acquire(struct completion *x) {}
 static inline void complete_release(struct completion *x) {}
-static inline void complete_release_commit(struct completion *x) {}
-#endif
 
-#ifdef CONFIG_LOCKDEP_COMPLETIONS
-#define COMPLETION_INITIALIZER(work) \
-	{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \
-	STATIC_CROSS_LOCKDEP_MAP_INIT("(completion)" #work, &(work)) }
-#else
 #define COMPLETION_INITIALIZER(work) \
 	{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
-#endif
 
 #define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \
 	(*({ init_completion_map(&(work), &(map)); &(work); }))
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index a04ef7c15c6a9dd504a288e23d7da3d6322459e2..7b01bc11c6929b7aaf4906a1587ee1b4e19b2e70 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -47,6 +47,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
 extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
 extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
 
+extern ssize_t cpu_show_meltdown(struct device *dev,
+				 struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v1(struct device *dev,
+				   struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v2(struct device *dev,
+				   struct device_attribute *attr, char *buf);
+
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
 				 const struct attribute_group **groups,
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 201ab72679863163c40f664cacca77b09dfaaef5..1a32e558eb1175e812234dfaa18d5e4665b46f30 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -86,7 +86,7 @@ enum cpuhp_state {
 	CPUHP_MM_ZSWP_POOL_PREPARE,
 	CPUHP_KVM_PPC_BOOK3S_PREPARE,
 	CPUHP_ZCOMP_PREPARE,
-	CPUHP_TIMERS_DEAD,
+	CPUHP_TIMERS_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
 	CPUHP_BP_PREPARE_DYN,
 	CPUHP_BP_PREPARE_DYN_END		= CPUHP_BP_PREPARE_DYN + 20,
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 06097ef304491dc8f9743154c10f92d20165cb47..b511f6d24b42b0c4a2796bc33f494d86b7c7854b 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -42,6 +42,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 	vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
 #define VMCOREINFO_SYMBOL(name) \
 	vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SYMBOL_ARRAY(name) \
+	vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
 #define VMCOREINFO_SIZE(name) \
 	vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
 			      (unsigned long)sizeof(name))
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 099058e1178b4d8529438450e28ad03b06497d32..631286535d0f126a13a366b924cae62f58d114e1 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -83,6 +83,7 @@ extern int set_current_groups(struct group_info *);
 extern void set_groups(struct cred *, struct group_info *);
 extern int groups_search(const struct group_info *, kgid_t);
 extern bool may_setgroups(void);
+extern void groups_sort(struct group_info *);
 
 /*
  * The security context of a task
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index f36ecc2a57128cdf5df582b0b7b00d125ffd7ca7..3b0ba54cc4d5b0ea9bc7a11d48b476b48e9e9e22 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -216,6 +216,8 @@ static inline void debugfs_remove(struct dentry *dentry)
 static inline void debugfs_remove_recursive(struct dentry *dentry)
 { }
 
+const struct file_operations *debugfs_real_fops(const struct file *filp);
+
 static inline int debugfs_file_get(struct dentry *dentry)
 {
 	return 0;
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index e8f8e8fb244d649830dfc499163a1e8a7d0e476f..81ed9b2d84dcc78e1b2213e9a22efd0f4f384330 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -704,7 +704,6 @@ static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
 	return ret;
 }
 
-#ifdef CONFIG_HAS_DMA
 static inline int dma_get_cache_alignment(void)
 {
 #ifdef ARCH_DMA_MINALIGN
@@ -712,7 +711,6 @@ static inline int dma_get_cache_alignment(void)
 #endif
 	return 1;
 }
-#endif
 
 /* flags for the coherent memory api */
 #define DMA_MEMORY_EXCLUSIVE		0x01
diff --git a/include/linux/efi.h b/include/linux/efi.h
index d813f7b04da7a1dbe35fc7dac5bb2842287c8021..29fdf8029cf6fea785631ce65c99eb16c80bf926 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -140,11 +140,13 @@ struct efi_boot_memmap {
 
 struct capsule_info {
 	efi_capsule_header_t	header;
+	efi_capsule_header_t	*capsule;
 	int			reset_type;
 	long			index;
 	size_t			count;
 	size_t			total_size;
-	phys_addr_t		*pages;
+	struct page		**pages;
+	phys_addr_t		*phys;
 	size_t			page_bytes_remain;
 };
 
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index f4ff47d4a893a7bd0ef7f4baeef6c29dbee2a9f2..fe0c349684fa83428a31814d38836a6a2a7000da 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -755,7 +755,7 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie,
 {
 	if (fscache_cookie_valid(cookie) && PageFsCache(page))
 		return __fscache_maybe_release_page(cookie, page, gfp);
-	return false;
+	return true;
 }
 
 /**
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 55e672592fa93cd94aef5dfab761b688df0a7235..7258cd676df42c3ea77700d1a6ed15e7957e4f37 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -66,9 +66,10 @@ struct gpio_irq_chip {
 	/**
 	 * @lock_key:
 	 *
-	 * Per GPIO IRQ chip lockdep class.
+	 * Per GPIO IRQ chip lockdep classes.
 	 */
 	struct lock_class_key *lock_key;
+	struct lock_class_key *request_key;
 
 	/**
 	 * @parent_handler:
@@ -323,7 +324,8 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip,
 
 /* add/remove chips */
 extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
-				      struct lock_class_key *lock_key);
+				      struct lock_class_key *lock_key,
+				      struct lock_class_key *request_key);
 
 /**
  * gpiochip_add_data() - register a gpio_chip
@@ -350,11 +352,13 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
  */
 #ifdef CONFIG_LOCKDEP
 #define gpiochip_add_data(chip, data) ({		\
-		static struct lock_class_key key;	\
-		gpiochip_add_data_with_key(chip, data, &key);	\
+		static struct lock_class_key lock_key;	\
+		static struct lock_class_key request_key;	  \
+		gpiochip_add_data_with_key(chip, data, &lock_key, \
+					   &request_key);	  \
 	})
 #else
-#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL)
+#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL)
 #endif
 
 static inline int gpiochip_add(struct gpio_chip *chip)
@@ -429,7 +433,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
 			     irq_flow_handler_t handler,
 			     unsigned int type,
 			     bool threaded,
-			     struct lock_class_key *lock_key);
+			     struct lock_class_key *lock_key,
+			     struct lock_class_key *request_key);
 
 #ifdef CONFIG_LOCKDEP
 
@@ -445,10 +450,12 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
 				       irq_flow_handler_t handler,
 				       unsigned int type)
 {
-	static struct lock_class_key key;
+	static struct lock_class_key lock_key;
+	static struct lock_class_key request_key;
 
 	return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
-					handler, type, false, &key);
+					handler, type, false,
+					&lock_key, &request_key);
 }
 
 static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
@@ -458,10 +465,12 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
 			  unsigned int type)
 {
 
-	static struct lock_class_key key;
+	static struct lock_class_key lock_key;
+	static struct lock_class_key request_key;
 
 	return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
-					handler, type, true, &key);
+					handler, type, true,
+					&lock_key, &request_key);
 }
 #else
 static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
@@ -471,7 +480,7 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
 				       unsigned int type)
 {
 	return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
-					handler, type, false, NULL);
+					handler, type, false, NULL, NULL);
 }
 
 static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
@@ -481,7 +490,7 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
 			  unsigned int type)
 {
 	return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
-					handler, type, true, NULL);
+					handler, type, true, NULL, NULL);
 }
 #endif /* CONFIG_LOCKDEP */
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index f3e97c5f94c96bbaf4ef338f9bad6145f552b1be..6c9336626592b0a07e4216ea97a72a050e7146d4 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -708,6 +708,7 @@ struct vmbus_channel {
 	u8 monitor_bit;
 
 	bool rescind; /* got rescind msg */
+	struct completion rescind_event;
 
 	u32 ringbuffer_gpadlhandle;
 
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 7c3a365f7e127ac29cfedc6b5debe81eb87abe74..fa14f834e4ede3a81e7e6182c889fec338b2495f 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -15,6 +15,7 @@
 #include <linux/radix-tree.h>
 #include <linux/gfp.h>
 #include <linux/percpu.h>
+#include <linux/bug.h>
 
 struct idr {
 	struct radix_tree_root	idr_rt;
diff --git a/include/linux/iio/timer/stm32-lptim-trigger.h b/include/linux/iio/timer/stm32-lptim-trigger.h
index 34d59bfdce2d2b12b0f6d7b61c601bfc0e2cb518..464458d20b16501ef45633c09cb2b7705e9f3f84 100644
--- a/include/linux/iio/timer/stm32-lptim-trigger.h
+++ b/include/linux/iio/timer/stm32-lptim-trigger.h
@@ -16,11 +16,14 @@
 #define LPTIM2_OUT	"lptim2_out"
 #define LPTIM3_OUT	"lptim3_out"
 
-#if IS_ENABLED(CONFIG_IIO_STM32_LPTIMER_TRIGGER)
+#if IS_REACHABLE(CONFIG_IIO_STM32_LPTIMER_TRIGGER)
 bool is_stm32_lptim_trigger(struct iio_trigger *trig);
 #else
 static inline bool is_stm32_lptim_trigger(struct iio_trigger *trig)
 {
+#if IS_ENABLED(CONFIG_IIO_STM32_LPTIMER_TRIGGER)
+	pr_warn_once("stm32 lptim_trigger not linked in\n");
+#endif
 	return false;
 }
 #endif
diff --git a/include/linux/intel-pti.h b/include/linux/intel-pti.h
new file mode 100644
index 0000000000000000000000000000000000000000..2710d72de3c926c2a4e1be36a921aa23ac107c31
--- /dev/null
+++ b/include/linux/intel-pti.h
@@ -0,0 +1,43 @@
+/*
+ *  Copyright (C) Intel 2011
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The PTI (Parallel Trace Interface) driver directs trace data routed from
+ * various parts in the system out through the Intel Penwell PTI port and
+ * out of the mobile device for analysis with a debugging tool
+ * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
+ * compact JTAG, standard.
+ *
+ * This header file will allow other parts of the OS to use the
+ * interface to write out it's contents for debugging a mobile system.
+ */
+
+#ifndef LINUX_INTEL_PTI_H_
+#define LINUX_INTEL_PTI_H_
+
+/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
+#define PTI_LASTDWORD_DTS	0x30
+
+/* basic structure used as a write address to the PTI HW */
+struct pti_masterchannel {
+	u8 master;
+	u8 channel;
+};
+
+/* the following functions are defined in misc/pti.c */
+void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count);
+struct pti_masterchannel *pti_request_masterchannel(u8 type,
+						    const char *thread_name);
+void pti_release_masterchannel(struct pti_masterchannel *mc);
+
+#endif /* LINUX_INTEL_PTI_H_ */
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index cb18c6290ca87290996e636f3eda14eb03d26316..8415bf1a9776245b810c8f92fa16c98eb038a45f 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -273,7 +273,8 @@ struct ipv6_pinfo {
 						 * 100: prefer care-of address
 						 */
 				dontfrag:1,
-				autoflowlabel:1;
+				autoflowlabel:1,
+				autoflowlabel_set:1;
 	__u8			min_hopcount;
 	__u8			tclass;
 	__be32			rcv_flowinfo;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index e140f69163b693b386bdc709719b4efc3d8a30b0..a0231e96a578348c21596a3c9d2d0cbe4a9a1249 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -212,6 +212,7 @@ struct irq_data {
  *				  mask. Applies only to affinity managed irqs.
  * IRQD_SINGLE_TARGET		- IRQ allows only a single affinity target
  * IRQD_DEFAULT_TRIGGER_SET	- Expected trigger already been set
+ * IRQD_CAN_RESERVE		- Can use reservation mode
  */
 enum {
 	IRQD_TRIGGER_MASK		= 0xf,
@@ -233,6 +234,7 @@ enum {
 	IRQD_MANAGED_SHUTDOWN		= (1 << 23),
 	IRQD_SINGLE_TARGET		= (1 << 24),
 	IRQD_DEFAULT_TRIGGER_SET	= (1 << 25),
+	IRQD_CAN_RESERVE		= (1 << 26),
 };
 
 #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -377,6 +379,21 @@ static inline bool irqd_is_managed_and_shutdown(struct irq_data *d)
 	return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN;
 }
 
+static inline void irqd_set_can_reserve(struct irq_data *d)
+{
+	__irqd_to_state(d) |= IRQD_CAN_RESERVE;
+}
+
+static inline void irqd_clr_can_reserve(struct irq_data *d)
+{
+	__irqd_to_state(d) &= ~IRQD_CAN_RESERVE;
+}
+
+static inline bool irqd_can_reserve(struct irq_data *d)
+{
+	return __irqd_to_state(d) & IRQD_CAN_RESERVE;
+}
+
 #undef __irqd_to_state
 
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index dd418955962bc23a6e30af9c2edd3fed57118f69..25b33b66453773cb01509725fa68664c555ffd3f 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -230,7 +230,7 @@ irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip,
 	data->chip = chip;
 }
 
-static inline int irq_balancing_disabled(unsigned int irq)
+static inline bool irq_balancing_disabled(unsigned int irq)
 {
 	struct irq_desc *desc;
 
@@ -238,7 +238,7 @@ static inline int irq_balancing_disabled(unsigned int irq)
 	return desc->status_use_accessors & IRQ_NO_BALANCING_MASK;
 }
 
-static inline int irq_is_percpu(unsigned int irq)
+static inline bool irq_is_percpu(unsigned int irq)
 {
 	struct irq_desc *desc;
 
@@ -246,7 +246,7 @@ static inline int irq_is_percpu(unsigned int irq)
 	return desc->status_use_accessors & IRQ_PER_CPU;
 }
 
-static inline int irq_is_percpu_devid(unsigned int irq)
+static inline bool irq_is_percpu_devid(unsigned int irq)
 {
 	struct irq_desc *desc;
 
@@ -255,12 +255,15 @@ static inline int irq_is_percpu_devid(unsigned int irq)
 }
 
 static inline void
-irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class)
+irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
+		      struct lock_class_key *request_class)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
-	if (desc)
-		lockdep_set_class(&desc->lock, class);
+	if (desc) {
+		lockdep_set_class(&desc->lock, lock_class);
+		lockdep_set_class(&desc->request_mutex, request_class);
+	}
 }
 
 #ifdef CONFIG_IRQ_PREFLOW_FASTEOI
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index a34355d195463f93d3235283978633bf60f0b59d..48c7e86bb5566798aba1b1b132ab242b75b05ea9 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -113,7 +113,7 @@ struct irq_domain_ops {
 		     unsigned int nr_irqs, void *arg);
 	void (*free)(struct irq_domain *d, unsigned int virq,
 		     unsigned int nr_irqs);
-	int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early);
+	int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve);
 	void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data);
 	int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec,
 			 unsigned long *out_hwirq, unsigned int *out_type);
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 46cb57d5eb1361e521f36208f0fef6b31fd071ea..1b3996ff3f16d10158056e45425b81b5e7c5cbd8 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -27,22 +27,18 @@
 # define trace_hardirq_enter()			\
 do {						\
 	current->hardirq_context++;		\
-	crossrelease_hist_start(XHLOCK_HARD);	\
 } while (0)
 # define trace_hardirq_exit()			\
 do {						\
 	current->hardirq_context--;		\
-	crossrelease_hist_end(XHLOCK_HARD);	\
 } while (0)
 # define lockdep_softirq_enter()		\
 do {						\
 	current->softirq_context++;		\
-	crossrelease_hist_start(XHLOCK_SOFT);	\
 } while (0)
 # define lockdep_softirq_exit()			\
 do {						\
 	current->softirq_context--;		\
-	crossrelease_hist_end(XHLOCK_SOFT);	\
 } while (0)
 # define INIT_TRACE_IRQFLAGS	.softirqs_enabled = 1,
 #else
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
deleted file mode 100644
index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000
--- a/include/linux/kmemcheck.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 893d6d606cd0a9023e2ab8ef27c523ba8a959a06..6bdd4b9f661154c386754654db238d13e0a13a31 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -232,7 +232,7 @@ struct kvm_vcpu {
 	struct mutex mutex;
 	struct kvm_run *run;
 
-	int guest_fpu_loaded, guest_xcr0_loaded;
+	int guest_xcr0_loaded;
 	struct swait_queue_head wq;
 	struct pid __rcu *pid;
 	int sigset_active;
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index a842551fe0449a38df5510a39d3fdc4760caa0ee..3251d9c0d3137ee594a8727f4f817bdb2e2d2350 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -158,12 +158,6 @@ struct lockdep_map {
 	int				cpu;
 	unsigned long			ip;
 #endif
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-	/*
-	 * Whether it's a crosslock.
-	 */
-	int				cross;
-#endif
 };
 
 static inline void lockdep_copy_map(struct lockdep_map *to,
@@ -267,95 +261,8 @@ struct held_lock {
 	unsigned int hardirqs_off:1;
 	unsigned int references:12;					/* 32 bits */
 	unsigned int pin_count;
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-	/*
-	 * Generation id.
-	 *
-	 * A value of cross_gen_id will be stored when holding this,
-	 * which is globally increased whenever each crosslock is held.
-	 */
-	unsigned int gen_id;
-#endif
-};
-
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-#define MAX_XHLOCK_TRACE_ENTRIES 5
-
-/*
- * This is for keeping locks waiting for commit so that true dependencies
- * can be added at commit step.
- */
-struct hist_lock {
-	/*
-	 * Id for each entry in the ring buffer. This is used to
-	 * decide whether the ring buffer was overwritten or not.
-	 *
-	 * For example,
-	 *
-	 *           |<----------- hist_lock ring buffer size ------->|
-	 *           pppppppppppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiii
-	 * wrapped > iiiiiiiiiiiiiiiiiiiiiiiiiii.......................
-	 *
-	 *           where 'p' represents an acquisition in process
-	 *           context, 'i' represents an acquisition in irq
-	 *           context.
-	 *
-	 * In this example, the ring buffer was overwritten by
-	 * acquisitions in irq context, that should be detected on
-	 * rollback or commit.
-	 */
-	unsigned int hist_id;
-
-	/*
-	 * Seperate stack_trace data. This will be used at commit step.
-	 */
-	struct stack_trace	trace;
-	unsigned long		trace_entries[MAX_XHLOCK_TRACE_ENTRIES];
-
-	/*
-	 * Seperate hlock instance. This will be used at commit step.
-	 *
-	 * TODO: Use a smaller data structure containing only necessary
-	 * data. However, we should make lockdep code able to handle the
-	 * smaller one first.
-	 */
-	struct held_lock	hlock;
 };
 
-/*
- * To initialize a lock as crosslock, lockdep_init_map_crosslock() should
- * be called instead of lockdep_init_map().
- */
-struct cross_lock {
-	/*
-	 * When more than one acquisition of crosslocks are overlapped,
-	 * we have to perform commit for them based on cross_gen_id of
-	 * the first acquisition, which allows us to add more true
-	 * dependencies.
-	 *
-	 * Moreover, when no acquisition of a crosslock is in progress,
-	 * we should not perform commit because the lock might not exist
-	 * any more, which might cause incorrect memory access. So we
-	 * have to track the number of acquisitions of a crosslock.
-	 */
-	int nr_acquire;
-
-	/*
-	 * Seperate hlock instance. This will be used at commit step.
-	 *
-	 * TODO: Use a smaller data structure containing only necessary
-	 * data. However, we should make lockdep code able to handle the
-	 * smaller one first.
-	 */
-	struct held_lock	hlock;
-};
-
-struct lockdep_map_cross {
-	struct lockdep_map map;
-	struct cross_lock xlock;
-};
-#endif
-
 /*
  * Initialization, self-test and debugging-output methods:
  */
@@ -560,37 +467,6 @@ enum xhlock_context_t {
 	XHLOCK_CTX_NR,
 };
 
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-extern void lockdep_init_map_crosslock(struct lockdep_map *lock,
-				       const char *name,
-				       struct lock_class_key *key,
-				       int subclass);
-extern void lock_commit_crosslock(struct lockdep_map *lock);
-
-/*
- * What we essencially have to initialize is 'nr_acquire'. Other members
- * will be initialized in add_xlock().
- */
-#define STATIC_CROSS_LOCK_INIT() \
-	{ .nr_acquire = 0,}
-
-#define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \
-	{ .map.name = (_name), .map.key = (void *)(_key), \
-	  .map.cross = 1, .xlock = STATIC_CROSS_LOCK_INIT(), }
-
-/*
- * To initialize a lockdep_map statically use this macro.
- * Note that _name must not be NULL.
- */
-#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
-	{ .name = (_name), .key = (void *)(_key), .cross = 0, }
-
-extern void crossrelease_hist_start(enum xhlock_context_t c);
-extern void crossrelease_hist_end(enum xhlock_context_t c);
-extern void lockdep_invariant_state(bool force);
-extern void lockdep_init_task(struct task_struct *task);
-extern void lockdep_free_task(struct task_struct *task);
-#else /* !CROSSRELEASE */
 #define lockdep_init_map_crosslock(m, n, k, s) do {} while (0)
 /*
  * To initialize a lockdep_map statically use this macro.
@@ -599,12 +475,9 @@ extern void lockdep_free_task(struct task_struct *task);
 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
 	{ .name = (_name), .key = (void *)(_key), }
 
-static inline void crossrelease_hist_start(enum xhlock_context_t c) {}
-static inline void crossrelease_hist_end(enum xhlock_context_t c) {}
 static inline void lockdep_invariant_state(bool force) {}
 static inline void lockdep_init_task(struct task_struct *task) {}
 static inline void lockdep_free_task(struct task_struct *task) {}
-#endif /* CROSSRELEASE */
 
 #ifdef CONFIG_LOCK_STAT
 
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index a2a1318a3d0c8be0a1fb3d1a08fcf671ff9d8bee..c3d3f04d8cc689eddf217c0626e71d8c16530db5 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -915,10 +915,10 @@ enum PDEV_STAT  {PDEV_STAT_IDLE, PDEV_STAT_RUN};
 #define LTR_L1SS_PWR_GATE_CHECK_CARD_EN	BIT(6)
 
 enum dev_aspm_mode {
-	DEV_ASPM_DISABLE = 0,
 	DEV_ASPM_DYNAMIC,
 	DEV_ASPM_BACKDOOR,
 	DEV_ASPM_STATIC,
+	DEV_ASPM_DISABLE,
 };
 
 /*
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a886b51511abbf146c4c76309aa313e4bbf77dda..1f509d072026d3d62a9e3701ed72bd5ab2d5b5c4 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -556,6 +556,7 @@ struct mlx5_core_sriov {
 };
 
 struct mlx5_irq_info {
+	cpumask_var_t mask;
 	char name[MLX5_MAX_IRQ_NAME];
 };
 
@@ -1048,7 +1049,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 		       enum mlx5_eq_type type);
 int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_start_eqs(struct mlx5_core_dev *dev);
-int mlx5_stop_eqs(struct mlx5_core_dev *dev);
+void mlx5_stop_eqs(struct mlx5_core_dev *dev);
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
 		    unsigned int *irqn);
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1164,6 +1165,10 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+				 u64 *values,
+				 int num_counters,
+				 size_t *offsets);
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 38a7577a9ce71fbcf63c21e2911364795842daa8..d44ec5f41d4a04c72b25b4db1d6fb0217f8f1fa1 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -147,7 +147,7 @@ enum {
 	MLX5_CMD_OP_ALLOC_Q_COUNTER               = 0x771,
 	MLX5_CMD_OP_DEALLOC_Q_COUNTER             = 0x772,
 	MLX5_CMD_OP_QUERY_Q_COUNTER               = 0x773,
-	MLX5_CMD_OP_SET_RATE_LIMIT                = 0x780,
+	MLX5_CMD_OP_SET_PP_RATE_LIMIT             = 0x780,
 	MLX5_CMD_OP_QUERY_RATE_LIMIT              = 0x781,
 	MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT      = 0x782,
 	MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT     = 0x783,
@@ -7239,7 +7239,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
 	u8         vxlan_udp_port[0x10];
 };
 
-struct mlx5_ifc_set_rate_limit_out_bits {
+struct mlx5_ifc_set_pp_rate_limit_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
 
@@ -7248,7 +7248,7 @@ struct mlx5_ifc_set_rate_limit_out_bits {
 	u8         reserved_at_40[0x40];
 };
 
-struct mlx5_ifc_set_rate_limit_in_bits {
+struct mlx5_ifc_set_pp_rate_limit_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_at_10[0x10];
 
@@ -7261,6 +7261,8 @@ struct mlx5_ifc_set_rate_limit_in_bits {
 	u8         reserved_at_60[0x20];
 
 	u8         rate_limit[0x20];
+
+	u8         reserved_at_a0[0x160];
 };
 
 struct mlx5_ifc_access_register_out_bits {
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 01c91d874a57f5edc7f4698b57918402a211d707..5bad038ac012e6e3047fd6a63a625c6814966d51 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -66,6 +66,15 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk)
 	return tsk->signal->oom_mm;
 }
 
+/*
+ * Use this helper if tsk->mm != mm and the victim mm needs a special
+ * handling. This is guaranteed to stay true after once set.
+ */
+static inline bool mm_is_oom_victim(struct mm_struct *mm)
+{
+	return test_bit(MMF_OOM_VICTIM, &mm->flags);
+}
+
 /*
  * Checks whether a page fault on the given mm is still reliable.
  * This is no longer true if the oom reaper started to reap the
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0403894147a3ca970ff7248549919693b53d9e8e..c170c9250c8b706e62e00f4b6a26149735bacba5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1674,6 +1674,9 @@ static inline struct pci_dev *pci_get_slot(struct pci_bus *bus,
 static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus,
 						unsigned int devfn)
 { return NULL; }
+static inline struct pci_dev *pci_get_domain_bus_and_slot(int domain,
+					unsigned int bus, unsigned int devfn)
+{ return NULL; }
 
 static inline int pci_domain_nr(struct pci_bus *bus) { return 0; }
 static inline struct pci_dev *pci_dev_get(struct pci_dev *dev) { return NULL; }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2c9c87d8a0c18e5f5c1cf2a8e148504e4f3ad3a9..7546822a1d74f198d5c521bc2864c369e38d70e4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -15,6 +15,7 @@
 #define _LINUX_PERF_EVENT_H
 
 #include <uapi/linux/perf_event.h>
+#include <uapi/linux/bpf_perf_event.h>
 
 /*
  * Kernel-internal data types and definitions:
@@ -787,7 +788,7 @@ struct perf_output_handle {
 };
 
 struct bpf_perf_event_data_kern {
-	struct pt_regs *regs;
+	bpf_user_pt_regs_t *regs;
 	struct perf_sample_data *data;
 	struct perf_event *event;
 };
@@ -1177,6 +1178,9 @@ extern void perf_bp_event(struct perf_event *event, void *data);
 		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
 # define perf_instruction_pointer(regs)	instruction_pointer(regs)
 #endif
+#ifndef perf_arch_bpf_user_pt_regs
+# define perf_arch_bpf_user_pt_regs(regs) regs
+#endif
 
 static inline bool has_branch_stack(struct perf_event *event)
 {
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 65d39115f06d8780399e469ed44fc1c90f074e69..492ed473ba7e440ad422c3a5c16833ef47e66374 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -765,6 +765,7 @@ extern int pm_generic_poweroff_late(struct device *dev);
 extern int pm_generic_poweroff(struct device *dev);
 extern void pm_generic_complete(struct device *dev);
 
+extern void dev_pm_skip_next_resume_phases(struct device *dev);
 extern bool dev_pm_smart_suspend_and_suspended(struct device *dev);
 
 #else /* !CONFIG_PM_SLEEP */
diff --git a/include/linux/pti.h b/include/linux/pti.h
index b3ea01a3197efa5c30768fe00ee0f24aad6941aa..0174883a935a25ddef6b33ad4765c65f0f3b25c4 100644
--- a/include/linux/pti.h
+++ b/include/linux/pti.h
@@ -1,43 +1,11 @@
-/*
- *  Copyright (C) Intel 2011
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * The PTI (Parallel Trace Interface) driver directs trace data routed from
- * various parts in the system out through the Intel Penwell PTI port and
- * out of the mobile device for analysis with a debugging tool
- * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
- * compact JTAG, standard.
- *
- * This header file will allow other parts of the OS to use the
- * interface to write out it's contents for debugging a mobile system.
- */
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _INCLUDE_PTI_H
+#define _INCLUDE_PTI_H
 
-#ifndef PTI_H_
-#define PTI_H_
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#include <asm/pti.h>
+#else
+static inline void pti_init(void) { }
+#endif
 
-/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
-#define PTI_LASTDWORD_DTS	0x30
-
-/* basic structure used as a write address to the PTI HW */
-struct pti_masterchannel {
-	u8 master;
-	u8 channel;
-};
-
-/* the following functions are defined in misc/pti.c */
-void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count);
-struct pti_masterchannel *pti_request_masterchannel(u8 type,
-						    const char *thread_name);
-void pti_release_masterchannel(struct pti_masterchannel *mc);
-
-#endif /*PTI_H_*/
+#endif
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 37b4bb2545b32dc82633df6b76e629b9999f900b..6866df4f31b59da506d56b3db29501a56d813635 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -101,12 +101,18 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r)
 
 /* Note: callers invoking this in a loop must use a compiler barrier,
  * for example cpu_relax(). Callers must hold producer_lock.
+ * Callers are responsible for making sure pointer that is being queued
+ * points to a valid data.
  */
 static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
 {
 	if (unlikely(!r->size) || r->queue[r->producer])
 		return -ENOSPC;
 
+	/* Make sure the pointer we are storing points to a valid data. */
+	/* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
+	smp_wmb();
+
 	r->queue[r->producer++] = ptr;
 	if (unlikely(r->producer >= r->size))
 		r->producer = 0;
@@ -275,6 +281,9 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
 	if (ptr)
 		__ptr_ring_discard_one(r);
 
+	/* Make sure anyone accessing data through the pointer is up to date. */
+	/* Pairs with smp_wmb in __ptr_ring_produce. */
+	smp_read_barrier_depends();
 	return ptr;
 }
 
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index d574361943ea832532cf93177e97a8c62d6c3430..fcbeed4053efbba81993fea4151a8a6a6cd6f5d1 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -99,6 +99,8 @@ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
 			    struct rb_root *root);
 extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
 				struct rb_root *root);
+extern void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
+				   struct rb_root_cached *root);
 
 static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
 				struct rb_node **rb_link)
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index a328e8181e49f3a0947dd713daeef35b9d7c831f..e4b257ff881bfe439a945d7487f5700f17a26740 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -100,44 +100,6 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
 		first->pprev = &n->next;
 }
 
-/**
- * hlist_nulls_add_tail_rcu
- * @n: the element to add to the hash list.
- * @h: the list to add to.
- *
- * Description:
- * Adds the specified element to the end of the specified hlist_nulls,
- * while permitting racing traversals.  NOTE: tail insertion requires
- * list traversal.
- *
- * The caller must take whatever precautions are necessary
- * (such as holding appropriate locks) to avoid racing
- * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
- * or hlist_nulls_del_rcu(), running on this same list.
- * However, it is perfectly legal to run concurrently with
- * the _rcu list-traversal primitives, such as
- * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
- * problems on Alpha CPUs.  Regardless of the type of CPU, the
- * list-traversal primitive must be guarded by rcu_read_lock().
- */
-static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
-					struct hlist_nulls_head *h)
-{
-	struct hlist_nulls_node *i, *last = NULL;
-
-	for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i);
-	     i = hlist_nulls_next_rcu(i))
-		last = i;
-
-	if (last) {
-		n->next = last->next;
-		n->pprev = &last->next;
-		rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
-	} else {
-		hlist_nulls_add_head_rcu(n, h);
-	}
-}
-
 /**
  * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
  * @tpos:	the type * to use as a loop cursor.
diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
index cc0072e93e360722f40a19928c001a1feab272bb..857a72ceb794252eb8cb22c55ef85d17453c34a0 100644
--- a/include/linux/rwlock_types.h
+++ b/include/linux/rwlock_types.h
@@ -10,9 +10,6 @@
  */
 typedef struct {
 	arch_rwlock_t raw_lock;
-#ifdef CONFIG_GENERIC_LOCKBREAK
-	unsigned int break_lock;
-#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
 	unsigned int magic, owner_cpu;
 	void *owner;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 21991d668d35231e625b95d99e2bd6826ba6a47e..d2588263a9893caa04d8854607207c41080927cc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -849,17 +849,6 @@ struct task_struct {
 	struct held_lock		held_locks[MAX_LOCK_DEPTH];
 #endif
 
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-#define MAX_XHLOCKS_NR 64UL
-	struct hist_lock *xhlocks; /* Crossrelease history locks */
-	unsigned int xhlock_idx;
-	/* For restoring at history boundaries */
-	unsigned int xhlock_idx_hist[XHLOCK_CTX_NR];
-	unsigned int hist_id;
-	/* For overwrite check at each context exit */
-	unsigned int hist_id_save[XHLOCK_CTX_NR];
-#endif
-
 #ifdef CONFIG_UBSAN
 	unsigned int			in_ubsan;
 #endif
@@ -1503,7 +1492,11 @@ static inline void set_task_comm(struct task_struct *tsk, const char *from)
 	__set_task_comm(tsk, from, false);
 }
 
-extern char *get_task_comm(char *to, struct task_struct *tsk);
+extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
+#define get_task_comm(buf, tsk) ({			\
+	BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN);	\
+	__get_task_comm(buf, sizeof(buf), tsk);		\
+})
 
 #ifdef CONFIG_SMP
 void scheduler_ipi(void);
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 9c8847395b5e15347e150d9b99337a70217b97db..ec912d01126f4b01f5ac61ebae7f73f8e0527d23 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -70,6 +70,7 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_UNSTABLE		22	/* mm is unstable for copy_from_user */
 #define MMF_HUGE_ZERO_PAGE	23      /* mm has ever used the global huge zero page */
 #define MMF_DISABLE_THP		24	/* disable THP for all VMAs */
+#define MMF_OOM_VICTIM		25	/* mm is the oom victim */
 #define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index e69402d4a8aecbdc6fe4a40e8ad7e01957badbd6..d609e6dc5bad00bb54f9258c077dc3220cbcb938 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -184,7 +184,7 @@ static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl,
 	struct serdev_device *serdev = ctrl->serdev;
 
 	if (!serdev || !serdev->ops->receive_buf)
-		return -EINVAL;
+		return 0;
 
 	return serdev->ops->receive_buf(serdev, data, count);
 }
diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h
index ff3642d267f7f3311a063e2b1e2b2ebd946686e8..94081e9a5010608d81f4a30badef921b69ee5ecc 100644
--- a/include/linux/sh_eth.h
+++ b/include/linux/sh_eth.h
@@ -17,7 +17,6 @@ struct sh_eth_plat_data {
 	unsigned char mac_addr[ETH_ALEN];
 	unsigned no_ether_link:1;
 	unsigned ether_link_active_low:1;
-	unsigned needs_init:1;
 };
 
 #endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bc486ef23f20f91ce3ed183e935399d1e4c55e18..a38c80e9f91efee011f22bc4e6755f3e4d85ff69 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1406,8 +1406,7 @@ static inline struct sk_buff *skb_get(struct sk_buff *skb)
 }
 
 /*
- * If users == 1, we are the only owner and are can avoid redundant
- * atomic change.
+ * If users == 1, we are the only owner and can avoid redundant atomic changes.
  */
 
 /**
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 7b2170bfd6e7dae432478fffdbc70e1408740394..bc6bb325d1bf7c03db223c568b891e5d33dc93ca 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -126,7 +126,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
  *	for that name.  This appears in the sysfs "modalias" attribute
  *	for driver coldplugging, and in uevents used for hotplugging
  * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when
- *	when not using a GPIO line)
+ *	not using a GPIO line)
  *
  * @statistics: statistics for the spi_device
  *
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index a39186194cd6782ac0b6705f02af6179a86f7813..3bf273538840103c7b6c634b148b8c74ce754843 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -107,16 +107,11 @@ do {								\
 
 #define raw_spin_is_locked(lock)	arch_spin_is_locked(&(lock)->raw_lock)
 
-#ifdef CONFIG_GENERIC_LOCKBREAK
-#define raw_spin_is_contended(lock) ((lock)->break_lock)
-#else
-
 #ifdef arch_spin_is_contended
 #define raw_spin_is_contended(lock)	arch_spin_is_contended(&(lock)->raw_lock)
 #else
 #define raw_spin_is_contended(lock)	(((void)(lock), 0))
 #endif /*arch_spin_is_contended*/
-#endif
 
 /*
  * This barrier must provide two things:
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 73548eb13a5ddc82ea16c6a292a8d704471eb7ac..24b4e6f2c1a22fe2a6717f4d6076ea87d595994b 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -19,9 +19,6 @@
 
 typedef struct raw_spinlock {
 	arch_spinlock_t raw_lock;
-#ifdef CONFIG_GENERIC_LOCKBREAK
-	unsigned int break_lock;
-#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
 	unsigned int magic, owner_cpu;
 	void *owner;
diff --git a/include/linux/string.h b/include/linux/string.h
index 410ecf17de3ce591017e36a95f28de18ae54dcc1..cfd83eb2f926c74622f46ed931bb1c58277df49f 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -259,7 +259,10 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p)
 {
 	__kernel_size_t ret;
 	size_t p_size = __builtin_object_size(p, 0);
-	if (p_size == (size_t)-1)
+
+	/* Work around gcc excess stack consumption issue */
+	if (p_size == (size_t)-1 ||
+	    (__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0'))
 		return __builtin_strlen(p);
 	ret = strnlen(p, p_size);
 	if (p_size <= ret)
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index e32dfe098e822f559810d785a685e05054974cc0..40839c02d28c04389218a72012f784efc8ca5ff8 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -117,6 +117,12 @@ struct attribute_group {
 	.show	= _name##_show,						\
 }
 
+#define __ATTR_RO_MODE(_name, _mode) {					\
+	.attr	= { .name = __stringify(_name),				\
+		    .mode = VERIFY_OCTAL_PERMISSIONS(_mode) },		\
+	.show	= _name##_show,						\
+}
+
 #define __ATTR_WO(_name) {						\
 	.attr	= { .name = __stringify(_name), .mode = S_IWUSR },	\
 	.store	= _name##_store,					\
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index df5d97a85e1a6674e76dcc51ef54a250cbbdb539..ca4a6361389b8a3b268ca5b0f4778662a1f7d315 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -224,7 +224,8 @@ struct tcp_sock {
 		rate_app_limited:1,  /* rate_{delivered,interval_us} limited? */
 		fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
 		fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
-		unused:3;
+		is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */
+		unused:2;
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
 		unused1	    : 1,
diff --git a/include/linux/tick.h b/include/linux/tick.h
index f442d1a42025925eb4446c6678e3a059a8cfbb39..7cc35921218ecb745634e29d0558749981b4089d 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -119,6 +119,7 @@ extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern unsigned long tick_nohz_get_idle_calls(void);
+extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 #else /* !CONFIG_NO_HZ_COMMON */
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 04af640ea95bd011cdec0101798b0aa7810233cb..2448f9cc48a3120222d29f8110069c626b428543 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -207,9 +207,11 @@ unsigned long round_jiffies_up(unsigned long j);
 unsigned long round_jiffies_up_relative(unsigned long j);
 
 #ifdef CONFIG_HOTPLUG_CPU
+int timers_prepare_cpu(unsigned int cpu);
 int timers_dead_cpu(unsigned int cpu);
 #else
-#define timers_dead_cpu NULL
+#define timers_prepare_cpu	NULL
+#define timers_dead_cpu		NULL
 #endif
 
 #endif
diff --git a/include/linux/trace.h b/include/linux/trace.h
index d24991c1fef33343d7b7bc65d6b9ec67d8d9f575..b95ffb2188abaaac85dc992c6545d33b946495d1 100644
--- a/include/linux/trace.h
+++ b/include/linux/trace.h
@@ -18,7 +18,7 @@
  */
 struct trace_export {
 	struct trace_export __rcu	*next;
-	void (*write)(const void *, unsigned int);
+	void (*write)(struct trace_export *, const void *, unsigned int);
 };
 
 int register_ftrace_export(struct trace_export *export);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index a69877734c4eb033b7a7a7f2989e1da241793782..e2ec3582e54937d3818afed3e253440fc23541a0 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -82,6 +82,7 @@ struct usbnet {
 #		define EVENT_RX_KILL	10
 #		define EVENT_LINK_CHANGE	11
 #		define EVENT_SET_RX_MODE	12
+#		define EVENT_NO_IP_ALIGN	13
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8b8118a7fadbc74a9aa879dc934de0442bb3a013..cb4d92b79cd932eda4e178861d8345683b329bdb 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3226,7 +3226,6 @@ struct cfg80211_ops {
  * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN.
  * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing
  *	auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH.
- * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans.
  * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the
  *	firmware.
  * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP.
diff --git a/include/net/gue.h b/include/net/gue.h
index 2fdb29ca74c2ab26068c57e0134e58b35019ac0e..fdad41469b6521bed2d666cf1cf7925579ec8111 100644
--- a/include/net/gue.h
+++ b/include/net/gue.h
@@ -44,10 +44,10 @@ struct guehdr {
 #else
 #error  "Please fix <asm/byteorder.h>"
 #endif
-			__u8    proto_ctype;
-			__u16   flags;
+			__u8	proto_ctype;
+			__be16	flags;
 		};
-		__u32 word;
+		__be32	word;
 	};
 };
 
@@ -84,11 +84,10 @@ static inline size_t guehdr_priv_flags_len(__be32 flags)
  * if there is an unknown standard or private flags, or the options length for
  * the flags exceeds the options length specific in hlen of the GUE header.
  */
-static inline int validate_gue_flags(struct guehdr *guehdr,
-				     size_t optlen)
+static inline int validate_gue_flags(struct guehdr *guehdr, size_t optlen)
 {
+	__be16 flags = guehdr->flags;
 	size_t len;
-	__be32 flags = guehdr->flags;
 
 	if (flags & ~GUE_FLAGS_ALL)
 		return 1;
@@ -101,12 +100,13 @@ static inline int validate_gue_flags(struct guehdr *guehdr,
 		/* Private flags are last four bytes accounted in
 		 * guehdr_flags_len
 		 */
-		flags = *(__be32 *)((void *)&guehdr[1] + len - GUE_LEN_PRIV);
+		__be32 pflags = *(__be32 *)((void *)&guehdr[1] +
+					    len - GUE_LEN_PRIV);
 
-		if (flags & ~GUE_PFLAGS_ALL)
+		if (pflags & ~GUE_PFLAGS_ALL)
 			return 1;
 
-		len += guehdr_priv_flags_len(flags);
+		len += guehdr_priv_flags_len(pflags);
 		if (len > optlen)
 			return 1;
 	}
diff --git a/include/net/ip.h b/include/net/ip.h
index 9896f46cbbf11235395d75a5ec18a14736ee099d..af8addbaa3c188a896b74ff9646b6fdd692d1c8e 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -34,6 +34,7 @@
 #include <net/flow_dissector.h>
 
 #define IPV4_MAX_PMTU		65535U		/* RFC 2675, Section 5.1 */
+#define IPV4_MIN_MTU		68			/* RFC 791 */
 
 struct sock;
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 0105445cab83d32008b3526794c077f4bfbd9816..8e08b6da72f325bd4a623191e886fb1b746644d7 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -694,9 +694,7 @@ struct tc_cls_matchall_offload {
 };
 
 enum tc_clsbpf_command {
-	TC_CLSBPF_ADD,
-	TC_CLSBPF_REPLACE,
-	TC_CLSBPF_DESTROY,
+	TC_CLSBPF_OFFLOAD,
 	TC_CLSBPF_STATS,
 };
 
@@ -705,6 +703,7 @@ struct tc_cls_bpf_offload {
 	enum tc_clsbpf_command command;
 	struct tcf_exts *exts;
 	struct bpf_prog *prog;
+	struct bpf_prog *oldprog;
 	const char *name;
 	bool exts_integrated;
 	u32 gen_flags;
diff --git a/include/net/red.h b/include/net/red.h
index 9a9347710701458a74953ef0407714865a13298a..9665582c4687e41bf5dd081894c0be89a40b89b6 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -168,6 +168,17 @@ static inline void red_set_vars(struct red_vars *v)
 	v->qcount	= -1;
 }
 
+static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog)
+{
+	if (fls(qth_min) + Wlog > 32)
+		return false;
+	if (fls(qth_max) + Wlog > 32)
+		return false;
+	if (qth_max < qth_min)
+		return false;
+	return true;
+}
+
 static inline void red_set_parms(struct red_parms *p,
 				 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
 				 u8 Scell_log, u8 *stab, u32 max_P)
@@ -179,7 +190,7 @@ static inline void red_set_parms(struct red_parms *p,
 	p->qth_max	= qth_max << Wlog;
 	p->Wlog		= Wlog;
 	p->Plog		= Plog;
-	if (delta < 0)
+	if (delta <= 0)
 		delta = 1;
 	p->qth_delta	= delta;
 	if (!max_P) {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 65d0d25f2648f645bad707c348cbb6454ef75cd5..83a3e47d5845b99fa61799a15b29e7247d478c72 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -71,6 +71,7 @@ struct Qdisc {
 				      * qdisc_tree_decrease_qlen() should stop.
 				      */
 #define TCQ_F_INVISIBLE		0x80 /* invisible by default in dump */
+#define TCQ_F_OFFLOADED		0x200 /* qdisc is offloaded to HW */
 	u32			limit;
 	const struct Qdisc_ops	*ops;
 	struct qdisc_size_table	__rcu *stab;
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 16f949eef52fdfd7c90fa15b44093334d1355aaf..9a5ccf03a59b1e0a4820e2a978c66f1df8a9a82f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -503,7 +503,8 @@ struct sctp_datamsg {
 	/* Did the messenge fail to send? */
 	int send_error;
 	u8 send_failed:1,
-	   can_delay;	    /* should this message be Nagle delayed */
+	   can_delay:1,	/* should this message be Nagle delayed */
+	   abandoned:1;	/* should this message be abandoned */
 };
 
 struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *,
@@ -965,7 +966,7 @@ void sctp_transport_burst_limited(struct sctp_transport *);
 void sctp_transport_burst_reset(struct sctp_transport *);
 unsigned long sctp_transport_timeout(struct sctp_transport *);
 void sctp_transport_reset(struct sctp_transport *t);
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
 void sctp_transport_immediate_rtx(struct sctp_transport *);
 void sctp_transport_dst_release(struct sctp_transport *t);
 void sctp_transport_dst_confirm(struct sctp_transport *t);
diff --git a/include/net/sock.h b/include/net/sock.h
index 79e1a2c7912c03d8281d449609d57cc909138a3b..7a7b14e9628a1174e18dda30cd0b5bfd5b32d30d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -685,11 +685,7 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
 
 static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
 {
-	if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
-	    sk->sk_family == AF_INET6)
-		hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
-	else
-		hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+	hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
 }
 
 static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
@@ -1518,6 +1514,11 @@ static inline bool sock_owned_by_user(const struct sock *sk)
 	return sk->sk_lock.owned;
 }
 
+static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
+{
+	return sk->sk_lock.owned;
+}
+
 /* no reclassification while locks are held */
 static inline bool sock_allow_reclassification(const struct sock *csk)
 {
diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h
index 524cee4f4c817b3583385e9ae9f991503ab934fd..01dbfea3267277ad5efacef4b59b4a1c615de66b 100644
--- a/include/net/tc_act/tc_sample.h
+++ b/include/net/tc_act/tc_sample.h
@@ -14,7 +14,6 @@ struct tcf_sample {
 	struct psample_group __rcu *psample_group;
 	u32 psample_group_num;
 	struct list_head tcfm_list;
-	struct rcu_head rcu;
 };
 #define to_sample(a) ((struct tcf_sample *)a)
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4e09398009c10a72478b43d3cffc24ba01612b91..6da880d2f022c0cfd787a62af2bb7d222348af32 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -844,12 +844,11 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb)
 }
 #endif
 
-/* TCP_SKB_CB reference means this can not be used from early demux */
 static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
 {
 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
 	if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
-	    skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
+	    skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
 		return true;
 #endif
 	return false;
@@ -1056,7 +1055,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
 void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
 			    struct rate_sample *rs);
 void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
-		  struct rate_sample *rs);
+		  bool is_sack_reneg, struct rate_sample *rs);
 void tcp_rate_check_app_limited(struct sock *sk);
 
 /* These functions determine how the current flow behaves in respect of SACK
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 13223396dc64abdbcd504b2439214efeb5c6def8..f96391e84a8a3161df98a964692e7ce445050944 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -146,7 +146,7 @@ struct vxlanhdr_gpe {
 		np_applied:1,
 		instance_applied:1,
 		version:2,
-reserved_flags2:2;
+		reserved_flags2:2;
 #elif defined(__BIG_ENDIAN_BITFIELD)
 	u8	reserved_flags2:2,
 		version:2,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dc28a98ce97ca7d53d67809a0fc718f0b9d622f8..ae35991b5877029b217a2b49fba7f012f193148c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1570,6 +1570,9 @@ int xfrm_init_state(struct xfrm_state *x);
 int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
 int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
+int xfrm_trans_queue(struct sk_buff *skb,
+		     int (*finish)(struct net *, struct sock *,
+				   struct sk_buff *));
 int xfrm_output_resume(struct sk_buff *skb, int err);
 int xfrm_output(struct sock *sk, struct sk_buff *skb);
 int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 0f9cbf96c093d86ae926ae5380f82eb454fecd02..6df6fe0c21980b4eb686bc20ff10f0c2bf28280b 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -159,11 +159,11 @@ struct expander_device {
 
 struct sata_device {
 	unsigned int class;
-	struct smp_resp        rps_resp; /* report_phy_sata_resp */
 	u8     port_no;        /* port number, if this is a PM (Port) */
 
 	struct ata_port *ap;
 	struct ata_host ata_host;
+	struct smp_resp rps_resp ____cacheline_aligned; /* report_phy_sata_resp */
 	u8     fis[ATA_RESP_FIS_SIZE];
 };
 
diff --git a/include/trace/events/clk.h b/include/trace/events/clk.h
index 758607226bfdd5269d33557b650a31628a59d22f..2cd449328aee37e55de94633d803d73fe3057f9e 100644
--- a/include/trace/events/clk.h
+++ b/include/trace/events/clk.h
@@ -134,12 +134,12 @@ DECLARE_EVENT_CLASS(clk_parent,
 
 	TP_STRUCT__entry(
 		__string(        name,           core->name                )
-		__string(        pname,          parent->name              )
+		__string(        pname, parent ? parent->name : "none"     )
 	),
 
 	TP_fast_assign(
 		__assign_str(name, core->name);
-		__assign_str(pname, parent->name);
+		__assign_str(pname, parent ? parent->name : "none");
 	),
 
 	TP_printk("%s %s", __get_str(name), __get_str(pname))
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index e4b0b8e099325f2801e4f3af168004c603c23794..2c735a3e66133fc08740b4df6d64919c491c9d1e 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -211,7 +211,7 @@ TRACE_EVENT(kvm_ack_irq,
 	{ KVM_TRACE_MMIO_WRITE, "write" }
 
 TRACE_EVENT(kvm_mmio,
-	TP_PROTO(int type, int len, u64 gpa, u64 val),
+	TP_PROTO(int type, int len, u64 gpa, void *val),
 	TP_ARGS(type, len, gpa, val),
 
 	TP_STRUCT__entry(
@@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio,
 		__entry->type		= type;
 		__entry->len		= len;
 		__entry->gpa		= gpa;
-		__entry->val		= val;
+		__entry->val		= 0;
+		if (val)
+			memcpy(&__entry->val, val,
+			       min_t(u32, sizeof(__entry->val), len));
 	),
 
 	TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h
index f5024c560d8ff028b8952083625f66cde6d77f97..9c4eb33c5a1d35d9fcfa83b8386a7c4be3ef7c9e 100644
--- a/include/trace/events/preemptirq.h
+++ b/include/trace/events/preemptirq.h
@@ -56,15 +56,18 @@ DEFINE_EVENT(preemptirq_template, preempt_enable,
 
 #include <trace/define_trace.h>
 
-#else /* !CONFIG_PREEMPTIRQ_EVENTS */
+#endif /* !CONFIG_PREEMPTIRQ_EVENTS */
 
+#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || defined(CONFIG_PROVE_LOCKING)
 #define trace_irq_enable(...)
 #define trace_irq_disable(...)
-#define trace_preempt_enable(...)
-#define trace_preempt_disable(...)
 #define trace_irq_enable_rcuidle(...)
 #define trace_irq_disable_rcuidle(...)
+#endif
+
+#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || !defined(CONFIG_DEBUG_PREEMPT)
+#define trace_preempt_enable(...)
+#define trace_preempt_disable(...)
 #define trace_preempt_enable_rcuidle(...)
 #define trace_preempt_disable_rcuidle(...)
-
 #endif
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 07cccca6cbf1762684152146372e35e1cd758338..ab34c561f26bec42a8ff32cb6f7e911447f03af2 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -25,6 +25,35 @@
 		tcp_state_name(TCP_CLOSING),		\
 		tcp_state_name(TCP_NEW_SYN_RECV))
 
+#define TP_STORE_V4MAPPED(__entry, saddr, daddr)		\
+	do {							\
+		struct in6_addr *pin6;				\
+								\
+		pin6 = (struct in6_addr *)__entry->saddr_v6;	\
+		ipv6_addr_set_v4mapped(saddr, pin6);		\
+		pin6 = (struct in6_addr *)__entry->daddr_v6;	\
+		ipv6_addr_set_v4mapped(daddr, pin6);		\
+	} while (0)
+
+#if IS_ENABLED(CONFIG_IPV6)
+#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6)		\
+	do {								\
+		if (sk->sk_family == AF_INET6) {			\
+			struct in6_addr *pin6;				\
+									\
+			pin6 = (struct in6_addr *)__entry->saddr_v6;	\
+			*pin6 = saddr6;					\
+			pin6 = (struct in6_addr *)__entry->daddr_v6;	\
+			*pin6 = daddr6;					\
+		} else {						\
+			TP_STORE_V4MAPPED(__entry, saddr, daddr);	\
+		}							\
+	} while (0)
+#else
+#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6)	\
+	TP_STORE_V4MAPPED(__entry, saddr, daddr)
+#endif
+
 /*
  * tcp event with arguments sk and skb
  *
@@ -50,7 +79,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
 
 	TP_fast_assign(
 		struct inet_sock *inet = inet_sk(sk);
-		struct in6_addr *pin6;
 		__be32 *p32;
 
 		__entry->skbaddr = skb;
@@ -65,20 +93,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
 		p32 = (__be32 *) __entry->daddr;
 		*p32 =  inet->inet_daddr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == AF_INET6) {
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			*pin6 = sk->sk_v6_rcv_saddr;
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			*pin6 = sk->sk_v6_daddr;
-		} else
-#endif
-		{
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
-		}
+		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+			      sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
 	),
 
 	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
@@ -127,7 +143,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
 
 	TP_fast_assign(
 		struct inet_sock *inet = inet_sk(sk);
-		struct in6_addr *pin6;
 		__be32 *p32;
 
 		__entry->skaddr = sk;
@@ -141,20 +156,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
 		p32 = (__be32 *) __entry->daddr;
 		*p32 =  inet->inet_daddr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == AF_INET6) {
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			*pin6 = sk->sk_v6_rcv_saddr;
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			*pin6 = sk->sk_v6_daddr;
-		} else
-#endif
-		{
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
-		}
+		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
 	),
 
 	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
@@ -197,7 +200,6 @@ TRACE_EVENT(tcp_set_state,
 
 	TP_fast_assign(
 		struct inet_sock *inet = inet_sk(sk);
-		struct in6_addr *pin6;
 		__be32 *p32;
 
 		__entry->skaddr = sk;
@@ -213,20 +215,8 @@ TRACE_EVENT(tcp_set_state,
 		p32 = (__be32 *) __entry->daddr;
 		*p32 =  inet->inet_daddr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == AF_INET6) {
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			*pin6 = sk->sk_v6_rcv_saddr;
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			*pin6 = sk->sk_v6_daddr;
-		} else
-#endif
-		{
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
-		}
+		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
 	),
 
 	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
@@ -256,7 +246,6 @@ TRACE_EVENT(tcp_retransmit_synack,
 
 	TP_fast_assign(
 		struct inet_request_sock *ireq = inet_rsk(req);
-		struct in6_addr *pin6;
 		__be32 *p32;
 
 		__entry->skaddr = sk;
@@ -271,20 +260,8 @@ TRACE_EVENT(tcp_retransmit_synack,
 		p32 = (__be32 *) __entry->daddr;
 		*p32 = ireq->ir_rmt_addr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == AF_INET6) {
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			*pin6 = ireq->ir_v6_loc_addr;
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			*pin6 = ireq->ir_v6_rmt_addr;
-		} else
-#endif
-		{
-			pin6 = (struct in6_addr *)__entry->saddr_v6;
-			ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6);
-			pin6 = (struct in6_addr *)__entry->daddr_v6;
-			ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6);
-		}
+		TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr,
+			      ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr);
 	),
 
 	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 4cd0f05d01134d1a1e2d5bd231407bfd7d92d250..8989a92c571a2d7036b74b233913b588e4e4248c 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -8,6 +8,7 @@
 #include <linux/netdevice.h>
 #include <linux/filter.h>
 #include <linux/tracepoint.h>
+#include <linux/bpf.h>
 
 #define __XDP_ACT_MAP(FN)	\
 	FN(ABORTED)		\
diff --git a/include/uapi/asm-generic/bpf_perf_event.h b/include/uapi/asm-generic/bpf_perf_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..53815d2cd047a0fbf816e31c9cf92c0a8adfaeef
--- /dev/null
+++ b/include/uapi/asm-generic/bpf_perf_event.h
@@ -0,0 +1,9 @@
+#ifndef _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__
+
+#include <linux/ptrace.h>
+
+/* Export kernel pt_regs structure */
+typedef struct pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ */
diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 3ad838d3f93f323b5a456dbd0f305687a5c786ab..e04613d30a134f696f9fa52dc439b57a87ac8f8c 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -178,7 +178,7 @@ extern "C" {
 #define DRM_FORMAT_MOD_VENDOR_NONE    0
 #define DRM_FORMAT_MOD_VENDOR_INTEL   0x01
 #define DRM_FORMAT_MOD_VENDOR_AMD     0x02
-#define DRM_FORMAT_MOD_VENDOR_NV      0x03
+#define DRM_FORMAT_MOD_VENDOR_NVIDIA  0x03
 #define DRM_FORMAT_MOD_VENDOR_SAMSUNG 0x04
 #define DRM_FORMAT_MOD_VENDOR_QCOM    0x05
 #define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06
@@ -188,7 +188,7 @@ extern "C" {
 #define DRM_FORMAT_RESERVED	      ((1ULL << 56) - 1)
 
 #define fourcc_mod_code(vendor, val) \
-	((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | (val & 0x00ffffffffffffffULL))
+	((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | ((val) & 0x00ffffffffffffffULL))
 
 /*
  * Format Modifier tokens:
@@ -338,29 +338,17 @@ extern "C" {
  */
 #define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4)
 
-/* NVIDIA Tegra frame buffer modifiers */
-
-/*
- * Some modifiers take parameters, for example the number of vertical GOBs in
- * a block. Reserve the lower 32 bits for parameters
- */
-#define __fourcc_mod_tegra_mode_shift 32
-#define fourcc_mod_tegra_code(val, params) \
-	fourcc_mod_code(NV, ((((__u64)val) << __fourcc_mod_tegra_mode_shift) | params))
-#define fourcc_mod_tegra_mod(m) \
-	(m & ~((1ULL << __fourcc_mod_tegra_mode_shift) - 1))
-#define fourcc_mod_tegra_param(m) \
-	(m & ((1ULL << __fourcc_mod_tegra_mode_shift) - 1))
+/* NVIDIA frame buffer modifiers */
 
 /*
  * Tegra Tiled Layout, used by Tegra 2, 3 and 4.
  *
  * Pixels are arranged in simple tiles of 16 x 16 bytes.
  */
-#define NV_FORMAT_MOD_TEGRA_TILED fourcc_mod_tegra_code(1, 0)
+#define DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED fourcc_mod_code(NVIDIA, 1)
 
 /*
- * Tegra 16Bx2 Block Linear layout, used by TK1/TX1
+ * 16Bx2 Block Linear layout, used by desktop GPUs, and Tegra K1 and later
  *
  * Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked
  * vertically by a power of 2 (1 to 32 GOBs) to form a block.
@@ -380,7 +368,21 @@ extern "C" {
  * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format
  * in full detail.
  */
-#define NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(v) fourcc_mod_tegra_code(2, v)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \
+	fourcc_mod_code(NVIDIA, 0x10 | ((v) & 0xf))
+
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB \
+	fourcc_mod_code(NVIDIA, 0x10)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB \
+	fourcc_mod_code(NVIDIA, 0x11)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB \
+	fourcc_mod_code(NVIDIA, 0x12)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB \
+	fourcc_mod_code(NVIDIA, 0x13)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB \
+	fourcc_mod_code(NVIDIA, 0x14)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \
+	fourcc_mod_code(NVIDIA, 0x15)
 
 /*
  * Broadcom VC4 "T" format
diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h
index d01087b2a651a2908ccfe4091f4815482cf8368d..4a54305120e06de11a6d53db9bbd80501febd9d4 100644
--- a/include/uapi/drm/exynos_drm.h
+++ b/include/uapi/drm/exynos_drm.h
@@ -135,172 +135,6 @@ struct drm_exynos_g2d_exec {
 	__u64					async;
 };
 
-enum drm_exynos_ops_id {
-	EXYNOS_DRM_OPS_SRC,
-	EXYNOS_DRM_OPS_DST,
-	EXYNOS_DRM_OPS_MAX,
-};
-
-struct drm_exynos_sz {
-	__u32	hsize;
-	__u32	vsize;
-};
-
-struct drm_exynos_pos {
-	__u32	x;
-	__u32	y;
-	__u32	w;
-	__u32	h;
-};
-
-enum drm_exynos_flip {
-	EXYNOS_DRM_FLIP_NONE = (0 << 0),
-	EXYNOS_DRM_FLIP_VERTICAL = (1 << 0),
-	EXYNOS_DRM_FLIP_HORIZONTAL = (1 << 1),
-	EXYNOS_DRM_FLIP_BOTH = EXYNOS_DRM_FLIP_VERTICAL |
-			EXYNOS_DRM_FLIP_HORIZONTAL,
-};
-
-enum drm_exynos_degree {
-	EXYNOS_DRM_DEGREE_0,
-	EXYNOS_DRM_DEGREE_90,
-	EXYNOS_DRM_DEGREE_180,
-	EXYNOS_DRM_DEGREE_270,
-};
-
-enum drm_exynos_planer {
-	EXYNOS_DRM_PLANAR_Y,
-	EXYNOS_DRM_PLANAR_CB,
-	EXYNOS_DRM_PLANAR_CR,
-	EXYNOS_DRM_PLANAR_MAX,
-};
-
-/**
- * A structure for ipp supported property list.
- *
- * @version: version of this structure.
- * @ipp_id: id of ipp driver.
- * @count: count of ipp driver.
- * @writeback: flag of writeback supporting.
- * @flip: flag of flip supporting.
- * @degree: flag of degree information.
- * @csc: flag of csc supporting.
- * @crop: flag of crop supporting.
- * @scale: flag of scale supporting.
- * @refresh_min: min hz of refresh.
- * @refresh_max: max hz of refresh.
- * @crop_min: crop min resolution.
- * @crop_max: crop max resolution.
- * @scale_min: scale min resolution.
- * @scale_max: scale max resolution.
- */
-struct drm_exynos_ipp_prop_list {
-	__u32	version;
-	__u32	ipp_id;
-	__u32	count;
-	__u32	writeback;
-	__u32	flip;
-	__u32	degree;
-	__u32	csc;
-	__u32	crop;
-	__u32	scale;
-	__u32	refresh_min;
-	__u32	refresh_max;
-	__u32	reserved;
-	struct drm_exynos_sz	crop_min;
-	struct drm_exynos_sz	crop_max;
-	struct drm_exynos_sz	scale_min;
-	struct drm_exynos_sz	scale_max;
-};
-
-/**
- * A structure for ipp config.
- *
- * @ops_id: property of operation directions.
- * @flip: property of mirror, flip.
- * @degree: property of rotation degree.
- * @fmt: property of image format.
- * @sz: property of image size.
- * @pos: property of image position(src-cropped,dst-scaler).
- */
-struct drm_exynos_ipp_config {
-	__u32 ops_id;
-	__u32 flip;
-	__u32 degree;
-	__u32	fmt;
-	struct drm_exynos_sz	sz;
-	struct drm_exynos_pos	pos;
-};
-
-enum drm_exynos_ipp_cmd {
-	IPP_CMD_NONE,
-	IPP_CMD_M2M,
-	IPP_CMD_WB,
-	IPP_CMD_OUTPUT,
-	IPP_CMD_MAX,
-};
-
-/**
- * A structure for ipp property.
- *
- * @config: source, destination config.
- * @cmd: definition of command.
- * @ipp_id: id of ipp driver.
- * @prop_id: id of property.
- * @refresh_rate: refresh rate.
- */
-struct drm_exynos_ipp_property {
-	struct drm_exynos_ipp_config config[EXYNOS_DRM_OPS_MAX];
-	__u32	cmd;
-	__u32	ipp_id;
-	__u32	prop_id;
-	__u32	refresh_rate;
-};
-
-enum drm_exynos_ipp_buf_type {
-	IPP_BUF_ENQUEUE,
-	IPP_BUF_DEQUEUE,
-};
-
-/**
- * A structure for ipp buffer operations.
- *
- * @ops_id: operation directions.
- * @buf_type: definition of buffer.
- * @prop_id: id of property.
- * @buf_id: id of buffer.
- * @handle: Y, Cb, Cr each planar handle.
- * @user_data: user data.
- */
-struct drm_exynos_ipp_queue_buf {
-	__u32	ops_id;
-	__u32	buf_type;
-	__u32	prop_id;
-	__u32	buf_id;
-	__u32	handle[EXYNOS_DRM_PLANAR_MAX];
-	__u32	reserved;
-	__u64	user_data;
-};
-
-enum drm_exynos_ipp_ctrl {
-	IPP_CTRL_PLAY,
-	IPP_CTRL_STOP,
-	IPP_CTRL_PAUSE,
-	IPP_CTRL_RESUME,
-	IPP_CTRL_MAX,
-};
-
-/**
- * A structure for ipp start/stop operations.
- *
- * @prop_id: id of property.
- * @ctrl: definition of control.
- */
-struct drm_exynos_ipp_cmd_ctrl {
-	__u32	prop_id;
-	__u32	ctrl;
-};
-
 #define DRM_EXYNOS_GEM_CREATE		0x00
 #define DRM_EXYNOS_GEM_MAP		0x01
 /* Reserved 0x03 ~ 0x05 for exynos specific gem ioctl */
@@ -312,11 +146,7 @@ struct drm_exynos_ipp_cmd_ctrl {
 #define DRM_EXYNOS_G2D_SET_CMDLIST	0x21
 #define DRM_EXYNOS_G2D_EXEC		0x22
 
-/* IPP - Image Post Processing */
-#define DRM_EXYNOS_IPP_GET_PROPERTY	0x30
-#define DRM_EXYNOS_IPP_SET_PROPERTY	0x31
-#define DRM_EXYNOS_IPP_QUEUE_BUF	0x32
-#define DRM_EXYNOS_IPP_CMD_CTRL	0x33
+/* Reserved 0x30 ~ 0x33 for obsolete Exynos IPP ioctls */
 
 #define DRM_IOCTL_EXYNOS_GEM_CREATE		DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_GEM_CREATE, struct drm_exynos_gem_create)
@@ -335,18 +165,8 @@ struct drm_exynos_ipp_cmd_ctrl {
 #define DRM_IOCTL_EXYNOS_G2D_EXEC		DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_G2D_EXEC, struct drm_exynos_g2d_exec)
 
-#define DRM_IOCTL_EXYNOS_IPP_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + \
-		DRM_EXYNOS_IPP_GET_PROPERTY, struct drm_exynos_ipp_prop_list)
-#define DRM_IOCTL_EXYNOS_IPP_SET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + \
-		DRM_EXYNOS_IPP_SET_PROPERTY, struct drm_exynos_ipp_property)
-#define DRM_IOCTL_EXYNOS_IPP_QUEUE_BUF	DRM_IOWR(DRM_COMMAND_BASE + \
-		DRM_EXYNOS_IPP_QUEUE_BUF, struct drm_exynos_ipp_queue_buf)
-#define DRM_IOCTL_EXYNOS_IPP_CMD_CTRL		DRM_IOWR(DRM_COMMAND_BASE + \
-		DRM_EXYNOS_IPP_CMD_CTRL, struct drm_exynos_ipp_cmd_ctrl)
-
 /* EXYNOS specific events */
 #define DRM_EXYNOS_G2D_EVENT		0x80000000
-#define DRM_EXYNOS_IPP_EVENT		0x80000001
 
 struct drm_exynos_g2d_event {
 	struct drm_event	base;
@@ -357,16 +177,6 @@ struct drm_exynos_g2d_event {
 	__u32			reserved;
 };
 
-struct drm_exynos_ipp_event {
-	struct drm_event	base;
-	__u64			user_data;
-	__u32			tv_sec;
-	__u32			tv_usec;
-	__u32			prop_id;
-	__u32			reserved;
-	__u32			buf_id[EXYNOS_DRM_OPS_MAX];
-};
-
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
index af549d4ecf1b6e76522c6a157db98d48e4190b30..8f95303f9d807d10d4fd6850d91a2486b0a490ec 100644
--- a/include/uapi/linux/bpf_perf_event.h
+++ b/include/uapi/linux/bpf_perf_event.h
@@ -8,11 +8,10 @@
 #ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
 #define _UAPI__LINUX_BPF_PERF_EVENT_H__
 
-#include <linux/types.h>
-#include <linux/ptrace.h>
+#include <asm/bpf_perf_event.h>
 
 struct bpf_perf_event_data {
-	struct pt_regs regs;
+	bpf_user_pt_regs_t regs;
 	__u64 sample_period;
 };
 
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 3ee3bf7c85262b034702875b7d356ac7595abea6..144de4d2f385e0b05233162ec425a6763262e038 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -23,6 +23,7 @@
 #define _UAPI_LINUX_IF_ETHER_H
 
 #include <linux/types.h>
+#include <linux/libc-compat.h>
 
 /*
  *	IEEE 802.3 Ethernet magic constants.  The frame sizes omit the preamble
@@ -149,11 +150,13 @@
  *	This is an Ethernet frame header.
  */
 
+#if __UAPI_DEF_ETHHDR
 struct ethhdr {
 	unsigned char	h_dest[ETH_ALEN];	/* destination eth addr	*/
 	unsigned char	h_source[ETH_ALEN];	/* source ether addr	*/
 	__be16		h_proto;		/* packet type ID field	*/
 } __attribute__((packed));
+#endif
 
 
 #endif /* _UAPI_LINUX_IF_ETHER_H */
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 6e80501368aee68e77f800349c25d53adb56b7c6..f4cab5b3ba9aaf3c78c7004665363aef89aa8d3b 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -58,7 +58,8 @@ struct kfd_ioctl_create_queue_args {
 	__u64 eop_buffer_address;	/* to KFD */
 	__u64 eop_buffer_size;	/* to KFD */
 	__u64 ctx_save_restore_address; /* to KFD */
-	__u64 ctx_save_restore_size;	/* to KFD */
+	__u32 ctx_save_restore_size;	/* to KFD */
+	__u32 ctl_stack_size;		/* to KFD */
 };
 
 struct kfd_ioctl_destroy_queue_args {
@@ -261,6 +262,13 @@ struct kfd_ioctl_get_tile_config_args {
 	 */
 };
 
+struct kfd_ioctl_set_trap_handler_args {
+	uint64_t tba_addr;		/* to KFD */
+	uint64_t tma_addr;		/* to KFD */
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t pad;
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -321,7 +329,10 @@ struct kfd_ioctl_get_tile_config_args {
 #define AMDKFD_IOC_GET_TILE_CONFIG                                      \
 		AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args)
 
+#define AMDKFD_IOC_SET_TRAP_HANDLER		\
+		AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x13
+#define AMDKFD_COMMAND_END		0x14
 
 #endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 282d7613fce8788bc466913d7fcacc960dd1c6de..496e59a2738ba99308f438e1f0509e66e17086cb 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -630,9 +630,9 @@ struct kvm_s390_irq {
 
 struct kvm_s390_irq_state {
 	__u64 buf;
-	__u32 flags;
+	__u32 flags;        /* will stay unused for compatibility reasons */
 	__u32 len;
-	__u32 reserved[4];
+	__u32 reserved[4];  /* will stay unused for compatibility reasons */
 };
 
 /* for KVM_SET_GUEST_DEBUG */
diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index 282875cf805657b41eb0b5a797aa77647b889159..fc29efaa918cb76fab92a65890cca1aab3ecf9ba 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -168,47 +168,106 @@
 
 /* If we did not see any headers from any supported C libraries,
  * or we are being included in the kernel, then define everything
- * that we need. */
+ * that we need. Check for previous __UAPI_* definitions to give
+ * unsupported C libraries a way to opt out of any kernel definition. */
 #else /* !defined(__GLIBC__) */
 
 /* Definitions for if.h */
+#ifndef __UAPI_DEF_IF_IFCONF
 #define __UAPI_DEF_IF_IFCONF 1
+#endif
+#ifndef __UAPI_DEF_IF_IFMAP
 #define __UAPI_DEF_IF_IFMAP 1
+#endif
+#ifndef __UAPI_DEF_IF_IFNAMSIZ
 #define __UAPI_DEF_IF_IFNAMSIZ 1
+#endif
+#ifndef __UAPI_DEF_IF_IFREQ
 #define __UAPI_DEF_IF_IFREQ 1
+#endif
 /* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
+#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS
 #define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1
+#endif
 /* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
+#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
 #define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+#endif
 
 /* Definitions for in.h */
+#ifndef __UAPI_DEF_IN_ADDR
 #define __UAPI_DEF_IN_ADDR		1
+#endif
+#ifndef __UAPI_DEF_IN_IPPROTO
 #define __UAPI_DEF_IN_IPPROTO		1
+#endif
+#ifndef __UAPI_DEF_IN_PKTINFO
 #define __UAPI_DEF_IN_PKTINFO		1
+#endif
+#ifndef __UAPI_DEF_IP_MREQ
 #define __UAPI_DEF_IP_MREQ		1
+#endif
+#ifndef __UAPI_DEF_SOCKADDR_IN
 #define __UAPI_DEF_SOCKADDR_IN		1
+#endif
+#ifndef __UAPI_DEF_IN_CLASS
 #define __UAPI_DEF_IN_CLASS		1
+#endif
 
 /* Definitions for in6.h */
+#ifndef __UAPI_DEF_IN6_ADDR
 #define __UAPI_DEF_IN6_ADDR		1
+#endif
+#ifndef __UAPI_DEF_IN6_ADDR_ALT
 #define __UAPI_DEF_IN6_ADDR_ALT		1
+#endif
+#ifndef __UAPI_DEF_SOCKADDR_IN6
 #define __UAPI_DEF_SOCKADDR_IN6		1
+#endif
+#ifndef __UAPI_DEF_IPV6_MREQ
 #define __UAPI_DEF_IPV6_MREQ		1
+#endif
+#ifndef __UAPI_DEF_IPPROTO_V6
 #define __UAPI_DEF_IPPROTO_V6		1
+#endif
+#ifndef __UAPI_DEF_IPV6_OPTIONS
 #define __UAPI_DEF_IPV6_OPTIONS		1
+#endif
+#ifndef __UAPI_DEF_IN6_PKTINFO
 #define __UAPI_DEF_IN6_PKTINFO		1
+#endif
+#ifndef __UAPI_DEF_IP6_MTUINFO
 #define __UAPI_DEF_IP6_MTUINFO		1
+#endif
 
 /* Definitions for ipx.h */
+#ifndef __UAPI_DEF_SOCKADDR_IPX
 #define __UAPI_DEF_SOCKADDR_IPX			1
+#endif
+#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION
 #define __UAPI_DEF_IPX_ROUTE_DEFINITION		1
+#endif
+#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION
 #define __UAPI_DEF_IPX_INTERFACE_DEFINITION	1
+#endif
+#ifndef __UAPI_DEF_IPX_CONFIG_DATA
 #define __UAPI_DEF_IPX_CONFIG_DATA		1
+#endif
+#ifndef __UAPI_DEF_IPX_ROUTE_DEF
 #define __UAPI_DEF_IPX_ROUTE_DEF		1
+#endif
 
 /* Definitions for xattr.h */
+#ifndef __UAPI_DEF_XATTR
 #define __UAPI_DEF_XATTR		1
+#endif
 
 #endif /* __GLIBC__ */
 
+/* Definitions for if_ether.h */
+/* allow libcs like musl to deactivate this, glibc does not implement this. */
+#ifndef __UAPI_DEF_ETHHDR
+#define __UAPI_DEF_ETHHDR		1
+#endif
+
 #endif /* _UAPI_LIBC_COMPAT_H */
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 3fea7709a4412c5e3c7f94daed08576b55bf3bc2..57ccfb32e87f97bad06f7a263ceecc20cd74dc9d 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -36,7 +36,7 @@ enum ip_conntrack_info {
 
 #define NF_CT_STATE_INVALID_BIT			(1 << 0)
 #define NF_CT_STATE_BIT(ctinfo)			(1 << ((ctinfo) % IP_CT_IS_REPLY + 1))
-#define NF_CT_STATE_UNTRACKED_BIT		(1 << (IP_CT_UNTRACKED + 1))
+#define NF_CT_STATE_UNTRACKED_BIT		(1 << 6)
 
 /* Bitset representing status of connection. */
 enum ip_conntrack_status {
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index af3cc2f4e1ad00dff0e011a4e94e0905d085d0ca..37b5096ae97be4e6115b0941b82918e11250ee6b 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -256,7 +256,6 @@ struct tc_red_qopt {
 #define TC_RED_ECN		1
 #define TC_RED_HARDDROP		2
 #define TC_RED_ADAPTATIVE	4
-#define TC_RED_OFFLOADED	8
 };
 
 struct tc_red_xstats {
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index d8b5f80c2ea66dd2d75600c1a556c2bc9cfa101f..843e29aa3cacf0e06beea9b59af5b5c7acc9ca3b 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -557,6 +557,7 @@ enum {
 	TCA_PAD,
 	TCA_DUMP_INVISIBLE,
 	TCA_CHAIN,
+	TCA_HW_OFFLOAD,
 	__TCA_MAX
 };
 
diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h
index 41a0a81b01e6bc4c4c5a65f6c36f4be9b01c160e..c4c79aa331bd123e9d340ec7283ec97e89699584 100644
--- a/include/uapi/linux/usb/ch9.h
+++ b/include/uapi/linux/usb/ch9.h
@@ -880,6 +880,8 @@ struct usb_wireless_cap_descriptor {	/* Ultra Wide Band */
 	__u8  bReserved;
 } __attribute__((packed));
 
+#define USB_DT_USB_WIRELESS_CAP_SIZE	11
+
 /* USB 2.0 Extension descriptor */
 #define	USB_CAP_TYPE_EXT		2
 
@@ -1072,6 +1074,7 @@ struct usb_ptm_cap_descriptor {
 	__u8  bDevCapabilityType;
 } __attribute__((packed));
 
+#define USB_DT_USB_PTM_ID_SIZE		3
 /*
  * The size of the descriptor for the Sublink Speed Attribute Count
  * (SSAC) specified in bmAttributes[4:0].
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index ce4c07688b13d86b7d4dffc3b1db27a3406ef81c..abbad94e14a1021ed622ef428cbd5a6fee9ed86c 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -344,7 +344,7 @@ void ipu_prg_channel_disable(struct ipuv3_channel *ipu_chan);
 int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
 			      unsigned int axi_id,  unsigned int width,
 			      unsigned int height, unsigned int stride,
-			      u32 format, unsigned long *eba);
+			      u32 format, uint64_t modifier, unsigned long *eba);
 
 /*
  * IPU CMOS Sensor Interface (csi) functions
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index 4914b93a23f2bdeb066a4048d7ab749456ae3fe3..61f410fd74e4cf4180f7ad5ffa1d996cc1528c91 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -44,3 +44,8 @@ static inline void xen_balloon_init(void)
 {
 }
 #endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+struct resource;
+void arch_xen_balloon_init(struct resource *hostmem_resource);
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index 2934249fba46746253b89d14f581547ada3d0bad..a9a2e2c86671a18c02c49643151204dd3358157b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -461,10 +461,15 @@ endmenu # "CPU/Task time and stats accounting"
 
 config CPU_ISOLATION
 	bool "CPU isolation"
+	depends on SMP || COMPILE_TEST
+	default y
 	help
 	  Make sure that CPUs running critical tasks are not disturbed by
 	  any source of "noise" such as unbound workqueues, timers, kthreads...
-	  Unbound jobs get offloaded to housekeeping CPUs.
+	  Unbound jobs get offloaded to housekeeping CPUs. This is driven by
+	  the "isolcpus=" boot parameter.
+
+	  Say Y if unsure.
 
 source "kernel/rcu/Kconfig"
 
@@ -1392,6 +1397,13 @@ config BPF_SYSCALL
 	  Enable the bpf() system call that allows to manipulate eBPF
 	  programs and maps via file descriptors.
 
+config BPF_JIT_ALWAYS_ON
+	bool "Permanently enable BPF JIT and remove BPF interpreter"
+	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+	help
+	  Enables BPF JIT and removes BPF interpreter to avoid
+	  speculative execution of BPF instructions by the interpreter
+
 config USERFAULTFD
 	bool "Enable userfaultfd() system call"
 	select ANON_INODES
diff --git a/init/main.c b/init/main.c
index dfec3809e7404f9658d51a20aae3869cd7aab9c3..a8100b9548398e8b102052f2c1418b21ea423825 100644
--- a/init/main.c
+++ b/init/main.c
@@ -75,6 +75,7 @@
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
+#include <linux/pti.h>
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
 #include <linux/sched_clock.h>
@@ -504,6 +505,10 @@ static void __init mm_init(void)
 	pgtable_init();
 	vmalloc_init();
 	ioremap_huge_init();
+	/* Should be run before the first non-init thread is created */
+	init_espfix_bsp();
+	/* Should be run after espfix64 is set up. */
+	pti_init();
 }
 
 asmlinkage __visible void __init start_kernel(void)
@@ -588,6 +593,12 @@ asmlinkage __visible void __init start_kernel(void)
 		local_irq_disable();
 	radix_tree_init();
 
+	/*
+	 * Set up housekeeping before setting up workqueues to allow the unbound
+	 * workqueue to take non-housekeeping into account.
+	 */
+	housekeeping_init();
+
 	/*
 	 * Allow workqueue creation and work item queueing/cancelling
 	 * early.  Work item execution depends on kthreads and starts after
@@ -605,7 +616,6 @@ asmlinkage __visible void __init start_kernel(void)
 	early_irq_init();
 	init_IRQ();
 	tick_init();
-	housekeeping_init();
 	rcu_init_nohz();
 	init_timers();
 	hrtimers_init();
@@ -673,10 +683,6 @@ asmlinkage __visible void __init start_kernel(void)
 #ifdef CONFIG_X86
 	if (efi_enabled(EFI_RUNTIME_SERVICES))
 		efi_enter_virtual_mode();
-#endif
-#ifdef CONFIG_X86_ESPFIX64
-	/* Should be run before the first non-init thread is created */
-	init_espfix_bsp();
 #endif
 	thread_stack_cache_init();
 	cred_init();
diff --git a/kernel/acct.c b/kernel/acct.c
index d15c0ee4d95504a88337498045a84f53a6ae0d15..addf7732fb562f5b0eb51a0bd8894aec81cc8afe 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -102,7 +102,7 @@ static int check_free_space(struct bsd_acct_struct *acct)
 {
 	struct kstatfs sbuf;
 
-	if (time_is_before_jiffies(acct->needcheck))
+	if (time_is_after_jiffies(acct->needcheck))
 		goto out;
 
 	/* May block */
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 7c25426d3cf5699ffde545ee9a8d5e254f16f517..aaa319848e7d57422225aa99055b574dc904e36d 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -53,9 +53,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
 	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 	int numa_node = bpf_map_attr_numa_node(attr);
+	u32 elem_size, index_mask, max_entries;
+	bool unpriv = !capable(CAP_SYS_ADMIN);
 	struct bpf_array *array;
 	u64 array_size;
-	u32 elem_size;
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -72,11 +73,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
 	elem_size = round_up(attr->value_size, 8);
 
+	max_entries = attr->max_entries;
+	index_mask = roundup_pow_of_two(max_entries) - 1;
+
+	if (unpriv)
+		/* round up array size to nearest power of 2,
+		 * since cpu will speculate within index_mask limits
+		 */
+		max_entries = index_mask + 1;
+
 	array_size = sizeof(*array);
 	if (percpu)
-		array_size += (u64) attr->max_entries * sizeof(void *);
+		array_size += (u64) max_entries * sizeof(void *);
 	else
-		array_size += (u64) attr->max_entries * elem_size;
+		array_size += (u64) max_entries * elem_size;
 
 	/* make sure there is no u32 overflow later in round_up() */
 	if (array_size >= U32_MAX - PAGE_SIZE)
@@ -86,6 +96,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array = bpf_map_area_alloc(array_size, numa_node);
 	if (!array)
 		return ERR_PTR(-ENOMEM);
+	array->index_mask = index_mask;
+	array->map.unpriv_array = unpriv;
 
 	/* copy mandatory map attributes */
 	array->map.map_type = attr->map_type;
@@ -121,12 +133,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
 	if (unlikely(index >= array->map.max_entries))
 		return NULL;
 
-	return array->value + array->elem_size * index;
+	return array->value + array->elem_size * (index & array->index_mask);
 }
 
 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
 static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct bpf_insn *insn = insn_buf;
 	u32 elem_size = round_up(map->value_size, 8);
 	const int ret = BPF_REG_0;
@@ -135,7 +148,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+	if (map->unpriv_array) {
+		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
+		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+	} else {
+		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+	}
 
 	if (is_power_of_2(elem_size)) {
 		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
@@ -157,7 +175,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
 	if (unlikely(index >= array->map.max_entries))
 		return NULL;
 
-	return this_cpu_ptr(array->pptrs[index]);
+	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
 }
 
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
@@ -177,7 +195,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
 	 */
 	size = round_up(map->value_size, 8);
 	rcu_read_lock();
-	pptr = array->pptrs[index];
+	pptr = array->pptrs[index & array->index_mask];
 	for_each_possible_cpu(cpu) {
 		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
 		off += size;
@@ -225,10 +243,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 		return -EEXIST;
 
 	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
-		memcpy(this_cpu_ptr(array->pptrs[index]),
+		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
 		       value, map->value_size);
 	else
-		memcpy(array->value + array->elem_size * index,
+		memcpy(array->value +
+		       array->elem_size * (index & array->index_mask),
 		       value, map->value_size);
 	return 0;
 }
@@ -262,7 +281,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 	 */
 	size = round_up(map->value_size, 8);
 	rcu_read_lock();
-	pptr = array->pptrs[index];
+	pptr = array->pptrs[index & array->index_mask];
 	for_each_possible_cpu(cpu) {
 		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
 		off += size;
@@ -613,6 +632,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
 static u32 array_of_map_gen_lookup(struct bpf_map *map,
 				   struct bpf_insn *insn_buf)
 {
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 elem_size = round_up(map->value_size, 8);
 	struct bpf_insn *insn = insn_buf;
 	const int ret = BPF_REG_0;
@@ -621,7 +641,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
 
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+	if (map->unpriv_array) {
+		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
+		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+	} else {
+		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+	}
 	if (is_power_of_2(elem_size))
 		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
 	else
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b9f8686a84cf1a5ee9d2b92d21579af11d8690aa..51ec2dda7f08c6c90af084589bb6d80662c77d12 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -767,6 +767,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 }
 EXPORT_SYMBOL_GPL(__bpf_call_base);
 
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 /**
  *	__bpf_prog_run - run eBPF program on a given context
  *	@ctx: is the data we are operating on
@@ -1317,6 +1318,14 @@ EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 };
 
+#else
+static unsigned int __bpf_prog_ret0(const void *ctx,
+				    const struct bpf_insn *insn)
+{
+	return 0;
+}
+#endif
+
 bool bpf_prog_array_compatible(struct bpf_array *array,
 			       const struct bpf_prog *fp)
 {
@@ -1364,9 +1373,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
 
 	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+#else
+	fp->bpf_func = __bpf_prog_ret0;
+#endif
 
 	/* eBPF JITs can rewrite the program in case constant
 	 * blinding is active. However, in case of error during
@@ -1376,6 +1389,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	 */
 	if (!bpf_prog_is_dev_bound(fp->aux)) {
 		fp = bpf_int_jit_compile(fp);
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+		if (!fp->jited) {
+			*err = -ENOTSUPP;
+			return fp;
+		}
+#endif
 	} else {
 		*err = bpf_prog_offload_compile(fp);
 		if (*err)
@@ -1447,7 +1466,8 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
 	rcu_read_lock();
 	prog = rcu_dereference(progs)->progs;
 	for (; *prog; prog++)
-		cnt++;
+		if (*prog != &dummy_bpf_prog.prog)
+			cnt++;
 	rcu_read_unlock();
 	return cnt;
 }
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index e469e05c8e83bc3256378644e3f3c26555651261..3905d4bc5b80d74f0b8f9e2e8f8526a0115ce239 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -114,6 +114,7 @@ static void htab_free_elems(struct bpf_htab *htab)
 		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
 					 htab->map.key_size);
 		free_percpu(pptr);
+		cond_resched();
 	}
 free_elems:
 	bpf_map_area_free(htab->elems);
@@ -159,6 +160,7 @@ static int prealloc_init(struct bpf_htab *htab)
 			goto free_elems;
 		htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
 				  pptr);
+		cond_resched();
 	}
 
 skip_percpu_elems:
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 01aaef1a77c5af164660b0f75ac99e4bd7c55a52..5bb5e49ef4c3831050cf9fa92278a150fde40bfa 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -368,7 +368,45 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
 	putname(pname);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(bpf_obj_get_user);
+
+static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
+{
+	struct bpf_prog *prog;
+	int ret = inode_permission(inode, MAY_READ | MAY_WRITE);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (inode->i_op == &bpf_map_iops)
+		return ERR_PTR(-EINVAL);
+	if (inode->i_op != &bpf_prog_iops)
+		return ERR_PTR(-EACCES);
+
+	prog = inode->i_private;
+
+	ret = security_bpf_prog(prog);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	if (!bpf_prog_get_ok(prog, &type, false))
+		return ERR_PTR(-EINVAL);
+
+	return bpf_prog_inc(prog);
+}
+
+struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
+{
+	struct bpf_prog *prog;
+	struct path path;
+	int ret = kern_path(name, LOOKUP_FOLLOW, &path);
+	if (ret)
+		return ERR_PTR(ret);
+	prog = __get_prog_inode(d_backing_inode(path.dentry), type);
+	if (!IS_ERR(prog))
+		touch_atime(&path);
+	path_put(&path);
+	return prog;
+}
+EXPORT_SYMBOL(bpf_prog_get_type_path);
 
 static void bpf_evict_inode(struct inode *inode)
 {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 68ec884440b75da08824249db74bb992f6d938ce..8455b89d1bbf698f86c44bd1e1846b4c7d4ba60f 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -1,3 +1,18 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree.
+ *
+ * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+ * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+ * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+ */
+
 #include <linux/bpf.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bug.h>
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 5ee2e41893d9662641001c1de8d0776fccb590aa..1712d319c2d84292a39cb5c04cd7e5459f6d8174 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -591,8 +591,15 @@ static void sock_map_free(struct bpf_map *map)
 
 		write_lock_bh(&sock->sk_callback_lock);
 		psock = smap_psock_sk(sock);
-		smap_list_remove(psock, &stab->sock_map[i]);
-		smap_release_sock(psock, sock);
+		/* This check handles a racing sock event that can get the
+		 * sk_callback_lock before this case but after xchg happens
+		 * causing the refcnt to hit zero and sock user data (psock)
+		 * to be null and queued for garbage collection.
+		 */
+		if (likely(psock)) {
+			smap_list_remove(psock, &stab->sock_map[i]);
+			smap_release_sock(psock, sock);
+		}
 		write_unlock_bh(&sock->sk_callback_lock);
 	}
 	rcu_read_unlock();
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2c4cfeaa8d5e785f16758be08cb8a462766363d9..5cb783fc8224b3c5acb65bae4d79c0ec74a71c7a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1057,7 +1057,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
 
-static bool bpf_prog_get_ok(struct bpf_prog *prog,
+bool bpf_prog_get_ok(struct bpf_prog *prog,
 			    enum bpf_prog_type *attach_type, bool attach_drv)
 {
 	/* not an attachment, just a refcount inc, always allow */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d4593571c4049b8d046f53f81f8e17911a21e0c9..b414d6b2d47070505f3a60fcd8b58478b293d2ad 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1059,6 +1059,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 		break;
 	case PTR_TO_STACK:
 		pointer_desc = "stack ";
+		/* The stack spill tracking logic in check_stack_write()
+		 * and check_stack_read() relies on stack accesses being
+		 * aligned.
+		 */
+		strict = true;
 		break;
 	default:
 		break;
@@ -1067,6 +1072,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 					   strict);
 }
 
+/* truncate register to smaller size (in bytes)
+ * must be called with size < BPF_REG_SIZE
+ */
+static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
+{
+	u64 mask;
+
+	/* clear high bits in bit representation */
+	reg->var_off = tnum_cast(reg->var_off, size);
+
+	/* fix arithmetic bounds */
+	mask = ((u64)1 << (size * 8)) - 1;
+	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
+		reg->umin_value &= mask;
+		reg->umax_value &= mask;
+	} else {
+		reg->umin_value = 0;
+		reg->umax_value = mask;
+	}
+	reg->smin_value = reg->umin_value;
+	reg->smax_value = reg->umax_value;
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
@@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
 	    regs[value_regno].type == SCALAR_VALUE) {
 		/* b/h/w load zero-extends, mark upper bits as known 0 */
-		regs[value_regno].var_off =
-			tnum_cast(regs[value_regno].var_off, size);
-		__update_reg_bounds(&regs[value_regno]);
+		coerce_reg_to_size(&regs[value_regno], size);
 	}
 	return err;
 }
@@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 		tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
 		verbose(env, "invalid variable stack read R%d var_off=%s\n",
 			regno, tn_buf);
+		return -EACCES;
 	}
 	off = regs[regno].off + regs[regno].var_off.value;
 	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@ -1674,7 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 		return -EINVAL;
 	}
 
+	/* With LD_ABS/IND some JITs save/restore skb from r1. */
 	changes_data = bpf_helper_changes_pkt_data(fn->func);
+	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
+		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
+			func_id_name(func_id), func_id);
+		return -EINVAL;
+	}
 
 	memset(&meta, 0, sizeof(meta));
 	meta.pkt_access = fn->pkt_access;
@@ -1696,6 +1729,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
 	if (err)
 		return err;
+	if (func_id == BPF_FUNC_tail_call) {
+		if (meta.map_ptr == NULL) {
+			verbose(env, "verifier bug\n");
+			return -EINVAL;
+		}
+		env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
+	}
 	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
 	if (err)
 		return err;
@@ -1766,14 +1806,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	return 0;
 }
 
-static void coerce_reg_to_32(struct bpf_reg_state *reg)
-{
-	/* clear high 32 bits */
-	reg->var_off = tnum_cast(reg->var_off, 4);
-	/* Update bounds */
-	__update_reg_bounds(reg);
-}
-
 static bool signed_add_overflows(s64 a, s64 b)
 {
 	/* Do the add in u64, where overflow is well-defined */
@@ -1794,6 +1826,41 @@ static bool signed_sub_overflows(s64 a, s64 b)
 	return res > a;
 }
 
+static bool check_reg_sane_offset(struct bpf_verifier_env *env,
+				  const struct bpf_reg_state *reg,
+				  enum bpf_reg_type type)
+{
+	bool known = tnum_is_const(reg->var_off);
+	s64 val = reg->var_off.value;
+	s64 smin = reg->smin_value;
+
+	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
+		verbose(env, "math between %s pointer and %lld is not allowed\n",
+			reg_type_str[type], val);
+		return false;
+	}
+
+	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
+		verbose(env, "%s pointer offset %d is not allowed\n",
+			reg_type_str[type], reg->off);
+		return false;
+	}
+
+	if (smin == S64_MIN) {
+		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
+			reg_type_str[type]);
+		return false;
+	}
+
+	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
+		verbose(env, "value %lld makes %s pointer be out of bounds\n",
+			smin, reg_type_str[type]);
+		return false;
+	}
+
+	return true;
+}
+
 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
  * Caller should also handle BPF_MOV case separately.
  * If we return -EACCES, caller may want to try again treating pointer as a
@@ -1830,29 +1897,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 
 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
-		if (!env->allow_ptr_leaks)
-			verbose(env,
-				"R%d 32-bit pointer arithmetic prohibited\n",
-				dst);
+		verbose(env,
+			"R%d 32-bit pointer arithmetic prohibited\n",
+			dst);
 		return -EACCES;
 	}
 
 	if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
-		if (!env->allow_ptr_leaks)
-			verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
-				dst);
+		verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
+			dst);
 		return -EACCES;
 	}
 	if (ptr_reg->type == CONST_PTR_TO_MAP) {
-		if (!env->allow_ptr_leaks)
-			verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
-				dst);
+		verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
+			dst);
 		return -EACCES;
 	}
 	if (ptr_reg->type == PTR_TO_PACKET_END) {
-		if (!env->allow_ptr_leaks)
-			verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
-				dst);
+		verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
+			dst);
 		return -EACCES;
 	}
 
@@ -1862,6 +1925,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	dst_reg->type = ptr_reg->type;
 	dst_reg->id = ptr_reg->id;
 
+	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
+	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
+		return -EINVAL;
+
 	switch (opcode) {
 	case BPF_ADD:
 		/* We can take a fixed offset as long as it doesn't overflow
@@ -1915,9 +1982,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	case BPF_SUB:
 		if (dst_reg == off_reg) {
 			/* scalar -= pointer.  Creates an unknown scalar */
-			if (!env->allow_ptr_leaks)
-				verbose(env, "R%d tried to subtract pointer from scalar\n",
-					dst);
+			verbose(env, "R%d tried to subtract pointer from scalar\n",
+				dst);
 			return -EACCES;
 		}
 		/* We don't allow subtraction from FP, because (according to
@@ -1925,9 +1991,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		 * be able to deal with it.
 		 */
 		if (ptr_reg->type == PTR_TO_STACK) {
-			if (!env->allow_ptr_leaks)
-				verbose(env, "R%d subtraction from stack pointer prohibited\n",
-					dst);
+			verbose(env, "R%d subtraction from stack pointer prohibited\n",
+				dst);
 			return -EACCES;
 		}
 		if (known && (ptr_reg->off - smin_val ==
@@ -1976,28 +2041,30 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	case BPF_AND:
 	case BPF_OR:
 	case BPF_XOR:
-		/* bitwise ops on pointers are troublesome, prohibit for now.
-		 * (However, in principle we could allow some cases, e.g.
-		 * ptr &= ~3 which would reduce min_value by 3.)
-		 */
-		if (!env->allow_ptr_leaks)
-			verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
-				dst, bpf_alu_string[opcode >> 4]);
+		/* bitwise ops on pointers are troublesome, prohibit. */
+		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
+			dst, bpf_alu_string[opcode >> 4]);
 		return -EACCES;
 	default:
 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
-		if (!env->allow_ptr_leaks)
-			verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
-				dst, bpf_alu_string[opcode >> 4]);
+		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
+			dst, bpf_alu_string[opcode >> 4]);
 		return -EACCES;
 	}
 
+	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
+		return -EINVAL;
+
 	__update_reg_bounds(dst_reg);
 	__reg_deduce_bounds(dst_reg);
 	__reg_bound_offset(dst_reg);
 	return 0;
 }
 
+/* WARNING: This function does calculations on 64-bit values, but the actual
+ * execution may occur on 32-bit values. Therefore, things like bitshifts
+ * need extra checks in the 32-bit case.
+ */
 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 				      struct bpf_insn *insn,
 				      struct bpf_reg_state *dst_reg,
@@ -2008,12 +2075,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 	bool src_known, dst_known;
 	s64 smin_val, smax_val;
 	u64 umin_val, umax_val;
+	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
 
-	if (BPF_CLASS(insn->code) != BPF_ALU64) {
-		/* 32-bit ALU ops are (32,32)->64 */
-		coerce_reg_to_32(dst_reg);
-		coerce_reg_to_32(&src_reg);
-	}
 	smin_val = src_reg.smin_value;
 	smax_val = src_reg.smax_value;
 	umin_val = src_reg.umin_value;
@@ -2021,6 +2084,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 	src_known = tnum_is_const(src_reg.var_off);
 	dst_known = tnum_is_const(dst_reg->var_off);
 
+	if (!src_known &&
+	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
+		__mark_reg_unknown(dst_reg);
+		return 0;
+	}
+
 	switch (opcode) {
 	case BPF_ADD:
 		if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
@@ -2149,9 +2218,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 		__update_reg_bounds(dst_reg);
 		break;
 	case BPF_LSH:
-		if (umax_val > 63) {
-			/* Shifts greater than 63 are undefined.  This includes
-			 * shifts by a negative number.
+		if (umax_val >= insn_bitness) {
+			/* Shifts greater than 31 or 63 are undefined.
+			 * This includes shifts by a negative number.
 			 */
 			mark_reg_unknown(env, regs, insn->dst_reg);
 			break;
@@ -2177,27 +2246,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 		__update_reg_bounds(dst_reg);
 		break;
 	case BPF_RSH:
-		if (umax_val > 63) {
-			/* Shifts greater than 63 are undefined.  This includes
-			 * shifts by a negative number.
+		if (umax_val >= insn_bitness) {
+			/* Shifts greater than 31 or 63 are undefined.
+			 * This includes shifts by a negative number.
 			 */
 			mark_reg_unknown(env, regs, insn->dst_reg);
 			break;
 		}
-		/* BPF_RSH is an unsigned shift, so make the appropriate casts */
-		if (dst_reg->smin_value < 0) {
-			if (umin_val) {
-				/* Sign bit will be cleared */
-				dst_reg->smin_value = 0;
-			} else {
-				/* Lost sign bit information */
-				dst_reg->smin_value = S64_MIN;
-				dst_reg->smax_value = S64_MAX;
-			}
-		} else {
-			dst_reg->smin_value =
-				(u64)(dst_reg->smin_value) >> umax_val;
-		}
+		/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
+		 * be negative, then either:
+		 * 1) src_reg might be zero, so the sign bit of the result is
+		 *    unknown, so we lose our signed bounds
+		 * 2) it's known negative, thus the unsigned bounds capture the
+		 *    signed bounds
+		 * 3) the signed bounds cross zero, so they tell us nothing
+		 *    about the result
+		 * If the value in dst_reg is known nonnegative, then again the
+		 * unsigned bounts capture the signed bounds.
+		 * Thus, in all cases it suffices to blow away our signed bounds
+		 * and rely on inferring new ones from the unsigned bounds and
+		 * var_off of the result.
+		 */
+		dst_reg->smin_value = S64_MIN;
+		dst_reg->smax_value = S64_MAX;
 		if (src_known)
 			dst_reg->var_off = tnum_rshift(dst_reg->var_off,
 						       umin_val);
@@ -2213,6 +2284,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 		break;
 	}
 
+	if (BPF_CLASS(insn->code) != BPF_ALU64) {
+		/* 32-bit ALU ops are (32,32)->32 */
+		coerce_reg_to_size(dst_reg, 4);
+		coerce_reg_to_size(&src_reg, 4);
+	}
+
 	__reg_deduce_bounds(dst_reg);
 	__reg_bound_offset(dst_reg);
 	return 0;
@@ -2227,7 +2304,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 	struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
 	u8 opcode = BPF_OP(insn->code);
-	int rc;
 
 	dst_reg = &regs[insn->dst_reg];
 	src_reg = NULL;
@@ -2238,43 +2314,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 		if (src_reg->type != SCALAR_VALUE) {
 			if (dst_reg->type != SCALAR_VALUE) {
 				/* Combining two pointers by any ALU op yields
-				 * an arbitrary scalar.
+				 * an arbitrary scalar. Disallow all math except
+				 * pointer subtraction
 				 */
-				if (!env->allow_ptr_leaks) {
-					verbose(env, "R%d pointer %s pointer prohibited\n",
-						insn->dst_reg,
-						bpf_alu_string[opcode >> 4]);
-					return -EACCES;
+				if (opcode == BPF_SUB){
+					mark_reg_unknown(env, regs, insn->dst_reg);
+					return 0;
 				}
-				mark_reg_unknown(env, regs, insn->dst_reg);
-				return 0;
+				verbose(env, "R%d pointer %s pointer prohibited\n",
+					insn->dst_reg,
+					bpf_alu_string[opcode >> 4]);
+				return -EACCES;
 			} else {
 				/* scalar += pointer
 				 * This is legal, but we have to reverse our
 				 * src/dest handling in computing the range
 				 */
-				rc = adjust_ptr_min_max_vals(env, insn,
-							     src_reg, dst_reg);
-				if (rc == -EACCES && env->allow_ptr_leaks) {
-					/* scalar += unknown scalar */
-					__mark_reg_unknown(&off_reg);
-					return adjust_scalar_min_max_vals(
-							env, insn,
-							dst_reg, off_reg);
-				}
-				return rc;
+				return adjust_ptr_min_max_vals(env, insn,
+							       src_reg, dst_reg);
 			}
 		} else if (ptr_reg) {
 			/* pointer += scalar */
-			rc = adjust_ptr_min_max_vals(env, insn,
-						     dst_reg, src_reg);
-			if (rc == -EACCES && env->allow_ptr_leaks) {
-				/* unknown scalar += scalar */
-				__mark_reg_unknown(dst_reg);
-				return adjust_scalar_min_max_vals(
-						env, insn, dst_reg, *src_reg);
-			}
-			return rc;
+			return adjust_ptr_min_max_vals(env, insn,
+						       dst_reg, src_reg);
 		}
 	} else {
 		/* Pretend the src is a reg with a known value, since we only
@@ -2283,17 +2345,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 		off_reg.type = SCALAR_VALUE;
 		__mark_reg_known(&off_reg, insn->imm);
 		src_reg = &off_reg;
-		if (ptr_reg) { /* pointer += K */
-			rc = adjust_ptr_min_max_vals(env, insn,
-						     ptr_reg, src_reg);
-			if (rc == -EACCES && env->allow_ptr_leaks) {
-				/* unknown scalar += K */
-				__mark_reg_unknown(dst_reg);
-				return adjust_scalar_min_max_vals(
-						env, insn, dst_reg, off_reg);
-			}
-			return rc;
-		}
+		if (ptr_reg) /* pointer += K */
+			return adjust_ptr_min_max_vals(env, insn,
+						       ptr_reg, src_reg);
 	}
 
 	/* Got here implies adding two SCALAR_VALUEs */
@@ -2390,17 +2444,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 					return -EACCES;
 				}
 				mark_reg_unknown(env, regs, insn->dst_reg);
-				/* high 32 bits are known zero. */
-				regs[insn->dst_reg].var_off = tnum_cast(
-						regs[insn->dst_reg].var_off, 4);
-				__update_reg_bounds(&regs[insn->dst_reg]);
+				coerce_reg_to_size(&regs[insn->dst_reg], 4);
 			}
 		} else {
 			/* case: R = imm
 			 * remember the value we stored into this reg
 			 */
 			regs[insn->dst_reg].type = SCALAR_VALUE;
-			__mark_reg_known(regs + insn->dst_reg, insn->imm);
+			if (BPF_CLASS(insn->code) == BPF_ALU64) {
+				__mark_reg_known(regs + insn->dst_reg,
+						 insn->imm);
+			} else {
+				__mark_reg_known(regs + insn->dst_reg,
+						 (u32)insn->imm);
+			}
 		}
 
 	} else if (opcode > BPF_END) {
@@ -3431,15 +3488,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 			return range_within(rold, rcur) &&
 			       tnum_in(rold->var_off, rcur->var_off);
 		} else {
-			/* if we knew anything about the old value, we're not
-			 * equal, because we can't know anything about the
-			 * scalar value of the pointer in the new value.
+			/* We're trying to use a pointer in place of a scalar.
+			 * Even if the scalar was unbounded, this could lead to
+			 * pointer leaks because scalars are allowed to leak
+			 * while pointers are not. We could make this safe in
+			 * special cases if root is calling us, but it's
+			 * probably not worth the hassle.
 			 */
-			return rold->umin_value == 0 &&
-			       rold->umax_value == U64_MAX &&
-			       rold->smin_value == S64_MIN &&
-			       rold->smax_value == S64_MAX &&
-			       tnum_is_unknown(rold->var_off);
+			return false;
 		}
 	case PTR_TO_MAP_VALUE:
 		/* If the new min/max/var_off satisfy the old ones and
@@ -4407,6 +4463,35 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			 */
 			insn->imm = 0;
 			insn->code = BPF_JMP | BPF_TAIL_CALL;
+
+			/* instead of changing every JIT dealing with tail_call
+			 * emit two extra insns:
+			 * if (index >= max_entries) goto out;
+			 * index &= array->index_mask;
+			 * to avoid out-of-bounds cpu speculation
+			 */
+			map_ptr = env->insn_aux_data[i + delta].map_ptr;
+			if (map_ptr == BPF_MAP_PTR_POISON) {
+				verbose(env, "tail_call obusing map_ptr\n");
+				return -EINVAL;
+			}
+			if (!map_ptr->unpriv_array)
+				continue;
+			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
+						  map_ptr->max_entries, 2);
+			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
+						    container_of(map_ptr,
+								 struct bpf_array,
+								 map)->index_mask);
+			insn_buf[2] = *insn;
+			cnt = 3;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
 			continue;
 		}
 
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 024085daab1aede5958235b0663c19ec667b5836..a2c05d2476ac5fd3d530ba64938cd85be52fece2 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -123,7 +123,11 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 	 */
 	do {
 		css_task_iter_start(&from->self, 0, &it);
-		task = css_task_iter_next(&it);
+
+		do {
+			task = css_task_iter_next(&it);
+		} while (task && (task->flags & PF_EXITING));
+
 		if (task)
 			get_task_struct(task);
 		css_task_iter_end(&it);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 0b1ffe147f240c39726d79505c4e02e8fe40cd47..2cf06c274e4ca6cc66194f4ce0fcb9ad0ad82f88 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1397,7 +1397,7 @@ static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
 			 cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
 			 cft->name);
 	else
-		strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
+		strlcpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
 	return buf;
 }
 
@@ -1864,9 +1864,9 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
 
 	root->flags = opts->flags;
 	if (opts->release_agent)
-		strcpy(root->release_agent_path, opts->release_agent);
+		strlcpy(root->release_agent_path, opts->release_agent, PATH_MAX);
 	if (opts->name)
-		strcpy(root->name, opts->name);
+		strlcpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
 	if (opts->cpuset_clone_children)
 		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
 }
@@ -4125,26 +4125,24 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
 
 static void css_task_iter_advance(struct css_task_iter *it)
 {
-	struct list_head *l = it->task_pos;
+	struct list_head *next;
 
 	lockdep_assert_held(&css_set_lock);
-	WARN_ON_ONCE(!l);
-
 repeat:
 	/*
 	 * Advance iterator to find next entry.  cset->tasks is consumed
 	 * first and then ->mg_tasks.  After ->mg_tasks, we move onto the
 	 * next cset.
 	 */
-	l = l->next;
+	next = it->task_pos->next;
 
-	if (l == it->tasks_head)
-		l = it->mg_tasks_head->next;
+	if (next == it->tasks_head)
+		next = it->mg_tasks_head->next;
 
-	if (l == it->mg_tasks_head)
+	if (next == it->mg_tasks_head)
 		css_task_iter_advance_css_set(it);
 	else
-		it->task_pos = l;
+		it->task_pos = next;
 
 	/* if PROCS, skip over tasks which aren't group leaders */
 	if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c
index 5f780d8f6a9d787ed22cf30bd8be66a0ae78f069..9caeda6102491db1a32c34bc62ef75da2cd3f6df 100644
--- a/kernel/cgroup/debug.c
+++ b/kernel/cgroup/debug.c
@@ -50,7 +50,7 @@ static int current_css_set_read(struct seq_file *seq, void *v)
 
 	spin_lock_irq(&css_set_lock);
 	rcu_read_lock();
-	cset = rcu_dereference(current->cgroups);
+	cset = task_css_set(current);
 	refcnt = refcount_read(&cset->refcount);
 	seq_printf(seq, "css_set %pK %d", cset, refcnt);
 	if (refcnt > cset->nr_tasks)
@@ -96,7 +96,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
 
 	spin_lock_irq(&css_set_lock);
 	rcu_read_lock();
-	cset = rcu_dereference(current->cgroups);
+	cset = task_css_set(current);
 	list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
 		struct cgroup *c = link->cgrp;
 
diff --git a/kernel/cgroup/stat.c b/kernel/cgroup/stat.c
index 133b465691d6fe82462c33215639b6b7f97f04b7..1e111dd455c49cd9f8e3e7619fb4f11eaffb3c71 100644
--- a/kernel/cgroup/stat.c
+++ b/kernel/cgroup/stat.c
@@ -296,8 +296,12 @@ int cgroup_stat_init(struct cgroup *cgrp)
 	}
 
 	/* ->updated_children list is self terminated */
-	for_each_possible_cpu(cpu)
-		cgroup_cpu_stat(cgrp, cpu)->updated_children = cgrp;
+	for_each_possible_cpu(cpu) {
+		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
+
+		cstat->updated_children = cgrp;
+		u64_stats_init(&cstat->sync);
+	}
 
 	prev_cputime_init(&cgrp->stat.prev_cputime);
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 04892a82f6ac36c92324806b66a1c1855880c8f7..53f7dc65f9a3b917c1c347834257cf26521eff95 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -80,19 +80,19 @@ static struct lockdep_map cpuhp_state_down_map =
 	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
 
 
-static void inline cpuhp_lock_acquire(bool bringup)
+static inline void cpuhp_lock_acquire(bool bringup)
 {
 	lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
 }
 
-static void inline cpuhp_lock_release(bool bringup)
+static inline void cpuhp_lock_release(bool bringup)
 {
 	lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
 }
 #else
 
-static void inline cpuhp_lock_acquire(bool bringup) { }
-static void inline cpuhp_lock_release(bool bringup) { }
+static inline void cpuhp_lock_acquire(bool bringup) { }
+static inline void cpuhp_lock_release(bool bringup) { }
 
 #endif
 
@@ -780,8 +780,8 @@ static int takedown_cpu(unsigned int cpu)
 	BUG_ON(cpu_online(cpu));
 
 	/*
-	 * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
-	 * runnable tasks from the cpu, there's only the idle task left now
+	 * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
+	 * all runnable tasks from the CPU, there's only the idle task left now
 	 * that the migration thread is done doing the stop_machine thing.
 	 *
 	 * Wait for the stop thread to go away.
@@ -1277,9 +1277,9 @@ static struct cpuhp_step cpuhp_bp_states[] = {
 	 * before blk_mq_queue_reinit_notify() from notify_dead(),
 	 * otherwise a RCU stall occurs.
 	 */
-	[CPUHP_TIMERS_DEAD] = {
+	[CPUHP_TIMERS_PREPARE] = {
 		.name			= "timers:dead",
-		.startup.single		= NULL,
+		.startup.single		= timers_prepare_cpu,
 		.teardown.single	= timers_dead_cpu,
 	},
 	/* Kicks the plugged cpu into life */
@@ -1289,11 +1289,6 @@ static struct cpuhp_step cpuhp_bp_states[] = {
 		.teardown.single	= NULL,
 		.cant_stop		= true,
 	},
-	[CPUHP_AP_SMPCFD_DYING] = {
-		.name			= "smpcfd:dying",
-		.startup.single		= NULL,
-		.teardown.single	= smpcfd_dying_cpu,
-	},
 	/*
 	 * Handled on controll processor until the plugged processor manages
 	 * this itself.
@@ -1335,6 +1330,11 @@ static struct cpuhp_step cpuhp_ap_states[] = {
 		.startup.single		= NULL,
 		.teardown.single	= rcutree_dying_cpu,
 	},
+	[CPUHP_AP_SMPCFD_DYING] = {
+		.name			= "smpcfd:dying",
+		.startup.single		= NULL,
+		.teardown.single	= smpcfd_dying_cpu,
+	},
 	/* Entry state on starting. Interrupts enabled from here on. Transient
 	 * state for synchronsization */
 	[CPUHP_AP_ONLINE] = {
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index b3663896278ed71f854963024caa6549b6a57935..4f63597c824dfa81542a38268f9ec339f7fe0c47 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -410,7 +410,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_SYMBOL(contig_page_data);
 #endif
 #ifdef CONFIG_SPARSEMEM
-	VMCOREINFO_SYMBOL(mem_section);
+	VMCOREINFO_SYMBOL_ARRAY(mem_section);
 	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
 	VMCOREINFO_STRUCT_SIZE(mem_section);
 	VMCOREINFO_OFFSET(mem_section, section_mem_map);
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index e74be38245adf732f34c55c0a676004f59870ba4..ed5d34925ad0617a40aeed3774b0e393aec03e99 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -350,7 +350,7 @@ static char *kdb_read(char *buffer, size_t bufsize)
 			}
 			kdb_printf("\n");
 			for (i = 0; i < count; i++) {
-				if (kallsyms_symbol_next(p_tmp, i) < 0)
+				if (WARN_ON(!kallsyms_symbol_next(p_tmp, i)))
 					break;
 				kdb_printf("%s ", p_tmp);
 				*(p_tmp + len) = '\0';
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 16beab4767e1e686e8ccd3642a82cbc4adde7f59..4df5b695bf0db1c035a22b914d87ad190d1a0f42 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6639,6 +6639,7 @@ static void perf_event_namespaces_output(struct perf_event *event,
 	struct perf_namespaces_event *namespaces_event = data;
 	struct perf_output_handle handle;
 	struct perf_sample_data sample;
+	u16 header_size = namespaces_event->event_id.header.size;
 	int ret;
 
 	if (!perf_event_namespaces_match(event))
@@ -6649,7 +6650,7 @@ static void perf_event_namespaces_output(struct perf_event *event,
 	ret = perf_output_begin(&handle, event,
 				namespaces_event->event_id.header.size);
 	if (ret)
-		return;
+		goto out;
 
 	namespaces_event->event_id.pid = perf_event_pid(event,
 							namespaces_event->task);
@@ -6661,6 +6662,8 @@ static void perf_event_namespaces_output(struct perf_event *event,
 	perf_event__output_id_sample(event, &handle, &sample);
 
 	perf_output_end(&handle);
+out:
+	namespaces_event->event_id.header.size = header_size;
 }
 
 static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
@@ -7987,11 +7990,11 @@ static void bpf_overflow_handler(struct perf_event *event,
 {
 	struct bpf_perf_event_data_kern ctx = {
 		.data = data,
-		.regs = regs,
 		.event = event,
 	};
 	int ret = 0;
 
+	ctx.regs = perf_arch_bpf_user_pt_regs(regs);
 	preempt_disable();
 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
 		goto out;
diff --git a/kernel/exit.c b/kernel/exit.c
index 6b4298a41167c7f3f7ea7be1d85af9a08d9d44cb..995453d9fb5529d0e210538cf27943839ec67721 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1755,3 +1755,12 @@ COMPAT_SYSCALL_DEFINE5(waitid,
 	return -EFAULT;
 }
 #endif
+
+__weak void abort(void)
+{
+	BUG();
+
+	/* if that doesn't kill us, halt */
+	panic("Oops failed to kill thread");
+}
+EXPORT_SYMBOL(abort);
diff --git a/kernel/fork.c b/kernel/fork.c
index 432eadf6b58c18d9de6a3d09f3fef36089b4b5a2..2295fc69717f6c3d877ef3cac15b55336d7746c6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 			goto out;
 	}
 	/* a new mm has just been created */
-	arch_dup_mmap(oldmm, mm);
-	retval = 0;
+	retval = arch_dup_mmap(oldmm, mm);
 out:
 	up_write(&mm->mmap_sem);
 	flush_tlb_mm(oldmm);
diff --git a/kernel/futex.c b/kernel/futex.c
index 76ed5921117a24cc347fa3aa79f59c2ba378897f..57d0b3657e16b90268fa3396668bb62e6e54d287 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1582,8 +1582,8 @@ static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
 {
 	unsigned int op =	  (encoded_op & 0x70000000) >> 28;
 	unsigned int cmp =	  (encoded_op & 0x0f000000) >> 24;
-	int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 12);
-	int cmparg = sign_extend32(encoded_op & 0x00000fff, 12);
+	int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
+	int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
 	int oldval, ret;
 
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
diff --git a/kernel/groups.c b/kernel/groups.c
index e357bc800111043ed8d477d758bd245b8aa4c566..daae2f2dc6d4f64565112f0d7fea9c984887ce26 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -86,11 +86,12 @@ static int gid_cmp(const void *_a, const void *_b)
 	return gid_gt(a, b) - gid_lt(a, b);
 }
 
-static void groups_sort(struct group_info *group_info)
+void groups_sort(struct group_info *group_info)
 {
 	sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid),
 	     gid_cmp, NULL);
 }
+EXPORT_SYMBOL(groups_sort);
 
 /* a simple bsearch */
 int groups_search(const struct group_info *group_info, kgid_t grp)
@@ -122,7 +123,6 @@ int groups_search(const struct group_info *group_info, kgid_t grp)
 void set_groups(struct cred *new, struct group_info *group_info)
 {
 	put_group_info(new->group_info);
-	groups_sort(group_info);
 	get_group_info(group_info);
 	new->group_info = group_info;
 }
@@ -206,6 +206,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
 		return retval;
 	}
 
+	groups_sort(group_info);
 	retval = set_current_groups(group_info);
 	put_group_info(group_info);
 
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
index 17f05ef8f575f996f87e614a914bbe21d537f949..e4d3819a91cc7d7bda5416bfd5aaba99a5576cce 100644
--- a/kernel/irq/debug.h
+++ b/kernel/irq/debug.h
@@ -12,6 +12,11 @@
 
 static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
 {
+	static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
+
+	if (!__ratelimit(&ratelimit))
+		return;
+
 	printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
 		irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
 	printk("->handle_irq():  %p, ", desc->handle_irq);
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index 7f608ac3965379fc112d85051fd80325b2ec84ce..acfaaef8672ad2b1d2419bd7522f5556ebb1ae50 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -113,6 +113,7 @@ static const struct irq_bit_descr irqdata_states[] = {
 	BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
 	BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
 	BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
+	BIT_MASK_DESCR(IRQD_CAN_RESERVE),
 
 	BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
 
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index c26c5bb6b491f75f76f1190cdc21989f79d17e09..508c03dfef254b9dc8c3a7a1e3e7307d3a7201b3 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -364,10 +364,11 @@ irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq)
 EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip);
 
 /*
- * Separate lockdep class for interrupt chip which can nest irq_desc
- * lock.
+ * Separate lockdep classes for interrupt chip which can nest irq_desc
+ * lock and request mutex.
  */
 static struct lock_class_key irq_nested_lock_class;
+static struct lock_class_key irq_nested_request_class;
 
 /*
  * irq_map_generic_chip - Map a generic chip for an irq domain
@@ -409,7 +410,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
 	set_bit(idx, &gc->installed);
 
 	if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK)
-		irq_set_lockdep_class(virq, &irq_nested_lock_class);
+		irq_set_lockdep_class(virq, &irq_nested_lock_class,
+				      &irq_nested_request_class);
 
 	if (chip->irq_calc_mask)
 		chip->irq_calc_mask(data);
@@ -479,7 +481,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
 			continue;
 
 		if (flags & IRQ_GC_INIT_NESTED_LOCK)
-			irq_set_lockdep_class(i, &irq_nested_lock_class);
+			irq_set_lockdep_class(i, &irq_nested_lock_class,
+					      &irq_nested_request_class);
 
 		if (!(flags & IRQ_GC_NO_MASK)) {
 			struct irq_data *d = irq_get_irq_data(i);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 07d08ca701ec4627b558d0435c54cf6de7e147d2..ab19371eab9b8e1e9a7789b882ed4a308883b555 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -440,7 +440,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
 #endif /* !CONFIG_GENERIC_PENDING_IRQ */
 
 #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
-static inline int irq_domain_activate_irq(struct irq_data *data, bool early)
+static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve)
 {
 	irqd_set_activated(data);
 	return 0;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 4f4f60015e8ab4196ef1df5107f04beda37d4c6c..62068ad46930dd12088081df5233dffdfee0d7f4 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1693,7 +1693,7 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data)
 	}
 }
 
-static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
+static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve)
 {
 	int ret = 0;
 
@@ -1702,9 +1702,9 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
 
 		if (irqd->parent_data)
 			ret = __irq_domain_activate_irq(irqd->parent_data,
-							early);
+							reserve);
 		if (!ret && domain->ops->activate) {
-			ret = domain->ops->activate(domain, irqd, early);
+			ret = domain->ops->activate(domain, irqd, reserve);
 			/* Rollback in case of error */
 			if (ret && irqd->parent_data)
 				__irq_domain_deactivate_irq(irqd->parent_data);
@@ -1716,17 +1716,18 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
 /**
  * irq_domain_activate_irq - Call domain_ops->activate recursively to activate
  *			     interrupt
- * @irq_data:	outermost irq_data associated with interrupt
+ * @irq_data:	Outermost irq_data associated with interrupt
+ * @reserve:	If set only reserve an interrupt vector instead of assigning one
  *
  * This is the second step to call domain_ops->activate to program interrupt
  * controllers, so the interrupt could actually get delivered.
  */
-int irq_domain_activate_irq(struct irq_data *irq_data, bool early)
+int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve)
 {
 	int ret = 0;
 
 	if (!irqd_is_activated(irq_data))
-		ret = __irq_domain_activate_irq(irq_data, early);
+		ret = __irq_domain_activate_irq(irq_data, reserve);
 	if (!ret)
 		irqd_set_activated(irq_data);
 	return ret;
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 7df2480005f863693f20d5450515d85085c225b6..0ba0dd8863a779f5b318ef07f03c2915ea57cd57 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -384,7 +384,9 @@ unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown)
 {
 	struct cpumap *cm = this_cpu_ptr(m->maps);
 
-	return (m->global_available - cpudown) ? cm->available : 0;
+	if (!cpudown)
+		return m->global_available;
+	return m->global_available - cm->available;
 }
 
 /**
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index edb987b2c58dc1553342b5a87c91335b42888e8b..2f3c4f5382cc6bad8daf528146cf613d9a1ac3e6 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -339,6 +339,40 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
 	return ret;
 }
 
+/*
+ * Carefully check whether the device can use reservation mode. If
+ * reservation mode is enabled then the early activation will assign a
+ * dummy vector to the device. If the PCI/MSI device does not support
+ * masking of the entry then this can result in spurious interrupts when
+ * the device driver is not absolutely careful. But even then a malfunction
+ * of the hardware could result in a spurious interrupt on the dummy vector
+ * and render the device unusable. If the entry can be masked then the core
+ * logic will prevent the spurious interrupt and reservation mode can be
+ * used. For now reservation mode is restricted to PCI/MSI.
+ */
+static bool msi_check_reservation_mode(struct irq_domain *domain,
+				       struct msi_domain_info *info,
+				       struct device *dev)
+{
+	struct msi_desc *desc;
+
+	if (domain->bus_token != DOMAIN_BUS_PCI_MSI)
+		return false;
+
+	if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
+		return false;
+
+	if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
+		return false;
+
+	/*
+	 * Checking the first MSI descriptor is sufficient. MSIX supports
+	 * masking and MSI does so when the maskbit is set.
+	 */
+	desc = first_msi_entry(dev);
+	return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
+}
+
 /**
  * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
  * @domain:	The domain to allocate from
@@ -353,9 +387,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 {
 	struct msi_domain_info *info = domain->host_data;
 	struct msi_domain_ops *ops = info->ops;
-	msi_alloc_info_t arg;
+	struct irq_data *irq_data;
 	struct msi_desc *desc;
+	msi_alloc_info_t arg;
 	int i, ret, virq;
+	bool can_reserve;
 
 	ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
 	if (ret)
@@ -385,6 +421,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 	if (ops->msi_finish)
 		ops->msi_finish(&arg, 0);
 
+	can_reserve = msi_check_reservation_mode(domain, info, dev);
+
 	for_each_msi_entry(desc, dev) {
 		virq = desc->irq;
 		if (desc->nvec_used == 1)
@@ -397,15 +435,25 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 		 * the MSI entries before the PCI layer enables MSI in the
 		 * card. Otherwise the card latches a random msi message.
 		 */
-		if (info->flags & MSI_FLAG_ACTIVATE_EARLY) {
-			struct irq_data *irq_data;
+		if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
+			continue;
 
+		irq_data = irq_domain_get_irq_data(domain, desc->irq);
+		if (!can_reserve)
+			irqd_clr_can_reserve(irq_data);
+		ret = irq_domain_activate_irq(irq_data, can_reserve);
+		if (ret)
+			goto cleanup;
+	}
+
+	/*
+	 * If these interrupts use reservation mode, clear the activated bit
+	 * so request_irq() will assign the final vector.
+	 */
+	if (can_reserve) {
+		for_each_msi_entry(desc, dev) {
 			irq_data = irq_domain_get_irq_data(domain, desc->irq);
-			ret = irq_domain_activate_irq(irq_data, true);
-			if (ret)
-				goto cleanup;
-			if (info->flags & MSI_FLAG_MUST_REACTIVATE)
-				irqd_clr_activated(irq_data);
+			irqd_clr_activated(irq_data);
 		}
 	}
 	return 0;
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 15f33faf4013bdfea16baf8b0b31053456606620..7594c033d98a39f3f51632a8f6f77b4440079ec4 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -157,7 +157,7 @@ void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2)
 }
 EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2);
 
-void notrace __sanitizer_cov_trace_cmp4(u16 arg1, u16 arg2)
+void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2)
 {
 	write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_);
 }
@@ -183,7 +183,7 @@ void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2)
 }
 EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2);
 
-void notrace __sanitizer_cov_trace_const_cmp4(u16 arg1, u16 arg2)
+void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2)
 {
 	write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2,
 			_RET_IP_);
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 9776da8db180d63c94f0a698bac844e91c24c0ce..5fa1324a4f29a57901bdcf0ca81874a3dfd9a66c 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -57,10 +57,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/lock.h>
 
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-#include <linux/slab.h>
-#endif
-
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
 module_param(prove_locking, int, 0644);
@@ -75,19 +71,6 @@ module_param(lock_stat, int, 0644);
 #define lock_stat 0
 #endif
 
-#ifdef CONFIG_BOOTPARAM_LOCKDEP_CROSSRELEASE_FULLSTACK
-static int crossrelease_fullstack = 1;
-#else
-static int crossrelease_fullstack;
-#endif
-static int __init allow_crossrelease_fullstack(char *str)
-{
-	crossrelease_fullstack = 1;
-	return 0;
-}
-
-early_param("crossrelease_fullstack", allow_crossrelease_fullstack);
-
 /*
  * lockdep_lock: protects the lockdep graph, the hashes and the
  *               class/list/hash allocators.
@@ -740,18 +723,6 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
 	return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL);
 }
 
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-static void cross_init(struct lockdep_map *lock, int cross);
-static int cross_lock(struct lockdep_map *lock);
-static int lock_acquire_crosslock(struct held_lock *hlock);
-static int lock_release_crosslock(struct lockdep_map *lock);
-#else
-static inline void cross_init(struct lockdep_map *lock, int cross) {}
-static inline int cross_lock(struct lockdep_map *lock) { return 0; }
-static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; }
-static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; }
-#endif
-
 /*
  * Register a lock's class in the hash-table, if the class is not present
  * yet. Otherwise we look it up. We cache the result in the lock object
@@ -1151,41 +1122,22 @@ print_circular_lock_scenario(struct held_lock *src,
 		printk(KERN_CONT "\n\n");
 	}
 
-	if (cross_lock(tgt->instance)) {
-		printk(" Possible unsafe locking scenario by crosslock:\n\n");
-		printk("       CPU0                    CPU1\n");
-		printk("       ----                    ----\n");
-		printk("  lock(");
-		__print_lock_name(parent);
-		printk(KERN_CONT ");\n");
-		printk("  lock(");
-		__print_lock_name(target);
-		printk(KERN_CONT ");\n");
-		printk("                               lock(");
-		__print_lock_name(source);
-		printk(KERN_CONT ");\n");
-		printk("                               unlock(");
-		__print_lock_name(target);
-		printk(KERN_CONT ");\n");
-		printk("\n *** DEADLOCK ***\n\n");
-	} else {
-		printk(" Possible unsafe locking scenario:\n\n");
-		printk("       CPU0                    CPU1\n");
-		printk("       ----                    ----\n");
-		printk("  lock(");
-		__print_lock_name(target);
-		printk(KERN_CONT ");\n");
-		printk("                               lock(");
-		__print_lock_name(parent);
-		printk(KERN_CONT ");\n");
-		printk("                               lock(");
-		__print_lock_name(target);
-		printk(KERN_CONT ");\n");
-		printk("  lock(");
-		__print_lock_name(source);
-		printk(KERN_CONT ");\n");
-		printk("\n *** DEADLOCK ***\n\n");
-	}
+	printk(" Possible unsafe locking scenario:\n\n");
+	printk("       CPU0                    CPU1\n");
+	printk("       ----                    ----\n");
+	printk("  lock(");
+	__print_lock_name(target);
+	printk(KERN_CONT ");\n");
+	printk("                               lock(");
+	__print_lock_name(parent);
+	printk(KERN_CONT ");\n");
+	printk("                               lock(");
+	__print_lock_name(target);
+	printk(KERN_CONT ");\n");
+	printk("  lock(");
+	__print_lock_name(source);
+	printk(KERN_CONT ");\n");
+	printk("\n *** DEADLOCK ***\n\n");
 }
 
 /*
@@ -1211,10 +1163,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
 		curr->comm, task_pid_nr(curr));
 	print_lock(check_src);
 
-	if (cross_lock(check_tgt->instance))
-		pr_warn("\nbut now in release context of a crosslock acquired at the following:\n");
-	else
-		pr_warn("\nbut task is already holding lock:\n");
+	pr_warn("\nbut task is already holding lock:\n");
 
 	print_lock(check_tgt);
 	pr_warn("\nwhich lock already depends on the new lock.\n\n");
@@ -1244,9 +1193,7 @@ static noinline int print_circular_bug(struct lock_list *this,
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
-	if (cross_lock(check_tgt->instance))
-		this->trace = *trace;
-	else if (!save_trace(&this->trace))
+	if (!save_trace(&this->trace))
 		return 0;
 
 	depth = get_lock_depth(target);
@@ -1850,9 +1797,6 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
 		if (nest)
 			return 2;
 
-		if (cross_lock(prev->instance))
-			continue;
-
 		return print_deadlock_bug(curr, prev, next);
 	}
 	return 1;
@@ -2018,31 +1962,26 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 	for (;;) {
 		int distance = curr->lockdep_depth - depth + 1;
 		hlock = curr->held_locks + depth - 1;
+
 		/*
-		 * Only non-crosslock entries get new dependencies added.
-		 * Crosslock entries will be added by commit later:
+		 * Only non-recursive-read entries get new dependencies
+		 * added:
 		 */
-		if (!cross_lock(hlock->instance)) {
+		if (hlock->read != 2 && hlock->check) {
+			int ret = check_prev_add(curr, hlock, next, distance, &trace, save_trace);
+			if (!ret)
+				return 0;
+
 			/*
-			 * Only non-recursive-read entries get new dependencies
-			 * added:
+			 * Stop after the first non-trylock entry,
+			 * as non-trylock entries have added their
+			 * own direct dependencies already, so this
+			 * lock is connected to them indirectly:
 			 */
-			if (hlock->read != 2 && hlock->check) {
-				int ret = check_prev_add(curr, hlock, next,
-							 distance, &trace, save_trace);
-				if (!ret)
-					return 0;
-
-				/*
-				 * Stop after the first non-trylock entry,
-				 * as non-trylock entries have added their
-				 * own direct dependencies already, so this
-				 * lock is connected to them indirectly:
-				 */
-				if (!hlock->trylock)
-					break;
-			}
+			if (!hlock->trylock)
+				break;
 		}
+
 		depth--;
 		/*
 		 * End of lock-stack?
@@ -3292,21 +3231,10 @@ static void __lockdep_init_map(struct lockdep_map *lock, const char *name,
 void lockdep_init_map(struct lockdep_map *lock, const char *name,
 		      struct lock_class_key *key, int subclass)
 {
-	cross_init(lock, 0);
 	__lockdep_init_map(lock, name, key, subclass);
 }
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name,
-		      struct lock_class_key *key, int subclass)
-{
-	cross_init(lock, 1);
-	__lockdep_init_map(lock, name, key, subclass);
-}
-EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock);
-#endif
-
 struct lock_class_key __lockdep_no_validate__;
 EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
 
@@ -3362,7 +3290,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	int chain_head = 0;
 	int class_idx;
 	u64 chain_key;
-	int ret;
 
 	if (unlikely(!debug_locks))
 		return 0;
@@ -3411,8 +3338,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 
 	class_idx = class - lock_classes + 1;
 
-	/* TODO: nest_lock is not implemented for crosslock yet. */
-	if (depth && !cross_lock(lock)) {
+	if (depth) {
 		hlock = curr->held_locks + depth - 1;
 		if (hlock->class_idx == class_idx && nest_lock) {
 			if (hlock->references) {
@@ -3500,14 +3426,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
 		return 0;
 
-	ret = lock_acquire_crosslock(hlock);
-	/*
-	 * 2 means normal acquire operations are needed. Otherwise, it's
-	 * ok just to return with '0:fail, 1:success'.
-	 */
-	if (ret != 2)
-		return ret;
-
 	curr->curr_chain_key = chain_key;
 	curr->lockdep_depth++;
 	check_chain_key(curr);
@@ -3745,19 +3663,11 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
 	struct task_struct *curr = current;
 	struct held_lock *hlock;
 	unsigned int depth;
-	int ret, i;
+	int i;
 
 	if (unlikely(!debug_locks))
 		return 0;
 
-	ret = lock_release_crosslock(lock);
-	/*
-	 * 2 means normal release operations are needed. Otherwise, it's
-	 * ok just to return with '0:fail, 1:success'.
-	 */
-	if (ret != 2)
-		return ret;
-
 	depth = curr->lockdep_depth;
 	/*
 	 * So we're all set to release this lock.. wait what lock? We don't
@@ -4675,494 +4585,3 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 	dump_stack();
 }
 EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
-
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-
-/*
- * Crossrelease works by recording a lock history for each thread and
- * connecting those historic locks that were taken after the
- * wait_for_completion() in the complete() context.
- *
- * Task-A				Task-B
- *
- *					mutex_lock(&A);
- *					mutex_unlock(&A);
- *
- * wait_for_completion(&C);
- *   lock_acquire_crosslock();
- *     atomic_inc_return(&cross_gen_id);
- *                                |
- *				  |	mutex_lock(&B);
- *				  |	mutex_unlock(&B);
- *                                |
- *				  |	complete(&C);
- *				  `--	  lock_commit_crosslock();
- *
- * Which will then add a dependency between B and C.
- */
-
-#define xhlock(i)         (current->xhlocks[(i) % MAX_XHLOCKS_NR])
-
-/*
- * Whenever a crosslock is held, cross_gen_id will be increased.
- */
-static atomic_t cross_gen_id; /* Can be wrapped */
-
-/*
- * Make an entry of the ring buffer invalid.
- */
-static inline void invalidate_xhlock(struct hist_lock *xhlock)
-{
-	/*
-	 * Normally, xhlock->hlock.instance must be !NULL.
-	 */
-	xhlock->hlock.instance = NULL;
-}
-
-/*
- * Lock history stacks; we have 2 nested lock history stacks:
- *
- *   HARD(IRQ)
- *   SOFT(IRQ)
- *
- * The thing is that once we complete a HARD/SOFT IRQ the future task locks
- * should not depend on any of the locks observed while running the IRQ.  So
- * what we do is rewind the history buffer and erase all our knowledge of that
- * temporal event.
- */
-
-void crossrelease_hist_start(enum xhlock_context_t c)
-{
-	struct task_struct *cur = current;
-
-	if (!cur->xhlocks)
-		return;
-
-	cur->xhlock_idx_hist[c] = cur->xhlock_idx;
-	cur->hist_id_save[c]    = cur->hist_id;
-}
-
-void crossrelease_hist_end(enum xhlock_context_t c)
-{
-	struct task_struct *cur = current;
-
-	if (cur->xhlocks) {
-		unsigned int idx = cur->xhlock_idx_hist[c];
-		struct hist_lock *h = &xhlock(idx);
-
-		cur->xhlock_idx = idx;
-
-		/* Check if the ring was overwritten. */
-		if (h->hist_id != cur->hist_id_save[c])
-			invalidate_xhlock(h);
-	}
-}
-
-/*
- * lockdep_invariant_state() is used to annotate independence inside a task, to
- * make one task look like multiple independent 'tasks'.
- *
- * Take for instance workqueues; each work is independent of the last. The
- * completion of a future work does not depend on the completion of a past work
- * (in general). Therefore we must not carry that (lock) dependency across
- * works.
- *
- * This is true for many things; pretty much all kthreads fall into this
- * pattern, where they have an invariant state and future completions do not
- * depend on past completions. Its just that since they all have the 'same'
- * form -- the kthread does the same over and over -- it doesn't typically
- * matter.
- *
- * The same is true for system-calls, once a system call is completed (we've
- * returned to userspace) the next system call does not depend on the lock
- * history of the previous system call.
- *
- * They key property for independence, this invariant state, is that it must be
- * a point where we hold no locks and have no history. Because if we were to
- * hold locks, the restore at _end() would not necessarily recover it's history
- * entry. Similarly, independence per-definition means it does not depend on
- * prior state.
- */
-void lockdep_invariant_state(bool force)
-{
-	/*
-	 * We call this at an invariant point, no current state, no history.
-	 * Verify the former, enforce the latter.
-	 */
-	WARN_ON_ONCE(!force && current->lockdep_depth);
-	invalidate_xhlock(&xhlock(current->xhlock_idx));
-}
-
-static int cross_lock(struct lockdep_map *lock)
-{
-	return lock ? lock->cross : 0;
-}
-
-/*
- * This is needed to decide the relationship between wrapable variables.
- */
-static inline int before(unsigned int a, unsigned int b)
-{
-	return (int)(a - b) < 0;
-}
-
-static inline struct lock_class *xhlock_class(struct hist_lock *xhlock)
-{
-	return hlock_class(&xhlock->hlock);
-}
-
-static inline struct lock_class *xlock_class(struct cross_lock *xlock)
-{
-	return hlock_class(&xlock->hlock);
-}
-
-/*
- * Should we check a dependency with previous one?
- */
-static inline int depend_before(struct held_lock *hlock)
-{
-	return hlock->read != 2 && hlock->check && !hlock->trylock;
-}
-
-/*
- * Should we check a dependency with next one?
- */
-static inline int depend_after(struct held_lock *hlock)
-{
-	return hlock->read != 2 && hlock->check;
-}
-
-/*
- * Check if the xhlock is valid, which would be false if,
- *
- *    1. Has not used after initializaion yet.
- *    2. Got invalidated.
- *
- * Remind hist_lock is implemented as a ring buffer.
- */
-static inline int xhlock_valid(struct hist_lock *xhlock)
-{
-	/*
-	 * xhlock->hlock.instance must be !NULL.
-	 */
-	return !!xhlock->hlock.instance;
-}
-
-/*
- * Record a hist_lock entry.
- *
- * Irq disable is only required.
- */
-static void add_xhlock(struct held_lock *hlock)
-{
-	unsigned int idx = ++current->xhlock_idx;
-	struct hist_lock *xhlock = &xhlock(idx);
-
-#ifdef CONFIG_DEBUG_LOCKDEP
-	/*
-	 * This can be done locklessly because they are all task-local
-	 * state, we must however ensure IRQs are disabled.
-	 */
-	WARN_ON_ONCE(!irqs_disabled());
-#endif
-
-	/* Initialize hist_lock's members */
-	xhlock->hlock = *hlock;
-	xhlock->hist_id = ++current->hist_id;
-
-	xhlock->trace.nr_entries = 0;
-	xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES;
-	xhlock->trace.entries = xhlock->trace_entries;
-
-	if (crossrelease_fullstack) {
-		xhlock->trace.skip = 3;
-		save_stack_trace(&xhlock->trace);
-	} else {
-		xhlock->trace.nr_entries = 1;
-		xhlock->trace.entries[0] = hlock->acquire_ip;
-	}
-}
-
-static inline int same_context_xhlock(struct hist_lock *xhlock)
-{
-	return xhlock->hlock.irq_context == task_irq_context(current);
-}
-
-/*
- * This should be lockless as far as possible because this would be
- * called very frequently.
- */
-static void check_add_xhlock(struct held_lock *hlock)
-{
-	/*
-	 * Record a hist_lock, only in case that acquisitions ahead
-	 * could depend on the held_lock. For example, if the held_lock
-	 * is trylock then acquisitions ahead never depends on that.
-	 * In that case, we don't need to record it. Just return.
-	 */
-	if (!current->xhlocks || !depend_before(hlock))
-		return;
-
-	add_xhlock(hlock);
-}
-
-/*
- * For crosslock.
- */
-static int add_xlock(struct held_lock *hlock)
-{
-	struct cross_lock *xlock;
-	unsigned int gen_id;
-
-	if (!graph_lock())
-		return 0;
-
-	xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock;
-
-	/*
-	 * When acquisitions for a crosslock are overlapped, we use
-	 * nr_acquire to perform commit for them, based on cross_gen_id
-	 * of the first acquisition, which allows to add additional
-	 * dependencies.
-	 *
-	 * Moreover, when no acquisition of a crosslock is in progress,
-	 * we should not perform commit because the lock might not exist
-	 * any more, which might cause incorrect memory access. So we
-	 * have to track the number of acquisitions of a crosslock.
-	 *
-	 * depend_after() is necessary to initialize only the first
-	 * valid xlock so that the xlock can be used on its commit.
-	 */
-	if (xlock->nr_acquire++ && depend_after(&xlock->hlock))
-		goto unlock;
-
-	gen_id = (unsigned int)atomic_inc_return(&cross_gen_id);
-	xlock->hlock = *hlock;
-	xlock->hlock.gen_id = gen_id;
-unlock:
-	graph_unlock();
-	return 1;
-}
-
-/*
- * Called for both normal and crosslock acquires. Normal locks will be
- * pushed on the hist_lock queue. Cross locks will record state and
- * stop regular lock_acquire() to avoid being placed on the held_lock
- * stack.
- *
- * Return: 0 - failure;
- *         1 - crosslock, done;
- *         2 - normal lock, continue to held_lock[] ops.
- */
-static int lock_acquire_crosslock(struct held_lock *hlock)
-{
-	/*
-	 *	CONTEXT 1		CONTEXT 2
-	 *	---------		---------
-	 *	lock A (cross)
-	 *	X = atomic_inc_return(&cross_gen_id)
-	 *	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-	 *				Y = atomic_read_acquire(&cross_gen_id)
-	 *				lock B
-	 *
-	 * atomic_read_acquire() is for ordering between A and B,
-	 * IOW, A happens before B, when CONTEXT 2 see Y >= X.
-	 *
-	 * Pairs with atomic_inc_return() in add_xlock().
-	 */
-	hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id);
-
-	if (cross_lock(hlock->instance))
-		return add_xlock(hlock);
-
-	check_add_xhlock(hlock);
-	return 2;
-}
-
-static int copy_trace(struct stack_trace *trace)
-{
-	unsigned long *buf = stack_trace + nr_stack_trace_entries;
-	unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
-	unsigned int nr = min(max_nr, trace->nr_entries);
-
-	trace->nr_entries = nr;
-	memcpy(buf, trace->entries, nr * sizeof(trace->entries[0]));
-	trace->entries = buf;
-	nr_stack_trace_entries += nr;
-
-	if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) {
-		if (!debug_locks_off_graph_unlock())
-			return 0;
-
-		print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
-		dump_stack();
-
-		return 0;
-	}
-
-	return 1;
-}
-
-static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock)
-{
-	unsigned int xid, pid;
-	u64 chain_key;
-
-	xid = xlock_class(xlock) - lock_classes;
-	chain_key = iterate_chain_key((u64)0, xid);
-	pid = xhlock_class(xhlock) - lock_classes;
-	chain_key = iterate_chain_key(chain_key, pid);
-
-	if (lookup_chain_cache(chain_key))
-		return 1;
-
-	if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context,
-				chain_key))
-		return 0;
-
-	if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1,
-			    &xhlock->trace, copy_trace))
-		return 0;
-
-	return 1;
-}
-
-static void commit_xhlocks(struct cross_lock *xlock)
-{
-	unsigned int cur = current->xhlock_idx;
-	unsigned int prev_hist_id = xhlock(cur).hist_id;
-	unsigned int i;
-
-	if (!graph_lock())
-		return;
-
-	if (xlock->nr_acquire) {
-		for (i = 0; i < MAX_XHLOCKS_NR; i++) {
-			struct hist_lock *xhlock = &xhlock(cur - i);
-
-			if (!xhlock_valid(xhlock))
-				break;
-
-			if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id))
-				break;
-
-			if (!same_context_xhlock(xhlock))
-				break;
-
-			/*
-			 * Filter out the cases where the ring buffer was
-			 * overwritten and the current entry has a bigger
-			 * hist_id than the previous one, which is impossible
-			 * otherwise:
-			 */
-			if (unlikely(before(prev_hist_id, xhlock->hist_id)))
-				break;
-
-			prev_hist_id = xhlock->hist_id;
-
-			/*
-			 * commit_xhlock() returns 0 with graph_lock already
-			 * released if fail.
-			 */
-			if (!commit_xhlock(xlock, xhlock))
-				return;
-		}
-	}
-
-	graph_unlock();
-}
-
-void lock_commit_crosslock(struct lockdep_map *lock)
-{
-	struct cross_lock *xlock;
-	unsigned long flags;
-
-	if (unlikely(!debug_locks || current->lockdep_recursion))
-		return;
-
-	if (!current->xhlocks)
-		return;
-
-	/*
-	 * Do commit hist_locks with the cross_lock, only in case that
-	 * the cross_lock could depend on acquisitions after that.
-	 *
-	 * For example, if the cross_lock does not have the 'check' flag
-	 * then we don't need to check dependencies and commit for that.
-	 * Just skip it. In that case, of course, the cross_lock does
-	 * not depend on acquisitions ahead, either.
-	 *
-	 * WARNING: Don't do that in add_xlock() in advance. When an
-	 * acquisition context is different from the commit context,
-	 * invalid(skipped) cross_lock might be accessed.
-	 */
-	if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock))
-		return;
-
-	raw_local_irq_save(flags);
-	check_flags(flags);
-	current->lockdep_recursion = 1;
-	xlock = &((struct lockdep_map_cross *)lock)->xlock;
-	commit_xhlocks(xlock);
-	current->lockdep_recursion = 0;
-	raw_local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(lock_commit_crosslock);
-
-/*
- * Return: 0 - failure;
- *         1 - crosslock, done;
- *         2 - normal lock, continue to held_lock[] ops.
- */
-static int lock_release_crosslock(struct lockdep_map *lock)
-{
-	if (cross_lock(lock)) {
-		if (!graph_lock())
-			return 0;
-		((struct lockdep_map_cross *)lock)->xlock.nr_acquire--;
-		graph_unlock();
-		return 1;
-	}
-	return 2;
-}
-
-static void cross_init(struct lockdep_map *lock, int cross)
-{
-	if (cross)
-		((struct lockdep_map_cross *)lock)->xlock.nr_acquire = 0;
-
-	lock->cross = cross;
-
-	/*
-	 * Crossrelease assumes that the ring buffer size of xhlocks
-	 * is aligned with power of 2. So force it on build.
-	 */
-	BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1));
-}
-
-void lockdep_init_task(struct task_struct *task)
-{
-	int i;
-
-	task->xhlock_idx = UINT_MAX;
-	task->hist_id = 0;
-
-	for (i = 0; i < XHLOCK_CTX_NR; i++) {
-		task->xhlock_idx_hist[i] = UINT_MAX;
-		task->hist_id_save[i] = 0;
-	}
-
-	task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR,
-				GFP_KERNEL);
-}
-
-void lockdep_free_task(struct task_struct *task)
-{
-	if (task->xhlocks) {
-		void *tmp = task->xhlocks;
-		/* Diable crossrelease for current */
-		task->xhlocks = NULL;
-		kfree(tmp);
-	}
-}
-#endif
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 1fd1a7543cdddf39197acaa3882a0f9de7ddb3ab..936f3d14dd6bfeda3ef7921266fef5dc5b36e2a8 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -66,12 +66,8 @@ void __lockfunc __raw_##op##_lock(locktype##_t *lock)			\
 			break;						\
 		preempt_enable();					\
 									\
-		if (!(lock)->break_lock)				\
-			(lock)->break_lock = 1;				\
-		while ((lock)->break_lock)				\
-			arch_##op##_relax(&lock->raw_lock);		\
+		arch_##op##_relax(&lock->raw_lock);			\
 	}								\
-	(lock)->break_lock = 0;						\
 }									\
 									\
 unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock)	\
@@ -86,12 +82,9 @@ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock)	\
 		local_irq_restore(flags);				\
 		preempt_enable();					\
 									\
-		if (!(lock)->break_lock)				\
-			(lock)->break_lock = 1;				\
-		while ((lock)->break_lock)				\
-			arch_##op##_relax(&lock->raw_lock);		\
+		arch_##op##_relax(&lock->raw_lock);			\
 	}								\
-	(lock)->break_lock = 0;						\
+									\
 	return flags;							\
 }									\
 									\
diff --git a/kernel/pid.c b/kernel/pid.c
index b13b624e2c4902c67d2ae789c7af8fb2d6269cd8..1e8bb6550ec4bf61c367806dc31010e8a25f47b9 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -193,10 +193,8 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 	}
 
 	if (unlikely(is_child_reaper(pid))) {
-		if (pid_ns_prepare_proc(ns)) {
-			disable_pid_allocation(ns);
+		if (pid_ns_prepare_proc(ns))
 			goto out_free;
-		}
 	}
 
 	get_pid_ns(ns);
@@ -226,6 +224,10 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 	while (++i <= ns->level)
 		idr_remove(&ns->idr, (pid->numbers + i)->nr);
 
+	/* On failure to allocate the first pid, reset the state */
+	if (ns->pid_allocated == PIDNS_ADDING)
+		idr_set_cursor(&ns->idr, 0);
+
 	spin_unlock_irq(&pidmap_lock);
 
 	kmem_cache_free(ns->pid_cachep, pid);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 5d81206a572d721e7d96b129f160a4e16d2e2f2e..b9006617710f591bc659e8f522c6d73baa2a04d5 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3141,9 +3141,6 @@ void dump_stack_print_info(const char *log_lvl)
 void show_regs_print_info(const char *log_lvl)
 {
 	dump_stack_print_info(log_lvl);
-
-	printk("%stask: %p task.stack: %p\n",
-	       log_lvl, current, task_stack_page(current));
 }
 
 #endif
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 2ddaec40956f7cf1356e17404e758b9a11924c8d..0926aef10dadc26d73959b5d7a736e247320a7de 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -34,11 +34,6 @@ void complete(struct completion *x)
 
 	spin_lock_irqsave(&x->wait.lock, flags);
 
-	/*
-	 * Perform commit of crossrelease here.
-	 */
-	complete_release_commit(x);
-
 	if (x->done != UINT_MAX)
 		x->done++;
 	__wake_up_locked(&x->wait, TASK_NORMAL, 1);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 75554f366fd3aa20339a00a03d7897cf42bc7d97..644fa2e3d993b5ef1bd1c57e4286168daadf14ff 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5097,17 +5097,6 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
 	return ret;
 }
 
-/**
- * sys_sched_rr_get_interval - return the default timeslice of a process.
- * @pid: pid of the process.
- * @interval: userspace pointer to the timeslice value.
- *
- * this syscall writes the default timeslice value of a given process
- * into the user-space timespec buffer. A value of '0' means infinity.
- *
- * Return: On success, 0 and the timeslice is in @interval. Otherwise,
- * an error code.
- */
 static int sched_rr_get_interval(pid_t pid, struct timespec64 *t)
 {
 	struct task_struct *p;
@@ -5144,6 +5133,17 @@ static int sched_rr_get_interval(pid_t pid, struct timespec64 *t)
 	return retval;
 }
 
+/**
+ * sys_sched_rr_get_interval - return the default timeslice of a process.
+ * @pid: pid of the process.
+ * @interval: userspace pointer to the timeslice value.
+ *
+ * this syscall writes the default timeslice value of a given process
+ * into the user-space timespec buffer. A value of '0' means infinity.
+ *
+ * Return: On success, 0 and the timeslice is in @interval. Otherwise,
+ * an error code.
+ */
 SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 		struct timespec __user *, interval)
 {
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2f52ec0f1539fce62a099d9fa559df334a19869c..d6717a3331a1b21bd1be02f034596ab293e8ceac 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -244,7 +244,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
 #ifdef CONFIG_NO_HZ_COMMON
 static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
 {
-	unsigned long idle_calls = tick_nohz_get_idle_calls();
+	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
 	bool ret = idle_calls == sg_cpu->saved_idle_calls;
 
 	sg_cpu->saved_idle_calls = idle_calls;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4037e19bbca25939f0dd57b05f8fb25de8a90908..2fe3aa853e4dbacef363b70246390f94cc67932c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3413,9 +3413,9 @@ void set_task_rq_fair(struct sched_entity *se,
  * _IFF_ we look at the pure running and runnable sums. Because they
  * represent the very same entity, just at different points in the hierarchy.
  *
- *
- * Per the above update_tg_cfs_util() is trivial (and still 'wrong') and
- * simply copies the running sum over.
+ * Per the above update_tg_cfs_util() is trivial and simply copies the running
+ * sum over (but still wrong, because the group entity and group rq do not have
+ * their PELT windows aligned).
  *
  * However, update_tg_cfs_runnable() is more complex. So we have:
  *
@@ -3424,11 +3424,11 @@ void set_task_rq_fair(struct sched_entity *se,
  * And since, like util, the runnable part should be directly transferable,
  * the following would _appear_ to be the straight forward approach:
  *
- *   grq->avg.load_avg = grq->load.weight * grq->avg.running_avg	(3)
+ *   grq->avg.load_avg = grq->load.weight * grq->avg.runnable_avg	(3)
  *
  * And per (1) we have:
  *
- *   ge->avg.running_avg == grq->avg.running_avg
+ *   ge->avg.runnable_avg == grq->avg.runnable_avg
  *
  * Which gives:
  *
@@ -3447,27 +3447,28 @@ void set_task_rq_fair(struct sched_entity *se,
  * to (shortly) return to us. This only works by keeping the weights as
  * integral part of the sum. We therefore cannot decompose as per (3).
  *
- * OK, so what then?
+ * Another reason this doesn't work is that runnable isn't a 0-sum entity.
+ * Imagine a rq with 2 tasks that each are runnable 2/3 of the time. Then the
+ * rq itself is runnable anywhere between 2/3 and 1 depending on how the
+ * runnable section of these tasks overlap (or not). If they were to perfectly
+ * align the rq as a whole would be runnable 2/3 of the time. If however we
+ * always have at least 1 runnable task, the rq as a whole is always runnable.
  *
+ * So we'll have to approximate.. :/
  *
- * Another way to look at things is:
+ * Given the constraint:
  *
- *   grq->avg.load_avg = \Sum se->avg.load_avg
+ *   ge->avg.running_sum <= ge->avg.runnable_sum <= LOAD_AVG_MAX
  *
- * Therefore, per (2):
+ * We can construct a rule that adds runnable to a rq by assuming minimal
+ * overlap.
  *
- *   grq->avg.load_avg = \Sum se->load.weight * se->avg.runnable_avg
+ * On removal, we'll assume each task is equally runnable; which yields:
  *
- * And the very thing we're propagating is a change in that sum (someone
- * joined/left). So we can easily know the runnable change, which would be, per
- * (2) the already tracked se->load_avg divided by the corresponding
- * se->weight.
+ *   grq->avg.runnable_sum = grq->avg.load_sum / grq->load.weight
  *
- * Basically (4) but in differential form:
+ * XXX: only do this for the part of runnable > running ?
  *
- *   d(runnable_avg) += se->avg.load_avg / se->load.weight
- *								   (5)
- *   ge->avg.load_avg += ge->load.weight * d(runnable_avg)
  */
 
 static inline void
@@ -3479,6 +3480,14 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	if (!delta)
 		return;
 
+	/*
+	 * The relation between sum and avg is:
+	 *
+	 *   LOAD_AVG_MAX - 1024 + sa->period_contrib
+	 *
+	 * however, the PELT windows are not aligned between grq and gse.
+	 */
+
 	/* Set new sched_entity's utilization */
 	se->avg.util_avg = gcfs_rq->avg.util_avg;
 	se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
@@ -3491,33 +3500,68 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 static inline void
 update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long runnable_sum = gcfs_rq->prop_runnable_sum;
-	long runnable_load_avg, load_avg;
-	s64 runnable_load_sum, load_sum;
+	long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+	unsigned long runnable_load_avg, load_avg;
+	u64 runnable_load_sum, load_sum = 0;
+	s64 delta_sum;
 
 	if (!runnable_sum)
 		return;
 
 	gcfs_rq->prop_runnable_sum = 0;
 
+	if (runnable_sum >= 0) {
+		/*
+		 * Add runnable; clip at LOAD_AVG_MAX. Reflects that until
+		 * the CPU is saturated running == runnable.
+		 */
+		runnable_sum += se->avg.load_sum;
+		runnable_sum = min(runnable_sum, (long)LOAD_AVG_MAX);
+	} else {
+		/*
+		 * Estimate the new unweighted runnable_sum of the gcfs_rq by
+		 * assuming all tasks are equally runnable.
+		 */
+		if (scale_load_down(gcfs_rq->load.weight)) {
+			load_sum = div_s64(gcfs_rq->avg.load_sum,
+				scale_load_down(gcfs_rq->load.weight));
+		}
+
+		/* But make sure to not inflate se's runnable */
+		runnable_sum = min(se->avg.load_sum, load_sum);
+	}
+
+	/*
+	 * runnable_sum can't be lower than running_sum
+	 * As running sum is scale with cpu capacity wehreas the runnable sum
+	 * is not we rescale running_sum 1st
+	 */
+	running_sum = se->avg.util_sum /
+		arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
+	runnable_sum = max(runnable_sum, running_sum);
+
 	load_sum = (s64)se_weight(se) * runnable_sum;
 	load_avg = div_s64(load_sum, LOAD_AVG_MAX);
 
-	add_positive(&se->avg.load_sum, runnable_sum);
-	add_positive(&se->avg.load_avg, load_avg);
+	delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
+	delta_avg = load_avg - se->avg.load_avg;
 
-	add_positive(&cfs_rq->avg.load_avg, load_avg);
-	add_positive(&cfs_rq->avg.load_sum, load_sum);
+	se->avg.load_sum = runnable_sum;
+	se->avg.load_avg = load_avg;
+	add_positive(&cfs_rq->avg.load_avg, delta_avg);
+	add_positive(&cfs_rq->avg.load_sum, delta_sum);
 
 	runnable_load_sum = (s64)se_runnable(se) * runnable_sum;
 	runnable_load_avg = div_s64(runnable_load_sum, LOAD_AVG_MAX);
+	delta_sum = runnable_load_sum - se_weight(se) * se->avg.runnable_load_sum;
+	delta_avg = runnable_load_avg - se->avg.runnable_load_avg;
 
-	add_positive(&se->avg.runnable_load_sum, runnable_sum);
-	add_positive(&se->avg.runnable_load_avg, runnable_load_avg);
+	se->avg.runnable_load_sum = runnable_sum;
+	se->avg.runnable_load_avg = runnable_load_avg;
 
 	if (se->on_rq) {
-		add_positive(&cfs_rq->avg.runnable_load_avg, runnable_load_avg);
-		add_positive(&cfs_rq->avg.runnable_load_sum, runnable_load_sum);
+		add_positive(&cfs_rq->avg.runnable_load_avg, delta_avg);
+		add_positive(&cfs_rq->avg.runnable_load_sum, delta_sum);
 	}
 }
 
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index dd7908743dab696facd9dd32f4399ee952228151..9bcbacba82a8115ddceda7fb26097a23c50d44f5 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -89,7 +89,9 @@ static int membarrier_private_expedited(void)
 		rcu_read_unlock();
 	}
 	if (!fallback) {
+		preempt_disable();
 		smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+		preempt_enable();
 		free_cpumask_var(tmpmask);
 	}
 	cpus_read_unlock();
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4056c19ca3f00efbc7592a1b4b071426fabf2124..665ace2fc55885e0a4c0621e4bdbff2a5870b61f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2034,8 +2034,9 @@ static void pull_rt_task(struct rq *this_rq)
 	bool resched = false;
 	struct task_struct *p;
 	struct rq *src_rq;
+	int rt_overload_count = rt_overloaded(this_rq);
 
-	if (likely(!rt_overloaded(this_rq)))
+	if (likely(!rt_overload_count))
 		return;
 
 	/*
@@ -2044,6 +2045,11 @@ static void pull_rt_task(struct rq *this_rq)
 	 */
 	smp_rmb();
 
+	/* If we are the only overloaded CPU do nothing */
+	if (rt_overload_count == 1 &&
+	    cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
+		return;
+
 #ifdef HAVE_RT_PUSH_IPI
 	if (sched_feat(RT_PUSH_IPI)) {
 		tell_cpu_to_push(this_rq);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 98feab7933c76a0d178cd7da0115376641e7bbad..929ecb7d6b78a70f4ec4548a582f78c749d7d3ee 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -27,7 +27,7 @@ void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
 
 	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
 	spin_lock_irqsave(&wq_head->lock, flags);
-	__add_wait_queue_entry_tail(wq_head, wq_entry);
+	__add_wait_queue(wq_head, wq_entry);
 	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue);
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index e776fc8cc1df3bc6dd09e7722250f87f183fb5ef..f6b5f19223d6cb78efb3179f6dc0fd15383aeded 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -95,6 +95,7 @@ config NO_HZ_FULL
 	select RCU_NOCB_CPU
 	select VIRT_CPU_ACCOUNTING_GEN
 	select IRQ_WORK
+	select CPU_ISOLATION
 	help
 	 Adaptively try to shutdown the tick whenever possible, even when
 	 the CPU is running tasks. Typically this requires running a single
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 13d6881f908b7f91a5669264a060f59d7990f801..ec999f32c84058a0624d55ff3ee26a4fac63eb57 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -434,17 +434,22 @@ static struct pid *good_sigevent(sigevent_t * event)
 {
 	struct task_struct *rtn = current->group_leader;
 
-	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
-		(!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
-		 !same_thread_group(rtn, current) ||
-		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
+	switch (event->sigev_notify) {
+	case SIGEV_SIGNAL | SIGEV_THREAD_ID:
+		rtn = find_task_by_vpid(event->sigev_notify_thread_id);
+		if (!rtn || !same_thread_group(rtn, current))
+			return NULL;
+		/* FALLTHRU */
+	case SIGEV_SIGNAL:
+	case SIGEV_THREAD:
+		if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
+			return NULL;
+		/* FALLTHRU */
+	case SIGEV_NONE:
+		return task_pid(rtn);
+	default:
 		return NULL;
-
-	if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
-	    ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
-		return NULL;
-
-	return task_pid(rtn);
+	}
 }
 
 static struct k_itimer * alloc_posix_timer(void)
@@ -669,7 +674,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
 	struct timespec64 ts64;
 	bool sig_none;
 
-	sig_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE;
+	sig_none = timr->it_sigev_notify == SIGEV_NONE;
 	iv = timr->it_interval;
 
 	/* interval timer ? */
@@ -856,7 +861,7 @@ int common_timer_set(struct k_itimer *timr, int flags,
 
 	timr->it_interval = timespec64_to_ktime(new_setting->it_interval);
 	expires = timespec64_to_ktime(new_setting->it_value);
-	sigev_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE;
+	sigev_none = timr->it_sigev_notify == SIGEV_NONE;
 
 	kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
 	timr->it_active = !sigev_none;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 99578f06c8d4fe57cb15cdd44fd58865cae8d0f3..f7cc7abfcf252f0cb6fadb3b9437b9fd096b8baf 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -650,6 +650,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 	ts->next_tick = 0;
 }
 
+static inline bool local_timer_softirq_pending(void)
+{
+	return local_softirq_pending() & TIMER_SOFTIRQ;
+}
+
 static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 					 ktime_t now, int cpu)
 {
@@ -666,8 +671,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	} while (read_seqretry(&jiffies_lock, seq));
 	ts->last_jiffies = basejiff;
 
-	if (rcu_needs_cpu(basemono, &next_rcu) ||
-	    arch_needs_cpu() || irq_work_needs_cpu()) {
+	/*
+	 * Keep the periodic tick, when RCU, architecture or irq_work
+	 * requests it.
+	 * Aside of that check whether the local timer softirq is
+	 * pending. If so its a bad idea to call get_next_timer_interrupt()
+	 * because there is an already expired timer, so it will request
+	 * immeditate expiry, which rearms the hardware timer with a
+	 * minimal delta which brings us back to this place
+	 * immediately. Lather, rinse and repeat...
+	 */
+	if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
+	    irq_work_needs_cpu() || local_timer_softirq_pending()) {
 		next_tick = basemono + TICK_NSEC;
 	} else {
 		/*
@@ -985,6 +1000,19 @@ ktime_t tick_nohz_get_sleep_length(void)
 	return ts->sleep_length;
 }
 
+/**
+ * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
+ * for a particular CPU.
+ *
+ * Called from the schedutil frequency scaling governor in scheduler context.
+ */
+unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
+{
+	struct tick_sched *ts = tick_get_tick_sched(cpu);
+
+	return ts->idle_calls;
+}
+
 /**
  * tick_nohz_get_idle_calls - return the current idle calls counter value
  *
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index ffebcf878fba5d5cf67f5e9abcece25c16259919..89a9e1b4264a07a9a6d69e37759c27ced8b4de37 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -823,11 +823,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
 	struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);
 
 	/*
-	 * If the timer is deferrable and nohz is active then we need to use
-	 * the deferrable base.
+	 * If the timer is deferrable and NO_HZ_COMMON is set then we need
+	 * to use the deferrable base.
 	 */
-	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active &&
-	    (tflags & TIMER_DEFERRABLE))
+	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
 		base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
 	return base;
 }
@@ -837,11 +836,10 @@ static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
 	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 
 	/*
-	 * If the timer is deferrable and nohz is active then we need to use
-	 * the deferrable base.
+	 * If the timer is deferrable and NO_HZ_COMMON is set then we need
+	 * to use the deferrable base.
 	 */
-	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active &&
-	    (tflags & TIMER_DEFERRABLE))
+	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
 		base = this_cpu_ptr(&timer_bases[BASE_DEF]);
 	return base;
 }
@@ -1009,8 +1007,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
 	if (!ret && (options & MOD_TIMER_PENDING_ONLY))
 		goto out_unlock;
 
-	debug_activate(timer, expires);
-
 	new_base = get_target_base(base, timer->flags);
 
 	if (base != new_base) {
@@ -1034,6 +1030,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
 		}
 	}
 
+	debug_activate(timer, expires);
+
 	timer->expires = expires;
 	/*
 	 * If 'idx' was calculated above and the base time did not advance
@@ -1684,7 +1682,7 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
 	base->must_forward_clk = false;
 
 	__run_timers(base);
-	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
+	if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
 		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
 }
 
@@ -1855,6 +1853,21 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h
 	}
 }
 
+int timers_prepare_cpu(unsigned int cpu)
+{
+	struct timer_base *base;
+	int b;
+
+	for (b = 0; b < NR_BASES; b++) {
+		base = per_cpu_ptr(&timer_bases[b], cpu);
+		base->clk = jiffies;
+		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+		base->is_idle = false;
+		base->must_forward_clk = true;
+	}
+	return 0;
+}
+
 int timers_dead_cpu(unsigned int cpu)
 {
 	struct timer_base *old_base;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index af7dad126c13cecbe73f5d797778f005b5838377..904c952ac3833bdfd0e017dff437d26f4c545e3d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -164,6 +164,7 @@ config PREEMPTIRQ_EVENTS
 	bool "Enable trace events for preempt and irq disable/enable"
 	select TRACE_IRQFLAGS
 	depends on DEBUG_PREEMPT || !PROVE_LOCKING
+	depends on TRACING
 	default n
 	help
 	  Enable tracing of disable and enable events for preemption and irqs.
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 27d1f4ffa3def946525b2d248757fac3620504e5..40207c2a41134851d5016fe9cbdc678e42606868 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -343,14 +343,13 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
 	.arg4_type	= ARG_CONST_SIZE,
 };
 
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);
+static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
 
 static __always_inline u64
 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
-			u64 flags, struct perf_raw_record *raw)
+			u64 flags, struct perf_sample_data *sd)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
-	struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);
 	unsigned int cpu = smp_processor_id();
 	u64 index = flags & BPF_F_INDEX_MASK;
 	struct bpf_event_entry *ee;
@@ -373,8 +372,6 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 	if (unlikely(event->oncpu != cpu))
 		return -EOPNOTSUPP;
 
-	perf_sample_data_init(sd, 0, 0);
-	sd->raw = raw;
 	perf_event_output(event, sd, regs);
 	return 0;
 }
@@ -382,6 +379,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
 	   u64, flags, void *, data, u64, size)
 {
+	struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
 	struct perf_raw_record raw = {
 		.frag = {
 			.size = size,
@@ -392,7 +390,10 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
 		return -EINVAL;
 
-	return __bpf_perf_event_output(regs, map, flags, &raw);
+	perf_sample_data_init(sd, 0, 0);
+	sd->raw = &raw;
+
+	return __bpf_perf_event_output(regs, map, flags, sd);
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto = {
@@ -407,10 +408,12 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
 };
 
 static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
+static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
 
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
 {
+	struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
 	struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
 	struct perf_raw_frag frag = {
 		.copy		= ctx_copy,
@@ -428,8 +431,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 	};
 
 	perf_fetch_caller_regs(regs);
+	perf_sample_data_init(sd, 0, 0);
+	sd->raw = &raw;
 
-	return __bpf_perf_event_output(regs, map, flags, &raw);
+	return __bpf_perf_event_output(regs, map, flags, sd);
 }
 
 BPF_CALL_0(bpf_get_current_task)
@@ -759,6 +764,8 @@ const struct bpf_prog_ops perf_event_prog_ops = {
 
 static DEFINE_MUTEX(bpf_event_mutex);
 
+#define BPF_TRACE_MAX_PROGS 64
+
 int perf_event_attach_bpf_prog(struct perf_event *event,
 			       struct bpf_prog *prog)
 {
@@ -772,6 +779,12 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
 		goto unlock;
 
 	old_array = event->tp_event->prog_array;
+	if (old_array &&
+	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
+		ret = -E2BIG;
+		goto unlock;
+	}
+
 	ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
 	if (ret < 0)
 		goto unlock;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 91874a95060de5de11aa47d3fbddb8c4980a0da8..9ab18995ff1ebeea0e862f48b43f64924c87e127 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 /* Missed count stored at end */
 #define RB_MISSED_STORED	(1 << 30)
 
+#define RB_MISSED_FLAGS		(RB_MISSED_EVENTS|RB_MISSED_STORED)
+
 struct buffer_data_page {
 	u64		 time_stamp;	/* page time stamp */
 	local_t		 commit;	/* write committed index */
@@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
  */
 size_t ring_buffer_page_len(void *page)
 {
-	return local_read(&((struct buffer_data_page *)page)->commit)
+	struct buffer_data_page *bpage = page;
+
+	return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
 		+ BUF_PAGE_HDR_SIZE;
 }
 
@@ -1799,12 +1803,6 @@ void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
 
-static __always_inline void *
-__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
-{
-	return bpage->data + index;
-}
-
 static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
 {
 	return bpage->page->data + index;
@@ -4406,8 +4404,13 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct buffer_data_page *bpage = data;
+	struct page *page = virt_to_page(bpage);
 	unsigned long flags;
 
+	/* If the page is still in use someplace else, we can't reuse it */
+	if (page_ref_count(page) > 1)
+		goto out;
+
 	local_irq_save(flags);
 	arch_spin_lock(&cpu_buffer->lock);
 
@@ -4419,6 +4422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
 	arch_spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
 
+ out:
 	free_page((unsigned long)bpage);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 73e67b68c53b47d5b422970cd0dee1d0bec27002..2a8d8a294345a258baca50b8a6b272c1ac0fc658 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -362,7 +362,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct
 }
 
 /**
- * trace_pid_filter_add_remove - Add or remove a task from a pid_list
+ * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
  * @pid_list: The list to modify
  * @self: The current task for fork or NULL for exit
  * @task: The task to add or remove
@@ -925,7 +925,7 @@ static void tracing_snapshot_instance(struct trace_array *tr)
 }
 
 /**
- * trace_snapshot - take a snapshot of the current buffer.
+ * tracing_snapshot - take a snapshot of the current buffer.
  *
  * This causes a swap between the snapshot buffer and the current live
  * tracing buffer. You can use this to take snapshots of the live
@@ -1004,9 +1004,9 @@ int tracing_alloc_snapshot(void)
 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
 
 /**
- * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
+ * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
  *
- * This is similar to trace_snapshot(), but it will allocate the
+ * This is similar to tracing_snapshot(), but it will allocate the
  * snapshot buffer if it isn't already allocated. Use this only
  * where it is safe to sleep, as the allocation may sleep.
  *
@@ -1303,7 +1303,7 @@ unsigned long __read_mostly	tracing_thresh;
 /*
  * Copy the new maximum trace into the separate maximum-trace
  * structure. (this way the maximum trace is permanently saved,
- * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
+ * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
  */
 static void
 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
@@ -2415,7 +2415,7 @@ trace_process_export(struct trace_export *export,
 
 	entry = ring_buffer_event_data(event);
 	size = ring_buffer_event_length(event);
-	export->write(entry, size);
+	export->write(export, entry, size);
 }
 
 static DEFINE_MUTEX(ftrace_export_lock);
@@ -4178,37 +4178,30 @@ static const struct file_operations show_traces_fops = {
 	.llseek		= seq_lseek,
 };
 
-/*
- * The tracer itself will not take this lock, but still we want
- * to provide a consistent cpumask to user-space:
- */
-static DEFINE_MUTEX(tracing_cpumask_update_lock);
-
-/*
- * Temporary storage for the character representation of the
- * CPU bitmask (and one more byte for the newline):
- */
-static char mask_str[NR_CPUS + 1];
-
 static ssize_t
 tracing_cpumask_read(struct file *filp, char __user *ubuf,
 		     size_t count, loff_t *ppos)
 {
 	struct trace_array *tr = file_inode(filp)->i_private;
+	char *mask_str;
 	int len;
 
-	mutex_lock(&tracing_cpumask_update_lock);
+	len = snprintf(NULL, 0, "%*pb\n",
+		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
+	mask_str = kmalloc(len, GFP_KERNEL);
+	if (!mask_str)
+		return -ENOMEM;
 
-	len = snprintf(mask_str, count, "%*pb\n",
+	len = snprintf(mask_str, len, "%*pb\n",
 		       cpumask_pr_args(tr->tracing_cpumask));
 	if (len >= count) {
 		count = -EINVAL;
 		goto out_err;
 	}
-	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
+	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
 
 out_err:
-	mutex_unlock(&tracing_cpumask_update_lock);
+	kfree(mask_str);
 
 	return count;
 }
@@ -4228,8 +4221,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 	if (err)
 		goto err_unlock;
 
-	mutex_lock(&tracing_cpumask_update_lock);
-
 	local_irq_disable();
 	arch_spin_lock(&tr->max_lock);
 	for_each_tracing_cpu(cpu) {
@@ -4252,8 +4243,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 	local_irq_enable();
 
 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
-
-	mutex_unlock(&tracing_cpumask_update_lock);
 	free_cpumask_var(tracing_cpumask_new);
 
 	return count;
@@ -6780,7 +6769,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		.spd_release	= buffer_spd_release,
 	};
 	struct buffer_ref *ref;
-	int entries, size, i;
+	int entries, i;
 	ssize_t ret = 0;
 
 #ifdef CONFIG_TRACER_MAX_TRACE
@@ -6834,14 +6823,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 			break;
 		}
 
-		/*
-		 * zero out any left over data, this is going to
-		 * user land.
-		 */
-		size = ring_buffer_page_len(ref->page);
-		if (size < PAGE_SIZE)
-			memset(ref->page + size, 0, PAGE_SIZE - size);
-
 		page = virt_to_page(ref->page);
 
 		spd.pages[i] = page;
@@ -7599,6 +7580,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size
 	buf->data = alloc_percpu(struct trace_array_cpu);
 	if (!buf->data) {
 		ring_buffer_free(buf->buffer);
+		buf->buffer = NULL;
 		return -ENOMEM;
 	}
 
@@ -7622,7 +7604,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
 				    allocate_snapshot ? size : 1);
 	if (WARN_ON(ret)) {
 		ring_buffer_free(tr->trace_buffer.buffer);
+		tr->trace_buffer.buffer = NULL;
 		free_percpu(tr->trace_buffer.data);
+		tr->trace_buffer.data = NULL;
 		return -ENOMEM;
 	}
 	tr->allocated_snapshot = allocate_snapshot;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 734accc02418930280a5248013fa6fbd4a5868e2..3c7bfc4bf5e9981b687ca8dd4ed1cf890b38ee12 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -209,6 +209,10 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
 	if (__this_cpu_read(disable_stack_tracer) != 1)
 		goto out;
 
+	/* If rcu is not watching, then save stack trace can fail */
+	if (!rcu_is_watching())
+		goto out;
+
 	ip += MCOUNT_INSN_SIZE;
 
 	check_stack(ip, &stack);
diff --git a/kernel/uid16.c b/kernel/uid16.c
index ce74a4901d2b058595af031cf6c408c48dc5e1f1..ef1da2a5f9bd00689e4f78adffb04c867d8395cb 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -192,6 +192,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
 		return retval;
 	}
 
+	groups_sort(group_info);
 	retval = set_current_groups(group_info);
 	put_group_info(group_info);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8fdb710bfdd732fc3e6bfaa3110264de70c9af1d..43d18cb46308385865d14ce40906c4646d378063 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -38,7 +38,6 @@
 #include <linux/hardirq.h>
 #include <linux/mempolicy.h>
 #include <linux/freezer.h>
-#include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
 #include <linux/idr.h>
@@ -48,6 +47,7 @@
 #include <linux/nodemask.h>
 #include <linux/moduleparam.h>
 #include <linux/uaccess.h>
+#include <linux/sched/isolation.h>
 
 #include "workqueue_internal.h"
 
@@ -1634,7 +1634,7 @@ static void worker_enter_idle(struct worker *worker)
 		mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
 
 	/*
-	 * Sanity check nr_running.  Because wq_unbind_fn() releases
+	 * Sanity check nr_running.  Because unbind_workers() releases
 	 * pool->lock between setting %WORKER_UNBOUND and zapping
 	 * nr_running, the warning may trigger spuriously.  Check iff
 	 * unbind is not in progress.
@@ -4510,9 +4510,8 @@ void show_workqueue_state(void)
  * cpu comes back online.
  */
 
-static void wq_unbind_fn(struct work_struct *work)
+static void unbind_workers(int cpu)
 {
-	int cpu = smp_processor_id();
 	struct worker_pool *pool;
 	struct worker *worker;
 
@@ -4589,16 +4588,6 @@ static void rebind_workers(struct worker_pool *pool)
 
 	spin_lock_irq(&pool->lock);
 
-	/*
-	 * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED
-	 * w/o preceding DOWN_PREPARE.  Work around it.  CPU hotplug is
-	 * being reworked and this can go away in time.
-	 */
-	if (!(pool->flags & POOL_DISASSOCIATED)) {
-		spin_unlock_irq(&pool->lock);
-		return;
-	}
-
 	pool->flags &= ~POOL_DISASSOCIATED;
 
 	for_each_pool_worker(worker, pool) {
@@ -4709,12 +4698,13 @@ int workqueue_online_cpu(unsigned int cpu)
 
 int workqueue_offline_cpu(unsigned int cpu)
 {
-	struct work_struct unbind_work;
 	struct workqueue_struct *wq;
 
 	/* unbinding per-cpu workers should happen on the local CPU */
-	INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
-	queue_work_on(cpu, system_highpri_wq, &unbind_work);
+	if (WARN_ON(cpu != smp_processor_id()))
+		return -1;
+
+	unbind_workers(cpu);
 
 	/* update NUMA affinity of unbound workqueues */
 	mutex_lock(&wq_pool_mutex);
@@ -4722,9 +4712,6 @@ int workqueue_offline_cpu(unsigned int cpu)
 		wq_update_unbound_numa(wq, cpu, false);
 	mutex_unlock(&wq_pool_mutex);
 
-	/* wait for per-cpu unbinding to finish */
-	flush_work(&unbind_work);
-	destroy_work_on_stack(&unbind_work);
 	return 0;
 }
 
@@ -4957,6 +4944,10 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
 	if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
 		return -ENOMEM;
 
+	/*
+	 * Not excluding isolated cpus on purpose.
+	 * If the user wishes to include them, we allow that.
+	 */
 	cpumask_and(cpumask, cpumask, cpu_possible_mask);
 	if (!cpumask_empty(cpumask)) {
 		apply_wqattrs_lock();
@@ -5555,7 +5546,7 @@ int __init workqueue_init_early(void)
 	WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
 
 	BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
-	cpumask_copy(wq_unbound_cpumask, cpu_possible_mask);
+	cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN));
 
 	pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 947d3e2ed5c2f1a7fa1e001e4bf56b9c7a2b9d49..9d5b78aad4c5bcd59a927c654e8010e54269d750 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1099,8 +1099,6 @@ config PROVE_LOCKING
 	select DEBUG_MUTEXES
 	select DEBUG_RT_MUTEXES if RT_MUTEXES
 	select DEBUG_LOCK_ALLOC
-	select LOCKDEP_CROSSRELEASE
-	select LOCKDEP_COMPLETIONS
 	select TRACE_IRQFLAGS
 	default n
 	help
@@ -1170,37 +1168,6 @@ config LOCK_STAT
 	 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
 	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
 
-config LOCKDEP_CROSSRELEASE
-	bool
-	help
-	 This makes lockdep work for crosslock which is a lock allowed to
-	 be released in a different context from the acquisition context.
-	 Normally a lock must be released in the context acquiring the lock.
-	 However, relexing this constraint helps synchronization primitives
-	 such as page locks or completions can use the lock correctness
-	 detector, lockdep.
-
-config LOCKDEP_COMPLETIONS
-	bool
-	help
-	 A deadlock caused by wait_for_completion() and complete() can be
-	 detected by lockdep using crossrelease feature.
-
-config BOOTPARAM_LOCKDEP_CROSSRELEASE_FULLSTACK
-	bool "Enable the boot parameter, crossrelease_fullstack"
-	depends on LOCKDEP_CROSSRELEASE
-	default n
-	help
-	 The lockdep "cross-release" feature needs to record stack traces
-	 (of calling functions) for all acquisitions, for eventual later
-	 use during analysis. By default only a single caller is recorded,
-	 because the unwind operation can be very expensive with deeper
-	 stack chains.
-
-	 However a boot parameter, crossrelease_fullstack, was
-	 introduced since sometimes deeper traces are required for full
-	 analysis. This option turns on the boot parameter.
-
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"
 	depends on DEBUG_KERNEL && LOCKDEP
diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c
index 1ef0cec38d7879332966f8095d0bfdc0c7580230..dc14beae2c9aac7df010cdc36e861e33d34a1103 100644
--- a/lib/asn1_decoder.c
+++ b/lib/asn1_decoder.c
@@ -313,42 +313,47 @@ int asn1_ber_decoder(const struct asn1_decoder *decoder,
 
 	/* Decide how to handle the operation */
 	switch (op) {
-	case ASN1_OP_MATCH_ANY_ACT:
-	case ASN1_OP_MATCH_ANY_ACT_OR_SKIP:
-	case ASN1_OP_COND_MATCH_ANY_ACT:
-	case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP:
-		ret = actions[machine[pc + 1]](context, hdr, tag, data + dp, len);
-		if (ret < 0)
-			return ret;
-		goto skip_data;
-
-	case ASN1_OP_MATCH_ACT:
-	case ASN1_OP_MATCH_ACT_OR_SKIP:
-	case ASN1_OP_COND_MATCH_ACT_OR_SKIP:
-		ret = actions[machine[pc + 2]](context, hdr, tag, data + dp, len);
-		if (ret < 0)
-			return ret;
-		goto skip_data;
-
 	case ASN1_OP_MATCH:
 	case ASN1_OP_MATCH_OR_SKIP:
+	case ASN1_OP_MATCH_ACT:
+	case ASN1_OP_MATCH_ACT_OR_SKIP:
 	case ASN1_OP_MATCH_ANY:
 	case ASN1_OP_MATCH_ANY_OR_SKIP:
+	case ASN1_OP_MATCH_ANY_ACT:
+	case ASN1_OP_MATCH_ANY_ACT_OR_SKIP:
 	case ASN1_OP_COND_MATCH_OR_SKIP:
+	case ASN1_OP_COND_MATCH_ACT_OR_SKIP:
 	case ASN1_OP_COND_MATCH_ANY:
 	case ASN1_OP_COND_MATCH_ANY_OR_SKIP:
-	skip_data:
+	case ASN1_OP_COND_MATCH_ANY_ACT:
+	case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP:
+
 		if (!(flags & FLAG_CONS)) {
 			if (flags & FLAG_INDEFINITE_LENGTH) {
+				size_t tmp = dp;
+
 				ret = asn1_find_indefinite_length(
-					data, datalen, &dp, &len, &errmsg);
+					data, datalen, &tmp, &len, &errmsg);
 				if (ret < 0)
 					goto error;
-			} else {
-				dp += len;
 			}
 			pr_debug("- LEAF: %zu\n", len);
 		}
+
+		if (op & ASN1_OP_MATCH__ACT) {
+			unsigned char act;
+
+			if (op & ASN1_OP_MATCH__ANY)
+				act = machine[pc + 1];
+			else
+				act = machine[pc + 2];
+			ret = actions[act](context, hdr, tag, data + dp, len);
+			if (ret < 0)
+				return ret;
+		}
+
+		if (!(flags & FLAG_CONS))
+			dp += len;
 		pc += asn1_op_lengths[op];
 		goto next_op;
 
@@ -434,6 +439,8 @@ int asn1_ber_decoder(const struct asn1_decoder *decoder,
 			else
 				act = machine[pc + 1];
 			ret = actions[act](context, hdr, 0, data + tdp, len);
+			if (ret < 0)
+				return ret;
 		}
 		pc += asn1_op_lengths[op];
 		goto next_op;
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index c3e84edc47c965d40199b652ba78876cdaa9c70c..2615074d3de5c63e63da31995adc4a1f07f7e9fd 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -346,7 +346,8 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj,
 static void zap_modalias_env(struct kobj_uevent_env *env)
 {
 	static const char modalias_prefix[] = "MODALIAS=";
-	int i;
+	size_t len;
+	int i, j;
 
 	for (i = 0; i < env->envp_idx;) {
 		if (strncmp(env->envp[i], modalias_prefix,
@@ -355,11 +356,18 @@ static void zap_modalias_env(struct kobj_uevent_env *env)
 			continue;
 		}
 
-		if (i != env->envp_idx - 1)
-			memmove(&env->envp[i], &env->envp[i + 1],
-				sizeof(env->envp[i]) * env->envp_idx - 1);
+		len = strlen(env->envp[i]) + 1;
+
+		if (i != env->envp_idx - 1) {
+			memmove(env->envp[i], env->envp[i + 1],
+				env->buflen - len);
+
+			for (j = i; j < env->envp_idx - 1; j++)
+				env->envp[j] = env->envp[j + 1] - len;
+		}
 
 		env->envp_idx--;
+		env->buflen -= len;
 	}
 }
 
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
index 57fd45ab7af1a42aaf290866a2c0bbc5da7f8710..08c60d10747fddc204788e75f24107805702caf4 100644
--- a/lib/mpi/longlong.h
+++ b/lib/mpi/longlong.h
@@ -671,7 +671,23 @@ do {						\
 	**************  MIPS/64  **************
 	***************************************/
 #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
-#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
+#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
+/*
+ * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C
+ * code below, so we special case MIPS64r6 until the compiler can do better.
+ */
+#define umul_ppmm(w1, w0, u, v)						\
+do {									\
+	__asm__ ("dmulu %0,%1,%2"					\
+		 : "=d" ((UDItype)(w0))					\
+		 : "d" ((UDItype)(u)),					\
+		   "d" ((UDItype)(v)));					\
+	__asm__ ("dmuhu %0,%1,%2"					\
+		 : "=d" ((UDItype)(w1))					\
+		 : "d" ((UDItype)(u)),					\
+		   "d" ((UDItype)(v)));					\
+} while (0)
+#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
 #define umul_ppmm(w1, w0, u, v) \
 do {									\
 	typedef unsigned int __ll_UTItype __attribute__((mode(TI)));	\
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 8bf78b4b78f0a286e40941344ecd4b504738cc01..dfa55c873c1318643fdbcbe916b9c18a54edc4c9 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -15,7 +15,11 @@
 #include <linux/types.h>
 #include <net/netlink.h>
 
-/* for these data types attribute length must be exactly given size */
+/* For these data types, attribute length should be exactly the given
+ * size. However, to maintain compatibility with broken commands, if the
+ * attribute length does not match the expected size a warning is emitted
+ * to the user that the command is sending invalid data and needs to be fixed.
+ */
 static const u8 nla_attr_len[NLA_TYPE_MAX+1] = {
 	[NLA_U8]	= sizeof(u8),
 	[NLA_U16]	= sizeof(u16),
@@ -28,8 +32,16 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = {
 };
 
 static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
+	[NLA_U8]	= sizeof(u8),
+	[NLA_U16]	= sizeof(u16),
+	[NLA_U32]	= sizeof(u32),
+	[NLA_U64]	= sizeof(u64),
 	[NLA_MSECS]	= sizeof(u64),
 	[NLA_NESTED]	= NLA_HDRLEN,
+	[NLA_S8]	= sizeof(s8),
+	[NLA_S16]	= sizeof(s16),
+	[NLA_S32]	= sizeof(s32),
+	[NLA_S64]	= sizeof(s64),
 };
 
 static int validate_nla_bitfield32(const struct nlattr *nla,
@@ -69,11 +81,9 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 
 	BUG_ON(pt->type > NLA_TYPE_MAX);
 
-	/* for data types NLA_U* and NLA_S* require exact length */
-	if (nla_attr_len[pt->type]) {
-		if (attrlen != nla_attr_len[pt->type])
-			return -ERANGE;
-		return 0;
+	if (nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) {
+		pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
+				    current->comm, type);
 	}
 
 	switch (pt->type) {
diff --git a/lib/oid_registry.c b/lib/oid_registry.c
index 41b9e50711a72af31d7548d3aea5be1177a45a9d..0bcac6ccb1b2041369926eac64e4267076969e06 100644
--- a/lib/oid_registry.c
+++ b/lib/oid_registry.c
@@ -116,14 +116,14 @@ int sprint_oid(const void *data, size_t datasize, char *buffer, size_t bufsize)
 	int count;
 
 	if (v >= end)
-		return -EBADMSG;
+		goto bad;
 
 	n = *v++;
 	ret = count = snprintf(buffer, bufsize, "%u.%u", n / 40, n % 40);
+	if (count >= bufsize)
+		return -ENOBUFS;
 	buffer += count;
 	bufsize -= count;
-	if (bufsize == 0)
-		return -ENOBUFS;
 
 	while (v < end) {
 		num = 0;
@@ -134,20 +134,24 @@ int sprint_oid(const void *data, size_t datasize, char *buffer, size_t bufsize)
 			num = n & 0x7f;
 			do {
 				if (v >= end)
-					return -EBADMSG;
+					goto bad;
 				n = *v++;
 				num <<= 7;
 				num |= n & 0x7f;
 			} while (n & 0x80);
 		}
 		ret += count = snprintf(buffer, bufsize, ".%lu", num);
-		buffer += count;
-		if (bufsize <= count)
+		if (count >= bufsize)
 			return -ENOBUFS;
+		buffer += count;
 		bufsize -= count;
 	}
 
 	return ret;
+
+bad:
+	snprintf(buffer, bufsize, "(bad)");
+	return -EBADMSG;
 }
 EXPORT_SYMBOL_GPL(sprint_oid);
 
diff --git a/lib/rbtree.c b/lib/rbtree.c
index ba4a9d165f1bed3c39651387def0008fc793fbd6..d3ff682fd4b8dac865276f5fefc7a1108c1bdb37 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -603,6 +603,16 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
 }
 EXPORT_SYMBOL(rb_replace_node);
 
+void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
+			    struct rb_root_cached *root)
+{
+	rb_replace_node(victim, new, &root->rb_root);
+
+	if (root->rb_leftmost == victim)
+		root->rb_leftmost = new;
+}
+EXPORT_SYMBOL(rb_replace_node_cached);
+
 void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
 			 struct rb_root *root)
 {
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index aa8812ae6776ee31712fe88c58da4048ff9c31e4..f369889e521d7e7f1e237236651532e6a862e0f5 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -435,6 +435,41 @@ static int bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
 	return 0;
 }
 
+static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self)
+{
+	struct bpf_insn *insn;
+
+	insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL);
+	if (!insn)
+		return -ENOMEM;
+
+	/* Due to func address being non-const, we need to
+	 * assemble this here.
+	 */
+	insn[0] = BPF_MOV64_REG(R6, R1);
+	insn[1] = BPF_LD_ABS(BPF_B, 0);
+	insn[2] = BPF_LD_ABS(BPF_H, 0);
+	insn[3] = BPF_LD_ABS(BPF_W, 0);
+	insn[4] = BPF_MOV64_REG(R7, R6);
+	insn[5] = BPF_MOV64_IMM(R6, 0);
+	insn[6] = BPF_MOV64_REG(R1, R7);
+	insn[7] = BPF_MOV64_IMM(R2, 1);
+	insn[8] = BPF_MOV64_IMM(R3, 2);
+	insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			       bpf_skb_vlan_push_proto.func - __bpf_call_base);
+	insn[10] = BPF_MOV64_REG(R6, R7);
+	insn[11] = BPF_LD_ABS(BPF_B, 0);
+	insn[12] = BPF_LD_ABS(BPF_H, 0);
+	insn[13] = BPF_LD_ABS(BPF_W, 0);
+	insn[14] = BPF_MOV64_IMM(R0, 42);
+	insn[15] = BPF_EXIT_INSN();
+
+	self->u.ptr.insns = insn;
+	self->u.ptr.len = 16;
+
+	return 0;
+}
+
 static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
 {
 	unsigned int len = BPF_MAXINSNS;
@@ -6066,6 +6101,14 @@ static struct bpf_test tests[] = {
 		{},
 		{ {0x1, 0x42 } },
 	},
+	{
+		"LD_ABS with helper changing skb data",
+		{ },
+		INTERNAL,
+		{ 0x34 },
+		{ { ETH_HLEN, 42 } },
+		.fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
+	},
 };
 
 static struct net_device dev;
@@ -6207,9 +6250,8 @@ static struct bpf_prog *generate_filter(int which, int *err)
 				return NULL;
 			}
 		}
-		/* We don't expect to fail. */
 		if (*err) {
-			pr_cont("FAIL to attach err=%d len=%d\n",
+			pr_cont("FAIL to prog_create err=%d len=%d\n",
 				*err, fprog.len);
 			return NULL;
 		}
@@ -6233,6 +6275,10 @@ static struct bpf_prog *generate_filter(int which, int *err)
 		 * checks.
 		 */
 		fp = bpf_prog_select_runtime(fp, err);
+		if (*err) {
+			pr_cont("FAIL to select_runtime err=%d\n", *err);
+			return NULL;
+		}
 		break;
 	}
 
@@ -6418,8 +6464,8 @@ static __init int test_bpf(void)
 				pass_cnt++;
 				continue;
 			}
-
-			return err;
+			err_cnt++;
+			continue;
 		}
 
 		pr_cont("jited:%u ", fp->jited);
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index 4a720ed4fdafd575df46159a3d51131902d638e4..0d54bcbc8170c7754aef0487eb10b8f7b5773103 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -33,8 +33,9 @@
  * @head: head of timerqueue
  * @node: timer node to be added
  *
- * Adds the timer node to the timerqueue, sorted by the
- * node's expires value.
+ * Adds the timer node to the timerqueue, sorted by the node's expires
+ * value. Returns true if the newly added timer is the first expiring timer in
+ * the queue.
  */
 bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
 {
@@ -70,7 +71,8 @@ EXPORT_SYMBOL_GPL(timerqueue_add);
  * @head: head of timerqueue
  * @node: timer node to be removed
  *
- * Removes the timer node from the timerqueue.
+ * Removes the timer node from the timerqueue. Returns true if the queue is
+ * not empty after the remove.
  */
 bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
 {
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 84b2dc76f140e922e2ed0d7c4d545b4d4ddf496d..b5f940ce0143ba061a183db0df3ef0dc17f57c72 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
 	if (IS_ERR(dev))
 		return PTR_ERR(dev);
 
-	if (bdi_debug_register(bdi, dev_name(dev))) {
-		device_destroy(bdi_class, dev->devt);
-		return -ENOMEM;
-	}
 	cgwb_bdi_register(bdi);
 	bdi->dev = dev;
 
+	bdi_debug_register(bdi, dev_name(dev));
 	set_bit(WB_registered, &bdi->wb.state);
 
 	spin_lock_bh(&bdi_lock);
diff --git a/mm/debug.c b/mm/debug.c
index d947f3e03b0dff6ab0ee8ac4d714ae361421fd86..56e2d9125ea55a57632feb22c64f2c1a76f5ec6e 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -50,7 +50,7 @@ void __dump_page(struct page *page, const char *reason)
 	 */
 	int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
 
-	pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx",
+	pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx",
 		  page, page_ref_count(page), mapcount,
 		  page->mapping, page_to_pgoff(page));
 	if (PageCompound(page))
@@ -69,7 +69,7 @@ void __dump_page(struct page *page, const char *reason)
 
 #ifdef CONFIG_MEMCG
 	if (page->mem_cgroup)
-		pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup);
+		pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup);
 #endif
 }
 
@@ -84,10 +84,10 @@ EXPORT_SYMBOL(dump_page);
 
 void dump_vma(const struct vm_area_struct *vma)
 {
-	pr_emerg("vma %p start %p end %p\n"
-		"next %p prev %p mm %p\n"
-		"prot %lx anon_vma %p vm_ops %p\n"
-		"pgoff %lx file %p private_data %p\n"
+	pr_emerg("vma %px start %px end %px\n"
+		"next %px prev %px mm %px\n"
+		"prot %lx anon_vma %px vm_ops %px\n"
+		"pgoff %lx file %px private_data %px\n"
 		"flags: %#lx(%pGv)\n",
 		vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next,
 		vma->vm_prev, vma->vm_mm,
@@ -100,27 +100,27 @@ EXPORT_SYMBOL(dump_vma);
 
 void dump_mm(const struct mm_struct *mm)
 {
-	pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n"
+	pr_emerg("mm %px mmap %px seqnum %d task_size %lu\n"
 #ifdef CONFIG_MMU
-		"get_unmapped_area %p\n"
+		"get_unmapped_area %px\n"
 #endif
 		"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
-		"pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
+		"pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
 		"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
 		"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
 		"start_code %lx end_code %lx start_data %lx end_data %lx\n"
 		"start_brk %lx brk %lx start_stack %lx\n"
 		"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
-		"binfmt %p flags %lx core_state %p\n"
+		"binfmt %px flags %lx core_state %px\n"
 #ifdef CONFIG_AIO
-		"ioctx_table %p\n"
+		"ioctx_table %px\n"
 #endif
 #ifdef CONFIG_MEMCG
-		"owner %p "
+		"owner %px "
 #endif
-		"exe_file %p\n"
+		"exe_file %px\n"
 #ifdef CONFIG_MMU_NOTIFIER
-		"mmu_notifier_mm %p\n"
+		"mmu_notifier_mm %px\n"
 #endif
 #ifdef CONFIG_NUMA_BALANCING
 		"numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n"
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index d04ac1ec05598d64c2acfd7a9b95b4b4a7a81c36..1826f191e72c836c59970006528e39bd791209bc 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -111,7 +111,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
 	enum fixed_addresses idx;
 	int i, slot;
 
-	WARN_ON(system_state != SYSTEM_BOOTING);
+	WARN_ON(system_state >= SYSTEM_RUNNING);
 
 	slot = -1;
 	for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index 297c7238f7d4094a6ac4ab0dc72e04abb870972f..c64dca6e27c28c915ad4ce662de764d6da5ef6f1 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -62,8 +62,10 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
 	 * get_user_pages_longterm() and disallow it for filesystem-dax
 	 * mappings.
 	 */
-	if (vma_is_fsdax(vma))
-		return -EOPNOTSUPP;
+	if (vma_is_fsdax(vma)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
 
 	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
 		vec->got_ref = true;
diff --git a/mm/gup.c b/mm/gup.c
index d3fb60e5bfacd4c733957dc526c28c41bd2321d1..e0d82b6706d72d82637bca5eaef1e35e15a1abdf 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
  */
 static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
 {
-	return pte_access_permitted(pte, WRITE) ||
+	return pte_write(pte) ||
 		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
 }
 
diff --git a/mm/hmm.c b/mm/hmm.c
index 3a5c172af56039bb26007ea4cc5ec4ca0a9bf659..ea19742a5d60b1a6270629a024d88a13b9c5f3c1 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -391,11 +391,11 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 		if (pmd_protnone(pmd))
 			return hmm_vma_walk_clear(start, end, walk);
 
-		if (!pmd_access_permitted(pmd, write_fault))
+		if (write_fault && !pmd_write(pmd))
 			return hmm_vma_walk_clear(start, end, walk);
 
 		pfn = pmd_pfn(pmd) + pte_index(addr);
-		flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0;
+		flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
 		for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
 			pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
 		return 0;
@@ -456,11 +456,11 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 			continue;
 		}
 
-		if (!pte_access_permitted(pte, write_fault))
+		if (write_fault && !pte_write(pte))
 			goto fault;
 
 		pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
-		pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0;
+		pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
 		continue;
 
 fault:
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2f2f5e77490278f58c6e9a923899255efff77551..0e7ded98d114d184877d2fc9bd0f02c3187f2ed5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -870,7 +870,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 	 */
 	WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
 
-	if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE))
+	if (flags & FOLL_WRITE && !pmd_write(*pmd))
 		return NULL;
 
 	if (pmd_present(*pmd) && pmd_devmap(*pmd))
@@ -1012,7 +1012,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 
 	assert_spin_locked(pud_lockptr(mm, pud));
 
-	if (!pud_access_permitted(*pud, flags & FOLL_WRITE))
+	if (flags & FOLL_WRITE && !pud_write(*pud))
 		return NULL;
 
 	if (pud_present(*pud) && pud_devmap(*pud))
@@ -1386,7 +1386,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
  */
 static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
 {
-	return pmd_access_permitted(pmd, WRITE) ||
+	return pmd_write(pmd) ||
 	       ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
 }
 
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
deleted file mode 100644
index cec5940325151e407aa90128a35cb683afd436d7..0000000000000000000000000000000000000000
--- a/mm/kmemcheck.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 3d4781756d50fef924f52c0d9cb6cb0cbddd479f..f656ca27f6c20596322ce2aea248b4b04085bb29 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -127,7 +127,7 @@
 /* GFP bitmask for kmemleak internal allocations */
 #define gfp_kmemleak_mask(gfp)	(((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
 				 __GFP_NORETRY | __GFP_NOMEMALLOC | \
-				 __GFP_NOWARN)
+				 __GFP_NOWARN | __GFP_NOFAIL)
 
 /* scanning area inside a memory block */
 struct kmemleak_scan_area {
@@ -1523,7 +1523,7 @@ static void kmemleak_scan(void)
 			if (page_count(page) == 0)
 				continue;
 			scan_block(page, page + 1, NULL);
-			if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page))))
+			if (!(pfn & 63))
 				cond_resched();
 		}
 	}
diff --git a/mm/memory.c b/mm/memory.c
index 5eb3d2524bdc28239b33a0ac6e385fa5a5b9aaf9..ca5674cbaff2b65c4e51086e5922fbbd274f2cfa 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3831,7 +3831,8 @@ static inline int create_huge_pmd(struct vm_fault *vmf)
 	return VM_FAULT_FALLBACK;
 }
 
-static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
+/* `inline' is required to avoid gcc 4.1.2 build error */
+static inline int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
 {
 	if (vma_is_anonymous(vmf->vma))
 		return do_huge_pmd_wp_page(vmf, orig_pmd);
@@ -3948,7 +3949,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
 	if (unlikely(!pte_same(*vmf->pte, entry)))
 		goto unlock;
 	if (vmf->flags & FAULT_FLAG_WRITE) {
-		if (!pte_access_permitted(entry, WRITE))
+		if (!pte_write(entry))
 			return do_wp_page(vmf);
 		entry = pte_mkdirty(entry);
 	}
@@ -4013,7 +4014,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 
 			/* NUMA case for anonymous PUDs would go here */
 
-			if (dirty && !pud_access_permitted(orig_pud, WRITE)) {
+			if (dirty && !pud_write(orig_pud)) {
 				ret = wp_huge_pud(&vmf, orig_pud);
 				if (!(ret & VM_FAULT_FALLBACK))
 					return ret;
@@ -4046,7 +4047,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 			if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
 				return do_huge_pmd_numa_page(&vmf, orig_pmd);
 
-			if (dirty && !pmd_access_permitted(orig_pmd, WRITE)) {
+			if (dirty && !pmd_write(orig_pmd)) {
 				ret = wp_huge_pmd(&vmf, orig_pmd);
 				if (!(ret & VM_FAULT_FALLBACK))
 					return ret;
@@ -4336,7 +4337,7 @@ int follow_phys(struct vm_area_struct *vma,
 		goto out;
 	pte = *ptep;
 
-	if (!pte_access_permitted(pte, flags & FOLL_WRITE))
+	if ((flags & FOLL_WRITE) && !pte_write(pte))
 		goto unlock;
 
 	*prot = pgprot_val(pte_pgprot(pte));
diff --git a/mm/mmap.c b/mm/mmap.c
index a4d5468212149db8a4cf20f9917c7bf48231a9ce..9efdc021ad2202fc9ebd7e55fe572813136d2f2c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3019,20 +3019,20 @@ void exit_mmap(struct mm_struct *mm)
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	unmap_vmas(&tlb, vma, 0, -1);
 
-	set_bit(MMF_OOM_SKIP, &mm->flags);
-	if (unlikely(tsk_is_oom_victim(current))) {
+	if (unlikely(mm_is_oom_victim(mm))) {
 		/*
 		 * Wait for oom_reap_task() to stop working on this
 		 * mm. Because MMF_OOM_SKIP is already set before
 		 * calling down_read(), oom_reap_task() will not run
 		 * on this "mm" post up_write().
 		 *
-		 * tsk_is_oom_victim() cannot be set from under us
-		 * either because current->mm is already set to NULL
+		 * mm_is_oom_victim() cannot be set from under us
+		 * either because victim->mm is already set to NULL
 		 * under task_lock before calling mmput and oom_mm is
-		 * set not NULL by the OOM killer only if current->mm
+		 * set not NULL by the OOM killer only if victim->mm
 		 * is found not NULL while holding the task_lock.
 		 */
+		set_bit(MMF_OOM_SKIP, &mm->flags);
 		down_write(&mm->mmap_sem);
 		up_write(&mm->mmap_sem);
 	}
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ec39f730a0bfeebcd1d04ec34c5955f48a6f5259..58b629bb70de3024aba118000f83f52dd92e6d95 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -166,7 +166,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		next = pmd_addr_end(addr, end);
 		if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
 				&& pmd_none_or_clear_bad(pmd))
-			continue;
+			goto next;
 
 		/* invoke the mmu notifier if the pmd is populated */
 		if (!mni_start) {
@@ -188,7 +188,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 					}
 
 					/* huge pmd was handled */
-					continue;
+					goto next;
 				}
 			}
 			/* fall through, the trans huge pmd just split */
@@ -196,6 +196,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
 				 dirty_accountable, prot_numa);
 		pages += this_pages;
+next:
+		cond_resched();
 	} while (pmd++, addr = next, addr != end);
 
 	if (mni_start)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index c957be32b27a9e7a17a6e33e69a31b1b6fa8e820..29f855551efef89d6c251075828bc0cd79da1842 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -683,8 +683,10 @@ static void mark_oom_victim(struct task_struct *tsk)
 		return;
 
 	/* oom_mm is bound to the signal struct life time. */
-	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm))
+	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
 		mmgrab(tsk->signal->oom_mm);
+		set_bit(MMF_OOM_VICTIM, &mm->flags);
+	}
 
 	/*
 	 * Make sure that the task is woken up from uninterruptible sleep
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 73f5d4556b3d0b7218bea0cb9bb0fdd1f1cb3cdd..76c9688b6a0a75fc1c28e90920a8c9498c5e6d06 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2684,6 +2684,7 @@ void free_unref_page_list(struct list_head *list)
 {
 	struct page *page, *next;
 	unsigned long flags, pfn;
+	int batch_count = 0;
 
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
@@ -2700,6 +2701,16 @@ void free_unref_page_list(struct list_head *list)
 		set_page_private(page, 0);
 		trace_mm_page_free_batched(page);
 		free_unref_page_commit(page, pfn);
+
+		/*
+		 * Guard against excessive IRQ disabled times when we get
+		 * a large list of pages to free.
+		 */
+		if (++batch_count == SWAP_CLUSTER_MAX) {
+			local_irq_restore(flags);
+			batch_count = 0;
+			local_irq_save(flags);
+		}
 	}
 	local_irq_restore(flags);
 }
@@ -6249,6 +6260,8 @@ void __paginginit zero_resv_unavail(void)
 	pgcnt = 0;
 	for_each_resv_unavail_range(i, &start, &end) {
 		for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) {
+			if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages)))
+				continue;
 			mm_zero_struct_page(pfn_to_page(pfn));
 			pgcnt++;
 		}
diff --git a/mm/percpu.c b/mm/percpu.c
index 79e3549cab0f40f30916499f2acca7622379fdc5..50e7fdf84055151d8c7e8bb220f7a73e96b7f3e4 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2719,7 +2719,11 @@ void __init setup_per_cpu_areas(void)
 
 	if (pcpu_setup_first_chunk(ai, fc) < 0)
 		panic("Failed to initialize percpu areas.");
+#ifdef CONFIG_CRIS
+#warning "the CRIS architecture has physical and virtual addresses confused"
+#else
 	pcpu_free_alloc_info(ai);
+#endif
 }
 
 #endif	/* CONFIG_SMP */
diff --git a/mm/slab.c b/mm/slab.c
index 183e996dde5ff37a8881e9c223a348de947bf890..4e51ef954026bd15e5bef41167459970976faab0 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1584,11 +1584,8 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
 		       *dbg_redzone2(cachep, objp));
 	}
 
-	if (cachep->flags & SLAB_STORE_USER) {
-		pr_err("Last user: [<%p>](%pSR)\n",
-		       *dbg_userword(cachep, objp),
-		       *dbg_userword(cachep, objp));
-	}
+	if (cachep->flags & SLAB_STORE_USER)
+		pr_err("Last user: (%pSR)\n", *dbg_userword(cachep, objp));
 	realobj = (char *)objp + obj_offset(cachep);
 	size = cachep->object_size;
 	for (i = 0; i < size && lines; i += 16, lines--) {
@@ -1621,7 +1618,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 			/* Mismatch ! */
 			/* Print header */
 			if (lines == 0) {
-				pr_err("Slab corruption (%s): %s start=%p, len=%d\n",
+				pr_err("Slab corruption (%s): %s start=%px, len=%d\n",
 				       print_tainted(), cachep->name,
 				       realobj, size);
 				print_objinfo(cachep, objp, 0);
@@ -1650,13 +1647,13 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 		if (objnr) {
 			objp = index_to_obj(cachep, page, objnr - 1);
 			realobj = (char *)objp + obj_offset(cachep);
-			pr_err("Prev obj: start=%p, len=%d\n", realobj, size);
+			pr_err("Prev obj: start=%px, len=%d\n", realobj, size);
 			print_objinfo(cachep, objp, 2);
 		}
 		if (objnr + 1 < cachep->num) {
 			objp = index_to_obj(cachep, page, objnr + 1);
 			realobj = (char *)objp + obj_offset(cachep);
-			pr_err("Next obj: start=%p, len=%d\n", realobj, size);
+			pr_err("Next obj: start=%px, len=%d\n", realobj, size);
 			print_objinfo(cachep, objp, 2);
 		}
 	}
@@ -2608,7 +2605,7 @@ static void slab_put_obj(struct kmem_cache *cachep,
 	/* Verify double free bug */
 	for (i = page->active; i < cachep->num; i++) {
 		if (get_free_obj(page, i) == objnr) {
-			pr_err("slab: double free detected in cache '%s', objp %p\n",
+			pr_err("slab: double free detected in cache '%s', objp %px\n",
 			       cachep->name, objp);
 			BUG();
 		}
@@ -2772,7 +2769,7 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
 	else
 		slab_error(cache, "memory outside object was overwritten");
 
-	pr_err("%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
+	pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",
 	       obj, redzone1, redzone2);
 }
 
@@ -3078,7 +3075,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
 				*dbg_redzone2(cachep, objp) != RED_INACTIVE) {
 			slab_error(cachep, "double free, or memory outside object was overwritten");
-			pr_err("%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
+			pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",
 			       objp, *dbg_redzone1(cachep, objp),
 			       *dbg_redzone2(cachep, objp));
 		}
@@ -3091,7 +3088,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 		cachep->ctor(objp);
 	if (ARCH_SLAB_MINALIGN &&
 	    ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
-		pr_err("0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
+		pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n",
 		       objp, (int)ARCH_SLAB_MINALIGN);
 	}
 	return objp;
@@ -4283,7 +4280,7 @@ static void show_symbol(struct seq_file *m, unsigned long address)
 		return;
 	}
 #endif
-	seq_printf(m, "%p", (void *)address);
+	seq_printf(m, "%px", (void *)address);
 }
 
 static int leaks_show(struct seq_file *m, void *p)
diff --git a/mm/sparse.c b/mm/sparse.c
index 7a5dacaa06e3f277c8543c502dba0e3c6a6a1a13..2609aba121e89cc5c8b656f4ef3539545c434091 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -211,7 +211,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
 	if (unlikely(!mem_section)) {
 		unsigned long size, align;
 
-		size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
+		size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
 		align = 1 << (INTERNODE_CACHE_SHIFT);
 		mem_section = memblock_virt_alloc(size, align);
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c02c850ea3490af95fde44f94bd199fbdc500684..47d5ced51f2d44cddc559814b42caa0ea9bf5863 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -297,10 +297,13 @@ EXPORT_SYMBOL(register_shrinker);
  */
 void unregister_shrinker(struct shrinker *shrinker)
 {
+	if (!shrinker->nr_deferred)
+		return;
 	down_write(&shrinker_rwsem);
 	list_del(&shrinker->list);
 	up_write(&shrinker_rwsem);
 	kfree(shrinker->nr_deferred);
+	shrinker->nr_deferred = NULL;
 }
 EXPORT_SYMBOL(unregister_shrinker);
 
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 685049a9048d8e9f8b53041cba43b96317b112fa..683c0651098c719ab25c5b48789262e541a13362 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -53,6 +53,7 @@
 #include <linux/mount.h>
 #include <linux/migrate.h>
 #include <linux/pagemap.h>
+#include <linux/fs.h>
 
 #define ZSPAGE_MAGIC	0x58
 
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8dfdd94e430fd57f7bf6d1da86f18658a057e05d..bad01b14a4ad6b5d4e61ac5e0ed93022755223ab 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 		vlan_gvrp_uninit_applicant(real_dev);
 	}
 
-	/* Take it out of our own structures, but be sure to interlock with
-	 * HW accelerating devices or SW vlan input packet processing if
-	 * VLAN is not 0 (leave it there for 802.1p).
-	 */
-	if (vlan_id)
-		vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+	vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
 
 	/* Get rid of the vlan's reference to real_dev */
 	dev_put(real_dev);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 985046ae42312e86505d6fded2fb56501c38536b..80f5c79053a4d2eac267f555fd732b7c1687ba36 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -839,7 +839,6 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
 	if (IS_ERR(file)) {
 		pr_err("%s (%d): failed to map fd\n",
 		       __func__, task_pid_nr(current));
-		sock_release(csocket);
 		kfree(p);
 		return PTR_ERR(file);
 	}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 1b659ab652fb0c70f964d0bb292e99527a4eda8f..bbe8414b6ee7d21f86e5ab9302ebfc875dbe33d3 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1214,7 +1214,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 	orig_node->last_seen = jiffies;
 
 	/* find packet count of corresponding one hop neighbor */
-	spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+	spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
 	if_num = if_incoming->if_num;
 	orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
 	neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
@@ -1224,7 +1224,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 	} else {
 		neigh_rq_count = 0;
 	}
-	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+	spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
 
 	/* pay attention to not get a value bigger than 100 % */
 	if (orig_eq_count > neigh_rq_count)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 341ceab8338d829d14a48831bef805bb04af0a8f..e0e2bfcd6b3efd73f2567d23ce304951c9e2c3f0 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -814,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
 	}
 
 	orig_gw = batadv_gw_node_get(bat_priv, orig_node);
-	if (!orig_node)
+	if (!orig_gw)
 		goto out;
 
 	if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index a98cf1104a30a30e66fb6018bef59dea83dc4b7a..ebe6e38934e46ed5de4d30204e791dbe40285fcc 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -499,6 +499,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
 	 */
 	if (skb->priority >= 256 && skb->priority <= 263)
 		frag_header.priority = skb->priority - 256;
+	else
+		frag_header.priority = 0;
 
 	ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
 	ether_addr_copy(frag_header.dest, orig_node->orig);
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 15cd2139381e17f0b501dd5294631a1a3315c587..ebc4e2241c770d826fa8e731ba46ec043fe1992d 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -482,7 +482,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
 
 /**
  * batadv_tp_sender_timeout - timer that fires in case of packet loss
- * @arg: address of the related tp_vars
+ * @t: address to timer_list inside tp_vars
  *
  * If fired it means that there was packet loss.
  * Switch to Slow Start, set the ss_threshold to half of the current cwnd and
@@ -1106,7 +1106,7 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
 /**
  * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
  *  reached without received ack
- * @arg: address of the related tp_vars
+ * @t: address to timer_list inside tp_vars
  */
 static void batadv_tp_receiver_shutdown(struct timer_list *t)
 {
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 43ba91c440bcd65bd9f24258a28d01492cf6ac13..fc6615d5916524c446a9f4f952f2f8b7b5b67e16 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3363,9 +3363,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
 			break;
 
 		case L2CAP_CONF_EFS:
-			remote_efs = 1;
-			if (olen == sizeof(efs))
+			if (olen == sizeof(efs)) {
+				remote_efs = 1;
 				memcpy(&efs, (void *) val, olen);
+			}
 			break;
 
 		case L2CAP_CONF_EWS:
@@ -3584,16 +3585,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
 			break;
 
 		case L2CAP_CONF_EFS:
-			if (olen == sizeof(efs))
+			if (olen == sizeof(efs)) {
 				memcpy(&efs, (void *)val, olen);
 
-			if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
-			    efs.stype != L2CAP_SERV_NOTRAFIC &&
-			    efs.stype != chan->local_stype)
-				return -ECONNREFUSED;
+				if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
+				    efs.stype != L2CAP_SERV_NOTRAFIC &&
+				    efs.stype != chan->local_stype)
+					return -ECONNREFUSED;
 
-			l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
-					   (unsigned long) &efs, endptr - ptr);
+				l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
+						   (unsigned long) &efs, endptr - ptr);
+			}
 			break;
 
 		case L2CAP_CONF_FCS:
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d0ef0a8e8831920cb86fc767eb0d756bf89feb46..015f465c514b28564c9e91eec40dc041b765fe25 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
 	struct net_bridge *br = netdev_priv(dev);
 	int err;
 
+	err = register_netdevice(dev);
+	if (err)
+		return err;
+
 	if (tb[IFLA_ADDRESS]) {
 		spin_lock_bh(&br->lock);
 		br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
 		spin_unlock_bh(&br->lock);
 	}
 
-	err = register_netdevice(dev);
-	if (err)
-		return err;
-
 	err = br_changelink(dev, tb, data, extack);
 	if (err)
-		unregister_netdevice(dev);
+		br_dev_delete(dev, NULL);
+
 	return err;
 }
 
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 2d38b6e34203b7df5638c340126c2f5169fb3791..e0adcd123f48a1a4f66028bbf731132ed8e7ff17 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -334,9 +334,8 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
 	mutex_lock(&caifdevs->lock);
 	list_add_rcu(&caifd->list, &caifdevs->list);
 
-	strncpy(caifd->layer.name, dev->name,
-		sizeof(caifd->layer.name) - 1);
-	caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
+	strlcpy(caifd->layer.name, dev->name,
+		sizeof(caifd->layer.name));
 	caifd->layer.transmit = transmit;
 	cfcnfg_add_phy_layer(cfg,
 				dev,
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 5cd44f001f6479a5e5ec9d02cfc417b899b05e9d..1a082a946045e53f1617cc9a814bae5dcb9784e7 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -176,9 +176,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
 		dev_add_pack(&caif_usb_type);
 	pack_added = true;
 
-	strncpy(layer->name, dev->name,
-			sizeof(layer->name) - 1);
-	layer->name[sizeof(layer->name) - 1] = 0;
+	strlcpy(layer->name, dev->name, sizeof(layer->name));
 
 	return 0;
 }
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 273cb07f57d87186224fb50943af949b99b3cde2..8f00bea093b942f8b50193def6d311d09e75b8eb 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -268,17 +268,15 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
 	case CAIFPROTO_RFM:
 		l->linktype = CFCTRL_SRV_RFM;
 		l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
-		strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
-			sizeof(l->u.rfm.volume)-1);
-		l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
+		strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
+			sizeof(l->u.rfm.volume));
 		break;
 	case CAIFPROTO_UTIL:
 		l->linktype = CFCTRL_SRV_UTIL;
 		l->endpoint = 0x00;
 		l->chtype = 0x00;
-		strncpy(l->u.utility.name, s->sockaddr.u.util.service,
-			sizeof(l->u.utility.name)-1);
-		l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
+		strlcpy(l->u.utility.name, s->sockaddr.u.util.service,
+			sizeof(l->u.utility.name));
 		caif_assert(sizeof(l->u.utility.name) > 10);
 		l->u.utility.paramlen = s->param.size;
 		if (l->u.utility.paramlen > sizeof(l->u.utility.params))
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index f5afda1abc76fe908e8cce160ad032870972424c..655ed7032150309062d0c8c85cda571d34030423 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -258,8 +258,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
 		tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
 		cfpkt_add_body(pkt, &tmp16, 2);
 		memset(utility_name, 0, sizeof(utility_name));
-		strncpy(utility_name, param->u.utility.name,
-			UTILITY_NAME_LENGTH - 1);
+		strlcpy(utility_name, param->u.utility.name,
+			UTILITY_NAME_LENGTH);
 		cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
 		tmp8 = param->u.utility.paramlen;
 		cfpkt_add_body(pkt, &tmp8, 1);
diff --git a/net/core/dev.c b/net/core/dev.c
index 07ed21d64f92b39da9b683aa432efde6a14afdf0..0e0ba36eeac9852b8df5ddd398dbc66ad6c83760 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1106,7 +1106,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 	 * when the name is long and there isn't enough space left
 	 * for the digits, or if all bits are used.
 	 */
-	return p ? -ENFILE : -EEXIST;
+	return -ENFILE;
 }
 
 static int dev_alloc_name_ns(struct net *net,
@@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name);
 int dev_get_valid_name(struct net *net, struct net_device *dev,
 		       const char *name)
 {
-	return dev_alloc_name_ns(net, dev, name);
+	BUG_ON(!net);
+
+	if (!dev_valid_name(name))
+		return -EINVAL;
+
+	if (strchr(name, '%'))
+		return dev_alloc_name_ns(net, dev, name);
+	else if (__dev_get_by_name(net, name))
+		return -EEXIST;
+	else if (dev->name != name)
+		strlcpy(dev->name, name, IFNAMSIZ);
+
+	return 0;
 }
 EXPORT_SYMBOL(dev_get_valid_name);
 
@@ -3904,7 +3916,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 				     hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
 				     troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
 			goto do_drop;
-		if (troom > 0 && __skb_linearize(skb))
+		if (skb_linearize(skb))
 			goto do_drop;
 	}
 
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f8fcf450a36e604fe4c91dcccb4fd2a4a35ef1df..8225416911aed6d95f91ed5cb98ef1fb843101bb 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -770,15 +770,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
 	return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
 }
 
-static void
-warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
-{
-	char name[sizeof(current->comm)];
-
-	pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
-		     get_task_comm(name, current), details);
-}
-
 /* Query device for its ethtool_cmd settings.
  *
  * Backward compatibility note: for compatibility with legacy ethtool,
@@ -805,10 +796,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
 							   &link_ksettings);
 		if (err < 0)
 			return err;
-		if (!convert_link_ksettings_to_legacy_settings(&cmd,
-							       &link_ksettings))
-			warn_incomplete_ethtool_legacy_settings_conversion(
-				"link modes are only partially reported");
+		convert_link_ksettings_to_legacy_settings(&cmd,
+							  &link_ksettings);
 
 		/* send a sensible cmd tag back to user */
 		cmd.cmd = ETHTOOL_GSET;
diff --git a/net/core/filter.c b/net/core/filter.c
index 6a85e67fafce224b534dd87bb9407ae115f8ba7a..d339ef170df60282c8812b63148c479820b3008f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1054,11 +1054,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
 		 */
 		goto out_err_free;
 
-	/* We are guaranteed to never error here with cBPF to eBPF
-	 * transitions, since there's no issue with type compatibility
-	 * checks on program arrays.
-	 */
 	fp = bpf_prog_select_runtime(fp, &err);
+	if (err)
+		goto out_err_free;
 
 	kfree(old_prog);
 	return fp;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b797832565d34ccefb374ee87f9cbc460779a484..60a71be75aea063b418a48ade2a1e1c7804ab35c 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
 	spin_lock_bh(&net->nsid_lock);
 	peer = idr_find(&net->netns_ids, id);
 	if (peer)
-		get_net(peer);
+		peer = maybe_get_net(peer);
 	spin_unlock_bh(&net->nsid_lock);
 	rcu_read_unlock();
 
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 1c4810919a0a35900d45a659de0cd780b7e500d3..b9057478d69c8ad02ea7b4ba8d6f612e7792a738 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/types.h>
-#include <linux/module.h>
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/skbuff.h>
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dabba2a91fc8ff5852681dcee53149d99798ed79..778d7f03404a6631607f29c3267a0e39a40ac57c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1681,18 +1681,18 @@ static bool link_dump_filtered(struct net_device *dev,
 	return false;
 }
 
-static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+static struct net *get_target_net(struct sock *sk, int netnsid)
 {
 	struct net *net;
 
-	net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+	net = get_net_ns_by_id(sock_net(sk), netnsid);
 	if (!net)
 		return ERR_PTR(-EINVAL);
 
 	/* For now, the caller is required to have CAP_NET_ADMIN in
 	 * the user namespace owning the target net ns.
 	 */
-	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+	if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) {
 		put_net(net);
 		return ERR_PTR(-EACCES);
 	}
@@ -1733,7 +1733,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 			ifla_policy, NULL) >= 0) {
 		if (tb[IFLA_IF_NETNSID]) {
 			netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
-			tgt_net = get_target_net(skb, netnsid);
+			tgt_net = get_target_net(skb->sk, netnsid);
 			if (IS_ERR(tgt_net)) {
 				tgt_net = net;
 				netnsid = -1;
@@ -2883,7 +2883,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (tb[IFLA_IF_NETNSID]) {
 		netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
-		tgt_net = get_target_net(skb, netnsid);
+		tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
 		if (IS_ERR(tgt_net))
 			return PTR_ERR(tgt_net);
 	}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6b0ff396fa9dc58fed483597d459c66243be4cd2..08f57408131523adec3cc36044952e14aa5a6b9a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 	int i, new_frags;
 	u32 d_off;
 
-	if (!num_frags)
-		return 0;
-
 	if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
 		return -EINVAL;
 
+	if (!num_frags)
+		goto release;
+
 	new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < new_frags; i++) {
 		page = alloc_page(gfp_mask);
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 	__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
 	skb_shinfo(skb)->nr_frags = new_frags;
 
+release:
 	skb_zcopy_clear(skb, false);
 	return 0;
 }
@@ -3654,8 +3655,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 
 		skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
 					      SKBTX_SHARED_FRAG;
-		if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
-			goto err;
 
 		while (pos < offset + len) {
 			if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 
 			if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
 				goto err;
+			if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+				goto err;
 
 			*nskb_frag = *frag;
 			__skb_frag_ref(nskb_frag);
@@ -4293,7 +4294,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
 	struct sock *sk = skb->sk;
 
 	if (!skb_may_tx_timestamp(sk, false))
-		return;
+		goto err;
 
 	/* Take a reference to prevent skb_orphan() from freeing the socket,
 	 * but only if the socket refcount is not zero.
@@ -4302,7 +4303,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
 		*skb_hwtstamps(skb) = *hwtstamps;
 		__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
 		sock_put(sk);
+		return;
 	}
+
+err:
+	kfree_skb(skb);
 }
 EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
 
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 217f4e3b82f6edca841b66089ab0772bd8d391e7..146b50e30659daca3bdc4413d800890ef482f521 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -288,7 +288,7 @@ static int sock_diag_bind(struct net *net, int group)
 	case SKNLGRP_INET6_UDP_DESTROY:
 		if (!sock_diag_handlers[AF_INET6])
 			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-				       NETLINK_SOCK_DIAG, AF_INET);
+				       NETLINK_SOCK_DIAG, AF_INET6);
 		break;
 	}
 	return 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cbc3dde4cfcccae89ba262bb032edbf13639321f..a47ad6cd41c0396558ee6666ae45e78f36d64bd5 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = {
 		.data		= &bpf_jit_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 		.proc_handler	= proc_dointvec
+#else
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &one,
+#endif
 	},
 # ifdef CONFIG_HAVE_EBPF_JIT
 	{
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index abd07a443219853b022bef41cb072e90ff8f07f0..178bb9833311f83205317b07fe64cb2e45a9f734 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
 		if (state == DCCP_TIME_WAIT)
 			timeo = DCCP_TIMEWAIT_LEN;
 
+		/* tw_timer is pinned, so we need to make sure BH are disabled
+		 * in following section, otherwise timer handler could run before
+		 * we complete the initialization.
+		 */
+		local_bh_disable();
 		inet_twsk_schedule(tw, timeo);
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
 		inet_twsk_put(tw);
+		local_bh_enable();
 	} else {
 		/* Sorry, if we're out of memory, just CLOSE this
 		 * socket up.  We've got bigger problems than
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b68168fcc06aa1981258eca4857511329af62f9a..9d43c1f4027408f3a2176767da0dd425938ba652 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -259,6 +259,7 @@ int dccp_disconnect(struct sock *sk, int flags)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_sock *inet = inet_sk(sk);
+	struct dccp_sock *dp = dccp_sk(sk);
 	int err = 0;
 	const int old_state = sk->sk_state;
 
@@ -278,6 +279,10 @@ int dccp_disconnect(struct sock *sk, int flags)
 		sk->sk_err = ECONNRESET;
 
 	dccp_clear_xmit_timers(sk);
+	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+	dp->dccps_hc_rx_ccid = NULL;
+	dp->dccps_hc_tx_ccid = NULL;
 
 	__skb_queue_purge(&sk->sk_receive_queue);
 	__skb_queue_purge(&sk->sk_write_queue);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d6e7a642493b03223ab9890247983d9d1499cea0..a95a55f7913746bab3aa7a993265885ece25f35a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -16,7 +16,6 @@
 #include <linux/of_net.h>
 #include <linux/of_mdio.h>
 #include <linux/mdio.h>
-#include <linux/list.h>
 #include <net/rtnetlink.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a4573bccd6da7b6763016d4f07d3032d44483d99..7a93359fbc7229389fc7bec67889ca1115f47a69 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1428,7 +1428,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
 
 static bool inetdev_valid_mtu(unsigned int mtu)
 {
-	return mtu >= 68;
+	return mtu >= IPV4_MIN_MTU;
 }
 
 static void inetdev_send_gratuitous_arp(struct net_device *dev,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f52d27a422c37298b2ad0c1dbba0e5307f1a46b6..08259d078b1ca821c581aeb34251c79a9aba8c8d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1298,14 +1298,19 @@ static int __net_init ip_fib_net_init(struct net *net)
 
 static void ip_fib_net_exit(struct net *net)
 {
-	unsigned int i;
+	int i;
 
 	rtnl_lock();
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
 	RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
 #endif
-	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+	/* Destroy the tables in reverse order to guarantee that the
+	 * local table, ID 255, is destroyed before the main table, ID
+	 * 254. This is necessary as the local table may contain
+	 * references to data contained in the main table.
+	 */
+	for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
 		struct hlist_head *head = &net->ipv4.fib_table_hash[i];
 		struct hlist_node *tmp;
 		struct fib_table *tb;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f04d944f8abe0bfbb840837bb35d28fe6d8d25d0..c586597da20dbb0e46eb0f693fd65bccfc8f3633 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
 
 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
 		int type = nla_type(nla);
-		u32 val;
+		u32 fi_val, val;
 
 		if (!type)
 			continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
 			val = nla_get_u32(nla);
 		}
 
-		if (fi->fib_metrics->metrics[type - 1] != val)
+		fi_val = fi->fib_metrics->metrics[type - 1];
+		if (type == RTAX_FEATURES)
+			fi_val &= ~DST_FEATURE_ECN_CA;
+
+		if (fi_val != val)
 			return false;
 	}
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d1f8f302dbf3ed5a079f27efa6eeaf802de40243..726f6b6082748896686ae603546fa189348f9142 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -89,6 +89,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/times.h>
 #include <linux/pkt_sched.h>
+#include <linux/byteorder/generic.h>
 
 #include <net/net_namespace.h>
 #include <net/arp.h>
@@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
 	return scount;
 }
 
+/* source address selection per RFC 3376 section 4.2.13 */
+static __be32 igmpv3_get_srcaddr(struct net_device *dev,
+				 const struct flowi4 *fl4)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+	if (!in_dev)
+		return htonl(INADDR_ANY);
+
+	for_ifa(in_dev) {
+		if (inet_ifa_match(fl4->saddr, ifa))
+			return fl4->saddr;
+	} endfor_ifa(in_dev);
+
+	return htonl(INADDR_ANY);
+}
+
 static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
 {
 	struct sk_buff *skb;
@@ -368,7 +386,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
 	pip->frag_off = htons(IP_DF);
 	pip->ttl      = 1;
 	pip->daddr    = fl4.daddr;
-	pip->saddr    = fl4.saddr;
+	pip->saddr    = igmpv3_get_srcaddr(dev, &fl4);
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len  = 0;	/* filled in later */
 	ip_select_ident(net, skb, NULL);
@@ -404,16 +422,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
 }
 
 static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
-	int type, struct igmpv3_grec **ppgr)
+	int type, struct igmpv3_grec **ppgr, unsigned int mtu)
 {
 	struct net_device *dev = pmc->interface->dev;
 	struct igmpv3_report *pih;
 	struct igmpv3_grec *pgr;
 
-	if (!skb)
-		skb = igmpv3_newpack(dev, dev->mtu);
-	if (!skb)
-		return NULL;
+	if (!skb) {
+		skb = igmpv3_newpack(dev, mtu);
+		if (!skb)
+			return NULL;
+	}
 	pgr = skb_put(skb, sizeof(struct igmpv3_grec));
 	pgr->grec_type = type;
 	pgr->grec_auxwords = 0;
@@ -436,12 +455,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 	struct igmpv3_grec *pgr = NULL;
 	struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
 	int scount, stotal, first, isquery, truncate;
+	unsigned int mtu;
 
 	if (pmc->multiaddr == IGMP_ALL_HOSTS)
 		return skb;
 	if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
 		return skb;
 
+	mtu = READ_ONCE(dev->mtu);
+	if (mtu < IPV4_MIN_MTU)
+		return skb;
+
 	isquery = type == IGMPV3_MODE_IS_INCLUDE ||
 		  type == IGMPV3_MODE_IS_EXCLUDE;
 	truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -462,7 +486,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 		    AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
 			if (skb)
 				igmpv3_sendpack(skb);
-			skb = igmpv3_newpack(dev, dev->mtu);
+			skb = igmpv3_newpack(dev, mtu);
 		}
 	}
 	first = 1;
@@ -498,12 +522,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 				pgr->grec_nsrcs = htons(scount);
 			if (skb)
 				igmpv3_sendpack(skb);
-			skb = igmpv3_newpack(dev, dev->mtu);
+			skb = igmpv3_newpack(dev, mtu);
 			first = 1;
 			scount = 0;
 		}
 		if (first) {
-			skb = add_grhead(skb, pmc, type, &pgr);
+			skb = add_grhead(skb, pmc, type, &pgr, mtu);
 			first = 0;
 		}
 		if (!skb)
@@ -538,7 +562,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 				igmpv3_sendpack(skb);
 				skb = NULL; /* add_grhead will get a new one */
 			}
-			skb = add_grhead(skb, pmc, type, &pgr);
+			skb = add_grhead(skb, pmc, type, &pgr, mtu);
 		}
 	}
 	if (pgr)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c690cd0d9b3f0af53c23b9a1ecc87be4098ae059..b563e0c46bac2362acccf38495546a8b6b726384 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -93,7 +93,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
 }
 
 /*
- * Enter the time wait state.
+ * Enter the time wait state. This is called with locally disabled BH.
  * Essentially we whip up a timewait bucket, copy the relevant info into it
  * from the SK, and mess with hash chains and list linkage.
  */
@@ -111,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	 */
 	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
 			hashinfo->bhash_size)];
-	spin_lock_bh(&bhead->lock);
+	spin_lock(&bhead->lock);
 	tw->tw_tb = icsk->icsk_bind_hash;
 	WARN_ON(!icsk->icsk_bind_hash);
 	inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
@@ -137,7 +137,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	if (__sk_nulls_del_node_init_rcu(sk))
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 
-	spin_unlock_bh(lock);
+	spin_unlock(lock);
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bb6239169b1ab943a6418494dab5018843bcde3c..45ffd3d045d240cad8e4d0ed8dd0dd7da997bf9e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -266,7 +266,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 	len = gre_hdr_len + sizeof(*ershdr);
 
 	if (unlikely(!pskb_may_pull(skb, len)))
-		return -ENOMEM;
+		return PACKET_REJECT;
 
 	iph = ip_hdr(skb);
 	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
@@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = {
 static void ipgre_tap_setup(struct net_device *dev)
 {
 	ether_setup(dev);
+	dev->max_mtu = 0;
 	dev->netdev_ops	= &gre_tap_netdev_ops;
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index fe6fee728ce49d01b55aa478698e1a3bcf9a3bdb..5ddb1cb52bd405ed10cce43195a25607d136efbf 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -349,8 +349,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
 	dev->needed_headroom = t_hlen + hlen;
 	mtu -= (dev->hard_header_len + t_hlen);
 
-	if (mtu < 68)
-		mtu = 68;
+	if (mtu < IPV4_MIN_MTU)
+		mtu = IPV4_MIN_MTU;
 
 	return mtu;
 }
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f88221aebc9d7b61cf2c09f2b3d2351c4095f64f..0c3c944a7b7201f74dace535c8a26d58b8039a1f 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -373,7 +373,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 					if (!xt_find_jump_offset(offsets, newpos,
 								 newinfo->number))
 						return 0;
-					e = entry0 + newpos;
 				} else {
 					/* ... this is a fallthru */
 					newpos = pos + e->next_offset;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4cbe5e80f3bf079755cd08f33c24a4077c6c4a63..2e0d339028bbcb6766f92e5b87d70866a419b893 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -439,7 +439,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
 					if (!xt_find_jump_offset(offsets, newpos,
 								 newinfo->number))
 						return 0;
-					e = entry0 + newpos;
 				} else {
 					/* ... this is a fallthru */
 					newpos = pos + e->next_offset;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 17b4ca562944c35b50015bbc40aa580ca62ddfb7..69060e3abe8598b350e6bfe5815a702c4b3d2ade 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -813,12 +813,13 @@ static int clusterip_net_init(struct net *net)
 
 static void clusterip_net_exit(struct net *net)
 {
-#ifdef CONFIG_PROC_FS
 	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+#ifdef CONFIG_PROC_FS
 	proc_remove(cn->procdir);
 	cn->procdir = NULL;
 #endif
 	nf_unregister_net_hook(net, &cip_arp_ops);
+	WARN_ON_ONCE(!list_empty(&cn->configs));
 }
 
 static struct pernet_operations clusterip_net_ops = {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 33b70bfd1122f08f4897ea6a68eb51e3a74bb1e5..5e570aa9e43b77f5c9ccd267319f762200f732d2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -513,11 +513,18 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int err;
 	struct ip_options_data opt_copy;
 	struct raw_frag_vec rfv;
+	int hdrincl;
 
 	err = -EMSGSIZE;
 	if (len > 0xFFFF)
 		goto out;
 
+	/* hdrincl should be READ_ONCE(inet->hdrincl)
+	 * but READ_ONCE() doesn't work with bit fields.
+	 * Doing this indirectly yields the same result.
+	 */
+	hdrincl = inet->hdrincl;
+	hdrincl = READ_ONCE(hdrincl);
 	/*
 	 *	Check the flags.
 	 */
@@ -593,7 +600,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		/* Linux does not mangle headers on raw sockets,
 		 * so that IP options + IP_HDRINCL is non-sense.
 		 */
-		if (inet->hdrincl)
+		if (hdrincl)
 			goto done;
 		if (ipc.opt->opt.srr) {
 			if (!daddr)
@@ -615,12 +622,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
 			   RT_SCOPE_UNIVERSE,
-			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+			   hdrincl ? IPPROTO_RAW : sk->sk_protocol,
 			   inet_sk_flowi_flags(sk) |
-			    (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
+			    (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
 			   daddr, saddr, 0, 0, sk->sk_uid);
 
-	if (!inet->hdrincl) {
+	if (!hdrincl) {
 		rfv.msg = msg;
 		rfv.hlen = 0;
 
@@ -645,7 +652,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		goto do_confirm;
 back_from_confirm:
 
-	if (inet->hdrincl)
+	if (hdrincl)
 		err = raw_send_hdrinc(sk, &fl4, msg, len,
 				      &rt, msg->msg_flags, &ipc.sockc);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bf97317e6c974285a652664d02e8b2c48a8cfe96..f08eebe60446e2e99d19bd0ea51e5fafb96aca63 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2412,6 +2412,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->snd_cwnd_cnt = 0;
 	tp->window_clamp = 0;
 	tcp_set_ca_state(sk, TCP_CA_Open);
+	tp->is_sack_reneg = 0;
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
 	/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 69ee877574d08b36bc990f890899037108eafe05..8322f26e770e4406fe9accb386a99659941cc874 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -110,7 +110,8 @@ struct bbr {
 	u32	lt_last_lost;	     /* LT intvl start: tp->lost */
 	u32	pacing_gain:10,	/* current gain for setting pacing rate */
 		cwnd_gain:10,	/* current gain for setting cwnd */
-		full_bw_cnt:3,	/* number of rounds without large bw gains */
+		full_bw_reached:1,   /* reached full bw in Startup? */
+		full_bw_cnt:2,	/* number of rounds without large bw gains */
 		cycle_idx:3,	/* current index in pacing_gain cycle array */
 		has_seen_rtt:1, /* have we seen an RTT sample yet? */
 		unused_b:5;
@@ -180,7 +181,7 @@ static bool bbr_full_bw_reached(const struct sock *sk)
 {
 	const struct bbr *bbr = inet_csk_ca(sk);
 
-	return bbr->full_bw_cnt >= bbr_full_bw_cnt;
+	return bbr->full_bw_reached;
 }
 
 /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
@@ -717,6 +718,7 @@ static void bbr_check_full_bw_reached(struct sock *sk,
 		return;
 	}
 	++bbr->full_bw_cnt;
+	bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;
 }
 
 /* If pipe is probably full, drain the queue and then enter steady-state. */
@@ -850,6 +852,7 @@ static void bbr_init(struct sock *sk)
 	bbr->restore_cwnd = 0;
 	bbr->round_start = 0;
 	bbr->idle_restart = 0;
+	bbr->full_bw_reached = 0;
 	bbr->full_bw = 0;
 	bbr->full_bw_cnt = 0;
 	bbr->cycle_mstamp = 0;
@@ -871,6 +874,11 @@ static u32 bbr_sndbuf_expand(struct sock *sk)
  */
 static u32 bbr_undo_cwnd(struct sock *sk)
 {
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->full_bw = 0;   /* spurious slow-down; reset full pipe detection */
+	bbr->full_bw_cnt = 0;
+	bbr_reset_lt_bw_sampling(sk);
 	return tcp_sk(sk)->snd_cwnd;
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 734cfc8ff76edf3453921b50620be2986bfcfdb9..45f750e85714da11f569ae0c6522f1cc56c6d2a2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -508,9 +508,6 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 	u32 new_sample = tp->rcv_rtt_est.rtt_us;
 	long m = sample;
 
-	if (m == 0)
-		m = 1;
-
 	if (new_sample != 0) {
 		/* If we sample in larger samples in the non-timestamp
 		 * case, we could grossly overestimate the RTT especially
@@ -547,6 +544,8 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
 	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
 		return;
 	delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
+	if (!delta_us)
+		delta_us = 1;
 	tcp_rcv_rtt_update(tp, delta_us, 1);
 
 new_measure:
@@ -563,8 +562,11 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 	    (TCP_SKB_CB(skb)->end_seq -
 	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
 		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
-		u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+		u32 delta_us;
 
+		if (!delta)
+			delta = 1;
+		delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
 		tcp_rcv_rtt_update(tp, delta_us, 0);
 	}
 }
@@ -579,6 +581,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	int time;
 	int copied;
 
+	tcp_mstamp_refresh(tp);
 	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
 	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
 		return;
@@ -1941,6 +1944,8 @@ void tcp_enter_loss(struct sock *sk)
 	if (is_reneg) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
 		tp->sacked_out = 0;
+		/* Mark SACK reneging until we recover from this loss event. */
+		tp->is_sack_reneg = 1;
 	}
 	tcp_clear_all_retrans_hints(tp);
 
@@ -2326,6 +2331,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 	}
 	tp->snd_cwnd_stamp = tcp_jiffies32;
 	tp->undo_marker = 0;
+	tp->rack.advanced = 1; /* Force RACK to re-exam losses */
 }
 
 static inline bool tcp_may_undo(const struct tcp_sock *tp)
@@ -2364,6 +2370,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
 		return true;
 	}
 	tcp_set_ca_state(sk, TCP_CA_Open);
+	tp->is_sack_reneg = 0;
 	return false;
 }
 
@@ -2397,8 +2404,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
 			NET_INC_STATS(sock_net(sk),
 					LINUX_MIB_TCPSPURIOUSRTOS);
 		inet_csk(sk)->icsk_retransmits = 0;
-		if (frto_undo || tcp_is_sack(tp))
+		if (frto_undo || tcp_is_sack(tp)) {
 			tcp_set_ca_state(sk, TCP_CA_Open);
+			tp->is_sack_reneg = 0;
+		}
 		return true;
 	}
 	return false;
@@ -3495,6 +3504,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	struct tcp_sacktag_state sack_state;
 	struct rate_sample rs = { .prior_delivered = 0 };
 	u32 prior_snd_una = tp->snd_una;
+	bool is_sack_reneg = tp->is_sack_reneg;
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
@@ -3611,7 +3621,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	delivered = tp->delivered - delivered;	/* freshly ACKed or SACKed */
 	lost = tp->lost - lost;			/* freshly marked lost */
-	tcp_rate_gen(sk, delivered, lost, sack_state.rate);
+	tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
 	tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
 	tcp_xmit_recovery(sk, rexmit);
 	return 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c6bc0c4d19c624888b0d0b5a4246c7183edf63f5..94e28350f4205b1a57809d5471d7e2ade51f5196 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -848,7 +848,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
 			req->ts_recent,
 			0,
-			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
 					  AF_INET),
 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
 			ip_hdr(skb)->tos);
@@ -1591,6 +1591,34 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_filter);
 
+static void tcp_v4_restore_cb(struct sk_buff *skb)
+{
+	memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
+		sizeof(struct inet_skb_parm));
+}
+
+static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
+			   const struct tcphdr *th)
+{
+	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+	 * barrier() makes sure compiler wont play fool^Waliasing games.
+	 */
+	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
+		sizeof(struct inet_skb_parm));
+	barrier();
+
+	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+				    skb->len - th->doff * 4);
+	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
+	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
+	TCP_SKB_CB(skb)->sacked	 = 0;
+	TCP_SKB_CB(skb)->has_rxtstamp =
+			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
+}
+
 /*
  *	From tcp_input.c
  */
@@ -1631,24 +1659,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
 	th = (const struct tcphdr *)skb->data;
 	iph = ip_hdr(skb);
-	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
-	 * barrier() makes sure compiler wont play fool^Waliasing games.
-	 */
-	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
-		sizeof(struct inet_skb_parm));
-	barrier();
-
-	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
-	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
-				    skb->len - th->doff * 4);
-	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
-	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
-	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
-	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
-	TCP_SKB_CB(skb)->sacked	 = 0;
-	TCP_SKB_CB(skb)->has_rxtstamp =
-			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
-
 lookup:
 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
 			       th->dest, sdif, &refcounted);
@@ -1679,14 +1689,19 @@ int tcp_v4_rcv(struct sk_buff *skb)
 		sock_hold(sk);
 		refcounted = true;
 		nsk = NULL;
-		if (!tcp_filter(sk, skb))
+		if (!tcp_filter(sk, skb)) {
+			th = (const struct tcphdr *)skb->data;
+			iph = ip_hdr(skb);
+			tcp_v4_fill_cb(skb, iph, th);
 			nsk = tcp_check_req(sk, skb, req, false);
+		}
 		if (!nsk) {
 			reqsk_put(req);
 			goto discard_and_relse;
 		}
 		if (nsk == sk) {
 			reqsk_put(req);
+			tcp_v4_restore_cb(skb);
 		} else if (tcp_child_process(sk, nsk, skb)) {
 			tcp_v4_send_reset(nsk, skb);
 			goto discard_and_relse;
@@ -1712,6 +1727,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 		goto discard_and_relse;
 	th = (const struct tcphdr *)skb->data;
 	iph = ip_hdr(skb);
+	tcp_v4_fill_cb(skb, iph, th);
 
 	skb->dev = NULL;
 
@@ -1742,6 +1758,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard_it;
 
+	tcp_v4_fill_cb(skb, iph, th);
+
 	if (tcp_checksum_complete(skb)) {
 csum_error:
 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
@@ -1768,6 +1786,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
 		goto discard_it;
 	}
 
+	tcp_v4_fill_cb(skb, iph, th);
+
 	if (tcp_checksum_complete(skb)) {
 		inet_twsk_put(inet_twsk(sk));
 		goto csum_error;
@@ -1784,6 +1804,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 		if (sk2) {
 			inet_twsk_deschedule_put(inet_twsk(sk));
 			sk = sk2;
+			tcp_v4_restore_cb(skb);
 			refcounted = false;
 			goto process;
 		}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e36eff0403f4e80c4f7291a70614f40125652133..b079b619b60ca577d5ef20a5065fce87acecd96c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -310,10 +310,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		if (state == TCP_TIME_WAIT)
 			timeo = TCP_TIMEWAIT_LEN;
 
+		/* tw_timer is pinned, so we need to make sure BH are disabled
+		 * in following section, otherwise timer handler could run before
+		 * we complete the initialization.
+		 */
+		local_bh_disable();
 		inet_twsk_schedule(tw, timeo);
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
 		inet_twsk_put(tw);
+		local_bh_enable();
 	} else {
 		/* Sorry, if we're out of memory, just CLOSE this
 		 * socket up.  We've got bigger problems than
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 3330a370d3061edd7cda90e1f50713ed0e7868a1..c61240e43923d6dd6a5d6215074e2da2c2bc71f4 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
 
 /* Update the connection delivery information and generate a rate sample. */
 void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
-		  struct rate_sample *rs)
+		  bool is_sack_reneg, struct rate_sample *rs)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 snd_us, ack_us;
@@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 
 	rs->acked_sacked = delivered;	/* freshly ACKed or SACKed */
 	rs->losses = lost;		/* freshly marked lost */
-	/* Return an invalid sample if no timing information is available. */
-	if (!rs->prior_mstamp) {
+	/* Return an invalid sample if no timing information is available or
+	 * in recovery from loss with SACK reneging. Rate samples taken during
+	 * a SACK reneging event may overestimate bw by including packets that
+	 * were SACKed before the reneg.
+	 */
+	if (!rs->prior_mstamp || is_sack_reneg) {
 		rs->delivered = -1;
 		rs->interval_us = -1;
 		return;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index d3ea89020c69c17189f6a5eefb28e92bd97ac2e1..3a81720ac0c40877386e37c99f4f321ab4127fa4 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -55,7 +55,8 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 	 * to queuing or delayed ACKs.
 	 */
 	reo_wnd = 1000;
-	if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) {
+	if ((tp->rack.reord || inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery) &&
+	    min_rtt != ~0U) {
 		reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd);
 		reo_wnd = min(reo_wnd, tp->srtt_us >> 3);
 	}
@@ -79,12 +80,12 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 		 */
 		remaining = tp->rack.rtt_us + reo_wnd -
 			    tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
-		if (remaining < 0) {
+		if (remaining <= 0) {
 			tcp_rack_mark_skb_lost(sk, skb);
 			list_del_init(&skb->tcp_tsorted_anchor);
 		} else {
-			/* Record maximum wait time (+1 to avoid 0) */
-			*reo_timeout = max_t(u32, *reo_timeout, 1 + remaining);
+			/* Record maximum wait time */
+			*reo_timeout = max_t(u32, *reo_timeout, remaining);
 		}
 	}
 }
@@ -116,13 +117,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
 {
 	u32 rtt_us;
 
-	if (tp->rack.mstamp &&
-	    !tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
-				 end_seq, tp->rack.end_seq))
-		return;
-
 	rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
-	if (sacked & TCPCB_RETRANS) {
+	if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) {
 		/* If the sacked packet was retransmitted, it's ambiguous
 		 * whether the retransmission or the original (or the prior
 		 * retransmission) was sacked.
@@ -133,13 +129,15 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
 		 * so it's at least one RTT (i.e., retransmission is at least
 		 * an RTT later).
 		 */
-		if (rtt_us < tcp_min_rtt(tp))
-			return;
+		return;
 	}
-	tp->rack.rtt_us = rtt_us;
-	tp->rack.mstamp = xmit_time;
-	tp->rack.end_seq = end_seq;
 	tp->rack.advanced = 1;
+	tp->rack.rtt_us = rtt_us;
+	if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
+				end_seq, tp->rack.end_seq)) {
+		tp->rack.mstamp = xmit_time;
+		tp->rack.end_seq = end_seq;
+	}
 }
 
 /* We have waited long enough to accommodate reordering. Mark the expired
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 16df6dd44b988a128d97df3a7953437499a216e8..968fda1983762e6d7c078a28ccfcbd9066788daf 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -264,6 +264,7 @@ void tcp_delack_timer_handler(struct sock *sk)
 			icsk->icsk_ack.pingpong = 0;
 			icsk->icsk_ack.ato      = TCP_ATO_MIN;
 		}
+		tcp_mstamp_refresh(tcp_sk(sk));
 		tcp_send_ack(sk);
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
 	}
@@ -632,6 +633,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
 		goto out;
 	}
 
+	tcp_mstamp_refresh(tp);
 	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
 		if (tp->linger2 >= 0) {
 			const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index e50b7fea57ee35c117002463f473ccd41face358..bcfc00e88756dabb1f491d3d41137ccbc7ab1cbc 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
 	return xfrm4_extract_header(skb);
 }
 
+static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
+				   struct sk_buff *skb)
+{
+	return dst_input(skb);
+}
+
 static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
 					 struct sk_buff *skb)
 {
@@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
 					 iph->tos, skb->dev))
 			goto drop;
 	}
-	return dst_input(skb);
+
+	if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2))
+		goto drop;
+
+	return 0;
 drop:
 	kfree_skb(skb);
 	return NET_RX_DROP;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f71234b9c01a82ec9d40423ee28957468ed46..c9441ca4539936486291147a47a84ef1b2ecf095 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
 	np->mcast_hops	= IPV6_DEFAULT_MCASTHOPS;
 	np->mc_loop	= 1;
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
-	np->autoflowlabel = ip6_default_np_autolabel(net);
 	np->repflow	= net->ipv6.sysctl.flowlabel_reflect;
 	sk->sk_ipv6only	= net->ipv6.sysctl.bindv6only;
 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 83bd75713535cf8fadf348e5d452445aba9c426c..bc68eb661970a5404e6cfb060b0011680d104ed5 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -925,6 +925,15 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
 	sr_phdr->segments[0] = **addr_p;
 	*addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
 
+	if (sr_ihdr->hdrlen > hops * 2) {
+		int tlvs_offset, tlvs_length;
+
+		tlvs_offset = (1 + hops * 2) << 3;
+		tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3;
+		memcpy((char *)sr_phdr + tlvs_offset,
+		       (char *)sr_ihdr + tlvs_offset, tlvs_length);
+	}
+
 #ifdef CONFIG_IPV6_SEG6_HMAC
 	if (sr_has_hmac(sr_phdr)) {
 		struct net *net = NULL;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f5285f4e1d08acb60d42fb6fd10a0c38a239324f..9dcc3924a97561d689a1fd8862742d9543339dd2 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -640,6 +640,11 @@ static struct fib6_node *fib6_add_1(struct net *net,
 			if (!(fn->fn_flags & RTN_RTINFO)) {
 				RCU_INIT_POINTER(fn->leaf, NULL);
 				rt6_release(leaf);
+			/* remove null_entry in the root node */
+			} else if (fn->fn_flags & RTN_TL_ROOT &&
+				   rcu_access_pointer(fn->leaf) ==
+				   net->ipv6.ip6_null_entry) {
+				RCU_INIT_POINTER(fn->leaf, NULL);
 			}
 
 			return fn;
@@ -1241,23 +1246,28 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 		 * If fib6_add_1 has cleared the old leaf pointer in the
 		 * super-tree leaf node we have to find a new one for it.
 		 */
-		struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
-					    lockdep_is_held(&table->tb6_lock));
-		if (pn != fn && pn_leaf == rt) {
-			pn_leaf = NULL;
-			RCU_INIT_POINTER(pn->leaf, NULL);
-			atomic_dec(&rt->rt6i_ref);
-		}
-		if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
-			pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
-#if RT6_DEBUG >= 2
-			if (!pn_leaf) {
-				WARN_ON(!pn_leaf);
-				pn_leaf = info->nl_net->ipv6.ip6_null_entry;
+		if (pn != fn) {
+			struct rt6_info *pn_leaf =
+				rcu_dereference_protected(pn->leaf,
+				    lockdep_is_held(&table->tb6_lock));
+			if (pn_leaf == rt) {
+				pn_leaf = NULL;
+				RCU_INIT_POINTER(pn->leaf, NULL);
+				atomic_dec(&rt->rt6i_ref);
 			}
+			if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+				pn_leaf = fib6_find_prefix(info->nl_net, table,
+							   pn);
+#if RT6_DEBUG >= 2
+				if (!pn_leaf) {
+					WARN_ON(!pn_leaf);
+					pn_leaf =
+					    info->nl_net->ipv6.ip6_null_entry;
+				}
 #endif
-			atomic_inc(&pn_leaf->rt6i_ref);
-			rcu_assign_pointer(pn->leaf, pn_leaf);
+				atomic_inc(&pn_leaf->rt6i_ref);
+				rcu_assign_pointer(pn->leaf, pn_leaf);
+			}
 		}
 #endif
 		goto failure;
@@ -1265,13 +1275,17 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	return err;
 
 failure:
-	/* fn->leaf could be NULL if fn is an intermediate node and we
-	 * failed to add the new route to it in both subtree creation
-	 * failure and fib6_add_rt2node() failure case.
-	 * In both cases, fib6_repair_tree() should be called to fix
-	 * fn->leaf.
+	/* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
+	 * 1. fn is an intermediate node and we failed to add the new
+	 * route to it in both subtree creation failure and fib6_add_rt2node()
+	 * failure case.
+	 * 2. fn is the root node in the table and we fail to add the first
+	 * default route to it.
 	 */
-	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+	if (fn &&
+	    (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
+	     (fn->fn_flags & RTN_TL_ROOT &&
+	      !rcu_access_pointer(fn->leaf))))
 		fib6_repair_tree(info->nl_net, table, fn);
 	/* Always release dst as dst->__refcnt is guaranteed
 	 * to be taken before entering this function
@@ -1526,6 +1540,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 	struct fib6_walker *w;
 	int iter = 0;
 
+	/* Set fn->leaf to null_entry for root node. */
+	if (fn->fn_flags & RTN_TL_ROOT) {
+		rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
+		return fn;
+	}
+
 	for (;;) {
 		struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
 					    lockdep_is_held(&table->tb6_lock));
@@ -1680,10 +1700,15 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 	}
 	read_unlock(&net->ipv6.fib6_walker_lock);
 
-	/* If it was last route, expunge its radix tree node */
+	/* If it was last route, call fib6_repair_tree() to:
+	 * 1. For root node, put back null_entry as how the table was created.
+	 * 2. For other nodes, expunge its radix tree node.
+	 */
 	if (!rcu_access_pointer(fn->leaf)) {
-		fn->fn_flags &= ~RTN_RTINFO;
-		net->ipv6.rt6_stats->fib_route_nodes--;
+		if (!(fn->fn_flags & RTN_TL_ROOT)) {
+			fn->fn_flags &= ~RTN_RTINFO;
+			net->ipv6.rt6_stats->fib_route_nodes--;
+		}
 		fn = fib6_repair_tree(net, table, fn);
 	}
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4cfd8e0696fe77f6d7af7ca3579a2418aef972f6..772695960890893f9ab7862cf64728b783f5bb96 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1014,6 +1014,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
 	eth_random_addr(dev->perm_addr);
 }
 
+#define GRE6_FEATURES (NETIF_F_SG |		\
+		       NETIF_F_FRAGLIST |	\
+		       NETIF_F_HIGHDMA |	\
+		       NETIF_F_HW_CSUM)
+
+static void ip6gre_tnl_init_features(struct net_device *dev)
+{
+	struct ip6_tnl *nt = netdev_priv(dev);
+
+	dev->features		|= GRE6_FEATURES;
+	dev->hw_features	|= GRE6_FEATURES;
+
+	if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
+		/* TCP offload with GRE SEQ is not supported, nor
+		 * can we support 2 levels of outer headers requiring
+		 * an update.
+		 */
+		if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
+		    nt->encap.type == TUNNEL_ENCAP_NONE) {
+			dev->features    |= NETIF_F_GSO_SOFTWARE;
+			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+		}
+
+		/* Can use a lockless transmit, unless we generate
+		 * output sequences
+		 */
+		dev->features |= NETIF_F_LLTX;
+	}
+}
+
 static int ip6gre_tunnel_init_common(struct net_device *dev)
 {
 	struct ip6_tnl *tunnel;
@@ -1048,6 +1078,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
 	if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 		dev->mtu -= 8;
 
+	ip6gre_tnl_init_features(dev);
+
 	return 0;
 }
 
@@ -1298,16 +1330,12 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
 	.ndo_get_iflink = ip6_tnl_get_iflink,
 };
 
-#define GRE6_FEATURES (NETIF_F_SG |		\
-		       NETIF_F_FRAGLIST |	\
-		       NETIF_F_HIGHDMA |		\
-		       NETIF_F_HW_CSUM)
-
 static void ip6gre_tap_setup(struct net_device *dev)
 {
 
 	ether_setup(dev);
 
+	dev->max_mtu = 0;
 	dev->netdev_ops = &ip6gre_tap_netdev_ops;
 	dev->needs_free_netdev = true;
 	dev->priv_destructor = ip6gre_dev_free;
@@ -1382,26 +1410,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
 	nt->net = dev_net(dev);
 	ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
 
-	dev->features		|= GRE6_FEATURES;
-	dev->hw_features	|= GRE6_FEATURES;
-
-	if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
-		/* TCP offload with GRE SEQ is not supported, nor
-		 * can we support 2 levels of outer headers requiring
-		 * an update.
-		 */
-		if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
-		    (nt->encap.type == TUNNEL_ENCAP_NONE)) {
-			dev->features    |= NETIF_F_GSO_SOFTWARE;
-			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
-		}
-
-		/* Can use a lockless transmit, unless we generate
-		 * output sequences
-		 */
-		dev->features |= NETIF_F_LLTX;
-	}
-
 	err = register_netdevice(dev);
 	if (err)
 		goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5110a418cc4d0c1040506394460cb482698d8c15..688ba5f7516b37c87b879036dce781bdcfa01739 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
+static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+	if (!np->autoflowlabel_set)
+		return ip6_default_np_autolabel(net);
+	else
+		return np->autoflowlabel;
+}
+
 /*
  * xmit an sk_buff (used by TCP, SCTP and DCCP)
  * Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 		hlimit = ip6_dst_hoplimit(dst);
 
 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
-						     np->autoflowlabel, fl6));
+				ip6_autoflowlabel(net, np), fl6));
 
 	hdr->payload_len = htons(seg_len);
 	hdr->nexthdr = proto;
@@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
 	ip6_flow_hdr(hdr, v6_cork->tclass,
 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
-					np->autoflowlabel, fl6));
+					ip6_autoflowlabel(net, np), fl6));
 	hdr->hop_limit = v6_cork->hop_limit;
 	hdr->nexthdr = proto;
 	hdr->saddr = fl6->saddr;
@@ -1727,9 +1735,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 	cork.base.opt = NULL;
 	v6_cork.opt = NULL;
 	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
-	if (err)
+	if (err) {
+		ip6_cork_release(&cork, &v6_cork);
 		return ERR_PTR(err);
-
+	}
 	if (ipc6->dontfrag < 0)
 		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3d3092adf1d2d5962b5fc87bdf08419762d1b1ee..9a7cf355bc8c8ff67388456060c1f7e67d8762ee 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -904,7 +904,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
 		if (t->parms.collect_md) {
 			tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
 			if (!tun_dst)
-				return 0;
+				goto drop;
 		}
 		ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
 				    log_ecn_error);
@@ -1074,10 +1074,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 			memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
 			neigh_release(neigh);
 		}
-	} else if (!(t->parms.flags &
-		     (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
-		/* enable the cache only only if the routing decision does
-		 * not depend on the current inner header value
+	} else if (t->parms.proto != 0 && !(t->parms.flags &
+					    (IP6_TNL_F_USE_ORIG_TCLASS |
+					     IP6_TNL_F_USE_ORIG_FWMARK))) {
+		/* enable the cache only if neither the outer protocol nor the
+		 * routing decision depends on the current inner header value
 		 */
 		use_cache = true;
 	}
@@ -1123,8 +1124,13 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 		max_headroom += 8;
 		mtu -= 8;
 	}
-	if (mtu < IPV6_MIN_MTU)
-		mtu = IPV6_MIN_MTU;
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (mtu < IPV6_MIN_MTU)
+			mtu = IPV6_MIN_MTU;
+	} else if (mtu < 576) {
+		mtu = 576;
+	}
+
 	if (skb_dst(skb) && !t->parms.collect_md)
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 	if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
@@ -1671,11 +1677,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct ip6_tnl *tnl = netdev_priv(dev);
 
-	if (tnl->parms.proto == IPPROTO_IPIP) {
-		if (new_mtu < ETH_MIN_MTU)
+	if (tnl->parms.proto == IPPROTO_IPV6) {
+		if (new_mtu < IPV6_MIN_MTU)
 			return -EINVAL;
 	} else {
-		if (new_mtu < IPV6_MIN_MTU)
+		if (new_mtu < ETH_MIN_MTU)
 			return -EINVAL;
 	}
 	if (new_mtu > 0xFFF8 - dev->hard_header_len)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b9404feabd7857fe0873fbc4f346d281f3600807..2d4680e0376f41deee6c999eadaf9409353e0b4a 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -886,6 +886,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		break;
 	case IPV6_AUTOFLOWLABEL:
 		np->autoflowlabel = valbool;
+		np->autoflowlabel_set = 1;
 		retv = 0;
 		break;
 	case IPV6_RECVFRAGSIZE:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fc6d7d143f2c29aab9a3f56eae02e5337e65a97b..844642682b8363c4c32d329ed92474f834a59618 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1682,16 +1682,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
 }
 
 static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
-	int type, struct mld2_grec **ppgr)
+	int type, struct mld2_grec **ppgr, unsigned int mtu)
 {
-	struct net_device *dev = pmc->idev->dev;
 	struct mld2_report *pmr;
 	struct mld2_grec *pgr;
 
-	if (!skb)
-		skb = mld_newpack(pmc->idev, dev->mtu);
-	if (!skb)
-		return NULL;
+	if (!skb) {
+		skb = mld_newpack(pmc->idev, mtu);
+		if (!skb)
+			return NULL;
+	}
 	pgr = skb_put(skb, sizeof(struct mld2_grec));
 	pgr->grec_type = type;
 	pgr->grec_auxwords = 0;
@@ -1714,10 +1714,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	struct mld2_grec *pgr = NULL;
 	struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
 	int scount, stotal, first, isquery, truncate;
+	unsigned int mtu;
 
 	if (pmc->mca_flags & MAF_NOREPORT)
 		return skb;
 
+	mtu = READ_ONCE(dev->mtu);
+	if (mtu < IPV6_MIN_MTU)
+		return skb;
+
 	isquery = type == MLD2_MODE_IS_INCLUDE ||
 		  type == MLD2_MODE_IS_EXCLUDE;
 	truncate = type == MLD2_MODE_IS_EXCLUDE ||
@@ -1738,7 +1743,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 		    AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
 			if (skb)
 				mld_sendpack(skb);
-			skb = mld_newpack(idev, dev->mtu);
+			skb = mld_newpack(idev, mtu);
 		}
 	}
 	first = 1;
@@ -1774,12 +1779,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 				pgr->grec_nsrcs = htons(scount);
 			if (skb)
 				mld_sendpack(skb);
-			skb = mld_newpack(idev, dev->mtu);
+			skb = mld_newpack(idev, mtu);
 			first = 1;
 			scount = 0;
 		}
 		if (first) {
-			skb = add_grhead(skb, pmc, type, &pgr);
+			skb = add_grhead(skb, pmc, type, &pgr, mtu);
 			first = 0;
 		}
 		if (!skb)
@@ -1814,7 +1819,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 				mld_sendpack(skb);
 				skb = NULL; /* add_grhead will get a new one */
 			}
-			skb = add_grhead(skb, pmc, type, &pgr);
+			skb = add_grhead(skb, pmc, type, &pgr, mtu);
 		}
 	}
 	if (pgr)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f06e25065a342e361d7ae68ae1d60304b3f43f39..1d7ae93663351297395208f2c9a65bd5fba236e5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -458,7 +458,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
 					if (!xt_find_jump_offset(offsets, newpos,
 								 newinfo->number))
 						return 0;
-					e = entry0 + newpos;
 				} else {
 					/* ... this is a fallthru */
 					newpos = pos + e->next_offset;
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 2b1a15846f9ac1f40d45aef52af6aab92d515408..92c0047e7e33dc5925054c41143fe200db06f125 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -33,13 +33,19 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
 
 	if (range->flags & NF_NAT_RANGE_MAP_IPS)
 		return -EINVAL;
-	return 0;
+	return nf_ct_netns_get(par->net, par->family);
+}
+
+static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_ct_netns_put(par->net, par->family);
 }
 
 static struct xt_target masquerade_tg6_reg __read_mostly = {
 	.name		= "MASQUERADE",
 	.family		= NFPROTO_IPV6,
 	.checkentry	= masquerade_tg6_checkentry,
+	.destroy	= masquerade_tg6_destroy,
 	.target		= masquerade_tg6,
 	.targetsize	= sizeof(struct nf_nat_range),
 	.table		= "nat",
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7a8d1500d374b4089e623ed2b20d68110cff498e..0458b761f3c56ce765841e0a3a7e5e78f90b95eb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	}
 
 	rt->dst.flags |= DST_HOST;
+	rt->dst.input = ip6_input;
 	rt->dst.output  = ip6_output;
 	rt->rt6i_gateway  = fl6->daddr;
 	rt->rt6i_dst.addr = fl6->daddr;
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		if (!ipv6_addr_any(&fl6.saddr))
 			flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-		if (!fibmatch)
-			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
-		else
-			dst = ip6_route_lookup(net, &fl6, 0);
+		dst = ip6_route_input_lookup(net, dev, &fl6, flags);
 
 		rcu_read_unlock();
 	} else {
 		fl6.flowi6_oif = oif;
 
-		if (!fibmatch)
-			dst = ip6_route_output(net, NULL, &fl6);
-		else
-			dst = ip6_route_lookup(net, &fl6, 0);
+		dst = ip6_route_output(net, NULL, &fl6);
 	}
 
 
@@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		goto errout;
 	}
 
+	if (fibmatch && rt->dst.from) {
+		struct rt6_info *ort = container_of(rt->dst.from,
+						    struct rt6_info, dst);
+
+		dst_hold(&ort->dst);
+		ip6_rt_put(rt);
+		rt = ort;
+	}
+
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb) {
 		ip6_rt_put(rt);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d60ddcb0bfe240d5351089ed43464683e68c1db8..d7dc23c1b2ca32fb554cccf1fbf50f736a7f6f4c 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1098,6 +1098,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
 	ipip6_tunnel_link(sitn, t);
 	t->parms.iph.ttl = p->iph.ttl;
 	t->parms.iph.tos = p->iph.tos;
+	t->parms.iph.frag_off = p->iph.frag_off;
 	if (t->parms.link != p->link || t->fwmark != fwmark) {
 		t->parms.link = p->link;
 		t->fwmark = fwmark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6bb98c93edfe2ed2f16fe5229605f8108cfc7f9a..7178476b3d2f64f01832fe3292c7dec849ec2265 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -994,7 +994,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
 			req->ts_recent, sk->sk_bound_dev_if,
-			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
 			0, 0);
 }
 
@@ -1454,7 +1454,6 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 		struct sock *nsk;
 
 		sk = req->rsk_listener;
-		tcp_v6_fill_cb(skb, hdr, th);
 		if (tcp_v6_inbound_md5_hash(sk, skb)) {
 			sk_drops_add(sk, skb);
 			reqsk_put(req);
@@ -1467,8 +1466,12 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 		sock_hold(sk);
 		refcounted = true;
 		nsk = NULL;
-		if (!tcp_filter(sk, skb))
+		if (!tcp_filter(sk, skb)) {
+			th = (const struct tcphdr *)skb->data;
+			hdr = ipv6_hdr(skb);
+			tcp_v6_fill_cb(skb, hdr, th);
 			nsk = tcp_check_req(sk, skb, req, false);
+		}
 		if (!nsk) {
 			reqsk_put(req);
 			goto discard_and_relse;
@@ -1492,8 +1495,6 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 
-	tcp_v6_fill_cb(skb, hdr, th);
-
 	if (tcp_v6_inbound_md5_hash(sk, skb))
 		goto discard_and_relse;
 
@@ -1501,6 +1502,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 		goto discard_and_relse;
 	th = (const struct tcphdr *)skb->data;
 	hdr = ipv6_hdr(skb);
+	tcp_v6_fill_cb(skb, hdr, th);
 
 	skb->dev = NULL;
 
@@ -1590,7 +1592,6 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 		tcp_v6_timewait_ack(sk, skb);
 		break;
 	case TCP_TW_RST:
-		tcp_v6_restore_cb(skb);
 		tcp_v6_send_reset(sk, skb);
 		inet_twsk_deschedule_put(inet_twsk(sk));
 		goto discard_it;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index fe04e23af9862557fde2a9c214faf3a10b7e5eda..841f4a07438e83502eadd6ec6c16a16d1de6aa55 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 }
 EXPORT_SYMBOL(xfrm6_rcv_spi);
 
+static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
+				   struct sk_buff *skb)
+{
+	if (xfrm_trans_queue(skb, ip6_rcv_finish))
+		__kfree_skb(skb);
+	return -1;
+}
+
 int xfrm6_transport_finish(struct sk_buff *skb, int async)
 {
 	struct xfrm_offload *xo = xfrm_offload(skb);
@@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 
 	NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
 		dev_net(skb->dev), NULL, skb, skb->dev, NULL,
-		ip6_rcv_finish);
+		xfrm6_transport_finish2);
 	return -1;
 }
 
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 0b750a22c4b9bf92e079fcd4a694fccf81f00a8e..d4e98f20fc2ac1c55a1f1db67498af900e0842ea 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1625,60 +1625,30 @@ static struct proto kcm_proto = {
 };
 
 /* Clone a kcm socket. */
-static int kcm_clone(struct socket *osock, struct kcm_clone *info,
-		     struct socket **newsockp)
+static struct file *kcm_clone(struct socket *osock)
 {
 	struct socket *newsock;
 	struct sock *newsk;
-	struct file *newfile;
-	int err, newfd;
 
-	err = -ENFILE;
 	newsock = sock_alloc();
 	if (!newsock)
-		goto out;
+		return ERR_PTR(-ENFILE);
 
 	newsock->type = osock->type;
 	newsock->ops = osock->ops;
 
 	__module_get(newsock->ops->owner);
 
-	newfd = get_unused_fd_flags(0);
-	if (unlikely(newfd < 0)) {
-		err = newfd;
-		goto out_fd_fail;
-	}
-
-	newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
-	if (IS_ERR(newfile)) {
-		err = PTR_ERR(newfile);
-		goto out_sock_alloc_fail;
-	}
-
 	newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
 			 &kcm_proto, true);
 	if (!newsk) {
-		err = -ENOMEM;
-		goto out_sk_alloc_fail;
+		sock_release(newsock);
+		return ERR_PTR(-ENOMEM);
 	}
-
 	sock_init_data(newsock, newsk);
 	init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
 
-	fd_install(newfd, newfile);
-	*newsockp = newsock;
-	info->fd = newfd;
-
-	return 0;
-
-out_sk_alloc_fail:
-	fput(newfile);
-out_sock_alloc_fail:
-	put_unused_fd(newfd);
-out_fd_fail:
-	sock_release(newsock);
-out:
-	return err;
+	return sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
 }
 
 static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
@@ -1708,17 +1678,25 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	}
 	case SIOCKCMCLONE: {
 		struct kcm_clone info;
-		struct socket *newsock = NULL;
-
-		err = kcm_clone(sock, &info, &newsock);
-		if (!err) {
-			if (copy_to_user((void __user *)arg, &info,
-					 sizeof(info))) {
-				err = -EFAULT;
-				sys_close(info.fd);
-			}
-		}
+		struct file *file;
+
+		info.fd = get_unused_fd_flags(0);
+		if (unlikely(info.fd < 0))
+			return info.fd;
 
+		file = kcm_clone(sock);
+		if (IS_ERR(file)) {
+			put_unused_fd(info.fd);
+			return PTR_ERR(file);
+		}
+		if (copy_to_user((void __user *)arg, &info,
+				 sizeof(info))) {
+			put_unused_fd(info.fd);
+			fput(file);
+			return -EFAULT;
+		}
+		fd_install(info.fd, file);
+		err = 0;
 		break;
 	}
 	default:
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 167f83b853e6bd391256e15ef99439b792e18cdc..1621b6ab17ba45e63f79e85a42563781b5536dc2 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -291,16 +291,15 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
 	int i;
 
 	mutex_lock(&sta->ampdu_mlme.mtx);
-	for (i = 0; i <  IEEE80211_NUM_TIDS; i++) {
+	for (i = 0; i <  IEEE80211_NUM_TIDS; i++)
 		___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
 						WLAN_REASON_QSTA_LEAVE_QBSS,
 						reason != AGG_STOP_DESTROY_STA &&
 						reason != AGG_STOP_PEER_REQUEST);
-	}
-	mutex_unlock(&sta->ampdu_mlme.mtx);
 
 	for (i = 0; i <  IEEE80211_NUM_TIDS; i++)
 		___ieee80211_stop_tx_ba_session(sta, i, reason);
+	mutex_unlock(&sta->ampdu_mlme.mtx);
 
 	/* stopping might queue the work again - so cancel only afterwards */
 	cancel_work_sync(&sta->ampdu_mlme.work);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 70e9d2ca8bbec176edac094268247bb125e3cbdc..4daafb07602f5f9727af7bec4ea7603901438b63 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3632,6 +3632,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
 		}
 		return true;
 	case NL80211_IFTYPE_MESH_POINT:
+		if (ether_addr_equal(sdata->vif.addr, hdr->addr2))
+			return false;
 		if (multicast)
 			return true;
 		return ether_addr_equal(sdata->vif.addr, hdr->addr1);
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index cf1bf2605c1027207a86889f93da667d8b2313b9..dc6347342e34c499eaef5403f63034b137ad14e3 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -103,7 +103,6 @@ struct bitstr {
 #define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
 #define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
 #define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
-#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
 static unsigned int get_len(struct bitstr *bs);
 static unsigned int get_bit(struct bitstr *bs);
 static unsigned int get_bits(struct bitstr *bs, unsigned int b);
@@ -165,6 +164,19 @@ static unsigned int get_len(struct bitstr *bs)
 	return v;
 }
 
+static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits)
+{
+	bits += bs->bit;
+	bytes += bits / BITS_PER_BYTE;
+	if (bits % BITS_PER_BYTE > 0)
+		bytes++;
+
+	if (*bs->cur + bytes > *bs->end)
+		return 1;
+
+	return 0;
+}
+
 /****************************************************************************/
 static unsigned int get_bit(struct bitstr *bs)
 {
@@ -279,8 +291,8 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f,
 	PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
 
 	INC_BIT(bs);
-
-	CHECK_BOUND(bs, 0);
+	if (nf_h323_error_boundary(bs, 0, 0))
+		return H323_ERROR_BOUND;
 	return H323_ERROR_NONE;
 }
 
@@ -293,11 +305,14 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f,
 	PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
 
 	BYTE_ALIGN(bs);
-	CHECK_BOUND(bs, 1);
+	if (nf_h323_error_boundary(bs, 1, 0))
+		return H323_ERROR_BOUND;
+
 	len = *bs->cur++;
 	bs->cur += len;
+	if (nf_h323_error_boundary(bs, 0, 0))
+		return H323_ERROR_BOUND;
 
-	CHECK_BOUND(bs, 0);
 	return H323_ERROR_NONE;
 }
 
@@ -319,6 +334,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
 		bs->cur += 2;
 		break;
 	case CONS:		/* 64K < Range < 4G */
+		if (nf_h323_error_boundary(bs, 0, 2))
+			return H323_ERROR_BOUND;
 		len = get_bits(bs, 2) + 1;
 		BYTE_ALIGN(bs);
 		if (base && (f->attr & DECODE)) {	/* timeToLive */
@@ -330,7 +347,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
 		break;
 	case UNCO:
 		BYTE_ALIGN(bs);
-		CHECK_BOUND(bs, 2);
+		if (nf_h323_error_boundary(bs, 2, 0))
+			return H323_ERROR_BOUND;
 		len = get_len(bs);
 		bs->cur += len;
 		break;
@@ -341,7 +359,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
 
 	PRINT("\n");
 
-	CHECK_BOUND(bs, 0);
+	if (nf_h323_error_boundary(bs, 0, 0))
+		return H323_ERROR_BOUND;
 	return H323_ERROR_NONE;
 }
 
@@ -357,7 +376,8 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f,
 		INC_BITS(bs, f->sz);
 	}
 
-	CHECK_BOUND(bs, 0);
+	if (nf_h323_error_boundary(bs, 0, 0))
+		return H323_ERROR_BOUND;
 	return H323_ERROR_NONE;
 }
 
@@ -375,12 +395,14 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
 		len = f->lb;
 		break;
 	case WORD:		/* 2-byte length */
-		CHECK_BOUND(bs, 2);
+		if (nf_h323_error_boundary(bs, 2, 0))
+			return H323_ERROR_BOUND;
 		len = (*bs->cur++) << 8;
 		len += (*bs->cur++) + f->lb;
 		break;
 	case SEMI:
-		CHECK_BOUND(bs, 2);
+		if (nf_h323_error_boundary(bs, 2, 0))
+			return H323_ERROR_BOUND;
 		len = get_len(bs);
 		break;
 	default:
@@ -391,7 +413,8 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
 	bs->cur += len >> 3;
 	bs->bit = len & 7;
 
-	CHECK_BOUND(bs, 0);
+	if (nf_h323_error_boundary(bs, 0, 0))
+		return H323_ERROR_BOUND;
 	return H323_ERROR_NONE;
 }
 
@@ -404,12 +427,15 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f,
 	PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
 
 	/* 2 <= Range <= 255 */
+	if (nf_h323_error_boundary(bs, 0, f->sz))
+		return H323_ERROR_BOUND;
 	len = get_bits(bs, f->sz) + f->lb;
 
 	BYTE_ALIGN(bs);
 	INC_BITS(bs, (len << 2));
 
-	CHECK_BOUND(bs, 0);
+	if (nf_h323_error_boundary(bs, 0, 0))
+		return H323_ERROR_BOUND;
 	return H323_ERROR_NONE;
 }
 
@@ -440,15 +466,19 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
 		break;
 	case BYTE:		/* Range == 256 */
 		BYTE_ALIGN(bs);
-		CHECK_BOUND(bs, 1);
+		if (nf_h323_error_boundary(bs, 1, 0))
+			return H323_ERROR_BOUND;
 		len = (*bs->cur++) + f->lb;
 		break;
 	case SEMI:
 		BYTE_ALIGN(bs);
-		CHECK_BOUND(bs, 2);
+		if (nf_h323_error_boundary(bs, 2, 0))
+			return H323_ERROR_BOUND;
 		len = get_len(bs) + f->lb;
 		break;
 	default:		/* 2 <= Range <= 255 */
+		if (nf_h323_error_boundary(bs, 0, f->sz))
+			return H323_ERROR_BOUND;
 		len = get_bits(bs, f->sz) + f->lb;
 		BYTE_ALIGN(bs);
 		break;
@@ -458,7 +488,8 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
 
 	PRINT("\n");
 
-	CHECK_BOUND(bs, 0);
+	if (nf_h323_error_boundary(bs, 0, 0))
+		return H323_ERROR_BOUND;
 	return H323_ERROR_NONE;
 }
 
@@ -473,10 +504,13 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
 	switch (f->sz) {
 	case BYTE:		/* Range == 256 */
 		BYTE_ALIGN(bs);
-		CHECK_BOUND(bs, 1);
+		if (nf_h323_error_boundary(bs, 1, 0))
+			return H323_ERROR_BOUND;
 		len = (*bs->cur++) + f->lb;
 		break;
 	default:		/* 2 <= Range <= 255 */
+		if (nf_h323_error_boundary(bs, 0, f->sz))
+			return H323_ERROR_BOUND;
 		len = get_bits(bs, f->sz) + f->lb;
 		BYTE_ALIGN(bs);
 		break;
@@ -484,7 +518,8 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
 
 	bs->cur += len << 1;
 
-	CHECK_BOUND(bs, 0);
+	if (nf_h323_error_boundary(bs, 0, 0))
+		return H323_ERROR_BOUND;
 	return H323_ERROR_NONE;
 }
 
@@ -503,9 +538,13 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
 	base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
 
 	/* Extensible? */
+	if (nf_h323_error_boundary(bs, 0, 1))
+		return H323_ERROR_BOUND;
 	ext = (f->attr & EXT) ? get_bit(bs) : 0;
 
 	/* Get fields bitmap */
+	if (nf_h323_error_boundary(bs, 0, f->sz))
+		return H323_ERROR_BOUND;
 	bmp = get_bitmap(bs, f->sz);
 	if (base)
 		*(unsigned int *)base = bmp;
@@ -525,9 +564,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
 
 		/* Decode */
 		if (son->attr & OPEN) {	/* Open field */
-			CHECK_BOUND(bs, 2);
+			if (nf_h323_error_boundary(bs, 2, 0))
+				return H323_ERROR_BOUND;
 			len = get_len(bs);
-			CHECK_BOUND(bs, len);
+			if (nf_h323_error_boundary(bs, len, 0))
+				return H323_ERROR_BOUND;
 			if (!base || !(son->attr & DECODE)) {
 				PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
 				      " ", son->name);
@@ -555,8 +596,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
 		return H323_ERROR_NONE;
 
 	/* Get the extension bitmap */
+	if (nf_h323_error_boundary(bs, 0, 7))
+		return H323_ERROR_BOUND;
 	bmp2_len = get_bits(bs, 7) + 1;
-	CHECK_BOUND(bs, (bmp2_len + 7) >> 3);
+	if (nf_h323_error_boundary(bs, 0, bmp2_len))
+		return H323_ERROR_BOUND;
 	bmp2 = get_bitmap(bs, bmp2_len);
 	bmp |= bmp2 >> f->sz;
 	if (base)
@@ -567,9 +611,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
 	for (opt = 0; opt < bmp2_len; opt++, i++, son++) {
 		/* Check Range */
 		if (i >= f->ub) {	/* Newer Version? */
-			CHECK_BOUND(bs, 2);
+			if (nf_h323_error_boundary(bs, 2, 0))
+				return H323_ERROR_BOUND;
 			len = get_len(bs);
-			CHECK_BOUND(bs, len);
+			if (nf_h323_error_boundary(bs, len, 0))
+				return H323_ERROR_BOUND;
 			bs->cur += len;
 			continue;
 		}
@@ -583,9 +629,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
 		if (!((0x80000000 >> opt) & bmp2))	/* Not present */
 			continue;
 
-		CHECK_BOUND(bs, 2);
+		if (nf_h323_error_boundary(bs, 2, 0))
+			return H323_ERROR_BOUND;
 		len = get_len(bs);
-		CHECK_BOUND(bs, len);
+		if (nf_h323_error_boundary(bs, len, 0))
+			return H323_ERROR_BOUND;
 		if (!base || !(son->attr & DECODE)) {
 			PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
 			      son->name);
@@ -623,22 +671,27 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
 	switch (f->sz) {
 	case BYTE:
 		BYTE_ALIGN(bs);
-		CHECK_BOUND(bs, 1);
+		if (nf_h323_error_boundary(bs, 1, 0))
+			return H323_ERROR_BOUND;
 		count = *bs->cur++;
 		break;
 	case WORD:
 		BYTE_ALIGN(bs);
-		CHECK_BOUND(bs, 2);
+		if (nf_h323_error_boundary(bs, 2, 0))
+			return H323_ERROR_BOUND;
 		count = *bs->cur++;
 		count <<= 8;
 		count += *bs->cur++;
 		break;
 	case SEMI:
 		BYTE_ALIGN(bs);
-		CHECK_BOUND(bs, 2);
+		if (nf_h323_error_boundary(bs, 2, 0))
+			return H323_ERROR_BOUND;
 		count = get_len(bs);
 		break;
 	default:
+		if (nf_h323_error_boundary(bs, 0, f->sz))
+			return H323_ERROR_BOUND;
 		count = get_bits(bs, f->sz);
 		break;
 	}
@@ -658,8 +711,11 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
 	for (i = 0; i < count; i++) {
 		if (son->attr & OPEN) {
 			BYTE_ALIGN(bs);
+			if (nf_h323_error_boundary(bs, 2, 0))
+				return H323_ERROR_BOUND;
 			len = get_len(bs);
-			CHECK_BOUND(bs, len);
+			if (nf_h323_error_boundary(bs, len, 0))
+				return H323_ERROR_BOUND;
 			if (!base || !(son->attr & DECODE)) {
 				PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
 				      " ", son->name);
@@ -710,11 +766,17 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
 	base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
 
 	/* Decode the choice index number */
+	if (nf_h323_error_boundary(bs, 0, 1))
+		return H323_ERROR_BOUND;
 	if ((f->attr & EXT) && get_bit(bs)) {
 		ext = 1;
+		if (nf_h323_error_boundary(bs, 0, 7))
+			return H323_ERROR_BOUND;
 		type = get_bits(bs, 7) + f->lb;
 	} else {
 		ext = 0;
+		if (nf_h323_error_boundary(bs, 0, f->sz))
+			return H323_ERROR_BOUND;
 		type = get_bits(bs, f->sz);
 		if (type >= f->lb)
 			return H323_ERROR_RANGE;
@@ -727,8 +789,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
 	/* Check Range */
 	if (type >= f->ub) {	/* Newer version? */
 		BYTE_ALIGN(bs);
+		if (nf_h323_error_boundary(bs, 2, 0))
+			return H323_ERROR_BOUND;
 		len = get_len(bs);
-		CHECK_BOUND(bs, len);
+		if (nf_h323_error_boundary(bs, len, 0))
+			return H323_ERROR_BOUND;
 		bs->cur += len;
 		return H323_ERROR_NONE;
 	}
@@ -742,8 +807,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
 
 	if (ext || (son->attr & OPEN)) {
 		BYTE_ALIGN(bs);
+		if (nf_h323_error_boundary(bs, len, 0))
+			return H323_ERROR_BOUND;
 		len = get_len(bs);
-		CHECK_BOUND(bs, len);
+		if (nf_h323_error_boundary(bs, len, 0))
+			return H323_ERROR_BOUND;
 		if (!base || !(son->attr & DECODE)) {
 			PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
 			      son->name);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 59c08997bfdfdb9c16aa7e9cc1d33f62a46a1769..382d49792f428099a1fa78ebc1f50224ba8b7d97 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,7 +45,6 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_conntrack_labels.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
@@ -1566,9 +1565,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
 static int ctnetlink_change_timeout(struct nf_conn *ct,
 				    const struct nlattr * const cda[])
 {
-	u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
+	u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
 
-	ct->timeout = nfct_time_stamp + timeout * HZ;
+	if (timeout > INT_MAX)
+		timeout = INT_MAX;
+	ct->timeout = nfct_time_stamp + (u32)timeout;
 
 	if (test_bit(IPS_DYING_BIT, &ct->status))
 		return -ETIME;
@@ -1768,6 +1769,7 @@ ctnetlink_create_conntrack(struct net *net,
 	int err = -EINVAL;
 	struct nf_conntrack_helper *helper;
 	struct nf_conn_tstamp *tstamp;
+	u64 timeout;
 
 	ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
 	if (IS_ERR(ct))
@@ -1776,7 +1778,10 @@ ctnetlink_create_conntrack(struct net *net,
 	if (!cda[CTA_TIMEOUT])
 		goto err1;
 
-	ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
+	timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
+	if (timeout > INT_MAX)
+		timeout = INT_MAX;
+	ct->timeout = (u32)timeout + nfct_time_stamp;
 
 	rcu_read_lock();
  	if (cda[CTA_HELP]) {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b12fc07111d0847b014410291df947bddc32d46a..37ef35b861f24365c843a4eec5ecc5ad8292cd22 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1039,6 +1039,9 @@ static int tcp_packet(struct nf_conn *ct,
 		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
 		 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
 		timeout = timeouts[TCP_CONNTRACK_UNACK];
+	else if (ct->proto.tcp.last_win == 0 &&
+		 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
+		timeout = timeouts[TCP_CONNTRACK_RETRANS];
 	else
 		timeout = timeouts[new_state];
 	spin_unlock_bh(&ct->lock);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d8327b43e4dce64593573178a397b160488a6355..07bd4138c84ef250decc00a5c70df84298124a3a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2072,7 +2072,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 				continue;
 
 			list_for_each_entry_rcu(chain, &table->chains, list) {
-				if (ctx && ctx->chain[0] &&
+				if (ctx && ctx->chain &&
 				    strcmp(ctx->chain, chain->name) != 0)
 					continue;
 
@@ -4665,8 +4665,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
 {
 	struct nft_obj_filter *filter = cb->data;
 
-	kfree(filter->table);
-	kfree(filter);
+	if (filter) {
+		kfree(filter->table);
+		kfree(filter);
+	}
 
 	return 0;
 }
@@ -5847,6 +5849,12 @@ static int __net_init nf_tables_init_net(struct net *net)
 	return 0;
 }
 
+static void __net_exit nf_tables_exit_net(struct net *net)
+{
+	WARN_ON_ONCE(!list_empty(&net->nft.af_info));
+	WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
+}
+
 int __nft_release_basechain(struct nft_ctx *ctx)
 {
 	struct nft_rule *rule, *nr;
@@ -5917,6 +5925,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
 
 static struct pernet_operations nf_tables_net_ops = {
 	.init	= nf_tables_init_net,
+	.exit	= nf_tables_exit_net,
 };
 
 static int __init nf_tables_module_init(void)
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 41628b3936731b77885717c27cf4dfa8e62b3c0f..d33ce6d5ebce92db2fab30cb4286c11ffd8c321a 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -17,6 +17,7 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/errno.h>
+#include <linux/capability.h>
 #include <net/netlink.h>
 #include <net/sock.h>
 
@@ -407,6 +408,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
 	struct nfnl_cthelper *nlcth;
 	int ret = 0;
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
 		return -EINVAL;
 
@@ -611,6 +615,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
 	struct nfnl_cthelper *nlcth;
 	bool tuple_set = false;
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = nfnl_cthelper_dump_table,
@@ -678,6 +685,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 	struct nfnl_cthelper *nlcth, *n;
 	int j = 0, ret;
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (tb[NFCTH_NAME])
 		helper_name = nla_data(tb[NFCTH_NAME]);
 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e5afab86381ca1f1487195009ffde4b8eaeec3e2..e955bec0acc6a949a32ac8d1e6ea383b31983c73 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init(struct net *net)
 
 static void __net_exit nfnl_log_net_exit(struct net *net)
 {
+	struct nfnl_log_net *log = nfnl_log_pernet(net);
+	unsigned int i;
+
 #ifdef CONFIG_PROC_FS
 	remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
 #endif
 	nf_log_unset(net, &nfulnl_logger);
+	for (i = 0; i < INSTANCE_BUCKETS; i++)
+		WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
 }
 
 static struct pernet_operations nfnl_log_net_ops = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index a16356cacec3646a9b70a0d0b443db28a696a0a0..c09b36755ed721f45be12523c4c328c97fd0e166 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1512,10 +1512,15 @@ static int __net_init nfnl_queue_net_init(struct net *net)
 
 static void __net_exit nfnl_queue_net_exit(struct net *net)
 {
+	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+	unsigned int i;
+
 	nf_unregister_queue_handler(net);
 #ifdef CONFIG_PROC_FS
 	remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
 #endif
+	for (i = 0; i < INSTANCE_BUCKETS; i++)
+		WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
 }
 
 static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a0a93d987a3bd440dc19bafe9e65b023c6baf217..47ec1046ad11536e337f709d1f41a267a77cf1d3 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -214,6 +214,8 @@ static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
 	[NFTA_EXTHDR_OFFSET]		= { .type = NLA_U32 },
 	[NFTA_EXTHDR_LEN]		= { .type = NLA_U32 },
 	[NFTA_EXTHDR_FLAGS]		= { .type = NLA_U32 },
+	[NFTA_EXTHDR_OP]		= { .type = NLA_U32 },
+	[NFTA_EXTHDR_SREG]		= { .type = NLA_U32 },
 };
 
 static int nft_exthdr_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index a77dd514297c9627d6103dbcb6428bb6bdd165ad..55802e97f906d1987ed78b4296584deb38e5f876 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1729,8 +1729,17 @@ static int __net_init xt_net_init(struct net *net)
 	return 0;
 }
 
+static void __net_exit xt_net_exit(struct net *net)
+{
+	int i;
+
+	for (i = 0; i < NFPROTO_NUMPROTO; i++)
+		WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
+}
+
 static struct pernet_operations xt_net_ops = {
 	.init = xt_net_init,
+	.exit = xt_net_exit,
 };
 
 static int __init xt_init(void)
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 041da0d9c06f2b1c2ecb31851932ac5a350122a9..06b090d8e9014d6bf6adfb742b5c2620197b98fb 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -27,6 +27,9 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
 {
 	struct sock_fprog_kern program;
 
+	if (len > XT_BPF_MAX_NUM_INSTR)
+		return -EINVAL;
+
 	program.len = len;
 	program.filter = insns;
 
@@ -52,18 +55,11 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
 
 static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
 {
-	mm_segment_t oldfs = get_fs();
-	int retval, fd;
-
-	set_fs(KERNEL_DS);
-	fd = bpf_obj_get_user(path, 0);
-	set_fs(oldfs);
-	if (fd < 0)
-		return fd;
-
-	retval = __bpf_mt_check_fd(fd, ret);
-	sys_close(fd);
-	return retval;
+	if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
+		return -EINVAL;
+
+	*ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+	return PTR_ERR_OR_ZERO(*ret);
 }
 
 static int bpf_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 36e14b1f061ddf7eb77327a3e717e15b6af83bfb..a34f314a8c2380e6b6a223dd6d38dcc88ca2c1ac 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 
+#include <linux/capability.h>
 #include <linux/if.h>
 #include <linux/inetdevice.h>
 #include <linux/ip.h>
@@ -70,6 +71,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
 	struct xt_osf_finger *kf = NULL, *sf;
 	int err = 0;
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (!osf_attrs[OSF_ATTR_FINGER])
 		return -EINVAL;
 
@@ -115,6 +119,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
 	struct xt_osf_finger *sf;
 	int err = -ENOENT;
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (!osf_attrs[OSF_ATTR_FINGER])
 		return -EINVAL;
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b9e0ee4e22f57066d0ac0bc5f64181fbb65e6acc..79cc1bf36e4af7d2c70575e56203a482ba2dca97 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -253,6 +253,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
 	struct sock *sk = skb->sk;
 	int ret = -ENOMEM;
 
+	if (!net_eq(dev_net(dev), sock_net(sk)))
+		return 0;
+
 	dev_hold(dev);
 
 	if (is_vmalloc_addr(skb->head))
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dbe2379329c5517fb164b6024d40fabebe7855c8..f039064ce922f3aac8419dcda65ad875f89e966b 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 			return -EINVAL;
 
 		skb_reset_network_header(skb);
+		key->eth.type = skb->protocol;
 	} else {
 		eth = eth_hdr(skb);
 		ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 		if (unlikely(parse_vlan(skb, key)))
 			return -ENOMEM;
 
-		skb->protocol = parse_ethertype(skb);
-		if (unlikely(skb->protocol == htons(0)))
+		key->eth.type = parse_ethertype(skb);
+		if (unlikely(key->eth.type == htons(0)))
 			return -ENOMEM;
 
+		/* Multiple tagged packets need to retain TPID to satisfy
+		 * skb_vlan_pop(), which will later shift the ethertype into
+		 * skb->protocol.
+		 */
+		if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
+			skb->protocol = key->eth.cvlan.tpid;
+		else
+			skb->protocol = key->eth.type;
+
 		skb_reset_network_header(skb);
 		__skb_push(skb, skb->data - skb_mac_header(skb));
 	}
 	skb_reset_mac_len(skb);
-	key->eth.type = skb->protocol;
 
 	/* Network layer. */
 	if (key->eth.type == htons(ETH_P_IP)) {
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 8886f15abe90e16005d7e02f3514476c68d2b0db..634cfcb7bba6833bde376706947c99f1cb103199 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 	long i;
 	int ret;
 
-	if (rs->rs_bound_addr == 0) {
+	if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
 		ret = -ENOTCONN; /* XXX not a great errno */
 		goto out;
 	}
@@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
 
 	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
 
+	if (args->nr_local == 0)
+		return -EINVAL;
+
 	/* figure out the number of pages in the vector */
 	for (i = 0; i < args->nr_local; i++) {
 		if (copy_from_user(&vec, &local_vec[i],
@@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
 err:
 	if (page)
 		put_page(page);
+	rm->atomic.op_active = 0;
 	kfree(rm->atomic.op_notifier);
 
 	return ret;
diff --git a/net/rds/send.c b/net/rds/send.c
index b52cdc8ae428819a5853509a06852ec2ebc43a5a..f72466c63f0c5657a2f44e11ae432dd1457991cf 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
 			continue;
 
 		if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
+			if (cmsg->cmsg_len <
+			    CMSG_LEN(sizeof(struct rds_rdma_args)))
+				return -EINVAL;
 			args = CMSG_DATA(cmsg);
 			*rdma_bytes += args->remote_vec.bytes;
 		}
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 8f7cf4c042be2b9b4379968655bea594a2928546..dcd818fa837e0af91978d6f1128085f93eb80f15 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -860,6 +860,7 @@ static void rxrpc_sock_destructor(struct sock *sk)
 static int rxrpc_release_sock(struct sock *sk)
 {
 	struct rxrpc_sock *rx = rxrpc_sk(sk);
+	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
 
 	_enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
 
@@ -895,8 +896,8 @@ static int rxrpc_release_sock(struct sock *sk)
 	rxrpc_release_calls_on_socket(rx);
 	flush_workqueue(rxrpc_workqueue);
 	rxrpc_purge_queue(&sk->sk_receive_queue);
-	rxrpc_queue_work(&rx->local->rxnet->service_conn_reaper);
-	rxrpc_queue_work(&rx->local->rxnet->client_conn_reaper);
+	rxrpc_queue_work(&rxnet->service_conn_reaper);
+	rxrpc_queue_work(&rxnet->client_conn_reaper);
 
 	rxrpc_put_local(rx->local);
 	rx->local = NULL;
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index bda952ffe6a6eab394e39220a6fe6a6af19c8e08..ad2ab11031899fd0d1b398622deee579b6fa9f42 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -123,7 +123,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
 		else
 			ack_at = expiry;
 
-		ack_at = jiffies + expiry;
+		ack_at += now;
 		if (time_before(ack_at, call->ack_at)) {
 			WRITE_ONCE(call->ack_at, ack_at);
 			rxrpc_reduce_call_timer(call, ack_at, now,
@@ -426,7 +426,7 @@ void rxrpc_process_call(struct work_struct *work)
 	next = call->expect_rx_by;
 
 #define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
-	
+
 	set(call->expect_req_by);
 	set(call->expect_term_by);
 	set(call->ack_at);
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 9e9a8db1bc9cd0f1afd3efd7e9e26c4d2890a7d3..4ca11be6be3cadcfda93eab7892292ca1ec127b5 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -30,22 +30,18 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL;
 	struct rxrpc_channel *chan;
 	struct msghdr msg;
-	struct kvec iov;
+	struct kvec iov[3];
 	struct {
 		struct rxrpc_wire_header whdr;
 		union {
-			struct {
-				__be32 code;
-			} abort;
-			struct {
-				struct rxrpc_ackpacket ack;
-				u8 padding[3];
-				struct rxrpc_ackinfo info;
-			};
+			__be32 abort_code;
+			struct rxrpc_ackpacket ack;
 		};
 	} __attribute__((packed)) pkt;
+	struct rxrpc_ackinfo ack_info;
 	size_t len;
-	u32 serial, mtu, call_id;
+	int ioc;
+	u32 serial, mtu, call_id, padding;
 
 	_enter("%d", conn->debug_id);
 
@@ -66,6 +62,13 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	msg.msg_controllen = 0;
 	msg.msg_flags	= 0;
 
+	iov[0].iov_base	= &pkt;
+	iov[0].iov_len	= sizeof(pkt.whdr);
+	iov[1].iov_base	= &padding;
+	iov[1].iov_len	= 3;
+	iov[2].iov_base	= &ack_info;
+	iov[2].iov_len	= sizeof(ack_info);
+
 	pkt.whdr.epoch		= htonl(conn->proto.epoch);
 	pkt.whdr.cid		= htonl(conn->proto.cid);
 	pkt.whdr.callNumber	= htonl(call_id);
@@ -80,8 +83,10 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	len = sizeof(pkt.whdr);
 	switch (chan->last_type) {
 	case RXRPC_PACKET_TYPE_ABORT:
-		pkt.abort.code	= htonl(chan->last_abort);
-		len += sizeof(pkt.abort);
+		pkt.abort_code	= htonl(chan->last_abort);
+		iov[0].iov_len += sizeof(pkt.abort_code);
+		len += sizeof(pkt.abort_code);
+		ioc = 1;
 		break;
 
 	case RXRPC_PACKET_TYPE_ACK:
@@ -94,13 +99,19 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 		pkt.ack.serial		= htonl(skb ? sp->hdr.serial : 0);
 		pkt.ack.reason		= skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
 		pkt.ack.nAcks		= 0;
-		pkt.info.rxMTU		= htonl(rxrpc_rx_mtu);
-		pkt.info.maxMTU		= htonl(mtu);
-		pkt.info.rwind		= htonl(rxrpc_rx_window_size);
-		pkt.info.jumbo_max	= htonl(rxrpc_rx_jumbo_max);
+		ack_info.rxMTU		= htonl(rxrpc_rx_mtu);
+		ack_info.maxMTU		= htonl(mtu);
+		ack_info.rwind		= htonl(rxrpc_rx_window_size);
+		ack_info.jumbo_max	= htonl(rxrpc_rx_jumbo_max);
 		pkt.whdr.flags		|= RXRPC_SLOW_START_OK;
-		len += sizeof(pkt.ack) + sizeof(pkt.info);
+		padding			= 0;
+		iov[0].iov_len += sizeof(pkt.ack);
+		len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
+		ioc = 3;
 		break;
+
+	default:
+		return;
 	}
 
 	/* Resync with __rxrpc_disconnect_call() and check that the last call
@@ -110,9 +121,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	if (READ_ONCE(chan->last_call) != call_id)
 		return;
 
-	iov.iov_base	= &pkt;
-	iov.iov_len	= len;
-
 	serial = atomic_inc_return(&conn->serial);
 	pkt.whdr.serial = htonl(serial);
 
@@ -127,7 +135,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 		break;
 	}
 
-	kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len);
+	kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
 	_leave("");
 	return;
 }
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 1aad04a32d5e203ab17928e2247275c8b01c954d..c628351eb9008da7059102f48dad7f605343de5b 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -424,7 +424,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
 	if (earliest != now + MAX_JIFFY_OFFSET) {
 		_debug("reschedule reaper %ld", (long)earliest - (long)now);
 		ASSERT(time_after(earliest, now));
-		rxrpc_set_service_reap_timer(rxnet, earliest);		
+		rxrpc_set_service_reap_timer(rxnet, earliest);
 	}
 
 	while (!list_empty(&graveyard)) {
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 23a5e61d8f79a01622c29de07061fcff94a14f3c..6fc61400337fb3e8a96658ed685efa9a8280f70e 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -976,7 +976,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
 		rxrpc_reduce_call_timer(call, expect_rx_by, now,
 					rxrpc_timer_set_for_normal);
 	}
-	
+
 	switch (sp->hdr.type) {
 	case RXRPC_PACKET_TYPE_DATA:
 		rxrpc_input_data(call, skb, skew);
@@ -1213,7 +1213,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
 				goto reupgrade;
 			conn->service_id = sp->hdr.serviceId;
 		}
-		
+
 		if (sp->hdr.callNumber == 0) {
 			/* Connection-level packet */
 			_debug("CONN %p {%d}", conn, conn->debug_id);
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index a1c53ac066a10bda169b0222b6d6177066c6dca9..09f2a3e0522163e0e5ae900555c56b74ace26b7a 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -233,7 +233,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
 		if (resend_at < 1)
 			resend_at = 1;
 
-		resend_at = now + rxrpc_resend_timeout;
+		resend_at += now;
 		WRITE_ONCE(call->resend_at, resend_at);
 		rxrpc_reduce_call_timer(call, resend_at, now,
 					rxrpc_timer_set_for_send);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e29a48ef7fc348aefdc720cf08d1509ac5b53559..a0ac42b3ed0652fe897d59bd043c189af4fa0a12 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
 	if (action == TC_ACT_SHOT)
 		this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
 
-	tm->lastuse = lastuse;
+	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
 static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 1e3f10e5da996a868b8315c53cfac964842bbec3..6445184b2759a783f05a48578af330e0872f5d11 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -22,7 +22,6 @@
 #include <net/pkt_sched.h>
 #include <uapi/linux/tc_act/tc_ife.h>
 #include <net/tc_act/tc_ife.h>
-#include <linux/rtnetlink.h>
 
 static int skbmark_encode(struct sk_buff *skb, void *skbdata,
 			  struct tcf_meta_info *e)
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 2ea1f26c9e966b26076f48757c5351287d9f0943..7221437ca3a6fadad5ebc3f286cc1fc5d35d89e1 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -22,7 +22,6 @@
 #include <net/pkt_sched.h>
 #include <uapi/linux/tc_act/tc_ife.h>
 #include <net/tc_act/tc_ife.h>
-#include <linux/rtnetlink.h>
 
 static int skbtcindex_encode(struct sk_buff *skb, void *skbdata,
 			     struct tcf_meta_info *e)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 8b3e593884803370ea27ef5fe770fa5ae829a888..08b61849c2a2f448fa0a29b0c93037ebbbafd0e7 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -239,7 +239,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
 	struct tcf_t *tm = &m->tcf_tm;
 
 	_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
-	tm->lastuse = lastuse;
+	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
 static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 8b5abcd2f32faeaa2a283bcc8fb388201f7a86e2..9438969290a6147c16c971558aeef3d01d21dde5 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -96,23 +96,16 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	return ret;
 }
 
-static void tcf_sample_cleanup_rcu(struct rcu_head *rcu)
+static void tcf_sample_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu);
+	struct tcf_sample *s = to_sample(a);
 	struct psample_group *psample_group;
 
-	psample_group = rcu_dereference_protected(s->psample_group, 1);
+	psample_group = rtnl_dereference(s->psample_group);
 	RCU_INIT_POINTER(s->psample_group, NULL);
 	psample_group_put(psample_group);
 }
 
-static void tcf_sample_cleanup(struct tc_action *a, int bind)
-{
-	struct tcf_sample *s = to_sample(a);
-
-	call_rcu(&s->rcu, tcf_sample_cleanup_rcu);
-}
-
 static bool tcf_sample_dev_ok_push(struct net_device *dev)
 {
 	switch (dev->type) {
@@ -264,7 +257,6 @@ static int __init sample_init_module(void)
 
 static void __exit sample_cleanup_module(void)
 {
-	rcu_barrier();
 	tcf_unregister_action(&act_sample_ops, &sample_net_ops);
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ddcf04b4ab43732c001869f70d63ea193768ebc3..b9d63d2246e667329c30606165dd485b9dc777aa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -23,7 +23,6 @@
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
-#include <linux/err.h>
 #include <linux/slab.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -352,6 +351,8 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 {
 	struct tcf_chain *chain;
 
+	if (!block)
+		return;
 	/* Hold a refcnt for all chains, except 0, so that they don't disappear
 	 * while we are iterating.
 	 */
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6fe798c2df1a5303cd61cd3ad53cd2f9385d16de..8d78e7f4ecc33082517aaab5767a30c119f49dc0 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
 	struct list_head link;
 	struct tcf_result res;
 	bool exts_integrated;
-	bool offloaded;
 	u32 gen_flags;
 	struct tcf_exts exts;
 	u32 handle;
@@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
 }
 
 static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
-			       enum tc_clsbpf_command cmd)
+			       struct cls_bpf_prog *oldprog)
 {
-	bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
 	struct tcf_block *block = tp->chain->block;
-	bool skip_sw = tc_skip_sw(prog->gen_flags);
 	struct tc_cls_bpf_offload cls_bpf = {};
+	struct cls_bpf_prog *obj;
+	bool skip_sw;
 	int err;
 
+	skip_sw = prog && tc_skip_sw(prog->gen_flags);
+	obj = prog ?: oldprog;
+
 	tc_cls_common_offload_init(&cls_bpf.common, tp);
-	cls_bpf.command = cmd;
-	cls_bpf.exts = &prog->exts;
-	cls_bpf.prog = prog->filter;
-	cls_bpf.name = prog->bpf_name;
-	cls_bpf.exts_integrated = prog->exts_integrated;
-	cls_bpf.gen_flags = prog->gen_flags;
+	cls_bpf.command = TC_CLSBPF_OFFLOAD;
+	cls_bpf.exts = &obj->exts;
+	cls_bpf.prog = prog ? prog->filter : NULL;
+	cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
+	cls_bpf.name = obj->bpf_name;
+	cls_bpf.exts_integrated = obj->exts_integrated;
+	cls_bpf.gen_flags = obj->gen_flags;
 
 	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
-	if (addorrep) {
+	if (prog) {
 		if (err < 0) {
-			cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+			cls_bpf_offload_cmd(tp, oldprog, prog);
 			return err;
 		} else if (err > 0) {
 			prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
 		}
 	}
 
-	if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+	if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
 		return -EINVAL;
 
 	return 0;
@@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 			   struct cls_bpf_prog *oldprog)
 {
-	struct cls_bpf_prog *obj = prog;
-	enum tc_clsbpf_command cmd;
-	bool skip_sw;
-	int ret;
-
-	skip_sw = tc_skip_sw(prog->gen_flags) ||
-		(oldprog && tc_skip_sw(oldprog->gen_flags));
-
-	if (oldprog && oldprog->offloaded) {
-		if (!tc_skip_hw(prog->gen_flags)) {
-			cmd = TC_CLSBPF_REPLACE;
-		} else if (!tc_skip_sw(prog->gen_flags)) {
-			obj = oldprog;
-			cmd = TC_CLSBPF_DESTROY;
-		} else {
-			return -EINVAL;
-		}
-	} else {
-		if (tc_skip_hw(prog->gen_flags))
-			return skip_sw ? -EINVAL : 0;
-		cmd = TC_CLSBPF_ADD;
-	}
-
-	ret = cls_bpf_offload_cmd(tp, obj, cmd);
-	if (ret)
-		return ret;
+	if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
+		return -EINVAL;
 
-	obj->offloaded = true;
-	if (oldprog)
-		oldprog->offloaded = false;
+	if (prog && tc_skip_hw(prog->gen_flags))
+		prog = NULL;
+	if (oldprog && tc_skip_hw(oldprog->gen_flags))
+		oldprog = NULL;
+	if (!prog && !oldprog)
+		return 0;
 
-	return 0;
+	return cls_bpf_offload_cmd(tp, prog, oldprog);
 }
 
 static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
 {
 	int err;
 
-	if (!prog->offloaded)
-		return;
-
-	err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
-	if (err) {
+	err = cls_bpf_offload_cmd(tp, NULL, prog);
+	if (err)
 		pr_err("Stopping hardware offload failed: %d\n", err);
-		return;
-	}
-
-	prog->offloaded = false;
 }
 
 static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
 					 struct cls_bpf_prog *prog)
 {
-	if (!prog->offloaded)
-		return;
+	struct tcf_block *block = tp->chain->block;
+	struct tc_cls_bpf_offload cls_bpf = {};
+
+	tc_cls_common_offload_init(&cls_bpf.common, tp);
+	cls_bpf.command = TC_CLSBPF_STATS;
+	cls_bpf.exts = &prog->exts;
+	cls_bpf.prog = prog->filter;
+	cls_bpf.name = prog->bpf_name;
+	cls_bpf.exts_integrated = prog->exts_integrated;
+	cls_bpf.gen_flags = prog->gen_flags;
 
-	cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+	tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
 }
 
 static int cls_bpf_init(struct tcf_proto *tp)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ac152b4f4247d61d1761886352a93cb03b585cbf..507859cdd1cb1e7d97751ebad5cd688cb02b14ea 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -45,7 +45,6 @@
 #include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
-#include <linux/netdevice.h>
 #include <linux/idr.h>
 
 struct tc_u_knode {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b6c4f536876b70b0ad24fee686129a396e07f573..0f1eab99ff4edb6e7e27f4b4b34552b5ee996cbf 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -795,6 +795,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	tcm->tcm_info = refcount_read(&q->refcnt);
 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
 		goto nla_put_failure;
+	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
+		goto nla_put_failure;
 	if (q->ops->dump && q->ops->dump(q, skb) < 0)
 		goto nla_put_failure;
 	qlen = q->q.qlen;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index b30a2c70bd489b36a2295a11707b6a36d4eb9ac0..531250fceb9e5a75d6a8b843e5e5fd9d481fddf2 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -369,6 +369,9 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 
 	ctl = nla_data(tb[TCA_CHOKE_PARMS]);
 
+	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+		return -EINVAL;
+
 	if (ctl->limit > CHOKE_MAX_QUEUE)
 		return -EINVAL;
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 3839cbbdc32b1eadd2cae6a42a6b8c998ca88a15..661c7144b53af048b3a65484777910e2d60f25aa 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -26,6 +26,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/if_vlan.h>
+#include <linux/if_macvlan.h>
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 #include <net/dst.h>
@@ -277,6 +278,8 @@ unsigned long dev_trans_start(struct net_device *dev)
 
 	if (is_vlan_dev(dev))
 		dev = vlan_dev_real_dev(dev);
+	else if (netif_is_macvlan(dev))
+		dev = macvlan_dev_real_dev(dev);
 	res = netdev_get_tx_queue(dev, 0)->trans_start;
 	for (i = 1; i < dev->num_tx_queues; i++) {
 		val = netdev_get_tx_queue(dev, i)->trans_start;
@@ -1037,6 +1040,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 
 	if (!tp_head) {
 		RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
+		/* Wait for flying RCU callback before it is freed. */
+		rcu_barrier_bh();
 		return;
 	}
 
@@ -1052,7 +1057,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 	rcu_assign_pointer(*miniqp->p_miniq, miniq);
 
 	if (miniq_old)
-		/* This is counterpart of the rcu barrier above. We need to
+		/* This is counterpart of the rcu barriers above. We need to
 		 * block potential new user of miniq_old until all readers
 		 * are not seeing it.
 		 */
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 17c7130454bd90e8af1d17e95f477ea558fb481d..bc30f9186ac67cd7b1c21d4d2b0035d3a6b886af 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -356,6 +356,9 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
 	struct gred_sched *table = qdisc_priv(sch);
 	struct gred_sched_data *q = table->tab[dp];
 
+	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+		return -EINVAL;
+
 	if (!q) {
 		table->tab[dp] = q = *prealloc;
 		*prealloc = NULL;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 5ecc38f35d4774fdfa402d9a4c4a0e655e1c91c2..fc1286f499c1462ab29c5054f734237788974e0e 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -68,6 +68,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 	struct net_device *dev = qdisc_dev(sch);
 	int err;
 
+	net_inc_ingress_queue();
+
 	mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
 
 	q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
@@ -78,7 +80,6 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 	if (err)
 		return err;
 
-	net_inc_ingress_queue();
 	sch->flags |= TCQ_F_CPUSTATS;
 
 	return 0;
@@ -172,6 +173,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
 	struct net_device *dev = qdisc_dev(sch);
 	int err;
 
+	net_inc_ingress_queue();
+	net_inc_egress_queue();
+
 	mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
 
 	q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
@@ -190,18 +194,11 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
 
 	err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
 	if (err)
-		goto err_egress_block_get;
-
-	net_inc_ingress_queue();
-	net_inc_egress_queue();
+		return err;
 
 	sch->flags |= TCQ_F_CPUSTATS;
 
 	return 0;
-
-err_egress_block_get:
-	tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
-	return err;
 }
 
 static void clsact_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7f8ea9e297c36acd0969b0330ab479e0199f47ac..f0747eb87dc4784e67e0b5872dcf37effaaa4060 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -157,6 +157,7 @@ static int red_offload(struct Qdisc *sch, bool enable)
 		.handle = sch->handle,
 		.parent = sch->parent,
 	};
+	int err;
 
 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
 		return -EOPNOTSUPP;
@@ -171,7 +172,14 @@ static int red_offload(struct Qdisc *sch, bool enable)
 		opt.command = TC_RED_DESTROY;
 	}
 
-	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+
+	if (!err && enable)
+		sch->flags |= TCQ_F_OFFLOADED;
+	else
+		sch->flags &= ~TCQ_F_OFFLOADED;
+
+	return err;
 }
 
 static void red_destroy(struct Qdisc *sch)
@@ -212,6 +220,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
 
 	ctl = nla_data(tb[TCA_RED_PARMS]);
+	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+		return -EINVAL;
 
 	if (ctl->limit > 0) {
 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
@@ -272,7 +282,7 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
 	return red_change(sch, opt);
 }
 
-static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
+static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
 {
 	struct net_device *dev = qdisc_dev(sch);
 	struct tc_red_qopt_offload hw_stats = {
@@ -284,21 +294,12 @@ static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
 			.stats.qstats = &sch->qstats,
 		},
 	};
-	int err;
 
-	opt->flags &= ~TC_RED_OFFLOADED;
-	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
-		return 0;
-
-	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
-					    &hw_stats);
-	if (err == -EOPNOTSUPP)
+	if (!(sch->flags & TCQ_F_OFFLOADED))
 		return 0;
 
-	if (!err)
-		opt->flags |= TC_RED_OFFLOADED;
-
-	return err;
+	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+					     &hw_stats);
 }
 
 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -317,7 +318,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 	int err;
 
 	sch->qstats.backlog = q->qdisc->qstats.backlog;
-	err = red_dump_offload(sch, &opt);
+	err = red_dump_offload_stats(sch, &opt);
 	if (err)
 		goto nla_put_failure;
 
@@ -345,7 +346,7 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 		.marked	= q->stats.prob_mark + q->stats.forced_mark,
 	};
 
-	if (tc_can_offload(dev) &&  dev->netdev_ops->ndo_setup_tc) {
+	if (sch->flags & TCQ_F_OFFLOADED) {
 		struct red_stats hw_stats = {0};
 		struct tc_red_qopt_offload hw_stats_request = {
 			.command = TC_RED_XSTATS,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 09c1203c17119829d183fbdd0dfe9757460b863e..930e5bd26d3d7650a41b9472463c3fc39732495b 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -639,6 +639,9 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ctl->divisor &&
 	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
 		return -EINVAL;
+	if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
+					ctl_v1->Wlog))
+		return -EINVAL;
 	if (ctl_v1 && ctl_v1->qth_min) {
 		p = kmalloc(sizeof(*p), GFP_KERNEL);
 		if (!p)
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 7b261afc47b9d709fdd780a93aaba874f35d79be..7f8baa48e7c2a834aea292106fd319c2489432a3 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -53,6 +53,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
 	msg->send_failed = 0;
 	msg->send_error = 0;
 	msg->can_delay = 1;
+	msg->abandoned = 0;
 	msg->expires_at = 0;
 	INIT_LIST_HEAD(&msg->chunks);
 }
@@ -304,6 +305,13 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 	if (!chunk->asoc->peer.prsctp_capable)
 		return 0;
 
+	if (chunk->msg->abandoned)
+		return 1;
+
+	if (!chunk->has_tsn &&
+	    !(chunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG))
+		return 0;
+
 	if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
 	    time_after(jiffies, chunk->msg->expires_at)) {
 		struct sctp_stream_out *streamout =
@@ -316,6 +324,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 			chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
 			streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
 		}
+		chunk->msg->abandoned = 1;
 		return 1;
 	} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
 		   chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
@@ -324,10 +333,12 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 
 		chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
 		streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+		chunk->msg->abandoned = 1;
 		return 1;
 	} else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
 		   chunk->msg->expires_at &&
 		   time_after(jiffies, chunk->msg->expires_at)) {
+		chunk->msg->abandoned = 1;
 		return 1;
 	}
 	/* PRIO policy is processed by sendmsg, not here */
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 3f619fdcbf0a0b4a6f35ece8021c011f874a2d79..291c97b07058218635fcfcd06214aa79d74ec80d 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
 	case SCTP_CID_AUTH:
 		return "AUTH";
 
+	case SCTP_CID_RECONF:
+		return "RECONF";
+
 	default:
 		break;
 	}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 621b5ca3fd1c17c3d7ef7bb1c7677ab98cebbe77..141c9c466ec172ff67930cf38eb02a49e01a12ab 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
 		return;
 	}
 
-	if (t->param_flags & SPP_PMTUD_ENABLE) {
-		/* Update transports view of the MTU */
-		sctp_transport_update_pmtu(t, pmtu);
-
-		/* Update association pmtu. */
-		sctp_assoc_sync_pmtu(asoc);
-	}
+	if (!(t->param_flags & SPP_PMTUD_ENABLE))
+		/* We can't allow retransmitting in such case, as the
+		 * retransmission would be sized just as before, and thus we
+		 * would get another icmp, and retransmit again.
+		 */
+		return;
 
-	/* Retransmit with the new pmtu setting.
-	 * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
-	 * Needed will never be sent, but if a message was sent before
-	 * PMTU discovery was disabled that was larger than the PMTU, it
-	 * would not be fragmented, so it must be re-transmitted fragmented.
+	/* Update transports view of the MTU. Return if no update was needed.
+	 * If an update wasn't needed/possible, it also doesn't make sense to
+	 * try to retransmit now.
 	 */
+	if (!sctp_transport_update_pmtu(t, pmtu))
+		return;
+
+	/* Update association pmtu. */
+	sctp_assoc_sync_pmtu(asoc);
+
+	/* Retransmit with the new pmtu setting. */
 	sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
 }
 
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 4db012aa25f7a042f063bc17b56270effebc6cc6..7d67feeeffc1e758ae4be4ef1ddaea23276d1f5e 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -364,10 +364,12 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
 	list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
 		struct sctp_stream_out *streamout;
 
-		if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
-		    chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
+		if (!chk->msg->abandoned &&
+		    (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+		     chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
 			continue;
 
+		chk->msg->abandoned = 1;
 		list_del_init(&chk->transmitted_list);
 		sctp_insert_list(&asoc->outqueue.abandoned,
 				 &chk->transmitted_list);
@@ -377,7 +379,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
 		asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
 		streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
 
-		if (!chk->tsn_gap_acked) {
+		if (queue != &asoc->outqueue.retransmit &&
+		    !chk->tsn_gap_acked) {
 			if (chk->transport)
 				chk->transport->flight_size -=
 						sctp_data_size(chk);
@@ -403,10 +406,13 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
 	q->sched->unsched_all(&asoc->stream);
 
 	list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
-		if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
-		    chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
+		if (!chk->msg->abandoned &&
+		    (!(chk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG) ||
+		     !SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+		     chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
 			continue;
 
+		chk->msg->abandoned = 1;
 		sctp_sched_dequeue_common(q, chk);
 		asoc->sent_cnt_removable--;
 		asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
@@ -1434,7 +1440,8 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 			/* If this chunk has not been acked, stop
 			 * considering it as 'outstanding'.
 			 */
-			if (!tchunk->tsn_gap_acked) {
+			if (transmitted_queue != &q->retransmit &&
+			    !tchunk->tsn_gap_acked) {
 				if (tchunk->transport)
 					tchunk->transport->flight_size -=
 							sctp_data_size(tchunk);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 014847e25648182dbf99d8fb095e094af76264bb..9b01e994f66108a12abc31f452482f1de8749d84 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2277,7 +2277,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
 
 		if (asoc && sctp_outq_is_empty(&asoc->outqueue)) {
 			event = sctp_ulpevent_make_sender_dry_event(asoc,
-					GFP_ATOMIC);
+					GFP_USER | __GFP_NOWARN);
 			if (!event)
 				return -ENOMEM;
 
@@ -3498,6 +3498,8 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
 
 	if (optlen < sizeof(struct sctp_hmacalgo))
 		return -EINVAL;
+	optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) +
+					     SCTP_AUTH_NUM_HMACS * sizeof(u16));
 
 	hmacs = memdup_user(optval, optlen);
 	if (IS_ERR(hmacs))
@@ -3536,6 +3538,11 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
 
 	if (optlen <= sizeof(struct sctp_authkey))
 		return -EINVAL;
+	/* authkey->sca_keylength is u16, so optlen can't be bigger than
+	 * this.
+	 */
+	optlen = min_t(unsigned int, optlen, USHRT_MAX +
+					     sizeof(struct sctp_authkey));
 
 	authkey = memdup_user(optval, optlen);
 	if (IS_ERR(authkey))
@@ -3891,13 +3898,20 @@ static int sctp_setsockopt_reset_streams(struct sock *sk,
 	struct sctp_association *asoc;
 	int retval = -EINVAL;
 
-	if (optlen < sizeof(struct sctp_reset_streams))
+	if (optlen < sizeof(*params))
 		return -EINVAL;
+	/* srs_number_streams is u16, so optlen can't be bigger than this. */
+	optlen = min_t(unsigned int, optlen, USHRT_MAX +
+					     sizeof(__u16) * sizeof(*params));
 
 	params = memdup_user(optval, optlen);
 	if (IS_ERR(params))
 		return PTR_ERR(params);
 
+	if (params->srs_number_streams * sizeof(__u16) >
+	    optlen - sizeof(*params))
+		goto out;
+
 	asoc = sctp_id2assoc(sk, params->srs_assoc_id);
 	if (!asoc)
 		goto out;
@@ -4494,7 +4508,7 @@ static int sctp_init_sock(struct sock *sk)
 	SCTP_DBG_OBJCNT_INC(sock);
 
 	local_bh_disable();
-	percpu_counter_inc(&sctp_sockets_allocated);
+	sk_sockets_allocated_inc(sk);
 	sock_prot_inuse_add(net, sk->sk_prot, 1);
 
 	/* Nothing can fail after this block, otherwise
@@ -4538,7 +4552,7 @@ static void sctp_destroy_sock(struct sock *sk)
 	}
 	sctp_endpoint_free(sp->ep);
 	local_bh_disable();
-	percpu_counter_dec(&sctp_sockets_allocated);
+	sk_sockets_allocated_dec(sk);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 	local_bh_enable();
 }
@@ -5011,7 +5025,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv
 	len = sizeof(int);
 	if (put_user(len, optlen))
 		return -EFAULT;
-	if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int)))
+	if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
 		return -EFAULT;
 	return 0;
 }
@@ -5080,7 +5094,6 @@ static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *p
 	*newfile = sock_alloc_file(newsock, 0, NULL);
 	if (IS_ERR(*newfile)) {
 		put_unused_fd(retval);
-		sock_release(newsock);
 		retval = PTR_ERR(*newfile);
 		*newfile = NULL;
 		return retval;
@@ -5642,6 +5655,9 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 		err = -EFAULT;
 		goto out;
 	}
+	/* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too,
+	 * but we can't change it anymore.
+	 */
 	if (put_user(bytes_copied, optlen))
 		err = -EFAULT;
 out:
@@ -6078,7 +6094,7 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
 		params.assoc_id = 0;
 	} else if (len >= sizeof(struct sctp_assoc_value)) {
 		len = sizeof(struct sctp_assoc_value);
-		if (copy_from_user(&params, optval, sizeof(params)))
+		if (copy_from_user(&params, optval, len))
 			return -EFAULT;
 	} else
 		return -EINVAL;
@@ -6248,7 +6264,9 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
 
 	if (len < sizeof(struct sctp_authkeyid))
 		return -EINVAL;
-	if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid)))
+
+	len = sizeof(struct sctp_authkeyid);
+	if (copy_from_user(&val, optval, len))
 		return -EFAULT;
 
 	asoc = sctp_id2assoc(sk, val.scact_assoc_id);
@@ -6260,7 +6278,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
 	else
 		val.scact_keynumber = ep->active_key_id;
 
-	len = sizeof(struct sctp_authkeyid);
 	if (put_user(len, optlen))
 		return -EFAULT;
 	if (copy_to_user(optval, &val, len))
@@ -6286,7 +6303,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 	if (len < sizeof(struct sctp_authchunks))
 		return -EINVAL;
 
-	if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+	if (copy_from_user(&val, optval, sizeof(val)))
 		return -EFAULT;
 
 	to = p->gauth_chunks;
@@ -6331,7 +6348,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
 	if (len < sizeof(struct sctp_authchunks))
 		return -EINVAL;
 
-	if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+	if (copy_from_user(&val, optval, sizeof(val)))
 		return -EFAULT;
 
 	to = p->gauth_chunks;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 76ea66be0bbee7d3f018676d52c8b95ba06dbcb1..524dfeb94c41ab1ac735746a8acf93af1c96ae48 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 	sctp_stream_outq_migrate(stream, NULL, outcnt);
 	sched->sched_all(stream);
 
-	i = sctp_stream_alloc_out(stream, outcnt, gfp);
-	if (i)
-		return i;
+	ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+	if (ret)
+		goto out;
 
 	stream->outcnt = outcnt;
 	for (i = 0; i < stream->outcnt; i++)
@@ -170,19 +170,17 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 	if (!incnt)
 		goto out;
 
-	i = sctp_stream_alloc_in(stream, incnt, gfp);
-	if (i) {
-		ret = -ENOMEM;
-		goto free;
+	ret = sctp_stream_alloc_in(stream, incnt, gfp);
+	if (ret) {
+		sched->free(stream);
+		kfree(stream->out);
+		stream->out = NULL;
+		stream->outcnt = 0;
+		goto out;
 	}
 
 	stream->incnt = incnt;
-	goto out;
 
-free:
-	sched->free(stream);
-	kfree(stream->out);
-	stream->out = NULL;
 out:
 	return ret;
 }
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1e5a22430cf56e40a6f323081beb97836b506384..47f82bd794d915188bad037463c2aa14175a55ef 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
 
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
 {
 	struct dst_entry *dst = sctp_transport_dst_check(t);
+	bool change = true;
 
 	if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
-		pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
-			__func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
-		/* Use default minimum segment size and disable
-		 * pmtu discovery on this transport.
-		 */
-		t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
-	} else {
-		t->pathmtu = pmtu;
+		pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
+				    __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
+		/* Use default minimum segment instead */
+		pmtu = SCTP_DEFAULT_MINSEGMENT;
 	}
+	pmtu = SCTP_TRUNC4(pmtu);
 
 	if (dst) {
 		dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
 		dst = sctp_transport_dst_check(t);
 	}
 
-	if (!dst)
+	if (!dst) {
 		t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
+		dst = t->dst;
+	}
+
+	if (dst) {
+		/* Re-fetch, as under layers may have a higher minimum size */
+		pmtu = SCTP_TRUNC4(dst_mtu(dst));
+		change = t->pathmtu != pmtu;
+	}
+	t->pathmtu = pmtu;
+
+	return change;
 }
 
 /* Caches the dst entry and source address for a transport's destination
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index a71be33f3afeb0aaaef174ee082c4c547aab1e2d..e36ec5dd64c6ff969fc30aae893d1d5ca8c221bf 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
 void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 		      gfp_t gfp)
 {
-	struct sctp_association *asoc;
-	__u16 needed, freed;
-
-	asoc = ulpq->asoc;
+	struct sctp_association *asoc = ulpq->asoc;
+	__u32 freed = 0;
+	__u16 needed;
 
-	if (chunk) {
-		needed = ntohs(chunk->chunk_hdr->length);
-		needed -= sizeof(struct sctp_data_chunk);
-	} else
-		needed = SCTP_DEFAULT_MAXWINDOW;
-
-	freed = 0;
+	needed = ntohs(chunk->chunk_hdr->length) -
+		 sizeof(struct sctp_data_chunk);
 
 	if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
 		freed = sctp_ulpq_renege_order(ulpq, needed);
-		if (freed < needed) {
+		if (freed < needed)
 			freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
-		}
 	}
 	/* If able to free enough room, accept this chunk. */
-	if (chunk && (freed >= needed)) {
-		int retval;
-		retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
+	if (freed >= needed) {
+		int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
 		/*
 		 * Enter partial delivery if chunk has not been
 		 * delivered; otherwise, drain the reassembly queue.
diff --git a/net/socket.c b/net/socket.c
index 42d8e9c9ccd5028793ebeb27fb319911a0f4ce35..6f05d5c4bf30759ea69944bd212891b9ce49469e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -406,8 +406,10 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 		name.len = strlen(name.name);
 	}
 	path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
-	if (unlikely(!path.dentry))
+	if (unlikely(!path.dentry)) {
+		sock_release(sock);
 		return ERR_PTR(-ENOMEM);
+	}
 	path.mnt = mntget(sock_mnt);
 
 	d_instantiate(path.dentry, SOCK_INODE(sock));
@@ -415,9 +417,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 	file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
 		  &socket_file_ops);
 	if (IS_ERR(file)) {
-		/* drop dentry, keep inode */
+		/* drop dentry, keep inode for a bit */
 		ihold(d_inode(path.dentry));
 		path_put(&path);
+		/* ... and now kill it properly */
+		sock_release(sock);
 		return file;
 	}
 
@@ -432,8 +436,10 @@ static int sock_map_fd(struct socket *sock, int flags)
 {
 	struct file *newfile;
 	int fd = get_unused_fd_flags(flags);
-	if (unlikely(fd < 0))
+	if (unlikely(fd < 0)) {
+		sock_release(sock);
 		return fd;
+	}
 
 	newfile = sock_alloc_file(sock, flags, NULL);
 	if (likely(!IS_ERR(newfile))) {
@@ -1330,19 +1336,9 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
 
 	retval = sock_create(family, type, protocol, &sock);
 	if (retval < 0)
-		goto out;
-
-	retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
-	if (retval < 0)
-		goto out_release;
-
-out:
-	/* It may be already another descriptor 8) Not kernel problem. */
-	return retval;
+		return retval;
 
-out_release:
-	sock_release(sock);
-	return retval;
+	return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
 }
 
 /*
@@ -1365,88 +1361,73 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
 	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
 		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
 
+	/*
+	 * reserve descriptors and make sure we won't fail
+	 * to return them to userland.
+	 */
+	fd1 = get_unused_fd_flags(flags);
+	if (unlikely(fd1 < 0))
+		return fd1;
+
+	fd2 = get_unused_fd_flags(flags);
+	if (unlikely(fd2 < 0)) {
+		put_unused_fd(fd1);
+		return fd2;
+	}
+
+	err = put_user(fd1, &usockvec[0]);
+	if (err)
+		goto out;
+
+	err = put_user(fd2, &usockvec[1]);
+	if (err)
+		goto out;
+
 	/*
 	 * Obtain the first socket and check if the underlying protocol
 	 * supports the socketpair call.
 	 */
 
 	err = sock_create(family, type, protocol, &sock1);
-	if (err < 0)
+	if (unlikely(err < 0))
 		goto out;
 
 	err = sock_create(family, type, protocol, &sock2);
-	if (err < 0)
-		goto out_release_1;
-
-	err = sock1->ops->socketpair(sock1, sock2);
-	if (err < 0)
-		goto out_release_both;
-
-	fd1 = get_unused_fd_flags(flags);
-	if (unlikely(fd1 < 0)) {
-		err = fd1;
-		goto out_release_both;
+	if (unlikely(err < 0)) {
+		sock_release(sock1);
+		goto out;
 	}
 
-	fd2 = get_unused_fd_flags(flags);
-	if (unlikely(fd2 < 0)) {
-		err = fd2;
-		goto out_put_unused_1;
+	err = sock1->ops->socketpair(sock1, sock2);
+	if (unlikely(err < 0)) {
+		sock_release(sock2);
+		sock_release(sock1);
+		goto out;
 	}
 
 	newfile1 = sock_alloc_file(sock1, flags, NULL);
 	if (IS_ERR(newfile1)) {
 		err = PTR_ERR(newfile1);
-		goto out_put_unused_both;
+		sock_release(sock2);
+		goto out;
 	}
 
 	newfile2 = sock_alloc_file(sock2, flags, NULL);
 	if (IS_ERR(newfile2)) {
 		err = PTR_ERR(newfile2);
-		goto out_fput_1;
+		fput(newfile1);
+		goto out;
 	}
 
-	err = put_user(fd1, &usockvec[0]);
-	if (err)
-		goto out_fput_both;
-
-	err = put_user(fd2, &usockvec[1]);
-	if (err)
-		goto out_fput_both;
-
 	audit_fd_pair(fd1, fd2);
 
 	fd_install(fd1, newfile1);
 	fd_install(fd2, newfile2);
-	/* fd1 and fd2 may be already another descriptors.
-	 * Not kernel problem.
-	 */
-
 	return 0;
 
-out_fput_both:
-	fput(newfile2);
-	fput(newfile1);
-	put_unused_fd(fd2);
-	put_unused_fd(fd1);
-	goto out;
-
-out_fput_1:
-	fput(newfile1);
-	put_unused_fd(fd2);
-	put_unused_fd(fd1);
-	sock_release(sock2);
-	goto out;
-
-out_put_unused_both:
+out:
 	put_unused_fd(fd2);
-out_put_unused_1:
 	put_unused_fd(fd1);
-out_release_both:
-	sock_release(sock2);
-out_release_1:
-	sock_release(sock1);
-out:
 	return err;
 }
 
@@ -1562,7 +1543,6 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
 	if (IS_ERR(newfile)) {
 		err = PTR_ERR(newfile);
 		put_unused_fd(newfd);
-		sock_release(newsock);
 		goto out_put;
 	}
 
@@ -2641,6 +2621,15 @@ static int __init sock_init(void)
 
 core_initcall(sock_init);	/* early initcall */
 
+static int __init jit_init(void)
+{
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+	bpf_jit_enable = 1;
+#endif
+	return 0;
+}
+pure_initcall(jit_init);
+
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index c5fda15ba3193f811151043ac3675a2ebfb15c38..1fdab5c4eda8c2a218448abb9bc461cf6474615c 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp)
 	 * allows a thread in BH context to safely check if the process
 	 * lock is held. In this case, if the lock is held, queue work.
 	 */
-	if (sock_owned_by_user(strp->sk)) {
+	if (sock_owned_by_user_nocheck(strp->sk)) {
 		queue_work(strp_wq, &strp->work);
 		return;
 	}
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index c4778cae58ef12c191958261a50e58e5f67082e8..444380f968f1158660f6a01a10cd8223c9db6081 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
 			goto out_free_groups;
 		creds->cr_group_info->gid[i] = kgid;
 	}
+	groups_sort(creds->cr_group_info);
 
 	return 0;
 out_free_groups:
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 5dd4e6c9fef21f650db78907e0fa46ee09413c71..26531193fce4d07f4b6d513093544b4a760f96ab 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd,
 				goto out;
 			rsci.cred.cr_group_info->gid[i] = kgid;
 		}
+		groups_sort(rsci.cred.cr_group_info);
 
 		/* mech name */
 		len = qword_get(&mesg, buf, mlen);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 740b67d5a733bdcd1ad10b6efdf957a8cd9a7889..af7f28fb8102e4313f5ced6aa585e30f3911ca6c 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd,
 		ug.gi->gid[i] = kgid;
 	}
 
+	groups_sort(ug.gi);
 	ugp = unix_gid_lookup(cd, uid);
 	if (ugp) {
 		struct cache_head *ch;
@@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
 		kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
 		cred->cr_group_info->gid[i] = kgid;
 	}
+	groups_sort(cred->cr_group_info);
 	if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
 		*authp = rpc_autherr_badverf;
 		return SVC_DENIED;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 333b9d697ae5373d00c6001b9c7f75f3d6c0ed91..33b74fd8405185d906d07e315c9b5a83775e747d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1001,6 +1001,7 @@ void xprt_transmit(struct rpc_task *task)
 {
 	struct rpc_rqst	*req = task->tk_rqstp;
 	struct rpc_xprt	*xprt = req->rq_xprt;
+	unsigned int connect_cookie;
 	int status, numreqs;
 
 	dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
@@ -1024,6 +1025,7 @@ void xprt_transmit(struct rpc_task *task)
 	} else if (!req->rq_bytes_sent)
 		return;
 
+	connect_cookie = xprt->connect_cookie;
 	req->rq_xtime = ktime_get();
 	status = xprt->ops->send_request(task);
 	trace_xprt_transmit(xprt, req->rq_xid, status);
@@ -1047,20 +1049,28 @@ void xprt_transmit(struct rpc_task *task)
 	xprt->stat.bklog_u += xprt->backlog.qlen;
 	xprt->stat.sending_u += xprt->sending.qlen;
 	xprt->stat.pending_u += xprt->pending.qlen;
+	spin_unlock_bh(&xprt->transport_lock);
 
-	/* Don't race with disconnect */
-	if (!xprt_connected(xprt))
-		task->tk_status = -ENOTCONN;
-	else {
+	req->rq_connect_cookie = connect_cookie;
+	if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
 		/*
-		 * Sleep on the pending queue since
-		 * we're expecting a reply.
+		 * Sleep on the pending queue if we're expecting a reply.
+		 * The spinlock ensures atomicity between the test of
+		 * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
 		 */
-		if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task))
+		spin_lock(&xprt->recv_lock);
+		if (!req->rq_reply_bytes_recvd) {
 			rpc_sleep_on(&xprt->pending, task, xprt_timer);
-		req->rq_connect_cookie = xprt->connect_cookie;
+			/*
+			 * Send an extra queue wakeup call if the
+			 * connection was dropped in case the call to
+			 * rpc_sleep_on() raced.
+			 */
+			if (!xprt_connected(xprt))
+				xprt_wake_pending_tasks(xprt, -ENOTCONN);
+		}
+		spin_unlock(&xprt->recv_lock);
 	}
-	spin_unlock_bh(&xprt->transport_lock);
 }
 
 static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ed34dc0f144cce537fce51dcba5bb12fe0b6df1c..a3f2ab283aeba38b26514dd9eb0e948c71a9ee7e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1408,11 +1408,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	dprintk("RPC:       %s: reply %p completes request %p (xid 0x%08x)\n",
 		__func__, rep, req, be32_to_cpu(rep->rr_xid));
 
-	if (list_empty(&req->rl_registered) &&
-	    !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
-		rpcrdma_complete_rqst(rep);
-	else
-		queue_work(rpcrdma_receive_wq, &rep->rr_work);
+	queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
 	return;
 
 out_badstatus:
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 646c24494ea7eba7fb2a2296ba6339e8dbf8f31e..6ee1ad8978f3b2977de2798d1a76ded1af6f78c4 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -52,6 +52,7 @@
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/sunrpc/addr.h>
+#include <linux/smp.h>
 
 #include "xprt_rdma.h"
 
@@ -656,6 +657,7 @@ xprt_rdma_allocate(struct rpc_task *task)
 		task->tk_pid, __func__, rqst->rq_callsize,
 		rqst->rq_rcvsize, req);
 
+	req->rl_cpu = smp_processor_id();
 	req->rl_connect_cookie = 0;	/* our reserved value */
 	rpcrdma_set_xprtdata(rqst, req);
 	rqst->rq_buffer = req->rl_sendbuf->rg_base;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 710b3f77db82869cd23abb90ea308ca67beef2bf..8607c029c0dd820250f4547c68bda41b7daca313 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -83,7 +83,7 @@ rpcrdma_alloc_wq(void)
 	struct workqueue_struct *recv_wq;
 
 	recv_wq = alloc_workqueue("xprtrdma_receive",
-				  WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI,
+				  WQ_MEM_RECLAIM | WQ_HIGHPRI,
 				  0);
 	if (!recv_wq)
 		return -ENOMEM;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 51686d9eac5f992d9d23d674f94df0e77f58bb72..1342f743f1c41acae0145a49962825aa1574311c 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -342,6 +342,7 @@ enum {
 struct rpcrdma_buffer;
 struct rpcrdma_req {
 	struct list_head	rl_list;
+	int			rl_cpu;
 	unsigned int		rl_connect_cookie;
 	struct rpcrdma_buffer	*rl_buffer;
 	struct rpcrdma_rep	*rl_reply;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 47ec121574ce4ef95850f688d85b50eff766a710..c8001471da6c3c53be6c63dde1311302b093f415 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -324,6 +324,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 	if (res) {
 		pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
 			name, -res);
+		kfree(b);
 		return -EINVAL;
 	}
 
@@ -347,8 +348,10 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 	if (skb)
 		tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
 
-	if (tipc_mon_create(net, bearer_id))
+	if (tipc_mon_create(net, bearer_id)) {
+		bearer_disable(net, b);
 		return -ENOMEM;
+	}
 
 	pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
 		name,
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 95fec2c057d6ebdb223e19ef83bf9c383cb2156e..5f4ffae807eee899a51ab79c258d3b26a8c863d9 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -109,7 +109,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
 static void tipc_group_decr_active(struct tipc_group *grp,
 				   struct tipc_member *m)
 {
-	if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
+	if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING ||
+	    m->state == MBR_REMITTED)
 		grp->active_cnt--;
 }
 
@@ -351,8 +352,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
 	if (m->window >= ADV_IDLE)
 		return;
 
-	if (!list_empty(&m->congested))
-		return;
+	list_del_init(&m->congested);
 
 	/* Sort member into congested members' list */
 	list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
@@ -369,18 +369,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
 	u16 prev = grp->bc_snd_nxt - 1;
 	struct tipc_member *m;
 	struct rb_node *n;
+	u16 ackers = 0;
 
 	for (n = rb_first(&grp->members); n; n = rb_next(n)) {
 		m = container_of(n, struct tipc_member, tree_node);
 		if (tipc_group_is_enabled(m)) {
 			tipc_group_update_member(m, len);
 			m->bc_acked = prev;
+			ackers++;
 		}
 	}
 
 	/* Mark number of acknowledges to expect, if any */
 	if (ack)
-		grp->bc_ackers = grp->member_cnt;
+		grp->bc_ackers = ackers;
 	grp->bc_snd_nxt++;
 }
 
@@ -561,7 +563,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
 	int max_active = grp->max_active;
 	int reclaim_limit = max_active * 3 / 4;
 	int active_cnt = grp->active_cnt;
-	struct tipc_member *m, *rm;
+	struct tipc_member *m, *rm, *pm;
 
 	m = tipc_group_find_member(grp, node, port);
 	if (!m)
@@ -604,6 +606,17 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
 			pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
 			tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
 		}
+		grp->active_cnt--;
+		list_del_init(&m->list);
+		if (list_empty(&grp->pending))
+			return;
+
+		/* Set oldest pending member to active and advertise */
+		pm = list_first_entry(&grp->pending, struct tipc_member, list);
+		pm->state = MBR_ACTIVE;
+		list_move_tail(&pm->list, &grp->active);
+		grp->active_cnt++;
+		tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
 		break;
 	case MBR_RECLAIMING:
 	case MBR_DISCOVERED:
@@ -648,6 +661,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
 	} else if (mtyp == GRP_REMIT_MSG) {
 		msg_set_grp_remitted(hdr, m->window);
 	}
+	msg_set_dest_droppable(hdr, true);
 	__skb_queue_tail(xmitq, skb);
 }
 
@@ -689,15 +703,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 			msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
 			__skb_queue_tail(inputq, m->event_msg);
 		}
-		if (m->window < ADV_IDLE)
-			tipc_group_update_member(m, 0);
-		else
-			list_del_init(&m->congested);
+		list_del_init(&m->congested);
+		tipc_group_update_member(m, 0);
 		return;
 	case GRP_LEAVE_MSG:
 		if (!m)
 			return;
 		m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+		list_del_init(&m->list);
+		list_del_init(&m->congested);
+		*usr_wakeup = true;
 
 		/* Wait until WITHDRAW event is received */
 		if (m->state != MBR_LEAVING) {
@@ -709,8 +724,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 		ehdr = buf_msg(m->event_msg);
 		msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
 		__skb_queue_tail(inputq, m->event_msg);
-		*usr_wakeup = true;
-		list_del_init(&m->congested);
 		return;
 	case GRP_ADV_MSG:
 		if (!m)
@@ -741,14 +754,14 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 		if (!m || m->state != MBR_RECLAIMING)
 			return;
 
-		list_del_init(&m->list);
-		grp->active_cnt--;
 		remitted = msg_grp_remitted(hdr);
 
 		/* Messages preceding the REMIT still in receive queue */
 		if (m->advertised > remitted) {
 			m->state = MBR_REMITTED;
 			in_flight = m->advertised - remitted;
+			m->advertised = ADV_IDLE + in_flight;
+			return;
 		}
 		/* All messages preceding the REMIT have been read */
 		if (m->advertised <= remitted) {
@@ -760,6 +773,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 			tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
 
 		m->advertised = ADV_IDLE + in_flight;
+		grp->active_cnt--;
+		list_del_init(&m->list);
 
 		/* Set oldest pending member to active and advertise */
 		if (list_empty(&grp->pending))
@@ -849,19 +864,29 @@ void tipc_group_member_evt(struct tipc_group *grp,
 		*usr_wakeup = true;
 		m->usr_pending = false;
 		node_up = tipc_node_is_up(net, node);
-
-		/* Hold back event if more messages might be expected */
-		if (m->state != MBR_LEAVING && node_up) {
-			m->event_msg = skb;
-			tipc_group_decr_active(grp, m);
-			m->state = MBR_LEAVING;
-		} else {
-			if (node_up)
+		m->event_msg = NULL;
+
+		if (node_up) {
+			/* Hold back event if a LEAVE msg should be expected */
+			if (m->state != MBR_LEAVING) {
+				m->event_msg = skb;
+				tipc_group_decr_active(grp, m);
+				m->state = MBR_LEAVING;
+			} else {
 				msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
-			else
+				__skb_queue_tail(inputq, skb);
+			}
+		} else {
+			if (m->state != MBR_LEAVING) {
+				tipc_group_decr_active(grp, m);
+				m->state = MBR_LEAVING;
 				msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
+			} else {
+				msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+			}
 			__skb_queue_tail(inputq, skb);
 		}
+		list_del_init(&m->list);
 		list_del_init(&m->congested);
 	}
 	*sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 8e884ed06d4b13d9751c27a51959b216b7f48b18..32dc33a94bc714f762a066389a4907e558244cd7 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id)
 {
 	struct tipc_net *tn = tipc_net(net);
 	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
-	struct tipc_peer *self = get_self(net, bearer_id);
+	struct tipc_peer *self;
 	struct tipc_peer *peer, *tmp;
 
+	if (!mon)
+		return;
+
+	self = get_self(net, bearer_id);
 	write_lock_bh(&mon->lock);
 	tn->monitors[bearer_id] = NULL;
 	list_for_each_entry_safe(peer, tmp, &self->list, list) {
diff --git a/net/tipc/server.c b/net/tipc/server.c
index acaef80fb88cfca4ea569a003f52ca04a3e2f577..d60c303423275db5c0c7c45c9bf1a7d61987bf27 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -314,6 +314,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
 	newcon->usr_data = s->tipc_conn_new(newcon->conid);
 	if (!newcon->usr_data) {
 		sock_release(newsock);
+		conn_put(newcon);
 		return -ENOMEM;
 	}
 
@@ -511,7 +512,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
 	s = con->server;
 	scbr = s->tipc_conn_new(*conid);
 	if (!scbr) {
-		tipc_close_conn(con);
+		conn_put(con);
 		return false;
 	}
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 5d18c0caa92b213740e5c6e3152ec8ce37717dc2..3b408448037769d3dba2da38fc00cdaf360e4950 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
 
 	switch (sk->sk_state) {
 	case TIPC_ESTABLISHED:
+	case TIPC_CONNECTING:
 		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
 			revents |= POLLOUT;
 		/* fall thru' */
 	case TIPC_LISTEN:
-	case TIPC_CONNECTING:
 		if (!skb_queue_empty(&sk->sk_receive_queue))
 			revents |= POLLIN | POLLRDNORM;
 		break;
@@ -1140,7 +1140,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 				__skb_dequeue(arrvq);
 				__skb_queue_tail(inputq, skb);
 			}
-			refcount_dec(&skb->users);
+			kfree_skb(skb);
 			spin_unlock_bh(&inputq->lock);
 			continue;
 		}
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ecca64fc6a6f223bf8c0e09e3a299fe4fd62509d..3deabcab4882165b668f65319a3555027bf3b292 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
 			goto rcu_out;
 	}
 
-	tipc_rcv(sock_net(sk), skb, b);
-	rcu_read_unlock();
-	return 0;
-
 rcu_out:
 	rcu_read_unlock();
 out:
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 5583df708b8cfedb61ca2e6fec93a79789d6f949..a827547aa102be4f3cf46a267c8c684e815e02d6 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -487,7 +487,7 @@ static void hvs_release(struct vsock_sock *vsk)
 
 	lock_sock(sk);
 
-	sk->sk_state = SS_DISCONNECTING;
+	sk->sk_state = TCP_CLOSING;
 	vsock_remove_sock(vsk);
 
 	release_sock(sk);
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 278d979c211a7e1f3581e2b33895f6ecfd160994..1d84f91bbfb0c8c9087e309821eb687325733358 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -23,19 +23,36 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
 cfg80211-y += extra-certs.o
 endif
 
-$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
 	@$(kecho) "  GEN     $@"
-	@echo '#include "reg.h"' > $@
-	@echo 'const u8 shipped_regdb_certs[] = {' >> $@
-	@for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done
-	@echo '};' >> $@
-	@echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@
+	@(echo '#include "reg.h"'; \
+	  echo 'const u8 shipped_regdb_certs[] = {'; \
+	  cat $^ ; \
+	  echo '};'; \
+	  echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
+	 ) > $@
 
 $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
 		      $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
 	@$(kecho) "  GEN     $@"
-	@echo '#include "reg.h"' > $@
-	@echo 'const u8 extra_regdb_certs[] = {' >> $@
-	@for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done
-	@echo '};' >> $@
-	@echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@
+	@(set -e; \
+	  allf=""; \
+	  for f in $^ ; do \
+	      # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
+	      thisf=$$(od -An -v -tx1 < $$f | \
+	                   sed -e 's/ /\n/g' | \
+	                   sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
+	                   sed -e 's/^/0x/;s/$$/,/'); \
+	      # file should not be empty - maybe command substitution failed? \
+	      test ! -z "$$thisf";\
+	      allf=$$allf$$thisf;\
+	  done; \
+	  ( \
+	      echo '#include "reg.h"'; \
+	      echo 'const u8 extra_regdb_certs[] = {'; \
+	      echo "$$allf"; \
+	      echo '};'; \
+	      echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
+	  ) > $@)
+
+clean-files += shipped-certs.c extra-certs.c
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644
index 0000000000000000000000000000000000000000..14ea66643ffaa2f8463bd8a699b5c2074bc7d83a
--- /dev/null
+++ b/net/wireless/certs/sforshee.hex
@@ -0,0 +1,86 @@
+/* Seth Forshee's regdb certificate */
+0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
+0x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
+0xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
+0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
+0x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
+0x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
+0x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
+0x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
+0x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
+0x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
+0x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
+0x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
+0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
+0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
+0x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
+0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
+0x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
+0x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
+0x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
+0x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
+0x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
+0x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
+0x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
+0x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
+0x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
+0x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
+0x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
+0x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
+0x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
+0xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
+0x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
+0xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
+0x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
+0x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
+0x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
+0xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
+0xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
+0x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
+0x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
+0x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
+0x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
+0x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
+0x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
+0x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
+0x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
+0x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
+0x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
+0xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
+0xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
+0x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
+0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
+0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
+0x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
+0x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
+0x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
+0x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
+0x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
+0x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
+0x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
+0xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
+0x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
+0x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
+0x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
+0xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
+0x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
+0x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
+0x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
+0xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
+0xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
+0x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
+0xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
+0xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
+0x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
+0xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
+0xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
+0x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
+0xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
+0x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
+0xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
+0xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
+0x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
+0x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
+0x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
+0x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
+0x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644
index c6f8f9d6b98839048822ebbe27ecb831614ccf3d..0000000000000000000000000000000000000000
Binary files a/net/wireless/certs/sforshee.x509 and /dev/null differ
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b1ac23ca20c86be0af71e9a1ba92cc99d8d5a967..2b3dbcd40e46390debf84f4a127b23f185e76b39 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
 	case NL80211_IFTYPE_AP:
 		if (wdev->ssid_len &&
 		    nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
-			goto nla_put_failure;
+			goto nla_put_failure_locked;
 		break;
 	case NL80211_IFTYPE_STATION:
 	case NL80211_IFTYPE_P2P_CLIENT:
@@ -2623,7 +2623,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
 		if (!ssid_ie)
 			break;
 		if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
-			goto nla_put_failure;
+			goto nla_put_failure_locked;
 		break;
 		}
 	default:
@@ -2635,6 +2635,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
 	genlmsg_end(msg, hdr);
 	return 0;
 
+ nla_put_failure_locked:
+	wdev_unlock(wdev);
  nla_put_failure:
 	genlmsg_cancel(msg, hdr);
 	return -EMSGSIZE;
@@ -11359,7 +11361,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
 		break;
 	case NL80211_NAN_FUNC_FOLLOW_UP:
 		if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
-		    !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) {
+		    !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] ||
+		    !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) {
 			err = -EINVAL;
 			goto out;
 		}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 347ab31574d509ac9edd448bcbb7501a05f5ac2d..3f6f6f8c9fa5224e75c1b62f299f217da172d370 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -8,15 +8,29 @@
  *
  */
 
+#include <linux/bottom_half.h>
+#include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <linux/percpu.h>
 #include <net/dst.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/ip_tunnels.h>
 #include <net/ip6_tunnel.h>
 
+struct xfrm_trans_tasklet {
+	struct tasklet_struct tasklet;
+	struct sk_buff_head queue;
+};
+
+struct xfrm_trans_cb {
+	int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
+};
+
+#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
+
 static struct kmem_cache *secpath_cachep __read_mostly;
 
 static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
@@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
 static struct gro_cells gro_cells;
 static struct net_device xfrm_napi_dev;
 
+static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
+
 int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
 {
 	int err = 0;
@@ -207,7 +223,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	xfrm_address_t *daddr;
 	struct xfrm_mode *inner_mode;
 	u32 mark = skb->mark;
-	unsigned int family;
+	unsigned int family = AF_UNSPEC;
 	int decaps = 0;
 	int async = 0;
 	bool xfrm_gro = false;
@@ -216,6 +232,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 	if (encap_type < 0) {
 		x = xfrm_input_state(skb);
+
+		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+			if (x->km.state == XFRM_STATE_ACQ)
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
+			else
+				XFRM_INC_STATS(net,
+					       LINUX_MIB_XFRMINSTATEINVALID);
+			goto drop;
+		}
+
 		family = x->outer_mode->afinfo->family;
 
 		/* An encap_type of -1 indicates async resumption. */
@@ -467,9 +493,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
 }
 EXPORT_SYMBOL(xfrm_input_resume);
 
+static void xfrm_trans_reinject(unsigned long data)
+{
+	struct xfrm_trans_tasklet *trans = (void *)data;
+	struct sk_buff_head queue;
+	struct sk_buff *skb;
+
+	__skb_queue_head_init(&queue);
+	skb_queue_splice_init(&trans->queue, &queue);
+
+	while ((skb = __skb_dequeue(&queue)))
+		XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
+}
+
+int xfrm_trans_queue(struct sk_buff *skb,
+		     int (*finish)(struct net *, struct sock *,
+				   struct sk_buff *))
+{
+	struct xfrm_trans_tasklet *trans;
+
+	trans = this_cpu_ptr(&xfrm_trans_tasklet);
+
+	if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+		return -ENOBUFS;
+
+	XFRM_TRANS_SKB_CB(skb)->finish = finish;
+	skb_queue_tail(&trans->queue, skb);
+	tasklet_schedule(&trans->tasklet);
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_trans_queue);
+
 void __init xfrm_input_init(void)
 {
 	int err;
+	int i;
 
 	init_dummy_netdev(&xfrm_napi_dev);
 	err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
@@ -480,4 +538,13 @@ void __init xfrm_input_init(void)
 					   sizeof(struct sec_path),
 					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL);
+
+	for_each_possible_cpu(i) {
+		struct xfrm_trans_tasklet *trans;
+
+		trans = &per_cpu(xfrm_trans_tasklet, i);
+		__skb_queue_head_init(&trans->queue);
+		tasklet_init(&trans->tasklet, xfrm_trans_reinject,
+			     (unsigned long)trans);
+	}
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9542975eb2f90dcb2bae894edeb9b418d04f252e..70aa5cb0c659d54eacb85f92dc95b2db17bfe1d0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1168,9 +1168,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
  again:
 	pol = rcu_dereference(sk->sk_policy[dir]);
 	if (pol != NULL) {
-		bool match = xfrm_selector_match(&pol->selector, fl, family);
+		bool match;
 		int err = 0;
 
+		if (pol->family != family) {
+			pol = NULL;
+			goto out;
+		}
+
+		match = xfrm_selector_match(&pol->selector, fl, family);
 		if (match) {
 			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
 				pol = NULL;
@@ -1833,6 +1839,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 		   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
 	    xfrm_xdst_can_reuse(xdst, xfrm, err)) {
 		dst_hold(&xdst->u.dst);
+		xfrm_pols_put(pols, num_pols);
 		while (err > 0)
 			xfrm_state_put(xfrm[--err]);
 		return xdst;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 065d89606888ec1bf053577d3949746bcea6f099..500b3391f474b96fe273060ff8eae16f1e23f3c2 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1343,6 +1343,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 
 	if (orig->aead) {
 		x->aead = xfrm_algo_aead_clone(orig->aead);
+		x->geniv = orig->geniv;
 		if (!x->aead)
 			goto error;
 	}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 983b0233767bec16ba55b763ef82ce781ca5046a..bdb48e5dba0480aa4c3c6855be42cfb93f3bf335 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1419,11 +1419,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
 
 static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 {
+	u16 prev_family;
 	int i;
 
 	if (nr > XFRM_MAX_DEPTH)
 		return -EINVAL;
 
+	prev_family = family;
+
 	for (i = 0; i < nr; i++) {
 		/* We never validated the ut->family value, so many
 		 * applications simply leave it at zero.  The check was
@@ -1435,6 +1438,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 		if (!ut[i].family)
 			ut[i].family = family;
 
+		if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
+		    (ut[i].family != prev_family))
+			return -EINVAL;
+
+		prev_family = ut[i].family;
+
 		switch (ut[i].family) {
 		case AF_INET:
 			break;
@@ -1445,6 +1454,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 		default:
 			return -EINVAL;
 		}
+
+		switch (ut[i].id.proto) {
+		case IPPROTO_AH:
+		case IPPROTO_ESP:
+		case IPPROTO_COMP:
+#if IS_ENABLED(CONFIG_IPV6)
+		case IPPROTO_ROUTING:
+		case IPPROTO_DSTOPTS:
+#endif
+		case IPSEC_PROTO_ANY:
+			break;
+		default:
+			return -EINVAL;
+		}
+
 	}
 
 	return 0;
@@ -2470,7 +2494,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_PROTO]		= { .type = NLA_U8 },
 	[XFRMA_ADDRESS_FILTER]	= { .len = sizeof(struct xfrm_address_filter) },
 	[XFRMA_OFFLOAD_DEV]	= { .len = sizeof(struct xfrm_user_offload) },
-	[XFRMA_OUTPUT_MARK]	= { .len = NLA_U32 },
+	[XFRMA_OUTPUT_MARK]	= { .type = NLA_U32 },
 };
 
 static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 522ca9252d6cd41f5272b4755da876ff93cffdbb..242631aa4ea2366081711ba96284f189c9755569 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -193,8 +193,18 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		return -1;
 	}
 	event_fd[prog_cnt - 1] = efd;
-	ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
-	ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+	err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
+	if (err < 0) {
+		printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n",
+		       strerror(errno));
+		return -1;
+	}
+	err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+	if (err < 0) {
+		printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n",
+		       strerror(errno));
+		return -1;
+	}
 
 	return 0;
 }
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 040aa79e1d9d39c55df7a748565dea3296ec6a6f..31031f10fe56b0b15ec44916ca7996e174fd5090 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -6233,28 +6233,6 @@ sub process {
 			}
 		}
 
-# whine about ACCESS_ONCE
-		if ($^V && $^V ge 5.10.0 &&
-		    $line =~ /\bACCESS_ONCE\s*$balanced_parens\s*(=(?!=))?\s*($FuncArg)?/) {
-			my $par = $1;
-			my $eq = $2;
-			my $fun = $3;
-			$par =~ s/^\(\s*(.*)\s*\)$/$1/;
-			if (defined($eq)) {
-				if (WARN("PREFER_WRITE_ONCE",
-					 "Prefer WRITE_ONCE(<FOO>, <BAR>) over ACCESS_ONCE(<FOO>) = <BAR>\n" . $herecurr) &&
-				    $fix) {
-					$fixed[$fixlinenr] =~ s/\bACCESS_ONCE\s*\(\s*\Q$par\E\s*\)\s*$eq\s*\Q$fun\E/WRITE_ONCE($par, $fun)/;
-				}
-			} else {
-				if (WARN("PREFER_READ_ONCE",
-					 "Prefer READ_ONCE(<FOO>) over ACCESS_ONCE(<FOO>)\n" . $herecurr) &&
-				    $fix) {
-					$fixed[$fixlinenr] =~ s/\bACCESS_ONCE\s*\(\s*\Q$par\E\s*\)/READ_ONCE($par)/;
-				}
-			}
-		}
-
 # check for mutex_trylock_recursive usage
 		if ($line =~ /mutex_trylock_recursive/) {
 			ERROR("LOCKING",
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 39e07d8574dd787c2af71937852156abb9b1a7fb..7721d5b2b0c04ee923b3c14216214db68fcb7dd0 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -44,10 +44,10 @@
 set -o errexit
 set -o nounset
 
-READELF="${CROSS_COMPILE}readelf"
-ADDR2LINE="${CROSS_COMPILE}addr2line"
-SIZE="${CROSS_COMPILE}size"
-NM="${CROSS_COMPILE}nm"
+READELF="${CROSS_COMPILE:-}readelf"
+ADDR2LINE="${CROSS_COMPILE:-}addr2line"
+SIZE="${CROSS_COMPILE:-}size"
+NM="${CROSS_COMPILE:-}nm"
 
 command -v awk >/dev/null 2>&1 || die "awk isn't installed"
 command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed"
diff --git a/scripts/genksyms/.gitignore b/scripts/genksyms/.gitignore
index 86dc07a01b4398a3e72e79584a2c447bb5643780..e7836b47f060889a036405e7cf70d04c4177adf9 100644
--- a/scripts/genksyms/.gitignore
+++ b/scripts/genksyms/.gitignore
@@ -1,4 +1,3 @@
-*.hash.c
 *.lex.c
 *.tab.c
 *.tab.h
diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c
index cbf4996dd9c1045ecc7ffa66a9ec79edd99b1038..8cee597d33a594d3d7380390aa2b697218034578 100644
--- a/scripts/kconfig/expr.c
+++ b/scripts/kconfig/expr.c
@@ -893,7 +893,10 @@ static enum string_value_kind expr_parse_string(const char *str,
 	switch (type) {
 	case S_BOOLEAN:
 	case S_TRISTATE:
-		return k_string;
+		val->s = !strcmp(str, "n") ? 0 :
+			 !strcmp(str, "m") ? 1 :
+			 !strcmp(str, "y") ? 2 : -1;
+		return k_signed;
 	case S_INT:
 		val->s = strtoll(str, &tail, 10);
 		kind = k_signed;
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index bd29a92b4b48aa1648f8c892c980f1a72c2ad67e..df0f045a9a89401e28b6ca36d6b38218f425ad85 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -3248,4 +3248,4 @@ if ($verbose && $warnings) {
   print STDERR "$warnings warnings\n";
 }
 
-exit($errors);
+exit($output_mode eq "none" ? 0 : $errors);
diff --git a/security/Kconfig b/security/Kconfig
index e8e449444e658be4a9190c6ea2de14cca8fc4890..b0cb9a5f94480d2c5aaf290b31fd15e6c5930ec2 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -54,6 +54,17 @@ config SECURITY_NETWORK
 	  implement socket and networking access controls.
 	  If you are unsure how to answer this question, answer N.
 
+config PAGE_TABLE_ISOLATION
+	bool "Remove the kernel mapping in user mode"
+	default y
+	depends on X86_64 && !UML
+	help
+	  This feature reduces the number of hardware side channels by
+	  ensuring that the majority of kernel addresses are not mapped
+	  into userspace.
+
+	  See Documentation/x86/pti.txt for more details.
+
 config SECURITY_INFINIBAND
 	bool "Infiniband Security Hooks"
 	depends on SECURITY && INFINIBAND
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 04ba9d0718ea590b7c5033cfc4b952c701acb54d..6a54d2ffa84012fc7fd27aef18c7fa0410e050e3 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -330,10 +330,7 @@ static struct aa_profile *__attach_match(const char *name,
 			continue;
 
 		if (profile->xmatch) {
-			if (profile->xmatch_len == len) {
-				conflict = true;
-				continue;
-			} else if (profile->xmatch_len > len) {
+			if (profile->xmatch_len >= len) {
 				unsigned int state;
 				u32 perm;
 
@@ -342,6 +339,10 @@ static struct aa_profile *__attach_match(const char *name,
 				perm = dfa_user_allow(profile->xmatch, state);
 				/* any accepting state means a valid match. */
 				if (perm & MAY_EXEC) {
+					if (profile->xmatch_len == len) {
+						conflict = true;
+						continue;
+					}
 					candidate = profile;
 					len = profile->xmatch_len;
 					conflict = false;
diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index 2b27bb79aec4421665aaa6f2a2490868227bed0f..d7b7e711516010806d194ed39b2a24e1a9f9fd09 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -133,6 +133,9 @@ extern struct aa_perms allperms;
 #define xcheck_labels_profiles(L1, L2, FN, args...)		\
 	xcheck_ns_labels((L1), (L2), xcheck_ns_profile_label, (FN), args)
 
+#define xcheck_labels(L1, L2, P, FN1, FN2)			\
+	xcheck(fn_for_each((L1), (P), (FN1)), fn_for_each((L2), (P), (FN2)))
+
 
 void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask);
 void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask);
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index 7ca0032e7ba96ef374aefe98d47acb919f3f81d3..b40678f3c1d5a4d62eeb1255f26e2a73aabd6767 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -64,40 +64,48 @@ static void audit_ptrace_cb(struct audit_buffer *ab, void *va)
 			FLAGS_NONE, GFP_ATOMIC);
 }
 
+/* assumes check for PROFILE_MEDIATES is already done */
 /* TODO: conditionals */
 static int profile_ptrace_perm(struct aa_profile *profile,
-			       struct aa_profile *peer, u32 request,
-			       struct common_audit_data *sa)
+			     struct aa_label *peer, u32 request,
+			     struct common_audit_data *sa)
 {
 	struct aa_perms perms = { };
 
-	/* need because of peer in cross check */
-	if (profile_unconfined(profile) ||
-	    !PROFILE_MEDIATES(profile, AA_CLASS_PTRACE))
-		return 0;
-
-	aad(sa)->peer = &peer->label;
-	aa_profile_match_label(profile, &peer->label, AA_CLASS_PTRACE, request,
+	aad(sa)->peer = peer;
+	aa_profile_match_label(profile, peer, AA_CLASS_PTRACE, request,
 			       &perms);
 	aa_apply_modes_to_perms(profile, &perms);
 	return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb);
 }
 
-static int cross_ptrace_perm(struct aa_profile *tracer,
-			     struct aa_profile *tracee, u32 request,
-			     struct common_audit_data *sa)
+static int profile_tracee_perm(struct aa_profile *tracee,
+			       struct aa_label *tracer, u32 request,
+			       struct common_audit_data *sa)
 {
+	if (profile_unconfined(tracee) || unconfined(tracer) ||
+	    !PROFILE_MEDIATES(tracee, AA_CLASS_PTRACE))
+		return 0;
+
+	return profile_ptrace_perm(tracee, tracer, request, sa);
+}
+
+static int profile_tracer_perm(struct aa_profile *tracer,
+			       struct aa_label *tracee, u32 request,
+			       struct common_audit_data *sa)
+{
+	if (profile_unconfined(tracer))
+		return 0;
+
 	if (PROFILE_MEDIATES(tracer, AA_CLASS_PTRACE))
-		return xcheck(profile_ptrace_perm(tracer, tracee, request, sa),
-			      profile_ptrace_perm(tracee, tracer,
-						  request << PTRACE_PERM_SHIFT,
-						  sa));
-	/* policy uses the old style capability check for ptrace */
-	if (profile_unconfined(tracer) || tracer == tracee)
+		return profile_ptrace_perm(tracer, tracee, request, sa);
+
+	/* profile uses the old style capability check for ptrace */
+	if (&tracer->label == tracee)
 		return 0;
 
 	aad(sa)->label = &tracer->label;
-	aad(sa)->peer = &tracee->label;
+	aad(sa)->peer = tracee;
 	aad(sa)->request = 0;
 	aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, 1);
 
@@ -115,10 +123,13 @@ static int cross_ptrace_perm(struct aa_profile *tracer,
 int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
 		  u32 request)
 {
+	struct aa_profile *profile;
+	u32 xrequest = request << PTRACE_PERM_SHIFT;
 	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_PTRACE);
 
-	return xcheck_labels_profiles(tracer, tracee, cross_ptrace_perm,
-				      request, &sa);
+	return xcheck_labels(tracer, tracee, profile,
+			profile_tracer_perm(profile, tracee, request, &sa),
+			profile_tracee_perm(profile, tracer, xrequest, &sa));
 }
 
 
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
index ed9b4d0f9f7e212b161c1a312c52b56f5e0b0b49..8c558cbce930b15a69a7451973a974152fbb6a08 100644
--- a/security/apparmor/mount.c
+++ b/security/apparmor/mount.c
@@ -329,6 +329,9 @@ static int match_mnt_path_str(struct aa_profile *profile,
 	AA_BUG(!mntpath);
 	AA_BUG(!buffer);
 
+	if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+		return 0;
+
 	error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer,
 			     &mntpnt, &info, profile->disconnected);
 	if (error)
@@ -380,6 +383,9 @@ static int match_mnt(struct aa_profile *profile, const struct path *path,
 	AA_BUG(!profile);
 	AA_BUG(devpath && !devbuffer);
 
+	if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+		return 0;
+
 	if (devpath) {
 		error = aa_path_name(devpath, path_flags(profile, devpath),
 				     devbuffer, &devname, &info,
@@ -558,6 +564,9 @@ static int profile_umount(struct aa_profile *profile, struct path *path,
 	AA_BUG(!profile);
 	AA_BUG(!path);
 
+	if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+		return 0;
+
 	error = aa_path_name(path, path_flags(profile, path), buffer, &name,
 			     &info, profile->disconnected);
 	if (error)
@@ -613,7 +622,8 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile,
 	AA_BUG(!new_path);
 	AA_BUG(!old_path);
 
-	if (profile_unconfined(profile))
+	if (profile_unconfined(profile) ||
+	    !PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
 		return aa_get_newest_label(&profile->label);
 
 	error = aa_path_name(old_path, path_flags(profile, old_path),
diff --git a/security/commoncap.c b/security/commoncap.c
index 4f8e0934095679f71e9674d4a567c6c1fe492f9e..48620c93d6976eca3a9b1cc3c34cfe21a69c7a39 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -348,21 +348,18 @@ static __u32 sansflags(__u32 m)
 	return m & ~VFS_CAP_FLAGS_EFFECTIVE;
 }
 
-static bool is_v2header(size_t size, __le32 magic)
+static bool is_v2header(size_t size, const struct vfs_cap_data *cap)
 {
-	__u32 m = le32_to_cpu(magic);
 	if (size != XATTR_CAPS_SZ_2)
 		return false;
-	return sansflags(m) == VFS_CAP_REVISION_2;
+	return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
 }
 
-static bool is_v3header(size_t size, __le32 magic)
+static bool is_v3header(size_t size, const struct vfs_cap_data *cap)
 {
-	__u32 m = le32_to_cpu(magic);
-
 	if (size != XATTR_CAPS_SZ_3)
 		return false;
-	return sansflags(m) == VFS_CAP_REVISION_3;
+	return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
 }
 
 /*
@@ -405,7 +402,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
 
 	fs_ns = inode->i_sb->s_user_ns;
 	cap = (struct vfs_cap_data *) tmpbuf;
-	if (is_v2header((size_t) ret, cap->magic_etc)) {
+	if (is_v2header((size_t) ret, cap)) {
 		/* If this is sizeof(vfs_cap_data) then we're ok with the
 		 * on-disk value, so return that.  */
 		if (alloc)
@@ -413,7 +410,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
 		else
 			kfree(tmpbuf);
 		return ret;
-	} else if (!is_v3header((size_t) ret, cap->magic_etc)) {
+	} else if (!is_v3header((size_t) ret, cap)) {
 		kfree(tmpbuf);
 		return -EINVAL;
 	}
@@ -470,9 +467,9 @@ static kuid_t rootid_from_xattr(const void *value, size_t size,
 	return make_kuid(task_ns, rootid);
 }
 
-static bool validheader(size_t size, __le32 magic)
+static bool validheader(size_t size, const struct vfs_cap_data *cap)
 {
-	return is_v2header(size, magic) || is_v3header(size, magic);
+	return is_v2header(size, cap) || is_v3header(size, cap);
 }
 
 /*
@@ -495,7 +492,7 @@ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
 
 	if (!*ivalue)
 		return -EINVAL;
-	if (!validheader(size, cap->magic_etc))
+	if (!validheader(size, cap))
 		return -EINVAL;
 	if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
 		return -EPERM;
diff --git a/security/keys/key.c b/security/keys/key.c
index 66049183ad8961554cbf0a4ea0ccf403bd0b7579..d97c9394b5dd4f7479e9211a2dbb236503e05d2b 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -833,7 +833,6 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 
 	key_check(keyring);
 
-	key_ref = ERR_PTR(-EPERM);
 	if (!(flags & KEY_ALLOC_BYPASS_RESTRICTION))
 		restrict_link = keyring->restrict_link;
 
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 76d22f726ae49d7e112c648c59e9c0d3124063f1..1ffe60bb2845f97638157b01ed7fcc4f45714312 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1588,9 +1588,8 @@ long keyctl_session_to_parent(void)
  * The caller must have Setattr permission to change keyring restrictions.
  *
  * The requested type name may be a NULL pointer to reject all attempts
- * to link to the keyring. If _type is non-NULL, _restriction can be
- * NULL or a pointer to a string describing the restriction. If _type is
- * NULL, _restriction must also be NULL.
+ * to link to the keyring.  In this case, _restriction must also be NULL.
+ * Otherwise, both _type and _restriction must be non-NULL.
  *
  * Returns 0 if successful.
  */
@@ -1598,7 +1597,6 @@ long keyctl_restrict_keyring(key_serial_t id, const char __user *_type,
 			     const char __user *_restriction)
 {
 	key_ref_t key_ref;
-	bool link_reject = !_type;
 	char type[32];
 	char *restriction = NULL;
 	long ret;
@@ -1607,31 +1605,29 @@ long keyctl_restrict_keyring(key_serial_t id, const char __user *_type,
 	if (IS_ERR(key_ref))
 		return PTR_ERR(key_ref);
 
+	ret = -EINVAL;
 	if (_type) {
-		ret = key_get_type_from_user(type, _type, sizeof(type));
-		if (ret < 0)
+		if (!_restriction)
 			goto error;
-	}
 
-	if (_restriction) {
-		if (!_type) {
-			ret = -EINVAL;
+		ret = key_get_type_from_user(type, _type, sizeof(type));
+		if (ret < 0)
 			goto error;
-		}
 
 		restriction = strndup_user(_restriction, PAGE_SIZE);
 		if (IS_ERR(restriction)) {
 			ret = PTR_ERR(restriction);
 			goto error;
 		}
+	} else {
+		if (_restriction)
+			goto error;
 	}
 
-	ret = keyring_restrict(key_ref, link_reject ? NULL : type, restriction);
+	ret = keyring_restrict(key_ref, _type ? type : NULL, restriction);
 	kfree(restriction);
-
 error:
 	key_ref_put(key_ref);
-
 	return ret;
 }
 
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index e8036cd0ad5430a87ec2e2ea1496e921ae941b3d..114f7408feee620b868801cbb53b578e7b44f615 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -251,11 +251,12 @@ static int construct_key(struct key *key, const void *callout_info,
  * The keyring selected is returned with an extra reference upon it which the
  * caller must release.
  */
-static void construct_get_dest_keyring(struct key **_dest_keyring)
+static int construct_get_dest_keyring(struct key **_dest_keyring)
 {
 	struct request_key_auth *rka;
 	const struct cred *cred = current_cred();
 	struct key *dest_keyring = *_dest_keyring, *authkey;
+	int ret;
 
 	kenter("%p", dest_keyring);
 
@@ -264,6 +265,8 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
 		/* the caller supplied one */
 		key_get(dest_keyring);
 	} else {
+		bool do_perm_check = true;
+
 		/* use a default keyring; falling through the cases until we
 		 * find one that we actually have */
 		switch (cred->jit_keyring) {
@@ -278,8 +281,10 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
 					dest_keyring =
 						key_get(rka->dest_keyring);
 				up_read(&authkey->sem);
-				if (dest_keyring)
+				if (dest_keyring) {
+					do_perm_check = false;
 					break;
+				}
 			}
 
 		case KEY_REQKEY_DEFL_THREAD_KEYRING:
@@ -314,11 +319,29 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
 		default:
 			BUG();
 		}
+
+		/*
+		 * Require Write permission on the keyring.  This is essential
+		 * because the default keyring may be the session keyring, and
+		 * joining a keyring only requires Search permission.
+		 *
+		 * However, this check is skipped for the "requestor keyring" so
+		 * that /sbin/request-key can itself use request_key() to add
+		 * keys to the original requestor's destination keyring.
+		 */
+		if (dest_keyring && do_perm_check) {
+			ret = key_permission(make_key_ref(dest_keyring, 1),
+					     KEY_NEED_WRITE);
+			if (ret) {
+				key_put(dest_keyring);
+				return ret;
+			}
+		}
 	}
 
 	*_dest_keyring = dest_keyring;
 	kleave(" [dk %d]", key_serial(dest_keyring));
-	return;
+	return 0;
 }
 
 /*
@@ -444,11 +467,15 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx,
 	if (ctx->index_key.type == &key_type_keyring)
 		return ERR_PTR(-EPERM);
 
-	user = key_user_lookup(current_fsuid());
-	if (!user)
-		return ERR_PTR(-ENOMEM);
+	ret = construct_get_dest_keyring(&dest_keyring);
+	if (ret)
+		goto error;
 
-	construct_get_dest_keyring(&dest_keyring);
+	user = key_user_lookup(current_fsuid());
+	if (!user) {
+		ret = -ENOMEM;
+		goto error_put_dest_keyring;
+	}
 
 	ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key);
 	key_user_put(user);
@@ -463,7 +490,7 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx,
 	} else if (ret == -EINPROGRESS) {
 		ret = 0;
 	} else {
-		goto couldnt_alloc_key;
+		goto error_put_dest_keyring;
 	}
 
 	key_put(dest_keyring);
@@ -473,8 +500,9 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx,
 construction_failed:
 	key_negate_and_link(key, key_negative_timeout, NULL, NULL);
 	key_put(key);
-couldnt_alloc_key:
+error_put_dest_keyring:
 	key_put(dest_keyring);
+error:
 	kleave(" = %d", ret);
 	return ERR_PTR(ret);
 }
@@ -546,9 +574,7 @@ struct key *request_key_and_link(struct key_type *type,
 	if (!IS_ERR(key_ref)) {
 		key = key_ref_to_ptr(key_ref);
 		if (dest_keyring) {
-			construct_get_dest_keyring(&dest_keyring);
 			ret = key_link(dest_keyring, key);
-			key_put(dest_keyring);
 			if (ret < 0) {
 				key_put(key);
 				key = ERR_PTR(ret);
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index e49f448ee04f4af00a7c7c00755f8a29b226bb6f..c2db7e905f7d69b4f9b153c8a72b0f463c3d4287 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -455,7 +455,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm,
 		v = snd_pcm_hw_param_last(pcm, params, var, dir);
 	else
 		v = snd_pcm_hw_param_first(pcm, params, var, dir);
-	snd_BUG_ON(v < 0);
 	return v;
 }
 
@@ -1335,8 +1334,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
 
 	if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
 		return tmp;
-	mutex_lock(&runtime->oss.params_lock);
 	while (bytes > 0) {
+		if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+			tmp = -ERESTARTSYS;
+			break;
+		}
 		if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
 			tmp = bytes;
 			if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes)
@@ -1380,14 +1382,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
 			xfer += tmp;
 			if ((substream->f_flags & O_NONBLOCK) != 0 &&
 			    tmp != runtime->oss.period_bytes)
-				break;
+				tmp = -EAGAIN;
 		}
-	}
-	mutex_unlock(&runtime->oss.params_lock);
-	return xfer;
-
  err:
-	mutex_unlock(&runtime->oss.params_lock);
+		mutex_unlock(&runtime->oss.params_lock);
+		if (tmp < 0)
+			break;
+		if (signal_pending(current)) {
+			tmp = -ERESTARTSYS;
+			break;
+		}
+		tmp = 0;
+	}
 	return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
 }
 
@@ -1435,8 +1441,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
 
 	if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
 		return tmp;
-	mutex_lock(&runtime->oss.params_lock);
 	while (bytes > 0) {
+		if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+			tmp = -ERESTARTSYS;
+			break;
+		}
 		if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
 			if (runtime->oss.buffer_used == 0) {
 				tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1);
@@ -1467,12 +1476,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
 			bytes -= tmp;
 			xfer += tmp;
 		}
-	}
-	mutex_unlock(&runtime->oss.params_lock);
-	return xfer;
-
  err:
-	mutex_unlock(&runtime->oss.params_lock);
+		mutex_unlock(&runtime->oss.params_lock);
+		if (tmp < 0)
+			break;
+		if (signal_pending(current)) {
+			tmp = -ERESTARTSYS;
+			break;
+		}
+		tmp = 0;
+	}
 	return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
 }
 
diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c
index cadc937928683cb3abcf7ec8208e8159df31c00f..85a56af104bd619160ace33f51ad2786eca3279a 100644
--- a/sound/core/oss/pcm_plugin.c
+++ b/sound/core/oss/pcm_plugin.c
@@ -592,18 +592,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st
 	snd_pcm_sframes_t frames = size;
 
 	plugin = snd_pcm_plug_first(plug);
-	while (plugin && frames > 0) {
+	while (plugin) {
+		if (frames <= 0)
+			return frames;
 		if ((next = plugin->next) != NULL) {
 			snd_pcm_sframes_t frames1 = frames;
-			if (plugin->dst_frames)
+			if (plugin->dst_frames) {
 				frames1 = plugin->dst_frames(plugin, frames);
+				if (frames1 <= 0)
+					return frames1;
+			}
 			if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) {
 				return err;
 			}
 			if (err != frames1) {
 				frames = err;
-				if (plugin->src_frames)
+				if (plugin->src_frames) {
 					frames = plugin->src_frames(plugin, frames1);
+					if (frames <= 0)
+						return frames;
+				}
 			}
 		} else
 			dst_channels = NULL;
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index 9070f277f8db99421eadf7b55ef2c5c675cb50b3..09ee8c6b9f75e3828663c6b8874e93506d450ac9 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -153,7 +153,9 @@ static int snd_pcm_control_ioctl(struct snd_card *card,
 				err = -ENXIO;
 				goto _error;
 			}
+			mutex_lock(&pcm->open_mutex);
 			err = snd_pcm_info_user(substream, info);
+			mutex_unlock(&pcm->open_mutex);
 		_error:
 			mutex_unlock(&register_mutex);
 			return err;
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 10e7ef7a8804b1ba0d88b6a0c4ad478f38a9397e..db7894bb028ccc6ca941c45c4a7e37602ec1228d 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1632,7 +1632,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm,
 		return changed;
 	if (params->rmask) {
 		int err = snd_pcm_hw_refine(pcm, params);
-		if (snd_BUG_ON(err < 0))
+		if (err < 0)
 			return err;
 	}
 	return snd_pcm_hw_param_value(params, var, dir);
@@ -1678,7 +1678,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm,
 		return changed;
 	if (params->rmask) {
 		int err = snd_pcm_hw_refine(pcm, params);
-		if (snd_BUG_ON(err < 0))
+		if (err < 0)
 			return err;
 	}
 	return snd_pcm_hw_param_value(params, var, dir);
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index a4d92e46c459b014287b729f9fe6b9d9e5c19ebf..f08772568c1709ab560d7ed6db1bf418b08c984c 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -2580,7 +2580,7 @@ static snd_pcm_sframes_t forward_appl_ptr(struct snd_pcm_substream *substream,
 	return ret < 0 ? ret : frames;
 }
 
-/* decrease the appl_ptr; returns the processed frames or a negative error */
+/* decrease the appl_ptr; returns the processed frames or zero for error */
 static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream,
 					 snd_pcm_uframes_t frames,
 					 snd_pcm_sframes_t avail)
@@ -2597,7 +2597,12 @@ static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream,
 	if (appl_ptr < 0)
 		appl_ptr += runtime->boundary;
 	ret = pcm_lib_apply_appl_ptr(substream, appl_ptr);
-	return ret < 0 ? ret : frames;
+	/* NOTE: we return zero for errors because PulseAudio gets depressed
+	 * upon receiving an error from rewind ioctl and stops processing
+	 * any longer.  Returning zero means that no rewind is done, so
+	 * it's not absolutely wrong to answer like that.
+	 */
+	return ret < 0 ? 0 : frames;
 }
 
 static snd_pcm_sframes_t snd_pcm_playback_rewind(struct snd_pcm_substream *substream,
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index b3b353d7252724e10f23a9288cd24aab3ef34007..f055ca10bbc1d33c9c1cee1fd913b7c930984ac1 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream,
 	return 0;
 }
 
-int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+static int __snd_rawmidi_info_select(struct snd_card *card,
+				     struct snd_rawmidi_info *info)
 {
 	struct snd_rawmidi *rmidi;
 	struct snd_rawmidi_str *pstr;
 	struct snd_rawmidi_substream *substream;
 
-	mutex_lock(&register_mutex);
 	rmidi = snd_rawmidi_search(card, info->device);
-	mutex_unlock(&register_mutex);
 	if (!rmidi)
 		return -ENXIO;
 	if (info->stream < 0 || info->stream > 1)
@@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info
 	}
 	return -ENXIO;
 }
+
+int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+{
+	int ret;
+
+	mutex_lock(&register_mutex);
+	ret = __snd_rawmidi_info_select(card, info);
+	mutex_unlock(&register_mutex);
+	return ret;
+}
 EXPORT_SYMBOL(snd_rawmidi_info_select);
 
 static int snd_rawmidi_info_select_user(struct snd_card *card,
diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c
index 37d9cfbc29f9c829facd29ffdc2224ada7a63efd..b80985fbc334cc6598d7ca2953d1ce79edb9f68b 100644
--- a/sound/core/seq/seq_timer.c
+++ b/sound/core/seq/seq_timer.c
@@ -355,7 +355,7 @@ static int initialize_timer(struct snd_seq_timer *tmr)
 	unsigned long freq;
 
 	t = tmr->timeri->timer;
-	if (snd_BUG_ON(!t))
+	if (!t)
 		return -EINVAL;
 
 	freq = tmr->preferred_resolution;
diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
index afac886ffa28a7c7cef39bb3c4f5d1ba816e890a..0333143a1fa7ad646c4da4b25eb653b2d3a1a3fb 100644
--- a/sound/drivers/aloop.c
+++ b/sound/drivers/aloop.c
@@ -39,6 +39,7 @@
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/pcm.h>
+#include <sound/pcm_params.h>
 #include <sound/info.h>
 #include <sound/initval.h>
 
@@ -305,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd)
 	return 0;
 }
 
-static void params_change_substream(struct loopback_pcm *dpcm,
-				    struct snd_pcm_runtime *runtime)
-{
-	struct snd_pcm_runtime *dst_runtime;
-
-	if (dpcm == NULL || dpcm->substream == NULL)
-		return;
-	dst_runtime = dpcm->substream->runtime;
-	if (dst_runtime == NULL)
-		return;
-	dst_runtime->hw = dpcm->cable->hw;
-}
-
 static void params_change(struct snd_pcm_substream *substream)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
@@ -329,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream)
 	cable->hw.rate_max = runtime->rate;
 	cable->hw.channels_min = runtime->channels;
 	cable->hw.channels_max = runtime->channels;
-	params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK],
-				runtime);
-	params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE],
-				runtime);
 }
 
 static int loopback_prepare(struct snd_pcm_substream *substream)
@@ -620,26 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream)
 static int rule_format(struct snd_pcm_hw_params *params,
 		       struct snd_pcm_hw_rule *rule)
 {
+	struct loopback_pcm *dpcm = rule->private;
+	struct loopback_cable *cable = dpcm->cable;
+	struct snd_mask m;
 
-	struct snd_pcm_hardware *hw = rule->private;
-	struct snd_mask *maskp = hw_param_mask(params, rule->var);
-
-	maskp->bits[0] &= (u_int32_t)hw->formats;
-	maskp->bits[1] &= (u_int32_t)(hw->formats >> 32);
-	memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */
-	if (! maskp->bits[0] && ! maskp->bits[1])
-		return -EINVAL;
-	return 0;
+	snd_mask_none(&m);
+	mutex_lock(&dpcm->loopback->cable_lock);
+	m.bits[0] = (u_int32_t)cable->hw.formats;
+	m.bits[1] = (u_int32_t)(cable->hw.formats >> 32);
+	mutex_unlock(&dpcm->loopback->cable_lock);
+	return snd_mask_refine(hw_param_mask(params, rule->var), &m);
 }
 
 static int rule_rate(struct snd_pcm_hw_params *params,
 		     struct snd_pcm_hw_rule *rule)
 {
-	struct snd_pcm_hardware *hw = rule->private;
+	struct loopback_pcm *dpcm = rule->private;
+	struct loopback_cable *cable = dpcm->cable;
 	struct snd_interval t;
 
-        t.min = hw->rate_min;
-        t.max = hw->rate_max;
+	mutex_lock(&dpcm->loopback->cable_lock);
+	t.min = cable->hw.rate_min;
+	t.max = cable->hw.rate_max;
+	mutex_unlock(&dpcm->loopback->cable_lock);
         t.openmin = t.openmax = 0;
         t.integer = 0;
 	return snd_interval_refine(hw_param_interval(params, rule->var), &t);
@@ -648,22 +635,44 @@ static int rule_rate(struct snd_pcm_hw_params *params,
 static int rule_channels(struct snd_pcm_hw_params *params,
 			 struct snd_pcm_hw_rule *rule)
 {
-	struct snd_pcm_hardware *hw = rule->private;
+	struct loopback_pcm *dpcm = rule->private;
+	struct loopback_cable *cable = dpcm->cable;
 	struct snd_interval t;
 
-        t.min = hw->channels_min;
-        t.max = hw->channels_max;
+	mutex_lock(&dpcm->loopback->cable_lock);
+	t.min = cable->hw.channels_min;
+	t.max = cable->hw.channels_max;
+	mutex_unlock(&dpcm->loopback->cable_lock);
         t.openmin = t.openmax = 0;
         t.integer = 0;
 	return snd_interval_refine(hw_param_interval(params, rule->var), &t);
 }
 
+static void free_cable(struct snd_pcm_substream *substream)
+{
+	struct loopback *loopback = substream->private_data;
+	int dev = get_cable_index(substream);
+	struct loopback_cable *cable;
+
+	cable = loopback->cables[substream->number][dev];
+	if (!cable)
+		return;
+	if (cable->streams[!substream->stream]) {
+		/* other stream is still alive */
+		cable->streams[substream->stream] = NULL;
+	} else {
+		/* free the cable */
+		loopback->cables[substream->number][dev] = NULL;
+		kfree(cable);
+	}
+}
+
 static int loopback_open(struct snd_pcm_substream *substream)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct loopback *loopback = substream->private_data;
 	struct loopback_pcm *dpcm;
-	struct loopback_cable *cable;
+	struct loopback_cable *cable = NULL;
 	int err = 0;
 	int dev = get_cable_index(substream);
 
@@ -681,7 +690,6 @@ static int loopback_open(struct snd_pcm_substream *substream)
 	if (!cable) {
 		cable = kzalloc(sizeof(*cable), GFP_KERNEL);
 		if (!cable) {
-			kfree(dpcm);
 			err = -ENOMEM;
 			goto unlock;
 		}
@@ -699,19 +707,19 @@ static int loopback_open(struct snd_pcm_substream *substream)
 	/* are cached -> they do not reflect the actual state */
 	err = snd_pcm_hw_rule_add(runtime, 0,
 				  SNDRV_PCM_HW_PARAM_FORMAT,
-				  rule_format, &runtime->hw,
+				  rule_format, dpcm,
 				  SNDRV_PCM_HW_PARAM_FORMAT, -1);
 	if (err < 0)
 		goto unlock;
 	err = snd_pcm_hw_rule_add(runtime, 0,
 				  SNDRV_PCM_HW_PARAM_RATE,
-				  rule_rate, &runtime->hw,
+				  rule_rate, dpcm,
 				  SNDRV_PCM_HW_PARAM_RATE, -1);
 	if (err < 0)
 		goto unlock;
 	err = snd_pcm_hw_rule_add(runtime, 0,
 				  SNDRV_PCM_HW_PARAM_CHANNELS,
-				  rule_channels, &runtime->hw,
+				  rule_channels, dpcm,
 				  SNDRV_PCM_HW_PARAM_CHANNELS, -1);
 	if (err < 0)
 		goto unlock;
@@ -723,6 +731,10 @@ static int loopback_open(struct snd_pcm_substream *substream)
 	else
 		runtime->hw = cable->hw;
  unlock:
+	if (err < 0) {
+		free_cable(substream);
+		kfree(dpcm);
+	}
 	mutex_unlock(&loopback->cable_lock);
 	return err;
 }
@@ -731,20 +743,10 @@ static int loopback_close(struct snd_pcm_substream *substream)
 {
 	struct loopback *loopback = substream->private_data;
 	struct loopback_pcm *dpcm = substream->runtime->private_data;
-	struct loopback_cable *cable;
-	int dev = get_cable_index(substream);
 
 	loopback_timer_stop(dpcm);
 	mutex_lock(&loopback->cable_lock);
-	cable = loopback->cables[substream->number][dev];
-	if (cable->streams[!substream->stream]) {
-		/* other stream is still alive */
-		cable->streams[substream->stream] = NULL;
-	} else {
-		/* free the cable */
-		loopback->cables[substream->number][dev] = NULL;
-		kfree(cable);
-	}
+	free_cable(substream);
 	mutex_unlock(&loopback->cable_lock);
 	return 0;
 }
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index 038a180d3f8117a7455ca6914ed883173aed8863..cbe818eda3363c0f0595c3496e980f64343776d6 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -325,7 +325,7 @@ static int hdac_component_master_match(struct device *dev, void *data)
  */
 int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops)
 {
-	if (WARN_ON(!hdac_acomp))
+	if (!hdac_acomp)
 		return -ENODEV;
 
 	hdac_acomp->audio_ops = aops;
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index a81aacf684b26341ec9257366d7c83a47962a16d..37e1cf8218ff0f864de4635d0188ed5b9b91d73c 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -271,6 +271,8 @@ enum {
 	CXT_FIXUP_HP_SPECTRE,
 	CXT_FIXUP_HP_GATE_MIC,
 	CXT_FIXUP_MUTE_LED_GPIO,
+	CXT_FIXUP_HEADSET_MIC,
+	CXT_FIXUP_HP_MIC_NO_PRESENCE,
 };
 
 /* for hda_fixup_thinkpad_acpi() */
@@ -350,6 +352,18 @@ static void cxt_fixup_headphone_mic(struct hda_codec *codec,
 	}
 }
 
+static void cxt_fixup_headset_mic(struct hda_codec *codec,
+				    const struct hda_fixup *fix, int action)
+{
+	struct conexant_spec *spec = codec->spec;
+
+	switch (action) {
+	case HDA_FIXUP_ACT_PRE_PROBE:
+		spec->parse_flags |= HDA_PINCFG_HEADSET_MIC;
+		break;
+	}
+}
+
 /* OPLC XO 1.5 fixup */
 
 /* OLPC XO-1.5 supports DC input mode (e.g. for use with analog sensors)
@@ -880,6 +894,19 @@ static const struct hda_fixup cxt_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = cxt_fixup_mute_led_gpio,
 	},
+	[CXT_FIXUP_HEADSET_MIC] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = cxt_fixup_headset_mic,
+	},
+	[CXT_FIXUP_HP_MIC_NO_PRESENCE] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x1a, 0x02a1113c },
+			{ }
+		},
+		.chained = true,
+		.chain_id = CXT_FIXUP_HEADSET_MIC,
+	},
 };
 
 static const struct snd_pci_quirk cxt5045_fixups[] = {
@@ -934,6 +961,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
 	SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
 	SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO),
 	SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO),
+	SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
 	SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
 	SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index c19c81d230bd7423b4153d2266a45e09333f8714..b4f1b6e88305496f91d028ceb82fe9b8a6a60ccb 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -55,10 +55,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
 #define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b)
 #define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
 				((codec)->core.vendor_id == 0x80862800))
+#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
 #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
 				|| is_skylake(codec) || is_broxton(codec) \
-				|| is_kabylake(codec)) || is_geminilake(codec)
-
+				|| is_kabylake(codec)) || is_geminilake(codec) \
+				|| is_cannonlake(codec)
 #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
 #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
@@ -3841,6 +3842,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI",	patch_i915_hsw_hdmi),
+HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI",	patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI",	patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI",	patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI",	patch_generic_hdmi),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 921a10eff43a36ad76bb1631d6de41f52df32c17..8fd2d9c62c96ce53a78dced9d0cd3529961978a9 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -324,21 +324,24 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
 	case 0x10ec0292:
 		alc_update_coef_idx(codec, 0x4, 1<<15, 0);
 		break;
-	case 0x10ec0215:
 	case 0x10ec0225:
+	case 0x10ec0295:
+	case 0x10ec0299:
+		alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000);
+		/* fallthrough */
+	case 0x10ec0215:
 	case 0x10ec0233:
 	case 0x10ec0236:
 	case 0x10ec0255:
 	case 0x10ec0256:
+	case 0x10ec0257:
 	case 0x10ec0282:
 	case 0x10ec0283:
 	case 0x10ec0286:
 	case 0x10ec0288:
 	case 0x10ec0285:
-	case 0x10ec0295:
 	case 0x10ec0298:
 	case 0x10ec0289:
-	case 0x10ec0299:
 		alc_update_coef_idx(codec, 0x10, 1<<9, 0);
 		break;
 	case 0x10ec0275:
@@ -2772,6 +2775,7 @@ enum {
 	ALC269_TYPE_ALC298,
 	ALC269_TYPE_ALC255,
 	ALC269_TYPE_ALC256,
+	ALC269_TYPE_ALC257,
 	ALC269_TYPE_ALC215,
 	ALC269_TYPE_ALC225,
 	ALC269_TYPE_ALC294,
@@ -2805,6 +2809,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
 	case ALC269_TYPE_ALC298:
 	case ALC269_TYPE_ALC255:
 	case ALC269_TYPE_ALC256:
+	case ALC269_TYPE_ALC257:
 	case ALC269_TYPE_ALC215:
 	case ALC269_TYPE_ALC225:
 	case ALC269_TYPE_ALC294:
@@ -5182,6 +5187,22 @@ static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec,
 	}
 }
 
+/* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */
+static void alc274_fixup_bind_dacs(struct hda_codec *codec,
+				    const struct hda_fixup *fix, int action)
+{
+	struct alc_spec *spec = codec->spec;
+	static hda_nid_t preferred_pairs[] = {
+		0x21, 0x03, 0x1b, 0x03, 0x16, 0x02,
+		0
+	};
+
+	if (action != HDA_FIXUP_ACT_PRE_PROBE)
+		return;
+
+	spec->gen.preferred_dacs = preferred_pairs;
+}
+
 /* for hda_fixup_thinkpad_acpi() */
 #include "thinkpad_helper.c"
 
@@ -5299,6 +5320,8 @@ enum {
 	ALC233_FIXUP_LENOVO_MULTI_CODECS,
 	ALC294_FIXUP_LENOVO_MIC_LOCATION,
 	ALC700_FIXUP_INTEL_REFERENCE,
+	ALC274_FIXUP_DELL_BIND_DACS,
+	ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6109,6 +6132,21 @@ static const struct hda_fixup alc269_fixups[] = {
 			{}
 		}
 	},
+	[ALC274_FIXUP_DELL_BIND_DACS] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc274_fixup_bind_dacs,
+		.chained = true,
+		.chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
+	},
+	[ALC274_FIXUP_DELL_AIO_LINEOUT_VERB] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x1b, 0x0401102f },
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC274_FIXUP_DELL_BIND_DACS
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6292,6 +6330,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
+	SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
 	SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
 	SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
@@ -6549,6 +6588,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 	SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
 		{0x1b, 0x01011020},
 		{0x21, 0x02211010}),
+	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+		{0x12, 0x90a60130},
+		{0x14, 0x90170110},
+		{0x1b, 0x01011020},
+		{0x21, 0x0221101f}),
 	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
 		{0x12, 0x90a60160},
 		{0x14, 0x90170120},
@@ -6575,7 +6619,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x14, 0x90170110},
 		{0x1b, 0x90a70130},
 		{0x21, 0x03211020}),
-	SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+	SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
 		{0x12, 0xb7a60130},
 		{0x13, 0xb8a61140},
 		{0x16, 0x90170110},
@@ -6867,6 +6911,10 @@ static int patch_alc269(struct hda_codec *codec)
 		spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */
 		alc_update_coef_idx(codec, 0x36, 1 << 13, 1 << 5); /* Switch pcbeep path to Line in path*/
 		break;
+	case 0x10ec0257:
+		spec->codec_variant = ALC269_TYPE_ALC257;
+		spec->gen.mixer_nid = 0;
+		break;
 	case 0x10ec0215:
 	case 0x10ec0285:
 	case 0x10ec0289:
@@ -7914,6 +7962,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
 	HDA_CODEC_ENTRY(0x10ec0236, "ALC236", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0255, "ALC255", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0256, "ALC256", patch_alc269),
+	HDA_CODEC_ENTRY(0x10ec0257, "ALC257", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0260, "ALC260", patch_alc260),
 	HDA_CODEC_ENTRY(0x10ec0262, "ALC262", patch_alc262),
 	HDA_CODEC_ENTRY(0x10ec0267, "ALC267", patch_alc268),
diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c
index 9f521a55d610186ae78d433909edd26c8dc7035f..b5e41df6bb3a895f10a90756fab0e54954a685d7 100644
--- a/sound/soc/amd/acp-pcm-dma.c
+++ b/sound/soc/amd/acp-pcm-dma.c
@@ -1051,6 +1051,11 @@ static int acp_audio_probe(struct platform_device *pdev)
 	struct resource *res;
 	const u32 *pdata = pdev->dev.platform_data;
 
+	if (!pdata) {
+		dev_err(&pdev->dev, "Missing platform data\n");
+		return -ENODEV;
+	}
+
 	audio_drv_data = devm_kzalloc(&pdev->dev, sizeof(struct audio_drv_data),
 					GFP_KERNEL);
 	if (audio_drv_data == NULL)
@@ -1058,6 +1063,8 @@ static int acp_audio_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	audio_drv_data->acp_mmio = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(audio_drv_data->acp_mmio))
+		return PTR_ERR(audio_drv_data->acp_mmio);
 
 	/* The following members gets populated in device 'open'
 	 * function. Till then interrupts are disabled in 'acp_init'
diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig
index 4a56f3dfba5132a01bbb49eb985540d16a944ecf..dcee145dd17922ac2f81b2fb92a4085d3754e78d 100644
--- a/sound/soc/atmel/Kconfig
+++ b/sound/soc/atmel/Kconfig
@@ -64,7 +64,7 @@ config SND_AT91_SOC_SAM9X5_WM8731
 config SND_ATMEL_SOC_CLASSD
 	tristate "Atmel ASoC driver for boards using CLASSD"
 	depends on ARCH_AT91 || COMPILE_TEST
-	select SND_ATMEL_SOC_DMA
+	select SND_SOC_GENERIC_DMAENGINE_PCM
 	select REGMAP_MMIO
 	help
 	  Say Y if you want to add support for Atmel ASoC driver for boards using
diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c
index b2d42ec1dcd9f7f75e98c04dd35c06351bf3ca7b..56564ce90cb6b0cb156f08ae39cc46a828f32594 100644
--- a/sound/soc/codecs/da7218.c
+++ b/sound/soc/codecs/da7218.c
@@ -2520,7 +2520,7 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec)
 	}
 
 	if (da7218->dev_id == DA7218_DEV_ID) {
-		hpldet_np = of_find_node_by_name(np, "da7218_hpldet");
+		hpldet_np = of_get_child_by_name(np, "da7218_hpldet");
 		if (!hpldet_np)
 			return pdata;
 
diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c
index 5f3c42c4f74ad7d12d5c697c56e028c8329c48cb..066ea2f4ce7b02a8f2cc58c2920b2ef7b6f2b160 100644
--- a/sound/soc/codecs/msm8916-wcd-analog.c
+++ b/sound/soc/codecs/msm8916-wcd-analog.c
@@ -267,7 +267,7 @@
 #define MSM8916_WCD_ANALOG_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\
 			SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000)
 #define MSM8916_WCD_ANALOG_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
-				    SNDRV_PCM_FMTBIT_S24_LE)
+				    SNDRV_PCM_FMTBIT_S32_LE)
 
 static int btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 |
 	       SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_BTN_4;
diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c
index a10a724eb448f4d06bffb034fbc86c68ccecb94d..13354d6304a848759433122e124d83119321bd71 100644
--- a/sound/soc/codecs/msm8916-wcd-digital.c
+++ b/sound/soc/codecs/msm8916-wcd-digital.c
@@ -194,7 +194,7 @@
 				   SNDRV_PCM_RATE_32000 | \
 				   SNDRV_PCM_RATE_48000)
 #define MSM8916_WCD_DIGITAL_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
-				     SNDRV_PCM_FMTBIT_S24_LE)
+				     SNDRV_PCM_FMTBIT_S32_LE)
 
 struct msm8916_wcd_digital_priv {
 	struct clk *ahbclk, *mclk;
@@ -645,7 +645,7 @@ static int msm8916_wcd_digital_hw_params(struct snd_pcm_substream *substream,
 				    RX_I2S_CTL_RX_I2S_MODE_MASK,
 				    RX_I2S_CTL_RX_I2S_MODE_16);
 		break;
-	case SNDRV_PCM_FORMAT_S24_LE:
+	case SNDRV_PCM_FORMAT_S32_LE:
 		snd_soc_update_bits(dai->codec, LPASS_CDC_CLK_TX_I2S_CTL,
 				    TX_I2S_CTL_TX_I2S_MODE_MASK,
 				    TX_I2S_CTL_TX_I2S_MODE_32);
diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c
index 714ce17da717c0edbc8817aa482dc1f2ec6fbcbe..e853a6dfd33b0ee7b8bf700dffea8b1e477f2d09 100644
--- a/sound/soc/codecs/nau8825.c
+++ b/sound/soc/codecs/nau8825.c
@@ -905,6 +905,7 @@ static int nau8825_adc_event(struct snd_soc_dapm_widget *w,
 
 	switch (event) {
 	case SND_SOC_DAPM_POST_PMU:
+		msleep(125);
 		regmap_update_bits(nau8825->regmap, NAU8825_REG_ENA_CTRL,
 			NAU8825_ENABLE_ADC, NAU8825_ENABLE_ADC);
 		break;
diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c
index 2df91db765acd6300406aa330cb0d85b3edb7d96..64bf26cec20d535314551ae542f8c28ed25352dc 100644
--- a/sound/soc/codecs/rt5514-spi.c
+++ b/sound/soc/codecs/rt5514-spi.c
@@ -289,6 +289,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_platform *platform)
 			dev_err(&rt5514_spi->dev,
 				"%s Failed to reguest IRQ: %d\n", __func__,
 				ret);
+		else
+			device_init_wakeup(rt5514_dsp->dev, true);
 	}
 
 	return 0;
@@ -456,8 +458,6 @@ static int rt5514_spi_probe(struct spi_device *spi)
 		return ret;
 	}
 
-	device_init_wakeup(&spi->dev, true);
-
 	return 0;
 }
 
@@ -482,10 +482,13 @@ static int __maybe_unused rt5514_resume(struct device *dev)
 	if (device_may_wakeup(dev))
 		disable_irq_wake(irq);
 
-	if (rt5514_dsp->substream) {
-		rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, sizeof(buf));
-		if (buf[0] & RT5514_IRQ_STATUS_BIT)
-			rt5514_schedule_copy(rt5514_dsp);
+	if (rt5514_dsp) {
+		if (rt5514_dsp->substream) {
+			rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf,
+				sizeof(buf));
+			if (buf[0] & RT5514_IRQ_STATUS_BIT)
+				rt5514_schedule_copy(rt5514_dsp);
+		}
 	}
 
 	return 0;
diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c
index 2a5b5d74e69714eeb966c6fc793021fcb7cd3a13..2dd6e9f990a4c4c7a2377c5f159987a046220b7a 100644
--- a/sound/soc/codecs/rt5514.c
+++ b/sound/soc/codecs/rt5514.c
@@ -496,7 +496,7 @@ static const struct snd_soc_dapm_widget rt5514_dapm_widgets[] = {
 	SND_SOC_DAPM_PGA("DMIC1", SND_SOC_NOPM, 0, 0, NULL, 0),
 	SND_SOC_DAPM_PGA("DMIC2", SND_SOC_NOPM, 0, 0, NULL, 0),
 
-	SND_SOC_DAPM_SUPPLY("DMIC CLK", SND_SOC_NOPM, 0, 0,
+	SND_SOC_DAPM_SUPPLY_S("DMIC CLK", 1, SND_SOC_NOPM, 0, 0,
 		rt5514_set_dmic_clk, SND_SOC_DAPM_PRE_PMU),
 
 	SND_SOC_DAPM_SUPPLY("ADC CLK", RT5514_CLK_CTRL1,
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index f020d2d1eef4a98baa8c91442485ee64257c708e..edc152c8a1fe7596e9bbc5760574d973a9185d16 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3823,6 +3823,8 @@ static int rt5645_i2c_probe(struct i2c_client *i2c,
 	regmap_read(regmap, RT5645_VENDOR_ID, &val);
 	rt5645->v_id = val & 0xff;
 
+	regmap_write(rt5645->regmap, RT5645_AD_DA_MIXER, 0x8080);
+
 	ret = regmap_register_patch(rt5645->regmap, init_list,
 				    ARRAY_SIZE(init_list));
 	if (ret != 0)
diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c
index b036c9dc0c8cf1883dd550a6755ae61c7a81e87d..d329bf719d80f01e5dc67865e4258f0cca50d6d5 100644
--- a/sound/soc/codecs/rt5663.c
+++ b/sound/soc/codecs/rt5663.c
@@ -1560,6 +1560,10 @@ static int rt5663_jack_detect(struct snd_soc_codec *codec, int jack_insert)
 			RT5663_IRQ_POW_SAV_MASK, RT5663_IRQ_POW_SAV_EN);
 		snd_soc_update_bits(codec, RT5663_IRQ_1,
 			RT5663_EN_IRQ_JD1_MASK, RT5663_EN_IRQ_JD1_EN);
+		snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1,
+			RT5663_EM_JD_MASK, RT5663_EM_JD_RST);
+		snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1,
+			RT5663_EM_JD_MASK, RT5663_EM_JD_NOR);
 
 		while (true) {
 			regmap_read(rt5663->regmap, RT5663_INT_ST_2, &val);
diff --git a/sound/soc/codecs/rt5663.h b/sound/soc/codecs/rt5663.h
index c5a9b69579ad216d9b039359910e8c10acfa1832..03adc8004ba98d73da3f132d23285fdae77bf72d 100644
--- a/sound/soc/codecs/rt5663.h
+++ b/sound/soc/codecs/rt5663.h
@@ -1029,6 +1029,10 @@
 #define RT5663_POL_EXT_JD_SHIFT			10
 #define RT5663_POL_EXT_JD_EN			(0x1 << 10)
 #define RT5663_POL_EXT_JD_DIS			(0x0 << 10)
+#define RT5663_EM_JD_MASK			(0x1 << 7)
+#define RT5663_EM_JD_SHIFT			7
+#define RT5663_EM_JD_NOR			(0x1 << 7)
+#define RT5663_EM_JD_RST			(0x0 << 7)
 
 /* DACREF LDO Control (0x0112)*/
 #define RT5663_PWR_LDO_DACREFL_MASK		(0x1 << 9)
diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h
index 730fb2058869978b3f1b5281b123844a44458c42..1ff3edb7bbb6b28eb45c99b7b14baeadb46651ec 100644
--- a/sound/soc/codecs/tlv320aic31xx.h
+++ b/sound/soc/codecs/tlv320aic31xx.h
@@ -116,7 +116,7 @@ struct aic31xx_pdata {
 /* INT2 interrupt control */
 #define AIC31XX_INT2CTRL	AIC31XX_REG(0, 49)
 /* GPIO1 control */
-#define AIC31XX_GPIO1		AIC31XX_REG(0, 50)
+#define AIC31XX_GPIO1		AIC31XX_REG(0, 51)
 
 #define AIC31XX_DACPRB		AIC31XX_REG(0, 60)
 /* ADC Instruction Set Register */
diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index c482b2e7a7d2a55d7ead20e56a680da6c0791378..cfe72b9d4356069327e80d592ff1e660bca39258 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
 	struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev);
 	struct device_node *twl4030_codec_node = NULL;
 
-	twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node,
+	twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node,
 						  "codec");
 
 	if (!pdata && twl4030_codec_node) {
@@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
 				     GFP_KERNEL);
 		if (!pdata) {
 			dev_err(codec->dev, "Can not allocate memory\n");
+			of_node_put(twl4030_codec_node);
 			return NULL;
 		}
 		twl4030_setup_pdata_of(pdata, twl4030_codec_node);
+		of_node_put(twl4030_codec_node);
 	}
 
 	return pdata;
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 65c059b5ffd784066fd6d66ae74dae044b721eeb..66e32f5d2917f2f0b958c124b5f4a0eeca296c10 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -1733,7 +1733,7 @@ static int wm_adsp_load(struct wm_adsp *dsp)
 		 le64_to_cpu(footer->timestamp));
 
 	while (pos < firmware->size &&
-	       pos - firmware->size > sizeof(*region)) {
+	       sizeof(*region) < firmware->size - pos) {
 		region = (void *)&(firmware->data[pos]);
 		region_name = "Unknown";
 		reg = 0;
@@ -1782,8 +1782,8 @@ static int wm_adsp_load(struct wm_adsp *dsp)
 			 regions, le32_to_cpu(region->len), offset,
 			 region_name);
 
-		if ((pos + le32_to_cpu(region->len) + sizeof(*region)) >
-		    firmware->size) {
+		if (le32_to_cpu(region->len) >
+		    firmware->size - pos - sizeof(*region)) {
 			adsp_err(dsp,
 				 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
 				 file, regions, region_name,
@@ -2253,7 +2253,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
 
 	blocks = 0;
 	while (pos < firmware->size &&
-	       pos - firmware->size > sizeof(*blk)) {
+	       sizeof(*blk) < firmware->size - pos) {
 		blk = (void *)(&firmware->data[pos]);
 
 		type = le16_to_cpu(blk->type);
@@ -2327,8 +2327,8 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
 		}
 
 		if (reg) {
-			if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) >
-			    firmware->size) {
+			if (le32_to_cpu(blk->len) >
+			    firmware->size - pos - sizeof(*blk)) {
 				adsp_err(dsp,
 					 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
 					 file, blocks, region_name,
diff --git a/sound/soc/fsl/fsl_asrc.h b/sound/soc/fsl/fsl_asrc.h
index 0f163abe4ba37d93bec022afd0842b20423f2361..52c27a358933b100d85b2392abe2d2fc4501dd3a 100644
--- a/sound/soc/fsl/fsl_asrc.h
+++ b/sound/soc/fsl/fsl_asrc.h
@@ -260,8 +260,8 @@
 #define ASRFSTi_OUTPUT_FIFO_SHIFT	12
 #define ASRFSTi_OUTPUT_FIFO_MASK	(((1 << ASRFSTi_OUTPUT_FIFO_WIDTH) - 1) << ASRFSTi_OUTPUT_FIFO_SHIFT)
 #define ASRFSTi_IAEi_SHIFT		11
-#define ASRFSTi_IAEi_MASK		(1 << ASRFSTi_OAFi_SHIFT)
-#define ASRFSTi_IAEi			(1 << ASRFSTi_OAFi_SHIFT)
+#define ASRFSTi_IAEi_MASK		(1 << ASRFSTi_IAEi_SHIFT)
+#define ASRFSTi_IAEi			(1 << ASRFSTi_IAEi_SHIFT)
 #define ASRFSTi_INPUT_FIFO_WIDTH	7
 #define ASRFSTi_INPUT_FIFO_SHIFT	0
 #define ASRFSTi_INPUT_FIFO_MASK		((1 << ASRFSTi_INPUT_FIFO_WIDTH) - 1)
diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index f2f51e06e22cc8b6ff3f9e4f21046be256bd1265..424bafaf51efe63e108fcd83ce3c6bda43f9e92c 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -38,6 +38,7 @@
 #include <linux/ctype.h>
 #include <linux/device.h>
 #include <linux/delay.h>
+#include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/of.h>
@@ -265,6 +266,8 @@ struct fsl_ssi_private {
 
 	u32 fifo_watermark;
 	u32 dma_maxburst;
+
+	struct mutex ac97_reg_lock;
 };
 
 /*
@@ -1260,11 +1263,13 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
 	if (reg > 0x7f)
 		return;
 
+	mutex_lock(&fsl_ac97_data->ac97_reg_lock);
+
 	ret = clk_prepare_enable(fsl_ac97_data->clk);
 	if (ret) {
 		pr_err("ac97 write clk_prepare_enable failed: %d\n",
 			ret);
-		return;
+		goto ret_unlock;
 	}
 
 	lreg = reg <<  12;
@@ -1278,6 +1283,9 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
 	udelay(100);
 
 	clk_disable_unprepare(fsl_ac97_data->clk);
+
+ret_unlock:
+	mutex_unlock(&fsl_ac97_data->ac97_reg_lock);
 }
 
 static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
@@ -1285,16 +1293,18 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
 {
 	struct regmap *regs = fsl_ac97_data->regs;
 
-	unsigned short val = -1;
+	unsigned short val = 0;
 	u32 reg_val;
 	unsigned int lreg;
 	int ret;
 
+	mutex_lock(&fsl_ac97_data->ac97_reg_lock);
+
 	ret = clk_prepare_enable(fsl_ac97_data->clk);
 	if (ret) {
 		pr_err("ac97 read clk_prepare_enable failed: %d\n",
 			ret);
-		return -1;
+		goto ret_unlock;
 	}
 
 	lreg = (reg & 0x7f) <<  12;
@@ -1309,6 +1319,8 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
 
 	clk_disable_unprepare(fsl_ac97_data->clk);
 
+ret_unlock:
+	mutex_unlock(&fsl_ac97_data->ac97_reg_lock);
 	return val;
 }
 
@@ -1458,12 +1470,6 @@ static int fsl_ssi_probe(struct platform_device *pdev)
 				sizeof(fsl_ssi_ac97_dai));
 
 		fsl_ac97_data = ssi_private;
-
-		ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
-		if (ret) {
-			dev_err(&pdev->dev, "could not set AC'97 ops\n");
-			return ret;
-		}
 	} else {
 		/* Initialize this copy of the CPU DAI driver structure */
 		memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template,
@@ -1574,6 +1580,15 @@ static int fsl_ssi_probe(struct platform_device *pdev)
 			return ret;
 	}
 
+	if (fsl_ssi_is_ac97(ssi_private)) {
+		mutex_init(&ssi_private->ac97_reg_lock);
+		ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
+		if (ret) {
+			dev_err(&pdev->dev, "could not set AC'97 ops\n");
+			goto error_ac97_ops;
+		}
+	}
+
 	ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component,
 					      &ssi_private->cpu_dai_drv, 1);
 	if (ret) {
@@ -1657,6 +1672,13 @@ static int fsl_ssi_probe(struct platform_device *pdev)
 	fsl_ssi_debugfs_remove(&ssi_private->dbg_stats);
 
 error_asoc_register:
+	if (fsl_ssi_is_ac97(ssi_private))
+		snd_soc_set_ac97_ops(NULL);
+
+error_ac97_ops:
+	if (fsl_ssi_is_ac97(ssi_private))
+		mutex_destroy(&ssi_private->ac97_reg_lock);
+
 	if (ssi_private->soc->imx)
 		fsl_ssi_imx_clean(pdev, ssi_private);
 
@@ -1675,8 +1697,10 @@ static int fsl_ssi_remove(struct platform_device *pdev)
 	if (ssi_private->soc->imx)
 		fsl_ssi_imx_clean(pdev, ssi_private);
 
-	if (fsl_ssi_is_ac97(ssi_private))
+	if (fsl_ssi_is_ac97(ssi_private)) {
 		snd_soc_set_ac97_ops(NULL);
+		mutex_destroy(&ssi_private->ac97_reg_lock);
+	}
 
 	return 0;
 }
diff --git a/sound/soc/intel/boards/kbl_rt5663_max98927.c b/sound/soc/intel/boards/kbl_rt5663_max98927.c
index 6f9a8bcf20f3ebb4407a2452eed8870a11b40c02..6dcad0a8a0d045969873d983fa8b89bbe1f91e96 100644
--- a/sound/soc/intel/boards/kbl_rt5663_max98927.c
+++ b/sound/soc/intel/boards/kbl_rt5663_max98927.c
@@ -101,7 +101,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = {
 	{ "ssp0 Tx", NULL, "spk_out" },
 
 	{ "AIF Playback", NULL, "ssp1 Tx" },
-	{ "ssp1 Tx", NULL, "hs_out" },
+	{ "ssp1 Tx", NULL, "codec1_out" },
 
 	{ "hs_in", NULL, "ssp1 Rx" },
 	{ "ssp1 Rx", NULL, "AIF Capture" },
diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
index 6072164f2d43db7c7f8f50000892f6e61e80d79c..271ae3c2c5354c5788aa9383772ebbe63b4e8d9a 100644
--- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
+++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
@@ -109,7 +109,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = {
 	{ "ssp0 Tx", NULL, "spk_out" },
 
 	{ "AIF Playback", NULL, "ssp1 Tx" },
-	{ "ssp1 Tx", NULL, "hs_out" },
+	{ "ssp1 Tx", NULL, "codec1_out" },
 
 	{ "hs_in", NULL, "ssp1 Rx" },
 	{ "ssp1 Rx", NULL, "AIF Capture" },
diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
index d14c50a602894c4ad8aa76f2259839552aff2f6a..3eaac41090ca7f8b07ed99511d58cbc1f5498c56 100644
--- a/sound/soc/intel/skylake/skl-nhlt.c
+++ b/sound/soc/intel/skylake/skl-nhlt.c
@@ -119,11 +119,16 @@ static bool skl_check_ep_match(struct device *dev, struct nhlt_endpoint *epnt,
 
 	if ((epnt->virtual_bus_id == instance_id) &&
 			(epnt->linktype == link_type) &&
-			(epnt->direction == dirn) &&
-			(epnt->device_type == dev_type))
-		return true;
-	else
-		return false;
+			(epnt->direction == dirn)) {
+		/* do not check dev_type for DMIC link type */
+		if (epnt->linktype == NHLT_LINK_DMIC)
+			return true;
+
+		if (epnt->device_type == dev_type)
+			return true;
+	}
+
+	return false;
 }
 
 struct nhlt_specific_cfg
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index a072bcf209d2aa4c9c72503466e027e6867cd9bb..81923da18ac2259ad159b8c0282242cc7c94c53c 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -2908,7 +2908,7 @@ static int skl_tplg_control_load(struct snd_soc_component *cmpnt,
 		break;
 
 	default:
-		dev_warn(bus->dev, "Control load not supported %d:%d:%d\n",
+		dev_dbg(bus->dev, "Control load not supported %d:%d:%d\n",
 			hdr->ops.get, hdr->ops.put, hdr->ops.info);
 		break;
 	}
diff --git a/sound/soc/rockchip/rockchip_spdif.c b/sound/soc/rockchip/rockchip_spdif.c
index ee5055d47d13d0ba6f63417947c63f731bcac4a8..a89fe9b6463ba6b56d88c9afa6ae3fdd1397e6ae 100644
--- a/sound/soc/rockchip/rockchip_spdif.c
+++ b/sound/soc/rockchip/rockchip_spdif.c
@@ -322,26 +322,30 @@ static int rk_spdif_probe(struct platform_device *pdev)
 	spdif->mclk = devm_clk_get(&pdev->dev, "mclk");
 	if (IS_ERR(spdif->mclk)) {
 		dev_err(&pdev->dev, "Can't retrieve rk_spdif master clock\n");
-		return PTR_ERR(spdif->mclk);
+		ret = PTR_ERR(spdif->mclk);
+		goto err_disable_hclk;
 	}
 
 	ret = clk_prepare_enable(spdif->mclk);
 	if (ret) {
 		dev_err(spdif->dev, "clock enable failed %d\n", ret);
-		return ret;
+		goto err_disable_clocks;
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(regs))
-		return PTR_ERR(regs);
+	if (IS_ERR(regs)) {
+		ret = PTR_ERR(regs);
+		goto err_disable_clocks;
+	}
 
 	spdif->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "hclk", regs,
 						  &rk_spdif_regmap_config);
 	if (IS_ERR(spdif->regmap)) {
 		dev_err(&pdev->dev,
 			"Failed to initialise managed register map\n");
-		return PTR_ERR(spdif->regmap);
+		ret = PTR_ERR(spdif->regmap);
+		goto err_disable_clocks;
 	}
 
 	spdif->playback_dma_data.addr = res->start + SPDIF_SMPDR;
@@ -373,6 +377,10 @@ static int rk_spdif_probe(struct platform_device *pdev)
 
 err_pm_runtime:
 	pm_runtime_disable(&pdev->dev);
+err_disable_clocks:
+	clk_disable_unprepare(spdif->mclk);
+err_disable_hclk:
+	clk_disable_unprepare(spdif->hclk);
 
 	return ret;
 }
diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c
index 8ddb08714faabb5bc2b58716155b0ecec501db0d..4672688cac325ce79970c620c5f9e6d2d81b3525 100644
--- a/sound/soc/sh/rcar/adg.c
+++ b/sound/soc/sh/rcar/adg.c
@@ -222,7 +222,7 @@ int rsnd_adg_set_cmd_timsel_gen2(struct rsnd_mod *cmd_mod,
 				   NULL, &val, NULL);
 
 	val  = val	<< shift;
-	mask = 0xffff	<< shift;
+	mask = 0x0f1f	<< shift;
 
 	rsnd_mod_bset(adg_mod, CMDOUT_TIMSEL, mask, val);
 
@@ -250,7 +250,7 @@ int rsnd_adg_set_src_timesel_gen2(struct rsnd_mod *src_mod,
 
 	in   = in	<< shift;
 	out  = out	<< shift;
-	mask = 0xffff	<< shift;
+	mask = 0x0f1f	<< shift;
 
 	switch (id / 2) {
 	case 0:
@@ -380,7 +380,7 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate)
 			ckr = 0x80000000;
 	}
 
-	rsnd_mod_bset(adg_mod, BRGCKR, 0x80FF0000, adg->ckr | ckr);
+	rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr | ckr);
 	rsnd_mod_write(adg_mod, BRRA,  adg->rbga);
 	rsnd_mod_write(adg_mod, BRRB,  adg->rbgb);
 
diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c
index c70eb20978163d6952a6764381d083779dbd758b..f12a88a21dfa24d32f04dad32635dee2482a9337 100644
--- a/sound/soc/sh/rcar/core.c
+++ b/sound/soc/sh/rcar/core.c
@@ -1332,8 +1332,8 @@ static int rsnd_pcm_new(struct snd_soc_pcm_runtime *rtd)
 
 	return snd_pcm_lib_preallocate_pages_for_all(
 		rtd->pcm,
-		SNDRV_DMA_TYPE_CONTINUOUS,
-		snd_dma_continuous_data(GFP_KERNEL),
+		SNDRV_DMA_TYPE_DEV,
+		rtd->card->snd_card->dev,
 		PREALLOC_BUFFER, PREALLOC_BUFFER_MAX);
 }
 
diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c
index fd557abfe390a1ea6c6800e9e9836d039bdbcee3..4d750bdf8e2449981537a4b6d41bea33af321789 100644
--- a/sound/soc/sh/rcar/dma.c
+++ b/sound/soc/sh/rcar/dma.c
@@ -26,10 +26,7 @@
 struct rsnd_dmaen {
 	struct dma_chan		*chan;
 	dma_cookie_t		cookie;
-	dma_addr_t		dma_buf;
 	unsigned int		dma_len;
-	unsigned int		dma_period;
-	unsigned int		dma_cnt;
 };
 
 struct rsnd_dmapp {
@@ -71,38 +68,10 @@ static struct rsnd_mod mem = {
 /*
  *		Audio DMAC
  */
-#define rsnd_dmaen_sync(dmaen, io, i)	__rsnd_dmaen_sync(dmaen, io, i, 1)
-#define rsnd_dmaen_unsync(dmaen, io, i)	__rsnd_dmaen_sync(dmaen, io, i, 0)
-static void __rsnd_dmaen_sync(struct rsnd_dmaen *dmaen, struct rsnd_dai_stream *io,
-			      int i, int sync)
-{
-	struct device *dev = dmaen->chan->device->dev;
-	enum dma_data_direction dir;
-	int is_play = rsnd_io_is_play(io);
-	dma_addr_t buf;
-	int len, max;
-	size_t period;
-
-	len	= dmaen->dma_len;
-	period	= dmaen->dma_period;
-	max	= len / period;
-	i	= i % max;
-	buf	= dmaen->dma_buf + (period * i);
-
-	dir = is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
-
-	if (sync)
-		dma_sync_single_for_device(dev, buf, period, dir);
-	else
-		dma_sync_single_for_cpu(dev, buf, period, dir);
-}
-
 static void __rsnd_dmaen_complete(struct rsnd_mod *mod,
 				  struct rsnd_dai_stream *io)
 {
 	struct rsnd_priv *priv = rsnd_mod_to_priv(mod);
-	struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
-	struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
 	bool elapsed = false;
 	unsigned long flags;
 
@@ -115,22 +84,9 @@ static void __rsnd_dmaen_complete(struct rsnd_mod *mod,
 	 */
 	spin_lock_irqsave(&priv->lock, flags);
 
-	if (rsnd_io_is_working(io)) {
-		rsnd_dmaen_unsync(dmaen, io, dmaen->dma_cnt);
-
-		/*
-		 * Next period is already started.
-		 * Let's sync Next Next period
-		 * see
-		 *	rsnd_dmaen_start()
-		 */
-		rsnd_dmaen_sync(dmaen, io, dmaen->dma_cnt + 2);
-
+	if (rsnd_io_is_working(io))
 		elapsed = true;
 
-		dmaen->dma_cnt++;
-	}
-
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	if (elapsed)
@@ -165,14 +121,8 @@ static int rsnd_dmaen_stop(struct rsnd_mod *mod,
 	struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
 	struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
 
-	if (dmaen->chan) {
-		int is_play = rsnd_io_is_play(io);
-
+	if (dmaen->chan)
 		dmaengine_terminate_all(dmaen->chan);
-		dma_unmap_single(dmaen->chan->device->dev,
-				 dmaen->dma_buf, dmaen->dma_len,
-				 is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-	}
 
 	return 0;
 }
@@ -237,11 +187,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
 	struct device *dev = rsnd_priv_to_dev(priv);
 	struct dma_async_tx_descriptor *desc;
 	struct dma_slave_config cfg = {};
-	dma_addr_t buf;
-	size_t len;
-	size_t period;
 	int is_play = rsnd_io_is_play(io);
-	int i;
 	int ret;
 
 	cfg.direction	= is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
@@ -258,19 +204,10 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
 	if (ret < 0)
 		return ret;
 
-	len	= snd_pcm_lib_buffer_bytes(substream);
-	period	= snd_pcm_lib_period_bytes(substream);
-	buf	= dma_map_single(dmaen->chan->device->dev,
-				 substream->runtime->dma_area,
-				 len,
-				 is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-	if (dma_mapping_error(dmaen->chan->device->dev, buf)) {
-		dev_err(dev, "dma map failed\n");
-		return -EIO;
-	}
-
 	desc = dmaengine_prep_dma_cyclic(dmaen->chan,
-					 buf, len, period,
+					 substream->runtime->dma_addr,
+					 snd_pcm_lib_buffer_bytes(substream),
+					 snd_pcm_lib_period_bytes(substream),
 					 is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
 					 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
@@ -282,18 +219,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
 	desc->callback		= rsnd_dmaen_complete;
 	desc->callback_param	= rsnd_mod_get(dma);
 
-	dmaen->dma_buf		= buf;
-	dmaen->dma_len		= len;
-	dmaen->dma_period	= period;
-	dmaen->dma_cnt		= 0;
-
-	/*
-	 * synchronize this and next period
-	 * see
-	 *	__rsnd_dmaen_complete()
-	 */
-	for (i = 0; i < 2; i++)
-		rsnd_dmaen_sync(dmaen, io, i);
+	dmaen->dma_len		= snd_pcm_lib_buffer_bytes(substream);
 
 	dmaen->cookie = dmaengine_submit(desc);
 	if (dmaen->cookie < 0) {
diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c
index fece1e5f582f35ab5e558a0b66b3b558816048d6..cbf3bf312d23bda9427747c1af2dde30d9fcf43d 100644
--- a/sound/soc/sh/rcar/ssi.c
+++ b/sound/soc/sh/rcar/ssi.c
@@ -446,25 +446,29 @@ static bool rsnd_ssi_pointer_update(struct rsnd_mod *mod,
 				    int byte)
 {
 	struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
+	bool ret = false;
+	int byte_pos;
 
-	ssi->byte_pos += byte;
+	byte_pos = ssi->byte_pos + byte;
 
-	if (ssi->byte_pos >= ssi->next_period_byte) {
+	if (byte_pos >= ssi->next_period_byte) {
 		struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 
 		ssi->period_pos++;
 		ssi->next_period_byte += ssi->byte_per_period;
 
 		if (ssi->period_pos >= runtime->periods) {
-			ssi->byte_pos = 0;
+			byte_pos = 0;
 			ssi->period_pos = 0;
 			ssi->next_period_byte = ssi->byte_per_period;
 		}
 
-		return true;
+		ret = true;
 	}
 
-	return false;
+	WRITE_ONCE(ssi->byte_pos, byte_pos);
+
+	return ret;
 }
 
 /*
@@ -838,7 +842,7 @@ static int rsnd_ssi_pointer(struct rsnd_mod *mod,
 	struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
 	struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 
-	*pointer = bytes_to_frames(runtime, ssi->byte_pos);
+	*pointer = bytes_to_frames(runtime, READ_ONCE(ssi->byte_pos));
 
 	return 0;
 }
diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c
index 4d948757d300d04be3bfca3c63f78c68085a8506..6ff8a36c2c82224da8ae88c94b317092ea87f88f 100644
--- a/sound/soc/sh/rcar/ssiu.c
+++ b/sound/soc/sh/rcar/ssiu.c
@@ -125,6 +125,7 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
 {
 	int hdmi = rsnd_ssi_hdmi_port(io);
 	int ret;
+	u32 mode = 0;
 
 	ret = rsnd_ssiu_init(mod, io, priv);
 	if (ret < 0)
@@ -136,9 +137,11 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
 		 * see
 		 *	rsnd_ssi_config_init()
 		 */
-		rsnd_mod_write(mod, SSI_MODE, 0x1);
+		mode = 0x1;
 	}
 
+	rsnd_mod_write(mod, SSI_MODE, mode);
+
 	if (rsnd_ssi_use_busif(io)) {
 		rsnd_mod_write(mod, SSI_BUSIF_ADINR,
 			       rsnd_get_adinr_bit(mod, io) |
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 61b348383de88fa282028a78603567fa513589b7..2b4ceda36291c01c6cca69d3a1cacd6c23014f40 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -204,6 +204,10 @@ static int snd_usb_copy_string_desc(struct mixer_build *state,
 				    int index, char *buf, int maxlen)
 {
 	int len = usb_string(state->chip->dev, index, buf, maxlen - 1);
+
+	if (len < 0)
+		return 0;
+
 	buf[len] = 0;
 	return len;
 }
@@ -2169,19 +2173,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid,
 	kctl->private_value = (unsigned long)namelist;
 	kctl->private_free = usb_mixer_selector_elem_free;
 
-	nameid = uac_selector_unit_iSelector(desc);
+	/* check the static mapping table at first */
 	len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
-	if (len)
-		;
-	else if (nameid)
-		snd_usb_copy_string_desc(state, nameid, kctl->id.name,
-					 sizeof(kctl->id.name));
-	else {
-		len = get_term_name(state, &state->oterm,
+	if (!len) {
+		/* no mapping ? */
+		/* if iSelector is given, use it */
+		nameid = uac_selector_unit_iSelector(desc);
+		if (nameid)
+			len = snd_usb_copy_string_desc(state, nameid,
+						       kctl->id.name,
+						       sizeof(kctl->id.name));
+		/* ... or pick up the terminal name at next */
+		if (!len)
+			len = get_term_name(state, &state->oterm,
 				    kctl->id.name, sizeof(kctl->id.name), 0);
+		/* ... or use the fixed string "USB" as the last resort */
 		if (!len)
 			strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
 
+		/* and add the proper suffix */
 		if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR)
 			append_ctl_name(kctl, " Clock Source");
 		else if ((state->oterm.type & 0xff00) == 0x0100)
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 77eecaa4db1f32c9b7af87273c599181bf307443..a66ef5777887a78d7416e64c049c73b26477c7f7 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1166,10 +1166,11 @@ static bool is_marantz_denon_dac(unsigned int id)
 /* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch
  * between PCM/DOP and native DSD mode
  */
-static bool is_teac_50X_dac(unsigned int id)
+static bool is_teac_dsd_dac(unsigned int id)
 {
 	switch (id) {
 	case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
+	case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
 		return true;
 	}
 	return false;
@@ -1202,7 +1203,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs,
 			break;
 		}
 		mdelay(20);
-	} else if (is_teac_50X_dac(subs->stream->chip->usb_id)) {
+	} else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) {
 		/* Vendor mode switch cmd is required. */
 		switch (fmt->altsetting) {
 		case 3: /* DSD mode (DSD_U32) requested */
@@ -1392,7 +1393,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 	}
 
 	/* TEAC devices with USB DAC functionality */
-	if (is_teac_50X_dac(chip->usb_id)) {
+	if (is_teac_dsd_dac(chip->usb_id)) {
 		if (fp->altsetting == 3)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 	}
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index 1f57bbe82b6fb8582c2a3a1617345266c22e33e8..6edd177bb1c7c66e0ec32caf7ec8d2c3680ed2f3 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -152,6 +152,12 @@ struct kvm_arch_memory_slot {
 	(__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
 #define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
 
+/* PL1 Physical Timer Registers */
+#define KVM_REG_ARM_PTIMER_CTL		ARM_CP15_REG32(0, 14, 2, 1)
+#define KVM_REG_ARM_PTIMER_CNT		ARM_CP15_REG64(0, 14)
+#define KVM_REG_ARM_PTIMER_CVAL		ARM_CP15_REG64(2, 14)
+
+/* Virtual Timer Registers */
 #define KVM_REG_ARM_TIMER_CTL		ARM_CP15_REG32(0, 14, 3, 1)
 #define KVM_REG_ARM_TIMER_CNT		ARM_CP15_REG64(1, 14)
 #define KVM_REG_ARM_TIMER_CVAL		ARM_CP15_REG64(3, 14)
@@ -216,6 +222,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_ITS_SAVE_TABLES		1
 #define   KVM_DEV_ARM_ITS_RESTORE_TABLES	2
 #define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
+#define   KVM_DEV_ARM_ITS_CTRL_RESET		4
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h b/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..b551b741653d251d6155a214023b2215ae02e871
--- /dev/null
+++ b/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 51149ec75fe480b324fd74d2697579a936438fdc..9abbf30446545a0668083b0891461f015563bcb1 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -196,6 +196,12 @@ struct kvm_arch_memory_slot {
 
 #define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
 
+/* Physical Timer EL0 Registers */
+#define KVM_REG_ARM_PTIMER_CTL		ARM64_SYS_REG(3, 3, 14, 2, 1)
+#define KVM_REG_ARM_PTIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 2, 2)
+#define KVM_REG_ARM_PTIMER_CNT		ARM64_SYS_REG(3, 3, 14, 0, 1)
+
+/* EL0 Virtual Timer Registers */
 #define KVM_REG_ARM_TIMER_CTL		ARM64_SYS_REG(3, 3, 14, 3, 1)
 #define KVM_REG_ARM_TIMER_CNT		ARM64_SYS_REG(3, 3, 14, 3, 2)
 #define KVM_REG_ARM_TIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 0, 2)
@@ -228,6 +234,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_ITS_SAVE_TABLES           1
 #define   KVM_DEV_ARM_ITS_RESTORE_TABLES        2
 #define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
+#define   KVM_DEV_ARM_ITS_CTRL_RESET		4
 
 /* Device Control API on vcpu fd */
 #define KVM_ARM_VCPU_PMU_V3_CTRL	0
diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..0a8e37a519f258e72317f39a87ac9cc60ad47f6f
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include "ptrace.h"
+
+typedef user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 9ad172dcd912d5763b0bf954617c9e398ad31aa8..38535a57fef8327c3b08bf20e1f8621fd93c776a 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -6,10 +6,6 @@
  *
  * Copyright IBM Corp. 2008
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  */
diff --git a/tools/arch/s390/include/uapi/asm/kvm_perf.h b/tools/arch/s390/include/uapi/asm/kvm_perf.h
index c36c97ffdc6fa24f246c41bfe993790410e4e032..84606b8cc49e47c794fb3b16ef5681de098bfc6f 100644
--- a/tools/arch/s390/include/uapi/asm/kvm_perf.h
+++ b/tools/arch/s390/include/uapi/asm/kvm_perf.h
@@ -4,10 +4,6 @@
  *
  * Copyright 2014 IBM Corp.
  * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
  */
 
 #ifndef __LINUX_KVM_PERF_S390_H
diff --git a/tools/arch/s390/include/uapi/asm/perf_regs.h b/tools/arch/s390/include/uapi/asm/perf_regs.h
new file mode 100644
index 0000000000000000000000000000000000000000..d17dd9e5d51638f08ee44ad3e94d840ea0fc568d
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/perf_regs.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_S390_PERF_REGS_H
+#define _ASM_S390_PERF_REGS_H
+
+enum perf_event_s390_regs {
+	PERF_REG_S390_R0,
+	PERF_REG_S390_R1,
+	PERF_REG_S390_R2,
+	PERF_REG_S390_R3,
+	PERF_REG_S390_R4,
+	PERF_REG_S390_R5,
+	PERF_REG_S390_R6,
+	PERF_REG_S390_R7,
+	PERF_REG_S390_R8,
+	PERF_REG_S390_R9,
+	PERF_REG_S390_R10,
+	PERF_REG_S390_R11,
+	PERF_REG_S390_R12,
+	PERF_REG_S390_R13,
+	PERF_REG_S390_R14,
+	PERF_REG_S390_R15,
+	PERF_REG_S390_FP0,
+	PERF_REG_S390_FP1,
+	PERF_REG_S390_FP2,
+	PERF_REG_S390_FP3,
+	PERF_REG_S390_FP4,
+	PERF_REG_S390_FP5,
+	PERF_REG_S390_FP6,
+	PERF_REG_S390_FP7,
+	PERF_REG_S390_FP8,
+	PERF_REG_S390_FP9,
+	PERF_REG_S390_FP10,
+	PERF_REG_S390_FP11,
+	PERF_REG_S390_FP12,
+	PERF_REG_S390_FP13,
+	PERF_REG_S390_FP14,
+	PERF_REG_S390_FP15,
+	PERF_REG_S390_MASK,
+	PERF_REG_S390_PC,
+
+	PERF_REG_S390_MAX
+};
+
+#endif /* _ASM_S390_PERF_REGS_H */
diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h
new file mode 100644
index 0000000000000000000000000000000000000000..543dd70e12c81d59a9987c437f1895a216c2525b
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/ptrace.h
@@ -0,0 +1,457 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ */
+
+#ifndef _UAPI_S390_PTRACE_H
+#define _UAPI_S390_PTRACE_H
+
+/*
+ * Offsets in the user_regs_struct. They are used for the ptrace
+ * system call and in entry.S
+ */
+#ifndef __s390x__
+
+#define PT_PSWMASK  0x00
+#define PT_PSWADDR  0x04
+#define PT_GPR0     0x08
+#define PT_GPR1     0x0C
+#define PT_GPR2     0x10
+#define PT_GPR3     0x14
+#define PT_GPR4     0x18
+#define PT_GPR5     0x1C
+#define PT_GPR6     0x20
+#define PT_GPR7     0x24
+#define PT_GPR8     0x28
+#define PT_GPR9     0x2C
+#define PT_GPR10    0x30
+#define PT_GPR11    0x34
+#define PT_GPR12    0x38
+#define PT_GPR13    0x3C
+#define PT_GPR14    0x40
+#define PT_GPR15    0x44
+#define PT_ACR0     0x48
+#define PT_ACR1     0x4C
+#define PT_ACR2     0x50
+#define PT_ACR3     0x54
+#define PT_ACR4	    0x58
+#define PT_ACR5	    0x5C
+#define PT_ACR6	    0x60
+#define PT_ACR7	    0x64
+#define PT_ACR8	    0x68
+#define PT_ACR9	    0x6C
+#define PT_ACR10    0x70
+#define PT_ACR11    0x74
+#define PT_ACR12    0x78
+#define PT_ACR13    0x7C
+#define PT_ACR14    0x80
+#define PT_ACR15    0x84
+#define PT_ORIGGPR2 0x88
+#define PT_FPC	    0x90
+/*
+ * A nasty fact of life that the ptrace api
+ * only supports passing of longs.
+ */
+#define PT_FPR0_HI  0x98
+#define PT_FPR0_LO  0x9C
+#define PT_FPR1_HI  0xA0
+#define PT_FPR1_LO  0xA4
+#define PT_FPR2_HI  0xA8
+#define PT_FPR2_LO  0xAC
+#define PT_FPR3_HI  0xB0
+#define PT_FPR3_LO  0xB4
+#define PT_FPR4_HI  0xB8
+#define PT_FPR4_LO  0xBC
+#define PT_FPR5_HI  0xC0
+#define PT_FPR5_LO  0xC4
+#define PT_FPR6_HI  0xC8
+#define PT_FPR6_LO  0xCC
+#define PT_FPR7_HI  0xD0
+#define PT_FPR7_LO  0xD4
+#define PT_FPR8_HI  0xD8
+#define PT_FPR8_LO  0XDC
+#define PT_FPR9_HI  0xE0
+#define PT_FPR9_LO  0xE4
+#define PT_FPR10_HI 0xE8
+#define PT_FPR10_LO 0xEC
+#define PT_FPR11_HI 0xF0
+#define PT_FPR11_LO 0xF4
+#define PT_FPR12_HI 0xF8
+#define PT_FPR12_LO 0xFC
+#define PT_FPR13_HI 0x100
+#define PT_FPR13_LO 0x104
+#define PT_FPR14_HI 0x108
+#define PT_FPR14_LO 0x10C
+#define PT_FPR15_HI 0x110
+#define PT_FPR15_LO 0x114
+#define PT_CR_9	    0x118
+#define PT_CR_10    0x11C
+#define PT_CR_11    0x120
+#define PT_IEEE_IP  0x13C
+#define PT_LASTOFF  PT_IEEE_IP
+#define PT_ENDREGS  0x140-1
+
+#define GPR_SIZE	4
+#define CR_SIZE		4
+
+#define STACK_FRAME_OVERHEAD	96	/* size of minimum stack frame */
+
+#else /* __s390x__ */
+
+#define PT_PSWMASK  0x00
+#define PT_PSWADDR  0x08
+#define PT_GPR0     0x10
+#define PT_GPR1     0x18
+#define PT_GPR2     0x20
+#define PT_GPR3     0x28
+#define PT_GPR4     0x30
+#define PT_GPR5     0x38
+#define PT_GPR6     0x40
+#define PT_GPR7     0x48
+#define PT_GPR8     0x50
+#define PT_GPR9     0x58
+#define PT_GPR10    0x60
+#define PT_GPR11    0x68
+#define PT_GPR12    0x70
+#define PT_GPR13    0x78
+#define PT_GPR14    0x80
+#define PT_GPR15    0x88
+#define PT_ACR0     0x90
+#define PT_ACR1     0x94
+#define PT_ACR2     0x98
+#define PT_ACR3     0x9C
+#define PT_ACR4	    0xA0
+#define PT_ACR5	    0xA4
+#define PT_ACR6	    0xA8
+#define PT_ACR7	    0xAC
+#define PT_ACR8	    0xB0
+#define PT_ACR9	    0xB4
+#define PT_ACR10    0xB8
+#define PT_ACR11    0xBC
+#define PT_ACR12    0xC0
+#define PT_ACR13    0xC4
+#define PT_ACR14    0xC8
+#define PT_ACR15    0xCC
+#define PT_ORIGGPR2 0xD0
+#define PT_FPC	    0xD8
+#define PT_FPR0     0xE0
+#define PT_FPR1     0xE8
+#define PT_FPR2     0xF0
+#define PT_FPR3     0xF8
+#define PT_FPR4     0x100
+#define PT_FPR5     0x108
+#define PT_FPR6     0x110
+#define PT_FPR7     0x118
+#define PT_FPR8     0x120
+#define PT_FPR9     0x128
+#define PT_FPR10    0x130
+#define PT_FPR11    0x138
+#define PT_FPR12    0x140
+#define PT_FPR13    0x148
+#define PT_FPR14    0x150
+#define PT_FPR15    0x158
+#define PT_CR_9     0x160
+#define PT_CR_10    0x168
+#define PT_CR_11    0x170
+#define PT_IEEE_IP  0x1A8
+#define PT_LASTOFF  PT_IEEE_IP
+#define PT_ENDREGS  0x1B0-1
+
+#define GPR_SIZE	8
+#define CR_SIZE		8
+
+#define STACK_FRAME_OVERHEAD	160	 /* size of minimum stack frame */
+
+#endif /* __s390x__ */
+
+#define NUM_GPRS	16
+#define NUM_FPRS	16
+#define NUM_CRS		16
+#define NUM_ACRS	16
+
+#define NUM_CR_WORDS	3
+
+#define FPR_SIZE	8
+#define FPC_SIZE	4
+#define FPC_PAD_SIZE	4 /* gcc insists on aligning the fpregs */
+#define ACR_SIZE	4
+
+
+#define PTRACE_OLDSETOPTIONS	     21
+
+#ifndef __ASSEMBLY__
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+typedef union {
+	float	f;
+	double	d;
+	__u64	ui;
+	struct
+	{
+		__u32 hi;
+		__u32 lo;
+	} fp;
+} freg_t;
+
+typedef struct {
+	__u32	fpc;
+	__u32	pad;
+	freg_t	fprs[NUM_FPRS];
+} s390_fp_regs;
+
+#define FPC_EXCEPTION_MASK	0xF8000000
+#define FPC_FLAGS_MASK		0x00F80000
+#define FPC_DXC_MASK		0x0000FF00
+#define FPC_RM_MASK		0x00000003
+
+/* this typedef defines how a Program Status Word looks like */
+typedef struct {
+	unsigned long mask;
+	unsigned long addr;
+} __attribute__ ((aligned(8))) psw_t;
+
+#ifndef __s390x__
+
+#define PSW_MASK_PER		0x40000000UL
+#define PSW_MASK_DAT		0x04000000UL
+#define PSW_MASK_IO		0x02000000UL
+#define PSW_MASK_EXT		0x01000000UL
+#define PSW_MASK_KEY		0x00F00000UL
+#define PSW_MASK_BASE		0x00080000UL	/* always one */
+#define PSW_MASK_MCHECK		0x00040000UL
+#define PSW_MASK_WAIT		0x00020000UL
+#define PSW_MASK_PSTATE		0x00010000UL
+#define PSW_MASK_ASC		0x0000C000UL
+#define PSW_MASK_CC		0x00003000UL
+#define PSW_MASK_PM		0x00000F00UL
+#define PSW_MASK_RI		0x00000000UL
+#define PSW_MASK_EA		0x00000000UL
+#define PSW_MASK_BA		0x00000000UL
+
+#define PSW_MASK_USER		0x0000FF00UL
+
+#define PSW_ADDR_AMODE		0x80000000UL
+#define PSW_ADDR_INSN		0x7FFFFFFFUL
+
+#define PSW_DEFAULT_KEY		(((unsigned long) PAGE_DEFAULT_ACC) << 20)
+
+#define PSW_ASC_PRIMARY		0x00000000UL
+#define PSW_ASC_ACCREG		0x00004000UL
+#define PSW_ASC_SECONDARY	0x00008000UL
+#define PSW_ASC_HOME		0x0000C000UL
+
+#else /* __s390x__ */
+
+#define PSW_MASK_PER		0x4000000000000000UL
+#define PSW_MASK_DAT		0x0400000000000000UL
+#define PSW_MASK_IO		0x0200000000000000UL
+#define PSW_MASK_EXT		0x0100000000000000UL
+#define PSW_MASK_BASE		0x0000000000000000UL
+#define PSW_MASK_KEY		0x00F0000000000000UL
+#define PSW_MASK_MCHECK		0x0004000000000000UL
+#define PSW_MASK_WAIT		0x0002000000000000UL
+#define PSW_MASK_PSTATE		0x0001000000000000UL
+#define PSW_MASK_ASC		0x0000C00000000000UL
+#define PSW_MASK_CC		0x0000300000000000UL
+#define PSW_MASK_PM		0x00000F0000000000UL
+#define PSW_MASK_RI		0x0000008000000000UL
+#define PSW_MASK_EA		0x0000000100000000UL
+#define PSW_MASK_BA		0x0000000080000000UL
+
+#define PSW_MASK_USER		0x0000FF0180000000UL
+
+#define PSW_ADDR_AMODE		0x0000000000000000UL
+#define PSW_ADDR_INSN		0xFFFFFFFFFFFFFFFFUL
+
+#define PSW_DEFAULT_KEY		(((unsigned long) PAGE_DEFAULT_ACC) << 52)
+
+#define PSW_ASC_PRIMARY		0x0000000000000000UL
+#define PSW_ASC_ACCREG		0x0000400000000000UL
+#define PSW_ASC_SECONDARY	0x0000800000000000UL
+#define PSW_ASC_HOME		0x0000C00000000000UL
+
+#endif /* __s390x__ */
+
+
+/*
+ * The s390_regs structure is used to define the elf_gregset_t.
+ */
+typedef struct {
+	psw_t psw;
+	unsigned long gprs[NUM_GPRS];
+	unsigned int  acrs[NUM_ACRS];
+	unsigned long orig_gpr2;
+} s390_regs;
+
+/*
+ * The user_pt_regs structure exports the beginning of
+ * the in-kernel pt_regs structure to user space.
+ */
+typedef struct {
+	unsigned long args[1];
+	psw_t psw;
+	unsigned long gprs[NUM_GPRS];
+} user_pt_regs;
+
+/*
+ * Now for the user space program event recording (trace) definitions.
+ * The following structures are used only for the ptrace interface, don't
+ * touch or even look at it if you don't want to modify the user-space
+ * ptrace interface. In particular stay away from it for in-kernel PER.
+ */
+typedef struct {
+	unsigned long cr[NUM_CR_WORDS];
+} per_cr_words;
+
+#define PER_EM_MASK 0xE8000000UL
+
+typedef struct {
+#ifdef __s390x__
+	unsigned		       : 32;
+#endif /* __s390x__ */
+	unsigned em_branching	       : 1;
+	unsigned em_instruction_fetch  : 1;
+	/*
+	 * Switching on storage alteration automatically fixes
+	 * the storage alteration event bit in the users std.
+	 */
+	unsigned em_storage_alteration : 1;
+	unsigned em_gpr_alt_unused     : 1;
+	unsigned em_store_real_address : 1;
+	unsigned		       : 3;
+	unsigned branch_addr_ctl       : 1;
+	unsigned		       : 1;
+	unsigned storage_alt_space_ctl : 1;
+	unsigned		       : 21;
+	unsigned long starting_addr;
+	unsigned long ending_addr;
+} per_cr_bits;
+
+typedef struct {
+	unsigned short perc_atmid;
+	unsigned long address;
+	unsigned char access_id;
+} per_lowcore_words;
+
+typedef struct {
+	unsigned perc_branching		 : 1;
+	unsigned perc_instruction_fetch  : 1;
+	unsigned perc_storage_alteration : 1;
+	unsigned perc_gpr_alt_unused	 : 1;
+	unsigned perc_store_real_address : 1;
+	unsigned			 : 3;
+	unsigned atmid_psw_bit_31	 : 1;
+	unsigned atmid_validity_bit	 : 1;
+	unsigned atmid_psw_bit_32	 : 1;
+	unsigned atmid_psw_bit_5	 : 1;
+	unsigned atmid_psw_bit_16	 : 1;
+	unsigned atmid_psw_bit_17	 : 1;
+	unsigned si			 : 2;
+	unsigned long address;
+	unsigned			 : 4;
+	unsigned access_id		 : 4;
+} per_lowcore_bits;
+
+typedef struct {
+	union {
+		per_cr_words   words;
+		per_cr_bits    bits;
+	} control_regs;
+	/*
+	 * The single_step and instruction_fetch bits are obsolete,
+	 * the kernel always sets them to zero. To enable single
+	 * stepping use ptrace(PTRACE_SINGLESTEP) instead.
+	 */
+	unsigned  single_step	    : 1;
+	unsigned  instruction_fetch : 1;
+	unsigned		    : 30;
+	/*
+	 * These addresses are copied into cr10 & cr11 if single
+	 * stepping is switched off
+	 */
+	unsigned long starting_addr;
+	unsigned long ending_addr;
+	union {
+		per_lowcore_words words;
+		per_lowcore_bits  bits;
+	} lowcore;
+} per_struct;
+
+typedef struct {
+	unsigned int  len;
+	unsigned long kernel_addr;
+	unsigned long process_addr;
+} ptrace_area;
+
+/*
+ * S/390 specific non posix ptrace requests. I chose unusual values so
+ * they are unlikely to clash with future ptrace definitions.
+ */
+#define PTRACE_PEEKUSR_AREA	      0x5000
+#define PTRACE_POKEUSR_AREA	      0x5001
+#define PTRACE_PEEKTEXT_AREA	      0x5002
+#define PTRACE_PEEKDATA_AREA	      0x5003
+#define PTRACE_POKETEXT_AREA	      0x5004
+#define PTRACE_POKEDATA_AREA	      0x5005
+#define PTRACE_GET_LAST_BREAK	      0x5006
+#define PTRACE_PEEK_SYSTEM_CALL       0x5007
+#define PTRACE_POKE_SYSTEM_CALL	      0x5008
+#define PTRACE_ENABLE_TE	      0x5009
+#define PTRACE_DISABLE_TE	      0x5010
+#define PTRACE_TE_ABORT_RAND	      0x5011
+
+/*
+ * The numbers chosen here are somewhat arbitrary but absolutely MUST
+ * not overlap with any of the number assigned in <linux/ptrace.h>.
+ */
+#define PTRACE_SINGLEBLOCK	12	/* resume execution until next branch */
+
+/*
+ * PT_PROT definition is loosely based on hppa bsd definition in
+ * gdb/hppab-nat.c
+ */
+#define PTRACE_PROT			  21
+
+typedef enum {
+	ptprot_set_access_watchpoint,
+	ptprot_set_write_watchpoint,
+	ptprot_disable_watchpoint
+} ptprot_flags;
+
+typedef struct {
+	unsigned long lowaddr;
+	unsigned long hiaddr;
+	ptprot_flags prot;
+} ptprot_area;
+
+/* Sequence of bytes for breakpoint illegal instruction.  */
+#define S390_BREAKPOINT     {0x0,0x1}
+#define S390_BREAKPOINT_U16 ((__u16)0x0001)
+#define S390_SYSCALL_OPCODE ((__u16)0x0a00)
+#define S390_SYSCALL_SIZE   2
+
+/*
+ * The user_regs_struct defines the way the user registers are
+ * store on the stack for signal handling.
+ */
+struct user_regs_struct {
+	psw_t psw;
+	unsigned long gprs[NUM_GPRS];
+	unsigned int  acrs[NUM_ACRS];
+	unsigned long orig_gpr2;
+	s390_fp_regs fp_regs;
+	/*
+	 * These per registers are in here so that gdb can modify them
+	 * itself as there is no "official" ptrace interface for hardware
+	 * watchpoints. This is the way intel does it.
+	 */
+	per_struct per_info;
+	unsigned long ieee_instruction_pointer;	/* obsolete, always 0 */
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_S390_PTRACE_H */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 793690fbda3625defd130262db1e94b57152c0bc..800104c8a3edfee7f4f52a33b8451a51ee0ed90a 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -13,173 +13,176 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS	18	/* N 32-bit words worth of info */
-#define NBUGINTS	1	/* N 32-bit bug flags */
+#define NCAPINTS			18	   /* N 32-bit words worth of info */
+#define NBUGINTS			1	   /* N 32-bit bug flags */
 
 /*
  * Note: If the comment begins with a quoted string, that string is used
  * in /proc/cpuinfo instead of the macro name.  If the string is "",
  * this feature bit is not displayed in /proc/cpuinfo at all.
+ *
+ * When adding new features here that depend on other features,
+ * please update the table in kernel/cpu/cpuid-deps.c as well.
  */
 
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU		( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME		( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE		( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE		( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC		( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR		( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE		( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE		( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8		( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC	( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP		( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR	( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE		( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA		( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV	( 0*32+15) /* CMOV instructions */
-					  /* (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT		( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36	( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN		( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH	( 0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS		( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI	( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX		( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR	( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM		( 0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2	( 0*32+26) /* "sse2" */
-#define X86_FEATURE_SELFSNOOP	( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT		( 0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC		( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64	( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE		( 0*32+31) /* Pending Break Enable */
+/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
+#define X86_FEATURE_FPU			( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME			( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE			( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE			( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC			( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR			( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE			( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE			( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8			( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC		( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP			( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR		( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE			( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA			( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV		( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT			( 0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36		( 0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN			( 0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH		( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS			( 0*32+21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI		( 0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX			( 0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR		( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM			( 0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2		( 0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP		( 0*32+27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT			( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC			( 0*32+29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64		( 0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE			( 0*32+31) /* Pending Break Enable */
 
 /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
 /* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL	( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP		( 1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX		( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT	( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT	( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
-#define X86_FEATURE_GBPAGES	( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP	( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM		( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT	( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW	( 1*32+31) /* 3DNow! */
+#define X86_FEATURE_SYSCALL		( 1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP			( 1*32+19) /* MP Capable */
+#define X86_FEATURE_NX			( 1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT		( 1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT		( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES		( 1*32+26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP		( 1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM			( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT		( 1*32+30) /* AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW		( 1*32+31) /* 3DNow */
 
 /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY	( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN	( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI	( 2*32+ 3) /* LongRun table interface */
+#define X86_FEATURE_RECOVERY		( 2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN		( 2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI		( 2*32+ 3) /* LongRun table interface */
 
 /* Other features, Linux-defined mapping, word 3 */
 /* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX	( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR	( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR	( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR	( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8		( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7		( 3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3		( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4		( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP		( 3*32+ 9) /* smp kernel running on up */
-#define X86_FEATURE_ART		( 3*32+10) /* Platform has always running timer (ART) */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS	( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS		( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32	( 3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32	( 3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD	( 3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-#define X86_FEATURE_ACC_POWER	( 3*32+19) /* AMD Accumulated Power Mechanism */
-#define X86_FEATURE_NOPL	( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS	( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC	( 3*32+24) /* TSC does not stop in C states */
-#define X86_FEATURE_CPUID	( 3*32+25) /* CPU has CPUID instruction itself */
-#define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
+#define X86_FEATURE_CXMMX		( 3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR		( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR		( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR		( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+
+/* CPU types for specific tunings: */
+#define X86_FEATURE_K8			( 3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7			( 3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3			( 3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4			( 3*32+ 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC	( 3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP			( 3*32+ 9) /* SMP kernel running on UP */
+#define X86_FEATURE_ART			( 3*32+10) /* Always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON	( 3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS		( 3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS			( 3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
+#define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
+#define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC	( 3*32+17) /* "" MFENCE synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
+#define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS		( 3*32+21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY		( 3*32+22) /* CPU topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE	( 3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC		( 3*32+24) /* TSC does not stop in C states */
+#define X86_FEATURE_CPUID		( 3*32+25) /* CPU has CPUID instruction itself */
+#define X86_FEATURE_EXTD_APICID		( 3*32+26) /* Extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM		( 3*32+27) /* AMD multi-node processor */
+#define X86_FEATURE_APERFMPERF		( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+#define X86_FEATURE_NONSTOP_TSC_S3	( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ	( 3*32+31) /* TSC has known frequency */
 
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ	( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64	( 4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT	( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL	( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-#define X86_FEATURE_VMX		( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX		( 4*32+ 6) /* Safer mode */
-#define X86_FEATURE_EST		( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2		( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3	( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID		( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG	( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA		( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16	( 4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR	( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM	( 4*32+15) /* Performance Capabilities */
-#define X86_FEATURE_PCID	( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA		( 4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_1	( 4*32+19) /* "sse4_1" SSE-4.1 */
-#define X86_FEATURE_XMM4_2	( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC	( 4*32+21) /* x2APIC */
-#define X86_FEATURE_MOVBE	( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* Tsc deadline timer */
-#define X86_FEATURE_AES		( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE	( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE	( 4*32+27) /* "" XSAVE enabled in the OS */
-#define X86_FEATURE_AVX		( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C	( 4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND	( 4*32+30) /* The RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR	( 4*32+31) /* Running on a hypervisor */
+/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
+#define X86_FEATURE_XMM3		( 4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ		( 4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64		( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT		( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
+#define X86_FEATURE_DSCPL		( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
+#define X86_FEATURE_VMX			( 4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX			( 4*32+ 6) /* Safer Mode eXtensions */
+#define X86_FEATURE_EST			( 4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2			( 4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3		( 4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID			( 4*32+10) /* Context ID */
+#define X86_FEATURE_SDBG		( 4*32+11) /* Silicon Debug */
+#define X86_FEATURE_FMA			( 4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16		( 4*32+13) /* CMPXCHG16B instruction */
+#define X86_FEATURE_XTPR		( 4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM		( 4*32+15) /* Perf/Debug Capabilities MSR */
+#define X86_FEATURE_PCID		( 4*32+17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA			( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1		( 4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2		( 4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC		( 4*32+21) /* X2APIC */
+#define X86_FEATURE_MOVBE		( 4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT		( 4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* TSC deadline timer */
+#define X86_FEATURE_AES			( 4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE		( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
+#define X86_FEATURE_OSXSAVE		( 4*32+27) /* "" XSAVE instruction enabled in the OS */
+#define X86_FEATURE_AVX			( 4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C		( 4*32+29) /* 16-bit FP conversions */
+#define X86_FEATURE_RDRAND		( 4*32+30) /* RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR		( 4*32+31) /* Running on a hypervisor */
 
 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE	( 5*32+ 2) /* "rng" RNG present (xstore) */
-#define X86_FEATURE_XSTORE_EN	( 5*32+ 3) /* "rng_en" RNG enabled */
-#define X86_FEATURE_XCRYPT	( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-#define X86_FEATURE_XCRYPT_EN	( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2	( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN	( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE		( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN	( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM		( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN	( 5*32+13) /* PMM enabled */
+#define X86_FEATURE_XSTORE		( 5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN		( 5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT		( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN		( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2		( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN		( 5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE			( 5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN		( 5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM			( 5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN		( 5*32+13) /* PMM enabled */
 
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM	( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY	( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM		( 6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC	( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY	( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM		( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A	( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW	( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS		( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP		( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT	( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT		( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP		( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4	( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE		( 6*32+17) /* translation cache extension */
-#define X86_FEATURE_NODEID_MSR	( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM		( 6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT	( 6*32+22) /* topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT	(6*32+26) /* data breakpoint extension */
-#define X86_FEATURE_PTSC	( 6*32+27) /* performance time-stamp counter */
-#define X86_FEATURE_PERFCTR_LLC	( 6*32+28) /* Last Level Cache performance counter extensions */
-#define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
+#define X86_FEATURE_LAHF_LM		( 6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY		( 6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM			( 6*32+ 2) /* Secure Virtual Machine */
+#define X86_FEATURE_EXTAPIC		( 6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY		( 6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM			( 6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A		( 6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE		( 6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH	( 6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW		( 6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS			( 6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP			( 6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT		( 6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT			( 6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP			( 6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4		( 6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE			( 6*32+17) /* Translation Cache Extension */
+#define X86_FEATURE_NODEID_MSR		( 6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM			( 6*32+21) /* Trailing Bit Manipulations */
+#define X86_FEATURE_TOPOEXT		( 6*32+22) /* Topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE	( 6*32+23) /* Core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB		( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT		( 6*32+26) /* Data breakpoint extension */
+#define X86_FEATURE_PTSC		( 6*32+27) /* Performance time-stamp counter */
+#define X86_FEATURE_PERFCTR_LLC		( 6*32+28) /* Last Level Cache performance counter extensions */
+#define X86_FEATURE_MWAITX		( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
 
 /*
  * Auxiliary flags: Linux defined - For features scattered in various
@@ -187,146 +190,155 @@
  *
  * Reuse free bits when adding new feature flags!
  */
-#define X86_FEATURE_RING3MWAIT	( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
-#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
-#define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-#define X86_FEATURE_CAT_L3	( 7*32+ 4) /* Cache Allocation Technology L3 */
-#define X86_FEATURE_CAT_L2	( 7*32+ 5) /* Cache Allocation Technology L2 */
-#define X86_FEATURE_CDP_L3	( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_RING3MWAIT		( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
+#define X86_FEATURE_CPUID_FAULT		( 7*32+ 1) /* Intel CPUID faulting */
+#define X86_FEATURE_CPB			( 7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB			( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
+#define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
+#define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
 
-#define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_SME		( 7*32+10) /* AMD Secure Memory Encryption */
+#define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
 
-#define X86_FEATURE_INTEL_PPIN	( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
+#define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS	( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
-#define X86_FEATURE_MBA         ( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
 
 /* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT         ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID        ( 8*32+ 4) /* Intel Virtual Processor ID */
+#define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
+#define X86_FEATURE_VNMI		( 8*32+ 1) /* Intel Virtual NMI */
+#define X86_FEATURE_FLEXPRIORITY	( 8*32+ 2) /* Intel FlexPriority */
+#define X86_FEATURE_EPT			( 8*32+ 3) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID		( 8*32+ 4) /* Intel Virtual Processor ID */
 
-#define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
-#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
+#define X86_FEATURE_VMMCALL		( 8*32+15) /* Prefer VMMCALL to VMCALL */
+#define X86_FEATURE_XENPV		( 8*32+16) /* "" Xen paravirtual guest */
 
 
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE	( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-#define X86_FEATURE_TSC_ADJUST	( 9*32+ 1) /* TSC adjustment MSR 0x3b */
-#define X86_FEATURE_BMI1	( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE		( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2	( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP	( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2	( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-#define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_RDT_A	( 9*32+15) /* Resource Director Technology Allocation */
-#define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_AVX512DQ	( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
-#define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
-#define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_AVX512IFMA  ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
-#define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI	( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-#define X86_FEATURE_AVX512BW	( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
-#define X86_FEATURE_AVX512VL	( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
+#define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
+#define X86_FEATURE_TSC_ADJUST		( 9*32+ 1) /* TSC adjustment MSR 0x3B */
+#define X86_FEATURE_BMI1		( 9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE			( 9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2		( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP		( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2		( 9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS		( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
+#define X86_FEATURE_INVPCID		( 9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM			( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM			( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_MPX			( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_RDT_A		( 9*32+15) /* Resource Director Technology Allocation */
+#define X86_FEATURE_AVX512F		( 9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ		( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED		( 9*32+18) /* RDSEED instruction */
+#define X86_FEATURE_ADX			( 9*32+19) /* ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP		( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA		( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
+#define X86_FEATURE_CLFLUSHOPT		( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB		( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_AVX512PF		( 9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER		( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD		( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI		( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW		( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL		( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
 
-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
-#define X86_FEATURE_XSAVEOPT	(10*32+ 0) /* XSAVEOPT */
-#define X86_FEATURE_XSAVEC	(10*32+ 1) /* XSAVEC */
-#define X86_FEATURE_XGETBV1	(10*32+ 2) /* XGETBV with ECX = 1 */
-#define X86_FEATURE_XSAVES	(10*32+ 3) /* XSAVES/XRSTORS */
+/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
+#define X86_FEATURE_XSAVEOPT		(10*32+ 0) /* XSAVEOPT instruction */
+#define X86_FEATURE_XSAVEC		(10*32+ 1) /* XSAVEC instruction */
+#define X86_FEATURE_XGETBV1		(10*32+ 2) /* XGETBV with ECX = 1 instruction */
+#define X86_FEATURE_XSAVES		(10*32+ 3) /* XSAVES/XRSTORS instructions */
 
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
-#define X86_FEATURE_CQM_LLC	(11*32+ 1) /* LLC QoS if 1 */
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
+#define X86_FEATURE_CQM_LLC		(11*32+ 1) /* LLC QoS if 1 */
 
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC	(12*32+ 0) /* LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL	(12*32+ 1) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL	(12*32+ 2) /* LLC Local MBM monitoring */
 
-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
-#define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
-#define X86_FEATURE_IRPERF	(13*32+1) /* Instructions Retired Count */
+/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
+#define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
+#define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
 
-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
-#define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA		(14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT	(14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN		(14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS		(14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP		(14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY	(14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP	(14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+#define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA			(14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT		(14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN			(14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS			(14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP			(14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY		(14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW	(14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP		(14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ		(14*32+11) /* HWP Package Level Request */
 
-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
-#define X86_FEATURE_NPT		(15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV	(15*32+ 1) /* LBR Virtualization support */
-#define X86_FEATURE_SVML	(15*32+ 2) /* "svm_lock" SVM locking MSR */
-#define X86_FEATURE_NRIPS	(15*32+ 3) /* "nrip_save" SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR  (15*32+ 4) /* "tsc_scale" TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN   (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-#define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
-#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
-#define X86_FEATURE_VGIF	(15*32+16) /* Virtual GIF */
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
+#define X86_FEATURE_NPT			(15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV		(15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML		(15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS		(15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR		(15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN		(15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID		(15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS	(15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER		(15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD		(15*32+12) /* pause filter threshold */
+#define X86_FEATURE_AVIC		(15*32+13) /* Virtual Interrupt Controller */
+#define X86_FEATURE_V_VMSAVE_VMLOAD	(15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF		(15*32+16) /* Virtual GIF */
 
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
-#define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
-#define X86_FEATURE_PKU		(16*32+ 3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE	(16*32+ 4) /* OS Protection Keys Enable */
-#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_LA57	(16*32+16) /* 5-level page tables */
-#define X86_FEATURE_RDPID	(16*32+22) /* RDPID instruction */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
+#define X86_FEATURE_AVX512VBMI		(16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
+#define X86_FEATURE_UMIP		(16*32+ 2) /* User Mode Instruction Protection */
+#define X86_FEATURE_PKU			(16*32+ 3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE		(16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_AVX512_VBMI2	(16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_GFNI		(16*32+ 8) /* Galois Field New Instructions */
+#define X86_FEATURE_VAES		(16*32+ 9) /* Vector AES */
+#define X86_FEATURE_VPCLMULQDQ		(16*32+10) /* Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI		(16*32+11) /* Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG	(16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_AVX512_VPOPCNTDQ	(16*32+14) /* POPCNT for vectors of DW/QW */
+#define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */
+#define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */
 
-/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
-#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
-#define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error containment and recovery */
-#define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
+/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
+#define X86_FEATURE_OVERFLOW_RECOV	(17*32+ 0) /* MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR		(17*32+ 1) /* Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA		(17*32+ 3) /* Scalable MCA */
 
 /*
  * BUG word(s)
  */
-#define X86_BUG(x)		(NCAPINTS*32 + (x))
+#define X86_BUG(x)			(NCAPINTS*32 + (x))
 
-#define X86_BUG_F00F		X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV		X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA		X86_BUG(2) /* Cyrix 6x86 coma */
-#define X86_BUG_AMD_TLB_MMATCH	X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
-#define X86_BUG_AMD_APIC_C1E	X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_F00F			X86_BUG(0) /* Intel F00F */
+#define X86_BUG_FDIV			X86_BUG(1) /* FPU FDIV */
+#define X86_BUG_COMA			X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_AMD_TLB_MMATCH		X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+#define X86_BUG_AMD_APIC_C1E		X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+#define X86_BUG_11AP			X86_BUG(5) /* Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK		X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR		X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS		X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
 #ifdef CONFIG_X86_32
 /*
  * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
  * to avoid confusion.
  */
-#define X86_BUG_ESPFIX		X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#define X86_BUG_ESPFIX			X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
 #endif
-#define X86_BUG_NULL_SEG	X86_BUG(10) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE	X86_BUG(11) /* SWAPGS without input dep on GS */
-#define X86_BUG_MONITOR		X86_BUG(12) /* IPI required to wake up remote CPU */
-#define X86_BUG_AMD_E400	X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_NULL_SEG		X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE		X86_BUG(11) /* SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
+#define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index c10c9128f54e6b7296014a74e7a253a1eedaacd9..14d6d50073142b0f49b06850ccd0d394546479ee 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -16,6 +16,12 @@
 # define DISABLE_MPX	(1<<(X86_FEATURE_MPX & 31))
 #endif
 
+#ifdef CONFIG_X86_INTEL_UMIP
+# define DISABLE_UMIP	0
+#else
+# define DISABLE_UMIP	(1<<(X86_FEATURE_UMIP & 31))
+#endif
+
 #ifdef CONFIG_X86_64
 # define DISABLE_VME		(1<<(X86_FEATURE_VME & 31))
 # define DISABLE_K6_MTRR	(1<<(X86_FEATURE_K6_MTRR & 31))
@@ -63,7 +69,7 @@
 #define DISABLED_MASK13	0
 #define DISABLED_MASK14	0
 #define DISABLED_MASK15	0
-#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57)
+#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
 #define DISABLED_MASK17	0
 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index bde77d7c4390ab0a6d14332cceca7dc7c8571894..37292bb5ce6065c31c21b2af61fa077de432a081 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -6,7 +6,7 @@ RM ?= rm -f
 
 # Make the path relative to DESTDIR, not prefix
 ifndef DESTDIR
-prefix?=$(HOME)
+prefix ?= /usr/local
 endif
 mandir ?= $(prefix)/share/man
 man8dir = $(mandir)/man8
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 813826c50936b174ce11821b0e606ce0e49668cd..ec3052c0b004011573861231edcdd0d0e982c088 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -45,8 +45,8 @@ $(LIBBPF)-clean:
 	$(call QUIET_CLEAN, libbpf)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null
 
-prefix = /usr
-bash_compdir ?= $(prefix)/share/bash-completion/completions
+prefix = /usr/local
+bash_compdir ?= /usr/share/bash-completion/completions
 
 CC = gcc
 
@@ -76,6 +76,7 @@ clean: $(LIBBPF)-clean
 	$(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
 
 install:
+	install -m 0755 -d $(prefix)/sbin
 	install $(OUTPUT)bpftool $(prefix)/sbin/bpftool
 	install -m 0755 -d $(bash_compdir)
 	install -m 0644 bash-completion/bpftool $(bash_compdir)
@@ -88,5 +89,5 @@ doc-install:
 
 FORCE:
 
-.PHONY: all clean FORCE
+.PHONY: all clean FORCE install doc doc-install
 .DEFAULT_GOAL := all
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index d6e4762170a4464d029415edc189030f55841844..d294bc8168bed8cc72f8926c6947b98aba0cbcc6 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -58,11 +58,19 @@ bool show_pinned;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
 
+static void __noreturn clean_and_exit(int i)
+{
+	if (json_output)
+		jsonw_destroy(&json_wtr);
+
+	exit(i);
+}
+
 void usage(void)
 {
 	last_do_help(last_argc - 1, last_argv + 1);
 
-	exit(-1);
+	clean_and_exit(-1);
 }
 
 static int do_help(int argc, char **argv)
@@ -280,6 +288,7 @@ int main(int argc, char **argv)
 	hash_init(prog_table.table);
 	hash_init(map_table.table);
 
+	opterr = 0;
 	while ((opt = getopt_long(argc, argv, "Vhpjf",
 				  options, NULL)) >= 0) {
 		switch (opt) {
@@ -291,13 +300,25 @@ int main(int argc, char **argv)
 			pretty_output = true;
 			/* fall through */
 		case 'j':
-			json_output = true;
+			if (!json_output) {
+				json_wtr = jsonw_new(stdout);
+				if (!json_wtr) {
+					p_err("failed to create JSON writer");
+					return -1;
+				}
+				json_output = true;
+			}
+			jsonw_pretty(json_wtr, pretty_output);
 			break;
 		case 'f':
 			show_pinned = true;
 			break;
 		default:
-			usage();
+			p_err("unrecognized option '%s'", argv[optind - 1]);
+			if (json_output)
+				clean_and_exit(-1);
+			else
+				usage();
 		}
 	}
 
@@ -306,15 +327,6 @@ int main(int argc, char **argv)
 	if (argc < 0)
 		usage();
 
-	if (json_output) {
-		json_wtr = jsonw_new(stdout);
-		if (!json_wtr) {
-			p_err("failed to create JSON writer");
-			return -1;
-		}
-		jsonw_pretty(json_wtr, pretty_output);
-	}
-
 	bfd_init();
 
 	ret = cmd_select(cmds, argc, argv, do_help);
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 9c191e222d6f824d5d5be3d84611a551a6dbdeca..bff330b49791e5bc6021d8188bd0e637db20ea3b 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -41,6 +41,7 @@
 #include <stdbool.h>
 #include <stdio.h>
 #include <linux/bpf.h>
+#include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/hashtable.h>
 
@@ -50,7 +51,7 @@
 
 #define NEXT_ARG()	({ argc--; argv++; if (argc < 0) usage(); })
 #define NEXT_ARGP()	({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
-#define BAD_ARG()	({ p_err("what is '%s'?\n", *argv); -1; })
+#define BAD_ARG()	({ p_err("what is '%s'?", *argv); -1; })
 
 #define ERR_MAX_LEN	1024
 
@@ -80,7 +81,7 @@ void p_info(const char *fmt, ...);
 
 bool is_prefix(const char *pfx, const char *str);
 void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
-void usage(void) __attribute__((noreturn));
+void usage(void) __noreturn;
 
 struct pinned_obj_table {
 	DECLARE_HASHTABLE(table, 16);
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index e2450c8e88e6f13cfc0934b772880e80b6f18c37..a8c3a33dd185e213f68f97b8719c7ca20198783e 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -523,21 +523,23 @@ static int do_show(int argc, char **argv)
 				break;
 			p_err("can't get next map: %s%s", strerror(errno),
 			      errno == EINVAL ? " -- kernel too old?" : "");
-			return -1;
+			break;
 		}
 
 		fd = bpf_map_get_fd_by_id(id);
 		if (fd < 0) {
+			if (errno == ENOENT)
+				continue;
 			p_err("can't get map by id (%u): %s",
 			      id, strerror(errno));
-			return -1;
+			break;
 		}
 
 		err = bpf_obj_get_info_by_fd(fd, &info, &len);
 		if (err) {
 			p_err("can't get map info: %s", strerror(errno));
 			close(fd);
-			return -1;
+			break;
 		}
 
 		if (json_output)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index ad619b96c27664300df9a78bc28958c4d8662a4b..dded77345bfb05fef1fd50a3f3e7f54d43ae5cb7 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -382,6 +382,8 @@ static int do_show(int argc, char **argv)
 
 		fd = bpf_prog_get_fd_by_id(id);
 		if (fd < 0) {
+			if (errno == ENOENT)
+				continue;
 			p_err("can't get prog by id (%u): %s",
 			      id, strerror(errno));
 			err = -1;
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index eaa3bec273c8e21fcb15fbf5ee0407017b12c036..4c99c57736cefd155326293be053a7c4ffef9c6a 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -193,11 +193,14 @@ static void kvp_update_mem_state(int pool)
 	for (;;) {
 		readp = &record[records_read];
 		records_read += fread(readp, sizeof(struct kvp_record),
-					ENTRIES_PER_BLOCK * num_blocks,
-					filep);
+				ENTRIES_PER_BLOCK * num_blocks - records_read,
+				filep);
 
 		if (ferror(filep)) {
-			syslog(LOG_ERR, "Failed to read file, pool: %d", pool);
+			syslog(LOG_ERR,
+				"Failed to read file, pool: %d; error: %d %s",
+				 pool, errno, strerror(errno));
+			kvp_release_lock(pool);
 			exit(EXIT_FAILURE);
 		}
 
@@ -210,6 +213,7 @@ static void kvp_update_mem_state(int pool)
 
 			if (record == NULL) {
 				syslog(LOG_ERR, "malloc failed");
+				kvp_release_lock(pool);
 				exit(EXIT_FAILURE);
 			}
 			continue;
@@ -224,15 +228,11 @@ static void kvp_update_mem_state(int pool)
 	fclose(filep);
 	kvp_release_lock(pool);
 }
+
 static int kvp_file_init(void)
 {
 	int  fd;
-	FILE *filep;
-	size_t records_read;
 	char *fname;
-	struct kvp_record *record;
-	struct kvp_record *readp;
-	int num_blocks;
 	int i;
 	int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
 
@@ -246,61 +246,19 @@ static int kvp_file_init(void)
 
 	for (i = 0; i < KVP_POOL_COUNT; i++) {
 		fname = kvp_file_info[i].fname;
-		records_read = 0;
-		num_blocks = 1;
 		sprintf(fname, "%s/.kvp_pool_%d", KVP_CONFIG_LOC, i);
 		fd = open(fname, O_RDWR | O_CREAT | O_CLOEXEC, 0644 /* rw-r--r-- */);
 
 		if (fd == -1)
 			return 1;
 
-
-		filep = fopen(fname, "re");
-		if (!filep) {
-			close(fd);
-			return 1;
-		}
-
-		record = malloc(alloc_unit * num_blocks);
-		if (record == NULL) {
-			fclose(filep);
-			close(fd);
-			return 1;
-		}
-		for (;;) {
-			readp = &record[records_read];
-			records_read += fread(readp, sizeof(struct kvp_record),
-					ENTRIES_PER_BLOCK,
-					filep);
-
-			if (ferror(filep)) {
-				syslog(LOG_ERR, "Failed to read file, pool: %d",
-				       i);
-				exit(EXIT_FAILURE);
-			}
-
-			if (!feof(filep)) {
-				/*
-				 * We have more data to read.
-				 */
-				num_blocks++;
-				record = realloc(record, alloc_unit *
-						num_blocks);
-				if (record == NULL) {
-					fclose(filep);
-					close(fd);
-					return 1;
-				}
-				continue;
-			}
-			break;
-		}
 		kvp_file_info[i].fd = fd;
-		kvp_file_info[i].num_blocks = num_blocks;
-		kvp_file_info[i].records = record;
-		kvp_file_info[i].num_records = records_read;
-		fclose(filep);
-
+		kvp_file_info[i].num_blocks = 1;
+		kvp_file_info[i].records = malloc(alloc_unit);
+		if (kvp_file_info[i].records == NULL)
+			return 1;
+		kvp_file_info[i].num_records = 0;
+		kvp_update_mem_state(i);
 	}
 
 	return 0;
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 07fd03c74a775a48a5183f96cb85e27b4f85fedd..04e32f965ad7f038beb2d8db9dc2119e07628744 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -84,8 +84,6 @@
 
 #define uninitialized_var(x) x = *(&(x))
 
-#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
-
 #include <linux/types.h>
 
 /*
@@ -135,20 +133,19 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 /*
  * Prevent the compiler from merging or refetching reads or writes. The
  * compiler is also forbidden from reordering successive instances of
- * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
- * compiler is aware of some particular ordering.  One way to make the
- * compiler aware of ordering is to put the two invocations of READ_ONCE,
- * WRITE_ONCE or ACCESS_ONCE() in different C statements.
+ * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
+ * particular ordering. One way to make the compiler aware of ordering is to
+ * put the two invocations of READ_ONCE or WRITE_ONCE in different C
+ * statements.
  *
- * In contrast to ACCESS_ONCE these two macros will also work on aggregate
- * data types like structs or unions. If the size of the accessed data
- * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
- * READ_ONCE() and WRITE_ONCE()  will fall back to memcpy and print a
- * compile-time warning.
+ * These two macros will also work on aggregate data types like structs or
+ * unions. If the size of the accessed data type exceeds the word size of
+ * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will
+ * fall back to memcpy and print a compile-time warning.
  *
  * Their two major use cases are: (1) Mediating communication between
  * process-level code and irq/NMI handlers, all running on the same CPU,
- * and (2) Ensuring that the compiler does not  fold, spindle, or otherwise
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
  * mutilate accesses that either do not require ordering or that interact
  * with an explicit memory barrier or atomic instruction that provides the
  * required ordering.
diff --git a/tools/include/linux/kmemcheck.h b/tools/include/linux/kmemcheck.h
deleted file mode 100644
index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000
--- a/tools/include/linux/kmemcheck.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h
index 940c1b0756591e8e65792ce6dafbeb568a5aa15d..6b0c36a58fcbc38b67157b53efe2044bd848f137 100644
--- a/tools/include/linux/lockdep.h
+++ b/tools/include/linux/lockdep.h
@@ -48,6 +48,7 @@ static inline int debug_locks_off(void)
 #define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__)
 #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #define pr_warn pr_err
+#define pr_cont pr_err
 
 #define list_del_rcu list_del
 
diff --git a/tools/include/uapi/asm-generic/bpf_perf_event.h b/tools/include/uapi/asm-generic/bpf_perf_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..53815d2cd047a0fbf816e31c9cf92c0a8adfaeef
--- /dev/null
+++ b/tools/include/uapi/asm-generic/bpf_perf_event.h
@@ -0,0 +1,9 @@
+#ifndef _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__
+
+#include <linux/ptrace.h>
+
+/* Export kernel pt_regs structure */
+typedef struct pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ */
diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h
index 2dffcbf705b37857d9289dedd2e5807322382984..653687d9771b9d0e824fe4b25eee079f7fcec1cc 100644
--- a/tools/include/uapi/asm-generic/mman.h
+++ b/tools/include/uapi/asm-generic/mman.h
@@ -13,6 +13,7 @@
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 #define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x40000		/* create a huge page mapping */
+#define MAP_SYNC	0x80000		/* perform synchronous page faults for the mapping */
 
 /* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
 
diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..13a58531e6fa844df407a921385a9356294e9b61
--- /dev/null
+++ b/tools/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,7 @@
+#if defined(__aarch64__)
+#include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h"
+#elif defined(__s390__)
+#include "../../arch/s390/include/uapi/asm/bpf_perf_event.h"
+#else
+#include <uapi/asm-generic/bpf_perf_event.h>
+#endif
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 97677cd6964db099689f96f11b9731c748bebcfa..6fdff5945c8a08f27af713f6b59cb27b315da447 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -737,6 +737,28 @@ struct drm_syncobj_array {
 	__u32 pad;
 };
 
+/* Query current scanout sequence number */
+struct drm_crtc_get_sequence {
+	__u32 crtc_id;		/* requested crtc_id */
+	__u32 active;		/* return: crtc output is active */
+	__u64 sequence;		/* return: most recent vblank sequence */
+	__s64 sequence_ns;	/* return: most recent time of first pixel out */
+};
+
+/* Queue event to be delivered at specified sequence. Time stamp marks
+ * when the first pixel of the refresh cycle leaves the display engine
+ * for the display
+ */
+#define DRM_CRTC_SEQUENCE_RELATIVE		0x00000001	/* sequence is relative to current */
+#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS		0x00000002	/* Use next sequence if we've missed */
+
+struct drm_crtc_queue_sequence {
+	__u32 crtc_id;
+	__u32 flags;
+	__u64 sequence;		/* on input, target sequence. on output, actual sequence */
+	__u64 user_data;	/* user data passed to event */
+};
+
 #if defined(__cplusplus)
 }
 #endif
@@ -819,6 +841,9 @@ extern "C" {
 
 #define DRM_IOCTL_WAIT_VBLANK		DRM_IOWR(0x3a, union drm_wait_vblank)
 
+#define DRM_IOCTL_CRTC_GET_SEQUENCE	DRM_IOWR(0x3b, struct drm_crtc_get_sequence)
+#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE	DRM_IOWR(0x3c, struct drm_crtc_queue_sequence)
+
 #define DRM_IOCTL_UPDATE_DRAW		DRM_IOW(0x3f, struct drm_update_draw)
 
 #define DRM_IOCTL_MODE_GETRESOURCES	DRM_IOWR(0xA0, struct drm_mode_card_res)
@@ -863,6 +888,11 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_RESET		DRM_IOWR(0xC4, struct drm_syncobj_array)
 #define DRM_IOCTL_SYNCOBJ_SIGNAL	DRM_IOWR(0xC5, struct drm_syncobj_array)
 
+#define DRM_IOCTL_MODE_CREATE_LEASE	DRM_IOWR(0xC6, struct drm_mode_create_lease)
+#define DRM_IOCTL_MODE_LIST_LESSEES	DRM_IOWR(0xC7, struct drm_mode_list_lessees)
+#define DRM_IOCTL_MODE_GET_LEASE	DRM_IOWR(0xC8, struct drm_mode_get_lease)
+#define DRM_IOCTL_MODE_REVOKE_LEASE	DRM_IOWR(0xC9, struct drm_mode_revoke_lease)
+
 /**
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
@@ -893,6 +923,7 @@ struct drm_event {
 
 #define DRM_EVENT_VBLANK 0x01
 #define DRM_EVENT_FLIP_COMPLETE 0x02
+#define DRM_EVENT_CRTC_SEQUENCE	0x03
 
 struct drm_event_vblank {
 	struct drm_event base;
@@ -903,6 +934,16 @@ struct drm_event_vblank {
 	__u32 crtc_id; /* 0 on older kernels that do not support this */
 };
 
+/* Event delivered at sequence. Time stamp marks when the first pixel
+ * of the refresh cycle leaves the display engine for the display
+ */
+struct drm_event_crtc_sequence {
+	struct drm_event	base;
+	__u64			user_data;
+	__s64			time_ns;
+	__u64			sequence;
+};
+
 /* typedef area */
 #ifndef __KERNEL__
 typedef struct drm_clip_rect drm_clip_rect_t;
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 9816590d3ad24b0d7037eb65f9b84c3c571c9b53..ac3c6503ca27f156ddbc3dd304bbf91671a9ac2a 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -397,10 +397,20 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_MIN_EU_IN_POOL	 39
 #define I915_PARAM_MMAP_GTT_VERSION	 40
 
-/* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
  * priorities and the driver will attempt to execute batches in priority order.
+ * The param returns a capability bitmask, nonzero implies that the scheduler
+ * is enabled, with different features present according to the mask.
+ *
+ * The initial priority for each batch is supplied by the context and is
+ * controlled via I915_CONTEXT_PARAM_PRIORITY.
  */
 #define I915_PARAM_HAS_SCHEDULER	 41
+#define   I915_SCHEDULER_CAP_ENABLED	(1ul << 0)
+#define   I915_SCHEDULER_CAP_PRIORITY	(1ul << 1)
+#define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
+
 #define I915_PARAM_HUC_STATUS		 42
 
 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
@@ -1309,14 +1319,16 @@ struct drm_i915_reg_read {
 	 * be specified
 	 */
 	__u64 offset;
+#define I915_REG_READ_8B_WA (1ul << 0)
+
 	__u64 val; /* Return value */
 };
 /* Known registers:
  *
  * Render engine timestamp - 0x2358 + 64bit - gen7+
  * - Note this register returns an invalid value if using the default
- *   single instruction 8byte read, in order to workaround that use
- *   offset (0x2538 | 1) instead.
+ *   single instruction 8byte read, in order to workaround that pass
+ *   flag I915_REG_READ_8B_WA in offset field.
  *
  */
 
@@ -1359,6 +1371,10 @@ struct drm_i915_gem_context_param {
 #define I915_CONTEXT_PARAM_GTT_SIZE	0x3
 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE	0x4
 #define I915_CONTEXT_PARAM_BANNABLE	0x5
+#define I915_CONTEXT_PARAM_PRIORITY	0x6
+#define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
+#define   I915_CONTEXT_DEFAULT_PRIORITY		0
+#define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
 	__u64 value;
 };
 
@@ -1510,9 +1526,14 @@ struct drm_i915_perf_oa_config {
 	__u32 n_boolean_regs;
 	__u32 n_flex_regs;
 
-	__u64 __user mux_regs_ptr;
-	__u64 __user boolean_regs_ptr;
-	__u64 __user flex_regs_ptr;
+	/*
+	 * These fields are pointers to tuples of u32 values (register
+	 * address, value). For example the expected length of the buffer
+	 * pointed by mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
+	 */
+	__u64 mux_regs_ptr;
+	__u64 boolean_regs_ptr;
+	__u64 flex_regs_ptr;
 };
 
 #if defined(__cplusplus)
diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h
index 0674272598205c300e7f573e38dae6087de94429..8f95303f9d807d10d4fd6850d91a2486b0a490ec 100644
--- a/tools/include/uapi/linux/bpf_perf_event.h
+++ b/tools/include/uapi/linux/bpf_perf_event.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /* Copyright (c) 2016 Facebook
  *
  * This program is free software; you can redistribute it and/or
@@ -7,11 +8,10 @@
 #ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
 #define _UAPI__LINUX_BPF_PERF_EVENT_H__
 
-#include <linux/types.h>
-#include <linux/ptrace.h>
+#include <asm/bpf_perf_event.h>
 
 struct bpf_perf_event_data {
-	struct pt_regs regs;
+	bpf_user_pt_regs_t regs;
 	__u64 sample_period;
 };
 
diff --git a/tools/include/uapi/linux/kcmp.h b/tools/include/uapi/linux/kcmp.h
index 481e103da78ed42a5a76447e836ee8374541d7c7..ef130501092531b23148898efc08b0e521f3354c 100644
--- a/tools/include/uapi/linux/kcmp.h
+++ b/tools/include/uapi/linux/kcmp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI_LINUX_KCMP_H
 #define _UAPI_LINUX_KCMP_H
 
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 7e99999d6236fa2940fa2b565442e8b1b1331407..496e59a2738ba99308f438e1f0509e66e17086cb 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -630,9 +630,9 @@ struct kvm_s390_irq {
 
 struct kvm_s390_irq_state {
 	__u64 buf;
-	__u32 flags;
+	__u32 flags;        /* will stay unused for compatibility reasons */
 	__u32 len;
-	__u32 reserved[4];
+	__u32 reserved[4];  /* will stay unused for compatibility reasons */
 };
 
 /* for KVM_SET_GUEST_DEBUG */
@@ -931,6 +931,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PPC_SMT_POSSIBLE 147
 #define KVM_CAP_HYPERV_SYNIC2 148
 #define KVM_CAP_HYPERV_VP_INDEX 149
+#define KVM_CAP_S390_AIS_MIGRATION 150
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 362493a2f950b3024efae42ac321454ab66d845d..b9a4953018edeafa64e951e4ed8d53fa94c7a1cb 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -942,6 +942,7 @@ enum perf_callchain_context {
 #define PERF_AUX_FLAG_TRUNCATED		0x01	/* record was truncated to fit */
 #define PERF_AUX_FLAG_OVERWRITE		0x02	/* snapshot from overwrite mode */
 #define PERF_AUX_FLAG_PARTIAL		0x04	/* record contains gaps */
+#define PERF_AUX_FLAG_COLLISION		0x08	/* sample collided with another */
 
 #define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #define PERF_FLAG_FD_OUTPUT		(1UL << 1)
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index a8d0759a9e400c5d472fe37d13a924bc9e9777a6..af5f8c2df87ac51b401acab46c817b649e99893d 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _LINUX_PRCTL_H
 #define _LINUX_PRCTL_H
 
@@ -197,4 +198,13 @@ struct prctl_mm_map {
 # define PR_CAP_AMBIENT_LOWER		3
 # define PR_CAP_AMBIENT_CLEAR_ALL	4
 
+/* arm64 Scalable Vector Extension controls */
+/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */
+#define PR_SVE_SET_VL			50	/* set task vector length */
+# define PR_SVE_SET_VL_ONEXEC		(1 << 18) /* defer effect until exec */
+#define PR_SVE_GET_VL			51	/* get task vector length */
+/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */
+# define PR_SVE_VL_LEN_MASK		0xffff
+# define PR_SVE_VL_INHERIT		(1 << 17) /* inherit across exec */
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 217cf6f95c366037ccd2ff3cedb1d61de22c616a..a5684d0968b4fd087905e659c0ce80bd170434c2 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -478,7 +478,7 @@ class Provider(object):
     @staticmethod
     def is_field_wanted(fields_filter, field):
         """Indicate whether field is valid according to fields_filter."""
-        if not fields_filter or fields_filter == "help":
+        if not fields_filter:
             return True
         return re.match(fields_filter, field) is not None
 
@@ -549,8 +549,8 @@ class TracepointProvider(Provider):
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-        self._fields = [field for field in self.get_available_fields()
-                        if self.is_field_wanted(fields_filter, field)]
+        self.fields = [field for field in self.get_available_fields()
+                       if self.is_field_wanted(fields_filter, field)]
 
     @staticmethod
     def get_online_cpus():
@@ -950,7 +950,8 @@ class Tui(object):
             curses.nocbreak()
             curses.endwin()
 
-    def get_all_gnames(self):
+    @staticmethod
+    def get_all_gnames():
         """Returns a list of (pid, gname) tuples of all running guests"""
         res = []
         try:
@@ -963,7 +964,7 @@ class Tui(object):
             # perform a sanity check before calling the more expensive
             # function to possibly extract the guest name
             if ' -name ' in line[1]:
-                res.append((line[0], self.get_gname_from_pid(line[0])))
+                res.append((line[0], Tui.get_gname_from_pid(line[0])))
         child.stdout.close()
 
         return res
@@ -984,7 +985,8 @@ class Tui(object):
         except Exception:
             self.screen.addstr(row + 1, 2, 'Not available')
 
-    def get_pid_from_gname(self, gname):
+    @staticmethod
+    def get_pid_from_gname(gname):
         """Fuzzy function to convert guest name to QEMU process pid.
 
         Returns a list of potential pids, can be empty if no match found.
@@ -992,7 +994,7 @@ class Tui(object):
 
         """
         pids = []
-        for line in self.get_all_gnames():
+        for line in Tui.get_all_gnames():
             if gname == line[1]:
                 pids.append(int(line[0]))
 
@@ -1090,15 +1092,16 @@ class Tui(object):
             # sort by totals
             return (0, -stats[x][0])
         total = 0.
-        for val in stats.values():
-            total += val[0]
+        for key in stats.keys():
+            if key.find('(') is -1:
+                total += stats[key][0]
         if self._sorting == SORT_DEFAULT:
             sortkey = sortCurAvg
         else:
             sortkey = sortTotal
+        tavg = 0
         for key in sorted(stats.keys(), key=sortkey):
-
-            if row >= self.screen.getmaxyx()[0]:
+            if row >= self.screen.getmaxyx()[0] - 1:
                 break
             values = stats[key]
             if not values[0] and not values[1]:
@@ -1110,9 +1113,15 @@ class Tui(object):
                 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
                                    (key, values[0], values[0] * 100 / total,
                                     cur))
+                if cur is not '' and key.find('(') is -1:
+                    tavg += cur
             row += 1
         if row == 3:
             self.screen.addstr(4, 1, 'No matching events reported yet')
+        else:
+            self.screen.addstr(row, 1, '%-40s %10d        %8s' %
+                               ('Total', total, tavg if tavg else ''),
+                               curses.A_BOLD)
         self.screen.refresh()
 
     def show_msg(self, text):
@@ -1358,7 +1367,7 @@ class Tui(object):
                 if char == 'x':
                     self.update_drilldown()
                     # prevents display of current values on next refresh
-                    self.stats.get()
+                    self.stats.get(self._display_guests)
             except KeyboardInterrupt:
                 break
             except curses.error:
@@ -1451,16 +1460,13 @@ Press any other key to refresh statistics immediately.
         try:
             pids = Tui.get_pid_from_gname(val)
         except:
-            raise optparse.OptionValueError('Error while searching for guest '
-                                            '"{}", use "-p" to specify a pid '
-                                            'instead'.format(val))
+            sys.exit('Error while searching for guest "{}". Use "-p" to '
+                     'specify a pid instead?'.format(val))
         if len(pids) == 0:
-            raise optparse.OptionValueError('No guest by the name "{}" '
-                                            'found'.format(val))
+            sys.exit('Error: No guest by the name "{}" found'.format(val))
         if len(pids) > 1:
-            raise optparse.OptionValueError('Multiple processes found (pids: '
-                                            '{}) - use "-p" to specify a pid '
-                                            'instead'.format(" ".join(pids)))
+            sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
+                     'to specify the desired pid'.format(" ".join(pids)))
         parser.values.pid = pids[0]
 
     optparser = optparse.OptionParser(description=description_text,
@@ -1518,7 +1524,16 @@ Press any other key to refresh statistics immediately.
                          help='restrict statistics to guest by name',
                          callback=cb_guest_to_pid,
                          )
-    (options, _) = optparser.parse_args(sys.argv)
+    options, unkn = optparser.parse_args(sys.argv)
+    if len(unkn) != 1:
+        sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
+    try:
+        # verify that we were passed a valid regex up front
+        re.compile(options.fields)
+    except re.error:
+        sys.exit('Error: "' + options.fields + '" is not a valid regular '
+                 'expression')
+
     return options
 
 
@@ -1564,16 +1579,13 @@ def main():
 
     stats = Stats(options)
 
-    if options.fields == "help":
-        event_list = "\n"
-        s = stats.get()
-        for key in s.keys():
-            if key.find('(') != -1:
-                key = key[0:key.find('(')]
-            if event_list.find('\n' + key + '\n') == -1:
-                event_list += key + '\n'
-        sys.stdout.write(event_list)
-        return ""
+    if options.fields == 'help':
+        stats.fields_filter = None
+        event_list = []
+        for key in stats.get().keys():
+            event_list.append(key.split('(', 1)[0])
+        sys.stdout.write('  ' + '\n  '.join(sorted(set(event_list))) + '\n')
+        sys.exit(0)
 
     if options.log:
         log(stats)
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index e5cf836be8a1848bb82f39cfa3c7c75dcc67b4fa..b5b3810c9e945d7f3a39568840fbc5b73f84983b 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -50,6 +50,8 @@ INTERACTIVE COMMANDS
 *s*::   set update interval
 
 *x*::	toggle reporting of stats for child trace events
+ ::     *Note*: The stats for the parents summarize the respective child trace
+                events
 
 Press any other key to refresh statistics immediately.
 
@@ -86,7 +88,7 @@ OPTIONS
 
 -f<fields>::
 --fields=<fields>::
-	fields to display (regex)
+	fields to display (regex), "-f help" for a list of available events
 
 -h::
 --help::
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 0f94af3ccaaa25a20325db643d4d79cbaff17688..e6acc281dd3757dbd03ba32c69a89b76ed690800 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -7,9 +7,11 @@ ARCH := x86
 endif
 
 # always use the host compiler
-CC = gcc
-LD = ld
-AR = ar
+HOSTCC	?= gcc
+HOSTLD	?= ld
+CC	 = $(HOSTCC)
+LD	 = $(HOSTLD)
+AR	 = ar
 
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(CURDIR)))
@@ -44,7 +46,7 @@ $(OBJTOOL_IN): fixdep FORCE
 	@$(MAKE) $(build)=objtool
 
 $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
-	@./sync-check.sh
+	@$(CONFIG_SHELL) ./sync-check.sh
 	$(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
 
 
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 8acfc47af70efde4c1a3bb3ad6aff809f0ad0308..540a209b78ab3cd6ae3b972c57b338dc0aa9b58d 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -138,7 +138,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 			*type = INSN_STACK;
 			op->src.type = OP_SRC_ADD;
 			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
-			op->dest.type = OP_SRC_REG;
+			op->dest.type = OP_DEST_REG;
 			op->dest.reg = CFI_SP;
 		}
 		break;
diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
index 12e377184ee4ad0c55d00c3784f08b393764a2bc..e0b85930dd773e87417e2b4957b8af61221b04c0 100644
--- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -896,7 +896,7 @@ EndTable
 
 GrpTable: Grp3_1
 0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
 2: NOT Eb
 3: NEG Eb
 4: MUL AL,Eb
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 4c6b5c9ef073b31c3a01dd0ebb85efbccb4bfee0..91e8e19ff5e06193adc55c455b4d97ad947da7ee 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -44,6 +44,9 @@ int cmd_orc(int argc, const char **argv)
 	const char *objname;
 
 	argc--; argv++;
+	if (argc <= 0)
+		usage_with_options(orc_usage, check_options);
+
 	if (!strncmp(argv[0], "gen", 3)) {
 		argc = parse_options(argc, argv, check_options, orc_usage, 0);
 		if (argc != 1)
@@ -52,7 +55,6 @@ int cmd_orc(int argc, const char **argv)
 		objname = argv[0];
 
 		return check(objname, no_fp, no_unreachable, true);
-
 	}
 
 	if (!strcmp(argv[0], "dump")) {
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 9b341584eb1b56b05761bea127ee6cf94eacc190..f40d46e24bcce3efe385aff13a4cb9274967d0d6 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -427,6 +427,40 @@ static void add_ignores(struct objtool_file *file)
 	}
 }
 
+/*
+ * FIXME: For now, just ignore any alternatives which add retpolines.  This is
+ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
+ * But it at least allows objtool to understand the control flow *around* the
+ * retpoline.
+ */
+static int add_nospec_ignores(struct objtool_file *file)
+{
+	struct section *sec;
+	struct rela *rela;
+	struct instruction *insn;
+
+	sec = find_section_by_name(file->elf, ".rela.discard.nospec");
+	if (!sec)
+		return 0;
+
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		if (rela->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in %s", sec->name);
+			return -1;
+		}
+
+		insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (!insn) {
+			WARN("bad .discard.nospec entry");
+			return -1;
+		}
+
+		insn->ignore_alts = true;
+	}
+
+	return 0;
+}
+
 /*
  * Find the destination instructions for all jumps.
  */
@@ -456,6 +490,13 @@ static int add_jump_destinations(struct objtool_file *file)
 		} else if (rela->sym->sec->idx) {
 			dest_sec = rela->sym->sec;
 			dest_off = rela->sym->sym.st_value + rela->addend + 4;
+		} else if (strstr(rela->sym->name, "_indirect_thunk_")) {
+			/*
+			 * Retpoline jumps are really dynamic jumps in
+			 * disguise, so convert them accordingly.
+			 */
+			insn->type = INSN_JUMP_DYNAMIC;
+			continue;
 		} else {
 			/* sibling call */
 			insn->jump_dest = 0;
@@ -502,11 +543,18 @@ static int add_call_destinations(struct objtool_file *file)
 			dest_off = insn->offset + insn->len + insn->immediate;
 			insn->call_dest = find_symbol_by_offset(insn->sec,
 								dest_off);
+			/*
+			 * FIXME: Thanks to retpolines, it's now considered
+			 * normal for a function to call within itself.  So
+			 * disable this warning for now.
+			 */
+#if 0
 			if (!insn->call_dest) {
 				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
 					  insn->sec, insn->offset, dest_off);
 				return -1;
 			}
+#endif
 		} else if (rela->sym->type == STT_SECTION) {
 			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
 								rela->addend+4);
@@ -671,12 +719,6 @@ static int add_special_section_alts(struct objtool_file *file)
 		return ret;
 
 	list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
-		alt = malloc(sizeof(*alt));
-		if (!alt) {
-			WARN("malloc failed");
-			ret = -1;
-			goto out;
-		}
 
 		orig_insn = find_insn(file, special_alt->orig_sec,
 				      special_alt->orig_off);
@@ -687,6 +729,10 @@ static int add_special_section_alts(struct objtool_file *file)
 			goto out;
 		}
 
+		/* Ignore retpoline alternatives. */
+		if (orig_insn->ignore_alts)
+			continue;
+
 		new_insn = NULL;
 		if (!special_alt->group || special_alt->new_len) {
 			new_insn = find_insn(file, special_alt->new_sec,
@@ -712,6 +758,13 @@ static int add_special_section_alts(struct objtool_file *file)
 				goto out;
 		}
 
+		alt = malloc(sizeof(*alt));
+		if (!alt) {
+			WARN("malloc failed");
+			ret = -1;
+			goto out;
+		}
+
 		alt->insn = new_insn;
 		list_add_tail(&alt->list, &orig_insn->alts);
 
@@ -1028,6 +1081,10 @@ static int decode_sections(struct objtool_file *file)
 
 	add_ignores(file);
 
+	ret = add_nospec_ignores(file);
+	if (ret)
+		return ret;
+
 	ret = add_jump_destinations(file);
 	if (ret)
 		return ret;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 47d9ea70a83d9f9326fb7bd68431c88337b5ce31..dbadb304a410af0343809752bc6d2869668a9db0 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -44,7 +44,7 @@ struct instruction {
 	unsigned int len;
 	unsigned char type;
 	unsigned long immediate;
-	bool alt_group, visited, dead_end, ignore, hint, save, restore;
+	bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
 	struct symbol *call_dest;
 	struct instruction *jump_dest;
 	struct list_head alts;
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index 36c5bf6a2675143b788e89663ee4652efbbbb582..c3343820916a6dccf0e49bf50cbfbc811b4525fd 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -76,7 +76,8 @@ int orc_dump(const char *_objname)
 	int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0;
 	struct orc_entry *orc = NULL;
 	char *name;
-	unsigned long nr_sections, orc_ip_addr = 0;
+	size_t nr_sections;
+	Elf64_Addr orc_ip_addr = 0;
 	size_t shstrtab_idx;
 	Elf *elf;
 	Elf_Scn *scn;
@@ -187,10 +188,10 @@ int orc_dump(const char *_objname)
 				return -1;
 			}
 
-			printf("%s+%lx:", name, rela.r_addend);
+			printf("%s+%llx:", name, (unsigned long long)rela.r_addend);
 
 		} else {
-			printf("%lx:", orc_ip_addr + (i * sizeof(int)) + orc_ip[i]);
+			printf("%llx:", (unsigned long long)(orc_ip_addr + (i * sizeof(int)) + orc_ip[i]));
 		}
 
 
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index e5ca31429c9bac29a9c66c4f8ddf97a94651690b..e61fe703197baa9b21814674c0250f6420e38c93 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -165,6 +165,8 @@ int create_orc_sections(struct objtool_file *file)
 
 	/* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
 	sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
+	if (!sec)
+		return -1;
 
 	ip_relasec = elf_create_rela_section(file->elf, sec);
 	if (!ip_relasec)
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index ed65e82f034efe0e76cef718fdb8a9bd07511edb..0294bfb6c5f87c990500ee57e931034091d0da48 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -188,9 +188,7 @@ ifdef PYTHON_CONFIG
   PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
   PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
   PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
-  ifeq ($(CC_NO_CLANG), 1)
-    PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
-  endif
+  PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
   FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 endif
 
@@ -576,14 +574,15 @@ ifndef NO_GTK2
   endif
 endif
 
-
 ifdef NO_LIBPERL
   CFLAGS += -DNO_LIBPERL
 else
   PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
   PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
   PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
-  PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+  PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
+  PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+  PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
   FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
 
   ifneq ($(feature-libperl), 1)
diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
index 21322e0385b886667d7bbd9a17edddc1ad1b3c8f..09ba923debe86810f8380f7df54504dee4232ec8 100644
--- a/tools/perf/arch/s390/Makefile
+++ b/tools/perf/arch/s390/Makefile
@@ -2,3 +2,4 @@ ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
 HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h
index d2df54a6bc5a24a588ca3153833623ca0ecaa1c3..bcfbaed78cc257f15d9820238bc1ef58256f1c6a 100644
--- a/tools/perf/arch/s390/include/perf_regs.h
+++ b/tools/perf/arch/s390/include/perf_regs.h
@@ -3,7 +3,7 @@
 
 #include <stdlib.h>
 #include <linux/types.h>
-#include <../../../../arch/s390/include/uapi/asm/perf_regs.h>
+#include <asm/perf_regs.h>
 
 void perf_regs_load(u64 *regs);
 
diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c
index f47576ce13ea9da3d6220894c8c1d191a5fe889b..a8ace5cc6301f184e5d37210e00402234a014a4b 100644
--- a/tools/perf/arch/s390/util/dwarf-regs.c
+++ b/tools/perf/arch/s390/util/dwarf-regs.c
@@ -2,17 +2,43 @@
 /*
  * Mapping of DWARF debug register numbers into register names.
  *
- *    Copyright IBM Corp. 2010
- *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ * Copyright IBM Corp. 2010, 2017
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *	      Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  *
  */
 
+#include <errno.h>
 #include <stddef.h>
-#include <dwarf-regs.h>
+#include <stdlib.h>
 #include <linux/kernel.h>
+#include <asm/ptrace.h>
+#include <string.h>
+#include <dwarf-regs.h>
 #include "dwarf-regs-table.h"
 
 const char *get_arch_regstr(unsigned int n)
 {
 	return (n >= ARRAY_SIZE(s390_dwarf_regs)) ? NULL : s390_dwarf_regs[n];
 }
+
+/*
+ * Convert the register name into an offset to struct pt_regs (kernel).
+ * This is required by the BPF prologue generator.  The BPF
+ * program is called in the BPF overflow handler in the perf
+ * core.
+ */
+int regs_query_register_offset(const char *name)
+{
+	unsigned long gpr;
+
+	if (!name || strncmp(name, "%r", 2))
+		return -EINVAL;
+
+	errno = 0;
+	gpr = strtoul(name + 2, NULL, 10);
+	if (errno || gpr >= 16)
+		return -EINVAL;
+
+	return offsetof(user_pt_regs, gprs) + 8 * gpr;
+}
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index d95fdcc26f4b6f38cffb4f7a727d3c7456cbb357..944070e98a2cd9dc4903e3be02278b1a7f8f3500 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -216,6 +216,47 @@ static const char * const numa_usage[] = {
 	NULL
 };
 
+/*
+ * To get number of numa nodes present.
+ */
+static int nr_numa_nodes(void)
+{
+	int i, nr_nodes = 0;
+
+	for (i = 0; i < g->p.nr_nodes; i++) {
+		if (numa_bitmask_isbitset(numa_nodes_ptr, i))
+			nr_nodes++;
+	}
+
+	return nr_nodes;
+}
+
+/*
+ * To check if given numa node is present.
+ */
+static int is_node_present(int node)
+{
+	return numa_bitmask_isbitset(numa_nodes_ptr, node);
+}
+
+/*
+ * To check given numa node has cpus.
+ */
+static bool node_has_cpus(int node)
+{
+	struct bitmask *cpu = numa_allocate_cpumask();
+	unsigned int i;
+
+	if (cpu && !numa_node_to_cpus(node, cpu)) {
+		for (i = 0; i < cpu->size; i++) {
+			if (numa_bitmask_isbitset(cpu, i))
+				return true;
+		}
+	}
+
+	return false; /* lets fall back to nocpus safely */
+}
+
 static cpu_set_t bind_to_cpu(int target_cpu)
 {
 	cpu_set_t orig_mask, mask;
@@ -244,12 +285,12 @@ static cpu_set_t bind_to_cpu(int target_cpu)
 
 static cpu_set_t bind_to_node(int target_node)
 {
-	int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
+	int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
 	cpu_set_t orig_mask, mask;
 	int cpu;
 	int ret;
 
-	BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
+	BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
 	BUG_ON(!cpus_per_node);
 
 	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
@@ -649,7 +690,7 @@ static int parse_setup_node_list(void)
 			int i;
 
 			for (i = 0; i < mul; i++) {
-				if (t >= g->p.nr_tasks) {
+				if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) {
 					printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
 					goto out;
 				}
@@ -964,6 +1005,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
 	sum = 0;
 
 	for (node = 0; node < g->p.nr_nodes; node++) {
+		if (!is_node_present(node))
+			continue;
 		nr = nodes[node];
 		nr_min = min(nr, nr_min);
 		nr_max = max(nr, nr_max);
@@ -984,8 +1027,11 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
 	process_groups = 0;
 
 	for (node = 0; node < g->p.nr_nodes; node++) {
-		int processes = count_node_processes(node);
+		int processes;
 
+		if (!is_node_present(node))
+			continue;
+		processes = count_node_processes(node);
 		nr = nodes[node];
 		tprintf(" %2d/%-2d", nr, processes);
 
@@ -1291,7 +1337,7 @@ static void print_summary(void)
 
 	printf("\n ###\n");
 	printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
-		g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus);
+		g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus);
 	printf(" #      %5dx %5ldMB global  shared mem operations\n",
 			g->p.nr_loops, g->p.bytes_global/1024/1024);
 	printf(" #      %5dx %5ldMB process shared mem operations\n",
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index bd1fedef3d1c5d67a8ab6954467c24da622f2a27..a0f7ed2b869b0304408483e5660efa9167794a0e 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -284,7 +284,7 @@ static int perf_help_config(const char *var, const char *value, void *cb)
 		add_man_viewer(value);
 		return 0;
 	}
-	if (!strstarts(var, "man."))
+	if (strstarts(var, "man."))
 		return add_man_viewer_info(var, value);
 
 	return 0;
@@ -314,7 +314,7 @@ static const char *cmd_to_page(const char *perf_cmd)
 
 	if (!perf_cmd)
 		return "perf";
-	else if (!strstarts(perf_cmd, "perf"))
+	else if (strstarts(perf_cmd, "perf"))
 		return perf_cmd;
 
 	return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 3d7f33e19df28d90c7e33dcc37ea823197de6731..003255910c05df9104eafd2c012267c96523118b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -339,6 +339,22 @@ static int record__open(struct record *rec)
 	struct perf_evsel_config_term *err_term;
 	int rc = 0;
 
+	/*
+	 * For initial_delay we need to add a dummy event so that we can track
+	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
+	 * real events, the ones asked by the user.
+	 */
+	if (opts->initial_delay) {
+		if (perf_evlist__add_dummy(evlist))
+			return -ENOMEM;
+
+		pos = perf_evlist__first(evlist);
+		pos->tracking = 0;
+		pos = perf_evlist__last(evlist);
+		pos->tracking = 1;
+		pos->attr.enable_on_exec = 1;
+	}
+
 	perf_evlist__config(evlist, opts, &callchain_param);
 
 	evlist__for_each_entry(evlist, pos) {
@@ -749,17 +765,19 @@ static int record__synthesize(struct record *rec, bool tail)
 			goto out;
 	}
 
-	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
-						 machine);
-	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
-			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-			   "Check /proc/kallsyms permission or run as root.\n");
-
-	err = perf_event__synthesize_modules(tool, process_synthesized_event,
-					     machine);
-	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
-			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-			   "Check /proc/modules permission or run as root.\n");
+	if (!perf_evlist__exclude_kernel(rec->evlist)) {
+		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+							 machine);
+		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
+				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+				   "Check /proc/kallsyms permission or run as root.\n");
+
+		err = perf_event__synthesize_modules(tool, process_synthesized_event,
+						     machine);
+		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
+				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+				   "Check /proc/modules permission or run as root.\n");
+	}
 
 	if (perf_guest) {
 		machines__process_guests(&session->machines,
@@ -1693,7 +1711,7 @@ int cmd_record(int argc, const char **argv)
 
 	err = -ENOMEM;
 
-	if (symbol_conf.kptr_restrict)
+	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
 		pr_warning(
 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
 "check /proc/sys/kernel/kptr_restrict.\n\n"
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 1394cd8d96f7bb8d132e2a5cf892090690a19920..af5dd038195e3f6e3a6877d5884efda263e46843 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -441,6 +441,9 @@ static void report__warn_kptr_restrict(const struct report *rep)
 	struct map *kernel_map = machine__kernel_map(&rep->session->machines.host);
 	struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL;
 
+	if (perf_evlist__exclude_kernel(rep->session->evlist))
+		return;
+
 	if (kernel_map == NULL ||
 	    (kernel_map->dso->hit &&
 	     (kernel_kmap->ref_reloc_sym == NULL ||
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 68f36dc0344f2bb7d07a1bcdb05f8af4419d1d88..9b43bda45a415649a06c7b9743796bff7b760dbe 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1955,6 +1955,16 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script)
 	struct perf_evsel *evsel;
 
 	evlist__for_each_entry(script->session->evlist, evsel) {
+		/*
+		 * Already setup? I.e. we may be called twice in cases like
+		 * Intel PT, one for the intel_pt// and dummy events, then
+		 * for the evsels syntheized from the auxtrace info.
+		 *
+		 * Ses perf_script__process_auxtrace_info.
+		 */
+		if (evsel->priv != NULL)
+			continue;
+
 		evsel->priv = perf_evsel_script__new(evsel, script->session->data);
 		if (evsel->priv == NULL)
 			goto out_err_fclose;
@@ -2838,6 +2848,25 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
 	return set_maps(script);
 }
 
+#ifdef HAVE_AUXTRACE_SUPPORT
+static int perf_script__process_auxtrace_info(struct perf_tool *tool,
+					      union perf_event *event,
+					      struct perf_session *session)
+{
+	int ret = perf_event__process_auxtrace_info(tool, event, session);
+
+	if (ret == 0) {
+		struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+		ret = perf_script__setup_per_event_dump(script);
+	}
+
+	return ret;
+}
+#else
+#define perf_script__process_auxtrace_info 0
+#endif
+
 int cmd_script(int argc, const char **argv)
 {
 	bool show_full_info = false;
@@ -2866,7 +2895,7 @@ int cmd_script(int argc, const char **argv)
 			.feature	 = perf_event__process_feature,
 			.build_id	 = perf_event__process_build_id,
 			.id_index	 = perf_event__process_id_index,
-			.auxtrace_info	 = perf_event__process_auxtrace_info,
+			.auxtrace_info	 = perf_script__process_auxtrace_info,
 			.auxtrace	 = perf_event__process_auxtrace,
 			.auxtrace_error	 = perf_event__process_auxtrace_error,
 			.stat		 = perf_event__process_stat_event,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 477a8699f0b501e3c71a711fd6a5a336645a1c46..9e0d2645ae13ad40624069077cadb7650b3245b0 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -77,6 +77,7 @@
 #include "sane_ctype.h"
 
 static volatile int done;
+static volatile int resize;
 
 #define HEADER_LINE_NR  5
 
@@ -85,11 +86,13 @@ static void perf_top__update_print_entries(struct perf_top *top)
 	top->print_entries = top->winsize.ws_row - HEADER_LINE_NR;
 }
 
-static void perf_top__sig_winch(int sig __maybe_unused,
-				siginfo_t *info __maybe_unused, void *arg)
+static void winch_sig(int sig __maybe_unused)
 {
-	struct perf_top *top = arg;
+	resize = 1;
+}
 
+static void perf_top__resize(struct perf_top *top)
+{
 	get_term_dimensions(&top->winsize);
 	perf_top__update_print_entries(top);
 }
@@ -473,12 +476,8 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
 		case 'e':
 			prompt_integer(&top->print_entries, "Enter display entries (lines)");
 			if (top->print_entries == 0) {
-				struct sigaction act = {
-					.sa_sigaction = perf_top__sig_winch,
-					.sa_flags     = SA_SIGINFO,
-				};
-				perf_top__sig_winch(SIGWINCH, NULL, top);
-				sigaction(SIGWINCH, &act, NULL);
+				perf_top__resize(top);
+				signal(SIGWINCH, winch_sig);
 			} else {
 				signal(SIGWINCH, SIG_DFL);
 			}
@@ -732,14 +731,16 @@ static void perf_event__process_sample(struct perf_tool *tool,
 	if (!machine->kptr_restrict_warned &&
 	    symbol_conf.kptr_restrict &&
 	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
-		ui__warning(
+		if (!perf_evlist__exclude_kernel(top->session->evlist)) {
+			ui__warning(
 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
 "Check /proc/sys/kernel/kptr_restrict.\n\n"
 "Kernel%s samples will not be resolved.\n",
 			  al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
 			  " modules" : "");
-		if (use_browser <= 0)
-			sleep(5);
+			if (use_browser <= 0)
+				sleep(5);
+		}
 		machine->kptr_restrict_warned = true;
 	}
 
@@ -1030,6 +1031,11 @@ static int __cmd_top(struct perf_top *top)
 
 		if (hits == top->samples)
 			ret = perf_evlist__poll(top->evlist, 100);
+
+		if (resize) {
+			perf_top__resize(top);
+			resize = 0;
+		}
 	}
 
 	ret = 0;
@@ -1352,12 +1358,8 @@ int cmd_top(int argc, const char **argv)
 
 	get_term_dimensions(&top.winsize);
 	if (top.print_entries == 0) {
-		struct sigaction act = {
-			.sa_sigaction = perf_top__sig_winch,
-			.sa_flags     = SA_SIGINFO,
-		};
 		perf_top__update_print_entries(&top);
-		sigaction(SIGWINCH, &act, NULL);
+		signal(SIGWINCH, winch_sig);
 	}
 
 	status = __cmd_top(&top);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index f2757d38c7d7054d4dcef7d7e90f25efc115a8f7..84debdbad32717ce3f5a00cf646d778527cc6302 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1152,12 +1152,14 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 	if (trace->host == NULL)
 		return -ENOMEM;
 
-	if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
-		return -errno;
+	err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
+	if (err < 0)
+		goto out;
 
 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
 					    evlist->threads, trace__tool_process, false,
 					    trace->opts.proc_map_timeout, 1);
+out:
 	if (err)
 		symbol__exit();
 
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 77406d25e5218023bf15277f293338e4279fe144..3e64f10b6d6678810715e517e7354679b86efd8e 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -21,6 +21,7 @@ arch/x86/include/asm/cpufeatures.h
 arch/arm/include/uapi/asm/perf_regs.h
 arch/arm64/include/uapi/asm/perf_regs.h
 arch/powerpc/include/uapi/asm/perf_regs.h
+arch/s390/include/uapi/asm/perf_regs.h
 arch/x86/include/uapi/asm/perf_regs.h
 arch/x86/include/uapi/asm/kvm.h
 arch/x86/include/uapi/asm/kvm_perf.h
@@ -30,6 +31,7 @@ arch/x86/include/uapi/asm/vmx.h
 arch/powerpc/include/uapi/asm/kvm.h
 arch/s390/include/uapi/asm/kvm.h
 arch/s390/include/uapi/asm/kvm_perf.h
+arch/s390/include/uapi/asm/ptrace.h
 arch/s390/include/uapi/asm/sie.h
 arch/arm/include/uapi/asm/kvm.h
 arch/arm64/include/uapi/asm/kvm.h
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index cf36de7ea25587907d50db9c44e3dae5c3746c1a..0c6d1002b524eaf62ef62cc32763b041b2f33ba1 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -384,13 +384,13 @@ jvmti_write_code(void *agent, char const *sym,
 }
 
 int
-jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
-		       jvmti_line_info_t *li, int nr_lines)
+jvmti_write_debug_info(void *agent, uint64_t code,
+    int nr_lines, jvmti_line_info_t *li,
+    const char * const * file_names)
 {
 	struct jr_code_debug_info rec;
-	size_t sret, len, size, flen;
+	size_t sret, len, size, flen = 0;
 	uint64_t addr;
-	const char *fn = file;
 	FILE *fp = agent;
 	int i;
 
@@ -405,7 +405,9 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
 		return -1;
 	}
 
-	flen = strlen(file) + 1;
+	for (i = 0; i < nr_lines; ++i) {
+	    flen += strlen(file_names[i]) + 1;
+	}
 
 	rec.p.id        = JIT_CODE_DEBUG_INFO;
 	size            = sizeof(rec);
@@ -421,7 +423,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
 	 * file[]   : source file name
 	 */
 	size += nr_lines * sizeof(struct debug_entry);
-	size += flen * nr_lines;
+	size += flen;
 	rec.p.total_size = size;
 
 	/*
@@ -452,7 +454,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
 		if (sret != 1)
 			goto error;
 
-		sret = fwrite_unlocked(fn, flen, 1, fp);
+		sret = fwrite_unlocked(file_names[i], strlen(file_names[i]) + 1, 1, fp);
 		if (sret != 1)
 			goto error;
 	}
diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h
index fe32d8344a823f56de37f3ecea53af4c2f78329f..6ed82f6c06ddd0ed85ee79a5015707753d4f38b9 100644
--- a/tools/perf/jvmti/jvmti_agent.h
+++ b/tools/perf/jvmti/jvmti_agent.h
@@ -14,6 +14,7 @@ typedef struct {
 	unsigned long	pc;
 	int		line_number;
 	int		discrim; /* discriminator -- 0 for now */
+	jmethodID	methodID;
 } jvmti_line_info_t;
 
 void *jvmti_open(void);
@@ -22,11 +23,9 @@ int   jvmti_write_code(void *agent, char const *symbol_name,
 		       uint64_t vma, void const *code,
 		       const unsigned int code_size);
 
-int   jvmti_write_debug_info(void *agent,
-		             uint64_t code,
-			     const char *file,
+int   jvmti_write_debug_info(void *agent, uint64_t code, int nr_lines,
 			     jvmti_line_info_t *li,
-			     int nr_lines);
+			     const char * const * file_names);
 
 #if defined(__cplusplus)
 }
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
index c62c9fc9a525995c83ef8fc8bf2f790d599abadc..6add3e9826141346a34a93b9e0a970a22720e361 100644
--- a/tools/perf/jvmti/libjvmti.c
+++ b/tools/perf/jvmti/libjvmti.c
@@ -47,6 +47,7 @@ do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
 			tab[lines].pc = (unsigned long)pc;
 			tab[lines].line_number = loc_tab[i].line_number;
 			tab[lines].discrim = 0; /* not yet used */
+			tab[lines].methodID = m;
 			lines++;
 		} else {
 			break;
@@ -125,6 +126,99 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
 	return JVMTI_ERROR_NONE;
 }
 
+static void
+copy_class_filename(const char * class_sign, const char * file_name, char * result, size_t max_length)
+{
+	/*
+	* Assume path name is class hierarchy, this is a common practice with Java programs
+	*/
+	if (*class_sign == 'L') {
+		int j, i = 0;
+		char *p = strrchr(class_sign, '/');
+		if (p) {
+			/* drop the 'L' prefix and copy up to the final '/' */
+			for (i = 0; i < (p - class_sign); i++)
+				result[i] = class_sign[i+1];
+		}
+		/*
+		* append file name, we use loops and not string ops to avoid modifying
+		* class_sign which is used later for the symbol name
+		*/
+		for (j = 0; i < (max_length - 1) && file_name && j < strlen(file_name); j++, i++)
+			result[i] = file_name[j];
+
+		result[i] = '\0';
+	} else {
+		/* fallback case */
+		size_t file_name_len = strlen(file_name);
+		strncpy(result, file_name, file_name_len < max_length ? file_name_len : max_length);
+	}
+}
+
+static jvmtiError
+get_source_filename(jvmtiEnv *jvmti, jmethodID methodID, char ** buffer)
+{
+	jvmtiError ret;
+	jclass decl_class;
+	char *file_name = NULL;
+	char *class_sign = NULL;
+	char fn[PATH_MAX];
+	size_t len;
+
+	ret = (*jvmti)->GetMethodDeclaringClass(jvmti, methodID, &decl_class);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "GetMethodDeclaringClass", ret);
+		return ret;
+	}
+
+	ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "GetSourceFileName", ret);
+		return ret;
+	}
+
+	ret = (*jvmti)->GetClassSignature(jvmti, decl_class, &class_sign, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "GetClassSignature", ret);
+		goto free_file_name_error;
+	}
+
+	copy_class_filename(class_sign, file_name, fn, PATH_MAX);
+	len = strlen(fn);
+	*buffer = malloc((len + 1) * sizeof(char));
+	if (!*buffer) {
+		print_error(jvmti, "GetClassSignature", ret);
+		ret = JVMTI_ERROR_OUT_OF_MEMORY;
+		goto free_class_sign_error;
+	}
+	strcpy(*buffer, fn);
+	ret = JVMTI_ERROR_NONE;
+
+free_class_sign_error:
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
+free_file_name_error:
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
+
+	return ret;
+}
+
+static jvmtiError
+fill_source_filenames(jvmtiEnv *jvmti, int nr_lines,
+		      const jvmti_line_info_t * line_tab,
+		      char ** file_names)
+{
+	int index;
+	jvmtiError ret;
+
+	for (index = 0; index < nr_lines; ++index) {
+		ret = get_source_filename(jvmti, line_tab[index].methodID, &(file_names[index]));
+		if (ret != JVMTI_ERROR_NONE)
+			return ret;
+	}
+
+	return JVMTI_ERROR_NONE;
+}
+
 static void JNICALL
 compiled_method_load_cb(jvmtiEnv *jvmti,
 			jmethodID method,
@@ -135,16 +229,18 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
 			const void *compile_info)
 {
 	jvmti_line_info_t *line_tab = NULL;
+	char ** line_file_names = NULL;
 	jclass decl_class;
 	char *class_sign = NULL;
 	char *func_name = NULL;
 	char *func_sign = NULL;
-	char *file_name= NULL;
+	char *file_name = NULL;
 	char fn[PATH_MAX];
 	uint64_t addr = (uint64_t)(uintptr_t)code_addr;
 	jvmtiError ret;
 	int nr_lines = 0; /* in line_tab[] */
 	size_t len;
+	int output_debug_info = 0;
 
 	ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method,
 						&decl_class);
@@ -158,6 +254,19 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
 		if (ret != JVMTI_ERROR_NONE) {
 			warnx("jvmti: cannot get line table for method");
 			nr_lines = 0;
+		} else if (nr_lines > 0) {
+			line_file_names = malloc(sizeof(char*) * nr_lines);
+			if (!line_file_names) {
+				warnx("jvmti: cannot allocate space for line table method names");
+			} else {
+				memset(line_file_names, 0, sizeof(char*) * nr_lines);
+				ret = fill_source_filenames(jvmti, nr_lines, line_tab, line_file_names);
+				if (ret != JVMTI_ERROR_NONE) {
+					warnx("jvmti: fill_source_filenames failed");
+				} else {
+					output_debug_info = 1;
+				}
+			}
 		}
 	}
 
@@ -181,33 +290,14 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
 		goto error;
 	}
 
-	/*
-	 * Assume path name is class hierarchy, this is a common practice with Java programs
-	 */
-	if (*class_sign == 'L') {
-		int j, i = 0;
-		char *p = strrchr(class_sign, '/');
-		if (p) {
-			/* drop the 'L' prefix and copy up to the final '/' */
-			for (i = 0; i < (p - class_sign); i++)
-				fn[i] = class_sign[i+1];
-		}
-		/*
-		 * append file name, we use loops and not string ops to avoid modifying
-		 * class_sign which is used later for the symbol name
-		 */
-		for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++)
-			fn[i] = file_name[j];
-		fn[i] = '\0';
-	} else {
-		/* fallback case */
-		strcpy(fn, file_name);
-	}
+	copy_class_filename(class_sign, file_name, fn, PATH_MAX);
+
 	/*
 	 * write source line info record if we have it
 	 */
-	if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines))
-		warnx("jvmti: write_debug_info() failed");
+	if (output_debug_info)
+		if (jvmti_write_debug_info(jvmti_agent, addr, nr_lines, line_tab, (const char * const *) line_file_names))
+			warnx("jvmti: write_debug_info() failed");
 
 	len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2;
 	{
@@ -223,6 +313,13 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
 	(*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
 	(*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
 	free(line_tab);
+	while (line_file_names && (nr_lines > 0)) {
+	    if (line_file_names[nr_lines - 1]) {
+	        free(line_file_names[nr_lines - 1]);
+	    }
+	    nr_lines -= 1;
+	}
+	free(line_file_names);
 }
 
 static void JNICALL
diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
index 7a84d73324e3c1209781296dcc54260745150b53..8b3da21a08f19a110a3e9c2519495460da2eb310 100755
--- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
@@ -10,8 +10,8 @@
 
 . $(dirname $0)/lib/probe.sh
 
-ld=$(realpath /lib64/ld*.so.* | uniq)
-libc=$(echo $ld | sed 's/ld/libc/g')
+libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g')
+nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254
 
 trace_libc_inet_pton_backtrace() {
 	idx=0
@@ -37,6 +37,9 @@ trace_libc_inet_pton_backtrace() {
 	done
 }
 
+# Check for IPv6 interface existence
+ip a sh lo | fgrep -q inet6 || exit 2
+
 skip_if_no_perf_probe && \
 perf probe -q $libc inet_pton && \
 trace_libc_inet_pton_backtrace
diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
index 2e68c5f120da87250a87b5067793e2ca75ced60c..2a9ef080efd028a1038ab214c05012f7d9186aa7 100755
--- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -17,8 +17,10 @@ skip_if_no_perf_probe || exit 2
 file=$(mktemp /tmp/temporary_file.XXXXX)
 
 trace_open_vfs_getname() {
-	perf trace -e open touch $file 2>&1 | \
-	egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open\(filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
+	test "$(uname -m)" = s390x && { svc="openat"; txt="dfd: +CWD, +"; }
+
+	perf trace -e ${svc:-open} touch $file 2>&1 | \
+	egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ ${svc:-open}\(${txt}filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
 }
 
 
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index bc4a7344e274255141e54e8ec28f47a0d12e6819..89c8e1604ca73ab277b4cf988d08400c527f315f 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -84,7 +84,11 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
 
 	evsel = perf_evlist__first(evlist);
 	evsel->attr.task = 1;
+#ifdef __s390x__
+	evsel->attr.sample_freq = 1000000;
+#else
 	evsel->attr.sample_freq = 1;
+#endif
 	evsel->attr.inherit = 0;
 	evsel->attr.watermark = 0;
 	evsel->attr.wakeup_events = 1;
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 9e1668b2c5d7cf86f25a5fa6453026a6768d2950..417e3ecfe9d730fc1c02e2c2e24fe36e23fbf83a 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -62,6 +62,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 	P_MMAP_FLAG(POPULATE);
 	P_MMAP_FLAG(STACK);
 	P_MMAP_FLAG(UNINITIALIZED);
+#ifdef MAP_SYNC
+	P_MMAP_FLAG(SYNC);
+#endif
 #undef P_MMAP_FLAG
 
 	if (flags)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index da1c4c4a0dd842a279283f8b2b52bef10c5fda3d..3369c7830260d9bcce078adeeb02d72ac9344add 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -165,7 +165,7 @@ static void ins__delete(struct ins_operands *ops)
 static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
 			      struct ins_operands *ops)
 {
-	return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw);
+	return scnprintf(bf, size, "%-6s %s", ins->name, ops->raw);
 }
 
 int ins__scnprintf(struct ins *ins, char *bf, size_t size,
@@ -230,12 +230,12 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size,
 			   struct ins_operands *ops)
 {
 	if (ops->target.name)
-		return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->target.name);
+		return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name);
 
 	if (ops->target.addr == 0)
 		return ins__raw_scnprintf(ins, bf, size, ops);
 
-	return scnprintf(bf, size, "%-6.6s *%" PRIx64, ins->name, ops->target.addr);
+	return scnprintf(bf, size, "%-6s *%" PRIx64, ins->name, ops->target.addr);
 }
 
 static struct ins_ops call_ops = {
@@ -299,7 +299,7 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
 			c++;
 	}
 
-	return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64,
+	return scnprintf(bf, size, "%-6s %.*s%" PRIx64,
 			 ins->name, c ? c - ops->raw : 0, ops->raw,
 			 ops->target.offset);
 }
@@ -372,7 +372,7 @@ static int lock__scnprintf(struct ins *ins, char *bf, size_t size,
 	if (ops->locked.ins.ops == NULL)
 		return ins__raw_scnprintf(ins, bf, size, ops);
 
-	printed = scnprintf(bf, size, "%-6.6s ", ins->name);
+	printed = scnprintf(bf, size, "%-6s ", ins->name);
 	return printed + ins__scnprintf(&ops->locked.ins, bf + printed,
 					size - printed, ops->locked.ops);
 }
@@ -448,7 +448,7 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m
 static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
 			   struct ins_operands *ops)
 {
-	return scnprintf(bf, size, "%-6.6s %s,%s", ins->name,
+	return scnprintf(bf, size, "%-6s %s,%s", ins->name,
 			 ops->source.name ?: ops->source.raw,
 			 ops->target.name ?: ops->target.raw);
 }
@@ -488,7 +488,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops
 static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
 			   struct ins_operands *ops)
 {
-	return scnprintf(bf, size, "%-6.6s %s", ins->name,
+	return scnprintf(bf, size, "%-6s %s", ins->name,
 			 ops->target.name ?: ops->target.raw);
 }
 
@@ -500,7 +500,7 @@ static struct ins_ops dec_ops = {
 static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
 			  struct ins_operands *ops __maybe_unused)
 {
-	return scnprintf(bf, size, "%-6.6s", "nop");
+	return scnprintf(bf, size, "%-6s", "nop");
 }
 
 static struct ins_ops nop_ops = {
@@ -924,7 +924,7 @@ void disasm_line__free(struct disasm_line *dl)
 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw)
 {
 	if (raw || !dl->ins.ops)
-		return scnprintf(bf, size, "%-6.6s %s", dl->ins.name, dl->ops.raw);
+		return scnprintf(bf, size, "%-6s %s", dl->ins.name, dl->ops.raw);
 
 	return ins__scnprintf(&dl->ins, bf, size, &dl->ops);
 }
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index c6c891e154a63c67971c04512c0b576d5bf43087..b62e523a70352f40cdb4c90b7284858065d6fafa 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -257,7 +257,7 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist)
 		.config = PERF_COUNT_SW_DUMMY,
 		.size	= sizeof(attr), /* to capture ABI version */
 	};
-	struct perf_evsel *evsel = perf_evsel__new(&attr);
+	struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries);
 
 	if (evsel == NULL)
 		return -ENOMEM;
@@ -1786,3 +1786,15 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
 state_err:
 	return;
 }
+
+bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (!evsel->attr.exclude_kernel)
+			return false;
+	}
+
+	return true;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index e72ae64c11acb5214d82996d9c21498b1d34547e..491f69542920978f4be31dea49d8ae47aa98b155 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -312,4 +312,6 @@ perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
 
 struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
 					    union perf_event *event);
+
+bool perf_evlist__exclude_kernel(struct perf_evlist *evlist);
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f894893c203d13e00259d356414a8b781066fa5a..d5fbcf8c7aa70138716f1d4e153f515a882c4ccb 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -733,12 +733,16 @@ static void apply_config_terms(struct perf_evsel *evsel,
 	list_for_each_entry(term, config_terms, list) {
 		switch (term->type) {
 		case PERF_EVSEL__CONFIG_TERM_PERIOD:
-			attr->sample_period = term->val.period;
-			attr->freq = 0;
+			if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
+				attr->sample_period = term->val.period;
+				attr->freq = 0;
+			}
 			break;
 		case PERF_EVSEL__CONFIG_TERM_FREQ:
-			attr->sample_freq = term->val.freq;
-			attr->freq = 1;
+			if (!(term->weak && opts->user_freq != UINT_MAX)) {
+				attr->sample_freq = term->val.freq;
+				attr->freq = 1;
+			}
 			break;
 		case PERF_EVSEL__CONFIG_TERM_TIME:
 			if (term->val.time)
@@ -1371,7 +1375,7 @@ perf_evsel__process_group_data(struct perf_evsel *leader,
 static int
 perf_evsel__read_group(struct perf_evsel *leader, int cpu, int thread)
 {
-	struct perf_stat_evsel *ps = leader->priv;
+	struct perf_stat_evsel *ps = leader->stats;
 	u64 read_format = leader->attr.read_format;
 	int size = perf_evsel__read_size(leader);
 	u64 *data = ps->group_data;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 9277df96ffdad90a4899e922b574156bb708ef86..157f49e8a772d7d7a1bcc53a22d41f02e3397e78 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -67,6 +67,7 @@ struct perf_evsel_config_term {
 		bool	overwrite;
 		char	*branch;
 	} val;
+	bool weak;
 };
 
 struct perf_stat_evsel;
diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h
index 125ecd2a300d78758d3968c9fcd5049dff88c457..52dc8d911173e917124301d80052b6c4fbfb40cc 100644
--- a/tools/perf/util/intel-pt-decoder/inat.h
+++ b/tools/perf/util/intel-pt-decoder/inat.h
@@ -97,6 +97,16 @@
 #define INAT_MAKE_GROUP(grp)	((grp << INAT_GRP_OFFS) | INAT_MODRM)
 #define INAT_MAKE_IMM(imm)	(imm << INAT_IMM_OFFS)
 
+/* Identifiers for segment registers */
+#define INAT_SEG_REG_IGNORE	0
+#define INAT_SEG_REG_DEFAULT	1
+#define INAT_SEG_REG_CS		2
+#define INAT_SEG_REG_SS		3
+#define INAT_SEG_REG_DS		4
+#define INAT_SEG_REG_ES		5
+#define INAT_SEG_REG_FS		6
+#define INAT_SEG_REG_GS		7
+
 /* Attribute search APIs */
 extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
 extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
index 12e377184ee4ad0c55d00c3784f08b393764a2bc..e0b85930dd773e87417e2b4957b8af61221b04c0 100644
--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -896,7 +896,7 @@ EndTable
 
 GrpTable: Grp3_1
 0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
 2: NOT Eb
 3: NEG Eb
 4: MUL AL,Eb
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 6a8d03c3d9b7095961b07a4d648eadc6f857e868..270f3223c6df15cef58fd4d1c4efdc27dea3a520 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -172,6 +172,9 @@ void machine__exit(struct machine *machine)
 {
 	int i;
 
+	if (machine == NULL)
+		return;
+
 	machine__destroy_kernel_maps(machine);
 	map_groups__exit(&machine->kmaps);
 	dsos__exit(&machine->dsos);
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index efd78b827b0514275f9388b2c64329c45c025248..3a5cb5a6e94ad8fc039dc3410a6724263057a970 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -70,7 +70,7 @@ void perf_mmap__read_catchup(struct perf_mmap *md);
 static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {
 	struct perf_event_mmap_page *pc = mm->base;
-	u64 head = ACCESS_ONCE(pc->data_head);
+	u64 head = READ_ONCE(pc->data_head);
 	rmb();
 	return head;
 }
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index a7fcd95961ef0776f07c6968088a7b6cf7836924..170316795a1845c7e013d84ee17ba663da200d6d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1116,6 +1116,7 @@ do {								\
 	INIT_LIST_HEAD(&__t->list);				\
 	__t->type       = PERF_EVSEL__CONFIG_TERM_ ## __type;	\
 	__t->val.__name = __val;				\
+	__t->weak	= term->weak;				\
 	list_add_tail(&__t->list, head_terms);			\
 } while (0)
 
@@ -2410,6 +2411,7 @@ static int new_term(struct parse_events_term **_term,
 
 	*term = *temp;
 	INIT_LIST_HEAD(&term->list);
+	term->weak = false;
 
 	switch (term->type_val) {
 	case PARSE_EVENTS__TERM_TYPE_NUM:
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index be337c266697a718d3ad595a166f8f3e3602cb93..88108cd11b4c80132d7b9b80f1b53d727d2bac1b 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -101,6 +101,9 @@ struct parse_events_term {
 	/* error string indexes for within parsed string */
 	int err_term;
 	int err_val;
+
+	/* Coming from implicit alias */
+	bool weak;
 };
 
 struct parse_events_error {
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 07cb2ac041d7a63b53298b4205fd0b4cd629c349..80fb1593913a9227a742a1a2ed56330c877facad 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -405,6 +405,11 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias,
 			parse_events_terms__purge(&list);
 			return ret;
 		}
+		/*
+		 * Weak terms don't override command line options,
+		 * which we don't want for implicit terms in aliases.
+		 */
+		cloned->weak = true;
 		list_add_tail(&cloned->list, &list);
 	}
 	list_splice(&list, terms);
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 333a48655ee0a4cfadd89a478b09f1be7dba42b5..9316e648a880db61cad82c399c30218695d7d215 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
+
 LIBDIR := ../../../lib
 BPFDIR := $(LIBDIR)/bpf
 APIDIR := ../../../include/uapi
@@ -10,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),)
 endif
 
 CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
-LDLIBS += -lcap -lelf
+LDLIBS += -lcap -lelf -lrt
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
 	test_align test_verifier_log test_dev_cgroup
@@ -38,7 +39,7 @@ $(BPFOBJ): force
 CLANG ?= clang
 LLC   ?= llc
 
-PROBE := $(shell llc -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
 
 # Let newer LLVM versions transparently probe the kernel for availability
 # of full BPF instruction set.
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 8591c89c0828a9ff780c63f4f83f3366f79e198f..471bbbdb94db75ece343f6a5191b7501dea1ee85 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -474,27 +474,7 @@ static struct bpf_align_test tests[] = {
 		.result = REJECT,
 		.matches = {
 			{4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
-			/* ptr & 0x40 == either 0 or 0x40 */
-			{5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
-			/* ptr << 2 == unknown, (4n) */
-			{7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
-			/* (4n) + 14 == (4n+2).  We blow our bounds, because
-			 * the add could overflow.
-			 */
-			{8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
-			/* Checked s>=0 */
-			{10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
-			/* packet pointer + nonnegative (4n+2) */
-			{12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
-			{14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
-			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
-			 * We checked the bounds, but it might have been able
-			 * to overflow if the packet pointer started in the
-			 * upper half of the address space.
-			 * So we did not get a 'range' on R6, and the access
-			 * attempt will fail.
-			 */
-			{16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			/* R5 bitwise operator &= on pointer prohibited */
 		}
 	},
 	{
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 69427531408dd22ef1887d473ab4bf6548e173b9..6761be18a91fccc2d4f8ad52b0f83fec293189ae 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -351,7 +351,7 @@ static void test_bpf_obj_id(void)
 			  info_len != sizeof(struct bpf_map_info) ||
 			  strcmp((char *)map_infos[i].name, expected_map_name),
 			  "get-map-info(fd)",
-			  "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
+			  "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
 			  err, errno,
 			  map_infos[i].type, BPF_MAP_TYPE_ARRAY,
 			  info_len, sizeof(struct bpf_map_info),
@@ -395,7 +395,7 @@ static void test_bpf_obj_id(void)
 			  *(int *)prog_infos[i].map_ids != map_infos[i].id ||
 			  strcmp((char *)prog_infos[i].name, expected_prog_name),
 			  "get-prog-info(fd)",
-			  "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+			  "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
 			  err, errno, i,
 			  prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
 			  info_len, sizeof(struct bpf_prog_info),
@@ -463,7 +463,7 @@ static void test_bpf_obj_id(void)
 		      memcmp(&prog_info, &prog_infos[i], info_len) ||
 		      *(int *)prog_info.map_ids != saved_map_id,
 		      "get-prog-info(next_id->fd)",
-		      "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n",
+		      "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
 		      err, errno, info_len, sizeof(struct bpf_prog_info),
 		      memcmp(&prog_info, &prog_infos[i], info_len),
 		      *(int *)prog_info.map_ids, saved_map_id);
@@ -509,7 +509,7 @@ static void test_bpf_obj_id(void)
 		      memcmp(&map_info, &map_infos[i], info_len) ||
 		      array_value != array_magic_value,
 		      "check get-map-info(next_id->fd)",
-		      "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n",
+		      "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
 		      err, errno, info_len, sizeof(struct bpf_map_info),
 		      memcmp(&map_info, &map_infos[i], info_len),
 		      array_value, array_magic_value);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3c64f30cf63cc2b6adb532a3b1f3201533193f7f..b51017404c62d0dc8198afdf035016f6e5e2fd0b 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -422,9 +422,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr_unpriv = "R1 subtraction from stack pointer",
-		.result_unpriv = REJECT,
-		.errstr = "R1 invalid mem access",
+		.errstr = "R1 subtraction from stack pointer",
 		.result = REJECT,
 	},
 	{
@@ -606,7 +604,6 @@ static struct bpf_test tests[] = {
 		},
 		.errstr = "misaligned stack access",
 		.result = REJECT,
-		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"invalid map_fd for function call",
@@ -1797,7 +1794,6 @@ static struct bpf_test tests[] = {
 		},
 		.result = REJECT,
 		.errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
-		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"PTR_TO_STACK store/load - bad alignment on reg",
@@ -1810,7 +1806,6 @@ static struct bpf_test tests[] = {
 		},
 		.result = REJECT,
 		.errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
-		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"PTR_TO_STACK store/load - out of bounds low",
@@ -1862,9 +1857,8 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.result = ACCEPT,
-		.result_unpriv = REJECT,
-		.errstr_unpriv = "R1 pointer += pointer",
+		.result = REJECT,
+		.errstr = "R1 pointer += pointer",
 	},
 	{
 		"unpriv: neg pointer",
@@ -2592,7 +2586,8 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
 			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
-			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
 			BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
 			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
 			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
@@ -2899,7 +2894,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "invalid access to packet",
+		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -3885,9 +3880,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3, 11 },
-		.errstr_unpriv = "R0 pointer += pointer",
-		.errstr = "R0 invalid mem access 'inv'",
-		.result_unpriv = REJECT,
+		.errstr = "R0 pointer += pointer",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
@@ -3928,7 +3921,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 4 },
-		.errstr = "R4 invalid mem access",
+		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -3949,7 +3942,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 4 },
-		.errstr = "R4 invalid mem access",
+		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -3970,7 +3963,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 4 },
-		.errstr = "R4 invalid mem access",
+		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -5195,10 +5188,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 bitwise operator &= on pointer",
-		.errstr = "invalid mem access 'inv'",
+		.errstr = "R0 bitwise operator &= on pointer",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
 		"map element value illegal alu op, 2",
@@ -5214,10 +5205,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 32-bit pointer arithmetic prohibited",
-		.errstr = "invalid mem access 'inv'",
+		.errstr = "R0 32-bit pointer arithmetic prohibited",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
 		"map element value illegal alu op, 3",
@@ -5233,10 +5222,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic with /= operator",
-		.errstr = "invalid mem access 'inv'",
+		.errstr = "R0 pointer arithmetic with /= operator",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
 		"map element value illegal alu op, 4",
@@ -6019,8 +6006,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map_in_map = { 3 },
-		.errstr = "R1 type=inv expected=map_ptr",
-		.errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
+		.errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
 		.result = REJECT,
 	},
 	{
@@ -6116,6 +6102,30 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 	},
+	{
+		"ld_abs: tests on r6 and skb data reload helper",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_6, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_vlan_push),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
 	{
 		"ld_ind: check calling conv, r1",
 		.insns = {
@@ -6300,7 +6310,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr = "R0 min value is negative",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -6324,7 +6334,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr = "R0 min value is negative",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -6350,7 +6360,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr = "R8 invalid mem access 'inv'",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -6375,7 +6385,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr = "R8 invalid mem access 'inv'",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -6423,7 +6433,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr = "R0 min value is negative",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -6494,7 +6504,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr = "R0 min value is negative",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -6545,7 +6555,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr = "R0 min value is negative",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -6572,7 +6582,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr = "R0 min value is negative",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -6598,7 +6608,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr = "R0 min value is negative",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -6627,7 +6637,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr = "R0 min value is negative",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -6657,7 +6667,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_IMM(BPF_JA, 0, 0, -7),
 		},
 		.fixup_map1 = { 4 },
-		.errstr = "R0 min value is negative",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -6685,8 +6695,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer comparison prohibited",
-		.errstr = "R0 min value is negative",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 		.result_unpriv = REJECT,
 	},
@@ -6741,6 +6750,462 @@ static struct bpf_test tests[] = {
 		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 		.result = REJECT,
 	},
+	{
+		"bounds check based on zero-extended MOV",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			/* r2 = 0x0000'0000'ffff'ffff */
+			BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
+			/* r2 = 0 */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+			/* no-op */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			/* access at offset 0 */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT
+	},
+	{
+		"bounds check based on sign-extended MOV. test1",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			/* r2 = 0xffff'ffff'ffff'ffff */
+			BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+			/* r2 = 0xffff'ffff */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+			/* r0 = <oob pointer> */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			/* access to OOB pointer */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "map_value pointer and 4294967295",
+		.result = REJECT
+	},
+	{
+		"bounds check based on sign-extended MOV. test2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			/* r2 = 0xffff'ffff'ffff'ffff */
+			BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+			/* r2 = 0xfff'ffff */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
+			/* r0 = <oob pointer> */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			/* access to OOB pointer */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "R0 min value is outside of the array range",
+		.result = REJECT
+	},
+	{
+		"bounds check based on reg_off + var_off + insn_off. test1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "value_size=8 off=1073741825",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"bounds check based on reg_off + var_off + insn_off. test2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "value 1073741823",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"bounds check after truncation of non-boundary-crossing range",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			/* r1 = [0x00, 0xff] */
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			/* r2 = 0x10'0000'0000 */
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
+			/* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+			/* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+			/* r1 = [0x00, 0xff] */
+			BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
+			/* r1 = 0 */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+			/* no-op */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			/* access at offset 0 */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT
+	},
+	{
+		"bounds check after truncation of boundary-crossing range (1)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			/* r1 = [0x00, 0xff] */
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = [0xffff'ff80, 0x1'0000'007f] */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = [0xffff'ff80, 0xffff'ffff] or
+			 *      [0x0000'0000, 0x0000'007f]
+			 */
+			BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = [0x00, 0xff] or
+			 *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+			 */
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = 0 or
+			 *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+			 */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+			/* no-op or OOB pointer computation */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			/* potentially OOB access */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		/* not actually fully unbounded, but the bound is very high */
+		.errstr = "R0 unbounded memory access",
+		.result = REJECT
+	},
+	{
+		"bounds check after truncation of boundary-crossing range (2)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			/* r1 = [0x00, 0xff] */
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = [0xffff'ff80, 0x1'0000'007f] */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = [0xffff'ff80, 0xffff'ffff] or
+			 *      [0x0000'0000, 0x0000'007f]
+			 * difference to previous test: truncation via MOV32
+			 * instead of ALU32.
+			 */
+			BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = [0x00, 0xff] or
+			 *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+			 */
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = 0 or
+			 *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+			 */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+			/* no-op or OOB pointer computation */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			/* potentially OOB access */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		/* not actually fully unbounded, but the bound is very high */
+		.errstr = "R0 unbounded memory access",
+		.result = REJECT
+	},
+	{
+		"bounds check after wrapping 32-bit addition",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			/* r1 = 0x7fff'ffff */
+			BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
+			/* r1 = 0xffff'fffe */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+			/* r1 = 0 */
+			BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
+			/* no-op */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			/* access at offset 0 */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT
+	},
+	{
+		"bounds check after shift with oversized count operand",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_IMM(BPF_REG_2, 32),
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			/* r1 = (u32)1 << (u32)32 = ? */
+			BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
+			/* r1 = [0x0000, 0xffff] */
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
+			/* computes unknown pointer, potentially OOB */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			/* potentially OOB access */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "R0 max value is outside of the array range",
+		.result = REJECT
+	},
+	{
+		"bounds check after right shift of maybe-negative number",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			/* r1 = [0x00, 0xff] */
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			/* r1 = [-0x01, 0xfe] */
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+			/* r1 = 0 or 0xff'ffff'ffff'ffff */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+			/* r1 = 0 or 0xffff'ffff'ffff */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+			/* computes unknown pointer, potentially OOB */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			/* potentially OOB access */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "R0 unbounded memory access",
+		.result = REJECT
+	},
+	{
+		"bounds check map access with off+size signed 32bit overflow. test1",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_JMP_A(0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "map_value pointer and 2147483646",
+		.result = REJECT
+	},
+	{
+		"bounds check map access with off+size signed 32bit overflow. test2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_JMP_A(0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "pointer offset 1073741822",
+		.result = REJECT
+	},
+	{
+		"bounds check map access with off+size signed 32bit overflow. test3",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+			BPF_JMP_A(0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "pointer offset -1073741822",
+		.result = REJECT
+	},
+	{
+		"bounds check map access with off+size signed 32bit overflow. test4",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_1, 1000000),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+			BPF_JMP_A(0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "map_value pointer and 1000000000000",
+		.result = REJECT
+	},
+	{
+		"pointer/scalar confusion in state equality check (way 1)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_JMP_A(1),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+			BPF_JMP_A(0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "R0 leaks addr as return value"
+	},
+	{
+		"pointer/scalar confusion in state equality check (way 2)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+			BPF_JMP_A(1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "R0 leaks addr as return value"
+	},
 	{
 		"variable-offset ctx access",
 		.insns = {
@@ -6782,6 +7247,71 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_LWT_IN,
 	},
+	{
+		"indirect variable-offset stack access",
+		.insns = {
+			/* Fill the top 8 bytes of the stack */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			/* Get an unknown value */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			/* Make it small and 4-byte aligned */
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+			/* add it to fp.  We now have either fp-4 or fp-8, but
+			 * we don't know which
+			 */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+			/* dereference it indirectly */
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 5 },
+		.errstr = "variable stack read R2",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"direct stack access with 32-bit wraparound. test1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_EXIT_INSN()
+		},
+		.errstr = "fp pointer and 2147483647",
+		.result = REJECT
+	},
+	{
+		"direct stack access with 32-bit wraparound. test2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_EXIT_INSN()
+		},
+		.errstr = "fp pointer and 1073741823",
+		.result = REJECT
+	},
+	{
+		"direct stack access with 32-bit wraparound. test3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_EXIT_INSN()
+		},
+		.errstr = "fp pointer offset 1073741822",
+		.result = REJECT
+	},
 	{
 		"liveness pruning and write screening",
 		.insns = {
@@ -7103,6 +7633,19 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
+	{
+		"pkt_end - pkt_start is allowed",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
 	{
 		"XDP pkt read, pkt_end mangling, bad access 1",
 		.insns = {
@@ -7118,7 +7661,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "R1 offset is outside of the packet",
+		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
@@ -7137,7 +7680,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "R1 offset is outside of the packet",
+		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
index 3cc0b561489ea2c1b54701f1aaaf580e79c2535b..e9626cf5607ad060b070680d25986a270c5cd59c 100644
--- a/tools/testing/selftests/bpf/test_verifier_log.c
+++ b/tools/testing/selftests/bpf/test_verifier_log.c
@@ -3,6 +3,8 @@
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
+#include <sys/time.h>
+#include <sys/resource.h>
 
 #include <linux/bpf.h>
 #include <linux/filter.h>
@@ -131,11 +133,16 @@ static void test_log_bad(char *log, size_t log_len, int log_level)
 
 int main(int argc, char **argv)
 {
+	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
 	char full_log[LOG_SIZE];
 	char log[LOG_SIZE];
 	size_t want_len;
 	int i;
 
+	/* allow unlimited locked memory to have more consistent error code */
+	if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
+		perror("Unable to lift memlock rlimit");
+
 	memset(log, 1, LOG_SIZE);
 
 	/* Test incorrect attr */
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index e57b4ac40e72e0502dff75ea1d80c543280428eb..7177bea1fdfa62a1aa4e424d4dab665d8a9b7aaf 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,3 +1,4 @@
 CONFIG_USER_NS=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_TEST_BPF=m
+CONFIG_NUMA=y
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 939a337128dbf3fb08277995de98437b10b037b8..5d4f10ac2af226b58c7ece2f965749b72899ddc6 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -7,7 +7,7 @@ include ../lib.mk
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
 			check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
-			protection_keys test_vdso
+			protection_keys test_vdso test_vsyscall
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 66e5ce5b91f008d5dffc7848d0e99d730ad589b5..1aef72df20a112a30c17f2e567ca8b972a550104 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -122,8 +122,7 @@ static void check_valid_segment(uint16_t index, int ldt,
 	 * NB: Different Linux versions do different things with the
 	 * accessed bit in set_thread_area().
 	 */
-	if (ar != expected_ar &&
-	    (ldt || ar != (expected_ar | AR_ACCESSED))) {
+	if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) {
 		printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
 		       (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
 		nerrs++;
@@ -627,13 +626,10 @@ static void do_multicpu_tests(void)
 static int finish_exec_test(void)
 {
 	/*
-	 * In a sensible world, this would be check_invalid_segment(0, 1);
-	 * For better or for worse, though, the LDT is inherited across exec.
-	 * We can probably change this safely, but for now we test it.
+	 * Older kernel versions did inherit the LDT on exec() which is
+	 * wrong because exec() starts from a clean state.
 	 */
-	check_valid_segment(0, 1,
-			    AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
-			    42, true);
+	check_invalid_segment(0, 1);
 
 	return nerrs ? 1 : 0;
 }
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
new file mode 100644
index 0000000000000000000000000000000000000000..7a744fa7b78655dfc9327e5f50d40dd1dd80476a
--- /dev/null
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <errno.h>
+#include <err.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <setjmp.h>
+
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+#  define SYS_getcpu 309
+# else
+#  define SYS_getcpu 318
+# endif
+#endif
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+/* vsyscalls and vDSO */
+bool should_read_vsyscall = false;
+
+typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
+gtod_t vdso_gtod;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+vgettime_t vdso_gettime;
+
+typedef long (*time_func_t)(time_t *t);
+time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
+time_func_t vdso_time;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
+
+static void init_vdso(void)
+{
+	void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso)
+		vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso) {
+		printf("[WARN]\tfailed to find vDSO\n");
+		return;
+	}
+
+	vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
+	if (!vdso_gtod)
+		printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
+
+	vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+	if (!vdso_gettime)
+		printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
+
+	vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
+	if (!vdso_time)
+		printf("[WARN]\tfailed to find time in vDSO\n");
+
+	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+	if (!vdso_getcpu) {
+		/* getcpu() was never wired up in the 32-bit vDSO. */
+		printf("[%s]\tfailed to find getcpu in vDSO\n",
+		       sizeof(long) == 8 ? "WARN" : "NOTE");
+	}
+}
+
+static int init_vsys(void)
+{
+#ifdef __x86_64__
+	int nerrs = 0;
+	FILE *maps;
+	char line[128];
+	bool found = false;
+
+	maps = fopen("/proc/self/maps", "r");
+	if (!maps) {
+		printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
+		should_read_vsyscall = true;
+		return 0;
+	}
+
+	while (fgets(line, sizeof(line), maps)) {
+		char r, x;
+		void *start, *end;
+		char name[128];
+		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+			   &start, &end, &r, &x, name) != 5)
+			continue;
+
+		if (strcmp(name, "[vsyscall]"))
+			continue;
+
+		printf("\tvsyscall map: %s", line);
+
+		if (start != (void *)0xffffffffff600000 ||
+		    end != (void *)0xffffffffff601000) {
+			printf("[FAIL]\taddress range is nonsense\n");
+			nerrs++;
+		}
+
+		printf("\tvsyscall permissions are %c-%c\n", r, x);
+		should_read_vsyscall = (r == 'r');
+		if (x != 'x') {
+			vgtod = NULL;
+			vtime = NULL;
+			vgetcpu = NULL;
+		}
+
+		found = true;
+		break;
+	}
+
+	fclose(maps);
+
+	if (!found) {
+		printf("\tno vsyscall map in /proc/self/maps\n");
+		should_read_vsyscall = false;
+		vgtod = NULL;
+		vtime = NULL;
+		vgetcpu = NULL;
+	}
+
+	return nerrs;
+#else
+	return 0;
+#endif
+}
+
+/* syscalls */
+static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
+{
+	return syscall(SYS_gettimeofday, tv, tz);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+	return syscall(SYS_clock_gettime, id, ts);
+}
+
+static inline long sys_time(time_t *t)
+{
+	return syscall(SYS_time, t);
+}
+
+static inline long sys_getcpu(unsigned * cpu, unsigned * node,
+			      void* cache)
+{
+	return syscall(SYS_getcpu, cpu, node, cache);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+	siglongjmp(jmpbuf, 1);
+}
+
+static double tv_diff(const struct timeval *a, const struct timeval *b)
+{
+	return (double)(a->tv_sec - b->tv_sec) +
+		(double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
+}
+
+static int check_gtod(const struct timeval *tv_sys1,
+		      const struct timeval *tv_sys2,
+		      const struct timezone *tz_sys,
+		      const char *which,
+		      const struct timeval *tv_other,
+		      const struct timezone *tz_other)
+{
+	int nerrs = 0;
+	double d1, d2;
+
+	if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
+		printf("[FAIL] %s tz mismatch\n", which);
+		nerrs++;
+	}
+
+	d1 = tv_diff(tv_other, tv_sys1);
+	d2 = tv_diff(tv_sys2, tv_other); 
+	printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
+
+	if (d1 < 0 || d2 < 0) {
+		printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
+		nerrs++;
+	} else {
+		printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
+	}
+
+	return nerrs;
+}
+
+static int test_gtod(void)
+{
+	struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
+	struct timezone tz_sys, tz_vdso, tz_vsys;
+	long ret_vdso = -1;
+	long ret_vsys = -1;
+	int nerrs = 0;
+
+	printf("[RUN]\ttest gettimeofday()\n");
+
+	if (sys_gtod(&tv_sys1, &tz_sys) != 0)
+		err(1, "syscall gettimeofday");
+	if (vdso_gtod)
+		ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
+	if (vgtod)
+		ret_vsys = vgtod(&tv_vsys, &tz_vsys);
+	if (sys_gtod(&tv_sys2, &tz_sys) != 0)
+		err(1, "syscall gettimeofday");
+
+	if (vdso_gtod) {
+		if (ret_vdso == 0) {
+			nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
+		} else {
+			printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
+			nerrs++;
+		}
+	}
+
+	if (vgtod) {
+		if (ret_vsys == 0) {
+			nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
+		} else {
+			printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
+			nerrs++;
+		}
+	}
+
+	return nerrs;
+}
+
+static int test_time(void) {
+	int nerrs = 0;
+
+	printf("[RUN]\ttest time()\n");
+	long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
+	long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
+	t_sys1 = sys_time(&t2_sys1);
+	if (vdso_time)
+		t_vdso = vdso_time(&t2_vdso);
+	if (vtime)
+		t_vsys = vtime(&t2_vsys);
+	t_sys2 = sys_time(&t2_sys2);
+	if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
+		printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
+		nerrs++;
+		return nerrs;
+	}
+
+	if (vdso_time) {
+		if (t_vdso < 0 || t_vdso != t2_vdso) {
+			printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
+			nerrs++;
+		} else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
+			printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
+			nerrs++;
+		} else {
+			printf("[OK]\tvDSO time() is okay\n");
+		}
+	}
+
+	if (vtime) {
+		if (t_vsys < 0 || t_vsys != t2_vsys) {
+			printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
+			nerrs++;
+		} else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
+			printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
+			nerrs++;
+		} else {
+			printf("[OK]\tvsyscall time() is okay\n");
+		}
+	}
+
+	return nerrs;
+}
+
+static int test_getcpu(int cpu)
+{
+	int nerrs = 0;
+	long ret_sys, ret_vdso = -1, ret_vsys = -1;
+
+	printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
+
+	cpu_set_t cpuset;
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+		printf("[SKIP]\tfailed to force CPU %d\n", cpu);
+		return nerrs;
+	}
+
+	unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
+	unsigned node = 0;
+	bool have_node = false;
+	ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+	if (vdso_getcpu)
+		ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+	if (vgetcpu)
+		ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+	if (ret_sys == 0) {
+		if (cpu_sys != cpu) {
+			printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
+			nerrs++;
+		}
+
+		have_node = true;
+		node = node_sys;
+	}
+
+	if (vdso_getcpu) {
+		if (ret_vdso) {
+			printf("[FAIL]\tvDSO getcpu() failed\n");
+			nerrs++;
+		} else {
+			if (!have_node) {
+				have_node = true;
+				node = node_vdso;
+			}
+
+			if (cpu_vdso != cpu) {
+				printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
+				nerrs++;
+			} else {
+				printf("[OK]\tvDSO reported correct CPU\n");
+			}
+
+			if (node_vdso != node) {
+				printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
+				nerrs++;
+			} else {
+				printf("[OK]\tvDSO reported correct node\n");
+			}
+		}
+	}
+
+	if (vgetcpu) {
+		if (ret_vsys) {
+			printf("[FAIL]\tvsyscall getcpu() failed\n");
+			nerrs++;
+		} else {
+			if (!have_node) {
+				have_node = true;
+				node = node_vsys;
+			}
+
+			if (cpu_vsys != cpu) {
+				printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
+				nerrs++;
+			} else {
+				printf("[OK]\tvsyscall reported correct CPU\n");
+			}
+
+			if (node_vsys != node) {
+				printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
+				nerrs++;
+			} else {
+				printf("[OK]\tvsyscall reported correct node\n");
+			}
+		}
+	}
+
+	return nerrs;
+}
+
+static int test_vsys_r(void)
+{
+#ifdef __x86_64__
+	printf("[RUN]\tChecking read access to the vsyscall page\n");
+	bool can_read;
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		*(volatile int *)0xffffffffff600000;
+		can_read = true;
+	} else {
+		can_read = false;
+	}
+
+	if (can_read && !should_read_vsyscall) {
+		printf("[FAIL]\tWe have read access, but we shouldn't\n");
+		return 1;
+	} else if (!can_read && should_read_vsyscall) {
+		printf("[FAIL]\tWe don't have read access, but we should\n");
+		return 1;
+	} else {
+		printf("[OK]\tgot expected result\n");
+	}
+#endif
+
+	return 0;
+}
+
+
+#ifdef __x86_64__
+#define X86_EFLAGS_TF (1UL << 8)
+static volatile sig_atomic_t num_vsyscall_traps;
+
+static unsigned long get_eflags(void)
+{
+	unsigned long eflags;
+	asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
+	return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+	asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t *)ctx_void;
+	unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
+
+	if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
+		num_vsyscall_traps++;
+}
+
+static int test_native_vsyscall(void)
+{
+	time_t tmp;
+	bool is_native;
+
+	if (!vtime)
+		return 0;
+
+	printf("[RUN]\tchecking for native vsyscall\n");
+	sethandler(SIGTRAP, sigtrap, 0);
+	set_eflags(get_eflags() | X86_EFLAGS_TF);
+	vtime(&tmp);
+	set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+	/*
+	 * If vsyscalls are emulated, we expect a single trap in the
+	 * vsyscall page -- the call instruction will trap with RIP
+	 * pointing to the entry point before emulation takes over.
+	 * In native mode, we expect two traps, since whatever code
+	 * the vsyscall page contains will be more than just a ret
+	 * instruction.
+	 */
+	is_native = (num_vsyscall_traps > 1);
+
+	printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+	       (is_native ? "native" : "emulated"),
+	       (int)num_vsyscall_traps);
+
+	return 0;
+}
+#endif
+
+int main(int argc, char **argv)
+{
+	int nerrs = 0;
+
+	init_vdso();
+	nerrs += init_vsys();
+
+	nerrs += test_gtod();
+	nerrs += test_time();
+	nerrs += test_getcpu(0);
+	nerrs += test_getcpu(1);
+
+	sethandler(SIGSEGV, sigsegv, 0);
+	nerrs += test_vsys_r();
+
+#ifdef __x86_64__
+	nerrs += test_native_vsyscall();
+#endif
+
+	return nerrs ? 1 : 0;
+}
diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c
index 5727dfb15a83efecb4ef7dc951149183a38e5108..c9c81614a66ad6245d831066b204b72b1af8817f 100644
--- a/tools/usb/usbip/libsrc/vhci_driver.c
+++ b/tools/usb/usbip/libsrc/vhci_driver.c
@@ -50,14 +50,14 @@ static int parse_status(const char *value)
 
 	while (*c != '\0') {
 		int port, status, speed, devid;
-		unsigned long socket;
+		int sockfd;
 		char lbusid[SYSFS_BUS_ID_SIZE];
 		struct usbip_imported_device *idev;
 		char hub[3];
 
-		ret = sscanf(c, "%2s  %d %d %d %x %lx %31s\n",
+		ret = sscanf(c, "%2s  %d %d %d %x %u %31s\n",
 				hub, &port, &status, &speed,
-				&devid, &socket, lbusid);
+				&devid, &sockfd, lbusid);
 
 		if (ret < 5) {
 			dbg("sscanf failed: %d", ret);
@@ -66,7 +66,7 @@ static int parse_status(const char *value)
 
 		dbg("hub %s port %d status %d speed %d devid %x",
 				hub, port, status, speed, devid);
-		dbg("socket %lx lbusid %s", socket, lbusid);
+		dbg("sockfd %u lbusid %s", sockfd, lbusid);
 
 		/* if a device is connected, look at it */
 		idev = &vhci_driver->idev[port];
@@ -106,7 +106,7 @@ static int parse_status(const char *value)
 	return 0;
 }
 
-#define MAX_STATUS_NAME 16
+#define MAX_STATUS_NAME 18
 
 static int refresh_imported_device_list(void)
 {
@@ -329,9 +329,17 @@ int usbip_vhci_refresh_device_list(void)
 int usbip_vhci_get_free_port(uint32_t speed)
 {
 	for (int i = 0; i < vhci_driver->nports; i++) {
-		if (speed == USB_SPEED_SUPER &&
-		    vhci_driver->idev[i].hub != HUB_SPEED_SUPER)
-			continue;
+
+		switch (speed) {
+		case	USB_SPEED_SUPER:
+			if (vhci_driver->idev[i].hub != HUB_SPEED_SUPER)
+				continue;
+		break;
+		default:
+			if (vhci_driver->idev[i].hub != HUB_SPEED_HIGH)
+				continue;
+		break;
+		}
 
 		if (vhci_driver->idev[i].status == VDEV_ST_NULL)
 			return vhci_driver->idev[i].port;
diff --git a/tools/usb/usbip/src/utils.c b/tools/usb/usbip/src/utils.c
index 2b3d6d2350158b73a2597dc1a78ac007b6a3563c..3d7b42e7729941b1ca8f35fb7f6d10d4151a1a2f 100644
--- a/tools/usb/usbip/src/utils.c
+++ b/tools/usb/usbip/src/utils.c
@@ -30,6 +30,7 @@ int modify_match_busid(char *busid, int add)
 	char command[SYSFS_BUS_ID_SIZE + 4];
 	char match_busid_attr_path[SYSFS_PATH_MAX];
 	int rc;
+	int cmd_size;
 
 	snprintf(match_busid_attr_path, sizeof(match_busid_attr_path),
 		 "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME,
@@ -37,12 +38,14 @@ int modify_match_busid(char *busid, int add)
 		 attr_name);
 
 	if (add)
-		snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid);
+		cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s",
+				    busid);
 	else
-		snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid);
+		cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s",
+				    busid);
 
 	rc = write_sysfs_attribute(match_busid_attr_path, command,
-				   sizeof(command));
+				   cmd_size);
 	if (rc < 0) {
 		dbg("failed to write match_busid: %s", strerror(errno));
 		return -1;
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index 38bb171acebade83314d9556baa7893c7d203911..e6e81305ef469559ffde4013535be10019c30da4 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -16,24 +16,41 @@
 #define unlikely(x)    (__builtin_expect(!!(x), 0))
 #define likely(x)    (__builtin_expect(!!(x), 1))
 #define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))
+#define SIZE_MAX        (~(size_t)0)
+
 typedef pthread_spinlock_t  spinlock_t;
 
 typedef int gfp_t;
-static void *kmalloc(unsigned size, gfp_t gfp)
-{
-	return memalign(64, size);
-}
+#define __GFP_ZERO 0x1
 
-static void *kzalloc(unsigned size, gfp_t gfp)
+static void *kmalloc(unsigned size, gfp_t gfp)
 {
 	void *p = memalign(64, size);
 	if (!p)
 		return p;
-	memset(p, 0, size);
 
+	if (gfp & __GFP_ZERO)
+		memset(p, 0, size);
 	return p;
 }
 
+static inline void *kzalloc(unsigned size, gfp_t flags)
+{
+	return kmalloc(size, flags | __GFP_ZERO);
+}
+
+static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
+{
+	if (size != 0 && n > SIZE_MAX / size)
+		return NULL;
+	return kmalloc(n * size, flags);
+}
+
+static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
+{
+	return kmalloc_array(n, size, flags | __GFP_ZERO);
+}
+
 static void kfree(void *p)
 {
 	if (p)
diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh
index 35b039864b778c1f03df563dce35ca59da1d79b4..0cf28aa6f21c3b392b8a90b4aa9e9087f6807b3e 100644
--- a/tools/vm/slabinfo-gnuplot.sh
+++ b/tools/vm/slabinfo-gnuplot.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 
 # Sergey Senozhatsky, 2015
 # sergey.senozhatsky.work@gmail.com
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 4151250ce8da94ac3c83853e7d6e980515ad23a5..cc29a814832837f5fb237dfdf74845a284e04367 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -92,16 +92,23 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
 	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
 	struct arch_timer_context *vtimer;
+	u32 cnt_ctl;
 
-	if (!vcpu) {
-		pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
-		return IRQ_NONE;
-	}
-	vtimer = vcpu_vtimer(vcpu);
+	/*
+	 * We may see a timer interrupt after vcpu_put() has been called which
+	 * sets the CPU's vcpu pointer to NULL, because even though the timer
+	 * has been disabled in vtimer_save_state(), the hardware interrupt
+	 * signal may not have been retired from the interrupt controller yet.
+	 */
+	if (!vcpu)
+		return IRQ_HANDLED;
 
+	vtimer = vcpu_vtimer(vcpu);
 	if (!vtimer->irq.level) {
-		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-		if (kvm_timer_irq_can_fire(vtimer))
+		cnt_ctl = read_sysreg_el0(cntv_ctl);
+		cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
+			   ARCH_TIMER_CTRL_IT_MASK;
+		if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
 			kvm_timer_update_irq(vcpu, true, vtimer);
 	}
 
@@ -355,6 +362,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
 
 	/* Disable the virtual timer */
 	write_sysreg_el0(0, cntv_ctl);
+	isb();
 
 	vtimer->loaded = false;
 out:
@@ -479,9 +487,6 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 
 	vtimer_restore_state(vcpu);
 
-	if (has_vhe())
-		disable_el1_phys_timer_access();
-
 	/* Set the background timer for the physical timer emulation. */
 	phys_timer_emulate(vcpu);
 }
@@ -510,9 +515,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	if (unlikely(!timer->enabled))
 		return;
 
-	if (has_vhe())
-		enable_el1_phys_timer_access();
-
 	vtimer_save_state(vcpu);
 
 	/*
@@ -726,7 +728,7 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
 	return 0;
 }
 
-int kvm_timer_hyp_init(void)
+int kvm_timer_hyp_init(bool has_gic)
 {
 	struct arch_timer_kvm_info *info;
 	int err;
@@ -762,10 +764,13 @@ int kvm_timer_hyp_init(void)
 		return err;
 	}
 
-	err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus());
-	if (err) {
-		kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
-		goto out_free_irq;
+	if (has_gic) {
+		err = irq_set_vcpu_affinity(host_vtimer_irq,
+					    kvm_get_running_vcpus());
+		if (err) {
+			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
+			goto out_free_irq;
+		}
 	}
 
 	kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
@@ -841,7 +846,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 no_vgic:
 	preempt_disable();
 	timer->enabled = 1;
-	kvm_timer_vcpu_load_vgic(vcpu);
+	kvm_timer_vcpu_load(vcpu);
 	preempt_enable();
 
 	return 0;
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index a67c106d73f5c33a039d046c9f46454bbcfc801f..2e43f9d42bd5db2a07438bb98f5e029c6246adb4 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -188,6 +188,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 			kvm->vcpus[i] = NULL;
 		}
 	}
+	atomic_set(&kvm->online_vcpus, 0);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -296,7 +297,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_free_memory_caches(vcpu);
 	kvm_timer_vcpu_terminate(vcpu);
-	kvm_vgic_vcpu_destroy(vcpu);
 	kvm_pmu_vcpu_destroy(vcpu);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
@@ -627,6 +627,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		ret = kvm_handle_mmio_return(vcpu, vcpu->run);
 		if (ret)
 			return ret;
+		if (kvm_arm_handle_step_debug(vcpu, vcpu->run))
+			return 0;
+
 	}
 
 	if (run->immediate_exit)
@@ -1323,7 +1326,7 @@ static int init_subsystems(void)
 	/*
 	 * Init HYP architected timer support
 	 */
-	err = kvm_timer_hyp_init();
+	err = kvm_timer_hyp_init(vgic_present);
 	if (err)
 		goto out;
 
@@ -1502,7 +1505,7 @@ int kvm_arch_init(void *opaque)
 	bool in_hyp_mode;
 
 	if (!is_hyp_mode_available()) {
-		kvm_err("HYP mode not available\n");
+		kvm_info("HYP mode not available\n");
 		return -ENODEV;
 	}
 
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index f39861639f08f81a305fff841f0f08dcf0878686..f24404b3c8dff8016c358c9339655465a0468ee7 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -27,42 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high)
 	write_sysreg(cntvoff, cntvoff_el2);
 }
 
-void __hyp_text enable_el1_phys_timer_access(void)
-{
-	u64 val;
-
-	/* Allow physical timer/counter access for the host */
-	val = read_sysreg(cnthctl_el2);
-	val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
-	write_sysreg(val, cnthctl_el2);
-}
-
-void __hyp_text disable_el1_phys_timer_access(void)
-{
-	u64 val;
-
-	/*
-	 * Disallow physical timer access for the guest
-	 * Physical counter access is allowed
-	 */
-	val = read_sysreg(cnthctl_el2);
-	val &= ~CNTHCTL_EL1PCEN;
-	val |= CNTHCTL_EL1PCTEN;
-	write_sysreg(val, cnthctl_el2);
-}
-
 void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)
 {
 	/*
 	 * We don't need to do this for VHE since the host kernel runs in EL2
 	 * with HCR_EL2.TGE ==1, which makes those bits have no impact.
 	 */
-	if (!has_vhe())
-		enable_el1_phys_timer_access();
+	if (!has_vhe()) {
+		u64 val;
+
+		/* Allow physical timer/counter access for the host */
+		val = read_sysreg(cnthctl_el2);
+		val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+		write_sysreg(val, cnthctl_el2);
+	}
 }
 
 void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu)
 {
-	if (!has_vhe())
-		disable_el1_phys_timer_access();
+	if (!has_vhe()) {
+		u64 val;
+
+		/*
+		 * Disallow physical timer access for the guest
+		 * Physical counter access is allowed
+		 */
+		val = read_sysreg(cnthctl_el2);
+		val &= ~CNTHCTL_EL1PCEN;
+		val |= CNTHCTL_EL1PCTEN;
+		write_sysreg(val, cnthctl_el2);
+	}
 }
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index a3f18d3623661ae6204750fa9aa8639a5fde7617..d7fd46fe9efb35ca28a0685b333f8c68a61b2d64 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -34,11 +34,7 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
 	else
 		elrsr1 = 0;
 
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
-#else
 	cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
-#endif
 }
 
 static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index b6e715fd3c90af8c74408b72652f9974a3fb894d..dac7ceb1a677746cadb086a2cf8a07d8e560373c 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		}
 
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
-			       data);
+			       &data);
 		data = vcpu_data_host_to_guest(vcpu, data, len);
 		vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
 	}
@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
 					       len);
 
-		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
 		kvm_mmio_write_buf(data_buf, len, data);
 
 		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
 				       data_buf);
 	} else {
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
-			       fault_ipa, 0);
+			       fault_ipa, NULL);
 
 		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
 				      data_buf);
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index b36945d49986dd5c0f097f16837d72d81f655308..b4b69c2d10120237e12bc6524243071bf645f1d9 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
  */
 void free_hyp_pgds(void)
 {
-	unsigned long addr;
-
 	mutex_lock(&kvm_hyp_pgd_mutex);
 
 	if (boot_hyp_pgd) {
@@ -521,10 +519,10 @@ void free_hyp_pgds(void)
 
 	if (hyp_pgd) {
 		unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
-		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
+		unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
+				(uintptr_t)high_memory - PAGE_OFFSET);
+		unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
+				VMALLOC_END - VMALLOC_START);
 
 		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
index b7baf581611ae8730bb480d6e465607eb3cb3d2a..99e026d2dade9bf5dc7e1506bca391b2ac631d41 100644
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -112,8 +112,7 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm)
 	u32 nr = dist->nr_spis;
 	int i, ret;
 
-	entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry),
-			  GFP_KERNEL);
+	entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL);
 	if (!entries)
 		return -ENOMEM;
 
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 1f761a9991e7d6ee8f3eddaa67aabd5152387eed..8e633bd9cc1e74706e0490419f27a20e6fa8e0b7 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -421,6 +421,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 	u32 *intids;
 	int nr_irqs, i;
 	unsigned long flags;
+	u8 pendmask;
 
 	nr_irqs = vgic_copy_lpi_list(vcpu, &intids);
 	if (nr_irqs < 0)
@@ -428,7 +429,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < nr_irqs; i++) {
 		int byte_offset, bit_nr;
-		u8 pendmask;
 
 		byte_offset = intids[i] / BITS_PER_BYTE;
 		bit_nr = intids[i] % BITS_PER_BYTE;
@@ -821,6 +821,8 @@ static int vgic_its_alloc_collection(struct vgic_its *its,
 		return E_ITS_MAPC_COLLECTION_OOR;
 
 	collection = kzalloc(sizeof(*collection), GFP_KERNEL);
+	if (!collection)
+		return -ENOMEM;
 
 	collection->collection_id = coll_id;
 	collection->target_addr = COLLECTION_NOT_MAPPED;
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 2f05f732d3fd467e600e8b223b36cd80ac91303a..f47e8481fa452d2b67aaca6fc6985a4fa05b4e77 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -327,13 +327,13 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
 	int last_byte_offset = -1;
 	struct vgic_irq *irq;
 	int ret;
+	u8 val;
 
 	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
 		int byte_offset, bit_nr;
 		struct kvm_vcpu *vcpu;
 		gpa_t pendbase, ptr;
 		bool stored;
-		u8 val;
 
 		vcpu = irq->target_vcpu;
 		if (!vcpu)
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 53c324aa44efa7398727c08b0c3901f0b33c4a84..4a37292855bc7f8bed299b99407d68882b98c84e 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -337,8 +337,10 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
 		goto out;
 
 	WARN_ON(!(irq->hw && irq->host_irq == virq));
-	irq->hw = false;
-	ret = its_unmap_vlpi(virq);
+	if (irq->hw) {
+		irq->hw = false;
+		ret = its_unmap_vlpi(virq);
+	}
 
 out:
 	mutex_unlock(&its->its_lock);
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index b168a328a9e0748052d506e63df613517a7f7e17..ecb8e25f5fe56d69065757a80c44c2d4a532bbcb 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -492,6 +492,7 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
 {
 	struct vgic_irq *irq;
+	unsigned long flags;
 	int ret = 0;
 
 	if (!vgic_initialized(vcpu->kvm))
@@ -502,12 +503,12 @@ int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
 		return -EINVAL;
 
 	irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
-	spin_lock(&irq->irq_lock);
+	spin_lock_irqsave(&irq->irq_lock, flags);
 	if (irq->owner && irq->owner != owner)
 		ret = -EEXIST;
 	else
 		irq->owner = owner;
-	spin_unlock(&irq->irq_lock);
+	spin_unlock_irqrestore(&irq->irq_lock, flags);
 
 	return ret;
 }
@@ -823,13 +824,14 @@ void vgic_kick_vcpus(struct kvm *kvm)
 
 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
 {
-	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
+	struct vgic_irq *irq;
 	bool map_is_active;
 	unsigned long flags;
 
 	if (!vgic_initialized(vcpu->kvm))
 		return false;
 
+	irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 	spin_lock_irqsave(&irq->irq_lock, flags);
 	map_is_active = irq->hw && irq->active;
 	spin_unlock_irqrestore(&irq->irq_lock, flags);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c422c10cd1dd176a973b234f742414cd2443cbff..210bf820385a70967c729259bf762b0d9be98b73 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -135,6 +135,11 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
 static unsigned long long kvm_createvm_count;
 static unsigned long long kvm_active_vms;
 
+__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end)
+{
+}
+
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 {
 	if (pfn_valid(pfn))
@@ -360,6 +365,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 		kvm_flush_remote_tlbs(kvm);
 
 	spin_unlock(&kvm->mmu_lock);
+
+	kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+
 	srcu_read_unlock(&kvm->srcu, idx);
 }